mirror of
https://github.com/viq/NewsBlur.git
synced 2025-11-01 09:09:16 +00:00
Don't collide stories that are too short to be collided.
This commit is contained in:
parent
4f6edc2acf
commit
79c6ea09cd
2 changed files with 10 additions and 4 deletions
|
|
@@ -791,7 +791,7 @@ class Feed(models.Model):
|
|||
logging.debug(" ---> [%-30s] ~FBChecking ~SB%s~SN new/updated against ~SB%s~SN stories" % (
|
||||
self.title[:30],
|
||||
len(stories),
|
||||
len(existing_stories)))
|
||||
len(existing_stories.keys())))
|
||||
|
||||
for story in stories:
|
||||
if not story.get('title'):
|
||||
|
|
@@ -1112,12 +1112,13 @@ class Feed(models.Model):
|
|||
story_in_system = None
|
||||
story_has_changed = False
|
||||
story_link = self.get_permalink(story)
|
||||
existing_stories_guids = existing_stories.keys()
|
||||
# story_pub_date = story.get('published')
|
||||
# story_published_now = story.get('published_now', False)
|
||||
# start_date = story_pub_date - datetime.timedelta(hours=8)
|
||||
# end_date = story_pub_date + datetime.timedelta(hours=8)
|
||||
|
||||
for existing_story in existing_stories:
|
||||
for existing_story in existing_stories.values():
|
||||
content_ratio = 0
|
||||
# existing_story_pub_date = existing_story.story_date
|
||||
# print 'Story pub date: %s %s' % (story_published_now, story_pub_date)
|
||||
|
|
@@ -1135,7 +1136,10 @@ class Feed(models.Model):
|
|||
|
||||
if isinstance(existing_story.id, unicode):
|
||||
existing_story.story_guid = existing_story.id
|
||||
if story.get('guid') and story.get('guid') == existing_story.story_guid:
|
||||
if (story.get('guid') in existing_stories_guids and
|
||||
story.get('guid') != existing_story.story_guid):
|
||||
continue
|
||||
elif story.get('guid') == existing_story.story_guid:
|
||||
story_in_system = existing_story
|
||||
|
||||
# Title distance + content distance, checking if story changed
|
||||
|
|
@@ -1167,8 +1171,10 @@ class Feed(models.Model):
|
|||
|
||||
if story_in_system and not story_has_changed:
|
||||
if story_content != existing_story_content:
|
||||
# print "Content difference - %s/%s" % (story_content, existing_story_content)
|
||||
story_has_changed = True
|
||||
if story_link != existing_story.story_permalink:
|
||||
# print "Permalink difference - %s/%s" % (story_link, existing_story.story_permalink)
|
||||
story_has_changed = True
|
||||
# if story_pub_date != existing_story.story_date:
|
||||
# story_has_changed = True
|
||||
|
|
|
|||
|
|
@@ -216,7 +216,7 @@ class ProcessFeed:
|
|||
stories.append(story)
|
||||
story_guids.append(story.get('guid'))
|
||||
|
||||
existing_stories = list(MStory.objects(
|
||||
existing_stories = dict((s.story_guid, s) for s in MStory.objects(
|
||||
# story_guid__in=story_guids,
|
||||
story_date__gte=start_date,
|
||||
story_feed_id=self.feed.pk
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue