Nice middle ground between guids and dates. Now only retrieving as many guids as are in the feed, but then comparing them with N stories. Very old stories shouldn't change, and they'll get thwarted by an IntegrityError.

This commit is contained in:
Samuel Clay 2010-12-16 17:10:13 -05:00
parent 5b3f199852
commit ae4646e979
2 changed files with 11 additions and 7 deletions

View file

@ -353,9 +353,9 @@ class Feed(models.Model):
s.save()
ret_values[ENTRY_NEW] += 1
cache.set('updated_feed:%s' % self.id, 1)
except (IntegrityError, OperationError):
except (IntegrityError, OperationError), e:
ret_values[ENTRY_ERR] += 1
# print('Saving new story, IntegrityError: %s - %s: %s' % (self.feed_title, story.get('title'), e))
# logging.info('Saving new story, IntegrityError: %s - %s: %s' % (self.feed_title, story.get('title'), e))
elif existing_story and story_has_changed:
# update story
# logging.debug('- Updated story in feed (%s - %s): %s / %s' % (self.feed_title, story.get('title'), len(existing_story.story_content), len(story_content)))

View file

@ -195,21 +195,25 @@ class ProcessFeed:
self.feed.save()
# Compare new stories to existing stories, adding and updating
# start_date = datetime.datetime.utcnow()
start_date = datetime.datetime.utcnow()
# end_date = datetime.datetime.utcnow()
story_guids = []
for entry in self.fpf.entries:
story = pre_process_story(entry)
# if story.get('published') < start_date:
# start_date = story.get('published')
if story.get('published') < start_date:
start_date = story.get('published')
# if story.get('published') > end_date:
# end_date = story.get('published')
story_guids.append(story.get('guid') or story.get('link'))
existing_stories = MStory.objects(
story_guid__in=story_guids,
# | Q(story_date__gte=start_date),
# story_guid__in=story_guids,
story_date__gte=start_date,
story_feed_id=self.feed.pk
).limit(len(story_guids))
logging.info(u' ---> [%-30s] Parsing: %s existing stories' % (
unicode(self.feed)[:30],
len(existing_stories)))
# MStory.objects(
# (Q(story_date__gte=start_date) & Q(story_date__lte=end_date))
# | (Q(story_guid__in=story_guids)),