Merge branch 'social' into redis

* social:
  Better watching for story changes. This is both a speed improvement and a usability fix: stories that changed at some point in the past, but have not changed recently, will no longer be declared as updated every time they are fetched. Props to @textfiles for the push.
This commit is contained in:
Samuel Clay 2012-07-22 12:25:13 -07:00
commit 71f03f1ccd
2 changed files with 19 additions and 7 deletions

View file

@ -797,6 +797,7 @@ class Feed(models.Model):
# existing_story.story_date = story.get('published')
existing_story.story_title = story.get('title')
existing_story.story_content = story_content_diff
existing_story.story_latest_content = story_content
existing_story.story_original_content = original_content
existing_story.story_author_name = story.get('author')
existing_story.story_permalink = story_link
@ -1007,14 +1008,17 @@ class Feed(models.Model):
end_date = story_pub_date + datetime.timedelta(hours=8)
for existing_story in existing_stories:
content_ratio = 0
existing_story_pub_date = existing_story.story_date
# print 'Story pub date: %s %s' % (story_published_now, story_pub_date)
if (story_published_now or
(existing_story_pub_date > start_date and existing_story_pub_date < end_date)):
if 'story_content_z' in existing_story:
if 'story_latest_content_z' in existing_story:
existing_story_content = unicode(zlib.decompress(existing_story.story_latest_content_z))
elif 'story_latest_content' in existing_story:
existing_story_content = existing_story.story_latest_content
elif 'story_content_z' in existing_story:
existing_story_content = unicode(zlib.decompress(existing_story.story_content_z))
elif 'story_content' in existing_story:
existing_story_content = existing_story.story_content
@ -1053,7 +1057,7 @@ class Feed(models.Model):
story_has_changed = True
break
if story_in_system:
if story_in_system and not story_has_changed:
if story_content != existing_story_content:
story_has_changed = True
if story_link != existing_story.story_permalink:
@ -1062,7 +1066,7 @@ class Feed(models.Model):
# if story_has_changed or not story_in_system:
# print 'New/updated story: %s' % (story),
# print 'New/updated story: %s' % (story),
return story_in_system, story_has_changed
def get_next_scheduled_update(self, force=False, verbose=True):
@ -1282,6 +1286,8 @@ class MStory(mongo.Document):
story_content_z = mongo.BinaryField()
story_original_content = mongo.StringField()
story_original_content_z = mongo.BinaryField()
story_latest_content = mongo.StringField()
story_latest_content_z = mongo.BinaryField()
story_content_type = mongo.StringField(max_length=255)
story_author_name = mongo.StringField()
story_permalink = mongo.StringField()
@ -1315,6 +1321,9 @@ class MStory(mongo.Document):
if self.story_original_content:
self.story_original_content_z = zlib.compress(self.story_original_content)
self.story_original_content = None
if self.story_latest_content:
self.story_latest_content_z = zlib.compress(self.story_latest_content)
self.story_latest_content = None
if self.story_title and len(self.story_title) > story_title_max:
self.story_title = self.story_title[:story_title_max]
if self.story_content_type and len(self.story_content_type) > story_content_type_max:

View file

@ -256,9 +256,12 @@ class ProcessFeed:
self.feed.title[:30], hub_url))
PushSubscription.objects.subscribe(self_url, feed=self.feed, hub=hub_url)
logging.debug(u' ---> [%-30s] ~FYParsed Feed: new=~FG~SB%s~SN~FY up=~FY~SB%s~SN same=~FY%s err=~FR~SB%s~SN~FY total=~SB%s' % (
logging.debug(u' ---> [%-30s] ~FYParsed Feed: %snew=%s~SN~FY %sup=%s~SN same=%s%s~SN %serr=%s~SN~FY total=~SB%s' % (
self.feed.title[:30],
ret_values[ENTRY_NEW], ret_values[ENTRY_UPDATED], ret_values[ENTRY_SAME], ret_values[ENTRY_ERR],
'~FG~SB' if ret_values[ENTRY_NEW] else '', ret_values[ENTRY_NEW],
'~FY~SB' if ret_values[ENTRY_UPDATED] else '', ret_values[ENTRY_UPDATED],
'~SB' if ret_values[ENTRY_SAME] else '', ret_values[ENTRY_SAME],
'~FR~SB' if ret_values[ENTRY_ERR] else '', ret_values[ENTRY_ERR],
len(self.fpf.entries)))
self.feed.update_all_statistics(full=bool(ret_values[ENTRY_NEW]), force=self.options['force'])
self.feed.trim_feed()
@ -447,7 +450,7 @@ class Dispatcher:
if ret_entries[ENTRY_NEW]:
self.publish_to_subscribers(feed)
done_msg = (u'%2s ---> [%-30s] ~FYProcessed in ~FG~SB%.4ss~FY~SN (~FB%s~FY) [%s]' % (
done_msg = (u'%2s ---> [%-30s] ~FYProcessed in ~FM~SB%.4ss~FY~SN (~FB%s~FY) [%s]' % (
identity, feed.feed_title[:30], delta,
feed.pk, self.feed_trans[ret_feed],))
logging.debug(done_msg)