diff --git a/apps/rss_feeds/models.py b/apps/rss_feeds/models.py index 42b2b4d4c..6c0744708 100644 --- a/apps/rss_feeds/models.py +++ b/apps/rss_feeds/models.py @@ -797,6 +797,7 @@ class Feed(models.Model): # existing_story.story_date = story.get('published') existing_story.story_title = story.get('title') existing_story.story_content = story_content_diff + existing_story.story_latest_content = story_content existing_story.story_original_content = original_content existing_story.story_author_name = story.get('author') existing_story.story_permalink = story_link @@ -1007,14 +1008,17 @@ class Feed(models.Model): end_date = story_pub_date + datetime.timedelta(hours=8) for existing_story in existing_stories: - content_ratio = 0 existing_story_pub_date = existing_story.story_date # print 'Story pub date: %s %s' % (story_published_now, story_pub_date) if (story_published_now or (existing_story_pub_date > start_date and existing_story_pub_date < end_date)): - if 'story_content_z' in existing_story: + if 'story_latest_content_z' in existing_story: + existing_story_content = unicode(zlib.decompress(existing_story.story_latest_content_z)) + elif 'story_latest_content' in existing_story: + existing_story_content = existing_story.story_latest_content + elif 'story_content_z' in existing_story: existing_story_content = unicode(zlib.decompress(existing_story.story_content_z)) elif 'story_content' in existing_story: existing_story_content = existing_story.story_content @@ -1053,7 +1057,7 @@ class Feed(models.Model): story_has_changed = True break - if story_in_system: + if story_in_system and not story_has_changed: if story_content != existing_story_content: story_has_changed = True if story_link != existing_story.story_permalink: @@ -1062,7 +1066,7 @@ class Feed(models.Model): # if story_has_changed or not story_in_system: - # print 'New/updated story: %s' % (story), + # print 'New/updated story: %s' % (story), return story_in_system, story_has_changed def get_next_scheduled_update(self, force=False, verbose=True): @@ -1282,6 +1286,8 @@ class MStory(mongo.Document): story_content_z = mongo.BinaryField() story_original_content = mongo.StringField() story_original_content_z = mongo.BinaryField() + story_latest_content = mongo.StringField() + story_latest_content_z = mongo.BinaryField() story_content_type = mongo.StringField(max_length=255) story_author_name = mongo.StringField() story_permalink = mongo.StringField() @@ -1315,6 +1321,9 @@ class MStory(mongo.Document): if self.story_original_content: self.story_original_content_z = zlib.compress(self.story_original_content) self.story_original_content = None + if self.story_latest_content: + self.story_latest_content_z = zlib.compress(self.story_latest_content) + self.story_latest_content = None if self.story_title and len(self.story_title) > story_title_max: self.story_title = self.story_title[:story_title_max] if self.story_content_type and len(self.story_content_type) > story_content_type_max: diff --git a/utils/feed_fetcher.py b/utils/feed_fetcher.py index 8e0eee278..acc1474c0 100644 --- a/utils/feed_fetcher.py +++ b/utils/feed_fetcher.py @@ -256,9 +256,12 @@ class ProcessFeed: self.feed.title[:30], hub_url)) PushSubscription.objects.subscribe(self_url, feed=self.feed, hub=hub_url) - logging.debug(u' ---> [%-30s] ~FYParsed Feed: new=~FG~SB%s~SN~FY up=~FY~SB%s~SN same=~FY%s err=~FR~SB%s~SN~FY total=~SB%s' % ( + logging.debug(u' ---> [%-30s] ~FYParsed Feed: %snew=%s~SN~FY %sup=%s~SN same=%s%s~SN %serr=%s~SN~FY total=~SB%s' % ( self.feed.title[:30], - ret_values[ENTRY_NEW], ret_values[ENTRY_UPDATED], ret_values[ENTRY_SAME], ret_values[ENTRY_ERR], + '~FG~SB' if ret_values[ENTRY_NEW] else '', ret_values[ENTRY_NEW], + '~FY~SB' if ret_values[ENTRY_UPDATED] else '', ret_values[ENTRY_UPDATED], + '~SB' if ret_values[ENTRY_SAME] else '', ret_values[ENTRY_SAME], + '~FR~SB' if ret_values[ENTRY_ERR] else '', ret_values[ENTRY_ERR], len(self.fpf.entries))) self.feed.update_all_statistics(full=bool(ret_values[ENTRY_NEW]), force=self.options['force']) self.feed.trim_feed() @@ -447,7 +450,7 @@ class Dispatcher: if ret_entries[ENTRY_NEW]: self.publish_to_subscribers(feed) - done_msg = (u'%2s ---> [%-30s] ~FYProcessed in ~FG~SB%.4ss~FY~SN (~FB%s~FY) [%s]' % ( + done_msg = (u'%2s ---> [%-30s] ~FYProcessed in ~FM~SB%.4ss~FY~SN (~FB%s~FY) [%s]' % ( identity, feed.feed_title[:30], delta, feed.pk, self.feed_trans[ret_feed],)) logging.debug(done_msg)