diff --git a/apps/rss_feeds/models.py b/apps/rss_feeds/models.py index eca06e1e9..dc5b6cda0 100644 --- a/apps/rss_feeds/models.py +++ b/apps/rss_feeds/models.py @@ -771,19 +771,19 @@ class Feed(models.Model): story_feed_id=self.pk, ).order_by('-story_date') if stories.count() > trim_cutoff: - if verbose: - print 'Found %s stories in %s. Trimming to %s...' % (stories.count(), self, trim_cutoff) + logging.debug(' ---> [%-30s] Found %s stories. Trimming to %s...' % (self, stories.count(), trim_cutoff)) try: story_trim_date = stories[trim_cutoff].story_date except IndexError, e: logging.debug(' ***> [%-30s] Error trimming feed: %s' % (self, e)) return extra_stories = MStory.objects(story_feed_id=self.pk, story_date__lte=story_trim_date) + extra_stories_count = extra_stories.count() extra_stories.delete() - # print "Deleted stories, %s left." % MStory.objects(story_feed_id=self.pk).count() - userstories = MUserStory.objects(feed_id=self.pk, read_date__lte=story_trim_date) + print "Deleted %s stories, %s left." % (extra_stories_count, MStory.objects(story_feed_id=self.pk).count()) + userstories = MUserStory.objects(feed_id=self.pk, story_date__lte=story_trim_date) if userstories.count(): - # print "Found %s user stories. Deleting..." % userstories.count() + print "Found %s user stories. Deleting..." % userstories.count() userstories.delete() def get_stories(self, offset=0, limit=25, force=False, slave=False): @@ -874,6 +874,7 @@ class Feed(models.Model): story_has_changed = False story_pub_date = story.get('published') story_published_now = story.get('published_now', False) + story_link = self.get_permalink(story) start_date = story_pub_date - datetime.timedelta(hours=8) end_date = story_pub_date + datetime.timedelta(hours=8) @@ -883,23 +884,22 @@ class Feed(models.Model): # print 'Story pub date: %s %s' % (story_published_now, story_pub_date) if (story_published_now or (existing_story_pub_date > start_date and existing_story_pub_date < end_date)): - story_link = self.get_permalink(story) - if isinstance(existing_story.id, unicode): - existing_story.story_guid = existing_story.id - if story.get('guid') and story.get('guid') == existing_story.story_guid: - story_in_system = existing_story - elif story_link == existing_story.story_permalink: - story_in_system = existing_story - # Title distance + content distance, checking if story changed - story_title_difference = levenshtein_distance(story.get('title'), - existing_story.story_title) if 'story_content_z' in existing_story: existing_story_content = unicode(zlib.decompress(existing_story.story_content_z)) elif 'story_content' in existing_story: existing_story_content = existing_story.story_content else: existing_story_content = u'' + + if isinstance(existing_story.id, unicode): + existing_story.story_guid = existing_story.id + if story.get('guid') and story.get('guid') == existing_story.story_guid: + story_in_system = existing_story + + # Title distance + content distance, checking if story changed + story_title_difference = levenshtein_distance(story.get('title'), + existing_story.story_title) seq = difflib.SequenceMatcher(None, story_content, existing_story_content) @@ -923,7 +923,7 @@ class Feed(models.Model): story_in_system = existing_story story_has_changed = True break - + if story_in_system: if story_content != existing_story_content: story_has_changed = True diff --git a/apps/rss_feeds/views.py b/apps/rss_feeds/views.py index ff0194f65..e50f2689c 100644 --- a/apps/rss_feeds/views.py +++ b/apps/rss_feeds/views.py @@ -135,7 +135,7 @@ def exception_retry(request): feed.fetched_once = True feed.save() - feed = feed.update(force=True, compute_scores=False) + feed = feed.update(force=True, compute_scores=False, verbose=True) usersub = UserSubscription.objects.get(user=user, feed=feed) usersub.calculate_feed_scores(silent=False) diff --git a/utils/story_functions.py b/utils/story_functions.py index e2769b47f..3359d7c4d 100644 --- a/utils/story_functions.py +++ b/utils/story_functions.py @@ -2,6 +2,7 @@ from django.utils.dateformat import DateFormat import datetime from django.utils.http import urlquote from django.conf import settings +from itertools import chain def story_score(story, bottom_delta=None): # A) Date - Assumes story is unread and within unread range @@ -75,7 +76,7 @@ def pre_process_story(entry): entry['story_content'] = entry.get('summary', '') # Add each media enclosure as a Download link - for media_content in entry.get('media_content', []): + for media_content in chain(entry.get('media_content', []), entry.get('links', [])): media_url = media_content.get('url', '') media_type = media_content.get('type', '') if media_url and media_type and media_url not in entry['story_content']: @@ -89,6 +90,8 @@ def pre_process_story(entry): } elif 'image' in media_type and media_url: entry['story_content'] += """

""" % media_url + elif media_content.get('rel') == 'alternative' or 'text' in media_content.get('type'): + continue entry['story_content'] += """

Download %(media_type)s: %(media_url)s""" % { 'media_url': media_url,