FIXING THE WORST BUG OF MY LIFE -- finally figured out what was causing the story-shows-as-unread bug. Also fixed enclosures on certain types of feeds.

2025-04-13 09:42:01 +00:00 · 2011-12-14 23:26:07 -08:00 · 2011-12-14 23:26:07 -08:00 · 9d0ce2011a
commit 9d0ce2011a
parent 378a741da9
3 changed files with 21 additions and 18 deletions
--- a/apps/rss_feeds/models.py
+++ b/apps/rss_feeds/models.py
@@ -771,19 +771,19 @@ class Feed(models.Model):
            story_feed_id=self.pk,
        ).order_by('-story_date')
        if stories.count() > trim_cutoff:
-            if verbose:
-                print 'Found %s stories in %s. Trimming to %s...' % (stories.count(), self, trim_cutoff)
+            logging.debug(' ---> [%-30s] Found %s stories. Trimming to %s...' % (self, stories.count(), trim_cutoff))
            try:
                story_trim_date = stories[trim_cutoff].story_date
            except IndexError, e:
                logging.debug(' ***> [%-30s] Error trimming feed: %s' % (self, e))
                return
            extra_stories = MStory.objects(story_feed_id=self.pk, story_date__lte=story_trim_date)
+            extra_stories_count = extra_stories.count()
            extra_stories.delete()
-            # print "Deleted stories, %s left." % MStory.objects(story_feed_id=self.pk).count()
-            userstories = MUserStory.objects(feed_id=self.pk, read_date__lte=story_trim_date)
+            print "Deleted %s stories, %s left." % (extra_stories_count, MStory.objects(story_feed_id=self.pk).count())
+            userstories = MUserStory.objects(feed_id=self.pk, story_date__lte=story_trim_date)
            if userstories.count():
-                # print "Found %s user stories. Deleting..." % userstories.count()
+                print "Found %s user stories. Deleting..." % userstories.count()
                userstories.delete()
        
    def get_stories(self, offset=0, limit=25, force=False, slave=False):
@@ -874,6 +874,7 @@ class Feed(models.Model):
        story_has_changed = False
        story_pub_date = story.get('published')
        story_published_now = story.get('published_now', False)
+        story_link = self.get_permalink(story)
        start_date = story_pub_date - datetime.timedelta(hours=8)
        end_date = story_pub_date + datetime.timedelta(hours=8)
        
@@ -883,23 +884,22 @@ class Feed(models.Model):
            # print 'Story pub date: %s %s' % (story_published_now, story_pub_date)
            if (story_published_now or
                (existing_story_pub_date > start_date and existing_story_pub_date < end_date)):
-                story_link = self.get_permalink(story)
-                if isinstance(existing_story.id, unicode):
-                    existing_story.story_guid = existing_story.id
-                if story.get('guid') and story.get('guid') == existing_story.story_guid:
-                    story_in_system = existing_story
-                elif story_link == existing_story.story_permalink:
-                    story_in_system = existing_story
                
-                # Title distance + content distance, checking if story changed
-                story_title_difference = levenshtein_distance(story.get('title'),
-                                                              existing_story.story_title)
                if 'story_content_z' in existing_story:
                    existing_story_content = unicode(zlib.decompress(existing_story.story_content_z))
                elif 'story_content' in existing_story:
                    existing_story_content = existing_story.story_content
                else:
                    existing_story_content = u''
+                    
+                if isinstance(existing_story.id, unicode):
+                    existing_story.story_guid = existing_story.id
+                if story.get('guid') and story.get('guid') == existing_story.story_guid:
+                    story_in_system = existing_story
+                
+                # Title distance + content distance, checking if story changed
+                story_title_difference = levenshtein_distance(story.get('title'),
+                                                              existing_story.story_title)
                
                seq = difflib.SequenceMatcher(None, story_content, existing_story_content)
                
@@ -923,7 +923,7 @@ class Feed(models.Model):
                    story_in_system = existing_story
                    story_has_changed = True
                    break
-                                        
+                    
                if story_in_system:
                    if story_content != existing_story_content:
                        story_has_changed = True
--- a/apps/rss_feeds/views.py
+++ b/apps/rss_feeds/views.py
@@ -135,7 +135,7 @@ def exception_retry(request):
        feed.fetched_once = True
    feed.save()
    
-    feed = feed.update(force=True, compute_scores=False)
+    feed = feed.update(force=True, compute_scores=False, verbose=True)
    usersub = UserSubscription.objects.get(user=user, feed=feed)
    usersub.calculate_feed_scores(silent=False)
    
--- a/utils/story_functions.py
+++ b/utils/story_functions.py
@@ -2,6 +2,7 @@ from django.utils.dateformat import DateFormat
 import datetime
 from django.utils.http import urlquote
 from django.conf import settings
+from itertools import chain

 def story_score(story, bottom_delta=None):
    # A) Date - Assumes story is unread and within unread range
@@ -75,7 +76,7 @@ def pre_process_story(entry):
        entry['story_content'] = entry.get('summary', '')
    
    # Add each media enclosure as a Download link
-    for media_content in entry.get('media_content', []):
+    for media_content in chain(entry.get('media_content', []), entry.get('links', [])):
        media_url = media_content.get('url', '')
        media_type = media_content.get('type', '')
        if media_url and media_type and media_url not in entry['story_content']:
@@ -89,6 +90,8 @@ def pre_process_story(entry):
                    }
            elif 'image' in media_type and media_url:
                entry['story_content'] += """<br><br><img src="%s" />"""  % media_url
+            elif media_content.get('rel') == 'alternative' or 'text' in media_content.get('type'):
+                continue
            entry['story_content'] += """<br><br>
                Download %(media_type)s: <a href="%(media_url)s">%(media_url)s</a>"""  % {
                'media_url': media_url,