FIXING THE WORST BUG OF MY LIFE -- finally figured out what was causing the story-shows-as-unread bug. Also fixed enclosures on certain types of feeds.

This commit is contained in:
Samuel Clay 2011-12-14 23:26:07 -08:00
parent 378a741da9
commit 9d0ce2011a
3 changed files with 21 additions and 18 deletions

View file

@@ -771,19 +771,19 @@ class Feed(models.Model):
story_feed_id=self.pk,
).order_by('-story_date')
if stories.count() > trim_cutoff:
if verbose:
print 'Found %s stories in %s. Trimming to %s...' % (stories.count(), self, trim_cutoff)
logging.debug(' ---> [%-30s] Found %s stories. Trimming to %s...' % (self, stories.count(), trim_cutoff))
try:
story_trim_date = stories[trim_cutoff].story_date
except IndexError, e:
logging.debug(' ***> [%-30s] Error trimming feed: %s' % (self, e))
return
extra_stories = MStory.objects(story_feed_id=self.pk, story_date__lte=story_trim_date)
extra_stories_count = extra_stories.count()
extra_stories.delete()
# print "Deleted stories, %s left." % MStory.objects(story_feed_id=self.pk).count()
userstories = MUserStory.objects(feed_id=self.pk, read_date__lte=story_trim_date)
print "Deleted %s stories, %s left." % (extra_stories_count, MStory.objects(story_feed_id=self.pk).count())
userstories = MUserStory.objects(feed_id=self.pk, story_date__lte=story_trim_date)
if userstories.count():
# print "Found %s user stories. Deleting..." % userstories.count()
print "Found %s user stories. Deleting..." % userstories.count()
userstories.delete()
def get_stories(self, offset=0, limit=25, force=False, slave=False):
@@ -874,6 +874,7 @@ class Feed(models.Model):
story_has_changed = False
story_pub_date = story.get('published')
story_published_now = story.get('published_now', False)
story_link = self.get_permalink(story)
start_date = story_pub_date - datetime.timedelta(hours=8)
end_date = story_pub_date + datetime.timedelta(hours=8)
@@ -883,23 +884,22 @@ class Feed(models.Model):
# print 'Story pub date: %s %s' % (story_published_now, story_pub_date)
if (story_published_now or
(existing_story_pub_date > start_date and existing_story_pub_date < end_date)):
story_link = self.get_permalink(story)
if isinstance(existing_story.id, unicode):
existing_story.story_guid = existing_story.id
if story.get('guid') and story.get('guid') == existing_story.story_guid:
story_in_system = existing_story
elif story_link == existing_story.story_permalink:
story_in_system = existing_story
# Title distance + content distance, checking if story changed
story_title_difference = levenshtein_distance(story.get('title'),
existing_story.story_title)
if 'story_content_z' in existing_story:
existing_story_content = unicode(zlib.decompress(existing_story.story_content_z))
elif 'story_content' in existing_story:
existing_story_content = existing_story.story_content
else:
existing_story_content = u''
if isinstance(existing_story.id, unicode):
existing_story.story_guid = existing_story.id
if story.get('guid') and story.get('guid') == existing_story.story_guid:
story_in_system = existing_story
# Title distance + content distance, checking if story changed
story_title_difference = levenshtein_distance(story.get('title'),
existing_story.story_title)
seq = difflib.SequenceMatcher(None, story_content, existing_story_content)
@@ -923,7 +923,7 @@ class Feed(models.Model):
story_in_system = existing_story
story_has_changed = True
break
if story_in_system:
if story_content != existing_story_content:
story_has_changed = True

View file

@@ -135,7 +135,7 @@ def exception_retry(request):
feed.fetched_once = True
feed.save()
feed = feed.update(force=True, compute_scores=False)
feed = feed.update(force=True, compute_scores=False, verbose=True)
usersub = UserSubscription.objects.get(user=user, feed=feed)
usersub.calculate_feed_scores(silent=False)

View file

@@ -2,6 +2,7 @@ from django.utils.dateformat import DateFormat
import datetime
from django.utils.http import urlquote
from django.conf import settings
from itertools import chain
def story_score(story, bottom_delta=None):
# A) Date - Assumes story is unread and within unread range
@@ -75,7 +76,7 @@ def pre_process_story(entry):
entry['story_content'] = entry.get('summary', '')
# Add each media enclosure as a Download link
for media_content in entry.get('media_content', []):
for media_content in chain(entry.get('media_content', []), entry.get('links', [])):
media_url = media_content.get('url', '')
media_type = media_content.get('type', '')
if media_url and media_type and media_url not in entry['story_content']:
@@ -89,6 +90,8 @@ def pre_process_story(entry):
}
elif 'image' in media_type and media_url:
entry['story_content'] += """<br><br><img src="%s" />""" % media_url
elif media_content.get('rel') == 'alternative' or 'text' in media_content.get('type'):
continue
entry['story_content'] += """<br><br>
Download %(media_type)s: <a href="%(media_url)s">%(media_url)s</a>""" % {
'media_url': media_url,