diff --git a/apps/rss_feeds/models.py b/apps/rss_feeds/models.py
index eca06e1e9..dc5b6cda0 100644
--- a/apps/rss_feeds/models.py
+++ b/apps/rss_feeds/models.py
@@ -771,19 +771,19 @@ class Feed(models.Model):
story_feed_id=self.pk,
).order_by('-story_date')
if stories.count() > trim_cutoff:
- if verbose:
- print 'Found %s stories in %s. Trimming to %s...' % (stories.count(), self, trim_cutoff)
+ logging.debug(' ---> [%-30s] Found %s stories. Trimming to %s...' % (self, stories.count(), trim_cutoff))
try:
story_trim_date = stories[trim_cutoff].story_date
except IndexError, e:
logging.debug(' ***> [%-30s] Error trimming feed: %s' % (self, e))
return
extra_stories = MStory.objects(story_feed_id=self.pk, story_date__lte=story_trim_date)
+ extra_stories_count = extra_stories.count()
extra_stories.delete()
- # print "Deleted stories, %s left." % MStory.objects(story_feed_id=self.pk).count()
- userstories = MUserStory.objects(feed_id=self.pk, read_date__lte=story_trim_date)
+ print "Deleted %s stories, %s left." % (extra_stories_count, MStory.objects(story_feed_id=self.pk).count())
+ userstories = MUserStory.objects(feed_id=self.pk, story_date__lte=story_trim_date)
if userstories.count():
- # print "Found %s user stories. Deleting..." % userstories.count()
+ print "Found %s user stories. Deleting..." % userstories.count()
userstories.delete()
def get_stories(self, offset=0, limit=25, force=False, slave=False):
@@ -874,6 +874,7 @@ class Feed(models.Model):
story_has_changed = False
story_pub_date = story.get('published')
story_published_now = story.get('published_now', False)
+ story_link = self.get_permalink(story)
start_date = story_pub_date - datetime.timedelta(hours=8)
end_date = story_pub_date + datetime.timedelta(hours=8)
@@ -883,23 +884,22 @@ class Feed(models.Model):
# print 'Story pub date: %s %s' % (story_published_now, story_pub_date)
if (story_published_now or
(existing_story_pub_date > start_date and existing_story_pub_date < end_date)):
- story_link = self.get_permalink(story)
- if isinstance(existing_story.id, unicode):
- existing_story.story_guid = existing_story.id
- if story.get('guid') and story.get('guid') == existing_story.story_guid:
- story_in_system = existing_story
- elif story_link == existing_story.story_permalink:
- story_in_system = existing_story
- # Title distance + content distance, checking if story changed
- story_title_difference = levenshtein_distance(story.get('title'),
- existing_story.story_title)
if 'story_content_z' in existing_story:
existing_story_content = unicode(zlib.decompress(existing_story.story_content_z))
elif 'story_content' in existing_story:
existing_story_content = existing_story.story_content
else:
existing_story_content = u''
+
+ if isinstance(existing_story.id, unicode):
+ existing_story.story_guid = existing_story.id
+ if story.get('guid') and story.get('guid') == existing_story.story_guid:
+ story_in_system = existing_story
+
+ # Title distance + content distance, checking if story changed
+ story_title_difference = levenshtein_distance(story.get('title'),
+ existing_story.story_title)
seq = difflib.SequenceMatcher(None, story_content, existing_story_content)
@@ -923,7 +923,7 @@ class Feed(models.Model):
story_in_system = existing_story
story_has_changed = True
break
-
+
if story_in_system:
if story_content != existing_story_content:
story_has_changed = True
diff --git a/apps/rss_feeds/views.py b/apps/rss_feeds/views.py
index ff0194f65..e50f2689c 100644
--- a/apps/rss_feeds/views.py
+++ b/apps/rss_feeds/views.py
@@ -135,7 +135,7 @@ def exception_retry(request):
feed.fetched_once = True
feed.save()
- feed = feed.update(force=True, compute_scores=False)
+ feed = feed.update(force=True, compute_scores=False, verbose=True)
usersub = UserSubscription.objects.get(user=user, feed=feed)
usersub.calculate_feed_scores(silent=False)
diff --git a/utils/story_functions.py b/utils/story_functions.py
index e2769b47f..3359d7c4d 100644
--- a/utils/story_functions.py
+++ b/utils/story_functions.py
@@ -2,6 +2,7 @@ from django.utils.dateformat import DateFormat
import datetime
from django.utils.http import urlquote
from django.conf import settings
+from itertools import chain
def story_score(story, bottom_delta=None):
# A) Date - Assumes story is unread and within unread range
@@ -75,7 +76,7 @@ def pre_process_story(entry):
entry['story_content'] = entry.get('summary', '')
# Add each media enclosure as a Download link
- for media_content in entry.get('media_content', []):
+ for media_content in chain(entry.get('media_content', []), entry.get('links', [])):
media_url = media_content.get('url', '')
media_type = media_content.get('type', '')
if media_url and media_type and media_url not in entry['story_content']:
@@ -89,6 +90,8 @@ def pre_process_story(entry):
}
elif 'image' in media_type and media_url:
entry['story_content'] += """
""" % media_url
+ elif media_content.get('rel') == 'alternative' or 'text' in media_content.get('type'):
+ continue
entry['story_content'] += """
Download %(media_type)s: %(media_url)s""" % {
'media_url': media_url,