Finished loading redis unreads into mongo. That's it for the risky part. Now for the cleanup.

Samuel Clay 2012-07-16 19:13:32 -07:00
parent b5327c0ce6
commit 1c16662be3
2 changed files with 31 additions and 97 deletions
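
Both diffs below lean on the Redis key scheme already visible in the code: F:<feed_id> is a set of every story guid in a feed, RS:<user_id>:<feed_id> is the set of guids the user has read, U:<user_id>:<feed_id> receives their difference, and zF:/zU: hold the date-ranked equivalents. A minimal sketch of the core set operation, assuming redis-py and hypothetical ids:

import redis

r = redis.Redis()
user_id, feed_id = 42, 1001  # hypothetical ids, for illustration only

stories_key        = 'F:%s' % feed_id                 # all story guids in the feed
read_stories_key   = 'RS:%s:%s' % (user_id, feed_id)  # guids this user has read
unread_stories_key = 'U:%s:%s' % (user_id, feed_id)   # computed unreads land here

# SDIFFSTORE stores (stories - read) under the unread key and returns
# the cardinality of the resulting set.
unread_count = r.sdiffstore(unread_stories_key, stories_key, read_stories_key)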

apps/reader/models.py

@@ -91,7 +91,12 @@ class UserSubscription(models.Model):
         stories_key = 'F:%s' % (self.feed_id)
         read_stories_key = 'RS:%s:%s' % (self.user_id, self.feed_id)
         unread_stories_key = 'U:%s:%s' % (self.user_id, self.feed_id)
-        r.sdiffstore(unread_stories_key, stories_key, read_stories_key)
+        if not r.exists(stories_key):
+            return []
+        elif not r.exists(read_stories_key):
+            unread_stories_key = stories_key
+        else:
+            r.sdiffstore(unread_stories_key, stories_key, read_stories_key)
         sorted_stories_key = 'zF:%s' % (self.feed_id)
         unread_ranked_stories_key = 'zU:%s:%s' % (self.user_id, self.feed_id)
@@ -104,14 +109,18 @@ class UserSubscription(models.Model):
         return story_guids
 
     @classmethod
-    def unread_feed_stories(cls, user_id, feed_ids):
+    def unread_feed_stories(cls, user_id, feed_ids, offset=0, limit=6):
         r = redis.Redis(connection_pool=settings.REDIS_STORY_POOL)
         if not isinstance(feed_ids, list):
             feed_ids = [feed_ids]
         unread_ranked_stories_keys = 'zU:%s' % (user_id)
-        r.delete(unread_ranked_stories_keys)
+        if offset and r.exists(unread_ranked_stories_keys):
+            story_guids = r.zrevrange(unread_ranked_stories_keys, offset, limit)
+            return story_guids
+        else:
+            r.delete(unread_ranked_stories_keys)
         for feed_id in feed_ids:
             us = cls.objects.get(user=user_id, feed=feed_id)
@@ -119,7 +128,12 @@ class UserSubscription(models.Model):
             stories_key = 'F:%s' % (feed_id)
             read_stories_key = 'RS:%s:%s' % (user_id, feed_id)
             unread_stories_key = 'U:%s:%s' % (user_id, feed_id)
-            r.sdiffstore(unread_stories_key, stories_key, read_stories_key)
+            if not r.exists(stories_key):
+                continue
+            elif not r.exists(read_stories_key):
+                unread_stories_key = stories_key
+            else:
+                r.sdiffstore(unread_stories_key, stories_key, read_stories_key)
             sorted_stories_key = 'zF:%s' % (feed_id)
             unread_ranked_stories_key = 'zU:%s:%s' % (user_id, feed_id)
@@ -133,7 +147,7 @@ class UserSubscription(models.Model):
             if story_guids:
                 r.zadd(unread_ranked_stories_keys, **dict(story_guids))
-        story_guids = r.zrevrange(unread_ranked_stories_keys, 0, 6)
+        story_guids = r.zrevrange(unread_ranked_stories_keys, offset, limit)
         return story_guids
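
One detail worth flagging in the new pagination path: ZREVRANGE takes inclusive start/stop indices, not a count, so the limit threaded through unread_feed_stories is really a stop index. The `if offset and r.exists(...)` branch reuses the ranked set built on the first page, so later pages skip the per-feed SDIFFSTORE work entirely. A sketch of the page math (river_page_range is a hypothetical helper, not part of the commit):

def river_page_range(page, per_page=6):
    # First index on this page, and the inclusive stop index that
    # ZREVRANGE expects; the view below computes the same values.
    offset = (page - 1) * per_page
    stop = page * per_page - 1
    return offset, stop

assert river_page_range(1) == (0, 5)   # first page: indices 0..5
assert river_page_range(2) == (6, 11)  # second page: indices 6..11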

apps/reader/views.py

@@ -18,8 +18,6 @@ from django.core.validators import email_re
 from django.core.mail import EmailMultiAlternatives
 from django.contrib.sites.models import Site
 from mongoengine.queryset import OperationError
-from pymongo.helpers import OperationFailure
-from operator import itemgetter
 from apps.recommendations.models import RecommendedFeed
 from apps.analyzer.models import MClassifierTitle, MClassifierAuthor, MClassifierFeed, MClassifierTag
 from apps.analyzer.models import apply_classifier_titles, apply_classifier_feeds
@@ -42,8 +40,6 @@ from utils.user_functions import get_user, ajax_login_required
 from utils.feed_functions import relative_timesince
 from utils.story_functions import format_story_link_date__short
 from utils.story_functions import format_story_link_date__long
-from utils.story_functions import bunch
-from utils.story_functions import story_score
 from utils import log as logging
 from utils.view_functions import get_argument_or_404, render_to, is_true
 from utils.ratelimit import ratelimit
@@ -572,100 +568,27 @@ def load_starred_stories(request):
 
 @json.json_view
 def load_river_stories(request):
-    limit                = 18
-    offset               = int(request.REQUEST.get('offset', 0))
-    start                = time.time()
-    user                 = get_user(request)
-    feed_ids             = [int(feed_id) for feed_id in request.REQUEST.getlist('feeds') if feed_id]
-    original_feed_ids    = list(feed_ids)
-    page                 = int(request.REQUEST.get('page', 1))
-    read_stories_count   = int(request.REQUEST.get('read_stories_count', 0))
-    days_to_keep_unreads = datetime.timedelta(days=settings.DAYS_OF_UNREAD)
-    now                  = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone)
+    limit             = 6
+    start             = time.time()
+    user              = get_user(request)
+    feed_ids          = [int(feed_id) for feed_id in request.REQUEST.getlist('feeds') if feed_id]
+    original_feed_ids = list(feed_ids)
+    page              = int(request.REQUEST.get('page', 1))
+    now               = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone)
     if not feed_ids:
         logging.user(request, "~FCLoading empty river stories: page %s" % (page))
         return dict(stories=[])
-    # Fetch all stories at and before the page number.
-    # Not a single page, because reading stories can move them up in the unread order.
-    # `read_stories_count` is an optimization, works best when all 25 stories before have been read.
-    offset = (page-1) * limit - read_stories_count
-    limit  = page * limit - read_stories_count
+    offset = (page-1) * limit
+    limit  = page * limit - 1
-    # Read stories to exclude
-    read_stories = MUserStory.objects(user_id=user.pk,
-                                      feed_id__in=feed_ids
-                                      ).only('story_id').hint([('user_id', 1), ('feed_id', 1), ('story_id', 1)])
-    read_stories = [rs.story_id for rs in read_stories]
-    # Determine mark_as_read dates for all feeds to ignore all stories before this date.
-    feed_counts = {}
-    feed_last_reads = {}
-    for feed_id in feed_ids:
-        try:
-            usersub = UserSubscription.objects.get(feed__pk=feed_id, user=user)
-        except UserSubscription.DoesNotExist:
-            continue
-        if not usersub: continue
-        feed_counts[feed_id] = (usersub.unread_count_negative * 1 +
-                                usersub.unread_count_neutral * 10 +
-                                usersub.unread_count_positive * 20)
-        feed_last_reads[feed_id] = int(time.mktime(usersub.mark_read_date.timetuple()))
-    feed_counts = sorted(feed_counts.items(), key=itemgetter(1))[:40]
-    feed_ids = [f[0] for f in feed_counts]
-    feed_last_reads = dict([(str(feed_id), feed_last_reads[feed_id]) for feed_id in feed_ids
-                            if feed_id in feed_last_reads])
-    feed_counts = dict(feed_counts)
-    # After excluding read stories, all that's left are stories
-    # past the mark_read_date. Everything returned is guaranteed to be unread.
-    mstories = MStory.objects(
-        story_guid__nin=read_stories,
-        story_feed_id__in=feed_ids,
-        # story_date__gte=start - days_to_keep_unreads
-    ).map_reduce("""function() {
-        var d = feed_last_reads[this[~story_feed_id]];
-        if (this[~story_date].getTime()/1000 > d) {
-            emit(this[~id], this);
-        }
-    }""",
-    """function(key, values) {
-        return values[0];
-    }""",
-    output='inline',
-    scope={
-        'feed_last_reads': feed_last_reads
-    }
-    )
-    try:
-        mstories = [story.value for story in mstories if story and story.value]
-    except OperationFailure, e:
-        return dict(error=str(e), code=-1)
-    mstories = sorted(mstories, cmp=lambda x, y: cmp(story_score(y, days_to_keep_unreads),
-                                                     story_score(x, days_to_keep_unreads)))
-    # Prune the river to only include a set number of stories per feed
-    # story_feed_counts = defaultdict(int)
-    # mstories_pruned = []
-    # for story in mstories:
-    #     print story['story_title'], story_feed_counts[story['story_feed_id']]
-    #     if story_feed_counts[story['story_feed_id']] >= 3: continue
-    #     mstories_pruned.append(story)
-    #     story_feed_counts[story['story_feed_id']] += 1
-    stories = []
-    for i, story in enumerate(mstories):
-        if i < offset: continue
-        if i >= limit: break
-        stories.append(bunch(story))
-    stories = Feed.format_stories(stories)
+    story_ids = UserSubscription.unread_feed_stories(user.pk, feed_ids, offset=offset, limit=limit)
+    mstories  = MStory.objects(id__in=story_ids)
+    stories   = Feed.format_stories(mstories)
     found_feed_ids = list(set([story['story_feed_id'] for story in stories]))
 
     # Find starred stories
-    # try:
     if found_feed_ids:
         starred_stories = MStarredStory.objects(
             user_id=user.pk,
@@ -675,9 +598,6 @@ def load_river_stories(request):
                                        for story in starred_stories])
     else:
         starred_stories = {}
-    # except OperationFailure:
-    #     logging.info(" ***> Starred stories failure")
-    #     starred_stories = {}
 
     # Intelligence classifiers for all feeds involved
     if found_feed_ids:
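
Taken together, the view now delegates ranking to Redis and uses Mongo only to hydrate one page of stories. A condensed sketch of the new flow, reusing names already imported in views.py (the helper itself is illustrative, not part of the commit):

def load_river_page(user, feed_ids, page, per_page=6):
    # 1. Redis ranks unread story ids across the requested feeds (zU:<user_id>).
    offset = (page - 1) * per_page
    stop = page * per_page - 1
    story_ids = UserSubscription.unread_feed_stories(
        user.pk, feed_ids, offset=offset, limit=stop)
    # 2. Mongo hydrates only the ids on the current page.
    mstories = MStory.objects(id__in=story_ids)
    # 3. Format for the client.
    return Feed.format_stories(mstories)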