Finished loading redis unreads into mongo. That's it for the risky part. Now for the cleanup.

Samuel Clay 2012-07-16 19:13:32 -07:00
parent b5327c0ce6
commit 1c16662be3
2 changed files with 31 additions and 97 deletions
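
Both diffs below lean on the Redis key scheme already visible in the code: F:<feed_id> is a set of every story guid in a feed, RS:<user_id>:<feed_id> is the set of guids the user has read, U:<user_id>:<feed_id> receives their difference, and zF:/zU: hold the date-ranked equivalents. A minimal sketch of the core set operation, assuming redis-py and hypothetical ids:

import redis

r = redis.Redis()
user_id, feed_id = 42, 1001  # hypothetical ids, for illustration only

stories_key        = 'F:%s' % feed_id                 # all story guids in the feed
read_stories_key   = 'RS:%s:%s' % (user_id, feed_id)  # guids this user has read
unread_stories_key = 'U:%s:%s' % (user_id, feed_id)   # computed unreads land here

# SDIFFSTORE stores (stories - read) under the unread key and returns
# the cardinality of the resulting set.
unread_count = r.sdiffstore(unread_stories_key, stories_key, read_stories_key)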

apps/reader/models.py

@@ -91,7 +91,12 @@ class UserSubscription(models.Model):
         stories_key = 'F:%s' % (self.feed_id)
         read_stories_key = 'RS:%s:%s' % (self.user_id, self.feed_id)
         unread_stories_key = 'U:%s:%s' % (self.user_id, self.feed_id)
-        r.sdiffstore(unread_stories_key, stories_key, read_stories_key)
+        if not r.exists(stories_key):
+            return []
+        elif not r.exists(read_stories_key):
+            unread_stories_key = stories_key
+        else:
+            r.sdiffstore(unread_stories_key, stories_key, read_stories_key)
         sorted_stories_key = 'zF:%s' % (self.feed_id)
         unread_ranked_stories_key = 'zU:%s:%s' % (self.user_id, self.feed_id)
@@ -104,14 +109,18 @@ class UserSubscription(models.Model):
         return story_guids
 
     @classmethod
-    def unread_feed_stories(cls, user_id, feed_ids):
+    def unread_feed_stories(cls, user_id, feed_ids, offset=0, limit=6):
         r = redis.Redis(connection_pool=settings.REDIS_STORY_POOL)
         if not isinstance(feed_ids, list):
             feed_ids = [feed_ids]
         unread_ranked_stories_keys = 'zU:%s' % (user_id)
-        r.delete(unread_ranked_stories_keys)
+        if offset and r.exists(unread_ranked_stories_keys):
+            story_guids = r.zrevrange(unread_ranked_stories_keys, offset, limit)
+            return story_guids
+        else:
+            r.delete(unread_ranked_stories_keys)
         for feed_id in feed_ids:
             us = cls.objects.get(user=user_id, feed=feed_id)
@@ -119,7 +128,12 @@ class UserSubscription(models.Model):
             stories_key = 'F:%s' % (feed_id)
             read_stories_key = 'RS:%s:%s' % (user_id, feed_id)
             unread_stories_key = 'U:%s:%s' % (user_id, feed_id)
-            r.sdiffstore(unread_stories_key, stories_key, read_stories_key)
+            if not r.exists(stories_key):
+                continue
+            elif not r.exists(read_stories_key):
+                unread_stories_key = stories_key
+            else:
+                r.sdiffstore(unread_stories_key, stories_key, read_stories_key)
             sorted_stories_key = 'zF:%s' % (feed_id)
             unread_ranked_stories_key = 'zU:%s:%s' % (user_id, feed_id)
@@ -133,7 +147,7 @@ class UserSubscription(models.Model):
             if story_guids:
                 r.zadd(unread_ranked_stories_keys, **dict(story_guids))
-        story_guids = r.zrevrange(unread_ranked_stories_keys, 0, 6)
+        story_guids = r.zrevrange(unread_ranked_stories_keys, offset, limit)
         return story_guids
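
One detail worth flagging in the new pagination path: ZREVRANGE takes inclusive start/stop indices, not a count, so the limit threaded through unread_feed_stories is really a stop index. The `if offset and r.exists(...)` branch reuses the ranked set built on the first page, so later pages skip the per-feed SDIFFSTORE work entirely. A sketch of the page math (river_page_range is a hypothetical helper, not part of the commit):

def river_page_range(page, per_page=6):
    # First index on this page, and the inclusive stop index that
    # ZREVRANGE expects; the view below computes the same values.
    offset = (page - 1) * per_page
    stop = page * per_page - 1
    return offset, stop

assert river_page_range(1) == (0, 5)   # first page: indices 0..5
assert river_page_range(2) == (6, 11)  # second page: indices 6..11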

apps/reader/views.py

@@ -18,8 +18,6 @@ from django.core.validators import email_re
 from django.core.mail import EmailMultiAlternatives
 from django.contrib.sites.models import Site
 from mongoengine.queryset import OperationError
-from pymongo.helpers import OperationFailure
-from operator import itemgetter
 from apps.recommendations.models import RecommendedFeed
 from apps.analyzer.models import MClassifierTitle, MClassifierAuthor, MClassifierFeed, MClassifierTag
 from apps.analyzer.models import apply_classifier_titles, apply_classifier_feeds
@@ -42,8 +40,6 @@ from utils.user_functions import get_user, ajax_login_required
 from utils.feed_functions import relative_timesince
 from utils.story_functions import format_story_link_date__short
 from utils.story_functions import format_story_link_date__long
-from utils.story_functions import bunch
-from utils.story_functions import story_score
 from utils import log as logging
 from utils.view_functions import get_argument_or_404, render_to, is_true
 from utils.ratelimit import ratelimit
@@ -572,100 +568,27 @@ def load_starred_stories(request):
 
 @json.json_view
 def load_river_stories(request):
-    limit                = 18
-    offset               = int(request.REQUEST.get('offset', 0))
-    start                = time.time()
-    user                 = get_user(request)
-    feed_ids             = [int(feed_id) for feed_id in request.REQUEST.getlist('feeds') if feed_id]
-    original_feed_ids    = list(feed_ids)
-    page                 = int(request.REQUEST.get('page', 1))
-    read_stories_count   = int(request.REQUEST.get('read_stories_count', 0))
-    days_to_keep_unreads = datetime.timedelta(days=settings.DAYS_OF_UNREAD)
-    now                  = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone)
+    limit             = 6
+    start             = time.time()
+    user              = get_user(request)
+    feed_ids          = [int(feed_id) for feed_id in request.REQUEST.getlist('feeds') if feed_id]
+    original_feed_ids = list(feed_ids)
+    page              = int(request.REQUEST.get('page', 1))
+    now               = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone)
     if not feed_ids:
         logging.user(request, "~FCLoading empty river stories: page %s" % (page))
         return dict(stories=[])
-    # Fetch all stories at and before the page number.
-    # Not a single page, because reading stories can move them up in the unread order.
-    # `read_stories_count` is an optimization, works best when all 25 stories before have been read.
-    offset = (page-1) * limit - read_stories_count
-    limit  = page * limit - read_stories_count
+    offset = (page-1) * limit
+    limit  = page * limit - 1
-    # Read stories to exclude
-    read_stories = MUserStory.objects(user_id=user.pk,
-                                      feed_id__in=feed_ids
-                                      ).only('story_id').hint([('user_id', 1), ('feed_id', 1), ('story_id', 1)])
-    read_stories = [rs.story_id for rs in read_stories]
-    # Determine mark_as_read dates for all feeds to ignore all stories before this date.
-    feed_counts = {}
-    feed_last_reads = {}
-    for feed_id in feed_ids:
-        try:
-            usersub = UserSubscription.objects.get(feed__pk=feed_id, user=user)
-        except UserSubscription.DoesNotExist:
-            continue
-        if not usersub: continue
-        feed_counts[feed_id] = (usersub.unread_count_negative * 1 +
-                                usersub.unread_count_neutral * 10 +
-                                usersub.unread_count_positive * 20)
-        feed_last_reads[feed_id] = int(time.mktime(usersub.mark_read_date.timetuple()))
-    feed_counts = sorted(feed_counts.items(), key=itemgetter(1))[:40]
-    feed_ids = [f[0] for f in feed_counts]
-    feed_last_reads = dict([(str(feed_id), feed_last_reads[feed_id]) for feed_id in feed_ids
-                            if feed_id in feed_last_reads])
-    feed_counts = dict(feed_counts)
-    # After excluding read stories, all that's left are stories
-    # past the mark_read_date. Everything returned is guaranteed to be unread.
-    mstories = MStory.objects(
-        story_guid__nin=read_stories,
-        story_feed_id__in=feed_ids,
-        # story_date__gte=start - days_to_keep_unreads
-    ).map_reduce("""function() {
-        var d = feed_last_reads[this[~story_feed_id]];
-        if (this[~story_date].getTime()/1000 > d) {
-            emit(this[~id], this);
-        }
-    }""",
-    """function(key, values) {
-        return values[0];
-    }""",
-    output='inline',
-    scope={
-        'feed_last_reads': feed_last_reads
-    }
-    )
-    try:
-        mstories = [story.value for story in mstories if story and story.value]
-    except OperationFailure, e:
-        return dict(error=str(e), code=-1)
-    mstories = sorted(mstories, cmp=lambda x, y: cmp(story_score(y, days_to_keep_unreads),
-                                                     story_score(x, days_to_keep_unreads)))
-    # Prune the river to only include a set number of stories per feed
-    # story_feed_counts = defaultdict(int)
-    # mstories_pruned = []
-    # for story in mstories:
-    #     print story['story_title'], story_feed_counts[story['story_feed_id']]
-    #     if story_feed_counts[story['story_feed_id']] >= 3: continue
-    #     mstories_pruned.append(story)
-    #     story_feed_counts[story['story_feed_id']] += 1
-    stories = []
-    for i, story in enumerate(mstories):
-        if i < offset: continue
-        if i >= limit: break
-        stories.append(bunch(story))
-    stories = Feed.format_stories(stories)
+    story_ids = UserSubscription.unread_feed_stories(user.pk, feed_ids, offset=offset, limit=limit)
+    mstories  = MStory.objects(id__in=story_ids)
+    stories   = Feed.format_stories(mstories)
     found_feed_ids = list(set([story['story_feed_id'] for story in stories]))
 
     # Find starred stories
-    # try:
     if found_feed_ids:
         starred_stories = MStarredStory.objects(
             user_id=user.pk,
@@ -675,9 +598,6 @@ def load_river_stories(request):
                                        for story in starred_stories])
     else:
         starred_stories = {}
-    # except OperationFailure:
-    #     logging.info(" ***> Starred stories failure")
-    #     starred_stories = {}
 
     # Intelligence classifiers for all feeds involved
     if found_feed_ids:
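
Taken together, the view now delegates ranking to Redis and uses Mongo only to hydrate one page of stories. A condensed sketch of the new flow, reusing names already imported in views.py (the helper itself is illustrative, not part of the commit):

def load_river_page(user, feed_ids, page, per_page=6):
    # 1. Redis ranks unread story ids across the requested feeds (zU:<user_id>).
    offset = (page - 1) * per_page
    stop = page * per_page - 1
    story_ids = UserSubscription.unread_feed_stories(
        user.pk, feed_ids, offset=offset, limit=stop)
    # 2. Mongo hydrates only the ids on the current page.
    mstories = MStory.objects(id__in=story_ids)
    # 3. Format for the client.
    return Feed.format_stories(mstories)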