Refactoring river. Something's off about Oldest first.

Samuel Clay 2013-06-27 17:21:01 -07:00
parent 2df28e5095
commit 86acc97e17
2 changed files with 65 additions and 62 deletions


@@ -17,7 +17,7 @@ from utils.feed_functions import add_object_to_folder
 class UserSubscription(models.Model):
     """
-    A feed which a user has subscrubed to. Carries all of the cached information
+    A feed which a user has subscribed to. Carries all of the cached information
     about the subscription, including unread counts of the three primary scores.
     Also has a dirty flag (needs_unread_recalc) which means that the unread counts
@@ -87,22 +87,31 @@ class UserSubscription(models.Model):
         self.delete()
     
     @classmethod
-    def story_hashes(cls, user_id, feed_ids=None, read_filter="unread", order="newest",
-                     include_timestamps=False):
-        r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)
-        usersubs = []
-        
+    def subs_for_feeds(cls, user_id, feed_ids=None, read_filter="unread"):
+        usersubs = cls.objects
+        if read_filter == "unread":
+            usersubs = usersubs.filter(Q(unread_count_neutral__gt=0) |
+                                       Q(unread_count_positive__gt=0))
         if not feed_ids:
-            usersubs = UserSubscription.objects.filter(Q(unread_count_neutral__gt=0) |
-                                                       Q(unread_count_positive__gt=0),
-                                                       user=user_id, active=True).only('feed', 'mark_read_date')
-            feed_ids = [sub.feed_id for sub in usersubs]
+            usersubs = usersubs.filter(user=user_id,
+                                       active=True).only('feed', 'mark_read_date')
         else:
-            usersubs = UserSubscription.objects.filter(Q(unread_count_neutral__gt=0) |
-                                                       Q(unread_count_positive__gt=0),
-                                                       user=user_id, active=True, feed__in=feed_ids)
+            usersubs = usersubs.filter(user=user_id,
+                                       active=True,
+                                       feed__in=feed_ids).only('feed', 'mark_read_date')
+        return usersubs
+    
+    @classmethod
+    def story_hashes(cls, user_id, feed_ids=None, usersubs=None, read_filter="unread", order="newest",
+                     include_timestamps=False, store_key=None):
+        r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)
         pipeline = r.pipeline()
+        if not usersubs:
+            usersubs = cls.subs_for_feeds(user_id, feed_ids=feed_ids, read_filter=read_filter)
+            feed_ids = [sub.feed_id for sub in usersubs]
+        
         read_dates = dict((us.feed_id, int(us.mark_read_date.strftime('%s'))) for us in usersubs)
         current_time = int(time.time() + 60*60*24)
         unread_interval = datetime.datetime.now() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)
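
The filtering that used to live inline in story_hashes() is now its own classmethod, so callers can build the queryset once and pass it through. A minimal usage sketch, assuming the models and Redis pool from this diff (user_id=42 is a hypothetical value):

    # Sketch: precompute subscriptions once, then reuse them.
    usersubs = UserSubscription.subs_for_feeds(42, read_filter="unread")
    feed_ids = [sub.feed_id for sub in usersubs]
    # Passing usersubs lets story_hashes() skip its own subscription query.
    hashes = UserSubscription.story_hashes(42, feed_ids=feed_ids, usersubs=usersubs,
                                           read_filter="unread", order="newest")
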
@@ -132,8 +141,13 @@ class UserSubscription(models.Model):
             pipeline.zinterstore(unread_ranked_stories_key, [sorted_stories_key, unread_stories_key])
             byscorefunc(unread_ranked_stories_key, min_score, max_score, withscores=include_timestamps)
+            if store_key:
+                pipeline.zunionstore(store_key, [store_key, unread_ranked_stories_key])
         
         results = pipeline.execute()
         
+        if store_key: return
+        
         story_hashes = {}
         feed_counter = 0
         for hashes in results:
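
When store_key is given, each feed's unread sorted set is union-merged into a single Redis key inside the pipeline, and the method returns early rather than shipping hashes back to Python. A sketch of that accumulation pattern with redis-py (all key names here are hypothetical):

    import redis

    r = redis.Redis()
    pipeline = r.pipeline()
    store_key = 'zU:42:feeds'                    # hypothetical aggregate key
    for feed_key in ('zhU:42:f1', 'zhU:42:f2'):  # hypothetical per-feed keys
        # ZUNIONSTORE sums scores for duplicate members by default, but story
        # hashes are feed-scoped and disjoint, so each timestamp score survives.
        pipeline.zunionstore(store_key, [store_key, feed_key])
    pipeline.execute()
    newest_six = r.zrevrange(store_key, 0, 5, withscores=True)
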
@@ -215,8 +229,8 @@ class UserSubscription(models.Model):
         return []
     
     @classmethod
-    def feed_stories(cls, user_id, feed_ids, offset=0, limit=6, order='newest', read_filter='all',
-                     usersubs=None):
+    def feed_stories(cls, user_id, feed_ids=None, offset=0, limit=6,
+                     order='newest', read_filter='all'):
         r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)
         
         if order == 'oldest':
@@ -224,41 +238,34 @@ class UserSubscription(models.Model):
         else:
             range_func = r.zrevrange
         
-        if not isinstance(feed_ids, list):
-            feed_ids = [feed_ids]
-        
         ranked_stories_keys = 'zU:%s:feeds' % (user_id)
         unread_ranked_stories_keys = 'zhU:%s:feeds' % (user_id)
-        unread_story_hashes = cache.get(unread_ranked_stories_keys)
-        if offset and r.exists(ranked_stories_keys) and unread_story_hashes:
+        if offset and r.exists(ranked_stories_keys) and r.exists(unread_ranked_stories_keys):
             story_hashes = range_func(ranked_stories_keys, offset, limit)
+            if read_filter == "unread":
+                unread_story_hashes = story_hashes
+            else:
+                unread_story_hashes = range_func(unread_ranked_stories_keys, offset, limit)
             return story_hashes, unread_story_hashes
         else:
             r.delete(ranked_stories_keys)
-            cache.delete(unread_ranked_stories_keys)
-        
-        if not usersubs and feed_ids:
-            usersubs = cls.objects.filter(user=user_id, feed__in=feed_ids)
-        if usersubs:
-            usersubs = dict((sub.feed_id, sub) for sub in usersubs)
-        
-        unread_feed_story_hashes = {}
-        for feed_id in feed_ids:
-            if feed_id in usersubs:
-                us = usersubs[feed_id]
-            else:
-                continue
-            story_hashes = us.get_stories(offset=0, limit=200,
-                                          order=order, read_filter=read_filter,
-                                          withscores=True)
-            unread_feed_story_hashes[feed_id] = us.get_stories(read_filter='unread', limit=200,
-                                                               hashes_only=True)
-            if story_hashes:
-                r.zadd(ranked_stories_keys, **dict(story_hashes))
+            r.delete(unread_ranked_stories_keys)
         
+        cls.story_hashes(user_id, feed_ids=feed_ids,
+                         read_filter=read_filter, order=order,
+                         store_key=ranked_stories_keys)
         story_hashes = range_func(ranked_stories_keys, offset, limit)
+        if read_filter == "unread":
+            unread_feed_story_hashes = story_hashes
+        else:
+            cls.story_hashes(user_id, feed_ids=feed_ids,
+                             read_filter="unread", order=order,
+                             store_key=unread_ranked_stories_keys)
+            unread_feed_story_hashes = range_func(unread_ranked_stories_keys, offset, limit)
         
         r.expire(ranked_stories_keys, 60*60)
-        cache.set(unread_ranked_stories_keys, unread_feed_story_hashes, 24*60*60)
+        r.expire(unread_ranked_stories_keys, 60*60)
         
         return story_hashes, unread_feed_story_hashes
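
feed_stories() now delegates the per-feed work to story_hashes() with a store_key and keeps only the pagination reads. One aggregate sorted set serves both orders; only the read direction flips, which is what should make order='oldest' cheap (the commit message suggests this path still misbehaves, so treat this as the intended shape, not verified behavior):

    # Sketch: offset and limit arrive as absolute start/end ranks from the view.
    key = 'zU:42:feeds'   # hypothetical user id
    range_func = r.zrange if order == 'oldest' else r.zrevrange
    page_hashes = range_func(key, offset, limit)
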


@@ -786,12 +786,6 @@ def load_river_stories__redis(request):
     read_filter = request.REQUEST.get('read_filter', 'unread')
     now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone)
     
-    if not feed_ids and not story_hashes:
-        usersubs = UserSubscription.objects.filter(user=user, active=True)
-        feed_ids = [sub.feed_id for sub in usersubs]
-    else:
-        usersubs = UserSubscription.objects.filter(user=user, active=True, feed__in=feed_ids)
-    
     offset = (page-1) * limit
     limit = page * limit - 1
     story_date_order = "%sstory_date" % ('' if order == 'oldest' else '-')
@@ -803,15 +797,14 @@ def load_river_stories__redis(request):
         story_hashes, unread_feed_story_hashes = UserSubscription.feed_stories(user.pk, feed_ids,
                                                                                offset=offset, limit=limit,
                                                                                order=order,
-                                                                               read_filter=read_filter,
-                                                                               usersubs=usersubs)
+                                                                               read_filter=read_filter)
     mstories = MStory.objects(story_hash__in=story_hashes).order_by(story_date_order)
     stories = Feed.format_stories(mstories)
     found_feed_ids = list(set([story['story_feed_id'] for story in stories]))
     stories, user_profiles = MSharedStory.stories_with_comments_and_profiles(stories, user.pk)
     
-    trained_feed_ids = [sub.feed_id for sub in usersubs if sub.is_trained]
-    found_trained_feed_ids = list(set(trained_feed_ids) & set(found_feed_ids))
+    # trained_feed_ids = [sub.feed_id for sub in usersubs if sub.is_trained]
+    # found_trained_feed_ids = list(set(trained_feed_ids) & set(found_feed_ids))
+    found_trained_feed_ids = []
     
     # Find starred stories
     if found_feed_ids:
         starred_stories = MStarredStory.objects(
@@ -826,13 +819,13 @@ def load_river_stories__redis(request):
     # Intelligence classifiers for all feeds involved
     if found_trained_feed_ids:
         classifier_feeds = list(MClassifierFeed.objects(user_id=user.pk,
                                                         feed_id__in=found_trained_feed_ids))
         classifier_authors = list(MClassifierAuthor.objects(user_id=user.pk,
                                                             feed_id__in=found_trained_feed_ids))
         classifier_titles = list(MClassifierTitle.objects(user_id=user.pk,
                                                           feed_id__in=found_trained_feed_ids))
         classifier_tags = list(MClassifierTag.objects(user_id=user.pk,
                                                       feed_id__in=found_trained_feed_ids))
     else:
         classifier_feeds = []
         classifier_authors = []
@@ -849,7 +842,7 @@ def load_river_stories__redis(request):
     for story in stories:
         story['read_status'] = 0
         if read_filter == 'all':
-            if story['story_hash'] not in unread_feed_story_hashes.get(story['story_feed_id'], []):
+            if unread_feed_story_hashes and story['story_hash'] not in unread_feed_story_hashes:
                 story['read_status'] = 1
         story_date = localtime_for_timezone(story['story_date'], user.profile.timezone)
         story['short_parsed_date'] = format_story_link_date__short(story_date, now)
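
unread_feed_story_hashes used to be a per-feed dict of hash lists; it is now one flat collection for the whole river, so the membership test no longer keys on story_feed_id. An equivalent form of the new check, with a set for O(1) lookups (a sketch, not code from the commit):

    unread = set(unread_feed_story_hashes or [])
    for story in stories:
        story['read_status'] = 0
        if read_filter == 'all' and unread and story['story_hash'] not in unread:
            story['read_status'] = 1
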
@@ -922,11 +915,14 @@ def unread_story_hashes__old(request):
 @json.json_view
 def unread_story_hashes(request):
     user = get_user(request)
     feed_ids = [int(feed_id) for feed_id in request.REQUEST.getlist('feed_id') if feed_id]
     include_timestamps = is_true(request.REQUEST.get('include_timestamps', False))
+    order = request.REQUEST.get('order', 'newest')
+    read_filter = request.REQUEST.get('read_filter', 'unread')
     
     story_hashes = UserSubscription.story_hashes(user.pk, feed_ids=feed_ids,
+                                                 order=order, read_filter=read_filter,
                                                  include_timestamps=include_timestamps)
     
     logging.user(request, "~FYLoading ~FCunread story hashes~FY: ~SB%s feeds~SN (%s story hashes)" %