diff --git a/apps/reader/models.py b/apps/reader/models.py
index 0a98e9919..f6294910a 100644
--- a/apps/reader/models.py
+++ b/apps/reader/models.py
@@ -2,6 +2,7 @@ import datetime
 import time
 import redis
 import hashlib
+import re
 import mongoengine as mongo
 from utils import log as logging
 from utils import json_functions as json
@@ -86,30 +87,8 @@ class UserSubscription(models.Model):
                 break
         else:
             self.delete()
-
-    @classmethod
-    def sync_all_redis(cls, user_id, skip_feed=False):
-        us = cls.objects.filter(user=user_id)
-
-        for sub in us:
-            print " ---> Syncing usersub: %s" % sub
-            sub.sync_redis(skip_feed=skip_feed)
-
-    def sync_redis(self, skip_feed=False):
-        r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)
-        UNREAD_CUTOFF = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD+1)
-
-        userstories = MUserStory.objects.filter(feed_id=self.feed_id, user_id=self.user_id,
-                                                read_date__gte=UNREAD_CUTOFF)
-        total = userstories.count()
-        logging.debug(" ---> ~SN~FMSyncing ~SB%s~SN stories (%s)" % (total, self))
-
-        pipeline = r.pipeline()
-        for userstory in userstories:
-            userstory.sync_redis(r=pipeline)
-        pipeline.execute()
-
-    def get_stories(self, offset=0, limit=6, order='newest', read_filter='all', withscores=False):
+    def get_stories(self, offset=0, limit=6, order='newest', read_filter='all', withscores=False, hashes_only=False):
         r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)
         ignore_user_stories = False
@@ -117,7 +96,8 @@ class UserSubscription(models.Model):
         read_stories_key = 'RS:%s:%s' % (self.user_id, self.feed_id)
         unread_stories_key = 'U:%s:%s' % (self.user_id, self.feed_id)
 
-        unread_ranked_stories_key = 'zU:%s:%s' % (self.user_id, self.feed_id)
+        unread_ranked_stories_key = 'z%sU:%s:%s' % ('h' if hashes_only else '',
+                                                    self.user_id, self.feed_id)
         if offset and not withscores and r.exists(unread_ranked_stories_key):
             pass
         else:
@@ -167,7 +147,7 @@ class UserSubscription(models.Model):
         if not ignore_user_stories:
             r.delete(unread_stories_key)
 
-        if withscores:
+        if withscores or hashes_only:
             return story_ids
         elif story_ids:
             story_date_order = "%sstory_date" % ('' if order == 'oldest' else '-')
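
Note on the hunk above: the new hashes_only flag lets callers pull raw "<feed_id>:<guid-hash>" strings straight out of Redis and skip the Mongo hydration step entirely, and the scratch sort key is namespaced zhU: instead of zU: so a hash-only listing cannot clobber the cached zset that paginated story loads reuse. A sketch of the two call styles (illustrative; usersub is a UserSubscription instance):

    # Hash-only: no Mongo round-trip, returns raw story hashes from Redis
    unread_hashes = usersub.get_stories(read_filter='unread', limit=500, hashes_only=True)
    # e.g. ['42:6c9b2a', '42:91f03d', ...]

    # Default: ranked ids are hydrated into full story dicts via Mongo
    stories = usersub.get_stories(offset=0, limit=6, order='newest')
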
@@ -189,13 +169,17 @@ class UserSubscription(models.Model):
         if not isinstance(feed_ids, list):
             feed_ids = [feed_ids]
 
-        unread_ranked_stories_keys = 'zU:%s:feeds' % (user_id)
-        if offset and r.exists(unread_ranked_stories_keys):
-            story_hashes = range_func(unread_ranked_stories_keys, offset, limit)
-            return story_hashes
+        ranked_stories_keys = 'zU:%s:feeds' % (user_id)
+        unread_ranked_stories_keys = 'zhU:%s:feeds' % (user_id)
+        unread_story_hashes = cache.get(unread_ranked_stories_keys)
+        if offset and r.exists(ranked_stories_keys) and unread_story_hashes:
+            story_hashes = range_func(ranked_stories_keys, offset, limit)
+            return story_hashes, unread_story_hashes
         else:
-            r.delete(unread_ranked_stories_keys)
-
+            r.delete(ranked_stories_keys)
+            cache.delete(unread_ranked_stories_keys)
+
+        unread_feed_story_hashes = {}
         for feed_id in feed_ids:
             try:
                 us = cls.objects.get(user=user_id, feed=feed_id)
@@ -204,14 +188,16 @@ class UserSubscription(models.Model):
             story_hashes = us.get_stories(offset=0, limit=200,
                                           order=order, read_filter=read_filter,
                                           withscores=True)
-
+            unread_feed_story_hashes[feed_id] = us.get_stories(read_filter='unread', limit=200,
+                                                               hashes_only=True)
             if story_hashes:
-                r.zadd(unread_ranked_stories_keys, **dict(story_hashes))
+                r.zadd(ranked_stories_keys, **dict(story_hashes))
 
-        story_hashes = range_func(unread_ranked_stories_keys, offset, limit)
-        r.expire(unread_ranked_stories_keys, 24*60*60)
+        story_hashes = range_func(ranked_stories_keys, offset, limit)
+        r.expire(ranked_stories_keys, 60*60)
+        cache.set(unread_ranked_stories_keys, unread_feed_story_hashes, 24*60*60)
 
-        return story_hashes
+        return story_hashes, unread_feed_story_hashes
 
     @classmethod
     def add_subscription(cls, user, feed_address, folder=None, bookmarklet=False, auto_active=True,
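
feed_stories now returns a pair: the page of ranked story hashes plus a {feed_id: [unread hashes]} dict. The dict is parked in the Django cache under the zhU:<user_id>:feeds key for 24 hours, while the ranked zset itself only lives for an hour. Callers unpack it like this (sketch; names follow the hunk above):

    story_hashes, unread_feed_story_hashes = UserSubscription.feed_stories(
        user.pk, feed_ids, offset=offset, limit=limit,
        order='newest', read_filter='all')
    # Unread membership is then a per-feed set lookup:
    is_unread = story_hash in unread_feed_story_hashes.get(feed_id, [])
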
stories" % user_stories.count()) @@ -528,7 +495,7 @@ class UserSubscription(models.Model): user_story.delete() else: user_story.delete() - + def switch_feed_for_classifier(model): duplicates = model.objects(feed_id=old_feed.pk, user_id=self.user_id) if duplicates.count(): @@ -548,6 +515,20 @@ class UserSubscription(models.Model): switch_feed_for_classifier(MClassifierAuthor) switch_feed_for_classifier(MClassifierFeed) switch_feed_for_classifier(MClassifierTag) + + # Switch to original feed for the user subscription + self.feed = new_feed + self.needs_unread_recalc = True + try: + new_sub = UserSubscription.objects.get(user=self.user, feed=new_feed) + except UserSubscription.DoesNotExist: + self.save() + user_sub_folders.rewrite_feed(new_feed, old_feed) + else: + # except (IntegrityError, OperationError): + logging.info(" !!!!> %s already subscribed" % self.user) + self.delete() + return @classmethod def collect_orphan_feeds(cls, user): @@ -583,6 +564,85 @@ class UserSubscription(models.Model): usf.save() +class RUserStory: + + RE_STORY_HASH = re.compile(r"^(\d{1,10}):(\w{6})$") + + @classmethod + def story_hash(cls, story_id, story_feed_id): + if not cls.RE_STORY_HASH.match(story_id): + story, _ = MStory.find_story(story_feed_id=story_feed_id, story_id=story_id) + if not story: return + story_id = story.story_hash + + return story_id + + @classmethod + def split_story_hash(cls, story_hash): + matches = cls.RE_STORY_HASH.match(story_hash) + if matches: + groups = matches.groups() + return groups[0], groups[1] + return None, None + + @classmethod + def story_hashes(cls, story_ids): + story_hashes = [] + for story_id in story_ids: + story_hash = cls.story_hash(story_id) + if not story_hash: continue + story_hashes.append(story_hash) + + return story_hashes + + @classmethod + def mark_read(cls, user_id, story_feed_id, story_hash, r=None): + if not r: + r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL) + + story_hash = cls.story_hash(story_hash, story_feed_id=story_feed_id) + + if not story_hash: return + + all_read_stories_key = 'RS:%s' % (user_id) + r.sadd(all_read_stories_key, story_hash) + r.expire(all_read_stories_key, settings.DAYS_OF_UNREAD*24*60*60) + + read_story_key = 'RS:%s:%s' % (user_id, story_feed_id) + r.sadd(read_story_key, story_hash) + r.expire(read_story_key, settings.DAYS_OF_UNREAD*24*60*60) + + @staticmethod + def mark_unread(user_id, story_feed_id, story_hash): + r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL) + + r.srem('RS:%s' % user_id, story_hash) + r.srem('RS:%s:%s' % (user_id, story_feed_id), story_hash) + + @staticmethod + def get_stories(user_id, feed_id, r=None): + if not r: + r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL) + story_hashes = r.smembers("RS:%s:%s" % (user_id, feed_id)) + return story_hashes + + @classmethod + def switch_feed(cls, user_id, old_feed_id, new_feed_id): + r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL) + p = r.pipeline() + story_hashes = cls.get_stories(user_id, old_feed_id, r=r) + + for story_hash in story_hashes: + _, hash_story = cls.split_story_hash(story_hash) + new_story_hash = "%s:%s" % (new_feed_id, hash_story) + p.sadd("RS:%s:%s" % (user_id, new_feed_id), new_story_hash) + + p.execute() + + if len(story_hashes) > 0: + logging.info(" ---> %s read stories" % len(story_hashes)) + + class MUserStory(mongo.Document): """ Stories read by the user. 
@@ -614,12 +674,12 @@ class MUserStory(mongo.Document):
 
     def save(self, *args, **kwargs):
         self.story_hash = self.feed_guid_hash
-        self.sync_redis()
+        # self.sync_redis()
 
         super(MUserStory, self).save(*args, **kwargs)
 
     def delete(self, *args, **kwargs):
-        self.remove_from_redis()
+        # self.remove_from_redis()
 
         super(MUserStory, self).delete(*args, **kwargs)
 
@@ -655,44 +715,22 @@ class MUserStory(mongo.Document):
         cls.objects(user_id=user_id, feed_id=feed_id, read_date__lte=mark_read_date).delete()
 
-    @property
-    def story_db_id(self):
-        if self.story:
-            return self.story.id
-        elif self.found_story:
-            if '_ref' in self.found_story:
-                return self.found_story['_ref'].id
-            elif hasattr(self.found_story, 'id'):
-                return self.found_story.id
-
-        story, found_original = MStory.find_story(self.feed_id, self.story_id)
-        if story:
-            if found_original:
-                self.story = story
-            else:
-                self.found_story = story
-            self.save()
-
-            return story.id
-
     def sync_redis(self, r=None):
         if not r:
             r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)
 
-        if self.story_db_id:
-            all_read_stories_key = 'RS:%s' % (self.user_id)
-            r.sadd(all_read_stories_key, self.feed_guid_hash)
-            r.expire(all_read_stories_key, settings.DAYS_OF_UNREAD*24*60*60)
+        all_read_stories_key = 'RS:%s' % (self.user_id)
+        r.sadd(all_read_stories_key, self.feed_guid_hash)
+        r.expire(all_read_stories_key, settings.DAYS_OF_UNREAD*24*60*60)
 
-            read_story_key = 'RS:%s:%s' % (self.user_id, self.feed_id)
-            r.sadd(read_story_key, self.feed_guid_hash)
-            r.expire(read_story_key, settings.DAYS_OF_UNREAD*24*60*60)
+        read_story_key = 'RS:%s:%s' % (self.user_id, self.feed_id)
+        r.sadd(read_story_key, self.feed_guid_hash)
+        r.expire(read_story_key, settings.DAYS_OF_UNREAD*24*60*60)
 
     def remove_from_redis(self):
         r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)
-        if self.story_db_id:
-            r.srem('RS:%s' % self.user_id, self.feed_guid_hash)
-            r.srem('RS:%s:%s' % (self.user_id, self.feed_id), self.feed_guid_hash)
+        r.srem('RS:%s' % self.user_id, self.feed_guid_hash)
+        r.srem('RS:%s:%s' % (self.user_id, self.feed_id), self.feed_guid_hash)
 
     @classmethod
     def sync_all_redis(cls, user_id=None, feed_id=None, force=False):
@@ -825,7 +863,6 @@ class UserSubscriptionFolders(models.Model):
                 return
         if user_sub:
             user_sub.delete()
-        MUserStory.objects(user_id=self.user_id, feed_id=feed_id).delete()
 
     def delete_folder(self, folder_to_delete, in_folder, feed_ids_in_folder, commit_delete=True):
         def _find_folder_in_folders(old_folders, folder_name, feeds_to_delete, deleted_folder=None):
diff --git a/apps/reader/views.py b/apps/reader/views.py
index 3553f48d0..67b4f0eff 100644
--- a/apps/reader/views.py
+++ b/apps/reader/views.py
@@ -27,7 +27,7 @@ from apps.analyzer.models import apply_classifier_titles, apply_classifier_feeds
 from apps.analyzer.models import apply_classifier_authors, apply_classifier_tags
 from apps.analyzer.models import get_classifiers_for_user, sort_classifiers_by_feed
 from apps.profile.models import Profile
-from apps.reader.models import UserSubscription, UserSubscriptionFolders, MUserStory, Feature
+from apps.reader.models import UserSubscription, UserSubscriptionFolders, MUserStory, RUserStory, Feature
 from apps.reader.forms import SignupForm, LoginForm, FeatureForm
 from apps.rss_feeds.models import MFeedIcon
 from apps.statistics.models import MStatistics
@@ -495,7 +495,6 @@ def load_single_feed(request, feed_id):
     include_story_content = is_true(request.REQUEST.get('include_story_content', True))
 
     dupe_feed_id = None
-    userstories_db = None
     user_profiles = []
     now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone)
     if page: offset = limit * (page-1)
@@ -543,27 +542,22 @@ def load_single_feed(request, feed_id):
                                                classifier_tags=classifier_tags)
     checkpoint3 = time.time()
 
-    userstories = []
+    unread_story_hashes = []
     if stories:
-        story_ids = [story['id'] for story in stories]
-        userstories_db = MUserStory.objects(user_id=user.pk,
-                                            feed_id=feed.pk,
-                                            story_id__in=story_ids
-                                            ).only('story_id').hint([('user_id', 1),
-                                                                     ('feed_id', 1),
-                                                                     ('story_id', 1)])
+        if read_filter == 'all' and usersub:
+            unread_story_hashes = usersub.get_stories(read_filter='unread', limit=500, hashes_only=True)
+        story_hashes = [story['story_hash'] for story in stories]
         starred_stories = MStarredStory.objects(user_id=user.pk,
                                                 story_feed_id=feed.pk,
-                                                story_guid__in=story_ids
-                                                ).only('story_guid', 'starred_date')
+                                                story_hash__in=story_hashes)\
+                                       .only('story_hash', 'starred_date')
         shared_stories = MSharedStory.objects(user_id=user.pk,
                                               story_feed_id=feed_id,
-                                              story_guid__in=story_ids
-                                              ).only('story_guid', 'shared_date', 'comments')
-        starred_stories = dict([(story.story_guid, story.starred_date) for story in starred_stories])
-        shared_stories = dict([(story.story_guid, dict(shared_date=story.shared_date, comments=story.comments))
+                                              story_hash__in=story_hashes)\
+                                     .only('story_guid', 'shared_date', 'comments')
+        starred_stories = dict([(story.story_hash, story.starred_date) for story in starred_stories])
+        shared_stories = dict([(story.story_hash, dict(shared_date=story.shared_date, comments=story.comments))
                                for story in shared_stories])
-        userstories = set(us.story_id for us in userstories_db)
 
     checkpoint4 = time.time()
@@ -574,21 +568,22 @@ def load_single_feed(request, feed_id):
         story['short_parsed_date'] = format_story_link_date__short(story_date, now)
         story['long_parsed_date'] = format_story_link_date__long(story_date, now)
         if usersub:
-            if story['id'] in userstories:
-                story['read_status'] = 1
-            elif not story.get('read_status') and story['story_date'] < usersub.mark_read_date:
-                story['read_status'] = 1
-            elif not story.get('read_status') and story['story_date'] > usersub.last_read_date:
+            story['read_status'] = 1
+            if read_filter == 'unread' and usersub:
                 story['read_status'] = 0
-            if story['id'] in starred_stories:
+            elif read_filter == 'all' and usersub:
+                story['read_status'] = story['story_hash'] not in unread_story_hashes
+            if story['story_hash'] in starred_stories:
                 story['starred'] = True
-                starred_date = localtime_for_timezone(starred_stories[story['id']], user.profile.timezone)
+                starred_date = localtime_for_timezone(starred_stories[story['story_hash']],
+                                                      user.profile.timezone)
                 story['starred_date'] = format_story_link_date__long(starred_date, now)
-            if story['id'] in shared_stories:
+            if story['story_hash'] in shared_stories:
                 story['shared'] = True
-                shared_date = localtime_for_timezone(shared_stories[story['id']]['shared_date'], user.profile.timezone)
+                shared_date = localtime_for_timezone(shared_stories[story['story_hash']]['shared_date'],
+                                                     user.profile.timezone)
                 story['shared_date'] = format_story_link_date__long(shared_date, now)
-                story['shared_comments'] = strip_tags(shared_stories[story['id']]['comments'])
+                story['shared_comments'] = strip_tags(shared_stories[story['story_hash']]['comments'])
         else:
             story['read_status'] = 1
         story['intelligence'] = {
@@ -613,8 +608,8 @@ def load_single_feed(request, feed_id):
     diff4 = checkpoint4-start
     timediff = time.time()-start
     last_update = relative_timesince(feed.last_update)
-    time_breakdown = ("~SN~FR(~SB%.4s/%.4s/%.4s/%.4s(%s)~SN)" % (
-        diff1, diff2, diff3, diff4, userstories_db and userstories_db.count() or '~SN0~SB')
+    time_breakdown = ("~SN~FR(~SB%.4s/%.4s/%.4s/%.4s~SN)" % (
+        diff1, diff2, diff3, diff4)
                       if timediff > 1 else "")
     logging.user(request, "~FYLoading feed: ~SB%s%s (%s/%s) %s" % (
         feed.feed_title[:22], ('~SN/p%s' % page) if page > 1 else '', order, read_filter, time_breakdown))
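
The read-state pass above trades N per-story MUserStory lookups for a single unread-hash set per feed: every story defaults to read, and the filter decides what flips. Because the unread set is capped at 500 hashes, anything past that cap will render as read under read_filter == 'all'. The branch reduces to this (hypothetical helper mirroring the logic, not part of the diff):

    def read_status_for(story, read_filter, unread_story_hashes):
        # 'unread' feeds only ever return unread stories, so everything is
        # unread by construction; otherwise set membership decides.
        if read_filter == 'unread':
            return 0
        return 0 if story['story_hash'] in unread_story_hashes else 1
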
@@ -709,7 +704,7 @@ def load_starred_stories(request):
     stories, user_profiles = MSharedStory.stories_with_comments_and_profiles(stories, user.pk, check_all=True)
 
-    story_ids = [story['id'] for story in stories]
+    story_hashes = [story['story_hash'] for story in stories]
     story_feed_ids = list(set(s['story_feed_id'] for s in stories))
     usersub_ids = UserSubscription.objects.filter(user__pk=user.pk, feed__pk__in=story_feed_ids).values('feed__pk')
     usersub_ids = [us['feed__pk'] for us in usersub_ids]
@@ -717,9 +712,10 @@ def load_starred_stories(request):
     unsub_feeds = Feed.objects.filter(pk__in=unsub_feed_ids)
     unsub_feeds = dict((feed.pk, feed.canonical(include_favicon=False)) for feed in unsub_feeds)
     shared_stories = MSharedStory.objects(user_id=user.pk,
-                                          story_guid__in=story_ids
-                                          ).only('story_guid', 'shared_date', 'comments')
-    shared_stories = dict([(story.story_guid, dict(shared_date=story.shared_date, comments=story.comments))
+                                          story_hash__in=story_hashes)\
+                                 .only('story_hash', 'shared_date', 'comments')
+    shared_stories = dict([(story.story_hash, dict(shared_date=story.shared_date,
+                                                   comments=story.comments))
                            for story in shared_stories])
 
     for story in stories:
@@ -736,9 +732,9 @@ def load_starred_stories(request):
             'tags': 0,
             'title': 0,
         }
-        if story['id'] in shared_stories:
+        if story['story_hash'] in shared_stories:
             story['shared'] = True
-            story['shared_comments'] = strip_tags(shared_stories[story['id']]['comments'])
+            story['shared_comments'] = strip_tags(shared_stories[story['story_hash']]['comments'])
 
     logging.user(request, "~FCLoading starred stories: ~SB%s stories" % (len(stories)))
@@ -759,8 +755,6 @@ def load_river_stories__redis(request):
     order = request.REQUEST.get('order', 'newest')
     read_filter = request.REQUEST.get('read_filter', 'unread')
     now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone)
-    UNREAD_CUTOFF = (datetime.datetime.utcnow() -
-                     datetime.timedelta(days=settings.DAYS_OF_UNREAD))
 
     if not feed_ids:
         usersubs = UserSubscription.objects.filter(user=user, active=True)
@@ -769,41 +763,25 @@ def load_river_stories__redis(request):
     offset = (page-1) * limit
     limit = page * limit - 1
 
-    story_hashes = UserSubscription.feed_stories(user.pk, feed_ids, offset=offset, limit=limit,
-                                                 order=order, read_filter=read_filter)
+    story_hashes, unread_feed_story_hashes = UserSubscription.feed_stories(user.pk, feed_ids,
+                                                                           offset=offset, limit=limit,
+                                                                           order=order,
+                                                                           read_filter=read_filter)
     story_date_order = "%sstory_date" % ('' if order == 'oldest' else '-')
     mstories = MStory.objects(story_hash__in=story_hashes).order_by(story_date_order)
     stories = Feed.format_stories(mstories)
     found_feed_ids = list(set([story['story_feed_id'] for story in stories]))
     stories, user_profiles = MSharedStory.stories_with_comments_and_profiles(stories, user.pk)
 
-    feed_marked_read_dates = None
-    if read_filter == 'all':
-        feed_marked_read_dates = dict((us.feed_id, us.mark_read_date)
-                                      for us in UserSubscription.objects.filter(user=user,
-                                                                                feed__in=found_feed_ids).only(
-                                                                                'feed', 'mark_read_date'))
     # Find starred stories
     if found_feed_ids:
-        if read_filter == 'all':
-            story_ids = [story['id'] for story in stories]
-            userstories_db = MUserStory.objects(user_id=user.pk,
-                                                feed_id__in=found_feed_ids,
-                                                story_id__in=story_ids
-                                                ).only('story_id').hint([('user_id', 1),
-                                                                         ('feed_id', 1),
-                                                                         ('story_id', 1)])
-            userstories = set(us.story_id for us in userstories_db)
-        else:
-            userstories = []
         starred_stories = MStarredStory.objects(
             user_id=user.pk,
             story_feed_id__in=found_feed_ids
-        ).only('story_guid', 'starred_date')
-        starred_stories = dict([(story.story_guid, story.starred_date)
+        ).only('story_hash', 'starred_date')
+        starred_stories = dict([(story.story_hash, story.starred_date)
                                 for story in starred_stories])
     else:
-        userstories = []
         starred_stories = {}
 
     # Intelligence classifiers for all feeds involved
@@ -832,18 +810,15 @@ def load_river_stories__redis(request):
     for story in stories:
         story['read_status'] = 0
         if read_filter == 'all':
-            if story['id'] in userstories:
-                story['read_status'] = 1
-            elif story['story_date'] < feed_marked_read_dates[story['story_feed_id']]:
-                story['read_status'] = 1
-            elif story['story_date'] < UNREAD_CUTOFF:
+            if story['story_hash'] not in unread_feed_story_hashes.get(story['story_feed_id'], []):
                 story['read_status'] = 1
         story_date = localtime_for_timezone(story['story_date'], user.profile.timezone)
         story['short_parsed_date'] = format_story_link_date__short(story_date, now)
         story['long_parsed_date'] = format_story_link_date__long(story_date, now)
-        if story['id'] in starred_stories:
+        if story['story_hash'] in starred_stories:
             story['starred'] = True
-            starred_date = localtime_for_timezone(starred_stories[story['id']], user.profile.timezone)
+            starred_date = localtime_for_timezone(starred_stories[story['story_hash']],
+                                                  user.profile.timezone)
             story['starred_date'] = format_story_link_date__long(starred_date, now)
         story['intelligence'] = {
             'feed': apply_classifier_feeds(classifier_feeds, story['story_feed_id']),
@@ -1042,7 +1017,7 @@ def mark_story_as_unread(request):
                                                            story_guid_hash=story.guid_hash)
     dirty_count = social_subs and social_subs.count()
     dirty_count = ("(%s social_subs)" % dirty_count) if dirty_count else ""
-    
+
     try:
         m = MUserStory.objects.get(user_id=request.user.pk, feed_id=feed_id, story_id=story_id)
         m.delete()
@@ -1051,6 +1026,7 @@ def mark_story_as_unread(request):
         logging.user(request, "~SB~FRCouldn't find read story to mark as unread.")
     else:
         data['code'] = -1
+    RUserStory.mark_unread(user_id=request.user.pk, story_feed_id=feed_id, story_hash=story.story_hash)
 
     r = redis.Redis(connection_pool=settings.REDIS_POOL)
     r.publish(request.user.username, 'feed:%s' % feed_id)
@@ -1062,6 +1038,7 @@ def mark_story_as_unread(request):
 @ajax_login_required
 @json.json_view
 def mark_feed_as_read(request):
+    r = redis.Redis(connection_pool=settings.REDIS_POOL)
     feed_ids = request.REQUEST.getlist('feed_id')
     multiple = len(feed_ids) > 1
     code = 1
@@ -1087,6 +1064,7 @@ def mark_feed_as_read(request):
 
     try:
         sub.mark_feed_read()
+        r.publish(request.user.username, 'feed:%s' % feed_id)
     except IntegrityError:
         code = -1
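
mark_feed_as_read (like the existing mark_story_as_unread path) now publishes a 'feed:<feed_id>' message on the user's Redis pub/sub channel, which is how connected clients learn to recount unreads without polling. Roughly what the subscribing side sees (illustrative listener, not part of this diff):

    import redis
    r = redis.Redis(connection_pool=settings.REDIS_POOL)
    pubsub = r.pubsub()
    pubsub.subscribe('some_username')        # channel name is the username
    for message in pubsub.listen():
        if message['type'] == 'message' and message['data'].startswith('feed:'):
            feed_id = int(message['data'].split(':', 1)[1])
            # a client would re-fetch unread counts for feed_id here
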
diff --git a/apps/social/models.py b/apps/social/models.py
index b6d1730dc..cd5e9d335 100644
--- a/apps/social/models.py
+++ b/apps/social/models.py
@@ -17,7 +17,8 @@ from django.core.urlresolvers import reverse
 from django.template.loader import render_to_string
 from django.template.defaultfilters import slugify
 from django.core.mail import EmailMultiAlternatives
-from apps.reader.models import UserSubscription, MUserStory
+from django.core.cache import cache
+from apps.reader.models import UserSubscription, MUserStory, RUserStory
 from apps.analyzer.models import MClassifierFeed, MClassifierAuthor, MClassifierTag, MClassifierTitle
 from apps.analyzer.models import apply_classifier_titles, apply_classifier_feeds, apply_classifier_authors, apply_classifier_tags
 from apps.rss_feeds.models import Feed, MStory
@@ -852,7 +853,7 @@ class MSocialSubscription(mongo.Document):
         }
 
     def get_stories(self, offset=0, limit=6, order='newest', read_filter='all',
-                    withscores=False, everything_unread=False):
+                    withscores=False, hashes_only=False):
         r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)
         ignore_user_stories = False
@@ -862,14 +863,15 @@ class MSocialSubscription(mongo.Document):
 
         if not r.exists(stories_key):
             return []
-        elif everything_unread or read_filter != 'unread' or not r.exists(read_stories_key):
+        elif read_filter != 'unread' or not r.exists(read_stories_key):
             ignore_user_stories = True
             unread_stories_key = stories_key
         else:
             r.sdiffstore(unread_stories_key, stories_key, read_stories_key)
 
         sorted_stories_key = 'zB:%s' % (self.subscription_user_id)
-        unread_ranked_stories_key = 'zUB:%s:%s' % (self.user_id, self.subscription_user_id)
+        unread_ranked_stories_key = 'z%sUB:%s:%s' % ('h' if hashes_only else '',
+                                                     self.user_id, self.subscription_user_id)
         r.zinterstore(unread_ranked_stories_key, [sorted_stories_key, unread_stories_key])
 
         current_time = int(time.time() + 60*60*24)
@@ -896,7 +898,7 @@ class MSocialSubscription(mongo.Document):
         return story_ids
 
     @classmethod
-    def feed_stories(cls, user_id, social_user_ids, offset=0, limit=6, order='newest', read_filter='all', relative_user_id=None, everything_unread=False, cache=True):
+    def feed_stories(cls, user_id, social_user_ids, offset=0, limit=6, order='newest', read_filter='all', relative_user_id=None, cache=True):
         r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)
 
         if not relative_user_id:
@@ -910,31 +912,41 @@ class MSocialSubscription(mongo.Document):
         if not isinstance(social_user_ids, list):
             social_user_ids = [social_user_ids]
 
-        unread_ranked_stories_keys = 'zU:%s:social' % (user_id)
-        if offset and r.exists(unread_ranked_stories_keys) and cache:
-            story_hashes = range_func(unread_ranked_stories_keys, offset, offset+limit, withscores=True)
+        ranked_stories_keys = 'zU:%s:social' % (user_id)
+        read_ranked_stories_keys = 'zhU:%s:social' % (user_id)
+        if (offset and cache and
+            r.exists(ranked_stories_keys) and
+            r.exists(read_ranked_stories_keys)):
+            story_hashes = range_func(ranked_stories_keys, offset, offset+limit, withscores=True)
+            read_story_hashes = range_func(read_ranked_stories_keys, 0, -1)
             if story_hashes:
-                return zip(*story_hashes)
+                story_hashes, story_dates = zip(*story_hashes)
+                return story_hashes, story_dates, read_story_hashes
             else:
-                return [], []
+                return [], [], []
         else:
-            r.delete(unread_ranked_stories_keys)
-
+            r.delete(ranked_stories_keys)
+            r.delete(read_ranked_stories_keys)
+
         for social_user_id in social_user_ids:
             us = cls.objects.get(user_id=relative_user_id, subscription_user_id=social_user_id)
             story_hashes = us.get_stories(offset=0, limit=100,
                                           order=order, read_filter=read_filter,
-                                          withscores=True, everything_unread=everything_unread)
+                                          withscores=True)
             if story_hashes:
-                r.zadd(unread_ranked_stories_keys, **dict(story_hashes))
-
-        story_hashes = range_func(unread_ranked_stories_keys, offset, offset+limit, withscores=True)
-        r.expire(unread_ranked_stories_keys, 24*60*60)
+                r.zadd(ranked_stories_keys, **dict(story_hashes))
+
+        r.zinterstore(read_ranked_stories_keys, [ranked_stories_keys, "RS:%s" % user_id])
+        story_hashes = range_func(ranked_stories_keys, offset, limit, withscores=True)
+        read_story_hashes = range_func(read_ranked_stories_keys, offset, limit)
+        r.expire(ranked_stories_keys, 24*60*60)
+        r.expire(read_ranked_stories_keys, 24*60*60)
 
         if story_hashes:
-            return zip(*story_hashes)
+            story_hashes, story_dates = zip(*story_hashes)
+            return story_hashes, story_dates, read_story_hashes
         else:
-            return [], []
+            return [], [], []
 
     def mark_story_ids_as_read(self, story_ids, feed_id=None, mark_all_read=False, request=None):
         data = dict(code=0, payload=story_ids)
@@ -967,6 +979,7 @@ class MSocialSubscription(mongo.Document):
             date = now if now > story.story_date else story.story_date # For handling future stories
             feed_id = story.story_feed_id
             try:
+                RUserStory.mark_read(self.user_id, feed_id, story.story_hash)
                 m, _ = MUserStory.objects.get_or_create(user_id=self.user_id,
                                                         feed_id=feed_id,
                                                         story_id=story.story_guid,
@@ -1029,6 +1042,7 @@ class MSocialSubscription(mongo.Document):
         now = datetime.datetime.utcnow()
         date = now if now > story.story_date else story.story_date # For handling future stories
         try:
+            RUserStory.mark_read(user_id, story.story_feed_id, story.story_hash)
             m, _ = MUserStory.objects.get_or_create(user_id=user_id,
                                                     feed_id=story.story_feed_id,
                                                     story_id=story.story_guid,
@@ -1104,8 +1118,9 @@ class MSocialSubscription(mongo.Document):
         else:
             self.mark_read_date = date_delta
 
+        unread_story_hashes = self.get_stories(read_filter='unread', limit=500, hashes_only=True)
         stories_db = MSharedStory.objects(user_id=self.subscription_user_id,
-                                          shared_date__gte=date_delta)
+                                          story_hash__in=unread_story_hashes)
         story_feed_ids = set()
         story_ids = []
         for s in stories_db:
@@ -1115,21 +1130,12 @@ class MSocialSubscription(mongo.Document):
 
         usersubs = UserSubscription.objects.filter(user__pk=self.user_id, feed__pk__in=story_feed_ids)
         usersubs_map = dict((sub.feed_id, sub) for sub in usersubs)
-
-        # usersubs = UserSubscription.objects.filter(user__pk=user.pk, feed__pk__in=story_feed_ids)
-        # usersubs_map = dict((sub.feed_id, sub) for sub in usersubs)
-        read_stories_ids = []
-        if story_feed_ids:
-            read_stories = MUserStory.objects(user_id=self.user_id,
-                                              feed_id__in=story_feed_ids,
-                                              story_id__in=story_ids).only('story_id')
-            read_stories_ids = list(set(rs.story_id for rs in read_stories))
-
+
         oldest_unread_story_date = now
         unread_stories_db = []
         for story in stories_db:
-            if getattr(story, 'story_guid', None) in read_stories_ids:
+            if story['story_hash'] not in unread_story_hashes:
                 continue
             feed_id = story.story_feed_id
             if usersubs_map.get(feed_id) and story.shared_date < usersubs_map[feed_id].mark_read_date:
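
MSocialSubscription.feed_stories now hands back a triple instead of a pair: the ranked hashes, their dates, and the subset the user has already read, computed server-side by intersecting the ranked blurblog zset with the RS:<user_id> read set (ZINTERSTORE treats the plain set's members as score 1 and sums scores by default). Consumers unpack it like this (sketch; mirrors the call site in load_river_blurblog):

    story_hashes, story_dates, read_feed_story_hashes = MSocialSubscription.feed_stories(
        user.pk, social_user_ids, offset=offset, limit=limit,
        order=order, read_filter=read_filter, relative_user_id=relative_user_id)
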
diff --git a/apps/social/views.py b/apps/social/views.py
index 0cdc855ed..44540f70d 100644
--- a/apps/social/views.py
+++ b/apps/social/views.py
@@ -21,7 +21,7 @@ from apps.social.tasks import UpdateRecalcForSubscription, EmailFirstShare
 from apps.analyzer.models import MClassifierTitle, MClassifierAuthor, MClassifierFeed, MClassifierTag
 from apps.analyzer.models import apply_classifier_titles, apply_classifier_feeds, apply_classifier_authors, apply_classifier_tags
 from apps.analyzer.models import get_classifiers_for_user, sort_classifiers_by_feed
-from apps.reader.models import MUserStory, UserSubscription
+from apps.reader.models import UserSubscription
 from apps.profile.models import Profile
 from utils import json_functions as json
 from utils import log as logging
@@ -97,53 +97,43 @@ def load_social_stories(request, user_id, username=None):
         classifier_titles = classifier_titles + list(MClassifierTitle.objects(user_id=user.pk, feed_id__in=story_feed_ids))
         classifier_tags = classifier_tags + list(MClassifierTag.objects(user_id=user.pk, feed_id__in=story_feed_ids))
 
-    story_ids = [story['id'] for story in stories]
-    userstories_db = MUserStory.objects(user_id=user.pk,
-                                        feed_id__in=story_feed_ids,
-                                        story_id__in=story_ids).only('story_id')
-    userstories = set(us.story_id for us in userstories_db)
+    unread_story_hashes = []
+    if read_filter == 'all' and socialsub:
+        unread_story_hashes = socialsub.get_stories(read_filter='unread', limit=500)
+    story_hashes = [story['story_hash'] for story in stories]
 
     starred_stories = MStarredStory.objects(user_id=user.pk,
-                                            story_guid__in=story_ids).only('story_guid', 'starred_date')
+                                            story_hash__in=story_hashes)\
+                                   .only('story_hash', 'starred_date')
     shared_stories = MSharedStory.objects(user_id=user.pk,
-                                          story_guid__in=story_ids)\
-                                 .only('story_guid', 'shared_date', 'comments')
-    starred_stories = dict([(story.story_guid, story.starred_date) for story in starred_stories])
-    shared_stories = dict([(story.story_guid, dict(shared_date=story.shared_date, comments=story.comments))
+                                          story_hash__in=story_hashes)\
+                                 .only('story_hash', 'shared_date', 'comments')
+    starred_stories = dict([(story.story_hash, story.starred_date) for story in starred_stories])
+    shared_stories = dict([(story.story_hash, dict(shared_date=story.shared_date,
+                                                   comments=story.comments))
                            for story in shared_stories])
 
     for story in stories:
         story['social_user_id'] = social_user_id
-        story_feed_id = story['story_feed_id']
         # story_date = localtime_for_timezone(story['story_date'], user.profile.timezone)
         shared_date = localtime_for_timezone(story['shared_date'], user.profile.timezone)
         story['short_parsed_date'] = format_story_link_date__short(shared_date, now)
         story['long_parsed_date'] = format_story_link_date__long(shared_date, now)
 
-        if not socialsub:
-            story['read_status'] = 1
-        elif story['id'] in userstories:
-            story['read_status'] = 1
-        elif story['shared_date'] < date_delta:
-            story['read_status'] = 1
-        elif not usersubs_map.get(story_feed_id):
-            story['read_status'] = 0
-        elif not story.get('read_status') and story['shared_date'] < usersubs_map[story_feed_id].mark_read_date:
-            story['read_status'] = 1
-        elif not story.get('read_status') and story['shared_date'] < date_delta:
-            story['read_status'] = 1
-        # elif not story.get('read_status') and socialsub and story['shared_date'] > socialsub.last_read_date:
-        #     story['read_status'] = 0
-        else:
+        story['read_status'] = 1
+        if read_filter == 'unread' and socialsub:
             story['read_status'] = 0
+        elif read_filter == 'all' and socialsub:
+            story['read_status'] = story['story_hash'] not in unread_story_hashes
 
-        if story['id'] in starred_stories:
+        if story['story_hash'] in starred_stories:
             story['starred'] = True
-            starred_date = localtime_for_timezone(starred_stories[story['id']], user.profile.timezone)
+            starred_date = localtime_for_timezone(starred_stories[story['story_hash']],
+                                                  user.profile.timezone)
             story['starred_date'] = format_story_link_date__long(starred_date, now)
-        if story['id'] in shared_stories:
+        if story['story_hash'] in shared_stories:
             story['shared'] = True
-            story['shared_comments'] = strip_tags(shared_stories[story['id']]['comments'])
+            story['shared_comments'] = strip_tags(shared_stories[story['story_hash']]['comments'])
 
         story['intelligence'] = {
             'feed': apply_classifier_feeds(classifier_feeds, story['story_feed_id'],
@@ -188,7 +178,7 @@ def load_river_blurblog(request):
     global_feed = request.REQUEST.get('global_feed', None)
     now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone)
     UNREAD_CUTOFF = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)
-    
+
     if global_feed:
         global_user = User.objects.get(username='popular')
         relative_user_id = global_user.pk
@@ -203,11 +193,11 @@ def load_river_blurblog(request):
     offset = (page-1) * limit
     limit = page * limit - 1
 
-    story_hashes, story_dates = MSocialSubscription.feed_stories(user.pk, social_user_ids,
-                                                                 offset=offset, limit=limit,
-                                                                 order=order, read_filter=read_filter,
-                                                                 relative_user_id=relative_user_id,
-                                                                 everything_unread=global_feed)
+    story_hashes, story_dates, read_feed_story_hashes = MSocialSubscription.feed_stories(
+        user.pk, social_user_ids,
+        offset=offset, limit=limit,
+        order=order, read_filter=read_filter,
+        relative_user_id=relative_user_id)
     mstories = MStory.find_by_story_hashes(story_hashes)
     story_hashes_to_dates = dict(zip(story_hashes, story_dates))
     def sort_stories_by_hash(a, b):
@@ -232,30 +222,23 @@ def load_river_blurblog(request):
     unsub_feeds = Feed.objects.filter(pk__in=unsub_feed_ids)
     unsub_feeds = [feed.canonical(include_favicon=False) for feed in unsub_feeds]
 
-    # Find starred stories
     if story_feed_ids:
-        story_ids = [story['id'] for story in stories]
+        story_hashes = [story['story_hash'] for story in stories]
         starred_stories = MStarredStory.objects(
             user_id=user.pk,
-            story_guid__in=story_ids
-        ).only('story_guid', 'starred_date')
-        starred_stories = dict([(story.story_guid, story.starred_date)
+            story_hash__in=story_hashes
+        ).only('story_hash', 'starred_date')
+        starred_stories = dict([(story.story_hash, story.starred_date)
                                 for story in starred_stories])
         shared_stories = MSharedStory.objects(user_id=user.pk,
-                                              story_guid__in=story_ids)\
-                                     .only('story_guid', 'shared_date', 'comments')
-        shared_stories = dict([(story.story_guid, dict(shared_date=story.shared_date, comments=story.comments))
-                               for story in shared_stories])
-
-        userstories_db = MUserStory.objects(user_id=user.pk,
-                                            feed_id__in=story_feed_ids,
-                                            story_id__in=story_ids).only('story_id')
-        userstories = set(us.story_id for us in userstories_db)
-
+                                              story_hash__in=story_hashes)\
+                                     .only('story_hash', 'shared_date', 'comments')
+        shared_stories = dict([(story.story_hash, dict(shared_date=story.shared_date,
+                                                       comments=story.comments))
+                               for story in shared_stories])
     else:
         starred_stories = {}
         shared_stories = {}
-        userstories = []
 
     # Intelligence classifiers for all feeds involved
     if story_feed_ids:
@@ -277,18 +260,15 @@ def load_river_blurblog(request):
     # Just need to format stories
     for story in stories:
-        if story['id'] in userstories:
+        story['read_status'] = 0
+        if story['story_hash'] in read_feed_story_hashes:
             story['read_status'] = 1
-        elif story['story_date'] < UNREAD_CUTOFF:
-            story['read_status'] = 1
-        else:
-            story['read_status'] = 0
         story_date = localtime_for_timezone(story['story_date'], user.profile.timezone)
         story['short_parsed_date'] = format_story_link_date__short(story_date, now)
         story['long_parsed_date'] = format_story_link_date__long(story_date, now)
-        if story['id'] in starred_stories:
+        if story['story_hash'] in starred_stories:
             story['starred'] = True
-            starred_date = localtime_for_timezone(starred_stories[story['id']], user.profile.timezone)
+            starred_date = localtime_for_timezone(starred_stories[story['story_hash']], user.profile.timezone)
             story['starred_date'] = format_story_link_date__long(starred_date, now)
         story['intelligence'] = {
             'feed': apply_classifier_feeds(classifier_feeds, story['story_feed_id'],
@@ -297,12 +277,15 @@ def load_river_blurblog(request):
             'tags': apply_classifier_tags(classifier_tags, story),
             'title': apply_classifier_titles(classifier_titles, story),
         }
-        if story['id'] in shared_stories:
+        if story['story_hash'] in shared_stories:
            story['shared'] = True
-            shared_date = localtime_for_timezone(shared_stories[story['id']]['shared_date'],
+            shared_date = localtime_for_timezone(shared_stories[story['story_hash']]['shared_date'],
                                                  user.profile.timezone)
             story['shared_date'] = format_story_link_date__long(shared_date, now)
-            story['shared_comments'] = strip_tags(shared_stories[story['id']]['comments'])
+            story['shared_comments'] = strip_tags(shared_stories[story['story_hash']]['comments'])
+            if story['shared_date'] < UNREAD_CUTOFF or story['story_hash'] in read_feed_story_hashes:
+                story['read_status'] = 1
+
     classifiers = sort_classifiers_by_feed(user=user,
                                            feed_ids=story_feed_ids,
                                            classifier_feeds=classifier_feeds,
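
Worth noting the asymmetry between the two rivers: the feed river receives unread hashes as a per-feed dict, while the blurblog river receives read hashes as one flat window, so the membership tests point in opposite directions (sketch):

    # Feed river: unread state, keyed per feed
    unread = story['story_hash'] in unread_feed_story_hashes.get(story['story_feed_id'], [])
    # Blurblog river: read state, one flat list for the whole page
    read = story['story_hash'] in read_feed_story_hashes
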
@@ -426,7 +409,7 @@ def load_social_page(request, user_id, username=None, **kwargs):
             story['shared_by_user'] = True
             shared_story = MSharedStory.objects.get(user_id=user.pk,
                                                     story_feed_id=story['story_feed_id'],
-                                                    story_guid=story['id'])
+                                                    story_hash=story['story_hash'])
             story['user_comments'] = shared_story.comments
 
     stories = MSharedStory.attach_users_to_stories(stories, profiles)
@@ -536,10 +519,11 @@ def mark_story_as_shared(request):
 
     shared_story = MSharedStory.objects.filter(user_id=request.user.pk,
                                                story_feed_id=feed_id,
-                                               story_guid=story_id).limit(1).first()
+                                               story_hash=story['story_hash']).limit(1).first()
     if not shared_story:
         story_db = {
             "story_guid": story.story_guid,
+            "story_hash": story.story_hash,
             "story_permalink": story.story_permalink,
             "story_title": story.story_title,
             "story_feed_id": story.story_feed_id,
@@ -551,7 +535,6 @@ def mark_story_as_shared(request):
             "user_id": request.user.pk,
             "comments": comments,
             "has_comments": bool(comments),
-            "story_db_id": story.id,
         }
         shared_story = MSharedStory.objects.create(**story_db)
         if source_user_id:
@@ -621,7 +604,7 @@ def mark_story_as_unshared(request):
 
     shared_story = MSharedStory.objects(user_id=request.user.pk,
                                         story_feed_id=feed_id,
-                                        story_guid=story_id).limit(1).first()
+                                        story_hash=story['story_hash']).limit(1).first()
     if not shared_story:
         return json.json_response(request, {'code': -1, 'message': 'Shared story not found.'})