New unread stories collector: it now relies on the already existing per-feed, per-user unread story lists instead of creating them on the fly every time a river is assembled.

Massive performance improvement, at the risk of stale unreads. Luckily this won't cause read stories to show as unread, but it may leave stale unread story lists that are missing the latest unread stories.

The next commit will focus on additional existence checks for those unread story lists, so that, at a small performance cost, no data migration is needed to force a recount for all feeds.
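For context, here is a minimal sketch of the pattern this change (and the planned existence checks) moves toward: reuse the per-feed, per-user unread set when it already exists in Redis, and only rebuild it, flagging the subscription for a recount, when it is missing. This is not the committed code; the key names follow the F:/RS:/U: style seen in the diff below, and mark_recalc_needed is a hypothetical callback.

import redis

r = redis.Redis()

def unread_story_hashes(user_id, feed_id, mark_recalc_needed):
    stories_key = f"F:{feed_id}"                   # all story hashes in the feed
    read_stories_key = f"RS:{user_id}:{feed_id}"   # story hashes the user has read
    unread_stories_key = f"U:{user_id}:{feed_id}"  # cached per-feed, per-user unread set

    # Only rebuild the unread set when it is missing, instead of recomputing it
    # on every river assembly; a present (possibly stale) set is reused as-is.
    if not r.exists(unread_stories_key):
        r.sdiffstore(unread_stories_key, [stories_key, read_stories_key])
        mark_recalc_needed(user_id, feed_id)  # hypothetical: flag the sub for a recount

    return r.smembers(unread_stories_key)

The tradeoff is the one described above: a missing key costs one extra SDIFFSTORE, while a present key is reused as-is, which is where the staleness risk comes from.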
Samuel Clay 2022-07-07 10:22:05 -04:00
parent e1e551a6a5
commit aca79771a9


@@ -133,15 +133,16 @@ class UserSubscription(models.Model):
cutoff_date = user.profile.unread_cutoff
feed_counter = 0
unread_ranked_stories_keys = []
expire_unread_stories_key = False
after_unread_pipeline = r.pipeline()
read_dates = dict()
needs_unread_recalc = dict()
manual_unread_pipeline = r.pipeline()
manual_unread_feed_oldest_date = dict()
oldest_manual_unread = None
usersub_count = len(usersubs)
for us in usersubs:
read_dates[us.feed_id] = int(max(us.mark_read_date, cutoff_date).strftime('%s'))
needs_unread_recalc[us.feed_id] = us.needs_unread_recalc or usersub_count == 1
user_unread_stories_feed_key = f"uU:{user_id}:{us.feed_id}"
manual_unread_pipeline.exists(user_unread_stories_feed_key)
results = manual_unread_pipeline.execute()
@@ -166,25 +167,27 @@ class UserSubscription(models.Model):
min_score = read_dates[feed_id] + 1
# TODO: Remove above +1 and switch below to AGGREGATE='MAX', which may obviate the need
# for the U:%s keys and just work with the zF: & RS: directly into zU:
pipeline.sdiffstore(unread_stories_key, stories_key, read_stories_key)
expire_unread_stories_key = True
if needs_unread_recalc[feed_id]:
pipeline.sdiffstore(unread_stories_key, stories_key, read_stories_key)
else:
min_score = 0
unread_stories_key = stories_key
unread_ranked_stories_key = sorted_stories_key
if order == 'oldest':
byscorefunc = pipeline.zrangebyscore
else:
byscorefunc = pipeline.zrevrangebyscore
min_score, max_score = max_score, min_score
pipeline.zinterstore(unread_ranked_stories_key, [sorted_stories_key, unread_stories_key])
if order == 'oldest':
pipeline.zremrangebyscore(unread_ranked_stories_key, 0, min_score-1)
pipeline.zremrangebyscore(unread_ranked_stories_key, max_score+1, 2*max_score)
else:
pipeline.zremrangebyscore(unread_ranked_stories_key, 0, max_score-1)
pipeline.zremrangebyscore(unread_ranked_stories_key, min_score+1, 2*min_score)
if needs_unread_recalc[feed_id]:
pipeline.zinterstore(unread_ranked_stories_key, [sorted_stories_key, unread_stories_key])
if order == 'oldest':
pipeline.zremrangebyscore(unread_ranked_stories_key, 0, min_score-1)
pipeline.zremrangebyscore(unread_ranked_stories_key, max_score+1, 2*max_score)
else:
pipeline.zremrangebyscore(unread_ranked_stories_key, 0, max_score-1)
pipeline.zremrangebyscore(unread_ranked_stories_key, min_score+1, 2*min_score)
# If archive premium user has manually marked an older story as unread
if is_archive and feed_id in manual_unread_feed_oldest_date:
@@ -206,10 +209,6 @@ class UserSubscription(models.Model):
if not store_stories_key:
byscorefunc(unread_ranked_stories_key, min_score, max_score, withscores=include_timestamps, start=offset, num=limit)
unread_ranked_stories_keys.append(unread_ranked_stories_key)
after_unread_pipeline.delete(unread_ranked_stories_key)
if expire_unread_stories_key:
after_unread_pipeline.delete(unread_stories_key)
results = pipeline.execute()
@@ -225,8 +224,6 @@ class UserSubscription(models.Model):
if store_stories_key:
r.zunionstore(store_stories_key, unread_ranked_stories_keys, aggregate="MAX")
after_unread_pipeline.execute()
if not store_stories_key:
return story_hashes
@@ -1047,7 +1044,20 @@ class UserSubscription(models.Model):
folders.extend(list(orphan_ids))
usf.folders = json.encode(folders)
usf.save()
@classmethod
def all_subs_needs_unread_recalc(cls, user_id):
    subs = cls.objects.filter(user=user_id)
    total = len(subs)
    needed_recalc = 0
    for sub in subs:
        if not sub.needs_unread_recalc:
            sub.needs_unread_recalc = True
            sub.save()
            needed_recalc += 1
    logging.debug(f" ---> Recalculated {needed_recalc} of {total} subscriptions for user_id: {user_id}")
@classmethod
def verify_feeds_scheduled(cls, user_id):
    r = redis.Redis(connection_pool=settings.REDIS_FEED_UPDATE_POOL)