Merge branch 'muserstory_remove' into circular

* muserstory_remove:
  Adding node group to fabfile.
  Using popular instead of homepage user to suggest new users to follow.
  Cleaning up unused detritus. Ready to deploy.
  Final cleanup before deploying.
  Removing MUserStory entirely.
Samuel Clay 2013-05-14 16:11:44 -07:00
commit 6ebaa81ea1
8 changed files with 47 additions and 257 deletions
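For orientation: this merge drops the MongoDB-backed MUserStory collection (one document per read story) in favor of the Redis-backed RUserStory class, which tracks reads as set members keyed by story hash. A minimal sketch of the key scheme, reconstructed from the removed sync_redis method further down in the diff; the helper name and the DAYS_OF_UNREAD value are assumptions, not NewsBlur's actual API:

    import redis

    DAYS_OF_UNREAD = 14  # assumed value of settings.DAYS_OF_UNREAD

    def mark_read_sketch(r, user_id, feed_id, story_hash):
        # story_hash is "<feed_id>:<hash6>", e.g. "123:a1b2c3" (see the
        # RE_STORY_HASH pattern below). Add it to the per-user set and the
        # per-user-per-feed set, refreshing a rolling TTL on both -- this is
        # what the deleted MUserStory.sync_redis did on every read.
        for key in ("RS:%s" % user_id, "RS:%s:%s" % (user_id, feed_id)):
            r.sadd(key, story_hash)
            r.expire(key, DAYS_OF_UNREAD * 24 * 60 * 60)

    mark_read_sketch(redis.Redis(), user_id=42, feed_id=123, story_hash="123:a1b2c3")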

@@ -341,16 +341,6 @@ class UserSubscription(models.Model):
logging.user(request, "~FYRead story in feed: %s" % (self.feed))
for story_id in set(story_ids):
story, _ = MStory.find_story(story_feed_id=self.feed_id, story_id=story_id)
if not story: continue
now = datetime.datetime.utcnow()
date = now if now > story.story_date else story.story_date # For handling future stories
m, _ = MUserStory.objects.get_or_create(story_id=story_id, user_id=self.user_id,
feed_id=self.feed_id, defaults={
'read_date': date,
'story': story,
'story_date': story.story_date,
})
RUserStory.mark_read(self.user_id, self.feed_id, story_id)
return data
@@ -472,30 +462,6 @@ class UserSubscription(models.Model):
# Switch read stories
stories = RUserStory.switch_feed(user_id=self.user_id, old_feed_id=old_feed.pk,
new_feed_id=new_feed.pk)
user_stories = MUserStory.objects(user_id=self.user_id, feed_id=old_feed.pk)
if user_stories.count() > 0:
logging.info(" ---> %s read stories" % user_stories.count())
for user_story in user_stories:
user_story.feed_id = new_feed.pk
duplicate_story = user_story.story
if duplicate_story:
story_guid = duplicate_story.story_guid if hasattr(duplicate_story, 'story_guid') else duplicate_story.id
original_story, _ = MStory.find_story(story_feed_id=new_feed.pk,
story_id=story_guid,
original_only=True)
if original_story:
user_story.story = original_story
try:
user_story.save()
except OperationError:
# User read the story in the original feed, too. Ugh, just ignore it.
pass
else:
user_story.delete()
else:
user_story.delete()
def switch_feed_for_classifier(model):
duplicates = model.objects(feed_id=old_feed.pk, user_id=self.user_id)
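The hunk above collapses the old per-document migration loop into a single RUserStory.switch_feed call. Its body is not shown in this diff; a plausible sketch, assuming it re-prefixes each read-story hash from the old feed's set into the new feed's (every name here is my guess, not the committed code):

    def switch_feed_sketch(r, user_id, old_feed_id, new_feed_id):
        # Members of RS:<user>:<old_feed> look like "<old_feed>:<hash6>";
        # swap the feed prefix and record the hash under the new feed.
        p = r.pipeline()
        for member in r.smembers("RS:%s:%s" % (user_id, old_feed_id)):
            guid_hash = member.decode().split(":")[1]
            new_hash = "%s:%s" % (new_feed_id, guid_hash)
            p.sadd("RS:%s:%s" % (user_id, new_feed_id), new_hash)
            p.sadd("RS:%s" % user_id, new_hash)
        p.execute()
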
@@ -568,13 +534,16 @@ class UserSubscription(models.Model):
class RUserStory:
RE_STORY_HASH = re.compile(r"^(\d{1,10}):(\w{6})$")
RE_RS_KEY = re.compile(r"^RS:(\d+):(\d+)$")
@classmethod
def story_hash(cls, story_id, story_feed_id):
if not cls.RE_STORY_HASH.match(story_id):
story, _ = MStory.find_story(story_feed_id=story_feed_id, story_id=story_id)
if not story: return
story_id = story.story_hash
if story:
story_id = story.story_hash
else:
story_id = "%s:%s" % (story_feed_id, hashlib.sha1(story_id).hexdigest()[:6])
return story_id
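The behavioral change in this hunk: when the story cannot be found in Mongo, story_hash now derives a hash from the raw guid instead of silently returning None. A standalone illustration of the new fallback, in Python 3 spelling (the 2013 code hashes the byte string directly; the lookup stub stands in for MStory.find_story):

    import hashlib
    import re

    RE_STORY_HASH = re.compile(r"^(\d{1,10}):(\w{6})$")

    def normalize_story_id(story_id, story_feed_id, lookup=lambda f, s: None):
        if RE_STORY_HASH.match(story_id):
            return story_id  # already a "<feed>:<hash6>" story hash
        story = lookup(story_feed_id, story_id)
        if story:
            return story.story_hash
        # New else-branch: hash the guid directly rather than giving up.
        return "%s:%s" % (story_feed_id,
                          hashlib.sha1(story_id.encode("utf-8")).hexdigest()[:6])

    print(normalize_story_id("https://example.com/post/1", 123))  # e.g. "123:7d9a3f"
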
@@ -586,6 +555,14 @@ class RUserStory:
return groups[0], groups[1]
return None, None
@classmethod
def split_rs_key(cls, rs_key):
matches = cls.RE_RS_KEY.match(rs_key)
if matches:
groups = matches.groups()
return groups[0], groups[1]
return None, None
@classmethod
def story_hashes(cls, story_ids):
story_hashes = []
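The new split_rs_key classmethod recovers the user and feed ids from a per-feed Redis key; switch_hash, later in this diff, uses it while scanning keys. A quick usage check:

    import re

    RE_RS_KEY = re.compile(r"^RS:(\d+):(\d+)$")

    def split_rs_key(rs_key):
        matches = RE_RS_KEY.match(rs_key)
        if matches:
            groups = matches.groups()
            return groups[0], groups[1]  # (user_id, feed_id), both strings
        return None, None

    assert split_rs_key("RS:42:123") == ("42", "123")
    assert split_rs_key("RS:42") == (None, None)  # the all-feeds key has no feed part
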
@@ -642,140 +619,24 @@ class RUserStory:
if len(story_hashes) > 0:
logging.info(" ---> %s read stories" % len(story_hashes))
class MUserStory(mongo.Document):
"""
Stories read by the user. These are deleted as the mark_read_date for the
UserSubscription passes the UserStory date.
"""
user_id = mongo.IntField()
feed_id = mongo.IntField()
read_date = mongo.DateTimeField()
story_id = mongo.StringField()
story_hash = mongo.StringField()
story_date = mongo.DateTimeField()
story = mongo.ReferenceField(MStory, dbref=True)
found_story = mongo.GenericReferenceField()
shared = mongo.BooleanField()
meta = {
'collection': 'userstories',
'indexes': [
{'fields': ('user_id', 'feed_id', 'story_id'), 'unique': True},
('feed_id', 'story_id'), # Updating stories with new guids
('feed_id', 'story_date'), # Trimming feeds
('feed_id', '-read_date'), # Trimming feeds
],
'allow_inheritance': False,
'index_drop_dups': True,
'cascade': False,
}
def save(self, *args, **kwargs):
self.story_hash = self.feed_guid_hash
# self.sync_redis()
super(MUserStory, self).save(*args, **kwargs)
def delete(self, *args, **kwargs):
# self.remove_from_redis()
super(MUserStory, self).delete(*args, **kwargs)
@property
def guid_hash(self):
return hashlib.sha1(self.story_id).hexdigest()[:6]
@property
def feed_guid_hash(self):
return "%s:%s" % (self.feed_id or "0", self.guid_hash)
@classmethod
def delete_old_stories(cls, feed_id):
UNREAD_CUTOFF = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD*5)
read_stories = cls.objects(feed_id=feed_id, read_date__lte=UNREAD_CUTOFF)
read_stories_count = read_stories.count()
if read_stories_count:
feed = Feed.objects.get(pk=feed_id)
total = cls.objects(feed_id=feed_id).count()
logging.info(" ---> ~SN~FCTrimming ~SB%s~SN/~SB%s~SN read stories from %s..." %
(read_stories_count, total, feed.title[:30]))
read_stories.delete()
@classmethod
def delete_marked_as_read_stories(cls, user_id, feed_id, mark_read_date=None):
if not mark_read_date:
usersub = UserSubscription.objects.get(user__pk=user_id, feed__pk=feed_id)
mark_read_date = usersub.mark_read_date
# Next line forces only old read stories to be removed, just in case newer stories
# come in as unread because they're being shared.
mark_read_date = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)
cls.objects(user_id=user_id, feed_id=feed_id, read_date__lte=mark_read_date).delete()
def sync_redis(self, r=None):
if not r:
r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)
all_read_stories_key = 'RS:%s' % (self.user_id)
r.sadd(all_read_stories_key, self.feed_guid_hash)
r.expire(all_read_stories_key, settings.DAYS_OF_UNREAD*24*60*60)
read_story_key = 'RS:%s:%s' % (self.user_id, self.feed_id)
r.sadd(read_story_key, self.feed_guid_hash)
r.expire(read_story_key, settings.DAYS_OF_UNREAD*24*60*60)
def remove_from_redis(self):
def switch_hash(cls, feed_id, old_hash, new_hash):
r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)
r.srem('RS:%s' % self.user_id, self.feed_guid_hash)
r.srem('RS:%s:%s' % (self.user_id, self.feed_id), self.feed_guid_hash)
@classmethod
def sync_all_redis(cls, user_id=None, feed_id=None, force=False):
r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)
UNREAD_CUTOFF = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD+1)
if feed_id and user_id:
read_stories = cls.objects.filter(user_id=user_id,
feed_id=feed_id,
read_date__gte=UNREAD_CUTOFF)
key = "RS:%s:%s" % (user_id, feed_id)
r.delete(key)
elif feed_id:
read_stories = cls.objects.filter(feed_id=feed_id, read_date__gte=UNREAD_CUTOFF)
keys = r.keys("RS:*:%s" % feed_id)
print " ---> Deleting %s redis keys: %s" % (len(keys), keys)
for key in keys:
r.delete(key)
elif user_id:
read_stories = cls.objects.filter(user_id=user_id, read_date__gte=UNREAD_CUTOFF)
keys = r.keys("RS:%s:*" % user_id)
r.delete("RS:%s" % user_id)
print " ---> Deleting %s redis keys: %s" % (len(keys), keys)
for key in keys:
r.delete(key)
elif force:
read_stories = cls.objects.all(read_date__gte=UNREAD_CUTOFF)
else:
raise "Specify user_id, feed_id, or force."
p = r.pipeline()
total = read_stories.count()
logging.debug(" ---> ~SN~FMSyncing ~SB%s~SN stories (%s/%s)" % (total, user_id, feed_id))
pipeline = None
for i, read_story in enumerate(read_stories):
if not pipeline:
pipeline = r.pipeline()
if (i+1) % 1000 == 0:
print " ---> %s/%s" % (i+1, total)
pipeline.execute()
pipeline = r.pipeline()
read_story.sync_redis(r=pipeline)
if pipeline:
pipeline.execute()
user_feeds = r.keys("RS:*:%s" % feed_id)
logging.info(" ---> %s user RS keys to switch hashes..." % len(user_feeds))
for rs_key in user_feeds:
read = r.sismember(rs_key, old_hash)
if read:
user_id, _ = cls.split_rs_key(rs_key)
p.sadd(rs_key, new_hash)
p.sadd("RS:%s" % user_id, new_hash)
p.execute()
class UserSubscriptionFolders(models.Model):
"""
A JSON list of folders and feeds for while a user has subscribed. The list
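Read together, the interleaved lines above delete MUserStory entirely (model, trimming tasks, and Redis sync) and add RUserStory.switch_hash, which replaces the per-document guid migration: scan every user's per-feed read set for this feed and, where the old hash is present, pipeline in the new one. A reconstruction assembled from the added lines (an approximation, not the verbatim method):

    def switch_hash_sketch(r, feed_id, old_hash, new_hash, split_rs_key):
        # KEYS is O(n) over the keyspace -- acceptable for an occasional
        # guid change, not for a hot path.
        user_feeds = r.keys("RS:*:%s" % feed_id)
        p = r.pipeline()
        for rs_key in user_feeds:
            if r.sismember(rs_key, old_hash):
                user_id, _ = split_rs_key(rs_key.decode())
                p.sadd(rs_key, new_hash)             # per-user, per-feed set
                p.sadd("RS:%s" % user_id, new_hash)  # per-user, all-feeds set
        p.execute()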

@@ -3,7 +3,7 @@ from celery.task import Task
from utils import log as logging
from django.contrib.auth.models import User
from django.conf import settings
from apps.reader.models import UserSubscription, MUserStory
from apps.reader.models import UserSubscription
from apps.social.models import MSocialSubscription
@@ -43,21 +43,3 @@ class CleanAnalytics(Task):
settings.MONGOANALYTICSDB.nbanalytics.feed_fetches.remove({
"date": {"$lt": day_ago},
})
class CleanStories(Task):
name = 'clean-stories'
time_limit = 60 * 60 # 1 hour
def run(self, **kwargs):
days_ago = (datetime.datetime.utcnow() -
datetime.timedelta(days=settings.DAYS_OF_UNREAD*5))
old_stories = MUserStory.objects.filter(read_date__lte=days_ago)
logging.debug(" ---> Cleaning stories from %s days ago... %s/%s read stories" % (
settings.DAYS_OF_UNREAD*5,
MUserStory.objects.count(),
old_stories.count()
))
for s, story in enumerate(old_stories):
if (s+1) % 1000 == 0:
logging.debug(" ---> %s stories removed..." % (s+1))
story.delete()
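With MUserStory gone, the nightly CleanStories task has nothing left to trim: the Redis read sets expire on their own, since every mark-read refreshes their TTL. A one-liner to observe the behavior the new code relies on (illustrative ids; DAYS_OF_UNREAD of 14 is an assumption):

    import redis

    r = redis.Redis()
    r.sadd("RS:42:123", "123:a1b2c3")
    r.expire("RS:42:123", 14 * 24 * 60 * 60)
    print(r.ttl("RS:42:123"))  # seconds until the set evicts itself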

@@ -27,7 +27,7 @@ from apps.analyzer.models import apply_classifier_titles, apply_classifier_feeds
from apps.analyzer.models import apply_classifier_authors, apply_classifier_tags
from apps.analyzer.models import get_classifiers_for_user, sort_classifiers_by_feed
from apps.profile.models import Profile
from apps.reader.models import UserSubscription, UserSubscriptionFolders, MUserStory, RUserStory, Feature
from apps.reader.models import UserSubscription, UserSubscriptionFolders, RUserStory, Feature
from apps.reader.forms import SignupForm, LoginForm, FeatureForm
from apps.rss_feeds.models import MFeedIcon
from apps.statistics.models import MStatistics
@@ -1020,15 +1020,8 @@ def mark_story_as_unread(request):
dirty_count = social_subs and social_subs.count()
dirty_count = ("(%s social_subs)" % dirty_count) if dirty_count else ""
try:
m = MUserStory.objects.get(user_id=request.user.pk, feed_id=feed_id, story_id=story_id)
m.delete()
except MUserStory.DoesNotExist:
if usersub and story.story_date > usersub.mark_read_date:
logging.user(request, "~SB~FRCouldn't find read story to mark as unread.")
else:
data['code'] = -1
RUserStory.mark_unread(user_id=request.user.pk, story_feed_id=feed_id, story_hash=story.story_hash)
RUserStory.mark_unread(user_id=request.user.pk, story_feed_id=feed_id,
story_hash=story.story_hash)
r = redis.Redis(connection_pool=settings.REDIS_POOL)
r.publish(request.user.username, 'feed:%s' % feed_id)
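mark_story_as_unread now skips the Mongo lookup-and-delete and calls RUserStory.mark_unread directly. That method's body is not in this diff; presumably it mirrors the deleted MUserStory.remove_from_redis, along these assumed lines:

    def mark_unread_sketch(r, user_id, story_feed_id, story_hash):
        # Undo mark_read: drop the hash from both read-story sets.
        r.srem("RS:%s" % user_id, story_hash)
        r.srem("RS:%s:%s" % (user_id, story_feed_id), story_hash)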

@@ -967,16 +967,13 @@ class Feed(models.Model):
return ret_values
def update_read_stories_with_new_guid(self, old_story_guid, new_story_guid):
from apps.reader.models import MUserStory
from apps.reader.models import RUserStory
from apps.social.models import MSharedStory
read_stories = MUserStory.objects.filter(feed_id=self.pk, story_id=old_story_guid)
for story in read_stories:
story.story_id = new_story_guid
try:
story.save()
except OperationError:
# User read both new and old. Just toss.
pass
old_hash = RUserStory.story_hash(old_story_guid, self.pk)
new_hash = RUserStory.story_hash(new_story_guid, self.pk)
RUserStory.switch_hash(feed_id=self.pk, old_hash=old_hash, new_hash=new_hash)
shared_stories = MSharedStory.objects.filter(story_feed_id=self.pk,
story_guid=old_story_guid)
for story in shared_stories:

@@ -9,7 +9,6 @@ import random
import requests
from collections import defaultdict
from BeautifulSoup import BeautifulSoup
from mongoengine.queryset import NotUniqueError
from django.conf import settings
from django.contrib.auth.models import User
from django.contrib.sites.models import Site
@@ -18,7 +17,7 @@ from django.template.loader import render_to_string
from django.template.defaultfilters import slugify
from django.core.mail import EmailMultiAlternatives
from django.core.cache import cache
from apps.reader.models import UserSubscription, MUserStory, RUserStory
from apps.reader.models import UserSubscription, RUserStory
from apps.analyzer.models import MClassifierFeed, MClassifierAuthor, MClassifierTag, MClassifierTitle
from apps.analyzer.models import apply_classifier_titles, apply_classifier_feeds, apply_classifier_authors, apply_classifier_tags
from apps.rss_feeds.models import Feed, MStory
@@ -205,7 +204,7 @@ class MSocialProfile(mongo.Document):
# Not enough? Grab popular users.
if len(nonfriend_user_ids) < RECOMMENDATIONS_LIMIT:
homepage_user = User.objects.get(username=settings.HOMEPAGE_USERNAME)
homepage_user = User.objects.get(username='popular')
suggested_users_list = r.sdiff("F:%s:F" % homepage_user.pk, following_key)
suggested_users_list = [int(f) for f in suggested_users_list]
suggested_user_ids = []
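Per the commit message, follow suggestions now seed from a dedicated 'popular' account instead of the homepage account; the set arithmetic is unchanged. SDIFF returns members of the popular account's follow-graph set that are not already in the requesting user's following set (the F: key pattern is from the diff; following_key is built earlier in the method and I assume the same pattern; the pks are made up):

    import redis

    r = redis.Redis()
    popular_pk, user_pk = 5, 42  # hypothetical database pks
    suggested = r.sdiff("F:%s:F" % popular_pk, "F:%s:F" % user_pk)
    suggested_user_ids = [int(pk) for pk in suggested]
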
@@ -975,25 +974,9 @@ class MSocialSubscription(mongo.Document):
logging.user(request, "~BR~FYCould not find story: %s/%s" %
(self.subscription_user_id, story_id))
continue
now = datetime.datetime.utcnow()
date = now if now > story.story_date else story.story_date # For handling future stories
feed_id = story.story_feed_id
try:
RUserStory.mark_read(self.user_id, feed_id, story.story_hash)
m, _ = MUserStory.objects.get_or_create(user_id=self.user_id,
feed_id=feed_id,
story_id=story.story_guid,
defaults={
"read_date": date,
"story_date": story.shared_date,
})
except NotUniqueError:
if not mark_all_read or settings.DEBUG:
logging.user(request, "~FRAlready saved read story: %s" % story.story_guid)
continue
except MUserStory.MultipleObjectsReturned:
if not mark_all_read or settings.DEBUG:
logging.user(request, "~BR~FW~SKMultiple read stories: %s" % story.story_guid)
RUserStory.mark_read(self.user_id, feed_id, story.story_hash)
# Find other social feeds with this story to update their counts
friend_key = "F:%s:F" % (self.user_id)
@@ -1002,13 +985,13 @@ class MSocialSubscription(mongo.Document):
if self.user_id in friends_with_shares:
friends_with_shares.remove(self.user_id)
if friends_with_shares:
socialsubs = MSocialSubscription.objects.filter(user_id=self.user_id,
subscription_user_id__in=friends_with_shares)
socialsubs = MSocialSubscription.objects.filter(
user_id=self.user_id,
subscription_user_id__in=friends_with_shares)
for socialsub in socialsubs:
if not socialsub.needs_unread_recalc:
socialsub.needs_unread_recalc = True
socialsub.save()
# XXX TODO: Real-time notification, just for this user
# Also count on original subscription
usersubs = UserSubscription.objects.filter(user=self.user_id, feed=feed_id)
@@ -1017,7 +1000,7 @@ class MSocialSubscription(mongo.Document):
if not usersub.needs_unread_recalc:
usersub.needs_unread_recalc = True
usersub.save()
# XXX TODO: Real-time notification, just for this user
return data
@classmethod
@@ -1041,17 +1024,7 @@ class MSocialSubscription(mongo.Document):
continue
now = datetime.datetime.utcnow()
date = now if now > story.story_date else story.story_date # For handling future stories
try:
RUserStory.mark_read(user_id, story.story_feed_id, story.story_hash)
m, _ = MUserStory.objects.get_or_create(user_id=user_id,
feed_id=story.story_feed_id,
story_id=story.story_guid,
defaults={
"read_date": date,
"story_date": story.shared_date,
})
except MUserStory.MultipleObjectsReturned:
logging.user(request, "~BR~FW~SKMultiple read stories: %s" % story.story_guid)
RUserStory.mark_read(user_id, story.story_feed_id, story.story_hash)
# Also count on original subscription
usersubs = UserSubscription.objects.filter(user=user_id, feed=story.story_feed_id)
@@ -1089,11 +1062,6 @@ class MSocialSubscription(mongo.Document):
story_ids = [s.story_guid for s in stories]
self.mark_story_ids_as_read(story_ids, mark_all_read=True)
# Cannot delete these stories, since the original feed may not be read.
# Just go 2 weeks back.
# UNREAD_CUTOFF = now - datetime.timedelta(days=settings.DAYS_OF_UNREAD)
# MUserStory.delete_marked_as_read_stories(self.user_id, self.feed_id, mark_read_date=UNREAD_CUTOFF)
self.save()
def calculate_feed_scores(self, force=False, silent=False):

@@ -367,11 +367,6 @@ CELERYBEAT_SCHEDULE = {
'schedule': datetime.timedelta(hours=12),
'options': {'queue': 'beat_tasks'},
},
'clean-stories': {
'task': 'clean-stories',
'schedule': datetime.timedelta(hours=24),
'options': {'queue': 'beat_tasks'},
},
'premium-expire': {
'task': 'premium-expire',
'schedule': datetime.timedelta(hours=24),

@@ -10,7 +10,7 @@ import pymongo
from django.conf import settings
from django.db import IntegrityError
from django.core.cache import cache
from apps.reader.models import UserSubscription, MUserStory
from apps.reader.models import UserSubscription
from apps.rss_feeds.models import Feed, MStory
from apps.rss_feeds.page_importer import PageImporter
from apps.rss_feeds.icon_importer import IconImporter
@@ -374,11 +374,7 @@ class Dispatcher:
logging.debug(' ---> [%-30s] ~FBPerforming feed cleanup...' % (feed.title[:30],))
start_cleanup = time.time()
feed.sync_redis()
cp1 = time.time() - start_cleanup
MUserStory.delete_old_stories(feed_id=feed.pk)
cp2 = time.time() - cp1 - start_cleanup
# MUserStory.sync_all_redis(feed_id=feed.pk)
logging.debug(' ---> [%-30s] ~FBDone with feed cleanup. Took %.4s+%.4s+%.4s=~SB%.4s~SN sec.' % (feed.title[:30], cp1, cp2, time.time() - cp2 - cp1 - start_cleanup, time.time() - start_cleanup))
logging.debug(' ---> [%-30s] ~FBDone with feed cleanup. Took ~SB%.4s~SN sec.' % (feed.title[:30], time.time() - start_cleanup))
try:
self.count_unreads_for_subscribers(feed)
except TimeoutError:

@@ -19,11 +19,9 @@ class NBMuninGraph(MuninGraph):
def calculate_metrics(self):
from apps.rss_feeds.models import MStory
from apps.reader.models import MUserStory
return {
'stories': MStory.objects().count(),
'read_stories': MUserStory.objects().count(),
}
if __name__ == '__main__':