Merge branch 'master' into original_pages_node

* master:
  Doubling resync likelihood.
  Clearing out old read stories for 1% of all feed fetches with new stories.
  Adding unread cutoff to redis sync.
  Syncing redis keys by feed_id for read stories (key scheme sketched after this list).
  Adding a sync all redis for user stories to be run in a migration.
  Turning off deletion of old stories until the db can be prepped.
  Adding index for read story deletion.
  Deleting old stories.
  Typo in delete read stories log.
  Typo in delete read stories log.
  Typo in delete read stories log.
  Deleting old user stories again. This time going out to 5 times the unread bounds.
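
Most of these commits revolve around a single Redis structure: one set per user/feed pair, keyed "RS:<user_id>:<feed_id>", holding the ids of stories that user has read (the diff below manipulates it via sadd/srem and RS:* key scans). A minimal sketch of that scheme, assuming a default Redis connection in place of settings.REDIS_STORY_POOL; the helper names are illustrative, not NewsBlur's:

    import redis

    r = redis.Redis()  # the real code draws connections from settings.REDIS_STORY_POOL

    def mark_story_read(user_id, feed_id, story_db_id):
        # One set per (user, feed) pair; members are story db ids.
        r.sadd("RS:%s:%s" % (user_id, feed_id), story_db_id)

    def mark_story_unread(user_id, feed_id, story_db_id):
        r.srem("RS:%s:%s" % (user_id, feed_id), story_db_id)

    def is_read(user_id, feed_id, story_db_id):
        return r.sismember("RS:%s:%s" % (user_id, feed_id), story_db_id)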
commit ec9976c916
Samuel Clay, 2012-12-17 18:20:33 -08:00
3 changed files with 45 additions and 10 deletions

apps/reader/models.py

@@ -591,6 +591,7 @@ class MUserStory(mongo.Document):
             {'fields': ('user_id', 'feed_id', 'story_id'), 'unique': True},
             ('feed_id', 'story_id'),   # Updating stories with new guids
             ('feed_id', 'story_date'), # Trimming feeds
+            ('feed_id', '-read_date'), # Trimming read stories
         ],
         'allow_inheritance': False,
         'index_drop_dups': True,
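
The new ('feed_id', '-read_date') entry backs the trim query in the next hunk: equality on feed_id plus a range on read_date is exactly what a compound index on those two fields serves. Roughly what mongoengine builds from that meta entry, shown as a pymongo sketch; the client, database, and collection names are assumed for illustration:

    import pymongo

    db = pymongo.MongoClient().newsblur          # database name assumed
    db.userstories.create_index([("feed_id", pymongo.ASCENDING),
                                 ("read_date", pymongo.DESCENDING)])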
@@ -613,8 +614,15 @@ class MUserStory(mongo.Document):
     @classmethod
     def delete_old_stories(cls, feed_id):
-        UNREAD_CUTOFF = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)
-        cls.objects(feed_id=feed_id, story_date__lte=UNREAD_CUTOFF).delete()
+        UNREAD_CUTOFF = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD*5)
+        read_stories = cls.objects(feed_id=feed_id, read_date__lte=UNREAD_CUTOFF)
+        read_stories_count = read_stories.count()
+        if read_stories_count:
+            feed = Feed.objects.get(pk=feed_id)
+            total = cls.objects(feed_id=feed_id).count()
+            logging.info(" ---> ~SN~FCTrimming ~SB%s~SN/~SB%s~SN read stories from %s..." %
+                         (read_stories_count, total, feed.title[:30]))
+            read_stories.delete()

     @classmethod
     def delete_marked_as_read_stories(cls, user_id, feed_id, mark_read_date=None):
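
The cutoff also changes basis, from story_date inside DAYS_OF_UNREAD to read_date inside five times that window, per the "5 times the unread bounds" commit above. A worked example, assuming DAYS_OF_UNREAD = 14 (the real value lives in Django settings and is not shown in this diff):

    import datetime

    DAYS_OF_UNREAD = 14  # assumed for illustration
    cutoff = datetime.datetime.utcnow() - datetime.timedelta(days=DAYS_OF_UNREAD * 5)
    # With these numbers the cutoff is 70 days: a story read 71 days ago is
    # trimmed, while one read 30 days ago survives even though it is long
    # past the 14-day unread window.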
@@ -667,10 +675,34 @@ class MUserStory(mongo.Document):
         r.srem('RS:%s:%s' % (self.user_id, self.feed_id), self.story_db_id)

     @classmethod
-    def sync_all_redis(cls):
-        read_stories = cls.objects.all()
-        for read_story in read_stories:
-            read_story.sync_redis()
+    def sync_all_redis(cls, user_id=None, feed_id=None, force=False):
+        r = redis.Redis(connection_pool=settings.REDIS_STORY_POOL)
+        UNREAD_CUTOFF = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD*2)
+        if feed_id:
+            read_stories = cls.objects.filter(feed_id=feed_id, read_date__gte=UNREAD_CUTOFF)
+            keys = r.keys("RS:*:%s" % feed_id)
+            print " ---> Deleting %s redis keys: %s" % (len(keys), keys)
+            for key in keys:
+                r.delete(key)
+        elif user_id:
+            read_stories = cls.objects.filter(user_id=user_id, read_date__gte=UNREAD_CUTOFF)
+            keys = r.keys("RS:%s:*" % user_id)
+            r.delete("RS:%s" % user_id)
+            print " ---> Deleting %s redis keys: %s" % (len(keys), keys)
+            for key in keys:
+                r.delete(key)
+        elif force:
+            read_stories = cls.objects.filter(read_date__gte=UNREAD_CUTOFF)
+        else:
+            raise Exception("Specify user_id, feed_id, or force.")
+        total = read_stories.count()
+        print " ---> Syncing %s stories (%s)" % (total, user_id or feed_id)
+        for i, read_story in enumerate(read_stories):
+            if (i+1) % 1000 == 0:
+                print " ---> %s/%s" % (i+1, total)
+            read_story.sync_redis(r)

 class UserSubscriptionFolders(models.Model):
     """

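Per the "run in a migration" commit above, sync_all_redis is meant to be invoked by hand or from a migration rather than per-request. A usage sketch from a Django shell, with illustrative ids:

    # Rebuild the RS:* sets for one feed: drops every RS:<user_id>:<feed_id>
    # key for that feed, then replays reads from the last DAYS_OF_UNREAD*2 days.
    MUserStory.sync_all_redis(feed_id=42)

    # Scoped to a single user instead:
    MUserStory.sync_all_redis(user_id=99)

    # Or everything at once, which must be asked for explicitly:
    MUserStory.sync_all_redis(force=True)
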
fabfile.py

@@ -254,6 +254,7 @@ def backup_postgresql():
     # crontab for postgres master server
     # 0 4 * * * python /home/sclay/newsblur/utils/backups/backup_psql.py
+    # 0 * * * * sudo find /var/lib/postgresql/9.1/archive -mtime +1 -exec rm {} \;
     with cd(os.path.join(env.NEWSBLUR_PATH, 'utils/backups')):
         # run('./postgresql_backup.sh')
         run('python backup_psql.py')

utils/feed_fetcher.py

@@ -9,7 +9,7 @@ import random
 import pymongo
 from django.conf import settings
 from django.db import IntegrityError
-from apps.reader.models import UserSubscription
+from apps.reader.models import UserSubscription, MUserStory
 from apps.rss_feeds.models import Feed, MStory
 from apps.rss_feeds.page_importer import PageImporter
 from apps.rss_feeds.icon_importer import IconImporter
@@ -248,7 +248,8 @@ class ProcessFeed:
                 '~FR~SB' if ret_values['error'] else '', ret_values['error'],
                 len(self.fpf.entries)))
         self.feed.update_all_statistics(full=bool(ret_values['new']), force=self.options['force'])
-        self.feed.trim_feed()
+        if ret_values['new']:
+            self.feed.trim_feed()
         self.feed.save_feed_history(200, "OK")
         if self.options['verbose']:
@@ -342,9 +343,10 @@ class Dispatcher:
                 feed.known_good = True
                 feed.fetched_once = True
                 feed = feed.save()
-            if random.random() <= 0.01:
+            # MUserStory.delete_old_stories(feed_id=feed.pk)
+            if random.random() <= 0.02:
                 feed.sync_redis()
-                MUserStory.delete_old_stories(feed_id=feed.pk)
+                MUserStory.sync_all_redis(feed_id=feed.pk)
             try:
                 self.count_unreads_for_subscribers(feed)
             except TimeoutError:
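
A quick check on the doubled gate: with random.random() <= 0.02, the expected number of qualifying fetches between cleanups is 1/0.02 = 50, down from 100 at the old 1% rate, so a feed fetched hourly now gets its redis sets resynced roughly every two days:

    import random

    RESYNC_PROBABILITY = 0.02  # doubled from 0.01 by this merge

    def should_clean_up():
        # True on ~1 in 50 fetches; the expected gap between cleanups is 1/p.
        return random.random() <= RESYNC_PROBABILITY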