Delete old read stories and speed up feed-score computation by caching the stories to search through.

This commit is contained in:
Samuel Clay 2010-09-19 11:30:18 -04:00
parent 554d5f5985
commit b9cbf6a71e
3 changed files with 26 additions and 13 deletions

View file

@ -2,14 +2,15 @@ import datetime
import mongoengine as mongo import mongoengine as mongo
from utils import log as logging from utils import log as logging
from django.db import models from django.db import models
from django.conf import settings
from django.contrib.auth.models import User from django.contrib.auth.models import User
from django.core.cache import cache from django.core.cache import cache
from apps.rss_feeds.models import Feed, Story, MStory from apps.rss_feeds.models import Feed, Story, MStory
from apps.analyzer.models import MClassifierFeed, MClassifierAuthor, MClassifierTag, MClassifierTitle from apps.analyzer.models import MClassifierFeed, MClassifierAuthor, MClassifierTag, MClassifierTitle
from apps.analyzer.models import apply_classifier_titles, apply_classifier_feeds, apply_classifier_authors, apply_classifier_tags from apps.analyzer.models import apply_classifier_titles, apply_classifier_feeds, apply_classifier_authors, apply_classifier_tags
DAYS_OF_UNREAD = 14 UNREAD_CUTOFF = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)
MONTH_AGO = datetime.datetime.now() - datetime.timedelta(days=30) MONTH_AGO = datetime.datetime.utcnow() - datetime.timedelta(days=30)
class UserSubscription(models.Model): class UserSubscription(models.Model):
""" """
@ -21,10 +22,8 @@ class UserSubscription(models.Model):
""" """
user = models.ForeignKey(User, related_name='subscriptions') user = models.ForeignKey(User, related_name='subscriptions')
feed = models.ForeignKey(Feed, related_name='subscribers') feed = models.ForeignKey(Feed, related_name='subscribers')
last_read_date = models.DateTimeField(default=datetime.datetime.now() last_read_date = models.DateTimeField(default=UNREAD_CUTOFF)
- datetime.timedelta(days=DAYS_OF_UNREAD)) mark_read_date = models.DateTimeField(default=UNREAD_CUTOFF)
mark_read_date = models.DateTimeField(default=datetime.datetime.now()
- datetime.timedelta(days=DAYS_OF_UNREAD))
unread_count_neutral = models.IntegerField(default=0) unread_count_neutral = models.IntegerField(default=0)
unread_count_positive = models.IntegerField(default=0) unread_count_positive = models.IntegerField(default=0)
unread_count_negative = models.IntegerField(default=0) unread_count_negative = models.IntegerField(default=0)
@ -52,7 +51,7 @@ class UserSubscription(models.Model):
self.needs_unread_relcalc = False self.needs_unread_relcalc = False
self.save() self.save()
def calculate_feed_scores(self, silent=False): def calculate_feed_scores(self, silent=False, stories_db=None):
if self.user.profile.last_seen_on < MONTH_AGO: if self.user.profile.last_seen_on < MONTH_AGO:
if not silent: if not silent:
logging.info(' ---> [%s] SKIPPING Computing scores: %s (1 month+)' % (self.user, self.feed)) logging.info(' ---> [%s] SKIPPING Computing scores: %s (1 month+)' % (self.user, self.feed))
@ -70,7 +69,7 @@ class UserSubscription(models.Model):
feed_scores = dict(negative=0, neutral=0, positive=0) feed_scores = dict(negative=0, neutral=0, positive=0)
# Two weeks in age. If mark_read_date is older, mark old stories as read. # Two weeks in age. If mark_read_date is older, mark old stories as read.
date_delta = datetime.datetime.utcnow()-datetime.timedelta(days=DAYS_OF_UNREAD) date_delta = UNREAD_CUTOFF
if date_delta < self.mark_read_date: if date_delta < self.mark_read_date:
date_delta = self.mark_read_date date_delta = self.mark_read_date
else: else:
@ -87,8 +86,8 @@ class UserSubscription(models.Model):
read_stories_ids.append(us.story.story_guid) read_stories_ids.append(us.story.story_guid)
elif hasattr(us.story, 'id') and isinstance(us.story.id, unicode): elif hasattr(us.story, 'id') and isinstance(us.story.id, unicode):
read_stories_ids.append(us.story.id) # TODO: Remove me after migration from story.id->guid read_stories_ids.append(us.story.id) # TODO: Remove me after migration from story.id->guid
stories_db = MStory.objects(story_feed_id=self.feed.pk, stories_db = stories_db or MStory.objects(story_feed_id=self.feed.pk,
story_date__gte=date_delta) story_date__gte=date_delta)
if not silent: if not silent:
logging.info(' ---> [%s] MStory: %s' % (self.user, datetime.datetime.now() - now)) logging.info(' ---> [%s] MStory: %s' % (self.user, datetime.datetime.now() - now))
unread_stories_db = [] unread_stories_db = []
@ -191,6 +190,10 @@ class MUserStory(mongo.Document):
'allow_inheritance': False, 'allow_inheritance': False,
} }
@classmethod
def delete_old_stories(cls):
    """Delete every MUserStory whose read_date is at or before UNREAD_CUTOFF."""
    # NOTE: UNREAD_CUTOFF is assigned once at module level, so the cutoff is
    # fixed at import time and does not advance while the process keeps running.
    expired_reads = MUserStory.objects(read_date__lte=UNREAD_CUTOFF)
    expired_reads.delete()
class UserSubscriptionFolders(models.Model): class UserSubscriptionFolders(models.Model):
""" """

View file

@ -174,6 +174,7 @@ APPEND_SLASH = True
SOUTH_TESTS_MIGRATE = False SOUTH_TESTS_MIGRATE = False
SESSION_ENGINE = "django.contrib.sessions.backends.cached_db" SESSION_ENGINE = "django.contrib.sessions.backends.cached_db"
TEST_RUNNER = "utils.testrunner.TestRunner" TEST_RUNNER = "utils.testrunner.TestRunner"
DAYS_OF_UNREAD = 14
# =========== # ===========
# = Logging = # = Logging =

View file

@ -2,7 +2,8 @@ from apps.rss_feeds.models import FeedUpdateHistory
# from apps.rss_feeds.models import FeedXML # from apps.rss_feeds.models import FeedXML
from django.core.cache import cache from django.core.cache import cache
from django.conf import settings from django.conf import settings
from apps.reader.models import UserSubscription from apps.reader.models import UserSubscription, MUserStory
from apps.rss_feeds.models import MStory
from apps.rss_feeds.importer import PageImporter from apps.rss_feeds.importer import PageImporter
from utils import feedparser from utils import feedparser
from django.db import IntegrityError from django.db import IntegrityError
@ -28,6 +29,8 @@ SLOWFEED_WARNING = 10
ENTRY_NEW, ENTRY_UPDATED, ENTRY_SAME, ENTRY_ERR = range(4) ENTRY_NEW, ENTRY_UPDATED, ENTRY_SAME, ENTRY_ERR = range(4)
FEED_OK, FEED_SAME, FEED_ERRPARSE, FEED_ERRHTTP, FEED_ERREXC = range(5) FEED_OK, FEED_SAME, FEED_ERRPARSE, FEED_ERRHTTP, FEED_ERREXC = range(5)
UNREAD_CUTOFF = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)
def mtime(ttime): def mtime(ttime):
""" datetime auxiliar function. """ datetime auxiliar function.
""" """
@ -314,13 +317,19 @@ class Dispatcher:
pfeed = ProcessFeed(feed, fetched_feed, db, self.options) pfeed = ProcessFeed(feed, fetched_feed, db, self.options)
ret_feed, ret_entries = pfeed.process() ret_feed, ret_entries = pfeed.process()
if ret_entries.get(ENTRY_NEW) or self.options['force']: if ret_entries.get(ENTRY_NEW) or self.options['force'] or not feed.fetched_once:
if not feed.fetched_once:
feed.fetched_once = True
feed.save()
MUserStory.delete_old_stories()
user_subs = UserSubscription.objects.filter(feed=feed) user_subs = UserSubscription.objects.filter(feed=feed)
logging.debug(u' ---> [%-30s] Computing scores for all feed subscribers: %s subscribers' % (unicode(feed)[:30], user_subs.count())) logging.debug(u' ---> [%-30s] Computing scores for all feed subscribers: %s subscribers' % (unicode(feed)[:30], user_subs.count()))
stories_db = MStory.objects(story_feed_id=feed.pk,
story_date__gte=UNREAD_CUTOFF)
for sub in user_subs: for sub in user_subs:
cache.delete('usersub:%s' % sub.user_id) cache.delete('usersub:%s' % sub.user_id)
silent = False if self.options['verbose'] >= 2 else True silent = False if self.options['verbose'] >= 2 else True
sub.calculate_feed_scores(silent=silent) sub.calculate_feed_scores(silent=silent, stories_db=stories_db)
cache.delete('feed_stories:%s-%s-%s' % (feed.id, 0, 25)) cache.delete('feed_stories:%s-%s-%s' % (feed.id, 0, 25))
# if ret_entries.get(ENTRY_NEW) or ret_entries.get(ENTRY_UPDATED) or self.options['force']: # if ret_entries.get(ENTRY_NEW) or ret_entries.get(ENTRY_UPDATED) or self.options['force']:
# feed.get_stories(force=True) # feed.get_stories(force=True)