Readying unread counting to incorporate the new read preference in mongoengine.

This commit is contained in:
Samuel Clay 2012-10-29 12:25:28 -07:00
parent 448b1b1932
commit 2ec10bcf7e
7 changed files with 49 additions and 40 deletions

View file

@ -347,7 +347,7 @@ class UserSubscription(models.Model):
return data return data
def calculate_feed_scores(self, silent=False, stories_db=None): def calculate_feed_scores(self, silent=False, stories=None):
# now = datetime.datetime.strptime("2009-07-06 22:30:03", "%Y-%m-%d %H:%M:%S") # now = datetime.datetime.strptime("2009-07-06 22:30:03", "%Y-%m-%d %H:%M:%S")
now = datetime.datetime.now() now = datetime.datetime.now()
UNREAD_CUTOFF = now - datetime.timedelta(days=settings.DAYS_OF_UNREAD) UNREAD_CUTOFF = now - datetime.timedelta(days=settings.DAYS_OF_UNREAD)
@ -376,23 +376,23 @@ class UserSubscription(models.Model):
read_stories = MUserStory.objects(user_id=self.user_id, read_stories = MUserStory.objects(user_id=self.user_id,
feed_id=self.feed_id, feed_id=self.feed_id,
read_date__gte=self.mark_read_date) read_date__gte=self.mark_read_date)
# if not silent:
# logging.info(' ---> [%s] Read stories: %s' % (self.user, datetime.datetime.now() - now))
read_stories_ids = [us.story_id for us in read_stories] read_stories_ids = [us.story_id for us in read_stories]
stories_db = stories_db or MStory.objects(story_feed_id=self.feed_id,
story_date__gte=date_delta) if not stories:
# if not silent: stories_db = MStory.objects(story_feed_id=self.feed_id,
# logging.info(' ---> [%s] MStory: %s' % (self.user, datetime.datetime.now() - now)) story_date__gte=date_delta)
stories = Feed.format_stories(stories_db, self.feed_id)
oldest_unread_story_date = now oldest_unread_story_date = now
unread_stories_db = [] unread_stories = []
for story in stories_db: for story in stories:
if story.story_date < date_delta: if story['story_date'] < date_delta:
continue continue
if hasattr(story, 'story_guid') and story.story_guid not in read_stories_ids: if story.get('story_guid', None) not in read_stories_ids:
unread_stories_db.append(story) unread_stories.append(story)
if story.story_date < oldest_unread_story_date: if story.story_date < oldest_unread_story_date:
oldest_unread_story_date = story.story_date oldest_unread_story_date = story.story_date
stories = Feed.format_stories(unread_stories_db, self.feed_id)
# if not silent: # if not silent:
# logging.info(' ---> [%s] Format stories: %s' % (self.user, datetime.datetime.now() - now)) # logging.info(' ---> [%s] Format stories: %s' % (self.user, datetime.datetime.now() - now))
@ -408,7 +408,7 @@ class UserSubscription(models.Model):
'feed': apply_classifier_feeds(classifier_feeds, self.feed), 'feed': apply_classifier_feeds(classifier_feeds, self.feed),
} }
for story in stories: for story in unread_stories:
scores.update({ scores.update({
'author' : apply_classifier_authors(classifier_authors, story), 'author' : apply_classifier_authors(classifier_authors, story),
'tags' : apply_classifier_tags(classifier_tags, story), 'tags' : apply_classifier_tags(classifier_tags, story),

View file

@ -8,7 +8,7 @@ from django.core.urlresolvers import reverse
from django.contrib.auth.models import User from django.contrib.auth.models import User
from django.contrib.sites.models import Site from django.contrib.sites.models import Site
from django.template.loader import render_to_string from django.template.loader import render_to_string
from django.http import HttpResponse, HttpResponseRedirect, Http404, Http403 from django.http import HttpResponse, HttpResponseRedirect, Http404, HttpResponseForbidden
from django.conf import settings from django.conf import settings
from django.template import RequestContext from django.template import RequestContext
from django.utils import feedgenerator from django.utils import feedgenerator
@ -1110,7 +1110,7 @@ def shared_stories_rss_feed(request, user_id, username):
current_site = current_site and current_site.domain current_site = current_site and current_site.domain
if social_profile.private: if social_profile.private:
return Http403 return HttpResponseForbidden
data = {} data = {}
data['title'] = social_profile.title data['title'] = social_profile.title

15
fabfile.py vendored
View file

@ -455,12 +455,11 @@ def setup_mongoengine():
with cd(env.VENDOR_PATH): with cd(env.VENDOR_PATH):
with settings(warn_only=True): with settings(warn_only=True):
run('rm -fr mongoengine') run('rm -fr mongoengine')
run('git clone https://github.com/mongoengine/mongoengine.git') run('git clone https://github.com/MongoEngine/mongoengine.git')
sudo('rm -f /usr/local/lib/python2.7/dist-packages/mongoengine') sudo('rm -f /usr/local/lib/python2.7/dist-packages/mongoengine')
sudo('rm -f /usr/local/lib/python2.7/dist-packages/mongoengine-*')
sudo('ln -s %s /usr/local/lib/python2.7/dist-packages/mongoengine' % sudo('ln -s %s /usr/local/lib/python2.7/dist-packages/mongoengine' %
os.path.join(env.VENDOR_PATH, 'mongoengine/mongoengine')) os.path.join(env.VENDOR_PATH, 'mongoengine/mongoengine'))
with cd(os.path.join(env.VENDOR_PATH, 'mongoengine')):
run('git checkout -b dev origin/dev')
def setup_pymongo_repo(): def setup_pymongo_repo():
with cd(env.VENDOR_PATH): with cd(env.VENDOR_PATH):
@ -472,12 +471,10 @@ def setup_pymongo_repo():
def setup_forked_mongoengine(): def setup_forked_mongoengine():
with cd(os.path.join(env.VENDOR_PATH, 'mongoengine')): with cd(os.path.join(env.VENDOR_PATH, 'mongoengine')):
with settings(warn_only=True): with settings(warn_only=True):
run('git checkout master') run('git remote add clay https://github.com/samuelclay/mongoengine.git')
run('git branch -D dev') run('git pull')
run('git remote add %s git://github.com/samuelclay/mongoengine.git' % env.user) run('git fetch clay')
run('git fetch %s' % env.user) run('git checkout -b clay_master clay/master')
run('git checkout -b dev %s/dev' % env.user)
run('git pull %s dev' % env.user)
def switch_forked_mongoengine(): def switch_forked_mongoengine():
with cd(os.path.join(env.VENDOR_PATH, 'mongoengine')): with cd(os.path.join(env.VENDOR_PATH, 'mongoengine')):

View file

@ -2,7 +2,9 @@ express = require 'express'
mongo = require 'mongodb' mongo = require 'mongodb'
MONGODB_SERVER = if process.env.NODE_ENV == 'development' then 'localhost' else 'db04' MONGODB_SERVER = if process.env.NODE_ENV == 'development' then 'localhost' else 'db04'
server = new mongo.Server(MONGODB_SERVER, 27017, MONGODB_PORT = parseInt(process.env.MONGODB_PORT or 27017, 10)
server = new mongo.Server(MONGODB_SERVER, MONGODB_PORT,
auto_reconnect: true auto_reconnect: true
poolSize: 12) poolSize: 12)
db = new mongo.Db('newsblur', server) db = new mongo.Db('newsblur', server)

View file

@ -1,6 +1,6 @@
// Generated by CoffeeScript 1.4.0 // Generated by CoffeeScript 1.4.0
(function() { (function() {
var MONGODB_SERVER, app, db, express, mongo, server, var MONGODB_PORT, MONGODB_SERVER, app, db, express, mongo, server,
_this = this; _this = this;
express = require('express'); express = require('express');
@ -9,7 +9,9 @@
MONGODB_SERVER = process.env.NODE_ENV === 'development' ? 'localhost' : 'db04'; MONGODB_SERVER = process.env.NODE_ENV === 'development' ? 'localhost' : 'db04';
server = new mongo.Server(MONGODB_SERVER, 27017, { MONGODB_PORT = parseInt(process.env.MONGODB_PORT || 27017, 10);
server = new mongo.Server(MONGODB_SERVER, MONGODB_PORT, {
auto_reconnect: true, auto_reconnect: true,
poolSize: 12 poolSize: 12
}); });

View file

@ -469,8 +469,16 @@ MONGO_DB_DEFAULTS = {
'alias': 'default', 'alias': 'default',
} }
MONGO_DB = dict(MONGO_DB_DEFAULTS, **MONGO_DB) MONGO_DB = dict(MONGO_DB_DEFAULTS, **MONGO_DB)
# if MONGO_DB.get('read_preference', pymongo.ReadPreference.PRIMARY) != pymongo.ReadPreference.PRIMARY:
# MONGO_PRIMARY_DB = MONGO_DB.copy()
# MONGO_PRIMARY_DB.update(read_preference=pymongo.ReadPreference.PRIMARY)
# MONGOPRIMARYDB = connect(MONGO_PRIMARY_DB.pop('name'), **MONGO_PRIMARY_DB)
# else:
# MONGOPRIMARYDB = MONGODB
MONGODB = connect(MONGO_DB.pop('name'), **MONGO_DB) MONGODB = connect(MONGO_DB.pop('name'), **MONGO_DB)
MONGO_ANALYTICS_DB_DEFAULTS = { MONGO_ANALYTICS_DB_DEFAULTS = {
'name': 'nbanalytics', 'name': 'nbanalytics',
'host': 'db02:27017', 'host': 'db02:27017',
@ -479,6 +487,7 @@ MONGO_ANALYTICS_DB_DEFAULTS = {
MONGO_ANALYTICS_DB = dict(MONGO_ANALYTICS_DB_DEFAULTS, **MONGO_ANALYTICS_DB) MONGO_ANALYTICS_DB = dict(MONGO_ANALYTICS_DB_DEFAULTS, **MONGO_ANALYTICS_DB)
MONGOANALYTICSDB = connect(MONGO_ANALYTICS_DB.pop('name'), **MONGO_ANALYTICS_DB) MONGOANALYTICSDB = connect(MONGO_ANALYTICS_DB.pop('name'), **MONGO_ANALYTICS_DB)
# ========= # =========
# = Redis = # = Redis =
# ========= # =========

View file

@ -229,11 +229,6 @@ class ProcessFeed:
story_feed_id=self.feed_id story_feed_id=self.feed_id
).limit(max(int(len(story_guids)*1.5), 10))) ).limit(max(int(len(story_guids)*1.5), 10)))
# MStory.objects(
# (Q(story_date__gte=start_date) & Q(story_date__lte=end_date))
# | (Q(story_guid__in=story_guids)),
# story_feed=self.feed
# ).order_by('-story_date')
ret_values = self.feed.add_update_stories(stories, existing_stories, ret_values = self.feed.add_update_stories(stories, existing_stories,
verbose=self.options['verbose']) verbose=self.options['verbose'])
@ -513,28 +508,32 @@ class Dispatcher:
active=True, active=True,
user__profile__last_seen_on__gte=UNREAD_CUTOFF)\ user__profile__last_seen_on__gte=UNREAD_CUTOFF)\
.order_by('-last_read_date') .order_by('-last_read_date')
if not user_subs.count():
return
for sub in user_subs: for sub in user_subs:
if not sub.needs_unread_recalc: if not sub.needs_unread_recalc:
sub.needs_unread_recalc = True sub.needs_unread_recalc = True
sub.save() sub.save()
if self.options['compute_scores']: if self.options['compute_scores']:
stories_db = MStory.objects(story_feed_id=feed.pk, stories = MStory.objects(story_feed_id=feed.pk,
story_date__gte=UNREAD_CUTOFF) story_date__gte=UNREAD_CUTOFF)
stories = Feed.format_stories(stories, feed.pk)
logging.debug(u' ---> [%-30s] ~FYComputing scores: ~SB%s stories~SN with ~SB%s subscribers ~SN(%s/%s/%s)' % ( logging.debug(u' ---> [%-30s] ~FYComputing scores: ~SB%s stories~SN with ~SB%s subscribers ~SN(%s/%s/%s)' % (
feed.title[:30], stories_db.count(), user_subs.count(), feed.title[:30], len(stories), user_subs.count(),
feed.num_subscribers, feed.active_subscribers, feed.premium_subscribers)) feed.num_subscribers, feed.active_subscribers, feed.premium_subscribers))
self.calculate_feed_scores_with_stories(user_subs, stories_db) self.calculate_feed_scores_with_stories(user_subs, stories)
elif self.options.get('mongodb_replication_lag'): elif self.options.get('mongodb_replication_lag'):
logging.debug(u' ---> [%-30s] ~BR~FYSkipping computing scores: ~SB%s seconds~SN of mongodb lag' % ( logging.debug(u' ---> [%-30s] ~BR~FYSkipping computing scores: ~SB%s seconds~SN of mongodb lag' % (
feed.title[:30], self.options.get('mongodb_replication_lag'))) feed.title[:30], self.options.get('mongodb_replication_lag')))
@timelimit(10) @timelimit(10)
def calculate_feed_scores_with_stories(self, user_subs, stories_db): def calculate_feed_scores_with_stories(self, user_subs, stories):
for sub in user_subs: for sub in user_subs:
silent = False if self.options['verbose'] >= 2 else True silent = False if self.options['verbose'] >= 2 else True
sub.calculate_feed_scores(silent=silent, stories_db=stories_db) sub.calculate_feed_scores(silent=silent, stories=stories)
def add_jobs(self, feeds_queue, feeds_count=1): def add_jobs(self, feeds_queue, feeds_count=1):
""" adds a feed processing job to the pool """ adds a feed processing job to the pool