Adding feed classifiers counts. Will be used to display graphs which show likes and dislikes aggregated across users in the Statistics dialog.

This commit is contained in:
Samuel Clay 2011-04-07 17:00:28 -04:00
parent 1254d350d6
commit 4c16df13eb
3 changed files with 138 additions and 4 deletions

View file

@ -0,0 +1,90 @@
# encoding: utf-8
import datetime
from south.db import db
from south.v2 import SchemaMigration
from django.db import models
class Migration(SchemaMigration):
def forwards(self, orm):
# Adding field 'FeedData.feed_classifier_counts'
db.add_column('rss_feeds_feeddata', 'feed_classifier_counts', self.gf('django.db.models.fields.TextField')(null=True, blank=True), keep_default=False)
def backwards(self, orm):
# Deleting field 'FeedData.feed_classifier_counts'
db.delete_column('rss_feeds_feeddata', 'feed_classifier_counts')
models = {
'rss_feeds.duplicatefeed': {
'Meta': {'object_name': 'DuplicateFeed'},
'duplicate_address': ('django.db.models.fields.CharField', [], {'max_length': '255'}),
'duplicate_feed_id': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True'}),
'feed': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'duplicate_addresses'", 'to': "orm['rss_feeds.Feed']"}),
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'})
},
'rss_feeds.feed': {
'Meta': {'ordering': "['feed_title']", 'object_name': 'Feed', 'db_table': "'feeds'"},
'active': ('django.db.models.fields.BooleanField', [], {'default': 'True', 'db_index': 'True'}),
'active_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1', 'db_index': 'True'}),
'average_stories_per_month': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
'creation': ('django.db.models.fields.DateField', [], {'auto_now_add': 'True', 'blank': 'True'}),
'days_to_trim': ('django.db.models.fields.IntegerField', [], {'default': '90'}),
'etag': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True', 'blank': 'True'}),
'exception_code': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
'feed_address': ('django.db.models.fields.URLField', [], {'unique': 'True', 'max_length': '255'}),
'feed_link': ('django.db.models.fields.URLField', [], {'default': "''", 'max_length': '1000', 'null': 'True', 'blank': 'True'}),
'feed_title': ('django.db.models.fields.CharField', [], {'default': "'[Untitled]'", 'max_length': '255', 'null': 'True', 'blank': 'True'}),
'fetched_once': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
'has_feed_exception': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}),
'has_page_exception': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}),
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
'last_load_time': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
'last_modified': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'blank': 'True'}),
'last_update': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}),
'min_to_decay': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
'next_scheduled_update': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}),
'num_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1'}),
'premium_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1'}),
'queued_date': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}),
'stories_last_month': ('django.db.models.fields.IntegerField', [], {'default': '0'})
},
'rss_feeds.feeddata': {
'Meta': {'object_name': 'FeedData'},
'feed': ('utils.fields.AutoOneToOneField', [], {'related_name': "'data'", 'unique': 'True', 'to': "orm['rss_feeds.Feed']"}),
'feed_classifier_counts': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
'feed_tagline': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
'popular_authors': ('django.db.models.fields.CharField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}),
'popular_tags': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
'story_count_history': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'})
},
'rss_feeds.feedicon': {
'Meta': {'object_name': 'FeedIcon'},
'color': ('django.db.models.fields.CharField', [], {'max_length': '6', 'null': 'True', 'blank': 'True'}),
'data': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
'feed': ('utils.fields.AutoOneToOneField', [], {'related_name': "'icon'", 'unique': 'True', 'primary_key': 'True', 'to': "orm['rss_feeds.Feed']"}),
'icon_url': ('django.db.models.fields.CharField', [], {'max_length': '2000', 'null': 'True', 'blank': 'True'}),
'not_found': ('django.db.models.fields.BooleanField', [], {'default': 'False'})
},
'rss_feeds.feedloadtime': {
'Meta': {'object_name': 'FeedLoadtime'},
'date_accessed': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'}),
'feed': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['rss_feeds.Feed']"}),
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
'loadtime': ('django.db.models.fields.FloatField', [], {})
},
'rss_feeds.feedupdatehistory': {
'Meta': {'object_name': 'FeedUpdateHistory'},
'average_per_feed': ('django.db.models.fields.DecimalField', [], {'max_digits': '4', 'decimal_places': '1'}),
'fetch_date': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'}),
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
'number_of_feeds': ('django.db.models.fields.IntegerField', [], {}),
'seconds_taken': ('django.db.models.fields.IntegerField', [], {})
}
}
complete_apps = ['rss_feeds']

View file

@ -5,7 +5,6 @@ import re
import mongoengine as mongo
import zlib
import urllib
from pprint import pprint
from collections import defaultdict
from operator import itemgetter
from BeautifulSoup import BeautifulStoneSoup
@ -14,7 +13,6 @@ from django.db import models
from django.db import IntegrityError
from django.core.cache import cache
from django.conf import settings
from django.core.mail import mail_admins
from mongoengine.queryset import OperationError
from mongoengine.base import ValidationError
from apps.rss_feeds.tasks import UpdateFeeds
@ -402,6 +400,51 @@ class Feed(models.Model):
self.save()
def save_classifiers_count(self):
from apps.analyzer.models import MClassifierTitle, MClassifierAuthor, MClassifierFeed, MClassifierTag
def calculate_scores(cls, facet):
map_f = """
function() {
emit(this["%s"], {
pos: this.score>0 ? this.score : 0,
neg: this.score<0 ? Math.abs(this.score) : 0
});
}
""" % (facet)
reduce_f = """
function(key, values) {
var result = {pos: 0, neg: 0};
values.forEach(function(value) {
result.pos += value.pos;
result.neg += value.neg;
});
return result;
}
"""
scores = {}
res = cls.objects(feed_id=self.pk).map_reduce(map_f, reduce_f, keep_temp=False)
for r in res:
scores[r.key] = dict([(k, int(v)) for k,v in r.value.iteritems()])
return scores
scores = {}
for cls, facet in [(MClassifierTitle, 'title'),
(MClassifierAuthor, 'author'),
(MClassifierTag, 'tag'),
(MClassifierFeed, 'feed_id')]:
scores[facet] = calculate_scores(cls, facet)
if facet == 'feed_id' and scores[facet].values():
scores['feed'] = scores[facet].values()[0]
del scores['feed_id']
elif not scores[facet]:
del scores[facet]
if scores:
self.data.feed_classifier_counts = json.encode(scores)
self.data.save()
def update(self, force=False, single_threaded=True, compute_scores=True):
from utils import feed_fetcher
try:
@ -806,6 +849,7 @@ class FeedData(models.Model):
feed = AutoOneToOneField(Feed, related_name='data')
feed_tagline = models.CharField(max_length=1024, blank=True, null=True)
story_count_history = models.TextField(blank=True, null=True)
feed_classifier_counts = models.TextField(blank=True, null=True)
popular_tags = models.CharField(max_length=1024, blank=True, null=True)
popular_authors = models.CharField(max_length=2048, blank=True, null=True)

4
fabfile.py vendored
View file

@ -45,10 +45,10 @@ def deploy():
run('kill -HUP `cat logs/gunicorn.pid`')
run('curl -s http://www.newsblur.com > /dev/null')
with cd('media/js'):
run('rm *.gz')
run('rm -f *.gz')
run('for js in *-compressed-*.js; do gzip -9 $js -c > $js.gz; done;')
with cd('media/css'):
run('rm *.gz')
run('rm -f *.gz')
run('for css in *-compressed-*.css; do gzip -9 $css -c > $css.gz; done;')
@roles('web')