NewsBlur-viq/apps/statistics/models.py
Samuel Clay 408cb7e801 Merge branch 'master' into social
* master:
  Fixing starred stories from not loading past the second page due to a missing flag reset.
  Fixing user-found bug around tooltips continuing to show if the original dom element (and the reference to the tooltip) are destroyed too early, leaving a tooltip floating in the air.
  Geometrically delaying the fetching of bad feeds, while they're still somewhat fresh.
  Fixing error codes on statistics. Also fixing simple error on invalid feed.
  Update media/js/newsblur/reader.js

Conflicts:
	media/js/newsblur/reader/reader.js
2012-02-24 13:01:48 -08:00

168 lines
No EOL
6.7 KiB
Python

import datetime
import mongoengine as mongo
import urllib2
import random
from django.db.models import Avg, Count
from apps.rss_feeds.models import MFeedFetchHistory, MPageFetchHistory, FeedLoadtime
from apps.profile.models import Profile
from utils import json_functions as json
class MStatistics(mongo.Document):
    """Key/value store for site-wide statistics (the `statistics` collection).

    Each statistic is one document: `key` names it and `value` holds its
    payload. `all()` reads every document back and converts the known keys
    to Python values; the `collect_statistics_*` classmethods recompute and
    upsert the individual statistics.
    """

    key = mongo.StringField(unique=True)
    value = mongo.StringField()

    meta = {
        'collection': 'statistics',
        'allow_inheritance': False,
        'indexes': ['key'],
    }

    def __unicode__(self):
        return "%s: %s" % (self.key, self.value)

    @classmethod
    def all(cls):
        """Return every stored statistic as a `{key: value}` dict.

        Known keys are converted from their stored form: the hourly series
        are JSON-decoded, counters become `int` and averages become `float`.
        Unknown keys are returned exactly as stored.
        """
        json_keys = ('avg_time_taken', 'sites_loaded')
        # 'pages_fetched' is written next to 'feeds_fetched' by
        # collect_statistics_feeds_fetched, so it is converted the same way.
        int_keys = ('feeds_fetched', 'pages_fetched', 'premium_users',
                    'standard_users', 'latest_sites_loaded', 'max_sites_loaded')
        float_keys = ('latest_avg_time_taken', 'max_avg_time_taken')

        values = {}
        for stat in cls.objects.all():
            value = stat.value
            if stat.key in json_keys:
                value = json.decode(value)
            elif stat.key in int_keys:
                value = int(value)
            elif stat.key in float_keys:
                value = float(value)
            values[stat.key] = value
        return values

    @classmethod
    def collect_statistics(cls):
        """Recompute every statistic, printing the elapsed time after each step.

        The printed durations are cumulative: each one is measured from the
        start of this call, not from the previous step.
        """
        now = datetime.datetime.now()
        last_day = now - datetime.timedelta(hours=24)

        cls.collect_statistics_feeds_fetched(last_day)
        print("Feeds Fetched: %s" % (datetime.datetime.now() - now))
        cls.collect_statistics_premium_users(last_day)
        print("Premiums: %s" % (datetime.datetime.now() - now))
        cls.collect_statistics_standard_users(last_day)
        print("Standard users: %s" % (datetime.datetime.now() - now))
        cls.collect_statistics_sites_loaded(last_day)
        print("Sites loaded: %s" % (datetime.datetime.now() - now))

    @classmethod
    def collect_statistics_feeds_fetched(cls, last_day=None):
        """Store the feed/page fetch counts since `last_day` and purge old history.

        `last_day` defaults to 24 hours ago. After the counts are upserted,
        fetch history older than `last_day` with status 200/304 is deleted,
        and all fetch history older than 30 days is deleted. The purge runs
        under a 60 second time limit; a timeout is reported and otherwise
        ignored.

        Returns the number of feed fetches counted.
        """
        now = datetime.datetime.now()
        if not last_day:
            last_day = now - datetime.timedelta(hours=24)
        last_month = now - datetime.timedelta(days=30)

        # Count fetches inside the window (>= last_day), the same window the
        # user collectors use. Filtering with `__lt` counted only the records
        # older than the window, i.e. the ones purged below.
        feeds_fetched = MFeedFetchHistory.objects.filter(fetch_date__gte=last_day).count()
        cls.objects(key='feeds_fetched').update_one(upsert=True, key='feeds_fetched', value=feeds_fetched)
        pages_fetched = MPageFetchHistory.objects.filter(fetch_date__gte=last_day).count()
        cls.objects(key='pages_fetched').update_one(upsert=True, key='pages_fetched', value=pages_fetched)

        # Imported at call time, as in the original code.
        from utils.feed_functions import timelimit, TimeoutError

        @timelimit(60)
        def delete_old_history():
            # Fetches with status 200/304 are dropped after a day ...
            MFeedFetchHistory.objects(fetch_date__lt=last_day, status_code__in=[200, 304]).delete()
            MPageFetchHistory.objects(fetch_date__lt=last_day, status_code__in=[200, 304]).delete()
            # ... everything else is kept for 30 days.
            MFeedFetchHistory.objects(fetch_date__lt=last_month).delete()
            MPageFetchHistory.objects(fetch_date__lt=last_month).delete()

        try:
            delete_old_history()
        except TimeoutError:
            print("Timed out on deleting old history. Shit.")

        return feeds_fetched

    @classmethod
    def collect_statistics_premium_users(cls, last_day=None):
        """Store and return the number of premium profiles seen since `last_day`.

        `last_day` defaults to 24 hours ago.
        """
        if not last_day:
            last_day = datetime.datetime.now() - datetime.timedelta(hours=24)

        premium_users = Profile.objects.filter(last_seen_on__gte=last_day, is_premium=True).count()
        cls.objects(key='premium_users').update_one(upsert=True, key='premium_users', value=premium_users)

        return premium_users

    @classmethod
    def collect_statistics_standard_users(cls, last_day=None):
        """Store and return the number of non-premium profiles seen since `last_day`.

        `last_day` defaults to 24 hours ago.
        """
        if not last_day:
            last_day = datetime.datetime.now() - datetime.timedelta(hours=24)

        standard_users = Profile.objects.filter(last_seen_on__gte=last_day, is_premium=False).count()
        cls.objects(key='standard_users').update_one(upsert=True, key='standard_users', value=standard_users)

        return standard_users

    @classmethod
    def collect_statistics_sites_loaded(cls, last_day=None):
        """Store hourly load counts and average load times for the past 24 hours.

        `last_day` is accepted so the signature matches the other collectors,
        but it is not used: the 24 one-hour buckets are always measured back
        from the current time.

        Upserts six keys: the two JSON-encoded hourly series (oldest hour
        first), the latest hour's values, and the maxima. The stored maximum
        average has a floor of 1.
        """
        now = datetime.datetime.now()
        sites_loaded = []
        avg_time_taken = []

        # Walk from the oldest bucket to the newest so both series end up in
        # chronological order, with the most recent hour last.
        for hour in reversed(range(24)):
            start_hours_ago = now - datetime.timedelta(hours=hour)
            end_hours_ago = now - datetime.timedelta(hours=hour + 1)
            aggregates = dict(count=Count('loadtime'), avg=Avg('loadtime'))
            load_times = FeedLoadtime.objects.filter(
                date_accessed__lte=start_hours_ago,
                date_accessed__gte=end_hours_ago
            ).aggregate(**aggregates)
            # Either aggregate may come back falsy (e.g. an empty bucket);
            # store 0 in that case.
            sites_loaded.append(load_times['count'] or 0)
            avg_time_taken.append(load_times['avg'] or 0)

        values = (
            ('sites_loaded', json.encode(sites_loaded)),
            ('avg_time_taken', json.encode(avg_time_taken)),
            ('latest_sites_loaded', sites_loaded[-1]),
            ('latest_avg_time_taken', avg_time_taken[-1]),
            ('max_sites_loaded', max(sites_loaded)),
            ('max_avg_time_taken', max(1, max(avg_time_taken))),
        )
        for key, value in values:
            cls.objects(key=key).update_one(upsert=True, key=key, value=value)
class MFeedback(mongo.Document):
    """A feedback topic fetched from the Get Satisfaction widget feed.

    Documents live in the `feedback` collection and are ordered by `order`,
    which is the topic's position in the fetched payload.
    """

    date = mongo.StringField()
    summary = mongo.StringField()
    subject = mongo.StringField()
    url = mongo.StringField()
    style = mongo.StringField()
    order = mongo.IntField()

    meta = {
        'collection': 'feedback',
        'allow_inheritance': False,
        'indexes': ['style'],
        'ordering': ['order'],
    }

    def __unicode__(self):
        return "%s: (%s) %s" % (self.style, self.date, self.subject)

    @classmethod
    def collect_feedback(cls):
        """Replace the stored feedback with the topics currently in the widget feed.

        Stored feedback is left untouched when the feed decodes to an empty
        payload. Each topic gets its payload position as `order`, and the
        words 'about' and 'less than' are removed from its `date` text.

        All documents are prepared before the collection is cleared, so a
        malformed payload (e.g. a topic without `date`) raises without
        deleting the feedback that is already stored.
        """
        response = urllib2.urlopen('https://getsatisfaction.com/newsblur/topics.widget')
        try:
            data = response.read()
        finally:
            # Always release the HTTP connection, even if the read fails.
            response.close()

        # The first and last characters are dropped before decoding --
        # presumably a wrapper around the JSON body; confirm against the feed.
        data = json.decode(data[1:-1])
        if not data:
            return

        documents = []
        for order, feedback in enumerate(data):
            feedback['order'] = order
            for removal in ['about', 'less than']:
                feedback['date'] = feedback['date'].replace(removal, '')
            # Convert unicode keys to strings so they can be passed as kwargs.
            documents.append(dict((str(k), v) for k, v in feedback.items()))

        cls.objects.delete()
        for document in documents:
            cls.objects.create(**document)

    @classmethod
    def all(cls):
        """Return the first five feedback documents in the collection's default ordering."""
        feedbacks = cls.objects.all()[:5]

        return feedbacks