NewsBlur/apps/statistics/models.py
Samuel Clay 603ac08faa Merge branch 'master' into social
* master:
  Allow login from signup form.
  Reducing celery prefetch to get more accurate queue sizes.
  Removing RabbitMQ dependency. Moving to redis for task queuing.
  Counting queue size for celery/redis.
  Switching to redis to feed queuing. Also adding a weight for skipping feed fetches under high load.
  Skipping feed resurrection.
  Fixing last update problem that has been choking feed fetchers.
  Fixing last update problem that has been choking feed fetchers.
  Typo in feed pool drain.
  Adding ability to wind down feed queue by faking requests.
  Adding debug message about counting errors.

Conflicts:
	fabfile.py
2012-02-27 13:13:27 -08:00

181 lines
No EOL
7 KiB
Python

import datetime
import mongoengine as mongo
import urllib2
import random
from django.db.models import Avg, Count
from apps.rss_feeds.models import MFeedFetchHistory, MPageFetchHistory, FeedLoadtime
from apps.profile.models import Profile
from utils import json_functions as json
class MStatistics(mongo.Document):
key = mongo.StringField(unique=True)
value = mongo.StringField()
meta = {
'collection': 'statistics',
'allow_inheritance': False,
'indexes': ['key'],
}
def __unicode__(self):
return "%s: %s" % (self.key, self.value)
@classmethod
def get(cls, key, default=None):
obj = cls.objects.filter(key=key).first()
if not obj:
return default
return obj.value
@classmethod
def set(cls, key, value):
obj, _ = cls.objects.get_or_create(key=key)
obj.value = value
obj.save()
@classmethod
def all(cls):
values = dict([(stat.key, stat.value) for stat in cls.objects.all()])
for key, value in values.items():
if key in ('avg_time_taken', 'sites_loaded'):
values[key] = json.decode(value)
elif key in ('feeds_fetched', 'premium_users', 'standard_users', 'latest_sites_loaded',
'max_sites_loaded'):
values[key] = int(value)
elif key in ('latest_avg_time_taken', 'max_avg_time_taken'):
values[key] = float(value)
return values
@classmethod
def collect_statistics(cls):
now = datetime.datetime.now()
last_day = datetime.datetime.now() - datetime.timedelta(hours=24)
cls.collect_statistics_feeds_fetched(last_day)
print "Feeds Fetched: %s" % (datetime.datetime.now() - now)
cls.collect_statistics_premium_users(last_day)
print "Premiums: %s" % (datetime.datetime.now() - now)
cls.collect_statistics_standard_users(last_day)
print "Standard users: %s" % (datetime.datetime.now() - now)
cls.collect_statistics_sites_loaded(last_day)
print "Sites loaded: %s" % (datetime.datetime.now() - now)
@classmethod
def collect_statistics_feeds_fetched(cls, last_day=None):
if not last_day:
last_day = datetime.datetime.now() - datetime.timedelta(hours=24)
last_month = datetime.datetime.now() - datetime.timedelta(days=30)
feeds_fetched = MFeedFetchHistory.objects.filter(fetch_date__gte=last_day).count()
cls.objects(key='feeds_fetched').update_one(upsert=True, key='feeds_fetched', value=feeds_fetched)
pages_fetched = MPageFetchHistory.objects.filter(fetch_date__gte=last_day).count()
cls.objects(key='pages_fetched').update_one(upsert=True, key='pages_fetched', value=pages_fetched)
from utils.feed_functions import timelimit, TimeoutError
@timelimit(60)
def delete_old_history():
MFeedFetchHistory.objects(fetch_date__lt=last_day, status_code__in=[200, 304]).delete()
MPageFetchHistory.objects(fetch_date__lt=last_day, status_code__in=[200, 304]).delete()
MFeedFetchHistory.objects(fetch_date__lt=last_month).delete()
MPageFetchHistory.objects(fetch_date__lt=last_month).delete()
try:
delete_old_history()
except TimeoutError:
print "Timed out on deleting old history. Shit."
return feeds_fetched
@classmethod
def collect_statistics_premium_users(cls, last_day=None):
if not last_day:
last_day = datetime.datetime.now() - datetime.timedelta(hours=24)
premium_users = Profile.objects.filter(last_seen_on__gte=last_day, is_premium=True).count()
cls.objects(key='premium_users').update_one(upsert=True, key='premium_users', value=premium_users)
return premium_users
@classmethod
def collect_statistics_standard_users(cls, last_day=None):
if not last_day:
last_day = datetime.datetime.now() - datetime.timedelta(hours=24)
standard_users = Profile.objects.filter(last_seen_on__gte=last_day, is_premium=False).count()
cls.objects(key='standard_users').update_one(upsert=True, key='standard_users', value=standard_users)
return standard_users
@classmethod
def collect_statistics_sites_loaded(cls, last_day=None):
if not last_day:
last_day = datetime.datetime.now() - datetime.timedelta(hours=24)
now = datetime.datetime.now()
sites_loaded = []
avg_time_taken = []
for hour in range(24):
start_hours_ago = now - datetime.timedelta(hours=hour)
end_hours_ago = now - datetime.timedelta(hours=hour+1)
aggregates = dict(count=Count('loadtime'), avg=Avg('loadtime'))
load_times = FeedLoadtime.objects.filter(
date_accessed__lte=start_hours_ago,
date_accessed__gte=end_hours_ago
).aggregate(**aggregates)
sites_loaded.append(load_times['count'] or 0)
avg_time_taken.append(load_times['avg'] or 0)
sites_loaded.reverse()
avg_time_taken.reverse()
values = (
('sites_loaded', json.encode(sites_loaded)),
('avg_time_taken', json.encode(avg_time_taken)),
('latest_sites_loaded', sites_loaded[-1]),
('latest_avg_time_taken', avg_time_taken[-1]),
('max_sites_loaded', max(sites_loaded)),
('max_avg_time_taken', max(1, max(avg_time_taken))),
)
for key, value in values:
cls.objects(key=key).update_one(upsert=True, key=key, value=value)
class MFeedback(mongo.Document):
date = mongo.StringField()
summary = mongo.StringField()
subject = mongo.StringField()
url = mongo.StringField()
style = mongo.StringField()
order = mongo.IntField()
meta = {
'collection': 'feedback',
'allow_inheritance': False,
'indexes': ['style'],
'ordering': ['order'],
}
def __unicode__(self):
return "%s: (%s) %s" % (self.style, self.date, self.subject)
@classmethod
def collect_feedback(cls):
data = urllib2.urlopen('https://getsatisfaction.com/newsblur/topics.widget').read()
data = json.decode(data[1:-1])
i = 0
if len(data):
cls.objects.delete()
for feedback in data:
feedback['order'] = i
i += 1
for removal in ['about', 'less than']:
if removal in feedback['date']:
feedback['date'] = feedback['date'].replace(removal, '')
for feedback in data:
# Convert unicode to strings.
fb = dict([(str(k), v) for k, v in feedback.items()])
cls.objects.create(**fb)
@classmethod
def all(cls):
feedbacks = cls.objects.all()[:5]
return feedbacks