import datetime
import mongoengine as mongo
import urllib2
from django.db.models import Avg, Count
from django.conf import settings
from apps.rss_feeds.models import MFeedFetchHistory, MPageFetchHistory, MFeedPushHistory
from apps.rss_feeds.models import FeedLoadtime
from apps.social.models import MSharedStory
from apps.profile.models import Profile
from utils import json_functions as json
from utils import db_functions

class MStatistics(mongo.Document):
    key = mongo.StringField(unique=True)
    value = mongo.DynamicField()

    meta = {
        'collection': 'statistics',
        'allow_inheritance': False,
        'indexes': ['key'],
    }

    def __unicode__(self):
        return "%s: %s" % (self.key, self.value)

    @classmethod
    def get(cls, key, default=None):
        obj = cls.objects.filter(key=key).first()
        if not obj:
            return default
        return obj.value

    @classmethod
    def set(cls, key, value):
        obj, _ = cls.objects.get_or_create(key=key)
        obj.value = value
        obj.save()

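    # Example usage (illustrative values): MStatistics is a small key/value
    # store, so callers can round-trip any value MongoDB can hold:
    #
    #     MStatistics.set('feeds_fetched', 42)
    #     MStatistics.get('feeds_fetched')           # -> 42
    #     MStatistics.get('missing_key', default=0)  # -> 0
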
    @classmethod
    def all(cls):
        stats = cls.objects.all()
        values = dict([(stat.key, stat.value) for stat in stats])
        for key, value in values.items():
            if key in ('avg_time_taken', 'sites_loaded', 'stories_shared'):
                values[key] = json.decode(value)
            elif key in ('feeds_fetched', 'premium_users', 'standard_users',
                         'latest_sites_loaded', 'max_sites_loaded', 'max_stories_shared'):
                values[key] = int(value)
            elif key in ('latest_avg_time_taken', 'max_avg_time_taken'):
                values[key] = float(value)

        values['total_sites_loaded'] = sum(values['sites_loaded']) if 'sites_loaded' in values else 0
        values['total_stories_shared'] = sum(values['stories_shared']) if 'stories_shared' in values else 0

        return values

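    # A sketch of the dictionary ``all()`` returns (numbers are illustrative,
    # not real data):
    #
    #     {'feeds_fetched': 1200, 'premium_users': 40,
    #      'sites_loaded': [3, 7, ...], 'total_sites_loaded': 310, ...}
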
    @classmethod
    def collect_statistics(cls):
        now = datetime.datetime.now()
        last_day = datetime.datetime.now() - datetime.timedelta(hours=24)

        cls.collect_statistics_feeds_fetched(last_day)
        print "Feeds Fetched: %s" % (datetime.datetime.now() - now)
        cls.collect_statistics_premium_users(last_day)
        print "Premiums: %s" % (datetime.datetime.now() - now)
        cls.collect_statistics_standard_users(last_day)
        print "Standard users: %s" % (datetime.datetime.now() - now)
        cls.collect_statistics_sites_loaded(last_day)
        print "Sites loaded: %s" % (datetime.datetime.now() - now)
        cls.collect_statistics_stories_shared(last_day)
        print "Stories shared: %s" % (datetime.datetime.now() - now)
        cls.collect_statistics_for_db()
        print "DB Stats: %s" % (datetime.datetime.now() - now)

    @classmethod
    def collect_statistics_feeds_fetched(cls, last_day=None):
        if not last_day:
            last_day = datetime.datetime.now() - datetime.timedelta(hours=24)
        last_month = datetime.datetime.now() - datetime.timedelta(days=30)

        feeds_fetched = MFeedFetchHistory.objects.filter(fetch_date__gte=last_day).count()
        cls.objects(key='feeds_fetched').update_one(upsert=True, set__key='feeds_fetched',
                                                    set__value=feeds_fetched)
        pages_fetched = MPageFetchHistory.objects.filter(fetch_date__gte=last_day).count()
        cls.objects(key='pages_fetched').update_one(upsert=True, set__key='pages_fetched',
                                                    set__value=pages_fetched)
        feeds_pushed = MFeedPushHistory.objects.filter(push_date__gte=last_day).count()
        cls.objects(key='feeds_pushed').update_one(upsert=True, set__key='feeds_pushed',
                                                   set__value=feeds_pushed)

        from utils.feed_functions import timelimit, TimeoutError

        @timelimit(60)
        def delete_old_history():
            # Successful fetches (200/304) are only kept for a day; everything
            # else is kept for a month so errors can still be inspected.
            MFeedFetchHistory.objects(fetch_date__lt=last_day, status_code__in=[200, 304]).delete()
            MPageFetchHistory.objects(fetch_date__lt=last_day, status_code__in=[200, 304]).delete()
            MFeedFetchHistory.objects(fetch_date__lt=last_month).delete()
            MPageFetchHistory.objects(fetch_date__lt=last_month).delete()
            MFeedPushHistory.objects(push_date__lt=last_month).delete()

        try:
            delete_old_history()
        except TimeoutError:
            print "Timed out on deleting old history. Shit."

        return feeds_fetched

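    # Note: the ``cls.objects(key=...).update_one(upsert=True, ...)`` calls
    # above are an atomic equivalent of ``cls.set(key, value)``: the upsert
    # creates the statistics document if the key has never been recorded.
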
    @classmethod
    def collect_statistics_premium_users(cls, last_day=None):
        if not last_day:
            last_day = datetime.datetime.now() - datetime.timedelta(hours=24)

        premium_users = Profile.objects.filter(last_seen_on__gte=last_day, is_premium=True).count()
        cls.objects(key='premium_users').update_one(upsert=True, set__key='premium_users',
                                                    set__value=premium_users)

        return premium_users

    @classmethod
    def collect_statistics_standard_users(cls, last_day=None):
        if not last_day:
            last_day = datetime.datetime.now() - datetime.timedelta(hours=24)

        standard_users = Profile.objects.filter(last_seen_on__gte=last_day, is_premium=False).count()
        cls.objects(key='standard_users').update_one(upsert=True, set__key='standard_users',
                                                     set__value=standard_users)

        return standard_users

    @classmethod
    def collect_statistics_sites_loaded(cls, last_day=None):
        if not last_day:
            last_day = datetime.datetime.now() - datetime.timedelta(hours=24)
        now = datetime.datetime.now()
        sites_loaded = []
        avg_time_taken = []

        # Bucket the last 24 hours into hourly counts and average load times,
        # then reverse so the oldest hour comes first.
        for hour in range(24):
            start_hours_ago = now - datetime.timedelta(hours=hour)
            end_hours_ago = now - datetime.timedelta(hours=hour+1)
            aggregates = dict(count=Count('loadtime'), avg=Avg('loadtime'))
            load_times = FeedLoadtime.objects.filter(
                date_accessed__lte=start_hours_ago,
                date_accessed__gte=end_hours_ago
            ).aggregate(**aggregates)
            sites_loaded.append(load_times['count'] or 0)
            avg_time_taken.append(load_times['avg'] or 0)
        sites_loaded.reverse()
        avg_time_taken.reverse()

        values = (
            ('sites_loaded', json.encode(sites_loaded)),
            ('avg_time_taken', json.encode(avg_time_taken)),
            ('latest_sites_loaded', sites_loaded[-1]),
            ('latest_avg_time_taken', avg_time_taken[-1]),
            ('max_sites_loaded', max(sites_loaded)),
            ('max_avg_time_taken', max(1, max(avg_time_taken))),
        )
        for key, value in values:
            cls.objects(key=key).update_one(upsert=True, set__key=key, set__value=value)

    @classmethod
    def collect_statistics_stories_shared(cls, last_day=None):
        if not last_day:
            last_day = datetime.datetime.now() - datetime.timedelta(hours=24)
        now = datetime.datetime.now()
        stories_shared = []

        for hour in range(24):
            start_hours_ago = now - datetime.timedelta(hours=hour)
            end_hours_ago = now - datetime.timedelta(hours=hour+1)
            shares = MSharedStory.objects.filter(
                shared_date__lte=start_hours_ago,
                shared_date__gte=end_hours_ago
            ).count()
            stories_shared.append(shares)

        stories_shared.reverse()

        values = (
            ('stories_shared', json.encode(stories_shared)),
            ('latest_stories_shared', stories_shared[-1]),
            ('max_stories_shared', max(stories_shared)),
        )
        for key, value in values:
            cls.objects(key=key).update_one(upsert=True, set__key=key, set__value=value)

    @classmethod
    def collect_statistics_for_db(cls):
        lag = db_functions.mongo_max_replication_lag(settings.MONGODB)
        cls.set('mongodb_replication_lag', lag)

    @classmethod
    def delete_old_stats(cls):
        now = datetime.datetime.now()
        old_age = now - datetime.timedelta(days=7)
        FeedLoadtime.objects.filter(date_accessed__lte=old_age).delete()

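
# These collectors are meant to be run periodically. A minimal scheduler hook
# might look like the sketch below -- ``collect_all_statistics`` is a
# hypothetical name, not part of this module:
#
#     def collect_all_statistics():
#         MStatistics.collect_statistics()
#         MStatistics.delete_old_stats()
#         MFeedback.collect_feedback()
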
class MFeedback(mongo.Document):
    date = mongo.StringField()
    summary = mongo.StringField()
    subject = mongo.StringField()
    url = mongo.StringField()
    style = mongo.StringField()
    order = mongo.IntField()

    meta = {
        'collection': 'feedback',
        'allow_inheritance': False,
        'indexes': ['style'],
        'ordering': ['order'],
    }

    def __unicode__(self):
        return "%s: (%s) %s" % (self.style, self.date, self.subject)

    @classmethod
    def collect_feedback(cls):
        data = urllib2.urlopen('https://getsatisfaction.com/newsblur/topics.widget').read()
        # The widget wraps its JSON payload in a pair of parentheses, so strip
        # the first and last characters before decoding.
        data = json.decode(data[1:-1])
        if len(data):
            cls.objects.delete()
            for i, feedback in enumerate(data):
                feedback['order'] = i
                # Normalize relative dates like "about 2 hours ago".
                for removal in ['about', 'less than']:
                    if removal in feedback['date']:
                        feedback['date'] = feedback['date'].replace(removal, '')
            for feedback in data:
                # Convert unicode keys to strings so they can be used as kwargs.
                fb = dict([(str(k), v) for k, v in feedback.items()])
                cls.objects.create(**fb)

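    # A sketch of one decoded feedback item, matching the fields this document
    # defines (values are illustrative, not real data):
    #
    #     {'date': '2 hours ago', 'subject': 'Feature request', 'summary': '...',
    #      'url': 'https://getsatisfaction.com/newsblur/...', 'style': 'idea'}
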
    @classmethod
    def all(cls):
        return cls.objects.all()[:4]

class MAnalyticsPageLoad(mongo.Document):
    date = mongo.DateTimeField(default=datetime.datetime.now)
    username = mongo.StringField()
    user_id = mongo.IntField()
    is_premium = mongo.BooleanField()
    platform = mongo.StringField()
    path = mongo.StringField()
    duration = mongo.FloatField()

    meta = {
        'db_alias': 'nbanalytics',
        'collection': 'page_loads',
        'allow_inheritance': False,
        'indexes': ['path', 'date', 'platform', 'user_id'],
        'ordering': ['date'],
    }

    def __unicode__(self):
        return "%s / %s: (%.4s) %s" % (self.username, self.platform, self.duration, self.path)

    @classmethod
    def add(cls, user, is_premium, platform, path, duration):
        if user.is_anonymous():
            username = None
            user_id = 0
        else:
            username = user.username
            user_id = user.pk

        path = cls.clean_path(path)

        cls.objects.create(username=username, user_id=user_id, is_premium=is_premium,
                           platform=platform, path=path, duration=duration)

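    # A hypothetical caller (names are illustrative; this module does not ship
    # such a hook) could time each request and record it:
    #
    #     start = time.time()
    #     ...  # handle the request
    #     MAnalyticsPageLoad.add(request.user, profile.is_premium, 'web',
    #                            request.path, time.time() - start)
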
    @classmethod
    def clean_path(cls, path):
        # Collapse per-feed URLs into one bucket so stats aggregate by view
        # rather than by feed id.
        if path and path.startswith('/reader/feed/'):
            path = '/reader/feed/'

        return path

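    # e.g. clean_path('/reader/feed/42') -> '/reader/feed/'; any other path
    # passes through unchanged.
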
    @classmethod
    def fetch_stats(cls, stat_key, stat_value):
        stats = cls.objects.filter(**{stat_key: stat_value})
        return cls.calculate_stats(stats)

    @classmethod
    def calculate_stats(cls, stats):
        # ``stats`` is a queryset; reduce it to simple aggregates. The fields
        # chosen here are an assumption, based on what this document tracks.
        return {
            'count': stats.count(),
            'avg_duration': stats.average('duration'),
        }

    @classmethod
    def clean(cls, days=1):
        last_day = datetime.datetime.now() - datetime.timedelta(days=days)

        from utils.feed_functions import timelimit, TimeoutError

        @timelimit(60)
        def delete_old_history():
            cls.objects(date__lte=last_day).delete()

        try:
            delete_old_history()
        except TimeoutError:
            print "Timed out on deleting old history. Shit."

class MAnalyticsFetcher(mongo.Document):
    date = mongo.DateTimeField(default=datetime.datetime.now)
    feed_id = mongo.IntField()
    feed_fetch_duration = mongo.FloatField()
    feed_process_duration = mongo.FloatField()
    page_duration = mongo.FloatField()
    icon_duration = mongo.FloatField()
    total_duration = mongo.FloatField()

    meta = {
        'db_alias': 'nbanalytics',
        'collection': 'feed_fetches',
        'allow_inheritance': False,
        'indexes': ['date', 'feed_id'],
        'ordering': ['date'],
    }

    def __unicode__(self):
        return "%s: %.4s+%.4s+%.4s+%.4s = %.4ss" % (self.feed_id, self.feed_fetch_duration,
                                                    self.feed_process_duration,
                                                    self.page_duration,
                                                    self.icon_duration,
                                                    self.total_duration)

    @classmethod
    def add(cls, feed_id, feed_fetch_duration, feed_process_duration,
            page_duration, icon_duration, total_duration):
        # The incoming timings appear to be nested (each stage's timer includes
        # the stages before it), so subtract the preceding stage to store
        # per-stage durations.
        if icon_duration and page_duration:
            icon_duration -= page_duration
        if page_duration and feed_process_duration:
            page_duration -= feed_process_duration
        if feed_process_duration and feed_fetch_duration:
            feed_process_duration -= feed_fetch_duration

        cls.objects.create(feed_id=feed_id, feed_fetch_duration=feed_fetch_duration,
                           feed_process_duration=feed_process_duration,
                           page_duration=page_duration, icon_duration=icon_duration,
                           total_duration=total_duration)

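    # Worked example (illustrative numbers): if timings arrive as
    # feed_fetch=1.0s, feed_process=1.5s, page=2.5s, icon=3.0s, the stored
    # per-stage durations become 1.0, 0.5, 1.0, and 0.5 seconds.
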
    @classmethod
    def calculate_stats(cls, stats):
        # ``stats`` is a queryset; reduce it to simple aggregates. The fields
        # chosen here are an assumption, based on what this document tracks.
        return {
            'count': stats.count(),
            'avg_total_duration': stats.average('total_duration'),
        }

    @classmethod
    def clean(cls, days=1):
        last_day = datetime.datetime.now() - datetime.timedelta(days=days)

        from utils.feed_functions import timelimit, TimeoutError

        @timelimit(60)
        def delete_old_history():
            cls.objects(date__lte=last_day).delete()

        try:
            delete_old_history()
        except TimeoutError:
            print "Timed out on deleting old history. Shit."