Deprecate FeedLoadtime and move to MongoDB-backed aggregate data for feed load time graphs.

This commit is contained in:
Samuel Clay 2012-09-27 15:21:38 -07:00
parent 2461b2f83a
commit f9ed7fdd26
8 changed files with 103 additions and 57 deletions

2
.gitignore vendored
View file

@ -4,6 +4,8 @@ logs/*.pid
*.pyc
static/*
local_settings.py
celerybeat-schedule
celerybeat.pid
media/iphone/NewsBlur/build
media/iphone/build
build/

View file

@ -41,7 +41,6 @@ class CollectStats(Task):
def run(self, **kwargs):
logging.debug(" ---> Collecting stats...")
MStatistics.collect_statistics()
MStatistics.delete_old_stats()
class CollectFeedback(Task):

View file

@ -30,7 +30,7 @@ from apps.reader.forms import SignupForm, LoginForm, FeatureForm
from apps.rss_feeds.models import MFeedIcon
from apps.statistics.models import MStatistics
try:
from apps.rss_feeds.models import Feed, MFeedPage, DuplicateFeed, MStory, MStarredStory, FeedLoadtime
from apps.rss_feeds.models import Feed, MFeedPage, DuplicateFeed, MStory, MStarredStory
except:
pass
from apps.social.models import MSharedStory, MSocialProfile, MSocialServices
@ -542,7 +542,6 @@ def load_single_feed(request, feed_id):
if timediff > 0.50 else "")
logging.user(request, "~FYLoading feed: ~SB%s%s (%s/%s) %s" % (
feed.feed_title[:22], ('~SN/p%s' % page) if page > 1 else '', order, read_filter, time_breakdown))
FeedLoadtime.objects.create(feed=feed, loadtime=timediff)
data = dict(stories=stories,
user_profiles=user_profiles,

View file

@ -8,5 +8,4 @@ class Command(BaseCommand):
def handle(self, *args, **options):
MStatistics.collect_statistics()
MStatistics.delete_old_stats()

View file

@ -1,10 +1,8 @@
import datetime
import mongoengine as mongo
import urllib2
from django.db.models import Avg, Count
from django.conf import settings
from apps.rss_feeds.models import MFeedFetchHistory, MPageFetchHistory, MFeedPushHistory
from apps.rss_feeds.models import FeedLoadtime
from apps.social.models import MSharedStory
from apps.profile.models import Profile
from utils import json_functions as json
@ -57,24 +55,22 @@ class MStatistics(mongo.Document):
@classmethod
def collect_statistics(cls):
now = datetime.datetime.now()
last_day = datetime.datetime.now() - datetime.timedelta(hours=24)
cls.collect_statistics_feeds_fetched(last_day)
cls.collect_statistics_feeds_fetched()
print "Feeds Fetched: %s" % (datetime.datetime.now() - now)
cls.collect_statistics_premium_users(last_day)
cls.collect_statistics_premium_users()
print "Premiums: %s" % (datetime.datetime.now() - now)
cls.collect_statistics_standard_users(last_day)
cls.collect_statistics_standard_users()
print "Standard users: %s" % (datetime.datetime.now() - now)
cls.collect_statistics_sites_loaded(last_day)
cls.collect_statistics_sites_loaded()
print "Sites loaded: %s" % (datetime.datetime.now() - now)
cls.collect_statistics_stories_shared(last_day)
cls.collect_statistics_stories_shared()
print "Stories shared: %s" % (datetime.datetime.now() - now)
cls.collect_statistics_for_db()
print "DB Stats: %s" % (datetime.datetime.now() - now)
@classmethod
def collect_statistics_feeds_fetched(cls, last_day=None):
if not last_day:
last_day = datetime.datetime.now() - datetime.timedelta(hours=24)
def collect_statistics_feeds_fetched(cls):
last_day = datetime.datetime.now() - datetime.timedelta(hours=24)
last_month = datetime.datetime.now() - datetime.timedelta(days=30)
feeds_fetched = MFeedFetchHistory.objects.filter(fetch_date__gte=last_day).count()
@ -100,19 +96,17 @@ class MStatistics(mongo.Document):
return feeds_fetched
@classmethod
def collect_statistics_premium_users(cls, last_day=None):
if not last_day:
last_day = datetime.datetime.now() - datetime.timedelta(hours=24)
def collect_statistics_premium_users(cls):
last_day = datetime.datetime.now() - datetime.timedelta(hours=24)
premium_users = Profile.objects.filter(last_seen_on__gte=last_day, is_premium=True).count()
cls.objects(key='premium_users').update_one(upsert=True, set__key='premium_users', set__value=premium_users)
return premium_users
@classmethod
def collect_statistics_standard_users(cls, last_day=None):
if not last_day:
last_day = datetime.datetime.now() - datetime.timedelta(hours=24)
def collect_statistics_standard_users(cls):
last_day = datetime.datetime.now() - datetime.timedelta(hours=24)
standard_users = Profile.objects.filter(last_seen_on__gte=last_day, is_premium=False).count()
cls.objects(key='standard_users').update_one(upsert=True, set__key='standard_users', set__value=standard_users)
@ -120,9 +114,7 @@ class MStatistics(mongo.Document):
return standard_users
@classmethod
def collect_statistics_sites_loaded(cls, last_day=None):
if not last_day:
last_day = datetime.datetime.now() - datetime.timedelta(hours=24)
def collect_statistics_sites_loaded(cls):
now = datetime.datetime.now()
sites_loaded = []
avg_time_taken = []
@ -130,13 +122,39 @@ class MStatistics(mongo.Document):
for hour in range(24):
start_hours_ago = now - datetime.timedelta(hours=hour)
end_hours_ago = now - datetime.timedelta(hours=hour+1)
aggregates = dict(count=Count('loadtime'), avg=Avg('loadtime'))
load_times = FeedLoadtime.objects.filter(
date_accessed__lte=start_hours_ago,
date_accessed__gte=end_hours_ago
).aggregate(**aggregates)
sites_loaded.append(load_times['count'] or 0)
avg_time_taken.append(load_times['avg'] or 0)
load_times = settings.MONGOANALYTICSDB.nbanalytics.page_loads.aggregate([{
"$match": {
"date": {
"$gte": end_hours_ago,
"$lte": start_hours_ago,
},
"path": {
"$in": [
"/reader/feed/",
"/social/stories/",
"/reader/river_stories/",
"/social/river_stories/",
]
}
},
}, {
"$group": {
"_id" : 1,
"count" : {"$sum": 1},
"avg" : {"$avg": "$duration"},
},
}])
count = 0
avg = 0
if load_times['result']:
count = load_times['result'][0]['count']
avg = load_times['result'][0]['avg']
sites_loaded.append(count)
avg_time_taken.append(avg)
sites_loaded.reverse()
avg_time_taken.reverse()
@ -152,9 +170,7 @@ class MStatistics(mongo.Document):
cls.objects(key=key).update_one(upsert=True, set__key=key, set__value=value)
@classmethod
def collect_statistics_stories_shared(cls, last_day=None):
if not last_day:
last_day = datetime.datetime.now() - datetime.timedelta(hours=24)
def collect_statistics_stories_shared(cls):
now = datetime.datetime.now()
stories_shared = []
@ -182,11 +198,6 @@ class MStatistics(mongo.Document):
lag = db_functions.mongo_max_replication_lag(settings.MONGODB)
cls.set('mongodb_replication_lag', lag)
@classmethod
def delete_old_stats(cls):
now = datetime.datetime.now()
old_age = now - datetime.timedelta(days=7)
FeedLoadtime.objects.filter(date_accessed__lte=old_age).delete()
class MFeedback(mongo.Document):
date = mongo.StringField()

View file

@ -9,7 +9,7 @@
# 199.15.253.226 db03 db03.newsblur.com
199.15.249.98 db04 db04.newsblur.com
199.15.249.99 db05 db05.newsblur.com
# 199.15.249.100 db06 db06.newsblur.com
199.15.249.101 db07 db07.newsblur.com
199.15.250.231 task01 task01.newsblur.com
199.15.250.250 task02 task02.newsblur.com
@ -18,6 +18,7 @@
199.15.252.106 task05 task05.newsblur.com
199.15.252.107 task06 task06.newsblur.com
199.15.252.108 task07 task07.newsblur.com
# EC2
23.20.165.187 db10 db10.newsblur.com
199.15.251.144 task08 task08.newsblur.com
199.15.251.154 task09 task09.newsblur.com
199.15.251.137 task10 task10.newsblur.com
199.15.251.155 task11 task11.newsblur.com

View file

@ -1,8 +1,14 @@
import pymongo
PRIMARY_STATE = 1
SECONDARY_STATE = 2
def mongo_max_replication_lag(connection):
status = connection.admin.command('replSetGetStatus')
try:
status = connection.admin.command('replSetGetStatus')
except pymongo.errors.OperationFailure:
return 0
members = status['members']
primary_optime = None
oldest_secondary_optime = None

View file

@ -1,6 +1,7 @@
#!/usr/bin/env python
from utils.munin.base import MuninGraph
from django.conf import settings
import datetime
class NBMuninGraph(MuninGraph):
@ -17,18 +18,46 @@ class NBMuninGraph(MuninGraph):
}
def calculate_metrics(self):
from django.db.models import Avg, Min, Max, Count
import datetime
from apps.rss_feeds.models import FeedLoadtime
hour_ago = datetime.datetime.utcnow() - datetime.timedelta(minutes=60)
averages = dict(avg=Avg('loadtime'), max=Max('loadtime'), min=Min('loadtime'), count=Count('loadtime'))
hour = FeedLoadtime.objects.filter(date_accessed__gte=hour_ago).aggregate(**averages)
times = settings.MONGOANALYTICSDB.nbanalytics.page_loads.aggregate([{
"$match": {
"date": {
"$gte": hour_ago,
},
"path": {
"$in": [
"/reader/feed/",
"/social/stories/",
"/reader/river_stories/",
"/social/river_stories/",
]
}
},
}, {
"$group": {
"_id" : 1,
"count" : {"$sum": 1},
"avg" : {"$avg": "$duration"},
"min" : {"$min": "$duration"},
"max" : {"$max": "$duration"},
},
}])
load_avg = 0
load_min = 0
load_max = 0
load_count = 0
if times['result']:
load_avg = times['result'][0]['avg']
load_min = times['result'][0]['min']
load_max = times['result'][0]['max']
load_count = times['result'][0]['count']
return {
'feed_loadtimes_avg_hour': hour['avg'],
'feed_loadtimes_min_hour': hour['min'],
'feed_loadtimes_max_hour': hour['max'],
'feeds_loaded_hour': hour['count'],
'feed_loadtimes_avg_hour': load_avg,
'feed_loadtimes_min_hour': load_min,
'feed_loadtimes_max_hour': load_max,
'feeds_loaded_hour': load_count,
}
if __name__ == '__main__':