diff --git a/.gitignore b/.gitignore index 3569ddaee..61a96d369 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,8 @@ logs/*.pid *.pyc static/* local_settings.py +celerybeat-schedule +celerybeat.pid media/iphone/NewsBlur/build media/iphone/build build/ diff --git a/apps/reader/tasks.py b/apps/reader/tasks.py index dd0bcceb6..d6982f5d4 100644 --- a/apps/reader/tasks.py +++ b/apps/reader/tasks.py @@ -41,7 +41,6 @@ class CollectStats(Task): def run(self, **kwargs): logging.debug(" ---> Collecting stats...") MStatistics.collect_statistics() - MStatistics.delete_old_stats() class CollectFeedback(Task): diff --git a/apps/reader/views.py b/apps/reader/views.py index 6bb4fa58c..ed976569a 100644 --- a/apps/reader/views.py +++ b/apps/reader/views.py @@ -30,7 +30,7 @@ from apps.reader.forms import SignupForm, LoginForm, FeatureForm from apps.rss_feeds.models import MFeedIcon from apps.statistics.models import MStatistics try: - from apps.rss_feeds.models import Feed, MFeedPage, DuplicateFeed, MStory, MStarredStory, FeedLoadtime + from apps.rss_feeds.models import Feed, MFeedPage, DuplicateFeed, MStory, MStarredStory except: pass from apps.social.models import MSharedStory, MSocialProfile, MSocialServices @@ -542,7 +542,6 @@ def load_single_feed(request, feed_id): if timediff > 0.50 else "") logging.user(request, "~FYLoading feed: ~SB%s%s (%s/%s) %s" % ( feed.feed_title[:22], ('~SN/p%s' % page) if page > 1 else '', order, read_filter, time_breakdown)) - FeedLoadtime.objects.create(feed=feed, loadtime=timediff) data = dict(stories=stories, user_profiles=user_profiles, diff --git a/apps/statistics/management/commands/collect_stats.py b/apps/statistics/management/commands/collect_stats.py index d16c11967..f70e34e21 100644 --- a/apps/statistics/management/commands/collect_stats.py +++ b/apps/statistics/management/commands/collect_stats.py @@ -8,5 +8,4 @@ class Command(BaseCommand): def handle(self, *args, **options): MStatistics.collect_statistics() - - MStatistics.delete_old_stats() \ No newline at end of file + \ No newline at end of file diff --git a/apps/statistics/models.py b/apps/statistics/models.py index 50bca12d0..21e3f2673 100644 --- a/apps/statistics/models.py +++ b/apps/statistics/models.py @@ -1,10 +1,8 @@ import datetime import mongoengine as mongo import urllib2 -from django.db.models import Avg, Count from django.conf import settings from apps.rss_feeds.models import MFeedFetchHistory, MPageFetchHistory, MFeedPushHistory -from apps.rss_feeds.models import FeedLoadtime from apps.social.models import MSharedStory from apps.profile.models import Profile from utils import json_functions as json @@ -57,24 +55,22 @@ class MStatistics(mongo.Document): @classmethod def collect_statistics(cls): now = datetime.datetime.now() - last_day = datetime.datetime.now() - datetime.timedelta(hours=24) - cls.collect_statistics_feeds_fetched(last_day) + cls.collect_statistics_feeds_fetched() print "Feeds Fetched: %s" % (datetime.datetime.now() - now) - cls.collect_statistics_premium_users(last_day) + cls.collect_statistics_premium_users() print "Premiums: %s" % (datetime.datetime.now() - now) - cls.collect_statistics_standard_users(last_day) + cls.collect_statistics_standard_users() print "Standard users: %s" % (datetime.datetime.now() - now) - cls.collect_statistics_sites_loaded(last_day) + cls.collect_statistics_sites_loaded() print "Sites loaded: %s" % (datetime.datetime.now() - now) - cls.collect_statistics_stories_shared(last_day) + cls.collect_statistics_stories_shared() print "Stories shared: %s" % (datetime.datetime.now() - now) cls.collect_statistics_for_db() print "DB Stats: %s" % (datetime.datetime.now() - now) @classmethod - def collect_statistics_feeds_fetched(cls, last_day=None): - if not last_day: - last_day = datetime.datetime.now() - datetime.timedelta(hours=24) + def collect_statistics_feeds_fetched(cls): + last_day = datetime.datetime.now() - datetime.timedelta(hours=24) last_month = datetime.datetime.now() - datetime.timedelta(days=30) feeds_fetched = MFeedFetchHistory.objects.filter(fetch_date__gte=last_day).count() @@ -100,19 +96,17 @@ class MStatistics(mongo.Document): return feeds_fetched @classmethod - def collect_statistics_premium_users(cls, last_day=None): - if not last_day: - last_day = datetime.datetime.now() - datetime.timedelta(hours=24) - + def collect_statistics_premium_users(cls): + last_day = datetime.datetime.now() - datetime.timedelta(hours=24) + premium_users = Profile.objects.filter(last_seen_on__gte=last_day, is_premium=True).count() cls.objects(key='premium_users').update_one(upsert=True, set__key='premium_users', set__value=premium_users) return premium_users @classmethod - def collect_statistics_standard_users(cls, last_day=None): - if not last_day: - last_day = datetime.datetime.now() - datetime.timedelta(hours=24) + def collect_statistics_standard_users(cls): + last_day = datetime.datetime.now() - datetime.timedelta(hours=24) standard_users = Profile.objects.filter(last_seen_on__gte=last_day, is_premium=False).count() cls.objects(key='standard_users').update_one(upsert=True, set__key='standard_users', set__value=standard_users) @@ -120,9 +114,7 @@ class MStatistics(mongo.Document): return standard_users @classmethod - def collect_statistics_sites_loaded(cls, last_day=None): - if not last_day: - last_day = datetime.datetime.now() - datetime.timedelta(hours=24) + def collect_statistics_sites_loaded(cls): now = datetime.datetime.now() sites_loaded = [] avg_time_taken = [] @@ -130,13 +122,39 @@ class MStatistics(mongo.Document): for hour in range(24): start_hours_ago = now - datetime.timedelta(hours=hour) end_hours_ago = now - datetime.timedelta(hours=hour+1) - aggregates = dict(count=Count('loadtime'), avg=Avg('loadtime')) - load_times = FeedLoadtime.objects.filter( - date_accessed__lte=start_hours_ago, - date_accessed__gte=end_hours_ago - ).aggregate(**aggregates) - sites_loaded.append(load_times['count'] or 0) - avg_time_taken.append(load_times['avg'] or 0) + + load_times = settings.MONGOANALYTICSDB.nbanalytics.page_loads.aggregate([{ + "$match": { + "date": { + "$gte": end_hours_ago, + "$lte": start_hours_ago, + }, + "path": { + "$in": [ + "/reader/feed/", + "/social/stories/", + "/reader/river_stories/", + "/social/river_stories/", + ] + } + }, + }, { + "$group": { + "_id" : 1, + "count" : {"$sum": 1}, + "avg" : {"$avg": "$duration"}, + }, + }]) + + count = 0 + avg = 0 + if load_times['result']: + count = load_times['result'][0]['count'] + avg = load_times['result'][0]['avg'] + + sites_loaded.append(count) + avg_time_taken.append(avg) + sites_loaded.reverse() avg_time_taken.reverse() @@ -152,9 +170,7 @@ class MStatistics(mongo.Document): cls.objects(key=key).update_one(upsert=True, set__key=key, set__value=value) @classmethod - def collect_statistics_stories_shared(cls, last_day=None): - if not last_day: - last_day = datetime.datetime.now() - datetime.timedelta(hours=24) + def collect_statistics_stories_shared(cls): now = datetime.datetime.now() stories_shared = [] @@ -182,11 +198,6 @@ class MStatistics(mongo.Document): lag = db_functions.mongo_max_replication_lag(settings.MONGODB) cls.set('mongodb_replication_lag', lag) - @classmethod - def delete_old_stats(cls): - now = datetime.datetime.now() - old_age = now - datetime.timedelta(days=7) - FeedLoadtime.objects.filter(date_accessed__lte=old_age).delete() class MFeedback(mongo.Document): date = mongo.StringField() diff --git a/config/hosts b/config/hosts index a0b8780cf..fd6db5ee4 100644 --- a/config/hosts +++ b/config/hosts @@ -9,7 +9,7 @@ # 199.15.253.226 db03 db03.newsblur.com 199.15.249.98 db04 db04.newsblur.com 199.15.249.99 db05 db05.newsblur.com -# 199.15.249.100 db06 db06.newsblur.com + 199.15.249.101 db07 db07.newsblur.com 199.15.250.231 task01 task01.newsblur.com 199.15.250.250 task02 task02.newsblur.com @@ -18,6 +18,7 @@ 199.15.252.106 task05 task05.newsblur.com 199.15.252.107 task06 task06.newsblur.com 199.15.252.108 task07 task07.newsblur.com - -# EC2 -23.20.165.187 db10 db10.newsblur.com \ No newline at end of file +199.15.251.144 task08 task08.newsblur.com +199.15.251.154 task09 task09.newsblur.com +199.15.251.137 task10 task10.newsblur.com +199.15.251.155 task11 task11.newsblur.com diff --git a/utils/db_functions.py b/utils/db_functions.py index 59ab8e031..ed3030313 100644 --- a/utils/db_functions.py +++ b/utils/db_functions.py @@ -1,8 +1,14 @@ +import pymongo + PRIMARY_STATE = 1 SECONDARY_STATE = 2 def mongo_max_replication_lag(connection): - status = connection.admin.command('replSetGetStatus') + try: + status = connection.admin.command('replSetGetStatus') + except pymongo.errors.OperationFailure: + return 0 + members = status['members'] primary_optime = None oldest_secondary_optime = None diff --git a/utils/munin/newsblur_loadtimes.py b/utils/munin/newsblur_loadtimes.py index 2702a4c3a..9fbb26e9d 100755 --- a/utils/munin/newsblur_loadtimes.py +++ b/utils/munin/newsblur_loadtimes.py @@ -1,6 +1,7 @@ #!/usr/bin/env python from utils.munin.base import MuninGraph - +from django.conf import settings +import datetime class NBMuninGraph(MuninGraph): @@ -17,18 +18,46 @@ class NBMuninGraph(MuninGraph): } def calculate_metrics(self): - from django.db.models import Avg, Min, Max, Count - import datetime - from apps.rss_feeds.models import FeedLoadtime hour_ago = datetime.datetime.utcnow() - datetime.timedelta(minutes=60) - - averages = dict(avg=Avg('loadtime'), max=Max('loadtime'), min=Min('loadtime'), count=Count('loadtime')) - hour = FeedLoadtime.objects.filter(date_accessed__gte=hour_ago).aggregate(**averages) + times = settings.MONGOANALYTICSDB.nbanalytics.page_loads.aggregate([{ + "$match": { + "date": { + "$gte": hour_ago, + }, + "path": { + "$in": [ + "/reader/feed/", + "/social/stories/", + "/reader/river_stories/", + "/social/river_stories/", + ] + } + }, + }, { + "$group": { + "_id" : 1, + "count" : {"$sum": 1}, + "avg" : {"$avg": "$duration"}, + "min" : {"$min": "$duration"}, + "max" : {"$max": "$duration"}, + }, + }]) + + load_avg = 0 + load_min = 0 + load_max = 0 + load_count = 0 + if times['result']: + load_avg = times['result'][0]['avg'] + load_min = times['result'][0]['min'] + load_max = times['result'][0]['max'] + load_count = times['result'][0]['count'] + return { - 'feed_loadtimes_avg_hour': hour['avg'], - 'feed_loadtimes_min_hour': hour['min'], - 'feed_loadtimes_max_hour': hour['max'], - 'feeds_loaded_hour': hour['count'], + 'feed_loadtimes_avg_hour': load_avg, + 'feed_loadtimes_min_hour': load_min, + 'feed_loadtimes_max_hour': load_max, + 'feeds_loaded_hour': load_count, } if __name__ == '__main__':