import datetime
import urllib.error
import urllib.parse
import urllib.request

import dateutil.parser
import mongoengine as mongo
import redis
from django.conf import settings

from apps.profile.models import Profile
from apps.social.models import MSharedStory
from apps.statistics.rstats import RStats, round_time
from utils import db_functions
from utils import json_functions as json
from utils import log as logging
from utils.story_functions import relative_date


class MStatistics(mongo.Document):
    key = mongo.StringField(unique=True)
    value = mongo.DynamicField()
    expiration_date = mongo.DateTimeField()

    meta = {
        'collection': 'statistics',
        'allow_inheritance': False,
        'indexes': ['key'],
    }

    def __str__(self):
        return "%s: %s" % (self.key, self.value)

    @classmethod
    def get(cls, key, default=None):
        obj = cls.objects.filter(key=key).first()
        if not obj:
            return default
        if obj.expiration_date and obj.expiration_date < datetime.datetime.now():
            obj.delete()
            return default
        return obj.value

    @classmethod
    def set(cls, key, value, expiration_sec=None):
        try:
            obj = cls.objects.get(key=key)
        except cls.DoesNotExist:
            obj = cls.objects.create(key=key)
        obj.value = value
        if expiration_sec:
            obj.expiration_date = datetime.datetime.now() + datetime.timedelta(seconds=expiration_sec)
        obj.save()
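
    # Usage sketch: get()/set() make MStatistics behave like a small expiring
    # key/value cache backed by MongoDB. The values below are hypothetical,
    # for illustration only:
    #
    #   MStatistics.set("mongodb_replication_lag", 0.4, expiration_sec=600)
    #   MStatistics.get("mongodb_replication_lag")  # -> 0.4 until expiration
    #   MStatistics.get("missing_key", default=0)   # -> 0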

    @classmethod
    def all(cls):
        stats = cls.objects.all()
        values = dict([(stat.key, stat.value) for stat in stats])
        for key, value in list(values.items()):
            if key in ('avg_time_taken', 'sites_loaded', 'stories_shared'):
                values[key] = json.decode(value)
            elif key in ('feeds_fetched', 'premium_users', 'standard_users',
                         'latest_sites_loaded', 'max_sites_loaded', 'max_stories_shared'):
                values[key] = int(value)
            elif key in ('latest_avg_time_taken', 'max_avg_time_taken', 'last_5_min_time_taken'):
                values[key] = float(value)

        values['total_sites_loaded'] = sum(values['sites_loaded']) if 'sites_loaded' in values else 0
        values['total_stories_shared'] = sum(values['stories_shared']) if 'stories_shared' in values else 0

        return values

    @classmethod
    def collect_statistics(cls):
        now = datetime.datetime.now()
        cls.collect_statistics_premium_users()
        print("Premiums: %s" % (datetime.datetime.now() - now))
        cls.collect_statistics_standard_users()
        print("Standard users: %s" % (datetime.datetime.now() - now))
        cls.collect_statistics_sites_loaded()
        print("Sites loaded: %s" % (datetime.datetime.now() - now))
        cls.collect_statistics_stories_shared()
        print("Stories shared: %s" % (datetime.datetime.now() - now))
        cls.collect_statistics_for_db()
        print("DB Stats: %s" % (datetime.datetime.now() - now))
        cls.collect_statistics_feeds_fetched()
        print("Feeds Fetched: %s" % (datetime.datetime.now() - now))

    @classmethod
    def collect_statistics_feeds_fetched(cls):
        feeds_fetched = RStats.count('feed_fetch', hours=24)
        cls.objects(key='feeds_fetched').update_one(upsert=True,
                                                    set__key='feeds_fetched',
                                                    set__value=feeds_fetched)
        return feeds_fetched

    @classmethod
    def collect_statistics_premium_users(cls):
        last_day = datetime.datetime.now() - datetime.timedelta(hours=24)
        premium_users = Profile.objects.filter(last_seen_on__gte=last_day, is_premium=True).count()
        cls.objects(key='premium_users').update_one(upsert=True,
                                                    set__key='premium_users',
                                                    set__value=premium_users)
        return premium_users

    @classmethod
    def collect_statistics_standard_users(cls):
        last_day = datetime.datetime.now() - datetime.timedelta(hours=24)
        standard_users = Profile.objects.filter(last_seen_on__gte=last_day, is_premium=False).count()
        cls.objects(key='standard_users').update_one(upsert=True,
                                                     set__key='standard_users',
                                                     set__value=standard_users)
        return standard_users

    @classmethod
    def collect_statistics_sites_loaded(cls):
        now = round_time(datetime.datetime.now(), round_to=60)
        sites_loaded = []
        avg_time_taken = []
        last_5_min_time_taken = 0
        r = redis.Redis(connection_pool=settings.REDIS_STATISTICS_POOL)

        for hour in range(24):
            start_hours_ago = now - datetime.timedelta(hours=hour + 1)

            pipe = r.pipeline()
            for m in range(60):
                minute = start_hours_ago + datetime.timedelta(minutes=m)
                # '%s' is the epoch-seconds strftime extension, matching the
                # minute-bucketed keys written by RStats.
                key = "%s:%s" % (RStats.stats_type('page_load'), minute.strftime('%s'))
                pipe.get("%s:s" % key)  # number of page loads in this minute
                pipe.get("%s:a" % key)  # accumulated seconds in this minute
            times = pipe.execute()

            counts = [int(c) for c in times[::2] if c]
            avgs = [float(a) for a in times[1::2] if a]

            if hour == 0:
                # Most recent five minutes with data in the current hour.
                last_5_min_time_taken = round(sum(avgs[-5:]) / max(1, sum(counts[-5:])), 2)

            if counts and avgs:
                count = max(1, sum(counts))
                avg = round(sum(avgs) / count, 3)
            else:
                count = 0
                avg = 0

            sites_loaded.append(count)
            avg_time_taken.append(avg)

        sites_loaded.reverse()
        avg_time_taken.reverse()

        values = (
            ('sites_loaded', json.encode(sites_loaded)),
            ('avg_time_taken', json.encode(avg_time_taken)),
            ('latest_sites_loaded', sites_loaded[-1]),
            ('latest_avg_time_taken', avg_time_taken[-1]),
            ('max_sites_loaded', max(sites_loaded)),
            ('max_avg_time_taken', max(1, max(avg_time_taken))),
            ('last_5_min_time_taken', last_5_min_time_taken),
        )
        for key, value in values:
            cls.objects(key=key).update_one(upsert=True, set__key=key, set__value=value)

    @classmethod
    def collect_statistics_stories_shared(cls):
        now = datetime.datetime.now()
        stories_shared = []

        for hour in range(24):
            start_hours_ago = now - datetime.timedelta(hours=hour)
            end_hours_ago = now - datetime.timedelta(hours=hour + 1)
            shares = MSharedStory.objects.filter(
                shared_date__lte=start_hours_ago,
                shared_date__gte=end_hours_ago,
            ).count()
            stories_shared.append(shares)

        stories_shared.reverse()

        values = (
            ('stories_shared', json.encode(stories_shared)),
            ('latest_stories_shared', stories_shared[-1]),
            ('max_stories_shared', max(stories_shared)),
        )
        for key, value in values:
            cls.objects(key=key).update_one(upsert=True, set__key=key, set__value=value)

    @classmethod
    def collect_statistics_for_db(cls, debug=False):
        lag = db_functions.mongo_max_replication_lag(settings.MONGODB)
        cls.set('mongodb_replication_lag', lag)

        now = round_time(datetime.datetime.now(), round_to=60)
        r = redis.Redis(connection_pool=settings.REDIS_STATISTICS_POOL)
        db_times = {}
        latest_db_times = {}

        for db in ['sql', 'mongo', 'redis', 'task_sql', 'task_mongo', 'task_redis']:
            db_times[db] = []
            for hour in range(24):
                start_hours_ago = now - datetime.timedelta(hours=hour + 1)

                pipe = r.pipeline()
                for m in range(60):
                    minute = start_hours_ago + datetime.timedelta(minutes=m)
                    key = "DB:%s:%s" % (db, minute.strftime('%s'))
                    if debug:
                        print(" -> %s:c" % key)
                    pipe.get("%s:c" % key)  # call count in this minute
                    pipe.get("%s:t" % key)  # total time spent in this minute
                times = pipe.execute()

                counts = [int(c or 0) for c in times[::2]]
                avgs = [float(a or 0) for a in times[1::2]]

                if counts and avgs:
                    count = sum(counts)
                    avg = round(sum(avgs) / count, 3) if count else 0
                else:
                    count = 0
                    avg = 0

                if hour == 0:
                    latest_count = float(counts[-1]) if len(counts) else 0
                    latest_avg = float(avgs[-1]) if len(avgs) else 0
                    latest_db_times[db] = latest_avg / latest_count if latest_count else 0

                db_times[db].append(avg)

            db_times[db].reverse()

        values = (
            ('avg_sql_times', json.encode(db_times['sql'])),
            ('avg_mongo_times', json.encode(db_times['mongo'])),
            ('avg_redis_times', json.encode(db_times['redis'])),
            ('latest_sql_avg', latest_db_times['sql']),
            ('latest_mongo_avg', latest_db_times['mongo']),
            ('latest_redis_avg', latest_db_times['redis']),
            ('latest_task_sql_avg', latest_db_times['task_sql']),
            ('latest_task_mongo_avg', latest_db_times['task_mongo']),
            ('latest_task_redis_avg', latest_db_times['task_redis']),
        )
        for key, value in values:
            cls.objects(key=key).update_one(upsert=True, set__key=key, set__value=value)
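

# The collectors above read minute-bucketed Redis counters written elsewhere
# (see RStats). A minimal sketch of the assumed write side, based on the
# "%s:s"/"%s:a" suffixes read in collect_statistics_sites_loaded ("s" = sample
# count, "a" = accumulated seconds); the 0.235s duration is hypothetical:
#
#   r = redis.Redis(connection_pool=settings.REDIS_STATISTICS_POOL)
#   key = "%s:%s" % (RStats.stats_type('page_load'), minute.strftime('%s'))
#   r.incr("%s:s" % key)                 # one more page load this minute
#   r.incrbyfloat("%s:a" % key, 0.235)   # add this request's duration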


class MFeedback(mongo.Document):
    date = mongo.DateTimeField()
    date_short = mongo.StringField()
    subject = mongo.StringField()
    url = mongo.StringField()
    style = mongo.StringField()
    order = mongo.IntField()

    meta = {
        'collection': 'feedback',
        'allow_inheritance': False,
        'indexes': ['style'],
        'ordering': ['order'],
    }

    CATEGORIES = {
        5: 'idea',
        6: 'problem',
        7: 'praise',
        8: 'question',
    }

    def __str__(self):
        return "%s: (%s) %s" % (self.style, self.date, self.subject)

    @classmethod
    def collect_feedback(cls):
        seen_posts = set()
        try:
            data = urllib.request.urlopen('https://forum.newsblur.com/posts.json').read()
        except urllib.error.HTTPError as e:
            logging.debug(" ***> Failed to collect feedback: %s" % e)
            return
        data = json.decode(data).get('latest_posts', "")

        if not len(data):
            print("No data!")
            return

        cls.objects.delete()
        post_count = 0
        for post in data:
            # Only keep the first post seen per topic.
            if post['topic_id'] in seen_posts:
                continue
            seen_posts.add(post['topic_id'])
            feedback = {}
            feedback['order'] = post_count
            post_count += 1
            feedback['date'] = dateutil.parser.parse(post['created_at']).replace(tzinfo=None)
            feedback['date_short'] = relative_date(feedback['date'])
            feedback['subject'] = post['topic_title']
            feedback['url'] = "https://forum.newsblur.com/t/%s/%s/%s" % (
                post['topic_slug'], post['topic_id'], post['post_number'])
            feedback['style'] = cls.CATEGORIES[post['category_id']]
            cls.objects.create(**feedback)
            print("%s: %s (%s)" % (feedback['style'], feedback['subject'], feedback['date_short']))
            if post_count >= 4:
                break

    @classmethod
    def all(cls):
        feedbacks = cls.objects.all()[:4]
        return feedbacks


class MAnalyticsFetcher(mongo.Document):
    date = mongo.DateTimeField(default=datetime.datetime.now)
    feed_id = mongo.IntField()
    feed_fetch = mongo.FloatField()
    feed_process = mongo.FloatField()
    page = mongo.FloatField()
    icon = mongo.FloatField()
    total = mongo.FloatField()
    server = mongo.StringField()
    feed_code = mongo.IntField()

    meta = {
        'db_alias': 'nbanalytics',
        'collection': 'feed_fetches',
        'allow_inheritance': False,
        'indexes': ['date', 'feed_id', 'server', 'feed_code'],
        'ordering': ['date'],
    }

    def __str__(self):
        return "%s: %.4s+%.4s+%.4s+%.4s = %.4ss" % (self.feed_id, self.feed_fetch,
                                                    self.feed_process, self.page,
                                                    self.icon, self.total)

    @classmethod
    def add(cls, feed_id, feed_fetch, feed_process, page, icon, total, feed_code):
        server_name = settings.SERVER_NAME
        if 'app' in server_name:
            return

        # Convert cumulative checkpoint times into per-stage durations.
        if icon and page:
            icon -= page
        if page and feed_process:
            page -= feed_process
        elif page and feed_fetch:
            page -= feed_fetch
        if feed_process and feed_fetch:
            feed_process -= feed_fetch

        cls.objects.create(feed_id=feed_id, feed_fetch=feed_fetch,
                           feed_process=feed_process, page=page,
                           icon=icon, total=total, server=server_name,
                           feed_code=feed_code)

    @classmethod
    def calculate_stats(cls, stats):
        return cls.aggregate(**stats)
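

# Worked example for MAnalyticsFetcher.add() above, with hypothetical inputs:
# the caller passes cumulative checkpoint times, and add() converts them to
# per-stage durations before saving. Given feed_fetch=0.2, feed_process=0.5,
# page=0.9, icon=1.0 (seconds since the fetch started), the stored values
# become feed_fetch=0.2, feed_process=0.3, page=0.4, icon=0.1.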


class MAnalyticsLoader(mongo.Document):
    date = mongo.DateTimeField(default=datetime.datetime.now)
    page_load = mongo.FloatField()
    server = mongo.StringField()

    meta = {
        'db_alias': 'nbanalytics',
        'collection': 'page_loads',
        'allow_inheritance': False,
        'indexes': ['date', 'server'],
        'ordering': ['date'],
    }

    def __str__(self):
        return "%s: %.4ss" % (self.server, self.page_load)

    @classmethod
    def add(cls, page_load):
        server_name = settings.SERVER_NAME
        cls.objects.create(page_load=page_load, server=server_name)

    @classmethod
    def calculate_stats(cls, stats):
        return cls.aggregate(**stats)
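

# Usage sketch, with a hypothetical timing value: record one page load for
# the current server (SERVER_NAME comes from Django settings).
#
#   MAnalyticsLoader.add(page_load=0.512)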