# NewsBlur/apps/statistics/models.py
import datetime
import urllib.error
import urllib.parse
import urllib.request

import dateutil.parser
import mongoengine as mongo
import redis
from django.conf import settings

from apps.profile.models import Profile
from apps.social.models import MSharedStory
from apps.statistics.rstats import RStats, round_time
from utils import db_functions
from utils import json_functions as json
from utils import log as logging
from utils.story_functions import relative_date


class MStatistics(mongo.Document):
    key = mongo.StringField(unique=True)
    value = mongo.DynamicField()
    expiration_date = mongo.DateTimeField()

    meta = {
        'collection': 'statistics',
        'allow_inheritance': False,
        'indexes': ['key'],
    }

    def __str__(self):
        return "%s: %s" % (self.key, self.value)

    @classmethod
    def get(cls, key, default=None):
        obj = cls.objects.filter(key=key).first()
        if not obj:
            return default
        if obj.expiration_date and obj.expiration_date < datetime.datetime.now():
            obj.delete()
            return default
        return obj.value

    @classmethod
    def set(cls, key, value, expiration_sec=None):
        try:
            obj = cls.objects.get(key=key)
        except cls.DoesNotExist:
            obj = cls.objects.create(key=key)
        obj.value = value
        if expiration_sec:
            obj.expiration_date = datetime.datetime.now() + datetime.timedelta(seconds=expiration_sec)
        obj.save()
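
    # A quick usage sketch (the key and values here are hypothetical, not from
    # this module):
    #
    #   MStatistics.set('mongodb_replication_lag', 0.5, expiration_sec=60 * 60)
    #   MStatistics.get('mongodb_replication_lag', default=0)  # -> 0.5, or 0 once expired
    #
    # get() lazily deletes expired rows, so a stale key simply falls back to default.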

    @classmethod
    def all(cls):
        stats = cls.objects.all()
        values = {stat.key: stat.value for stat in stats}
        for key, value in list(values.items()):
            if key in ('avg_time_taken', 'sites_loaded', 'stories_shared'):
                values[key] = json.decode(value)
            elif key in ('feeds_fetched', 'premium_users', 'standard_users', 'latest_sites_loaded',
                         'max_sites_loaded', 'max_stories_shared'):
                values[key] = int(value)
            elif key in ('latest_avg_time_taken', 'max_avg_time_taken', 'last_1_min_time_taken'):
                values[key] = float(value)

        values['total_sites_loaded'] = sum(values['sites_loaded']) if 'sites_loaded' in values else 0
        values['total_stories_shared'] = sum(values['stories_shared']) if 'stories_shared' in values else 0

        return values
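
    # The dict that all() returns mixes decoded hourly series with scalar
    # counters, shaped roughly like this (illustrative values):
    #
    #   {'sites_loaded': [112, 98, ...], 'total_sites_loaded': 2410,
    #    'avg_time_taken': [0.41, 0.43, ...], 'premium_users': 1234, ...}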

    @classmethod
    def collect_statistics(cls):
        now = datetime.datetime.now()
        cls.collect_statistics_premium_users()
        # if settings.DEBUG:
        #     print("Premiums: %s" % (datetime.datetime.now() - now))
        cls.collect_statistics_standard_users()
        # if settings.DEBUG:
        #     print("Standard users: %s" % (datetime.datetime.now() - now))
        cls.collect_statistics_sites_loaded()
        # if settings.DEBUG:
        #     print("Sites loaded: %s" % (datetime.datetime.now() - now))
        cls.collect_statistics_stories_shared()
        # if settings.DEBUG:
        #     print("Stories shared: %s" % (datetime.datetime.now() - now))
        cls.collect_statistics_for_db()
        # if settings.DEBUG:
        #     print("DB Stats: %s" % (datetime.datetime.now() - now))
        cls.collect_statistics_feeds_fetched()
        # if settings.DEBUG:
        #     print("Feeds Fetched: %s" % (datetime.datetime.now() - now))

    @classmethod
    def collect_statistics_feeds_fetched(cls):
        feeds_fetched = RStats.count('feed_fetch', hours=24)
        cls.objects(key='feeds_fetched').update_one(upsert=True,
                                                    set__key='feeds_fetched',
                                                    set__value=feeds_fetched)

        return feeds_fetched

    @classmethod
    def collect_statistics_premium_users(cls):
        last_day = datetime.datetime.now() - datetime.timedelta(hours=24)

        premium_users = Profile.objects.filter(last_seen_on__gte=last_day, is_premium=True).count()
        cls.objects(key='premium_users').update_one(upsert=True, set__key='premium_users', set__value=premium_users)

        return premium_users

    @classmethod
    def collect_statistics_standard_users(cls):
        last_day = datetime.datetime.now() - datetime.timedelta(hours=24)

        standard_users = Profile.objects.filter(last_seen_on__gte=last_day, is_premium=False).count()
        cls.objects(key='standard_users').update_one(upsert=True, set__key='standard_users', set__value=standard_users)

        return standard_users

    @classmethod
    def collect_statistics_sites_loaded(cls):
        now = round_time(datetime.datetime.now(), round_to=60)
        sites_loaded = []
        avg_time_taken = []
        last_1_min_time_taken = 0
        r = redis.Redis(connection_pool=settings.REDIS_STATISTICS_POOL)

        for hours_ago in range(24):
            start_hours_ago = now - datetime.timedelta(hours=hours_ago+1)
            pipe = r.pipeline()
            for m in range(60):
                minute = start_hours_ago + datetime.timedelta(minutes=m)
                key = "%s:%s" % (RStats.stats_type('page_load'), minute.strftime('%s'))
                pipe.get("%s:s" % key)
                pipe.get("%s:a" % key)

            times = pipe.execute()

            counts = [int(c) for c in times[::2] if c]
            avgs = [float(a) for a in times[1::2] if a]
            if hours_ago == 0:
                # Minutes are ordered oldest-to-newest, so the most recent
                # minute's samples sit at the end of the hour, not the start.
                last_1_min_time_taken = round(sum(avgs[-1:]) / max(1, sum(counts[-1:])), 2)
            if counts and avgs:
                count = max(1, sum(counts))
                avg = round(sum(avgs) / count, 3)
            else:
                count = 0
                avg = 0
            sites_loaded.append(count)
            avg_time_taken.append(avg)

        sites_loaded.reverse()
        avg_time_taken.reverse()

        values = (
            ('sites_loaded', json.encode(sites_loaded)),
            ('avg_time_taken', json.encode(avg_time_taken)),
            ('latest_sites_loaded', sites_loaded[-1]),
            ('latest_avg_time_taken', avg_time_taken[-1]),
            ('max_sites_loaded', max(sites_loaded)),
            ('max_avg_time_taken', max(1, max(avg_time_taken))),
            ('last_1_min_time_taken', last_1_min_time_taken),
        )
        for key, value in values:
            cls.objects(key=key).update_one(upsert=True, set__key=key, set__value=value)
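
    # A sketch of the Redis layout read above, assuming RStats.stats_type('page_load')
    # resolves to a short key prefix (the prefix and values here are hypothetical):
    #
    #   <prefix>:<epoch minute>:s  -> "112"   # page loads counted in that minute
    #   <prefix>:<epoch minute>:a  -> "48.2"  # seconds accumulated across those loads
    #
    # Summing an hour's :a values and dividing by the :s total yields that
    # hour's average page-load time.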

    @classmethod
    def collect_statistics_stories_shared(cls):
        now = datetime.datetime.now()
        stories_shared = []

        for hour in range(24):
            start_hours_ago = now - datetime.timedelta(hours=hour)
            end_hours_ago = now - datetime.timedelta(hours=hour+1)
            shares = MSharedStory.objects.filter(
                shared_date__lte=start_hours_ago,
                shared_date__gte=end_hours_ago
            ).count()
            stories_shared.append(shares)

        stories_shared.reverse()

        values = (
            ('stories_shared', json.encode(stories_shared)),
            ('latest_stories_shared', stories_shared[-1]),
            ('max_stories_shared', max(stories_shared)),
        )
        for key, value in values:
            cls.objects(key=key).update_one(upsert=True, set__key=key, set__value=value)

    @classmethod
    def collect_statistics_for_db(cls, debug=False):
        lag = db_functions.mongo_max_replication_lag(settings.MONGODB)
        cls.set('mongodb_replication_lag', lag)

        now = round_time(datetime.datetime.now(), round_to=60)
        r = redis.Redis(connection_pool=settings.REDIS_STATISTICS_POOL)
        db_times = {}
        latest_db_times = {}
        for db in ['sql',
                   'mongo',
                   'redis',
                   'redis_user',
                   'redis_story',
                   'redis_session',
                   'redis_pubsub',
                   'task_sql',
                   'task_mongo',
                   'task_redis',
                   'task_redis_user',
                   'task_redis_story',
                   'task_redis_session',
                   'task_redis_pubsub',
                   ]:
            db_times[db] = []
            for hour in range(24):
                start_hours_ago = now - datetime.timedelta(hours=hour+1)
                pipe = r.pipeline()
                for m in range(60):
                    minute = start_hours_ago + datetime.timedelta(minutes=m)
                    key = "DB:%s:%s" % (db, minute.strftime('%s'))
                    if debug:
                        print(" -> %s:c" % key)
                    pipe.get("%s:c" % key)
                    pipe.get("%s:t" % key)

                times = pipe.execute()

                counts = [int(c or 0) for c in times[::2]]
                avgs = [float(a or 0) for a in times[1::2]]

                if counts and avgs:
                    count = sum(counts)
                    avg = round(sum(avgs) / count, 3) if count else 0
                else:
                    count = 0
                    avg = 0

                if hour == 0:
                    latest_count = float(counts[-1]) if len(counts) else 0
                    latest_avg = float(avgs[-1]) if len(avgs) else 0
                    latest_db_times[db] = latest_avg / latest_count if latest_count else 0
                db_times[db].append(avg)

            db_times[db].reverse()

        values = (
            ('avg_sql_times', json.encode(db_times['sql'])),
            ('avg_mongo_times', json.encode(db_times['mongo'])),
            ('avg_redis_times', json.encode(db_times['redis'])),
            ('latest_sql_avg', latest_db_times['sql']),
            ('latest_mongo_avg', latest_db_times['mongo']),
            ('latest_redis_user_avg', latest_db_times['redis_user']),
            ('latest_redis_story_avg', latest_db_times['redis_story']),
            ('latest_redis_session_avg', latest_db_times['redis_session']),
            ('latest_redis_pubsub_avg', latest_db_times['redis_pubsub']),
            ('latest_task_sql_avg', latest_db_times['task_sql']),
            ('latest_task_mongo_avg', latest_db_times['task_mongo']),
            ('latest_task_redis_user_avg', latest_db_times['task_redis_user']),
            ('latest_task_redis_story_avg', latest_db_times['task_redis_story']),
            ('latest_task_redis_session_avg', latest_db_times['task_redis_session']),
            ('latest_task_redis_pubsub_avg', latest_db_times['task_redis_pubsub']),
        )
        for key, value in values:
            cls.objects(key=key).update_one(upsert=True, set__key=key, set__value=value)
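
    # The DB timing keys follow the same minute-bucket pattern, e.g. (hypothetical):
    #
    #   DB:mongo:<epoch minute>:c  -> call count for that minute
    #   DB:mongo:<epoch minute>:t  -> total seconds spent in those calls
    #
    # so each hourly average is total time divided by total calls.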


class MFeedback(mongo.Document):
    date = mongo.DateTimeField()
    date_short = mongo.StringField()
    subject = mongo.StringField()
    url = mongo.StringField()
    style = mongo.StringField()
    order = mongo.IntField()

    meta = {
        'collection': 'feedback',
        'allow_inheritance': False,
        'indexes': ['style'],
        'ordering': ['order'],
    }

    CATEGORIES = {
        5: 'idea',
        6: 'problem',
        7: 'praise',
        8: 'question',
        9: 'admin',
        10: 'updates',
    }

    def __str__(self):
        return "%s: (%s) %s" % (self.style, self.date, self.subject)

    @classmethod
    def collect_feedback(cls):
        seen_posts = set()
        try:
            data = urllib.request.urlopen('https://forum.newsblur.com/posts.json').read()
        except urllib.error.URLError as e:
            # URLError also covers HTTPError, so connection failures and HTTP
            # error statuses are both logged rather than raised.
            logging.debug(" ***> Failed to collect feedback: %s" % e)
            return

        data = json.decode(data).get('latest_posts', "")
        if not len(data):
            logging.debug(" ***> No feedback posts found")
            return

        cls.objects.delete()
        post_count = 0
        for post in data:
            if post['topic_id'] in seen_posts:
                continue
            if post['category_id'] not in cls.CATEGORIES:
                # Skip forum categories this model doesn't track, rather than
                # raising a KeyError below.
                continue
            seen_posts.add(post['topic_id'])
            feedback = {}
            feedback['order'] = post_count
            post_count += 1
            feedback['date'] = dateutil.parser.parse(post['created_at']).replace(tzinfo=None)
            feedback['date_short'] = relative_date(feedback['date'])
            feedback['subject'] = post['topic_title']
            feedback['url'] = "https://forum.newsblur.com/t/%s/%s/%s" % (post['topic_slug'], post['topic_id'], post['post_number'])
            feedback['style'] = cls.CATEGORIES[post['category_id']]
            cls.objects.create(**feedback)
            # if settings.DEBUG:
            #     print("%s: %s (%s)" % (feedback['style'], feedback['subject'], feedback['date_short']))
            if post_count >= 4:
                break
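
    # The Discourse posts.json payload is shaped roughly like this (illustrative):
    #
    #   {"latest_posts": [{"topic_id": 123, "topic_slug": "some-topic",
    #                      "topic_title": "...", "post_number": 1,
    #                      "created_at": "2022-03-28T16:06:44Z", "category_id": 6}, ...]}
    #
    # Only the first post per topic is kept, capped at the four newest topics.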

    @classmethod
    def all(cls):
        feedbacks = cls.objects.all()[:4]

        return feedbacks


class MAnalyticsFetcher(mongo.Document):
    date = mongo.DateTimeField(default=datetime.datetime.now)
    feed_id = mongo.IntField()
    feed_fetch = mongo.FloatField()
    feed_process = mongo.FloatField()
    page = mongo.FloatField()
    icon = mongo.FloatField()
    total = mongo.FloatField()
    server = mongo.StringField()
    feed_code = mongo.IntField()

    meta = {
        'db_alias': 'nbanalytics',
        'collection': 'feed_fetches',
        'allow_inheritance': False,
        'indexes': ['date', 'feed_id', 'server', 'feed_code'],
        'ordering': ['date'],
    }

    def __str__(self):
        return "%s: %.4s+%.4s+%.4s+%.4s = %.4ss" % (self.feed_id, self.feed_fetch,
                                                    self.feed_process,
                                                    self.page,
                                                    self.icon,
                                                    self.total)

    @classmethod
    def add(cls, feed_id, feed_fetch, feed_process,
            page, icon, total, feed_code):
        server_name = settings.SERVER_NAME
        if 'app' in server_name:
            return

        if icon and page:
            icon -= page
        if page and feed_process:
            page -= feed_process
        elif page and feed_fetch:
            page -= feed_fetch
        if feed_process and feed_fetch:
            feed_process -= feed_fetch

        cls.objects.create(feed_id=feed_id, feed_fetch=feed_fetch,
                           feed_process=feed_process,
                           page=page, icon=icon, total=total,
                           server=server_name, feed_code=feed_code)
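
    # The incoming timings appear to be cumulative checkpoints from the fetch
    # pipeline (fetch -> process -> page -> icon); the subtractions above turn
    # them into per-stage durations before the row is stored. That reading is
    # inferred from the arithmetic, not documented elsewhere in this file.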

    @classmethod
    def calculate_stats(cls, stats):
        return cls.aggregate(**stats)


class MAnalyticsLoader(mongo.Document):
    date = mongo.DateTimeField(default=datetime.datetime.now)
    page_load = mongo.FloatField()
    server = mongo.StringField()

    meta = {
        'db_alias': 'nbanalytics',
        'collection': 'page_loads',
        'allow_inheritance': False,
        'indexes': ['date', 'server'],
        'ordering': ['date'],
    }

    def __str__(self):
        return "%s: %.4ss" % (self.server, self.page_load)

    @classmethod
    def add(cls, page_load):
        server_name = settings.SERVER_NAME

        cls.objects.create(page_load=page_load, server=server_name)

    @classmethod
    def calculate_stats(cls, stats):
        return cls.aggregate(**stats)
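

# A minimal sketch of how these collectors are likely driven, assuming a
# periodic task (the scheduling itself lives outside this module):
#
#   MStatistics.collect_statistics()   # refresh the 24-hour dashboard counters
#   MFeedback.collect_feedback()       # pull the latest forum feedback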