NewsBlur/apps/statistics/rstats.py

134 lines
4.9 KiB
Python

import redis
import datetime
import re
from collections import defaultdict
from django.conf import settings
class RStats:
    """Minute-bucketed counters kept in Redis, plus a key-space sampling report."""

    # Maps a human-readable statistic name to the short prefix used in Redis keys.
    STATS_TYPE = {
        'page_load': 'PLT',
        'feed_fetch': 'FFH',
    }

    # (prefix to match, canonical path to report) pairs used by clean_path().
    _PATH_PREFIXES = (
        ('/reader/feed/', '/reader/feed/'),
        ('/social/stories', '/social/stories/'),
        ('/reader/river_stories', '/reader/river_stories/'),
        ('/social/river_stories', '/social/river_stories/'),
        ('/reader/page/', '/reader/page/'),
        ('/api/check_share_on_site', '/api/check_share_on_site/'),
    )

    @classmethod
    def stats_type(cls, name):
        """Return the Redis key prefix for ``name``.

        Raises KeyError if ``name`` is not a key of STATS_TYPE.
        """
        return cls.STATS_TYPE[name]

    @classmethod
    def add(cls, name, duration=None):
        """Record one event for ``name`` in the current minute bucket.

        Increments the ``<prefix>:<minute>:s`` counter and, when ``duration``
        is truthy, adds it to the ``<prefix>:<minute>:a`` float accumulator.
        Both keys are set to expire two days after the bucket's minute.
        """
        r = redis.Redis(connection_pool=settings.REDIS_STATISTICS_POOL)
        pipe = r.pipeline()
        minute = round_time(round_to=60)
        # NOTE: '%s' is a platform strftime extension rather than a documented
        # Python directive; it is kept so the key format stays unchanged.
        key = "%s:%s" % (cls.stats_type(name), minute.strftime('%s'))
        expires_at = (minute + datetime.timedelta(days=2)).strftime("%s")

        pipe.incr("%s:s" % key)
        if duration:
            pipe.incrbyfloat("%s:a" % key, duration)
        pipe.expireat("%s:a" % key, expires_at)
        pipe.expireat("%s:s" % key, expires_at)
        pipe.execute()

    @classmethod
    def clean_path(cls, path):
        """Collapse a request path onto its canonical endpoint.

        Returns None for a falsy ``path``, the canonical path when ``path``
        starts with one of the known prefixes, and ``path`` unchanged otherwise.
        """
        if not path:
            return None
        for prefix, canonical in cls._PATH_PREFIXES:
            if path.startswith(prefix):
                return canonical
        return path

    @classmethod
    def count(cls, name, hours=24):
        """Return the sum of the per-minute ``:s`` counters over the last ``hours`` hours."""
        r = redis.Redis(connection_pool=settings.REDIS_STATISTICS_POOL)
        stats_type = cls.stats_type(name)
        now = datetime.datetime.now()
        pipe = r.pipeline()
        # One GET per minute bucket, all sent in a single pipeline.
        for minutes_ago in range(60 * hours):
            dt_min_ago = now - datetime.timedelta(minutes=minutes_ago)
            minute = round_time(dt=dt_min_ago, round_to=60)
            key = "%s:%s" % (stats_type, minute.strftime('%s'))
            pipe.get("%s:s" % key)
        values = pipe.execute()
        # Missing buckets come back falsy and are skipped.
        return sum(int(v) for v in values if v)

    @staticmethod
    def _ttl_bucket(ttl):
        """Return the report label for a TTL given in seconds."""
        # A missing or negative TTL is reported as "never expires".
        if ttl is None or ttl < 0:
            return '-'
        if ttl == 0:
            return 'X'
        if ttl < 60 * 60:
            return '1h'
        if ttl < 60 * 60 * 24:
            return '1d'
        if ttl < 60 * 60 * 24 * 7:
            return '1w'
        if ttl < 60 * 60 * 24 * 14:
            return '2w'
        if ttl < 60 * 60 * 24 * 30:
            return '4w'
        return '4w+'

    @classmethod
    def sample(cls, sample=1000, pool=None):
        """Print a per-prefix breakdown of a random sample of Redis keys.

        Issues ``sample`` RANDOMKEY calls against ``pool`` (defaulting to
        settings.REDIS_STORY_HASH_POOL), de-duplicates the results, and for
        each key prefix (the word before the first ':') prints its share of
        sampled keys, its share of total DUMP payload length, and how many
        keys fall in each TTL bucket. Keys that do not match the prefix
        pattern, or whose DUMP is None, are reported as errors.
        """
        if not pool:
            pool = settings.REDIS_STORY_HASH_POOL
        r = redis.Redis(connection_pool=pool)
        errors = set()
        prefixes = defaultdict(set)
        sizes = defaultdict(int)
        prefixes_ttls = defaultdict(lambda: defaultdict(int))
        prefix_re = re.compile(r"(\w+):(.*)")

        p = r.pipeline()
        for _ in range(sample):
            p.randomkey()
        # RANDOMKEY yields None on an empty database; drop it so the regex
        # match below never receives a non-string.
        keys = [key for key in set(p.execute()) if key is not None]

        # Fetch every TTL and DUMP in one round trip instead of one per key.
        p = r.pipeline()
        for key in keys:
            p.ttl(key)
        for key in keys:
            p.execute_command('dump', key)
        results = p.execute()
        ttls = results[:len(keys)]
        dumps = results[len(keys):]

        for key, ttl, dumped in zip(keys, ttls, dumps):
            match = prefix_re.match(key)
            if not match or dumped is None:
                errors.add(key)
                continue
            prefix, rest = match.groups()
            prefixes[prefix].add(rest)
            sizes[prefix] += len(dumped)
            prefixes_ttls[prefix][cls._ttl_bucket(ttl)] += 1

        keys_count = len(keys)
        total_size = float(sum(sizes.values()))
        print(" ---> %s total keys" % keys_count)
        for prefix, rest in prefixes.items():
            ttl_counts = dict(prefixes_ttls[prefix])
            total_expiring = sum(n for bucket, n in ttl_counts.items() if bucket != "-")
            print(" ---> %4s: (%.4s%% keys - %.4s%% space) %s keys (%s expiring: %s)" % (prefix, 100. * (len(rest) / float(keys_count)), 100 * (sizes[prefix] / total_size), len(rest), total_expiring, ttl_counts))
        print(" ---> %s errors: %s" % (len(errors), errors))
def round_time(dt=None, round_to=60):
    """Round a datetime to the nearest multiple of ``round_to`` seconds.

    dt : datetime.datetime object, default now.
    round_to : Closest number of seconds to round to, default 1 minute.

    Rounding is done on the whole seconds elapsed since midnight of ``dt``;
    an exact half rounds up. The microsecond component is discarded rather
    than taken into account when rounding.

    Author: Thierry Husson 2012 - Use it as you want but don't blame me.
    """
    if dt is None:
        dt = datetime.datetime.now()
    # timedelta.seconds is the within-day remainder, i.e. seconds since midnight.
    seconds = (dt - dt.min).seconds
    rounding = (seconds + round_to / 2) // round_to * round_to
    # Shift by the rounding delta and subtract the microseconds.
    return dt + datetime.timedelta(0, rounding - seconds, -dt.microsecond)