"""Per-minute statistics counters stored in Redis.

Counters live under keys of the form ``<TYPE>:<epoch>:s`` (number of
samples) and ``<TYPE>:<epoch>:a`` (accumulated duration), where ``<TYPE>``
is one of the short codes in ``RStats.STATS_TYPE`` and ``<epoch>`` is the
minute the sample was rounded to, in seconds since the epoch.
"""
import datetime
import re
import time
from collections import defaultdict

import redis
from django.conf import settings


def _epoch_seconds(dt):
    """Return *dt* (interpreted as local time) as integer epoch seconds.

    Portable replacement for ``dt.strftime('%s')``, which is a platform
    specific extension and is not available everywhere.
    """
    return int(time.mktime(dt.timetuple()))


class RStats:
    """Record and query per-minute counters in the statistics Redis pool."""

    # Human readable statistic name -> short key prefix used in Redis.
    STATS_TYPE = {
        'page_load': 'PLT',
        'feed_fetch': 'FFH',
    }

    # (path prefix, canonical path) pairs, checked in order by clean_path().
    _PATH_PREFIXES = (
        ('/reader/feed/', '/reader/feed/'),
        ('/social/stories', '/social/stories/'),
        ('/reader/river_stories', '/reader/river_stories/'),
        ('/social/river_stories', '/social/river_stories/'),
        ('/reader/page/', '/reader/page/'),
        ('/api/check_share_on_site', '/api/check_share_on_site/'),
    )

    # (upper bound in hours, bucket label) pairs used by sample().
    _TTL_BUCKETS = (
        (1, '1'),
        (12, '12'),
        (24, '24'),
        (168, '168'),
        (336, '336'),
    )
    _TTL_OVERFLOW_BUCKET = '1000'

    @classmethod
    def stats_type(cls, name):
        """Return the Redis key prefix for statistic *name*.

        Raises KeyError if *name* is not a key of ``STATS_TYPE``.
        """
        return cls.STATS_TYPE[name]

    @classmethod
    def add(cls, name, duration=None):
        """Count one sample of *name* in the current minute.

        The sample counter (``:s``) is always incremented. When *duration*
        is truthy it is also added to the duration accumulator (``:a``).
        Both keys are set to expire two days after the minute they belong to.
        """
        r = redis.Redis(connection_pool=settings.REDIS_STATISTICS_POOL)
        pipe = r.pipeline()
        minute = round_time(round_to=60)
        key = "%s:%s" % (cls.stats_type(name), _epoch_seconds(minute))
        expire_at = _epoch_seconds(minute + datetime.timedelta(days=2))

        pipe.incr("%s:s" % key)
        if duration:
            pipe.incrbyfloat("%s:a" % key, duration)
            # Only touch the expiry of the accumulator when it is written;
            # EXPIREAT on a key that does not exist does nothing.
            pipe.expireat("%s:a" % key, expire_at)
        pipe.expireat("%s:s" % key, expire_at)
        pipe.execute()

    @classmethod
    def clean_path(cls, path):
        """Collapse a request path onto its canonical endpoint prefix.

        Paths starting with one of the known prefixes are replaced by that
        endpoint's canonical path; any other path is returned unchanged.
        Returns None when *path* is empty or None.
        """
        if not path:
            return None
        for prefix, canonical in cls._PATH_PREFIXES:
            if path.startswith(prefix):
                return canonical
        return path

    @classmethod
    def count(cls, name, hours=24):
        """Return the total number of *name* samples over the last *hours*.

        Reads one ``:s`` counter per minute in a single pipeline and sums
        the counters that exist.
        """
        r = redis.Redis(connection_pool=settings.REDIS_STATISTICS_POOL)
        stats_type = cls.stats_type(name)
        now = datetime.datetime.now()

        pipe = r.pipeline()
        for minutes_ago in range(60 * hours):
            dt_min_ago = now - datetime.timedelta(minutes=minutes_ago)
            minute = round_time(dt=dt_min_ago, round_to=60)
            pipe.get("%s:%s:s" % (stats_type, _epoch_seconds(minute)))
        values = pipe.execute()

        return sum(int(value) for value in values if value)

    @classmethod
    def sample(cls, sample=1000, pool=None):
        """Print a breakdown of randomly sampled keys by prefix and TTL.

        Draws *sample* random keys from *pool* (default:
        ``settings.REDIS_STORY_HASH_POOL``), groups the distinct keys by the
        text before their first ``:`` and prints, per prefix, the share of
        sampled keys and how many fall into each TTL bucket (bucket labels
        are upper bounds in hours; ``'1000'`` holds everything else).
        """
        if not pool:
            pool = settings.REDIS_STORY_HASH_POOL
        r = redis.Redis(connection_pool=pool)
        prefix_re = re.compile(r"(\w+):(.*)")

        pipe = r.pipeline()
        for _ in range(sample):
            pipe.randomkey()
        # RANDOMKEY yields None on an empty database; drop those results.
        keys = [key for key in set(pipe.execute()) if key is not None]

        pipe = r.pipeline()
        for key in keys:
            pipe.ttl(key)
        ttls = pipe.execute()

        prefixes = defaultdict(set)
        prefixes_ttls = defaultdict(lambda: defaultdict(int))
        for key, ttl in zip(keys, ttls):
            if isinstance(key, bytes) and not isinstance(key, str):
                # Python 3 clients return bytes unless decode_responses is set.
                key = key.decode('utf-8', 'replace')
            match = prefix_re.match(key)
            if match:
                prefix, rest = match.groups()
            else:
                # Keys without a "<word>:" prefix used to raise here.
                prefix, rest = '(none)', key
            prefixes[prefix].add(rest)
            prefixes_ttls[prefix][cls._ttl_bucket(ttl)] += 1

        keys_count = len(keys)
        print(" ---> %s total keys" % keys_count)
        for prefix, rest in prefixes.items():
            print(" ---> %4s: (%.4s%%) %s keys (%s)" % (
                prefix,
                100. * (len(rest) / float(keys_count)),
                len(rest),
                dict(prefixes_ttls[prefix]),
            ))

    @classmethod
    def _ttl_bucket(cls, ttl):
        """Return the bucket label for a TTL given in seconds."""
        if ttl is None or ttl == -1:
            # No expiry set. Previously these compared as "less than an
            # hour" and were miscounted as about to expire.
            return cls._TTL_OVERFLOW_BUCKET
        for hours, label in cls._TTL_BUCKETS:
            if ttl < 60 * 60 * hours:
                return label
        return cls._TTL_OVERFLOW_BUCKET


def round_time(dt=None, round_to=60):
    """Round a datetime object to any time lapse in seconds.

    dt : datetime.datetime object, default now.
    round_to : Closest number of seconds to round to, default 1 minute.

    Returns a new datetime with microseconds cleared and the time of day
    rounded to the nearest multiple of *round_to* seconds (ties round up).

    Author: Thierry Husson 2012 - Use it as you want but don't blame me.
    """
    if dt is None:
        dt = datetime.datetime.now()
    # Seconds since midnight, taken from the fields directly so that
    # timezone-aware datetimes work too (subtracting the naive dt.min
    # from an aware datetime raises TypeError).
    seconds = dt.hour * 3600 + dt.minute * 60 + dt.second
    # Floor division keeps the integer arithmetic identical on Python 2 and 3.
    rounding = (seconds + round_to // 2) // round_to * round_to
    return dt + datetime.timedelta(0, rounding - seconds, -dt.microsecond)