Adding new analytics collections to keep track of page loads and feed fetches. Need to start aggregating before I can tell how useful this data will be.

This commit is contained in:
Samuel Clay 2012-09-06 17:16:01 -07:00
parent ff017caec2
commit c07d7e7578
8 changed files with 219 additions and 38 deletions

View file

@ -557,7 +557,7 @@ class MUserStory(mongo.Document):
read_date = mongo.DateTimeField()
story_id = mongo.StringField(unique_with=('user_id', 'feed_id'))
story_date = mongo.DateTimeField()
story = mongo.ReferenceField(MStory)
story = mongo.ReferenceField(MStory, dbref=True)
found_story = mongo.GenericReferenceField()
meta = {

View file

@ -228,4 +228,127 @@ class MFeedback(mongo.Document):
def all(cls):
feedbacks = cls.objects.all()[:4]
return feedbacks
return feedbacks
class MAnalyticsPageLoad(mongo.Document):
    """One page-load analytics record: who loaded which path, on which
    platform, and how long it took. Stored in the separate 'nbanalytics'
    database so analytics writes don't contend with the main DB."""
    date = mongo.DateTimeField(default=datetime.datetime.now)
    username = mongo.StringField()
    user_id = mongo.IntField()
    is_premium = mongo.BooleanField()
    platform = mongo.StringField()
    path = mongo.StringField()
    duration = mongo.FloatField()

    meta = {
        'db_alias': 'nbanalytics',
        'collection': 'page_loads',
        'allow_inheritance': False,
        'indexes': ['path', 'date', 'platform', 'user_id'],
        'ordering': ['date'],
    }

    def __unicode__(self):
        return "%s / %s: (%.4s) %s" % (self.username, self.platform, self.duration, self.path)

    @classmethod
    def add(cls, user, is_premium, platform, path, duration):
        """Record a single page load. Anonymous users are stored with
        username=None and user_id=0."""
        if user.is_anonymous():
            username = None
            user_id = 0
        else:
            username = user.username
            user_id = user.pk
        path = cls.clean_path(path)
        cls.objects.create(username=username, user_id=user_id, is_premium=is_premium,
                           platform=platform, path=path, duration=duration)

    @classmethod
    def clean_path(cls, path):
        # Collapse per-feed URLs into a single bucket so stats aggregate by
        # endpoint rather than by individual feed id.
        if path and path.startswith('/reader/feed/'):
            path = '/reader/feed/'
        return path

    @classmethod
    def fetch_stats(cls, stat_key, stat_value):
        stats = cls.objects.filter(**{stat_key: stat_value})
        return cls.calculate_stats(stats)

    @classmethod
    def calculate_stats(cls, stats):
        # NOTE(review): `stats` is a queryset here, so `cls.aggregate(**stats)`
        # looks unfinished/incorrect -- confirm the intended aggregation API.
        return cls.aggregate(**stats)

    @classmethod
    def clean(cls, days=1):
        """Delete page-load records older than `days` days, bounded by a
        60-second time limit so a huge backlog can't hang the caller.

        Fix: the delete used to be issued a second time outside the
        @timelimit wrapper, which defeated the timeout protection."""
        last_day = datetime.datetime.now() - datetime.timedelta(days=days)

        from utils.feed_functions import timelimit, TimeoutError

        @timelimit(60)
        def delete_old_history():
            cls.objects(date__lte=last_day).delete()

        try:
            delete_old_history()
        except TimeoutError:
            print("Timed out on deleting old history. Shit.")
class MAnalyticsFetcher(mongo.Document):
    """Per-fetch timing record for a feed: how long each phase of a feed
    fetch (feed download, processing, page, icon) took. Stored in the
    separate 'nbanalytics' database."""
    date = mongo.DateTimeField(default=datetime.datetime.now)
    feed_id = mongo.IntField()
    feed_fetch_duration = mongo.FloatField()
    feed_process_duration = mongo.FloatField()
    page_duration = mongo.FloatField()
    icon_duration = mongo.FloatField()
    total_duration = mongo.FloatField()

    meta = {
        'db_alias': 'nbanalytics',
        'collection': 'feed_fetches',
        'allow_inheritance': False,
        'indexes': ['date', 'feed_id'],
        'ordering': ['date'],
    }

    def __unicode__(self):
        return "%s: %.4s+%.4s+%.4s+%.4s = %.4ss" % (self.feed_id, self.feed_fetch_duration,
                                                    self.feed_process_duration,
                                                    self.page_duration,
                                                    self.icon_duration,
                                                    self.total_duration)

    @classmethod
    def add(cls, feed_id, feed_fetch_duration, feed_process_duration,
            page_duration, icon_duration, total_duration):
        """Record one feed fetch.

        Callers pass cumulative elapsed times, each measured from the start
        of the fetch, so the preceding phase is subtracted off to store
        per-phase durations. Any phase may be None if it was skipped.
        """
        if icon_duration and page_duration:
            icon_duration -= page_duration
        if page_duration and feed_process_duration:
            page_duration -= feed_process_duration
        if feed_process_duration and feed_fetch_duration:
            feed_process_duration -= feed_fetch_duration
        cls.objects.create(feed_id=feed_id, feed_fetch_duration=feed_fetch_duration,
                           feed_process_duration=feed_process_duration,
                           page_duration=page_duration, icon_duration=icon_duration,
                           total_duration=total_duration)

    @classmethod
    def calculate_stats(cls, stats):
        # NOTE(review): `stats` is expected to be a mapping for cls.aggregate;
        # mirror of MAnalyticsPageLoad.calculate_stats -- confirm intended API.
        return cls.aggregate(**stats)

    @classmethod
    def clean(cls, days=1):
        """Delete fetch records older than `days` days, bounded by a
        60-second time limit.

        Fix: the delete used to be issued a second time outside the
        @timelimit wrapper, which defeated the timeout protection."""
        last_day = datetime.datetime.now() - datetime.timedelta(days=days)

        from utils.feed_functions import timelimit, TimeoutError

        @timelimit(60)
        def delete_old_history():
            cls.objects(date__lte=last_day).delete()

        try:
            delete_old_history()
        except TimeoutError:
            print("Timed out on deleting old history. Shit.")

View file

@ -7666,9 +7666,13 @@ form.opml_import_form input {
/* = iPhone Page = */
/* =============== */
.NB-static-iphone {
.NB-static-iphone .NB-ios-mockup,
.NB-static-iphone .NB-ios-main {
-webkit-transform : translate3d(0, 0, 0);
}
.NB-static-iphone .NB-splash-info {
z-index: 2;
}
.NB-static-iphone .NB-ios-main {
margin: 24px 36px 24px 0;
text-align: center;
@ -7834,6 +7838,7 @@ form.opml_import_form input {
width: 554px;
height: 630px;
position: relative;
z-index: 1;
}
.NB-static-iphone .NB-ios-mockup .NB-ios-iphone-skeleton {
position: absolute;

View file

@ -15,8 +15,8 @@
// #define BACKGROUND_REFRESH_SECONDS -5
#define BACKGROUND_REFRESH_SECONDS -10*60
#define NEWSBLUR_URL [NSString stringWithFormat:@"nb.local.com"]
// #define NEWSBLUR_URL [NSString stringWithFormat:@"www.newsblur.com"]
// #define NEWSBLUR_URL [NSString stringWithFormat:@"nb.local.com"]
#define NEWSBLUR_URL [NSString stringWithFormat:@"www.newsblur.com"]
#define NEWSBLUR_LINK_COLOR 0x405BA8
#define NEWSBLUR_HIGHLIGHT_COLOR 0xd2e6fd

View file

@ -352,6 +352,19 @@ CELERYBEAT_SCHEDULE = {
},
}
# =========
# = Mongo =
# =========
MONGO_DB = {
'host': '127.0.0.1:27017',
'name': 'newsblur',
}
MONGO_ANALYTICS_DB = {
'host': '127.0.0.1:27017',
'name': 'nbanalytics',
}
# ====================
# = Database Routers =
# ====================
@ -427,10 +440,19 @@ DEBUG_TOOLBAR_CONFIG = {
MONGO_DB_DEFAULTS = {
'name': 'newsblur',
'host': 'db02:27017',
'alias': 'default',
}
MONGO_DB = dict(MONGO_DB_DEFAULTS, **MONGO_DB)
MONGODB = connect(MONGO_DB.pop('name'), **MONGO_DB)
MONGO_ANALYTICS_DB_DEFAULTS = {
'name': 'nbanalytics',
'host': 'db02:27017',
'alias': 'nbanalytics',
}
MONGO_ANALYTICS_DB = dict(MONGO_ANALYTICS_DB_DEFAULTS, **MONGO_ANALYTICS_DB)
MONGOANALYTICSDB = connect(MONGO_ANALYTICS_DB.pop('name'), **MONGO_ANALYTICS_DB)
# =========
# = Redis =
# =========

View file

@ -14,6 +14,7 @@ from apps.rss_feeds.models import Feed, MStory
from apps.rss_feeds.page_importer import PageImporter
from apps.rss_feeds.icon_importer import IconImporter
from apps.push.models import PushSubscription
from apps.statistics.models import MAnalyticsFetcher
from utils import feedparser
from utils.story_functions import pre_process_story
from utils import log as logging
@ -303,10 +304,17 @@ class Dispatcher:
current_process = multiprocessing.current_process()
identity = "X"
feed = None
if current_process._identity:
identity = current_process._identity[0]
for feed_id in feed_queue:
start_duration = time.time()
feed_fetch_duration = None
feed_process_duration = None
page_duration = None
icon_duration = None
ret_entries = {
ENTRY_NEW: 0,
ENTRY_UPDATED: 0,
@ -339,14 +347,16 @@ class Dispatcher:
feed.num_subscribers,
rand, quick))
continue
ffeed = FetchFeed(feed_id, self.options)
ret_feed, fetched_feed = ffeed.fetch()
feed_fetch_duration = time.time() - start_duration
if ((fetched_feed and ret_feed == FEED_OK) or self.options['force']):
pfeed = ProcessFeed(feed_id, fetched_feed, self.options)
ret_feed, ret_entries = pfeed.process()
feed = pfeed.feed
feed_process_duration = time.time() - start_duration
if ret_entries.get(ENTRY_NEW) or self.options['force']:
start = time.time()
@ -402,6 +412,7 @@ class Dispatcher:
page_importer = PageImporter(feed)
try:
page_data = page_importer.fetch_page()
page_duration = time.time() - start_duration
except TimeoutError, e:
logging.debug(' ---> [%-30s] ~FRPage fetch timed out...' % (feed.title[:30]))
page_data = None
@ -421,6 +432,7 @@ class Dispatcher:
icon_importer = IconImporter(feed, page_data=page_data, force=self.options['force'])
try:
icon_importer.save()
icon_duration = time.time() - start_duration
except TimeoutError, e:
logging.debug(' ---> [%-30s] ~FRIcon fetch timed out...' % (feed.title[:30]))
feed.save_page_history(556, 'Timeout', '')
@ -451,6 +463,11 @@ class Dispatcher:
identity, feed.feed_title[:30], delta,
feed.pk, self.feed_trans[ret_feed],))
logging.debug(done_msg)
total_duration = time.time() - start_duration
MAnalyticsFetcher.add(feed_id=feed.pk, feed_fetch_duration=feed_fetch_duration,
feed_process_duration=feed_process_duration,
page_duration=page_duration, icon_duration=icon_duration,
total_duration=total_duration)
self.feed_stats[ret_feed] += 1
for key, val in ret_entries.items():

View file

@ -4,6 +4,7 @@ import string
import time
from django.core.handlers.wsgi import WSGIRequest
from django.conf import settings
from utils.user_functions import extract_user_agent
class NullHandler(logging.Handler): #exists in python 3.1
def emit(self, record):
@ -14,41 +15,14 @@ def getlogger():
return logger
def user(u, msg, request=None):
from apps.statistics.models import MAnalyticsPageLoad
platform = '------'
time_elapsed = ""
if isinstance(u, WSGIRequest) or request:
if not request:
request = u
u = request.user
user_agent = request.environ.get('HTTP_USER_AGENT', '')
if 'iPad App' in user_agent:
platform = 'iPad'
elif 'iPhone App' in user_agent:
platform = 'iPhone'
elif 'Blar' in user_agent:
platform = 'Blar'
elif 'Android' in user_agent:
platform = 'Androd'
elif 'MSIE' in user_agent:
platform = 'IE'
if 'MSIE 9' in user_agent:
platform += '9'
elif 'MSIE 10' in user_agent:
platform += '10'
elif 'MSIE 8' in user_agent:
platform += '8'
elif 'Chrome' in user_agent:
platform = 'Chrome'
elif 'Safari' in user_agent:
platform = 'Safari'
elif 'MeeGo' in user_agent:
platform = 'MeeGo'
elif 'Firefox' in user_agent:
platform = 'FF'
elif 'Opera' in user_agent:
platform = 'Opera'
elif 'WP7' in user_agent:
platform = 'WP7'
platform = extract_user_agent(request)
if hasattr(request, 'start_time'):
seconds = time.time() - request.start_time
@ -56,9 +30,13 @@ def user(u, msg, request=None):
'~FB' if seconds < .5 else '~FR',
seconds,
)
premium = '*' if u.is_authenticated() and u.profile.is_premium else ''
username = cipher(unicode(u)) if settings.CIPHER_USERNAMES else u
is_premium = u.is_authenticated() and u.profile.is_premium
premium = '*' if is_premium else ''
username = cipher(unicode(u)) if settings.CIPHER_USERNAMES else unicode(u)
info(' ---> [~FB~SN%-6s~SB] %s[%s%s] %s' % (platform, time_elapsed, username, premium, msg))
if request:
MAnalyticsPageLoad.add(user=u, is_premium=is_premium, platform=platform, path=request.path,
duration=seconds)
def cipher(msg):
shift = len(msg)

View file

@ -70,4 +70,40 @@ def invalidate_template_cache(fragment_name, *variables):
def generate_secret_token(phrase, size=12):
    """Generate a (SHA1) security hash from the provided info.

    The hash mixes `phrase` with settings.SECRET_KEY and is truncated to
    `size` hex characters. (Fix: the return statement was duplicated.)
    """
    info = (phrase, settings.SECRET_KEY)
    return sha_constructor("".join(info)).hexdigest()[:size]
def extract_user_agent(request):
    """Map the request's User-Agent header to a short platform label.

    Returns '------' when no known platform token is found. Token order
    matters: native-app tokens come first, and 'Chrome' is checked before
    'Safari' because Chrome user agents also contain 'Safari'.
    """
    user_agent = request.environ.get('HTTP_USER_AGENT', '')
    known_platforms = (
        ('iPad App', 'iPad'),
        ('iPhone App', 'iPhone'),
        ('Blar', 'Blar'),
        ('Android', 'Androd'),  # (sic) label kept as-is for stat continuity
        ('MSIE', 'IE'),
        ('Chrome', 'Chrome'),
        ('Safari', 'Safari'),
        ('MeeGo', 'MeeGo'),
        ('Firefox', 'FF'),
        ('Opera', 'Opera'),
        ('WP7', 'WP7'),
        ('WP8', 'WP8'),
    )
    for token, label in known_platforms:
        if token not in user_agent:
            continue
        if label == 'IE':
            # Append the major version for the IE releases we track.
            for version in ('9', '10', '8'):
                if ('MSIE %s' % version) in user_agent:
                    label += version
                    break
        return label
    return '------'