New experimental data collector for debugging feeds over time.

This commit is contained in:
Samuel Clay 2017-04-12 19:13:33 -07:00
parent 557438008b
commit d84e2af636
2 changed files with 30 additions and 7 deletions

View file

@ -614,10 +614,11 @@ class Feed(models.Model):
return bool(feed_address), feed
def save_feed_history(self, status_code, message, exception=None):
def save_feed_history(self, status_code, message, exception=None, date=None):
fetch_history = MFetchHistory.add(feed_id=self.pk,
fetch_type='feed',
code=int(status_code),
date=date,
message=message,
exception=exception)
@ -631,10 +632,11 @@ class Feed(models.Model):
self.active = True
self.save()
def save_page_history(self, status_code, message, exception=None):
def save_page_history(self, status_code, message, exception=None, date=None):
fetch_history = MFetchHistory.add(feed_id=self.pk,
fetch_type='page',
code=int(status_code),
date=date,
message=message,
exception=exception)
@ -645,6 +647,13 @@ class Feed(models.Model):
self.has_page = True
self.active = True
self.save()
def save_raw_feed(self, raw_feed, fetch_date):
    """Record the raw body of a fetched feed in this feed's fetch history.

    The body is handed to MFetchHistory.add as a 'raw_feed' entry with a
    status code of 200, dated ``fetch_date``.

    raw_feed   -- the feed content to store as the entry's message
    fetch_date -- the date to store alongside the entry
    """
    # Collect the history record first, then hand it over in one call.
    entry = {
        'feed_id': self.pk,
        'fetch_type': 'raw_feed',
        'code': 200,
        'message': raw_feed,
        'date': fetch_date,
    }
    MFetchHistory.add(**entry)
def count_errors_in_history(self, exception_type='feed', status_code=None, fetch_history=None):
if not fetch_history:
@ -3016,6 +3025,7 @@ class MFetchHistory(mongo.Document):
feed_fetch_history = mongo.DynamicField()
page_fetch_history = mongo.DynamicField()
push_history = mongo.DynamicField()
raw_feed_history = mongo.DynamicField()
meta = {
'db_alias': 'nbanalytics',
@ -3063,11 +3073,15 @@ class MFetchHistory(mongo.Document):
history = fetch_history.page_fetch_history or []
elif fetch_type == 'push':
history = fetch_history.push_history or []
elif fetch_type == 'raw_feed':
history = fetch_history.raw_feed_history or []
history = [[date, code, message]] + history
any_exceptions = any([c for d, c, m in history if c not in [200, 304]])
if any_exceptions:
history = history[:25]
elif fetch_type == 'raw_feed':
history = history[:10]
else:
history = history[:5]
@ -3077,6 +3091,8 @@ class MFetchHistory(mongo.Document):
fetch_history.page_fetch_history = history
elif fetch_type == 'push':
fetch_history.push_history = history
elif fetch_type == 'raw_feed':
fetch_history.raw_feed_history = history
fetch_history.save()

View file

@ -22,7 +22,7 @@ from apps.rss_feeds.icon_importer import IconImporter
from apps.notifications.tasks import QueueNotifications, MUserFeedNotification
from apps.push.models import PushSubscription
from apps.social.models import MSocialServices
from apps.statistics.models import MAnalyticsFetcher
from apps.statistics.models import MAnalyticsFetcher, MStatistics
from utils import feedparser
from utils.story_functions import pre_process_story, strip_tags, linkify
from utils import log as logging
@ -49,6 +49,7 @@ class FetchFeed:
self.feed = Feed.get_by_id(feed_id)
self.options = options
self.fpf = None
self.raw_feed = None
@timelimit(30)
def fetch(self):
@ -134,7 +135,8 @@ class FetchFeed:
if raw_feed.content and raw_feed.status_code < 400:
response_headers = raw_feed.headers
response_headers['Content-Location'] = raw_feed.url
self.fpf = feedparser.parse(smart_unicode(raw_feed.content),
self.raw_feed = smart_unicode(raw_feed.content)
self.fpf = feedparser.parse(self.raw_feed,
response_headers=response_headers)
if self.options.get('debug', False):
logging.debug(" ---> [%-30s] ~FBFeed fetch status %s: %s length / %s" % (self.feed.log_title[:30], raw_feed.status_code, len(smart_unicode(raw_feed.content)), raw_feed.headers))
@ -486,10 +488,11 @@ class FetchFeed:
return rss.writeString('utf-8')
class ProcessFeed:
def __init__(self, feed_id, fpf, options):
def __init__(self, feed_id, fpf, options, raw_feed=None):
self.feed_id = feed_id
self.options = options
self.fpf = fpf
self.raw_feed = raw_feed
def refresh_feed(self):
self.feed = Feed.get_by_id(self.feed_id)
@ -726,10 +729,13 @@ class ProcessFeed:
'~FR~SB' if ret_values['error'] else '', ret_values['error'],
len(self.fpf.entries)))
self.feed.update_all_statistics(has_new_stories=bool(ret_values['new']), force=self.options['force'])
fetch_date = datetime.datetime.now()
if ret_values['new']:
self.feed.trim_feed()
self.feed.expire_redis()
self.feed.save_feed_history(200, "OK")
if MStatistics.get('raw_feed', None) == self.feed.pk:
self.feed.save_raw_feed(self.raw_feed, fetch_date)
self.feed.save_feed_history(200, "OK", date=fetch_date)
if self.options['verbose']:
logging.debug(u' ---> [%-30s] ~FBTIME: feed parse in ~FM%.4ss' % (
@ -814,9 +820,10 @@ class Dispatcher:
ffeed = FetchFeed(feed_id, self.options)
ret_feed, fetched_feed = ffeed.fetch()
feed_fetch_duration = time.time() - start_duration
raw_feed = ffeed.raw_feed
if ((fetched_feed and ret_feed == FEED_OK) or self.options['force']):
pfeed = ProcessFeed(feed_id, fetched_feed, self.options)
pfeed = ProcessFeed(feed_id, fetched_feed, self.options, raw_feed=raw_feed)
ret_feed, ret_entries = pfeed.process()
feed = pfeed.feed
feed_process_duration = time.time() - start_duration