Changing the feed log format to include the feed id at the beginning. Also normalizing all feed titles in logs for better searchability.

Samuel Clay 2017-03-31 19:52:24 -07:00
parent 9079af45d3
commit 461c1c4b65
6 changed files with 97 additions and 92 deletions
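Below is a minimal sketch of what the change means for a log line. It is not the NewsBlur code itself: the FeedSketch class name and the sample field values are illustrative, but the format string and the log_title property mirror the diff that follows. Every logging call switches from feed.title / unicode(self) to the shared log_title property, whose format now leads with the feed id.

# Minimal sketch with illustrative values; mirrors the new __unicode__ format
# and the log_title property introduced in this commit.
class FeedSketch(object):
    def __init__(self, pk, feed_title, num_subscribers=10,
                 active_subscribers=4, active_premium_subscribers=1,
                 branch_from_feed=None):
        self.pk = pk
        self.feed_title = feed_title or "[Untitled]"
        self.num_subscribers = num_subscribers
        self.active_subscribers = active_subscribers
        self.active_premium_subscribers = active_premium_subscribers
        self.branch_from_feed = branch_from_feed

    def __unicode__(self):
        # New format: feed id first, optional " [B: <pk>]" branch marker,
        # then title and subscriber counts.
        return "%s%s: %s - %s/%s/%s" % (
            self.pk,
            (" [B: %s]" % self.branch_from_feed.pk if self.branch_from_feed else ""),
            self.feed_title,
            self.num_subscribers,
            self.active_subscribers,
            self.active_premium_subscribers,
        )

    @property
    def log_title(self):
        return self.__unicode__()

feed = FeedSketch(42, "Example Feed")
print(' ---> [%-30s] Fetching feed' % (feed.log_title[:30],))
# New prefix:           ---> [42: Example Feed - 10/4/1     ] Fetching feed
# Old prefix (roughly): ---> [Example Feed (42 - 10/4/1)    ] Fetching feed

Leading with the id means a search for a feed's id now matches every log line for that feed, which is the searchability gain the commit message describes.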


@@ -179,8 +179,8 @@ class EmailNewsletter:
r = redis.Redis(connection_pool=settings.REDIS_PUBSUB_POOL)
listeners_count = r.publish("%s:story" % feed.pk, 'story:new:%s' % story_hash)
if listeners_count:
logging.debug(" ---> [%-30s] ~FMPublished to %s subscribers" % (feed.title[:30], listeners_count))
logging.debug(" ---> [%-30s] ~FMPublished to %s subscribers" % (feed.log_title[:30], listeners_count))
except redis.ConnectionError:
logging.debug(" ***> [%-30s] ~BMRedis is unavailable for real-time." % (feed.title[:30],))
logging.debug(" ***> [%-30s] ~BMRedis is unavailable for real-time." % (feed.log_title[:30],))


@@ -449,7 +449,7 @@ class Profile(models.Model):
if verbose:
feed = Feed.get_by_id(feed_id)
logging.debug(" ---> [%-30s] ~SN~FBCounting subscribers for feed:~SB~FM%s~SN~FB user:~SB~FM%s" % (feed.title[:30], feed_id, user_id))
logging.debug(" ---> [%-30s] ~SN~FBCounting subscribers for feed:~SB~FM%s~SN~FB user:~SB~FM%s" % (feed.log_title[:30], feed_id, user_id))
if feed_id:
feed_ids = [feed_id]
@@ -511,7 +511,7 @@ class Profile(models.Model):
r.expire(premium_key, settings.SUBSCRIBER_EXPIRE*24*60*60)
logging.info(" ---> [%-30s] ~SN~FBCounting subscribers, storing in ~SBredis~SN: ~FMt:~SB~FM%s~SN a:~SB%s~SN p:~SB%s~SN ap:~SB%s" %
(feed.title[:30], total, active, premium, active_premium))
(feed.log_title[:30], total, active, premium, active_premium))
@classmethod
def count_all_feed_subscribers_for_user(self, user):


@@ -67,8 +67,8 @@ class IconImporter(object):
self.feed_icon.icon_url != icon_url or
self.feed_icon.not_found or
(settings.BACKED_BY_AWS.get('icons_on_s3') and not self.feed.s3_icon))):
logging.debug(" ---> [%-30s] ~SN~FBIcon difference:~FY color:%s (%s/%s) data:%s url:%s notfound:%s no-s3:%s" % (
self.feed,
logging.debug(" ---> [%-30s] ~SN~FBIcon difference:~FY color:%s (%s/%s) data:%s url:%s notfound:%s no-s3:%s" % (
self.feed.log_title[:30],
self.feed_icon.color != color, self.feed_icon.color, color,
self.feed_icon.data != image_str,
self.feed_icon.icon_url != icon_url,


@@ -95,13 +95,14 @@ class Feed(models.Model):
if not self.feed_title:
self.feed_title = "[Untitled]"
self.save()
return "%s (%s - %s/%s/%s)%s" % (
self.feed_title,
return "%s%s: %s - %s/%s/%s" % (
self.pk,
(" [B: %s]" % self.branch_from_feed.pk if self.branch_from_feed else ""),
self.feed_title,
self.num_subscribers,
self.active_subscribers,
self.active_premium_subscribers,
(" [B: %s]" % self.branch_from_feed.pk if self.branch_from_feed else ""))
)
@property
def title(self):
@@ -110,6 +111,10 @@ class Feed(models.Model):
title = "%s*" % title[:29]
return title
@property
def log_title(self):
return self.__unicode__()
@property
def permalink(self):
return "%s/site/%s/%s" % (settings.NEWSBLUR_URL, self.pk, slugify(self.feed_title.lower()[:50]))
@@ -246,7 +251,7 @@ class Feed(models.Model):
if not duplicate_feeds:
# Feed has been deleted. Just ignore it.
logging.debug(" ***> Changed to: %s - %s: %s" % (self.feed_address, self.feed_link, duplicate_feeds))
logging.debug(' ***> [%-30s] Feed deleted (%s).' % (unicode(self)[:30], self.pk))
logging.debug(' ***> [%-30s] Feed deleted (%s).' % (self.log_title[:30], self.pk))
return
for duplicate_feed in duplicate_feeds:
@@ -602,7 +607,7 @@ class Feed(models.Model):
try:
feed_address, feed = _1()
except TimeoutError, e:
logging.debug(' ---> [%-30s] Feed address check timed out...' % (unicode(self)[:30]))
logging.debug(' ---> [%-30s] Feed address check timed out...' % (self.log_title[:30]))
self.save_feed_history(505, 'Timeout', e)
feed = self
feed_address = None
@@ -667,12 +672,12 @@ class Feed(models.Model):
self.save()
logging.debug(' ---> [%-30s] ~FBCounting any errors in history: %s (%s non errors)' %
(unicode(self)[:30], len(errors), len(non_errors)))
(self.log_title[:30], len(errors), len(non_errors)))
return errors, non_errors
def count_redirects_in_history(self, fetch_type='feed', fetch_history=None):
logging.debug(' ---> [%-30s] Counting redirects in history...' % (unicode(self)[:30]))
logging.debug(' ---> [%-30s] Counting redirects in history...' % (self.log_title[:30]))
if not fetch_history:
fetch_history = MFetchHistory.feed(self.pk)
fh = fetch_history[fetch_type+'_fetch_history']
@@ -703,7 +708,7 @@ class Feed(models.Model):
return True
elif last_recount:
logging.info(" ---> [%-30s] ~SN~FBFeed has expired redis subscriber counts (%s < %s), clearing..." % (
unicode(self)[:30], last_recount, subscriber_expire))
self.log_title[:30], last_recount, subscriber_expire))
r.delete(total_key, -1)
r.delete(premium_key, -1)
@@ -753,7 +758,7 @@ class Feed(models.Model):
original_premium_subscribers = self.premium_subscribers
original_active_premium_subscribers = self.active_premium_subscribers
logging.info(" ---> [%-30s] ~SN~FBCounting subscribers from ~FCredis~FB: ~FMt:~SB~FM%s~SN a:~SB%s~SN p:~SB%s~SN ap:~SB%s ~SN~FC%s" %
(self.title[:30], total, active, premium, active_premium, "(%s branches)" % (len(feed_ids)-1) if len(feed_ids)>1 else ""))
(self.log_title[:30], total, active, premium, active_premium, "(%s branches)" % (len(feed_ids)-1) if len(feed_ids)>1 else ""))
else:
from apps.reader.models import UserSubscription
@@ -786,7 +791,7 @@ class Feed(models.Model):
original_active_premium_subscribers = self.active_premium_subscribers
active_premium = active_premium_subscribers.count()
logging.debug(" ---> [%-30s] ~SN~FBCounting subscribers from ~FYpostgres~FB: ~FMt:~SB~FM%s~SN a:~SB%s~SN p:~SB%s~SN ap:~SB%s" %
(self.title[:30], total, active, premium, active_premium))
(self.log_title[:30], total, active, premium, active_premium))
# If any counts have changed, save them
self.num_subscribers = total
@@ -1148,7 +1153,7 @@ class Feed(models.Model):
if settings.DEBUG or verbose:
logging.debug(" ---> [%-30s] ~FBChecking ~SB%s~SN new/updated against ~SB%s~SN stories" % (
self.title[:30],
self.log_title[:30],
len(stories),
len(existing_stories.keys())))
@timelimit(2)
@@ -1160,7 +1165,7 @@ class Feed(models.Model):
for story in stories:
if verbose:
logging.debug(" ---> [%-30s] ~FBChecking ~SB%s~SN / ~SB%s" % (
self.title[:30],
self.log_title[:30],
story.get('title'),
story.get('guid')))
if not story.get('title'):
@@ -1179,7 +1184,7 @@ class Feed(models.Model):
existing_story, story_has_changed = _1(story, story_content,
existing_stories, new_story_hashes)
except TimeoutError, e:
logging.debug(' ---> [%-30s] ~SB~FRExisting story check timed out...' % (unicode(self)[:30]))
logging.debug(' ---> [%-30s] ~SB~FRExisting story check timed out...' % (self.log_title[:30]))
existing_story = None
story_has_changed = False
@@ -1424,9 +1429,9 @@ class Feed(models.Model):
original_cutoff = cutoff
cutoff = min(cutoff, 10)
try:
logging.debug(" ---> [%-30s] ~FBTrimming down to ~SB%s (instead of %s)~SN stories (~FM%s~FB)" % (self, cutoff, original_cutoff, self.last_story_date.strftime("%Y-%m-%d") if self.last_story_date else "No last story date"))
logging.debug(" ---> [%-30s] ~FBTrimming down to ~SB%s (instead of %s)~SN stories (~FM%s~FB)" % (self.log_title[:30], cutoff, original_cutoff, self.last_story_date.strftime("%Y-%m-%d") if self.last_story_date else "No last story date"))
except ValueError, e:
logging.debug(" ***> [%-30s] Error trimming: %s" % (self, e))
logging.debug(" ***> [%-30s] Error trimming: %s" % (self.log_title[:30], e))
pass
return cutoff
@@ -2035,7 +2040,7 @@ class Feed(models.Model):
if verbose:
logging.debug(" ---> [%-30s] Fetched every %s min - Subs: %s/%s/%s Stories/day: %s" % (
unicode(self)[:30], total,
self.log_title[:30], total,
self.num_subscribers,
self.active_subscribers,
self.active_premium_subscribers,
@@ -2053,7 +2058,7 @@ class Feed(models.Model):
if verbose:
logging.debug(' ---> [%-30s] ~FBScheduling feed fetch geometrically: '
'~SB%s errors. Time: %s min' % (
unicode(self)[:30], self.errors_since_good, total))
self.log_title[:30], self.errors_since_good, total))
random_factor = random.randint(0, total) / 4
next_scheduled_update = datetime.datetime.utcnow() + datetime.timedelta(
@@ -2085,11 +2090,11 @@ class Feed(models.Model):
def schedule_feed_fetch_immediately(self, verbose=True):
r = redis.Redis(connection_pool=settings.REDIS_FEED_UPDATE_POOL)
if not self.num_subscribers:
logging.debug(' ---> [%-30s] Not scheduling feed fetch immediately, no subs.' % (unicode(self)[:30]))
logging.debug(' ---> [%-30s] Not scheduling feed fetch immediately, no subs.' % (self.log_title[:30]))
return
if verbose:
logging.debug(' ---> [%-30s] Scheduling feed fetch immediately...' % (unicode(self)[:30]))
logging.debug(' ---> [%-30s] Scheduling feed fetch immediately...' % (self.log_title[:30]))
self.next_scheduled_update = datetime.datetime.utcnow()
r.zadd('scheduled_updates', self.pk, self.next_scheduled_update.strftime('%s'))
@@ -2116,7 +2121,7 @@ class Feed(models.Model):
if queue_size > 1000:
self.schedule_feed_fetch_immediately()
else:
logging.debug(' ---> [%-30s] [%s] ~FB~SBQueuing pushed stories, last pushed %s...' % (unicode(self)[:30], self.pk, latest_push_date_delta))
logging.debug(' ---> [%-30s] [%s] ~FB~SBQueuing pushed stories, last pushed %s...' % (self.log_title[:30], self.pk, latest_push_date_delta))
self.set_next_scheduled_update()
PushFeeds.apply_async(args=(self.pk, xml), queue='push_feeds')
@@ -2575,7 +2580,7 @@ class MStory(mongo.Document):
r.delete('zF:%s' % story_feed_id)
# r2.delete('zF:%s' % story_feed_id)
logging.info(" ---> [%-30s] ~FMSyncing ~SB%s~SN stories to redis" % (feed and feed.title[:30] or story_feed_id, stories.count()))
logging.info(" ---> [%-30s] ~FMSyncing ~SB%s~SN stories to redis" % (feed and feed.log_title[:30] or story_feed_id, stories.count()))
p = r.pipeline()
# p2 = r2.pipeline()
for story in stories:


@@ -63,7 +63,7 @@ class PageImporter(object):
try:
self.fetch_page_timeout(urllib_fallback=urllib_fallback, requests_exception=requests_exception)
except TimeoutError:
logging.user(self.request, ' ***> [%-30s] ~FBPage fetch ~SN~FRfailed~FB due to timeout' % (self.feed))
logging.user(self.request, ' ***> [%-30s] ~FBPage fetch ~SN~FRfailed~FB due to timeout' % (self.feed.log_title[:30]))
@timelimit(10)
def fetch_page_timeout(self, urllib_fallback=False, requests_exception=None):
@@ -95,7 +95,7 @@ class PageImporter(object):
except requests.exceptions.TooManyRedirects:
response = requests.get(feed_link)
except (AttributeError, SocketError, OpenSSLError, PyAsn1Error, TypeError), e:
logging.debug(' ***> [%-30s] Page fetch failed using requests: %s' % (self.feed, e))
logging.debug(' ***> [%-30s] Page fetch failed using requests: %s' % (self.feed.log_title[:30], e))
self.save_no_page()
return
# try:
@@ -127,14 +127,14 @@ class PageImporter(object):
fp = feedparser.parse(self.feed.feed_address)
feed_link = fp.feed.get('link', "")
self.feed.save()
logging.debug(' ***> [%-30s] Page fetch failed: %s' % (self.feed, e))
logging.debug(' ***> [%-30s] Page fetch failed: %s' % (self.feed.log_title[:30], e))
except (urllib2.HTTPError), e:
self.feed.save_page_history(e.code, e.msg, e.fp.read())
except (httplib.IncompleteRead), e:
self.feed.save_page_history(500, "IncompleteRead", e)
except (requests.exceptions.RequestException,
requests.packages.urllib3.exceptions.HTTPError), e:
logging.debug(' ***> [%-30s] Page fetch failed using requests: %s' % (self.feed, e))
logging.debug(' ***> [%-30s] Page fetch failed using requests: %s' % (self.feed.log_title[:30], e))
# mail_feed_error_to_admin(self.feed, e, local_vars=locals())
return self.fetch_page(urllib_fallback=True, requests_exception=e)
except Exception, e:
@@ -183,7 +183,7 @@ class PageImporter(object):
except requests.exceptions.TooManyRedirects:
response = requests.get(story_permalink)
except (AttributeError, SocketError, OpenSSLError, PyAsn1Error, requests.exceptions.ConnectionError), e:
logging.debug(' ***> [%-30s] Original story fetch failed using requests: %s' % (self.feed, e))
logging.debug(' ***> [%-30s] Original story fetch failed using requests: %s' % (self.feed.log_title[:30], e))
return
try:
data = response.text
@@ -213,7 +213,7 @@ class PageImporter(object):
def save_no_page(self):
logging.debug(' ---> [%-30s] ~FYNo original page: %s' % (self.feed, self.feed.feed_link))
logging.debug(' ---> [%-30s] ~FYNo original page: %s' % (self.feed.log_title[:30], self.feed.feed_link))
self.feed.has_page = False
self.feed.save()
self.feed.save_page_history(404, "Feed has no original page.")
@@ -275,7 +275,7 @@ class PageImporter(object):
feed_page = MFeedPage.objects.get(feed_id=self.feed.pk)
# feed_page.page_data = html.encode('utf-8')
if feed_page.page() == html:
logging.debug(' ---> [%-30s] ~FYNo change in page data: %s' % (self.feed.title[:30], self.feed.feed_link))
logging.debug(' ---> [%-30s] ~FYNo change in page data: %s' % (self.feed.log_title[:30], self.feed.feed_link))
else:
feed_page.page_data = html
feed_page.save()
@@ -307,7 +307,7 @@ class PageImporter(object):
try:
feed_page = MFeedPage.objects.get(feed_id=self.feed.pk)
feed_page.delete()
logging.debug(' ---> [%-30s] ~FYTransfering page data to S3...' % (self.feed))
logging.debug(' ---> [%-30s] ~FYTransfering page data to S3...' % (self.feed.log_title[:30]))
except MFeedPage.DoesNotExist:
pass


@@ -58,7 +58,7 @@ class FetchFeed:
start = time.time()
identity = self.get_identity()
log_msg = u'%2s ---> [%-30s] ~FYFetching feed (~FB%d~FY), last update: %s' % (identity,
self.feed.title[:30],
self.feed.log_title[:30],
self.feed.id,
datetime.datetime.now() - self.feed.last_update)
logging.debug(log_msg)
@@ -73,19 +73,19 @@ class FetchFeed:
etag = None
address = qurl(address, add={"_": random.randint(0, 10000)})
logging.debug(u' ---> [%-30s] ~FBForcing fetch: %s' % (
self.feed.title[:30], address))
self.feed.log_title[:30], address))
elif (not self.feed.fetched_once or not self.feed.known_good):
modified = None
etag = None
if self.options.get('feed_xml'):
logging.debug(u' ---> [%-30s] ~FM~BKFeed has been fat pinged. Ignoring fat: %s' % (
self.feed.title[:30], len(self.options.get('feed_xml'))))
self.feed.log_title[:30], len(self.options.get('feed_xml'))))
if self.options.get('fpf'):
self.fpf = self.options.get('fpf')
logging.debug(u' ---> [%-30s] ~FM~BKFeed fetched in real-time with fat ping.' % (
self.feed.title[:30]))
self.feed.log_title[:30]))
return FEED_OK, self.fpf
if 'youtube.com' in address:
@@ -95,7 +95,7 @@ class FetchFeed:
youtube_feed = None
if not youtube_feed:
logging.debug(u' ***> [%-30s] ~FRYouTube fetch failed: %s.' %
(self.feed.title[:30], address))
(self.feed.log_title[:30], address))
return FEED_ERRHTTP, None
self.fpf = feedparser.parse(youtube_feed)
elif re.match('(https?)?://twitter.com/\w+/?$', qurl(address, remove=['_'])):
@@ -103,11 +103,11 @@ class FetchFeed:
twitter_feed = self.fetch_twitter(address)
# except Exception, e:
# logging.debug(u' ***> [%-30s] ~FRTwitter fetch failed: %s: %e' %
# (self.feed.title[:30], address, e))
# (self.feed.log_title[:30], address, e))
# twitter_feed = None
if not twitter_feed:
logging.debug(u' ***> [%-30s] ~FRTwitter fetch failed: %s' %
(self.feed.title[:30], address))
(self.feed.log_title[:30], address))
return FEED_ERRHTTP, None
self.fpf = feedparser.parse(twitter_feed)
@@ -129,7 +129,7 @@ class FetchFeed:
headers['A-IM'] = 'feed'
raw_feed = requests.get(address, headers=headers)
if raw_feed.status_code >= 400:
logging.debug(" ---> [%-30s] ~FRFeed fetch was %s status code, trying fake user agent: %s" % (self.feed.title[:30], raw_feed.status_code, raw_feed.headers))
logging.debug(" ***> [%-30s] ~FRFeed fetch was %s status code, trying fake user agent: %s" % (self.feed.log_title[:30], raw_feed.status_code, raw_feed.headers))
raw_feed = requests.get(address, headers=self.feed.fetch_headers(fake=True))
if raw_feed.content and raw_feed.status_code < 400:
response_headers = raw_feed.headers
@@ -137,9 +137,9 @@ class FetchFeed:
self.fpf = feedparser.parse(smart_unicode(raw_feed.content),
response_headers=response_headers)
if self.options.get('debug', False):
logging.debug(" ---> [%-30s] ~FBFeed fetch status %s: %s length / %s" % (self.feed.title[:30], raw_feed.status_code, len(smart_unicode(raw_feed.content)), raw_feed.headers))
logging.debug(" ---> [%-30s] ~FBFeed fetch status %s: %s length / %s" % (self.feed.log_title[:30], raw_feed.status_code, len(smart_unicode(raw_feed.content)), raw_feed.headers))
except Exception, e:
logging.debug(" ---> [%-30s] ~FRFeed failed to fetch with request, trying feedparser: %s" % (self.feed.title[:30], unicode(e)[:100]))
logging.debug(" ***> [%-30s] ~FRFeed failed to fetch with request, trying feedparser: %s" % (self.feed.log_title[:30], unicode(e)[:100]))
if not self.fpf or self.options.get('force_fp', False):
try:
@@ -149,21 +149,21 @@ class FetchFeed:
modified=modified)
except (TypeError, ValueError, KeyError, EOFError, MemoryError), e:
logging.debug(u' ***> [%-30s] ~FRFeed fetch error: %s' %
(self.feed.title[:30], e))
(self.feed.log_title[:30], e))
pass
if not self.fpf:
try:
logging.debug(u' ***> [%-30s] ~FRTurning off headers...' %
(self.feed.title[:30]))
(self.feed.log_title[:30]))
self.fpf = feedparser.parse(address, agent=self.feed.user_agent)
except (TypeError, ValueError, KeyError, EOFError, MemoryError), e:
logging.debug(u' ***> [%-30s] ~FRFetch failed: %s.' %
(self.feed.title[:30], e))
(self.feed.log_title[:30], e))
return FEED_ERRHTTP, None
logging.debug(u' ---> [%-30s] ~FYFeed fetch in ~FM%.4ss' % (
self.feed.title[:30], time.time() - start))
self.feed.log_title[:30], time.time() - start))
return FEED_OK, self.fpf
@@ -327,13 +327,13 @@ class FetchFeed:
twitter_api = social_services.twitter_api()
except tweepy.error.TweepError, e:
logging.debug(u' ***> [%-30s] ~FRTwitter fetch failed: %s: %s' %
(self.feed.title[:30], address, e))
(self.feed.log_title[:30], address, e))
return
else:
usersubs = UserSubscription.objects.filter(feed=self.feed)
if not usersubs:
logging.debug(u' ***> [%-30s] ~FRTwitter fetch failed: %s: No subscriptions' %
(self.feed.title[:30], address))
(self.feed.log_title[:30], address))
return
for sub in usersubs:
social_services = MSocialServices.get_user(sub.user_id)
@@ -343,19 +343,19 @@ class FetchFeed:
break
except tweepy.error.TweepError, e:
logging.debug(u' ***> [%-30s] ~FRTwitter fetch failed: %s: %s' %
(self.feed.title[:30], address, e))
(self.feed.log_title[:30], address, e))
continue
if not twitter_api:
logging.debug(u' ***> [%-30s] ~FRTwitter fetch failed: %s: No twitter API for %s' %
(self.feed.title[:30], address, usersubs[0].user.username))
(self.feed.log_title[:30], address, usersubs[0].user.username))
return
try:
twitter_user = twitter_api.get_user(username)
except TypeError, e:
logging.debug(u' ***> [%-30s] ~FRTwitter fetch failed, disconnecting twitter: %s: %s' %
(self.feed.title[:30], address, e))
(self.feed.log_title[:30], address, e))
social_services.disconnect_twitter()
return
except tweepy.error.TweepError, e:
@@ -364,22 +364,22 @@ class FetchFeed:
('temporarily locked' in message)):
# Suspended
logging.debug(u' ***> [%-30s] ~FRTwitter failed, user suspended, disconnecting twitter: %s: %s' %
(self.feed.title[:30], address, e))
(self.feed.log_title[:30], address, e))
social_services.disconnect_twitter()
return
elif 'suspended' in message:
logging.debug(u' ***> [%-30s] ~FRTwitter user suspended, disconnecting twitter: %s: %s' %
(self.feed.title[:30], address, e))
(self.feed.log_title[:30], address, e))
social_services.disconnect_twitter()
return
elif 'not found' in message:
logging.debug(u' ***> [%-30s] ~FRTwitter user not found, disconnecting twitter: %s: %s' %
(self.feed.title[:30], address, e))
(self.feed.log_title[:30], address, e))
social_services.disconnect_twitter()
return
elif 'over capacity' in message:
logging.debug(u' ***> [%-30s] ~FRTwitter over capacity, ignoring... %s: %s' %
(self.feed.title[:30], address, e))
(self.feed.log_title[:30], address, e))
return
else:
raise
@@ -389,12 +389,12 @@ class FetchFeed:
except tweepy.error.TweepError, e:
if 'Not authorized' in e.args[0]:
logging.debug(u' ***> [%-30s] ~FRTwitter timeline failed, disconnecting twitter: %s: %s' %
(self.feed.title[:30], address, e))
(self.feed.log_title[:30], address, e))
social_services.disconnect_twitter()
return
elif 'User not found' in e.args[0]:
logging.debug(u' ***> [%-30s] ~FRTwitter user not found, disconnecting twitter: %s: %s' %
(self.feed.title[:30], address, e))
(self.feed.log_title[:30], address, e))
social_services.disconnect_twitter()
return
else:
@@ -500,7 +500,7 @@ class ProcessFeed:
if self.options['verbose']:
if self.fpf.bozo and self.fpf.status != 304:
logging.debug(u' ---> [%-30s] ~FRBOZO exception: %s ~SB(%s entries)' % (
self.feed.title[:30],
self.feed.log_title[:30],
self.fpf.bozo_exception,
len(self.fpf.entries)))
@@ -523,14 +523,14 @@ class ProcessFeed:
self.feed.feed_address = address
if not self.feed.known_good:
self.feed.fetched_once = True
logging.debug(" ---> [%-30s] ~SB~SK~FRFeed is %s'ing. Refetching..." % (self.feed.title[:30], self.fpf.status))
logging.debug(" ---> [%-30s] ~SB~SK~FRFeed is %s'ing. Refetching..." % (self.feed.log_title[:30], self.fpf.status))
self.feed = self.feed.schedule_feed_fetch_immediately()
if not self.fpf.entries:
self.feed = self.feed.save()
self.feed.save_feed_history(self.fpf.status, "HTTP Redirect")
return FEED_ERRHTTP, ret_values
if self.fpf.status >= 400:
logging.debug(" ---> [%-30s] ~SB~FRHTTP Status code: %s. Checking address..." % (self.feed.title[:30], self.fpf.status))
logging.debug(" ---> [%-30s] ~SB~FRHTTP Status code: %s. Checking address..." % (self.feed.log_title[:30], self.fpf.status))
fixed_feed = None
if not self.feed.known_good:
fixed_feed, feed = self.feed.check_feed_link_for_feed_address()
@@ -542,13 +542,13 @@ class ProcessFeed:
return FEED_ERRHTTP, ret_values
if not self.fpf:
logging.debug(" ---> [%-30s] ~SB~FRFeed is Non-XML. No feedparser feed either!" % (self.feed.title[:30]))
logging.debug(" ---> [%-30s] ~SB~FRFeed is Non-XML. No feedparser feed either!" % (self.feed.log_title[:30]))
self.feed.save_feed_history(551, "Broken feed")
return FEED_ERRHTTP, ret_values
if self.fpf and not self.fpf.entries:
if self.fpf.bozo and isinstance(self.fpf.bozo_exception, feedparser.NonXMLContentType):
logging.debug(" ---> [%-30s] ~SB~FRFeed is Non-XML. %s entries. Checking address..." % (self.feed.title[:30], len(self.fpf.entries)))
logging.debug(" ---> [%-30s] ~SB~FRFeed is Non-XML. %s entries. Checking address..." % (self.feed.log_title[:30], len(self.fpf.entries)))
fixed_feed = None
if not self.feed.known_good:
fixed_feed, feed = self.feed.check_feed_link_for_feed_address()
@@ -559,7 +559,7 @@ class ProcessFeed:
self.feed = self.feed.save()
return FEED_ERRPARSE, ret_values
elif self.fpf.bozo and isinstance(self.fpf.bozo_exception, xml.sax._exceptions.SAXException):
logging.debug(" ---> [%-30s] ~SB~FRFeed has SAX/XML parsing issues. %s entries. Checking address..." % (self.feed.title[:30], len(self.fpf.entries)))
logging.debug(" ---> [%-30s] ~SB~FRFeed has SAX/XML parsing issues. %s entries. Checking address..." % (self.feed.log_title[:30], len(self.fpf.entries)))
fixed_feed = None
if not self.feed.known_good:
fixed_feed, feed = self.feed.check_feed_link_for_feed_address()
@@ -613,7 +613,7 @@ class ProcessFeed:
if self.options['force'] and new_feed_link:
new_feed_link = qurl(new_feed_link, remove=['_'])
if new_feed_link != self.feed.feed_link:
logging.debug(" ---> [%-30s] ~SB~FRFeed's page is different: %s to %s" % (self.feed.title[:30], self.feed.feed_link, new_feed_link))
logging.debug(" ---> [%-30s] ~SB~FRFeed's page is different: %s to %s" % (self.feed.log_title[:30], self.feed.feed_link, new_feed_link))
redirects, non_redirects = self.feed.count_redirects_in_history('page')
self.feed.save_page_history(301, "HTTP Redirect (%s to go)" % (10-len(redirects)))
if len(redirects) >= 10 or len(non_redirects) == 0:
@@ -646,14 +646,14 @@ class ProcessFeed:
new_story_guid = unicode(story.get('published'))
if self.options['verbose']:
logging.debug(u' ---> [%-30s] ~FBReplacing guid (%s) with timestamp: %s' % (
self.feed.title[:30],
self.feed.log_title[:30],
story.get('guid'), new_story_guid))
story['guid'] = new_story_guid
else:
new_story_guid = Feed.get_permalink(story)
if self.options['verbose']:
logging.debug(u' ---> [%-30s] ~FBReplacing guid (%s) with permalink: %s' % (
self.feed.title[:30],
self.feed.log_title[:30],
story.get('guid'), new_story_guid))
story['guid'] = new_story_guid
story['story_hash'] = MStory.feed_guid_hash_unsaved(self.feed.pk, story.get('guid'))
@@ -690,17 +690,17 @@ class ProcessFeed:
self.feed.active_subscribers > 0 and
(push_expired or not self.feed.is_push or self.options.get('force'))):
logging.debug(u' ---> [%-30s] ~BB~FW%sSubscribing to PuSH hub: %s' % (
self.feed.title[:30],
self.feed.log_title[:30],
"~SKRe-~SN" if push_expired else "", hub_url))
try:
PushSubscription.objects.subscribe(self_url, feed=self.feed, hub=hub_url)
except TimeoutError:
logging.debug(u' ---> [%-30s] ~BB~FW~FRTimed out~FW subscribing to PuSH hub: %s' % (
self.feed.title[:30], hub_url))
self.feed.log_title[:30], hub_url))
elif (self.feed.is_push and
(self.feed.active_subscribers <= 0 or not hub_url)):
logging.debug(u' ---> [%-30s] ~BB~FWTurning off PuSH, no hub found' % (
self.feed.title[:30]))
self.feed.log_title[:30]))
self.feed.is_push = False
self.feed = self.feed.save()
@@ -710,7 +710,7 @@ class ProcessFeed:
# All Done
logging.debug(u' ---> [%-30s] ~FYParsed Feed: %snew=%s~SN~FY %sup=%s~SN same=%s%s~SN %serr=%s~SN~FY total=~SB%s' % (
self.feed.title[:30],
self.feed.log_title[:30],
'~FG~SB' if ret_values['new'] else '', ret_values['new'],
'~FY~SB' if ret_values['updated'] else '', ret_values['updated'],
'~SB' if ret_values['same'] else '', ret_values['same'],
@@ -724,7 +724,7 @@ class ProcessFeed:
if self.options['verbose']:
logging.debug(u' ---> [%-30s] ~FBTIME: feed parse in ~FM%.4ss' % (
self.feed.title[:30], time.time() - start))
self.feed.log_title[:30], time.time() - start))
return FEED_OK, ret_values
@@ -796,7 +796,7 @@ class Dispatcher:
rand = "-"
if skip:
logging.debug(' ---> [%-30s] ~BGFaking fetch, skipping (%s/month, %s subs, %s < %s)...' % (
feed.title[:30],
feed.log_title[:30],
weight,
feed.num_subscribers,
rand, quick))
@@ -819,17 +819,17 @@ class Dispatcher:
feed.fetched_once = True
feed = feed.save()
if self.options['force'] or random.random() <= 0.02:
logging.debug(' ---> [%-30s] ~FBPerforming feed cleanup...' % (feed.title[:30],))
logging.debug(' ---> [%-30s] ~FBPerforming feed cleanup...' % (feed.log_title[:30],))
start_cleanup = time.time()
feed.sync_redis()
logging.debug(' ---> [%-30s] ~FBDone with feed cleanup. Took ~SB%.4s~SN sec.' % (feed.title[:30], time.time() - start_cleanup))
logging.debug(' ---> [%-30s] ~FBDone with feed cleanup. Took ~SB%.4s~SN sec.' % (feed.log_title[:30], time.time() - start_cleanup))
try:
self.count_unreads_for_subscribers(feed)
except TimeoutError:
logging.debug(' ---> [%-30s] Unread count took too long...' % (feed.title[:30],))
logging.debug(' ---> [%-30s] Unread count took too long...' % (feed.log_title[:30],))
if self.options['verbose']:
logging.debug(u' ---> [%-30s] ~FBTIME: unread count in ~FM%.4ss' % (
feed.title[:30], time.time() - start))
feed.log_title[:30], time.time() - start))
except urllib2.HTTPError, e:
logging.debug(' ---> [%-30s] ~FRFeed throws HTTP error: ~SB%s' % (unicode(feed_id)[:30], e.fp.read()))
feed_code = e.code
@@ -845,7 +845,7 @@ class Dispatcher:
feed_code = 559
feed.save_feed_history(feed_code, 'Timeout', e)
except TimeoutError, e:
logging.debug(' ---> [%-30s] ~FRFeed fetch timed out...' % (feed.title[:30]))
logging.debug(' ---> [%-30s] ~FRFeed fetch timed out...' % (feed.log_title[:30]))
feed_code = 505
feed.save_feed_history(feed_code, 'Timeout', e)
fetched_feed = None
@@ -889,7 +889,7 @@ class Dispatcher:
(ret_feed == FEED_OK or
(ret_feed == FEED_SAME and feed.stories_last_month > 10)))):
logging.debug(u' ---> [%-30s] ~FYFetching page: %s' % (feed.title[:30], feed.feed_link))
logging.debug(u' ---> [%-30s] ~FYFetching page: %s' % (feed.log_title[:30], feed.feed_link))
page_importer = PageImporter(feed)
try:
page_data = page_importer.fetch_page()
@@ -899,7 +899,7 @@ class Dispatcher:
page_data = None
feed.save_feed_history(557, 'Timeout', e)
except TimeoutError, e:
logging.debug(' ---> [%-30s] ~FRPage fetch timed out...' % (feed.title[:30]))
logging.debug(' ---> [%-30s] ~FRPage fetch timed out...' % (feed.log_title[:30]))
page_data = None
feed.save_page_history(555, 'Timeout', '')
except Exception, e:
@@ -916,7 +916,7 @@ class Dispatcher:
settings.RAVEN_CLIENT.captureException()
feed = self.refresh_feed(feed.pk)
logging.debug(u' ---> [%-30s] ~FYFetching icon: %s' % (feed.title[:30], feed.feed_link))
logging.debug(u' ---> [%-30s] ~FYFetching icon: %s' % (feed.log_title[:30], feed.feed_link))
force = self.options['force']
if random.random() > .99:
force = True
@@ -928,7 +928,7 @@ class Dispatcher:
logging.debug(" ---> [%-30s] ~BR~FWTime limit hit!~SB~FR Moving on to next feed..." % feed)
feed.save_feed_history(558, 'Timeout', e)
except TimeoutError, e:
logging.debug(' ---> [%-30s] ~FRIcon fetch timed out...' % (feed.title[:30]))
logging.debug(' ---> [%-30s] ~FRIcon fetch timed out...' % (feed.log_title[:30]))
feed.save_page_history(556, 'Timeout', '')
except Exception, e:
logging.debug('[%d] ! -------------------------' % (feed_id,))
@@ -941,7 +941,7 @@ class Dispatcher:
settings.RAVEN_CLIENT):
settings.RAVEN_CLIENT.captureException()
else:
logging.debug(u' ---> [%-30s] ~FBSkipping page fetch: (%s on %s stories) %s' % (feed.title[:30], self.feed_trans[ret_feed], feed.stories_last_month, '' if feed.has_page else ' [HAS NO PAGE]'))
logging.debug(u' ---> [%-30s] ~FBSkipping page fetch: (%s on %s stories) %s' % (feed.log_title[:30], self.feed_trans[ret_feed], feed.stories_last_month, '' if feed.has_page else ' [HAS NO PAGE]'))
feed = self.refresh_feed(feed.pk)
delta = time.time() - start_time
@@ -951,13 +951,13 @@ class Dispatcher:
try:
feed = feed.save(update_fields=['last_load_time', 'fetched_once'])
except IntegrityError:
logging.debug(" ---> [%-30s] ~FRIntegrityError on feed: %s" % (feed.title[:30], feed.feed_address,))
logging.debug(" ***> [%-30s] ~FRIntegrityError on feed: %s" % (feed.log_title[:30], feed.feed_address,))
if ret_entries and ret_entries['new']:
self.publish_to_subscribers(feed, ret_entries['new'])
done_msg = (u'%2s ---> [%-30s] ~FYProcessed in ~FM~SB%.4ss~FY~SN (~FB%s~FY) [%s]' % (
identity, feed.title[:30], delta,
identity, feed.log_title[:30], delta,
feed.pk, self.feed_trans[ret_feed],))
logging.debug(done_msg)
total_duration = time.time() - start_duration
@@ -978,9 +978,9 @@ class Dispatcher:
r = redis.Redis(connection_pool=settings.REDIS_PUBSUB_POOL)
listeners_count = r.publish(str(feed.pk), 'story:new_count:%s' % new_count)
if listeners_count:
logging.debug(" ---> [%-30s] ~FMPublished to %s subscribers" % (feed.title[:30], listeners_count))
logging.debug(" ---> [%-30s] ~FMPublished to %s subscribers" % (feed.log_title[:30], listeners_count))
except redis.ConnectionError:
logging.debug(" ***> [%-30s] ~BMRedis is unavailable for real-time." % (feed.title[:30],))
logging.debug(" ***> [%-30s] ~BMRedis is unavailable for real-time." % (feed.log_title[:30],))
def count_unreads_for_subscribers(self, feed):
user_subs = UserSubscription.objects.filter(feed=feed,
@@ -1010,15 +1010,15 @@ class Dispatcher:
.read_preference(pymongo.ReadPreference.PRIMARY)
missing_stories = Feed.format_stories(missing_stories, feed.pk)
stories = missing_stories + stories
logging.debug(u' ---> [%-30s] ~FYFound ~SB~FC%s(of %s)/%s~FY~SN un-secondaried stories while computing scores' % (feed.title[:30], len(missing_stories), len(missing_story_hashes), len(stories)))
logging.debug(u' ---> [%-30s] ~FYFound ~SB~FC%s(of %s)/%s~FY~SN un-secondaried stories while computing scores' % (feed.log_title[:30], len(missing_stories), len(missing_story_hashes), len(stories)))
cache.set("S:%s" % feed.pk, stories, 60)
logging.debug(u' ---> [%-30s] ~FYComputing scores: ~SB%s stories~SN with ~SB%s subscribers ~SN(%s/%s/%s)' % (
feed.title[:30], len(stories), user_subs.count(),
feed.log_title[:30], len(stories), user_subs.count(),
feed.num_subscribers, feed.active_subscribers, feed.premium_subscribers))
self.calculate_feed_scores_with_stories(user_subs, stories)
elif self.options.get('mongodb_replication_lag'):
logging.debug(u' ---> [%-30s] ~BR~FYSkipping computing scores: ~SB%s seconds~SN of mongodb lag' % (
feed.title[:30], self.options.get('mongodb_replication_lag')))
feed.log_title[:30], self.options.get('mongodb_replication_lag')))
@timelimit(10)
def calculate_feed_scores_with_stories(self, user_subs, stories):