Combing through all updates to feeds table when fetching feed.

This commit is contained in:
Samuel Clay 2015-07-22 13:53:20 -07:00
parent da26ecdb46
commit 9cd8124419
3 changed files with 59 additions and 25 deletions

View file

@@ -494,9 +494,10 @@ class Feed(models.Model):
if not last_story_date or seconds_timesince(last_story_date) < 0:
last_story_date = datetime.datetime.now()
self.last_story_date = last_story_date
self.save()
if last_story_date != self.last_story_date:
self.last_story_date = last_story_date
self.save(update_fields=['last_story_date'])
@classmethod
def setup_feeds_for_premium_subscribers(cls, feed_ids):
@@ -645,6 +646,7 @@ class Feed(models.Model):
feed_ids = list(set(feed_ids))
subs = UserSubscription.objects.filter(feed__in=feed_ids)
original_num_subscribers = self.num_subscribers
self.num_subscribers = subs.count()
active_subs = UserSubscription.objects.filter(
@@ -652,6 +654,7 @@ class Feed(models.Model):
active=True,
user__profile__last_seen_on__gte=SUBSCRIBER_EXPIRE
)
original_active_subs = self.active_subscribers
self.active_subscribers = active_subs.count()
premium_subs = UserSubscription.objects.filter(
@@ -659,6 +662,7 @@ class Feed(models.Model):
active=True,
user__profile__is_premium=True
)
original_premium_subscribers = self.premium_subscribers
self.premium_subscribers = premium_subs.count()
active_premium_subscribers = UserSubscription.objects.filter(
@@ -667,9 +671,15 @@ class Feed(models.Model):
user__profile__is_premium=True,
user__profile__last_seen_on__gte=SUBSCRIBER_EXPIRE
)
original_active_premium_subscribers = self.active_premium_subscribers
self.active_premium_subscribers = active_premium_subscribers.count()
self.save()
if (self.num_subscribers != original_num_subscribers or
self.active_subscribers != original_active_subs or
self.premium_subscribers != original_premium_subscribers or
self.active_premium_subscribers != original_active_premium_subscribers):
self.save(update_fields=['num_subscribers', 'active_subscribers',
'premium_subscribers', 'active_premium_subscribers'])
if verbose:
if self.num_subscribers <= 1:
@@ -753,9 +763,9 @@ class Feed(models.Model):
month_ago = datetime.datetime.utcnow() - datetime.timedelta(days=30)
stories_last_month = MStory.objects(story_feed_id=self.pk,
story_date__gte=month_ago).count()
self.stories_last_month = stories_last_month
self.save()
if self.stories_last_month != stories_last_month:
self.stories_last_month = stories_last_month
self.save(update_fields=['stories_last_month'])
if verbose:
print " ---> %s [%s]: %s stories last month" % (self.feed_title, self.pk,
@@ -828,13 +838,18 @@ class Feed(models.Model):
months.append((key, dates.get(key, 0)))
total += dates.get(key, 0)
month_count += 1
original_story_count_history = self.data.story_count_history
self.data.story_count_history = json.encode(months)
self.data.save()
if self.data.story_count_history != original_story_count_history:
self.data.save(update_fields=['story_count_history'])
original_average_stories_per_month = self.average_stories_per_month
if not total or not month_count:
self.average_stories_per_month = 0
else:
self.average_stories_per_month = int(round(total / float(month_count)))
self.save()
if self.average_stories_per_month != original_average_stories_per_month:
self.save(update_fields=['average_stories_per_month'])
def save_classifier_counts(self):
@@ -892,11 +907,14 @@ class Feed(models.Model):
original_feed_id = int(self.pk)
if getattr(settings, 'TEST_DEBUG', False):
original_feed_address = self.feed_address
original_feed_link = self.feed_link
self.feed_address = self.feed_address.replace("%(NEWSBLUR_DIR)s", settings.NEWSBLUR_DIR)
if self.feed_link:
self.feed_link = self.feed_link.replace("%(NEWSBLUR_DIR)s", settings.NEWSBLUR_DIR)
self.save(update_fields=['feed_address', 'feed_link'])
if self.feed_address != original_feed_address or self.feed_link != original_feed_link:
self.save(update_fields=['feed_address', 'feed_link'])
options = {
'verbose': kwargs.get('verbose'),
'timeout': 10,
@@ -1139,8 +1157,9 @@ class Feed(models.Model):
# popular tags the size of a small planet. I'm looking at you
# Tumblr writers.
if len(popular_tags) < 1024:
self.data.popular_tags = popular_tags
self.data.save()
if self.data.popular_tags != popular_tags:
self.data.popular_tags = popular_tags
self.data.save(update_fields=['popular_tags'])
return
tags_list = []
@@ -1160,8 +1179,9 @@ class Feed(models.Model):
popular_authors = json.encode(feed_authors)
if len(popular_authors) < 1023:
self.data.popular_authors = popular_authors
self.data.save()
if self.data.popular_authors != popular_authors:
self.data.popular_authors = popular_authors
self.data.save(update_fields=['popular_authors'])
return
if len(feed_authors) > 1:

View file

@@ -244,16 +244,29 @@ def exception_retry(request):
raise Http404
feed.schedule_feed_fetch_immediately()
feed.has_page_exception = False
feed.has_feed_exception = False
feed.active = True
changed = False
if feed.has_page_exception:
changed = True
feed.has_page_exception = False
if feed.has_feed_exception:
changed = True
feed.has_feed_exception = False
if not feed.active:
changed = True
feed.active = True
if changed:
feed.save(update_fields=['has_page_exception', 'has_feed_exception', 'active'])
original_fetched_once = feed.fetched_once
if reset_fetch:
logging.user(request, "~FRRefreshing exception feed: ~SB%s" % (feed))
feed.fetched_once = False
else:
logging.user(request, "~FRForcing refreshing feed: ~SB%s" % (feed))
feed.fetched_once = True
feed.save()
if feed.fetched_once != original_fetched_once:
feed.save(update_fields=['fetched_once'])
feed = feed.update(force=True, compute_scores=False, verbose=True)
feed = Feed.get_by_id(feed.pk)

View file

@@ -386,7 +386,7 @@ class ProcessFeed:
self.feed.data.feed_tagline = utf8encode(tagline)
if self.feed.data.feed_tagline != original_tagline:
self.feed.data.save(update_fields=['feed_tagline'])
if not self.feed.feed_link_locked:
new_feed_link = self.fpf.feed.get('link') or self.fpf.feed.get('id') or self.feed.feed_link
if new_feed_link != self.feed.feed_link:
@@ -442,7 +442,7 @@ class ProcessFeed:
# story_date__gte=start_date,
# story_feed_id=self.feed.pk
))
ret_values = self.feed.add_update_stories(stories, existing_stories,
verbose=self.options['verbose'],
updates_off=self.options['updates_off'])
@@ -479,7 +479,7 @@ class ProcessFeed:
self.feed.title[:30]))
self.feed.is_push = False
self.feed = self.feed.save()
logging.debug(u' ---> [%-30s] ~FYParsed Feed: %snew=%s~SN~FY %sup=%s~SN same=%s%s~SN %serr=%s~SN~FY total=~SB%s' % (
self.feed.title[:30],
'~FG~SB' if ret_values['new'] else '', ret_values['new'],
@@ -492,7 +492,7 @@ class ProcessFeed:
self.feed.trim_feed()
self.feed.expire_redis()
self.feed.save_feed_history(200, "OK")
if self.options['verbose']:
logging.debug(u' ---> [%-30s] ~FBTIME: feed parse in ~FM%.4ss' % (
self.feed.title[:30], time.time() - start))
@@ -643,6 +643,7 @@ class Dispatcher:
if not feed: continue
feed = self.refresh_feed(feed.pk)
if ((self.options['force']) or
(random.random() > .9) or
(fetched_feed and
@@ -672,7 +673,7 @@ class Dispatcher:
if (not settings.DEBUG and hasattr(settings, 'RAVEN_CLIENT') and
settings.RAVEN_CLIENT):
settings.RAVEN_CLIENT.captureException()
feed = self.refresh_feed(feed.pk)
logging.debug(u' ---> [%-30s] ~FYFetching icon: %s' % (feed.title[:30], feed.feed_link))
force = self.options['force']
@@ -722,7 +723,7 @@ class Dispatcher:
total=total_duration, feed_code=feed_code)
self.feed_stats[ret_feed] += 1
if len(feed_queue) == 1:
return feed