diff --git a/apps/rss_feeds/models.py b/apps/rss_feeds/models.py index bb0fdf06e..180aaedde 100644 --- a/apps/rss_feeds/models.py +++ b/apps/rss_feeds/models.py @@ -4,7 +4,6 @@ import random import re import math import mongoengine as mongo -import redis import zlib import urllib import hashlib @@ -13,7 +12,6 @@ from operator import itemgetter # from nltk.collocations import TrigramCollocationFinder, BigramCollocationFinder, TrigramAssocMeasures, BigramAssocMeasures from django.db import models from django.db import IntegrityError -from django.core.cache import cache from django.conf import settings from django.db.models.query import QuerySet from mongoengine.queryset import OperationError @@ -287,6 +285,10 @@ class Feed(models.Model): self.save_feed_history(505, 'Timeout', '') feed_address = None + if feed_address: + self.has_feed_exception = True + self.schedule_feed_fetch_immediately() + return not not feed_address def save_feed_history(self, status_code, message, exception=None): @@ -304,7 +306,8 @@ class Feed(models.Model): # for history in old_fetch_histories: # history.delete() if status_code not in (200, 304): - self.count_errors_in_history('feed', status_code) + errors, non_errors = self.count_errors_in_history('feed', status_code) + self.set_next_scheduled_update(error_count=len(errors), non_error_count=len(non_errors)) elif self.has_feed_exception: self.has_feed_exception = False self.active = True @@ -333,8 +336,8 @@ class Feed(models.Model): history_class.objects(feed_id=self.pk)[:50]) non_errors = [h for h in fetch_history if int(h) in (200, 304)] errors = [h for h in fetch_history if int(h) not in (200, 304)] - - if len(non_errors) == 0 and len(errors) > 1: + + if len(non_errors) == 0 and len(errors) > 1: if exception_type == 'feed': self.has_feed_exception = True self.active = False @@ -345,6 +348,10 @@ class Feed(models.Model): elif self.exception_code > 0: self.active = True self.exception_code = 0 + if exception_type == 'feed': + 
self.has_feed_exception = False + elif exception_type == 'page': + self.has_page_exception = False self.save() return errors, non_errors @@ -1007,11 +1014,12 @@ class Feed(models.Model): return total, random_factor*2 - def set_next_scheduled_update(self, multiplier=1): + def set_next_scheduled_update(self, error_count=0, non_error_count=0): total, random_factor = self.get_next_scheduled_update(force=True, verbose=False) - if multiplier > 1: - total = total * multiplier + if error_count: + logging.debug(' ---> [%-30s] ~FBScheduling feed fetch geometrically: ~SB%s errors, %s non-errors' % (unicode(self)[:30], error_count, non_error_count)) + total = total * error_count next_scheduled_update = datetime.datetime.utcnow() + datetime.timedelta( minutes = total + random_factor) @@ -1022,14 +1030,11 @@ class Feed(models.Model): self.save() def schedule_feed_fetch_immediately(self): + logging.debug(' ---> [%-30s] Scheduling feed fetch immediately...' % (unicode(self)[:30])) self.next_scheduled_update = datetime.datetime.utcnow() self.save() - def schedule_feed_fetch_geometrically(self): - errors, non_errors = self.count_errors_in_history('feed') - self.set_next_scheduled_update(multiplier=len(errors)) - # def calculate_collocations_story_content(self, # collocation_measures=TrigramAssocMeasures, # collocation_finder=TrigramCollocationFinder): diff --git a/utils/feed_fetcher.py b/utils/feed_fetcher.py index f629b292f..9d3ad34bb 100644 --- a/utils/feed_fetcher.py +++ b/utils/feed_fetcher.py @@ -47,11 +47,10 @@ class FetchFeed: datetime.datetime.now() - self.feed.last_update) logging.debug(log_msg) - self.feed.set_next_scheduled_update() etag=self.feed.etag modified = self.feed.last_modified.utctimetuple()[:7] if self.feed.last_modified else None - if self.options.get('force') or not self.feed.fetched_once: + if self.options.get('force') or not self.feed.fetched_once or not self.feed.known_good: modified = None etag = None @@ -126,10 +125,9 @@ class ProcessFeed: if 
self.fpf.status in (302, 301): if not self.fpf.href.endswith('feedburner.com/atom.xml'): self.feed.feed_address = self.fpf.href - if not self.feed.fetched_once: - self.feed.has_feed_exception = True + if not self.feed.known_good: self.feed.fetched_once = True - logging.debug(" ---> [%-30s] Feed is 302'ing, but it's not new. Refetching..." % (unicode(self.feed)[:30])) + logging.debug(" ---> [%-30s] Feed is %s'ing. Refetching..." % (unicode(self.feed)[:30], self.fpf.status)) self.feed.schedule_feed_fetch_immediately() if not self.fpf.entries: self.feed.save() @@ -142,9 +140,6 @@ class ProcessFeed: fixed_feed = self.feed.check_feed_link_for_feed_address() if not fixed_feed: self.feed.save_feed_history(self.fpf.status, "HTTP Error") - else: - self.feed.has_feed_exception = True - self.feed.schedule_feed_fetch_geometrically() self.feed.save() return FEED_ERRHTTP, ret_values @@ -156,9 +151,6 @@ class ProcessFeed: fixed_feed = self.feed.check_feed_link_for_feed_address() if not fixed_feed: self.feed.save_feed_history(502, 'Non-xml feed', self.fpf.bozo_exception) - else: - self.feed.has_feed_exception = True - self.feed.schedule_feed_fetch_immediately() self.feed.save() return FEED_ERRPARSE, ret_values elif self.fpf.bozo and isinstance(self.fpf.bozo_exception, xml.sax._exceptions.SAXException): @@ -169,9 +161,6 @@ class ProcessFeed: fixed_feed = self.feed.check_feed_link_for_feed_address() if not fixed_feed: self.feed.save_feed_history(503, 'SAX Exception', self.fpf.bozo_exception) - else: - self.feed.has_feed_exception = True - self.feed.schedule_feed_fetch_immediately() self.feed.save() return FEED_ERRPARSE, ret_values