Adding new feeds db column 'known_good', for forcing updates of feeds that were once good but are now throwing 500s.

This commit is contained in:
Samuel Clay 2012-02-01 17:59:46 -08:00
parent 967b13f39f
commit c9cb7dcbc0
4 changed files with 109 additions and 18 deletions

View file

@ -46,4 +46,12 @@ class Command(BaseCommand):
active_subscribers__gte=1, active_subscribers__gte=1,
active=True active=True
).order_by('?') ).order_by('?')
if feeds: Feed.task_feeds(feeds)
feeds = Feed.objects.filter(
last_update__lte=day,
active_subscribers__gte=1,
active=False,
known_good=True
).order_by('?')
if feeds: Feed.task_feeds(feeds) if feeds: Feed.task_feeds(feeds)

View file

@ -0,0 +1,83 @@
# encoding: utf-8
import datetime
from south.db import db
from south.v2 import SchemaMigration
from django.db import models
class Migration(SchemaMigration):
    """South schema migration introducing the ``known_good`` flag on feeds.

    ``forwards`` adds an indexed boolean ``known_good`` column to the
    ``feeds`` table; ``backwards`` removes that column again.
    """

    def forwards(self, orm):
        """Add the ``known_good`` column to the ``feeds`` table.

        The column is built from Django's ``BooleanField`` with
        ``default=False`` and ``db_index=True``.
        """
        boolean_field_class = self.gf('django.db.models.fields.BooleanField')
        known_good_field = boolean_field_class(default=False, db_index=True)
        # NOTE(review): keep_default=False is forwarded to South unchanged;
        # confirm its effect on the column default against the South docs.
        db.add_column('feeds', 'known_good', known_good_field, keep_default=False)

    def backwards(self, orm):
        """Remove the ``known_good`` column from the ``feeds`` table."""
        db.delete_column('feeds', 'known_good')

    # Frozen description of the rss_feeds models as of this migration,
    # including the new 'known_good' field on 'rss_feeds.feed'.
    models = {
        'rss_feeds.duplicatefeed': {
            'Meta': {'object_name': 'DuplicateFeed'},
            'duplicate_address': ('django.db.models.fields.CharField', [], {'max_length': '255'}),
            'duplicate_feed_id': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True'}),
            'feed': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'duplicate_addresses'", 'to': "orm['rss_feeds.Feed']"}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'})
        },
        'rss_feeds.feed': {
            'Meta': {'ordering': "['feed_title']", 'object_name': 'Feed', 'db_table': "'feeds'"},
            'active': ('django.db.models.fields.BooleanField', [], {'default': 'True', 'db_index': 'True'}),
            'active_premium_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1', 'db_index': 'True'}),
            'active_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1', 'db_index': 'True'}),
            'average_stories_per_month': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            'branch_from_feed': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['rss_feeds.Feed']", 'null': 'True', 'blank': 'True'}),
            'creation': ('django.db.models.fields.DateField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            'days_to_trim': ('django.db.models.fields.IntegerField', [], {'default': '90'}),
            'etag': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True', 'blank': 'True'}),
            'exception_code': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            'favicon_color': ('django.db.models.fields.CharField', [], {'max_length': '6', 'null': 'True', 'blank': 'True'}),
            'favicon_not_found': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'feed_address': ('django.db.models.fields.URLField', [], {'max_length': '255'}),
            'feed_address_locked': ('django.db.models.fields.NullBooleanField', [], {'default': 'False', 'null': 'True', 'blank': 'True'}),
            'feed_link': ('django.db.models.fields.URLField', [], {'default': "''", 'max_length': '1000', 'null': 'True', 'blank': 'True'}),
            'feed_link_locked': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'feed_title': ('django.db.models.fields.CharField', [], {'default': "'[Untitled]'", 'max_length': '255', 'null': 'True', 'blank': 'True'}),
            'fetched_once': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'has_feed_exception': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}),
            'has_page': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
            'has_page_exception': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}),
            'hash_address_and_link': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '64', 'db_index': 'True'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'known_good': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}),
            'last_load_time': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            'last_modified': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'blank': 'True'}),
            'last_update': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}),
            'min_to_decay': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            'next_scheduled_update': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}),
            'num_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1'}),
            'premium_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1'}),
            'queued_date': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}),
            'stories_last_month': ('django.db.models.fields.IntegerField', [], {'default': '0'})
        },
        'rss_feeds.feeddata': {
            'Meta': {'object_name': 'FeedData'},
            'feed': ('utils.fields.AutoOneToOneField', [], {'related_name': "'data'", 'unique': 'True', 'to': "orm['rss_feeds.Feed']"}),
            'feed_classifier_counts': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
            'feed_tagline': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'popular_authors': ('django.db.models.fields.CharField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}),
            'popular_tags': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
            'story_count_history': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'})
        },
        'rss_feeds.feedloadtime': {
            'Meta': {'object_name': 'FeedLoadtime'},
            'date_accessed': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'}),
            'feed': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['rss_feeds.Feed']"}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'loadtime': ('django.db.models.fields.FloatField', [], {})
        }
    }

    # Apps whose models are listed in full in the frozen snapshot above.
    complete_apps = ['rss_feeds']

View file

@ -50,6 +50,7 @@ class Feed(models.Model):
branch_from_feed = models.ForeignKey('Feed', blank=True, null=True, db_index=True) branch_from_feed = models.ForeignKey('Feed', blank=True, null=True, db_index=True)
last_update = models.DateTimeField(db_index=True) last_update = models.DateTimeField(db_index=True)
fetched_once = models.BooleanField(default=False) fetched_once = models.BooleanField(default=False)
known_good = models.BooleanField(default=False, db_index=True)
has_feed_exception = models.BooleanField(default=False, db_index=True) has_feed_exception = models.BooleanField(default=False, db_index=True)
has_page_exception = models.BooleanField(default=False, db_index=True) has_page_exception = models.BooleanField(default=False, db_index=True)
has_page = models.BooleanField(default=True) has_page = models.BooleanField(default=True)

View file

@ -1,8 +1,13 @@
# from apps.rss_feeds.models import FeedXML import time
import datetime
import traceback
import multiprocessing
import urllib2
import xml.sax
import redis
from django.core.cache import cache from django.core.cache import cache
from django.conf import settings from django.conf import settings
from django.db import IntegrityError from django.db import IntegrityError
# from mongoengine.queryset import Q
from apps.reader.models import UserSubscription, MUserStory from apps.reader.models import UserSubscription, MUserStory
from apps.rss_feeds.models import Feed, MStory from apps.rss_feeds.models import Feed, MStory
from apps.rss_feeds.page_importer import PageImporter from apps.rss_feeds.page_importer import PageImporter
@ -11,18 +16,10 @@ from utils import feedparser
from utils.story_functions import pre_process_story from utils.story_functions import pre_process_story
from utils import log as logging from utils import log as logging
from utils.feed_functions import timelimit, TimeoutError, mail_feed_error_to_admin, utf8encode from utils.feed_functions import timelimit, TimeoutError, mail_feed_error_to_admin, utf8encode
import time
import datetime
import traceback
import multiprocessing
import urllib2
import xml.sax
import redis
# Refresh feed code adapted from Feedjack. # Refresh feed code adapted from Feedjack.
# http://feedjack.googlecode.com # http://feedjack.googlecode.com
SLOWFEED_WARNING = 10
ENTRY_NEW, ENTRY_UPDATED, ENTRY_SAME, ENTRY_ERR = range(4) ENTRY_NEW, ENTRY_UPDATED, ENTRY_SAME, ENTRY_ERR = range(4)
FEED_OK, FEED_SAME, FEED_ERRPARSE, FEED_ERRHTTP, FEED_ERREXC = range(5) FEED_OK, FEED_SAME, FEED_ERRPARSE, FEED_ERRHTTP, FEED_ERREXC = range(5)
@ -132,6 +129,8 @@ class ProcessFeed:
if not self.feed.fetched_once: if not self.feed.fetched_once:
self.feed.has_feed_exception = True self.feed.has_feed_exception = True
self.feed.fetched_once = True self.feed.fetched_once = True
self.feed.known_good = True
logging.debug(" ---> [%-30s] Feed is 302'ing, but it's not new. Refetching..." % (unicode(self.feed)[:30]))
self.feed.schedule_feed_fetch_immediately() self.feed.schedule_feed_fetch_immediately()
if not self.fpf.entries: if not self.fpf.entries:
self.feed.save() self.feed.save()
@ -139,9 +138,9 @@ class ProcessFeed:
return FEED_ERRHTTP, ret_values return FEED_ERRHTTP, ret_values
if self.fpf.status >= 400: if self.fpf.status >= 400:
logging.debug(" ---> [%-30s] HTTP Status code: %s.%s Checking address..." % (unicode(self.feed)[:30], self.fpf.status, ' Not' if self.feed.fetched_once else '')) logging.debug(" ---> [%-30s] HTTP Status code: %s.%s Checking address..." % (unicode(self.feed)[:30], self.fpf.status, ' Not' if self.feed.known_good else ''))
fixed_feed = None fixed_feed = None
if not self.feed.fetched_once: if not self.feed.known_good:
fixed_feed = self.feed.check_feed_link_for_feed_address() fixed_feed = self.feed.check_feed_link_for_feed_address()
if not fixed_feed: if not fixed_feed:
self.feed.save_feed_history(self.fpf.status, "HTTP Error") self.feed.save_feed_history(self.fpf.status, "HTTP Error")
@ -152,10 +151,10 @@ class ProcessFeed:
return FEED_ERRHTTP, ret_values return FEED_ERRHTTP, ret_values
if self.fpf.bozo and isinstance(self.fpf.bozo_exception, feedparser.NonXMLContentType): if self.fpf.bozo and isinstance(self.fpf.bozo_exception, feedparser.NonXMLContentType):
logging.debug(" ---> [%-30s] Feed is Non-XML. %s entries.%s Checking address..." % (unicode(self.feed)[:30], len(self.fpf.entries), ' Not' if self.fpf.entries else '')) logging.debug(" ---> [%-30s] Feed is Non-XML. %s entries.%s Checking address..." % (unicode(self.feed)[:30], len(self.fpf.entries), ' Not' if self.feed.known_good and self.fpf.entries else ''))
if not self.fpf.entries: if not self.fpf.entries:
fixed_feed = None fixed_feed = None
if not self.feed.fetched_once: if not self.feed.known_good:
fixed_feed = self.feed.check_feed_link_for_feed_address() fixed_feed = self.feed.check_feed_link_for_feed_address()
if not fixed_feed: if not fixed_feed:
self.feed.save_feed_history(502, 'Non-xml feed', self.fpf.bozo_exception) self.feed.save_feed_history(502, 'Non-xml feed', self.fpf.bozo_exception)
@ -168,7 +167,7 @@ class ProcessFeed:
logging.debug(" ---> [%-30s] Feed has SAX/XML parsing issues. %s entries.%s Checking address..." % (unicode(self.feed)[:30], len(self.fpf.entries), ' Not' if self.fpf.entries else '')) logging.debug(" ---> [%-30s] Feed has SAX/XML parsing issues. %s entries.%s Checking address..." % (unicode(self.feed)[:30], len(self.fpf.entries), ' Not' if self.fpf.entries else ''))
if not self.fpf.entries: if not self.fpf.entries:
fixed_feed = None fixed_feed = None
if not self.feed.fetched_once: if not self.feed.known_good:
fixed_feed = self.feed.check_feed_link_for_feed_address() fixed_feed = self.feed.check_feed_link_for_feed_address()
if not fixed_feed: if not fixed_feed:
self.feed.save_feed_history(503, 'SAX Exception', self.fpf.bozo_exception) self.feed.save_feed_history(503, 'SAX Exception', self.fpf.bozo_exception)
@ -306,9 +305,9 @@ class Dispatcher:
feed = self.refresh_feed(feed_id) feed = self.refresh_feed(feed_id)
if ret_entries.get(ENTRY_NEW) or self.options['force'] or not feed.fetched_once: if ret_entries.get(ENTRY_NEW) or self.options['force']:
if not feed.fetched_once: if not feed.known_good:
feed.fetched_once = True feed.known_good = True
feed.save() feed.save()
MUserStory.delete_old_stories(feed_id=feed.pk) MUserStory.delete_old_stories(feed_id=feed.pk)
try: try: