mirror of
https://github.com/viq/NewsBlur.git
synced 2025-08-05 16:49:45 +00:00
Adding a new 'known_good' column to the feeds db table, for forcing updates of feeds that were once good but are now returning HTTP 500 errors.
This commit is contained in:
parent
967b13f39f
commit
c9cb7dcbc0
4 changed files with 109 additions and 18 deletions
|
@ -46,4 +46,12 @@ class Command(BaseCommand):
|
|||
active_subscribers__gte=1,
|
||||
active=True
|
||||
).order_by('?')
|
||||
if feeds: Feed.task_feeds(feeds)
|
||||
|
||||
feeds = Feed.objects.filter(
|
||||
last_update__lte=day,
|
||||
active_subscribers__gte=1,
|
||||
active=False,
|
||||
known_good=True
|
||||
).order_by('?')
|
||||
if feeds: Feed.task_feeds(feeds)
|
83
apps/rss_feeds/migrations/0053_known_good.py
Normal file
83
apps/rss_feeds/migrations/0053_known_good.py
Normal file
|
@ -0,0 +1,83 @@
|
|||
# encoding: utf-8
|
||||
import datetime
|
||||
from south.db import db
|
||||
from south.v2 import SchemaMigration
|
||||
from django.db import models
|
||||
|
||||
class Migration(SchemaMigration):
    """South schema migration for the ``known_good`` column on ``feeds``.

    ``forwards`` adds the column; ``backwards`` removes it again. The
    ``models`` dict below is the frozen ORM snapshot for the ``rss_feeds``
    app that accompanies this migration.
    """

    def forwards(self, orm):
        """Add the indexed boolean column ``known_good`` to ``feeds``."""
        # Adding field 'Feed.known_good'
        boolean_field = self.gf('django.db.models.fields.BooleanField')
        known_good = boolean_field(default=False, db_index=True)
        db.add_column('feeds', 'known_good', known_good, keep_default=False)

    def backwards(self, orm):
        """Drop the ``known_good`` column from ``feeds``."""
        # Deleting field 'Feed.known_good'
        db.delete_column('feeds', 'known_good')

    # Frozen model definitions for this migration.
    models = {
        'rss_feeds.duplicatefeed': {
            'Meta': {'object_name': 'DuplicateFeed'},
            'duplicate_address': ('django.db.models.fields.CharField', [], {'max_length': '255'}),
            'duplicate_feed_id': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True'}),
            'feed': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'duplicate_addresses'", 'to': "orm['rss_feeds.Feed']"}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'})
        },
        'rss_feeds.feed': {
            'Meta': {'ordering': "['feed_title']", 'object_name': 'Feed', 'db_table': "'feeds'"},
            'active': ('django.db.models.fields.BooleanField', [], {'default': 'True', 'db_index': 'True'}),
            'active_premium_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1', 'db_index': 'True'}),
            'active_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1', 'db_index': 'True'}),
            'average_stories_per_month': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            'branch_from_feed': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['rss_feeds.Feed']", 'null': 'True', 'blank': 'True'}),
            'creation': ('django.db.models.fields.DateField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            'days_to_trim': ('django.db.models.fields.IntegerField', [], {'default': '90'}),
            'etag': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True', 'blank': 'True'}),
            'exception_code': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            'favicon_color': ('django.db.models.fields.CharField', [], {'max_length': '6', 'null': 'True', 'blank': 'True'}),
            'favicon_not_found': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'feed_address': ('django.db.models.fields.URLField', [], {'max_length': '255'}),
            'feed_address_locked': ('django.db.models.fields.NullBooleanField', [], {'default': 'False', 'null': 'True', 'blank': 'True'}),
            'feed_link': ('django.db.models.fields.URLField', [], {'default': "''", 'max_length': '1000', 'null': 'True', 'blank': 'True'}),
            'feed_link_locked': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'feed_title': ('django.db.models.fields.CharField', [], {'default': "'[Untitled]'", 'max_length': '255', 'null': 'True', 'blank': 'True'}),
            'fetched_once': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'has_feed_exception': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}),
            'has_page': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
            'has_page_exception': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}),
            'hash_address_and_link': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '64', 'db_index': 'True'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'known_good': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}),
            'last_load_time': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            'last_modified': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'blank': 'True'}),
            'last_update': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}),
            'min_to_decay': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            'next_scheduled_update': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}),
            'num_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1'}),
            'premium_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1'}),
            'queued_date': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}),
            'stories_last_month': ('django.db.models.fields.IntegerField', [], {'default': '0'})
        },
        'rss_feeds.feeddata': {
            'Meta': {'object_name': 'FeedData'},
            'feed': ('utils.fields.AutoOneToOneField', [], {'related_name': "'data'", 'unique': 'True', 'to': "orm['rss_feeds.Feed']"}),
            'feed_classifier_counts': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
            'feed_tagline': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'popular_authors': ('django.db.models.fields.CharField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}),
            'popular_tags': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
            'story_count_history': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'})
        },
        'rss_feeds.feedloadtime': {
            'Meta': {'object_name': 'FeedLoadtime'},
            'date_accessed': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'}),
            'feed': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['rss_feeds.Feed']"}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'loadtime': ('django.db.models.fields.FloatField', [], {})
        }
    }

    complete_apps = ['rss_feeds']
|
|
@ -50,6 +50,7 @@ class Feed(models.Model):
|
|||
branch_from_feed = models.ForeignKey('Feed', blank=True, null=True, db_index=True)
|
||||
last_update = models.DateTimeField(db_index=True)
|
||||
fetched_once = models.BooleanField(default=False)
|
||||
known_good = models.BooleanField(default=False, db_index=True)
|
||||
has_feed_exception = models.BooleanField(default=False, db_index=True)
|
||||
has_page_exception = models.BooleanField(default=False, db_index=True)
|
||||
has_page = models.BooleanField(default=True)
|
||||
|
|
|
@ -1,8 +1,13 @@
|
|||
# from apps.rss_feeds.models import FeedXML
|
||||
import time
|
||||
import datetime
|
||||
import traceback
|
||||
import multiprocessing
|
||||
import urllib2
|
||||
import xml.sax
|
||||
import redis
|
||||
from django.core.cache import cache
|
||||
from django.conf import settings
|
||||
from django.db import IntegrityError
|
||||
# from mongoengine.queryset import Q
|
||||
from apps.reader.models import UserSubscription, MUserStory
|
||||
from apps.rss_feeds.models import Feed, MStory
|
||||
from apps.rss_feeds.page_importer import PageImporter
|
||||
|
@ -11,18 +16,10 @@ from utils import feedparser
|
|||
from utils.story_functions import pre_process_story
|
||||
from utils import log as logging
|
||||
from utils.feed_functions import timelimit, TimeoutError, mail_feed_error_to_admin, utf8encode
|
||||
import time
|
||||
import datetime
|
||||
import traceback
|
||||
import multiprocessing
|
||||
import urllib2
|
||||
import xml.sax
|
||||
import redis
|
||||
|
||||
# Refresh feed code adapted from Feedjack.
|
||||
# http://feedjack.googlecode.com
|
||||
|
||||
SLOWFEED_WARNING = 10
|
||||
ENTRY_NEW, ENTRY_UPDATED, ENTRY_SAME, ENTRY_ERR = range(4)
|
||||
FEED_OK, FEED_SAME, FEED_ERRPARSE, FEED_ERRHTTP, FEED_ERREXC = range(5)
|
||||
|
||||
|
@ -132,6 +129,8 @@ class ProcessFeed:
|
|||
if not self.feed.fetched_once:
|
||||
self.feed.has_feed_exception = True
|
||||
self.feed.fetched_once = True
|
||||
self.feed.known_good = True
|
||||
logging.debug(" ---> [%-30s] Feed is 302'ing, but it's not new. Refetching..." % (unicode(self.feed)[:30]))
|
||||
self.feed.schedule_feed_fetch_immediately()
|
||||
if not self.fpf.entries:
|
||||
self.feed.save()
|
||||
|
@ -139,9 +138,9 @@ class ProcessFeed:
|
|||
return FEED_ERRHTTP, ret_values
|
||||
|
||||
if self.fpf.status >= 400:
|
||||
logging.debug(" ---> [%-30s] HTTP Status code: %s.%s Checking address..." % (unicode(self.feed)[:30], self.fpf.status, ' Not' if self.feed.fetched_once else ''))
|
||||
logging.debug(" ---> [%-30s] HTTP Status code: %s.%s Checking address..." % (unicode(self.feed)[:30], self.fpf.status, ' Not' if self.feed.known_good else ''))
|
||||
fixed_feed = None
|
||||
if not self.feed.fetched_once:
|
||||
if not self.feed.known_good:
|
||||
fixed_feed = self.feed.check_feed_link_for_feed_address()
|
||||
if not fixed_feed:
|
||||
self.feed.save_feed_history(self.fpf.status, "HTTP Error")
|
||||
|
@ -152,10 +151,10 @@ class ProcessFeed:
|
|||
return FEED_ERRHTTP, ret_values
|
||||
|
||||
if self.fpf.bozo and isinstance(self.fpf.bozo_exception, feedparser.NonXMLContentType):
|
||||
logging.debug(" ---> [%-30s] Feed is Non-XML. %s entries.%s Checking address..." % (unicode(self.feed)[:30], len(self.fpf.entries), ' Not' if self.fpf.entries else ''))
|
||||
logging.debug(" ---> [%-30s] Feed is Non-XML. %s entries.%s Checking address..." % (unicode(self.feed)[:30], len(self.fpf.entries), ' Not' if self.feed.known_good and self.fpf.entries else ''))
|
||||
if not self.fpf.entries:
|
||||
fixed_feed = None
|
||||
if not self.feed.fetched_once:
|
||||
if not self.feed.known_good:
|
||||
fixed_feed = self.feed.check_feed_link_for_feed_address()
|
||||
if not fixed_feed:
|
||||
self.feed.save_feed_history(502, 'Non-xml feed', self.fpf.bozo_exception)
|
||||
|
@ -168,7 +167,7 @@ class ProcessFeed:
|
|||
logging.debug(" ---> [%-30s] Feed has SAX/XML parsing issues. %s entries.%s Checking address..." % (unicode(self.feed)[:30], len(self.fpf.entries), ' Not' if self.fpf.entries else ''))
|
||||
if not self.fpf.entries:
|
||||
fixed_feed = None
|
||||
if not self.feed.fetched_once:
|
||||
if not self.feed.known_good:
|
||||
fixed_feed = self.feed.check_feed_link_for_feed_address()
|
||||
if not fixed_feed:
|
||||
self.feed.save_feed_history(503, 'SAX Exception', self.fpf.bozo_exception)
|
||||
|
@ -306,9 +305,9 @@ class Dispatcher:
|
|||
|
||||
feed = self.refresh_feed(feed_id)
|
||||
|
||||
if ret_entries.get(ENTRY_NEW) or self.options['force'] or not feed.fetched_once:
|
||||
if not feed.fetched_once:
|
||||
feed.fetched_once = True
|
||||
if ret_entries.get(ENTRY_NEW) or self.options['force']:
|
||||
if not feed.known_good:
|
||||
feed.known_good = True
|
||||
feed.save()
|
||||
MUserStory.delete_old_stories(feed_id=feed.pk)
|
||||
try:
|
||||
|
|
Loading…
Add table
Reference in a new issue