mirror of
https://github.com/viq/NewsBlur.git
synced 2025-08-05 16:49:45 +00:00
Adding a new feeds db column, 'known_good', used to force updates of feeds that were once good but are now returning HTTP 500 errors.
This commit is contained in:
parent
967b13f39f
commit
c9cb7dcbc0
4 changed files with 109 additions and 18 deletions
|
@ -46,4 +46,12 @@ class Command(BaseCommand):
|
||||||
active_subscribers__gte=1,
|
active_subscribers__gte=1,
|
||||||
active=True
|
active=True
|
||||||
).order_by('?')
|
).order_by('?')
|
||||||
|
if feeds: Feed.task_feeds(feeds)
|
||||||
|
|
||||||
|
feeds = Feed.objects.filter(
|
||||||
|
last_update__lte=day,
|
||||||
|
active_subscribers__gte=1,
|
||||||
|
active=False,
|
||||||
|
known_good=True
|
||||||
|
).order_by('?')
|
||||||
if feeds: Feed.task_feeds(feeds)
|
if feeds: Feed.task_feeds(feeds)
|
83
apps/rss_feeds/migrations/0053_known_good.py
Normal file
83
apps/rss_feeds/migrations/0053_known_good.py
Normal file
|
@ -0,0 +1,83 @@
|
||||||
|
# encoding: utf-8
|
||||||
|
import datetime
|
||||||
|
from south.db import db
|
||||||
|
from south.v2 import SchemaMigration
|
||||||
|
from django.db import models
|
||||||
|
|
||||||
|
class Migration(SchemaMigration):
    """South schema migration that adds the ``known_good`` flag to feeds.

    ``forwards`` adds an indexed boolean ``known_good`` column (default
    False) to the ``feeds`` table; ``backwards`` drops that column again.
    """

    def forwards(self, orm):
        """Apply the migration by adding ``known_good`` to the ``feeds`` table.

        ``orm`` is accepted but not used by this migration.
        """
        # Adding field 'Feed.known_good'
        # NOTE(review): keep_default=False presumably tells South not to leave
        # the default on the column after existing rows are populated --
        # confirm against the South version in use.
        db.add_column('feeds', 'known_good', self.gf('django.db.models.fields.BooleanField')(default=False, db_index=True), keep_default=False)

    def backwards(self, orm):
        """Revert the migration by dropping ``known_good`` from ``feeds``.

        ``orm`` is accepted but not used by this migration.
        """
        # Deleting field 'Feed.known_good'
        db.delete_column('feeds', 'known_good')

    # Snapshot of the rss_feeds models, keyed by 'app_label.modelname'. Each
    # field maps to a (field class path, positional args, keyword args) triple.
    # NOTE(review): this looks like South's generated frozen-ORM state for this
    # migration; it should normally be left exactly as generated.
    models = {
        'rss_feeds.duplicatefeed': {
            'Meta': {'object_name': 'DuplicateFeed'},
            'duplicate_address': ('django.db.models.fields.CharField', [], {'max_length': '255'}),
            'duplicate_feed_id': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True'}),
            'feed': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'duplicate_addresses'", 'to': "orm['rss_feeds.Feed']"}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'})
        },
        'rss_feeds.feed': {
            'Meta': {'ordering': "['feed_title']", 'object_name': 'Feed', 'db_table': "'feeds'"},
            'active': ('django.db.models.fields.BooleanField', [], {'default': 'True', 'db_index': 'True'}),
            'active_premium_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1', 'db_index': 'True'}),
            'active_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1', 'db_index': 'True'}),
            'average_stories_per_month': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            'branch_from_feed': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['rss_feeds.Feed']", 'null': 'True', 'blank': 'True'}),
            'creation': ('django.db.models.fields.DateField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            'days_to_trim': ('django.db.models.fields.IntegerField', [], {'default': '90'}),
            'etag': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True', 'blank': 'True'}),
            'exception_code': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            'favicon_color': ('django.db.models.fields.CharField', [], {'max_length': '6', 'null': 'True', 'blank': 'True'}),
            'favicon_not_found': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'feed_address': ('django.db.models.fields.URLField', [], {'max_length': '255'}),
            'feed_address_locked': ('django.db.models.fields.NullBooleanField', [], {'default': 'False', 'null': 'True', 'blank': 'True'}),
            'feed_link': ('django.db.models.fields.URLField', [], {'default': "''", 'max_length': '1000', 'null': 'True', 'blank': 'True'}),
            'feed_link_locked': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'feed_title': ('django.db.models.fields.CharField', [], {'default': "'[Untitled]'", 'max_length': '255', 'null': 'True', 'blank': 'True'}),
            'fetched_once': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'has_feed_exception': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}),
            'has_page': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
            'has_page_exception': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}),
            'hash_address_and_link': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '64', 'db_index': 'True'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            # The column added by this migration.
            'known_good': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}),
            'last_load_time': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            'last_modified': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'blank': 'True'}),
            'last_update': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}),
            'min_to_decay': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            'next_scheduled_update': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}),
            'num_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1'}),
            'premium_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1'}),
            'queued_date': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}),
            'stories_last_month': ('django.db.models.fields.IntegerField', [], {'default': '0'})
        },
        'rss_feeds.feeddata': {
            'Meta': {'object_name': 'FeedData'},
            'feed': ('utils.fields.AutoOneToOneField', [], {'related_name': "'data'", 'unique': 'True', 'to': "orm['rss_feeds.Feed']"}),
            'feed_classifier_counts': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
            'feed_tagline': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'popular_authors': ('django.db.models.fields.CharField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}),
            'popular_tags': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
            'story_count_history': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'})
        },
        'rss_feeds.feedloadtime': {
            'Meta': {'object_name': 'FeedLoadtime'},
            'date_accessed': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'}),
            'feed': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['rss_feeds.Feed']"}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'loadtime': ('django.db.models.fields.FloatField', [], {})
        }
    }

    # App label(s) this snapshot covers.
    # NOTE(review): placed inside the class per South convention; the original
    # indentation is not recoverable from this view -- confirm.
    complete_apps = ['rss_feeds']
|
|
@ -50,6 +50,7 @@ class Feed(models.Model):
|
||||||
branch_from_feed = models.ForeignKey('Feed', blank=True, null=True, db_index=True)
|
branch_from_feed = models.ForeignKey('Feed', blank=True, null=True, db_index=True)
|
||||||
last_update = models.DateTimeField(db_index=True)
|
last_update = models.DateTimeField(db_index=True)
|
||||||
fetched_once = models.BooleanField(default=False)
|
fetched_once = models.BooleanField(default=False)
|
||||||
|
known_good = models.BooleanField(default=False, db_index=True)
|
||||||
has_feed_exception = models.BooleanField(default=False, db_index=True)
|
has_feed_exception = models.BooleanField(default=False, db_index=True)
|
||||||
has_page_exception = models.BooleanField(default=False, db_index=True)
|
has_page_exception = models.BooleanField(default=False, db_index=True)
|
||||||
has_page = models.BooleanField(default=True)
|
has_page = models.BooleanField(default=True)
|
||||||
|
|
|
@ -1,8 +1,13 @@
|
||||||
# from apps.rss_feeds.models import FeedXML
|
import time
|
||||||
|
import datetime
|
||||||
|
import traceback
|
||||||
|
import multiprocessing
|
||||||
|
import urllib2
|
||||||
|
import xml.sax
|
||||||
|
import redis
|
||||||
from django.core.cache import cache
|
from django.core.cache import cache
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.db import IntegrityError
|
from django.db import IntegrityError
|
||||||
# from mongoengine.queryset import Q
|
|
||||||
from apps.reader.models import UserSubscription, MUserStory
|
from apps.reader.models import UserSubscription, MUserStory
|
||||||
from apps.rss_feeds.models import Feed, MStory
|
from apps.rss_feeds.models import Feed, MStory
|
||||||
from apps.rss_feeds.page_importer import PageImporter
|
from apps.rss_feeds.page_importer import PageImporter
|
||||||
|
@ -11,18 +16,10 @@ from utils import feedparser
|
||||||
from utils.story_functions import pre_process_story
|
from utils.story_functions import pre_process_story
|
||||||
from utils import log as logging
|
from utils import log as logging
|
||||||
from utils.feed_functions import timelimit, TimeoutError, mail_feed_error_to_admin, utf8encode
|
from utils.feed_functions import timelimit, TimeoutError, mail_feed_error_to_admin, utf8encode
|
||||||
import time
|
|
||||||
import datetime
|
|
||||||
import traceback
|
|
||||||
import multiprocessing
|
|
||||||
import urllib2
|
|
||||||
import xml.sax
|
|
||||||
import redis
|
|
||||||
|
|
||||||
# Refresh feed code adapted from Feedjack.
|
# Refresh feed code adapted from Feedjack.
|
||||||
# http://feedjack.googlecode.com
|
# http://feedjack.googlecode.com
|
||||||
|
|
||||||
SLOWFEED_WARNING = 10
|
|
||||||
ENTRY_NEW, ENTRY_UPDATED, ENTRY_SAME, ENTRY_ERR = range(4)
|
ENTRY_NEW, ENTRY_UPDATED, ENTRY_SAME, ENTRY_ERR = range(4)
|
||||||
FEED_OK, FEED_SAME, FEED_ERRPARSE, FEED_ERRHTTP, FEED_ERREXC = range(5)
|
FEED_OK, FEED_SAME, FEED_ERRPARSE, FEED_ERRHTTP, FEED_ERREXC = range(5)
|
||||||
|
|
||||||
|
@ -132,6 +129,8 @@ class ProcessFeed:
|
||||||
if not self.feed.fetched_once:
|
if not self.feed.fetched_once:
|
||||||
self.feed.has_feed_exception = True
|
self.feed.has_feed_exception = True
|
||||||
self.feed.fetched_once = True
|
self.feed.fetched_once = True
|
||||||
|
self.feed.known_good = True
|
||||||
|
logging.debug(" ---> [%-30s] Feed is 302'ing, but it's not new. Refetching..." % (unicode(self.feed)[:30]))
|
||||||
self.feed.schedule_feed_fetch_immediately()
|
self.feed.schedule_feed_fetch_immediately()
|
||||||
if not self.fpf.entries:
|
if not self.fpf.entries:
|
||||||
self.feed.save()
|
self.feed.save()
|
||||||
|
@ -139,9 +138,9 @@ class ProcessFeed:
|
||||||
return FEED_ERRHTTP, ret_values
|
return FEED_ERRHTTP, ret_values
|
||||||
|
|
||||||
if self.fpf.status >= 400:
|
if self.fpf.status >= 400:
|
||||||
logging.debug(" ---> [%-30s] HTTP Status code: %s.%s Checking address..." % (unicode(self.feed)[:30], self.fpf.status, ' Not' if self.feed.fetched_once else ''))
|
logging.debug(" ---> [%-30s] HTTP Status code: %s.%s Checking address..." % (unicode(self.feed)[:30], self.fpf.status, ' Not' if self.feed.known_good else ''))
|
||||||
fixed_feed = None
|
fixed_feed = None
|
||||||
if not self.feed.fetched_once:
|
if not self.feed.known_good:
|
||||||
fixed_feed = self.feed.check_feed_link_for_feed_address()
|
fixed_feed = self.feed.check_feed_link_for_feed_address()
|
||||||
if not fixed_feed:
|
if not fixed_feed:
|
||||||
self.feed.save_feed_history(self.fpf.status, "HTTP Error")
|
self.feed.save_feed_history(self.fpf.status, "HTTP Error")
|
||||||
|
@ -152,10 +151,10 @@ class ProcessFeed:
|
||||||
return FEED_ERRHTTP, ret_values
|
return FEED_ERRHTTP, ret_values
|
||||||
|
|
||||||
if self.fpf.bozo and isinstance(self.fpf.bozo_exception, feedparser.NonXMLContentType):
|
if self.fpf.bozo and isinstance(self.fpf.bozo_exception, feedparser.NonXMLContentType):
|
||||||
logging.debug(" ---> [%-30s] Feed is Non-XML. %s entries.%s Checking address..." % (unicode(self.feed)[:30], len(self.fpf.entries), ' Not' if self.fpf.entries else ''))
|
logging.debug(" ---> [%-30s] Feed is Non-XML. %s entries.%s Checking address..." % (unicode(self.feed)[:30], len(self.fpf.entries), ' Not' if self.feed.known_good and self.fpf.entries else ''))
|
||||||
if not self.fpf.entries:
|
if not self.fpf.entries:
|
||||||
fixed_feed = None
|
fixed_feed = None
|
||||||
if not self.feed.fetched_once:
|
if not self.feed.known_good:
|
||||||
fixed_feed = self.feed.check_feed_link_for_feed_address()
|
fixed_feed = self.feed.check_feed_link_for_feed_address()
|
||||||
if not fixed_feed:
|
if not fixed_feed:
|
||||||
self.feed.save_feed_history(502, 'Non-xml feed', self.fpf.bozo_exception)
|
self.feed.save_feed_history(502, 'Non-xml feed', self.fpf.bozo_exception)
|
||||||
|
@ -168,7 +167,7 @@ class ProcessFeed:
|
||||||
logging.debug(" ---> [%-30s] Feed has SAX/XML parsing issues. %s entries.%s Checking address..." % (unicode(self.feed)[:30], len(self.fpf.entries), ' Not' if self.fpf.entries else ''))
|
logging.debug(" ---> [%-30s] Feed has SAX/XML parsing issues. %s entries.%s Checking address..." % (unicode(self.feed)[:30], len(self.fpf.entries), ' Not' if self.fpf.entries else ''))
|
||||||
if not self.fpf.entries:
|
if not self.fpf.entries:
|
||||||
fixed_feed = None
|
fixed_feed = None
|
||||||
if not self.feed.fetched_once:
|
if not self.feed.known_good:
|
||||||
fixed_feed = self.feed.check_feed_link_for_feed_address()
|
fixed_feed = self.feed.check_feed_link_for_feed_address()
|
||||||
if not fixed_feed:
|
if not fixed_feed:
|
||||||
self.feed.save_feed_history(503, 'SAX Exception', self.fpf.bozo_exception)
|
self.feed.save_feed_history(503, 'SAX Exception', self.fpf.bozo_exception)
|
||||||
|
@ -306,9 +305,9 @@ class Dispatcher:
|
||||||
|
|
||||||
feed = self.refresh_feed(feed_id)
|
feed = self.refresh_feed(feed_id)
|
||||||
|
|
||||||
if ret_entries.get(ENTRY_NEW) or self.options['force'] or not feed.fetched_once:
|
if ret_entries.get(ENTRY_NEW) or self.options['force']:
|
||||||
if not feed.fetched_once:
|
if not feed.known_good:
|
||||||
feed.fetched_once = True
|
feed.known_good = True
|
||||||
feed.save()
|
feed.save()
|
||||||
MUserStory.delete_old_stories(feed_id=feed.pk)
|
MUserStory.delete_old_stories(feed_id=feed.pk)
|
||||||
try:
|
try:
|
||||||
|
|
Loading…
Add table
Reference in a new issue