Mirror of https://github.com/viq/NewsBlur.git, synced 2025-09-18 21:43:31 +00:00
Merge branch 'master' into social
* master:
  Adding build/ back to .gitignore.
  Updating .gitignore.
  Using dsa pem for retrieving rabbit update counts.
  Preserving feed fetch error histories for 2 weeks.
  Adding munin graph for known good feeds.
  Turning down verbosity on feed tasking.
  Adding known good feeds to munin to chart their progress.
  Fixing headers overflow in Chrome.
  Adding new feeds db column 'known_good', for forcing updates of feeds that were once good but are now throwing 500s.
  Adding blogging OT press story.
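The 'known_good' column is the thread running through most of the diffs below: a feed that has fetched cleanly at least once keeps being scheduled even when it later returns 500s, instead of having its address re-checked and potentially disabled. A minimal sketch of that idea, using a simplified stand-in class rather than NewsBlur's actual Feed model or fetcher:

# Minimal sketch of the 'known_good' idea in this merge; the class below is a
# simplified stand-in, not NewsBlur's Feed model.
class Feed(object):
    def __init__(self):
        self.fetched_once = False
        self.known_good = False

    def record_fetch(self, new_entries):
        # Echoes the Dispatcher change below: a feed becomes known_good once a
        # fetch actually yields new entries.
        self.fetched_once = True
        if new_entries:
            self.known_good = True

    def should_recheck_address(self, status_code):
        # Echoes the ProcessFeed changes below: only feeds that were never
        # known good get their feed address re-checked on HTTP errors.
        return status_code >= 400 and not self.known_good

if __name__ == '__main__':
    feed = Feed()
    feed.record_fetch(new_entries=True)
    print(feed.should_recheck_address(500))   # False: once-good feeds just retry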
Commit b3fffab05a
10 changed files with 125 additions and 32 deletions
.gitignore (vendored), 8 changes
@@ -1,9 +1,5 @@
logs/*.log
*.pyc
media/release/
media/*/*-compressed-*.*
media/css/*/*-compressed-*.*
media/release
static/*
local_settings.py
media/iphone/NewsBlur/build
@@ -12,10 +8,7 @@ build/
.DS_Store
**/*.perspectivev*
data/
logs
mongo/
**/*.xcuserstate
media/iphone/NewsBlur.xcodeproj/project.xcworkspace/xcuserdata/conesus.xcuserdatad/UserInterfaceState.xcuserstate
UserInterfaceState.xcuserstate
UserInterfaceState\.xcuserstate
*.xcuserstate
@@ -23,3 +16,4 @@ xcuserdata
.xcodeproj/ push.xcodeproj/project.pbxproj
*.mode1v3
*.pbxuser
media/maintenance.html
@@ -46,4 +46,12 @@ class Command(BaseCommand):
            active_subscribers__gte=1,
            active=True
        ).order_by('?')
        if feeds: Feed.task_feeds(feeds)
+
+        feeds = Feed.objects.filter(
+            last_update__lte=day,
+            active_subscribers__gte=1,
+            active=False,
+            known_good=True
+        ).order_by('?')
+        if feeds: Feed.task_feeds(feeds)
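The added queryset re-tasks feeds that are inactive but were once known good; order_by('?') shuffles them so tasking is spread randomly. The 'day' cutoff is not part of this hunk, so its exact value is unknown; a hedged guess at its shape, purely for orientation:

import datetime

# Assumption: 'day' is a staleness cutoff computed earlier in the command,
# e.g. feeds not updated within roughly the last day get re-tasked.
day = datetime.datetime.utcnow() - datetime.timedelta(days=1)
print(day)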
apps/rss_feeds/migrations/0053_known_good.py (new file, 83 lines)
@@ -0,0 +1,83 @@
# encoding: utf-8
import datetime
from south.db import db
from south.v2 import SchemaMigration
from django.db import models

class Migration(SchemaMigration):

    def forwards(self, orm):
        # Adding field 'Feed.known_good'
        db.add_column('feeds', 'known_good', self.gf('django.db.models.fields.BooleanField')(default=False, db_index=True), keep_default=False)

    def backwards(self, orm):
        # Deleting field 'Feed.known_good'
        db.delete_column('feeds', 'known_good')

    models = {
        'rss_feeds.duplicatefeed': {
            'Meta': {'object_name': 'DuplicateFeed'},
            'duplicate_address': ('django.db.models.fields.CharField', [], {'max_length': '255'}),
            'duplicate_feed_id': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True'}),
            'feed': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'duplicate_addresses'", 'to': "orm['rss_feeds.Feed']"}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'})
        },
        'rss_feeds.feed': {
            'Meta': {'ordering': "['feed_title']", 'object_name': 'Feed', 'db_table': "'feeds'"},
            'active': ('django.db.models.fields.BooleanField', [], {'default': 'True', 'db_index': 'True'}),
            'active_premium_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1', 'db_index': 'True'}),
            'active_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1', 'db_index': 'True'}),
            'average_stories_per_month': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            'branch_from_feed': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['rss_feeds.Feed']", 'null': 'True', 'blank': 'True'}),
            'creation': ('django.db.models.fields.DateField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            'days_to_trim': ('django.db.models.fields.IntegerField', [], {'default': '90'}),
            'etag': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True', 'blank': 'True'}),
            'exception_code': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            'favicon_color': ('django.db.models.fields.CharField', [], {'max_length': '6', 'null': 'True', 'blank': 'True'}),
            'favicon_not_found': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'feed_address': ('django.db.models.fields.URLField', [], {'max_length': '255'}),
            'feed_address_locked': ('django.db.models.fields.NullBooleanField', [], {'default': 'False', 'null': 'True', 'blank': 'True'}),
            'feed_link': ('django.db.models.fields.URLField', [], {'default': "''", 'max_length': '1000', 'null': 'True', 'blank': 'True'}),
            'feed_link_locked': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'feed_title': ('django.db.models.fields.CharField', [], {'default': "'[Untitled]'", 'max_length': '255', 'null': 'True', 'blank': 'True'}),
            'fetched_once': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'has_feed_exception': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}),
            'has_page': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
            'has_page_exception': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}),
            'hash_address_and_link': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '64', 'db_index': 'True'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'known_good': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}),
            'last_load_time': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            'last_modified': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'blank': 'True'}),
            'last_update': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}),
            'min_to_decay': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            'next_scheduled_update': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}),
            'num_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1'}),
            'premium_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1'}),
            'queued_date': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}),
            'stories_last_month': ('django.db.models.fields.IntegerField', [], {'default': '0'})
        },
        'rss_feeds.feeddata': {
            'Meta': {'object_name': 'FeedData'},
            'feed': ('utils.fields.AutoOneToOneField', [], {'related_name': "'data'", 'unique': 'True', 'to': "orm['rss_feeds.Feed']"}),
            'feed_classifier_counts': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
            'feed_tagline': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'popular_authors': ('django.db.models.fields.CharField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}),
            'popular_tags': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
            'story_count_history': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'})
        },
        'rss_feeds.feedloadtime': {
            'Meta': {'object_name': 'FeedLoadtime'},
            'date_accessed': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'}),
            'feed': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['rss_feeds.Feed']"}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'loadtime': ('django.db.models.fields.FloatField', [], {})
        }
    }

    complete_apps = ['rss_feeds']
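A hedged sketch of how this migration would typically be applied and the new column exercised; call_command is standard Django and the field name comes from the diff above, but the snippet itself is illustrative and not part of the commit:

# Illustrative only: requires a configured Django settings module with South
# installed; none of this is part of the commit above.
from django.core.management import call_command

def apply_known_good_migration():
    # With South installed, 'migrate' walks rss_feeds forward through
    # 0053_known_good, which runs db.add_column('feeds', 'known_good', ...).
    call_command('migrate', 'rss_feeds')

def count_known_good_feeds():
    from apps.rss_feeds.models import Feed
    # db_index=True keeps this filter cheap on a large feeds table; the munin
    # plugin further down uses the same filter.
    return Feed.objects.filter(known_good=True).count()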
@@ -52,6 +52,7 @@ class Feed(models.Model):
    branch_from_feed = models.ForeignKey('Feed', blank=True, null=True, db_index=True)
    last_update = models.DateTimeField(db_index=True)
    fetched_once = models.BooleanField(default=False)
+    known_good = models.BooleanField(default=False, db_index=True)
    has_feed_exception = models.BooleanField(default=False, db_index=True)
    has_page_exception = models.BooleanField(default=False, db_index=True)
    has_page = models.BooleanField(default=True)
@@ -1013,7 +1014,7 @@ class Feed(models.Model):
        return total, random_factor*2

    def set_next_scheduled_update(self):
-        total, random_factor = self.get_next_scheduled_update(force=True)
+        total, random_factor = self.get_next_scheduled_update(force=True, verbose=False)

        next_scheduled_update = datetime.datetime.utcnow() + datetime.timedelta(
            minutes = total + random_factor)
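The extra verbose=False matches the "Turning down verbosity on feed tasking" item in the merge message: presumably get_next_scheduled_update logs its scheduling math unless told not to. A generic sketch of that pattern, using a hypothetical function rather than the actual method:

import logging

def get_next_scheduled_update(subscribers, force=False, verbose=True):
    # Hypothetical stand-in: compute a fetch interval in minutes and only log
    # the details when the caller wants the chatter (schedulers pass verbose=False).
    total = max(5, 60 // max(subscribers, 1))
    if verbose:
        logging.debug("next update in %s minutes (force=%s)", total, force)
    return total

print(get_next_scheduled_update(subscribers=12, force=True, verbose=False))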
@@ -52,17 +52,20 @@ class MStatistics(mongo.Document):
    def collect_statistics_feeds_fetched(cls, last_day=None):
        if not last_day:
            last_day = datetime.datetime.now() - datetime.timedelta(hours=24)
+        last_biweek = datetime.datetime.now() - datetime.timedelta(days=14)

-        feeds_fetched = MFeedFetchHistory.objects.count()
+        feeds_fetched = MFeedFetchHistory.objects.filter(fetch_date__lt=last_day).count()
        cls.objects(key='feeds_fetched').update_one(upsert=True, key='feeds_fetched', value=feeds_fetched)
-        pages_fetched = MPageFetchHistory.objects.count()
+        pages_fetched = MPageFetchHistory.objects.filter(fetch_date__lt=last_day).count()
        cls.objects(key='pages_fetched').update_one(upsert=True, key='pages_fetched', value=pages_fetched)

        from utils.feed_functions import timelimit, TimeoutError
        @timelimit(60)
        def delete_old_history():
-            MFeedFetchHistory.objects(fetch_date__lt=last_day).delete()
-            MPageFetchHistory.objects(fetch_date__lt=last_day).delete()
+            MFeedFetchHistory.objects(fetch_date__lt=last_day, status_code__in=[200, 304]).delete()
+            MPageFetchHistory.objects(fetch_date__lt=last_day, status_code__in=[200, 304]).delete()
+            MFeedFetchHistory.objects(fetch_date__lt=last_biweek).delete()
+            MPageFetchHistory.objects(fetch_date__lt=last_biweek).delete()
        try:
            delete_old_history()
        except TimeoutError:
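The net effect of the delete_old_history change is a two-tier retention policy: successful fetch histories (HTTP 200/304) are still trimmed after a day, while error histories now survive for two weeks. A self-contained sketch of that rule, using a plain function instead of the Mongo documents above:

import datetime

def keep_history(fetch_date, status_code, now=None):
    # Mirrors the retention rules in the hunk above: successes (200/304) live
    # for a day, everything else lives for two weeks. Stand-alone sketch, not
    # the MStatistics code.
    now = now or datetime.datetime.now()
    last_day = now - datetime.timedelta(hours=24)
    last_biweek = now - datetime.timedelta(days=14)
    if fetch_date < last_biweek:
        return False                      # everything expires after two weeks
    if status_code in (200, 304):
        return fetch_date >= last_day     # successes only kept for a day
    return True                           # errors kept for the full two weeks

if __name__ == '__main__':
    now = datetime.datetime.now()
    print(keep_history(now - datetime.timedelta(days=3), 500, now))  # True
    print(keep_history(now - datetime.timedelta(days=3), 200, now))  # False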
@@ -3785,6 +3785,7 @@ background: transparent;
.NB-module h5 {
    margin: 0 0 12px;
    padding: 8px 12px 6px;
+    overflow: hidden;
}

.NB-module .NB-module-header-left {
@@ -132,6 +132,10 @@
            Mar 8, 2011
        </span>
    </li>
+    <li>
+        <a href="http://www.bloggingot.com/blogging-tools/newsblur-rss-feed-client-for-feed-junkies/">NewsBlur: RSS Feed Client for Feed Junkies</a>
+        <span class="NB-press-publisher"><img src="http://www.bloggingot.com/favicon.ico"> Blogging OT</span>, <span class="NB-press-author">Panah</span>, <span class="NB-press-date">Jan 21, 2012</span>
+    </li>
    <li>
        <a href="http://www.genbeta.com/web/newsblur-una-excelente-alternativa-a-google-reader-que-filtra-los-posts-mas-relevantes">
            NewsBlur, una excelente alternativa a Google Reader que filtra los posts más relevantes
@ -1,8 +1,13 @@
|
|||
# from apps.rss_feeds.models import FeedXML
|
||||
import time
|
||||
import datetime
|
||||
import traceback
|
||||
import multiprocessing
|
||||
import urllib2
|
||||
import xml.sax
|
||||
import redis
|
||||
from django.core.cache import cache
|
||||
from django.conf import settings
|
||||
from django.db import IntegrityError
|
||||
# from mongoengine.queryset import Q
|
||||
from apps.reader.models import UserSubscription, MUserStory
|
||||
from apps.rss_feeds.models import Feed, MStory
|
||||
from apps.rss_feeds.page_importer import PageImporter
|
||||
|
@@ -11,18 +16,10 @@ from utils import feedparser
from utils.story_functions import pre_process_story
from utils import log as logging
from utils.feed_functions import timelimit, TimeoutError, mail_feed_error_to_admin, utf8encode
-import time
-import datetime
-import traceback
-import multiprocessing
-import urllib2
-import xml.sax
-import redis

# Refresh feed code adapted from Feedjack.
# http://feedjack.googlecode.com

SLOWFEED_WARNING = 10
ENTRY_NEW, ENTRY_UPDATED, ENTRY_SAME, ENTRY_ERR = range(4)
FEED_OK, FEED_SAME, FEED_ERRPARSE, FEED_ERRHTTP, FEED_ERREXC = range(5)
@@ -132,16 +129,16 @@ class ProcessFeed:
            if not self.feed.fetched_once:
                self.feed.has_feed_exception = True
                self.feed.fetched_once = True
            logging.debug(" ---> [%-30s] Feed is 302'ing, but it's not new. Refetching..." % (unicode(self.feed)[:30]))
            self.feed.schedule_feed_fetch_immediately()
            if not self.fpf.entries:
                self.feed.save()
                self.feed.save_feed_history(self.fpf.status, "HTTP Redirect")
                return FEED_ERRHTTP, ret_values

        if self.fpf.status >= 400:
-            logging.debug(" ---> [%-30s] HTTP Status code: %s.%s Checking address..." % (unicode(self.feed)[:30], self.fpf.status, ' Not' if self.feed.fetched_once else ''))
+            logging.debug(" ---> [%-30s] HTTP Status code: %s.%s Checking address..." % (unicode(self.feed)[:30], self.fpf.status, ' Not' if self.feed.known_good else ''))
            fixed_feed = None
-            if not self.feed.fetched_once:
+            if not self.feed.known_good:
                fixed_feed = self.feed.check_feed_link_for_feed_address()
            if not fixed_feed:
                self.feed.save_feed_history(self.fpf.status, "HTTP Error")
@@ -152,10 +149,10 @@ class ProcessFeed:
                return FEED_ERRHTTP, ret_values

        if self.fpf.bozo and isinstance(self.fpf.bozo_exception, feedparser.NonXMLContentType):
-            logging.debug(" ---> [%-30s] Feed is Non-XML. %s entries.%s Checking address..." % (unicode(self.feed)[:30], len(self.fpf.entries), ' Not' if self.fpf.entries else ''))
+            logging.debug(" ---> [%-30s] Feed is Non-XML. %s entries.%s Checking address..." % (unicode(self.feed)[:30], len(self.fpf.entries), ' Not' if self.feed.known_good and self.fpf.entries else ''))
            if not self.fpf.entries:
                fixed_feed = None
-                if not self.feed.fetched_once:
+                if not self.feed.known_good:
                    fixed_feed = self.feed.check_feed_link_for_feed_address()
                if not fixed_feed:
                    self.feed.save_feed_history(502, 'Non-xml feed', self.fpf.bozo_exception)
@@ -168,7 +165,7 @@ class ProcessFeed:
            logging.debug(" ---> [%-30s] Feed has SAX/XML parsing issues. %s entries.%s Checking address..." % (unicode(self.feed)[:30], len(self.fpf.entries), ' Not' if self.fpf.entries else ''))
            if not self.fpf.entries:
                fixed_feed = None
-                if not self.feed.fetched_once:
+                if not self.feed.known_good:
                    fixed_feed = self.feed.check_feed_link_for_feed_address()
                if not fixed_feed:
                    self.feed.save_feed_history(503, 'SAX Exception', self.fpf.bozo_exception)
@@ -306,9 +303,9 @@ class Dispatcher:

            feed = self.refresh_feed(feed_id)

-            if ret_entries.get(ENTRY_NEW) or self.options['force'] or not feed.fetched_once:
-                if not feed.fetched_once:
-                    feed.fetched_once = True
+            if ret_entries.get(ENTRY_NEW) or self.options['force']:
+                if not feed.known_good:
+                    feed.known_good = True
                    feed.save()
                MUserStory.delete_old_stories(feed_id=feed.pk)
                try:
@@ -10,6 +10,7 @@ graph_config = {
    'inactive_feeds.label': 'inactive_feeds',
    'duplicate_feeds.label': 'duplicate_feeds',
    'active_feeds.label': 'active_feeds',
+    'known_good_feeds.label': 'known_good',
}
def calculate_metrics():
    from apps.rss_feeds.models import Feed, DuplicateFeed
@@ -20,6 +21,7 @@ def calculate_metrics():
        'inactive_feeds': Feed.objects.filter(active=False).count(),
        'duplicate_feeds': DuplicateFeed.objects.count(),
        'active_feeds': Feed.objects.filter(active_subscribers__gt=0).count(),
+        'known_good_feeds': Feed.objects.filter(known_good=True).count(),
    }

if __name__ == '__main__':
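These two hunks extend what appears to be a munin plugin: the entries in graph_config name the data series, and calculate_metrics returns one value per series. A hedged sketch of the output such a plugin typically emits; the field names come from the diff, but the plugin scaffolding and the numbers are assumptions:

import sys

# Illustrative munin-style output for the metrics above; the repository's
# actual plugin scaffolding is not shown in this diff.
metrics = {
    'inactive_feeds': 12500,    # example numbers, not real measurements
    'duplicate_feeds': 480,
    'active_feeds': 52000,
    'known_good_feeds': 31000,
}

if len(sys.argv) > 1 and sys.argv[1] == 'config':
    # 'munin-run <plugin> config' prints the series labels.
    for name in metrics:
        print('%s.label %s' % (name, name))
else:
    # A plain run prints 'fieldname.value N' for munin to graph.
    for name, value in metrics.items():
        print('%s.value %s' % (name, value))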
@@ -18,7 +18,7 @@ def calculate_metrics():
    from apps.rss_feeds.models import Feed

    hour_ago = datetime.datetime.utcnow() - datetime.timedelta(hours=1)
-    update_feeds_query = "ssh sclay@db01 \"sudo rabbitmqctl list_queues -p newsblurvhost | grep %s\" | awk '{print $2}'"
+    update_feeds_query = "ssh -i ~sclay/.ssh/id_dsa sclay@db01 \"sudo rabbitmqctl list_queues -p newsblurvhost | grep %s\" | awk '{print $2}'"

    return {
        'update_queue': Feed.objects.filter(queued_date__gte=hour_ago).count(),
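The added -i ~sclay/.ssh/id_dsa is the "dsa pem" item from the merge message: it pins the ssh identity file so the remote rabbitmqctl query runs non-interactively, for example from cron or munin. A hedged sketch of how such a query string would typically be executed; the queue name and the subprocess wiring are assumptions, not code from this commit:

import subprocess

update_feeds_query = ("ssh -i ~sclay/.ssh/id_dsa sclay@db01 "
                      "\"sudo rabbitmqctl list_queues -p newsblurvhost | grep %s\" "
                      "| awk '{print $2}'")

def queue_depth(queue_name):
    # Runs the remote rabbitmqctl pipeline and returns the message count for
    # one queue. 'update_feeds' below is an assumed queue name for illustration.
    output = subprocess.check_output(update_feeds_query % queue_name, shell=True)
    return int(output.decode().strip() or '0')

# Example (requires the ssh key and host to exist):
# print(queue_depth('update_feeds'))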