Merge branch 'master' into jammit

* master:
  Adding active_premium count. Feeds with any active premium subscribers are now fetched at least once an hour.
  Allowing username refinement in fetching starred stories.
  Increasing timeout on gunicorn to 2 min.
  Adding management command to fetch starred stories on command.
  Adding error handling for insta-fetching stories.
  Turning off the page fetcher requests vs. urllib2 discrepancy checker. I bet I've caught them all by now.
  The @kennethreitz commit: logging all discrepancies between requests and urllib2. This should take a few hours.
This commit is contained in:
Samuel Clay 2012-01-09 19:59:26 -08:00
commit 370d03472f
11 changed files with 167 additions and 30 deletions

View file

View file

@ -0,0 +1,29 @@
from django.core.management.base import BaseCommand
from django.conf import settings
from django.contrib.auth.models import User
from apps.feed_import.models import GoogleReaderImporter
from optparse import make_option
from utils.management_functions import daemonize
class Command(BaseCommand):
    """Import a single user's starred items through ``GoogleReaderImporter``.

    Options:
        -d / --daemon    call ``daemonize()`` before doing any work.
        -u / --username  user to import for (required); matched first as a
                         case-insensitive substring, then exactly if the
                         substring is ambiguous.
        -c / --count     passed to ``import_starred_items`` (default 1000).
        -V / --verbose   accepted, but not read by ``handle``.
    """

    option_list = BaseCommand.option_list + (
        make_option("-d", "--daemon", dest="daemonize", action="store_true"),
        make_option('-u', '--username', type='str', dest='username'),
        make_option('-c', '--count', type='int', dest='count', default=1000),
        make_option('-V', '--verbose', action='store_true',
            dest='verbose', default=False, help='Verbose output.'),
    )

    def handle(self, *args, **options):
        """Resolve the requested user and run the starred-items import.

        Raises:
            CommandError: if no username was supplied, or if the username
                does not resolve to exactly one user.
        """
        # Imported locally so this block does not depend on edits to the
        # file's import header.
        from django.core.management.base import CommandError

        username = options.get('username')
        if not username:
            # Fail before daemonizing so the message reaches the terminal
            # instead of passing None into the ORM lookup.
            raise CommandError("A username is required (-u/--username).")

        if options.get('daemonize'):
            daemonize()

        # NOTE(review): the original indentation was lost in this view; this
        # is assumed to run unconditionally rather than only when daemonized
        # -- confirm against the checked-in file.
        settings.LOG_TO_STREAM = True

        try:
            try:
                # Convenience lookup: allow a partial, case-insensitive name.
                user = User.objects.get(username__icontains=username)
            except User.MultipleObjectsReturned:
                # The substring is ambiguous; require an exact match instead.
                user = User.objects.get(username=username)
        except User.DoesNotExist:
            raise CommandError(
                "No single user matches username %r." % (username,))

        reader_importer = GoogleReaderImporter(user)
        reader_importer.import_starred_items(count=options['count'])

View file

@ -0,0 +1,82 @@
# encoding: utf-8
import datetime
from south.db import db
from south.v2 import SchemaMigration
from django.db import models
class Migration(SchemaMigration):
    """South schema migration for ``Feed.active_premium_subscribers``.

    ``forwards`` adds the column to the ``feeds`` table and ``backwards``
    drops it again. ``models`` is the frozen ORM description that South
    stores with the migration; it is data, not logic, and is kept verbatim.
    """

    def forwards(self, orm):
        """Add the indexed integer column ``active_premium_subscribers``."""
        integer_field_cls = self.gf('django.db.models.fields.IntegerField')
        new_column = integer_field_cls(default=-1, db_index=True)
        db.add_column(
            'feeds',
            'active_premium_subscribers',
            new_column,
            keep_default=False,
        )

    def backwards(self, orm):
        """Drop the ``active_premium_subscribers`` column from ``feeds``."""
        db.delete_column('feeds', 'active_premium_subscribers')

    # Frozen model definitions for the apps listed in ``complete_apps``.
    models = {
        'rss_feeds.duplicatefeed': {
            'Meta': {'object_name': 'DuplicateFeed'},
            'duplicate_address': ('django.db.models.fields.CharField', [], {'max_length': '255'}),
            'duplicate_feed_id': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True'}),
            'feed': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'duplicate_addresses'", 'to': "orm['rss_feeds.Feed']"}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'})
        },
        'rss_feeds.feed': {
            'Meta': {'ordering': "['feed_title']", 'object_name': 'Feed', 'db_table': "'feeds'"},
            'active': ('django.db.models.fields.BooleanField', [], {'default': 'True', 'db_index': 'True'}),
            'active_premium_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1', 'db_index': 'True'}),
            'active_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1', 'db_index': 'True'}),
            'average_stories_per_month': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            'branch_from_feed': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['rss_feeds.Feed']", 'null': 'True', 'blank': 'True'}),
            'creation': ('django.db.models.fields.DateField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            'days_to_trim': ('django.db.models.fields.IntegerField', [], {'default': '90'}),
            'etag': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True', 'blank': 'True'}),
            'exception_code': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            'favicon_color': ('django.db.models.fields.CharField', [], {'max_length': '6', 'null': 'True', 'blank': 'True'}),
            'favicon_not_found': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'feed_address': ('django.db.models.fields.URLField', [], {'max_length': '255'}),
            'feed_address_locked': ('django.db.models.fields.NullBooleanField', [], {'default': 'False', 'null': 'True', 'blank': 'True'}),
            'feed_link': ('django.db.models.fields.URLField', [], {'default': "''", 'max_length': '1000', 'null': 'True', 'blank': 'True'}),
            'feed_link_locked': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'feed_title': ('django.db.models.fields.CharField', [], {'default': "'[Untitled]'", 'max_length': '255', 'null': 'True', 'blank': 'True'}),
            'fetched_once': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'has_feed_exception': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}),
            'has_page': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
            'has_page_exception': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}),
            'hash_address_and_link': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '64', 'db_index': 'True'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'last_load_time': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            'last_modified': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'blank': 'True'}),
            'last_update': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}),
            'min_to_decay': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            'next_scheduled_update': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}),
            'num_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1'}),
            'premium_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1'}),
            'queued_date': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}),
            'stories_last_month': ('django.db.models.fields.IntegerField', [], {'default': '0'})
        },
        'rss_feeds.feeddata': {
            'Meta': {'object_name': 'FeedData'},
            'feed': ('utils.fields.AutoOneToOneField', [], {'related_name': "'data'", 'unique': 'True', 'to': "orm['rss_feeds.Feed']"}),
            'feed_classifier_counts': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
            'feed_tagline': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'popular_authors': ('django.db.models.fields.CharField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}),
            'popular_tags': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
            'story_count_history': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'})
        },
        'rss_feeds.feedloadtime': {
            'Meta': {'object_name': 'FeedLoadtime'},
            'date_accessed': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'}),
            'feed': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['rss_feeds.Feed']"}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'loadtime': ('django.db.models.fields.FloatField', [], {})
        }
    }

    complete_apps = ['rss_feeds']

View file

@ -46,6 +46,7 @@ class Feed(models.Model):
num_subscribers = models.IntegerField(default=-1)
active_subscribers = models.IntegerField(default=-1, db_index=True)
premium_subscribers = models.IntegerField(default=-1)
active_premium_subscribers = models.IntegerField(default=-1, db_index=True)
branch_from_feed = models.ForeignKey('Feed', blank=True, null=True, db_index=True)
last_update = models.DateTimeField(db_index=True)
fetched_once = models.BooleanField(default=False)
@ -95,7 +96,7 @@ class Feed(models.Model):
if include_favicon:
try:
feed_icon = MFeedIcon.objects.get(feed_id=self.pk)
feed_icon = MFeedIcon.objects.filter(feed_id=self.pk).first()
feed['favicon'] = feed_icon.data
except MFeedIcon.DoesNotExist:
pass
@ -371,6 +372,14 @@ class Feed(models.Model):
)
self.premium_subscribers = premium_subs.count()
active_premium_subscribers = UserSubscription.objects.filter(
feed__in=feed_ids,
active=True,
user__profile__is_premium=True,
user__profile__last_seen_on__gte=SUBSCRIBER_EXPIRE
)
self.active_premium_subscribers = active_premium_subscribers.count()
self.save()
if verbose:
@ -756,15 +765,15 @@ class Feed(models.Model):
from apps.reader.models import MUserStory
trim_cutoff = 500
if self.active_subscribers <= 1 and self.premium_subscribers < 1:
trim_cutoff = 50
elif self.active_subscribers <= 3 and self.premium_subscribers < 2:
trim_cutoff = 100
elif self.active_subscribers <= 5 and self.premium_subscribers < 3:
elif self.active_subscribers <= 3 and self.premium_subscribers < 2:
trim_cutoff = 150
elif self.active_subscribers <= 5 and self.premium_subscribers < 3:
trim_cutoff = 200
elif self.active_subscribers <= 10 and self.premium_subscribers < 4:
trim_cutoff = 250
trim_cutoff = 300
elif self.active_subscribers <= 25 and self.premium_subscribers < 5:
trim_cutoff = 350
trim_cutoff = 400
stories = MStory.objects(
story_feed_id=self.pk,
).order_by('-story_date')
@ -937,7 +946,7 @@ class Feed(models.Model):
# print 'New/updated story: %s' % (story),
return story_in_system, story_has_changed
def get_next_scheduled_update(self, force=False):
def get_next_scheduled_update(self, force=False, verbose=True):
if self.min_to_decay and not force:
random_factor = random.randint(0, self.min_to_decay) / 4
return self.min_to_decay, random_factor
@ -979,7 +988,15 @@ class Feed(models.Model):
elif self.last_load_time >= 200:
slow_punishment = 6 * self.last_load_time
total = max(4, int(updates_per_day_delay + subscriber_bonus + slow_punishment))
# print "[%s] %s (%s-%s), %s, %s: %s" % (self, updates_per_day_delay, updates_per_day, self.num_subscribers, subscriber_bonus, slow_punishment, total)
if self.active_premium_subscribers:
total = min(total, 60) # 1 hour minimum for premiums
if verbose:
print "[%s] %s (%s/%s/%s/%s), %s, %s: %s" % (self, updates_per_day_delay,
self.num_subscribers, self.active_subscribers,
self.premium_subscribers, self.active_premium_subscribers,
subscriber_bonus, slow_punishment, total)
random_factor = random.randint(0, total) / 4
return total, random_factor*2

View file

@ -41,7 +41,7 @@ class PageImporter(object):
}
@timelimit(15)
def fetch_page(self, urllib_fallback=False):
def fetch_page(self, urllib_fallback=False, requests_exception=None):
feed_link = self.feed.feed_link
if not feed_link:
self.save_no_page()
@ -79,7 +79,7 @@ class PageImporter(object):
LookupError,
requests.packages.urllib3.exceptions.HTTPError), e:
logging.debug(' ***> [%-30s] Page fetch failed using requests: %s' % (self.feed, e))
return self.fetch_page(urllib_fallback=True)
return self.fetch_page(urllib_fallback=True, requests_exception=e)
except Exception, e:
logging.debug('[%d] ! -------------------------' % (self.feed.id,))
tb = traceback.format_exc()

View file

@ -97,6 +97,7 @@ def load_feed_statistics(request, feed_id):
stats['last_load_time'] = feed.last_load_time
stats['premium_subscribers'] = feed.premium_subscribers
stats['active_subscribers'] = feed.active_subscribers
stats['active_premium_subscribers'] = feed.active_premium_subscribers
# Classifier counts
stats['classifier_counts'] = json.decode(feed.data.feed_classifier_counts)

View file

@ -12,6 +12,6 @@ accesslog = "/home/sclay/newsblur/logs/production.log"
errorlog = "/home/sclay/newsblur/logs/errors.log"
loglevel = "debug"
name = "newsblur"
timeout = 60
timeout = 120
max_requests = 1000
workers = numCPUs()

View file

@ -73,7 +73,7 @@ NEWSBLUR.AssetModel.Reader.prototype = {
if (clear_queue) {
this.ajax[options['ajax_group']].clear(true);
}
this.ajax[options['ajax_group']].add(_.extend({
url: url,
data: data,
@ -93,12 +93,12 @@ NEWSBLUR.AssetModel.Reader.prototype = {
}
},
error: function(e, textStatus, errorThrown) {
NEWSBLUR.log(['AJAX Error', e, textStatus, errorThrown]);
NEWSBLUR.log(['AJAX Error', e, textStatus, errorThrown, !!error_callback, error_callback]);
if (errorThrown == 'abort') {
return;
}
if ($.isFunction(error_callback)) {
if (error_callback) {
error_callback();
} else if ($.isFunction(callback)) {
var message = "Please create an account. Not much to do without an account.";
@ -435,7 +435,7 @@ NEWSBLUR.AssetModel.Reader.prototype = {
}
},
refresh_feeds: function(callback, has_unfetched_feeds, feed_id) {
refresh_feeds: function(callback, has_unfetched_feeds, feed_id, error_callback) {
var self = this;
var pre_callback = function(data) {
@ -461,7 +461,7 @@ NEWSBLUR.AssetModel.Reader.prototype = {
}
if (NEWSBLUR.Globals.is_authenticated || feed_id) {
this.make_request('/reader/refresh_feeds', data, pre_callback);
this.make_request('/reader/refresh_feeds', data, pre_callback, error_callback);
}
},
@ -833,7 +833,7 @@ NEWSBLUR.AssetModel.Reader.prototype = {
}
},
save_exception_retry: function(feed_id, callback) {
save_exception_retry: function(feed_id, callback, error_callback) {
var self = this;
var pre_callback = function(data) {
@ -844,7 +844,7 @@ NEWSBLUR.AssetModel.Reader.prototype = {
this.make_request('/rss_feeds/exception_retry', {
'feed_id': feed_id,
'reset_fetch': !!(this.feeds[feed_id].has_feed_exception || this.feeds[feed_id].has_page_exception)
}, pre_callback);
}, pre_callback, error_callback);
},
save_exception_change_feed_link: function(feed_id, feed_link, callback) {

View file

@ -79,6 +79,12 @@
this.$s.$feed_stories.bind('mousemove', $.rescope(this.handle_mousemove_feed_view, this));
this.handle_keystrokes();
// ============
// = Bindings =
// ============
_.bindAll(this, 'show_stories_error');
// ==================
// = Initialization =
// ==================
@ -1741,7 +1747,7 @@
_.delay(_.bind(function() {
if (!delay || feed_id == self.next_feed) {
this.model.load_feed(feed_id, 1, true, $.rescope(this.post_open_feed, this),
_.bind(this.show_stories_error, this));
this.show_stories_error);
}
}, this), delay || 0);
@ -1964,7 +1970,7 @@
this.setup_mousemove_on_views();
this.model.fetch_starred_stories(1, _.bind(this.post_open_starred_stories, this),
_.bind(this.show_stories_error, this), true);
this.show_stories_error, true);
},
post_open_starred_stories: function(data, first_load) {
@ -2027,7 +2033,7 @@
this.cache['river_feeds_with_unreads'] = feeds;
this.show_stories_progress_bar(feeds.length);
this.model.fetch_river_stories(this.active_feed, feeds, 1,
_.bind(this.post_open_river_stories, this), _.bind(this.show_stories_error, this), true);
_.bind(this.post_open_river_stories, this), this.show_stories_error, true);
},
post_open_river_stories: function(data, first_load) {
@ -3157,14 +3163,14 @@
$story_titles.data('page', page+1);
if (this.active_feed == 'starred') {
this.model.fetch_starred_stories(page+1, _.bind(this.post_open_starred_stories, this),
_.bind(this.show_stories_error, this), false);
this.show_stories_error, false);
} else if (this.flags['river_view']) {
this.model.fetch_river_stories(this.active_feed, this.cache['river_feeds_with_unreads'],
page+1, _.bind(this.post_open_river_stories, this),
_.bind(this.show_stories_error, this), false);
this.show_stories_error, false);
} else {
this.model.load_feed(feed_id, page+1, false,
$.rescope(this.post_open_feed, this), _.bind(this.show_stories_error, this));
$.rescope(this.post_open_feed, this), this.show_stories_error);
}
}
},
@ -5119,7 +5125,7 @@
var $feed = this.find_feed_in_feed_list(feed_id);
$feed.addClass('NB-feed-unfetched').removeClass('NB-feed-exception');
this.model.save_exception_retry(feed_id, _.bind(this.force_feed_refresh, this, feed_id, $feed));
this.model.save_exception_retry(feed_id, _.bind(this.force_feed_refresh, this, feed_id, $feed), this.show_stories_error);
},
setup_socket_realtime_unread_counts: function(force) {
@ -5195,10 +5201,10 @@
if (self.active_feed == feed_id || self.active_feed == new_feed_id) {
self.open_feed(new_feed_id, true, $new_feed);
}
}, true, new_feed_id);
}, true, new_feed_id, this.show_stories_error);
},
force_feeds_refresh: function(callback, replace_active_feed, feed_id) {
force_feeds_refresh: function(callback, replace_active_feed, feed_id, error_callback) {
if (callback) {
this.cache.refresh_callback = callback;
} else {
@ -5209,7 +5215,7 @@
this.model.refresh_feeds(_.bind(function(updated_feeds) {
this.post_feed_refresh(updated_feeds, replace_active_feed, feed_id);
}, this), this.flags['has_unfetched_feeds'], feed_id);
}, this), this.flags['has_unfetched_feeds'], feed_id, error_callback);
},
post_feed_refresh: function(updated_feeds, replace_active_feed, single_feed_id) {

View file

@ -176,10 +176,12 @@ def add_object_to_folder(obj, folder, folders, parent='', added=False):
folders[k][f_k] = add_object_to_folder(obj, folder, f_v, f_k, added)
return folders
def mail_feed_error_to_admin(feed, e, local_vars=None):
def mail_feed_error_to_admin(feed, e, local_vars=None, subject=None):
# Mail the admins with the error
if not subject:
subject = "Feed update error"
exc_info = sys.exc_info()
subject = 'Feed update error: %s' % repr(e)
subject = '%s: %s' % (subject, repr(e))
message = 'Traceback:\n%s\n\Feed:\n%s\nLocals:\n%s' % (
'\n'.join(traceback.format_exception(*exc_info)),
pprint.pformat(feed.__dict__),