From c90d55367716aebc7021ff32a48bc50688d7f1bf Mon Sep 17 00:00:00 2001
From: Samuel Clay
Date: Mon, 30 Aug 2010 22:58:41 -0400
Subject: [PATCH 1/8] Copy changes.

---
 utils/munin/newsblur_errors.py | 2 +-
 utils/munin/newsblur_feeds.py  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/utils/munin/newsblur_errors.py b/utils/munin/newsblur_errors.py
index 2887a7b68..ed71c0e12 100755
--- a/utils/munin/newsblur_errors.py
+++ b/utils/munin/newsblur_errors.py
@@ -7,7 +7,7 @@ import datetime
 
 graph_config = {
     'graph_category' : 'NewsBlur',
-    'graph_title' : 'NewsBlur Errors',
+    'graph_title' : 'NewsBlur Fetching History',
     'graph_vlabel' : 'errors',
     'feed_errors.label': 'Feed Errors',
     'feed_success.label': 'Feed Success',
diff --git a/utils/munin/newsblur_feeds.py b/utils/munin/newsblur_feeds.py
index 967477ebf..6f971d8a2 100755
--- a/utils/munin/newsblur_feeds.py
+++ b/utils/munin/newsblur_feeds.py
@@ -6,7 +6,7 @@ from apps.reader.models import UserSubscription
 
 graph_config = {
     'graph_category' : 'NewsBlur',
-    'graph_title' : 'NewsBlur Feeds',
+    'graph_title' : 'NewsBlur Feeds & Subscriptions',
     'graph_vlabel' : 'Feeds & Subscribers',
     'feeds.label': 'feeds',
     'subscriptions.label': 'subscriptions',

From cb59861098f76721bfb21421427e62b3d23f0cba Mon Sep 17 00:00:00 2001
From: Samuel Clay
Date: Mon, 30 Aug 2010 23:37:39 -0400
Subject: [PATCH 2/8] Correcting bug where next_update would be set in the future too quickly.

---
 utils/feed_fetcher.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/utils/feed_fetcher.py b/utils/feed_fetcher.py
index 6f59c693d..0b104e30a 100644
--- a/utils/feed_fetcher.py
+++ b/utils/feed_fetcher.py
@@ -57,7 +57,9 @@ class FetchFeed:
             logging.debug(log_msg)
             feed.save_feed_history(303, "Already fetched")
             return FEED_SAME, None
-
+        else:
+            feed.set_next_scheduled_update()
+
         etag=self.feed.etag
         modified = self.feed.last_modified.utctimetuple()[:7] if self.feed.last_modified else None
 
@@ -263,9 +265,7 @@ class Dispatcher:
            ENTRY_ERR: 0
         }
         start_time = datetime.datetime.now()
-
-        feed.set_next_scheduled_update()
-
+
         ### Uncomment to test feed fetcher
         # from random import randint
         # if randint(0,10) < 10:
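Note on the Munin plugins in PATCH 1/8: the retitled graph_config dicts are consumed by Munin, which runs each plugin with the argument "config" to get the graph definition and with no argument to collect current values, one "<field>.value <number>" line per series. The standalone sketch below is illustrative only and is not part of the patches; the hard-coded counts are placeholders for the database queries the real plugins perform.

    #!/usr/bin/env python
    # Sketch of the Munin plugin contract behind the graph_config dicts in
    # PATCH 1/8. Illustration only; the counts below are placeholders.
    import sys

    graph_config = {
        'graph_category' : 'NewsBlur',
        'graph_title' : 'NewsBlur Fetching History',
        'graph_vlabel' : 'errors',
        'feed_errors.label': 'Feed Errors',
        'feed_success.label': 'Feed Success',
    }

    if __name__ == '__main__':
        if len(sys.argv) > 1 and sys.argv[1] == 'config':
            # "munin-run <plugin> config" asks for the graph definition.
            for key, value in graph_config.items():
                print "%s %s" % (key, value)
        else:
            # A normal run reports the current value of each series.
            print "feed_errors.value %s" % 12    # placeholder count
            print "feed_success.value %s" % 480  # placeholder count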
From 48b147751d9e161ee5ab83ef9cfeee8f2960265e Mon Sep 17 00:00:00 2001
From: Samuel Clay
Date: Mon, 30 Aug 2010 23:55:24 -0400
Subject: [PATCH 3/8] Adding a timelimit to feed fetching. 20 seconds, that's all you got.

---
 utils/bootstrap_mongo.py |  2 +-
 utils/feed_fetcher.py    | 34 +++++++++++++++++++++++++++++++++-
 2 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/utils/bootstrap_mongo.py b/utils/bootstrap_mongo.py
index 244ad305f..45e17077a 100644
--- a/utils/bootstrap_mongo.py
+++ b/utils/bootstrap_mongo.py
@@ -112,7 +112,7 @@ def bootstrap_feedpages():
 
     # db.feed_pages.drop()
     print "Dropped! Mongo DB feed_pages: %s" % MFeedPage.objects().count()
-    print "FeedPages: %s" % MFeedPage.objects().count()
+    print "FeedPages: %s" % FeedPage.objects.count()
     pprint(db.feed_pages.index_information())
 
     feeds = Feed.objects.all().order_by('-average_stories_per_month')
diff --git a/utils/feed_fetcher.py b/utils/feed_fetcher.py
index 0b104e30a..876476289 100644
--- a/utils/feed_fetcher.py
+++ b/utils/feed_fetcher.py
@@ -33,12 +33,44 @@ def mtime(ttime):
     """
     return datetime.datetime.fromtimestamp(time.mktime(ttime))
 
+import threading
+class TimeoutError(Exception): pass
+def timelimit(timeout):
+    """borrowed from web.py"""
+    def _1(function):
+        def _2(*args, **kw):
+            class Dispatch(threading.Thread):
+                def __init__(self):
+                    threading.Thread.__init__(self)
+                    self.result = None
+                    self.error = None
+
+                    self.setDaemon(True)
+                    self.start()
+
+                def run(self):
+                    try:
+                        self.result = function(*args, **kw)
+                    except:
+                        self.error = sys.exc_info()
+
+            c = Dispatch()
+            c.join(timeout)
+            if c.isAlive():
+                raise TimeoutError, 'took too long'
+            if c.error:
+                raise c.error[0], c.error[1]
+            return c.result
+        return _2
+    return _1
+
 class FetchFeed:
     def __init__(self, feed, options):
         self.feed = feed
         self.options = options
         self.fpf = None
-
+
+    @timelimit(20)
     def fetch(self):
         """ Downloads and parses a feed.
         """
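Note on PATCH 3/8: timelimit() runs the wrapped call in a daemon thread and join()s that thread for at most `timeout` seconds; if the thread is still alive the caller gets a TimeoutError, and any exception raised inside the worker is re-raised from the captured sys.exc_info(). A known limitation of this join-based approach is that the timed-out worker is never killed and keeps running in the background. The usage sketch below is illustrative only and not part of the patches; it assumes the decorator and TimeoutError are importable from utils.feed_fetcher as added above.

    # Illustration only, not part of the patch series. Assumes the timelimit
    # decorator and TimeoutError added to utils/feed_fetcher.py in PATCH 3/8.
    import time
    from utils.feed_fetcher import timelimit, TimeoutError

    @timelimit(2)
    def slow_fetch():
        time.sleep(5)   # stand-in for a feed that never responds
        return 'parsed feed'

    try:
        slow_fetch()
    except TimeoutError:
        # Raised in the caller after 2 seconds. The daemon worker thread is
        # not killed; it keeps sleeping in the background until it finishes.
        print "fetch timed out after 2 seconds"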

NewsBlur is upgrading...

+ +

As of Sunday, August 22nd 8:30pm, NewsBlur is going through wonderful backend changes that will make it much, much faster. (PostgreSQL -> MongoDB, for you nerd types.)

+ +

This change will take a few hours. Rest easy knowing I won't sleep until it's done.

diff --git a/utils/feed_fetch.sh b/utils/feed_fetch.sh
new file mode 100755
index 000000000..673eaf76e
--- /dev/null
+++ b/utils/feed_fetch.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+ps aux | grep refresh_feeds | egrep -v grep | awk '{print $2}' | xargs kill > /dev/null 2>&1
+python /home/conesus/newsblur/manage.py refresh_feeds -s &
+python /home/conesus/newsblur/manage.py refresh_feeds -s &
+python /home/conesus/newsblur/manage.py refresh_feeds -s &
+python /home/conesus/newsblur/manage.py refresh_feeds -s &
+python /home/conesus/newsblur/manage.py refresh_feeds -s &
+python /home/conesus/newsblur/manage.py refresh_feeds -s &
diff --git a/utils/feed_fetch_silent.sh b/utils/feed_fetch_silent.sh
new file mode 100755
index 000000000..07aba8afc
--- /dev/null
+++ b/utils/feed_fetch_silent.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+
+ps aux | grep refresh_feeds | egrep -v grep | awk '{print $2}' | xargs kill > /dev/null 2>&1
+python /home/conesus/newsblur/manage.py refresh_feeds -s > /dev/null 2>&1 &
+python /home/conesus/newsblur/manage.py refresh_feeds -s > /dev/null 2>&1 &
+python /home/conesus/newsblur/manage.py refresh_feeds -s > /dev/null 2>&1 &
+python /home/conesus/newsblur/manage.py refresh_feeds -s > /dev/null 2>&1 &
+python /home/conesus/newsblur/manage.py refresh_feeds -s > /dev/null 2>&1 &
+python /home/conesus/newsblur/manage.py refresh_feeds -s > /dev/null 2>&1 &
+python /home/conesus/newsblur/manage.py refresh_feeds -s > /dev/null 2>&1 &
+

From 2274178836a3ee96692aed225d7122888bb37cb7 Mon Sep 17 00:00:00 2001
From: Samuel Clay
Date: Tue, 31 Aug 2010 00:09:34 -0400
Subject: [PATCH 5/8] Whoops, refreshing feeds waaay too often.

---
 media/js/newsblur/reader.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/media/js/newsblur/reader.js b/media/js/newsblur/reader.js
index 9dfb9bba9..17db0ce5a 100644
--- a/media/js/newsblur/reader.js
+++ b/media/js/newsblur/reader.js
@@ -2330,7 +2330,7 @@ setup_feed_refresh: function() {
         var self = this;
-        var FEED_REFRESH_INTERVAL = (1000 * 60) / 12; // 1/2 minutes
+        var FEED_REFRESH_INTERVAL = (1000 * 60) / 2; // 1/2 minutes
 
         clearInterval(this.flags.feed_refresh);
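Note on PATCH 5/8: (1000 * 60) / 12 works out to 5,000 ms, a refresh every 5 seconds, while the corrected (1000 * 60) / 2 is 30,000 ms, the half minute the comment intends, so clients had been polling for updated feeds six times more often than planned.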
From 985946e63b279cc390918380319f2d576cab898b Mon Sep 17 00:00:00 2001
From: Samuel Clay
Date: Tue, 31 Aug 2010 08:13:20 -0400
Subject: [PATCH 6/8] Removing timelimit from feed fetcher. This caused massive interrupts.

---
 utils/feed_fetcher.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utils/feed_fetcher.py b/utils/feed_fetcher.py
index 876476289..4085fb75f 100644
--- a/utils/feed_fetcher.py
+++ b/utils/feed_fetcher.py
@@ -70,7 +70,7 @@ class FetchFeed:
         self.options = options
         self.fpf = None
 
-    @timelimit(20)
+    # @timelimit(20)
     def fetch(self):
         """ Downloads and parses a feed.
         """

From 79ba0e695a5c0e5c1b1d52c6cdb46e1bf21a3d5e Mon Sep 17 00:00:00 2001
From: Samuel Clay
Date: Tue, 31 Aug 2010 08:33:18 -0400
Subject: [PATCH 7/8] Removing weird threading-style time limiting of feed fetching.

---
 utils/feed_fetcher.py | 31 -------------------------------
 1 file changed, 31 deletions(-)

diff --git a/utils/feed_fetcher.py b/utils/feed_fetcher.py
index 4085fb75f..b652e4cdb 100644
--- a/utils/feed_fetcher.py
+++ b/utils/feed_fetcher.py
@@ -33,36 +33,6 @@ def mtime(ttime):
     """
     return datetime.datetime.fromtimestamp(time.mktime(ttime))
 
-import threading
-class TimeoutError(Exception): pass
-def timelimit(timeout):
-    """borrowed from web.py"""
-    def _1(function):
-        def _2(*args, **kw):
-            class Dispatch(threading.Thread):
-                def __init__(self):
-                    threading.Thread.__init__(self)
-                    self.result = None
-                    self.error = None
-
-                    self.setDaemon(True)
-                    self.start()
-
-                def run(self):
-                    try:
-                        self.result = function(*args, **kw)
-                    except:
-                        self.error = sys.exc_info()
-
-            c = Dispatch()
-            c.join(timeout)
-            if c.isAlive():
-                raise TimeoutError, 'took too long'
-            if c.error:
-                raise c.error[0], c.error[1]
-            return c.result
-        return _2
-    return _1
-
 class FetchFeed:
     def __init__(self, feed, options):
@@ -70,7 +40,6 @@ class FetchFeed:
         self.options = options
         self.fpf = None
 
-    # @timelimit(20)
     def fetch(self):
         """ Downloads and parses a feed.
         """

From 1ac8402807c6dbba95126a6f867e0fafc40ed1be Mon Sep 17 00:00:00 2001
From: Samuel Clay
Date: Tue, 31 Aug 2010 08:45:35 -0400
Subject: [PATCH 8/8] Adding indexes to feed/page fetch histories.

---
 apps/rss_feeds/models.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/apps/rss_feeds/models.py b/apps/rss_feeds/models.py
index afb0a7caa..026a5cd30 100644
--- a/apps/rss_feeds/models.py
+++ b/apps/rss_feeds/models.py
@@ -794,6 +794,7 @@ class MFeedFetchHistory(mongo.Document):
     meta = {
         'collection': 'feed_fetch_history',
         'allow_inheritance': False,
+        'indexes': ['feed_id', ('feed_id', 'status_code'), ('feed_id', 'fetch_date')],
     }
 
 class PageFetchHistory(models.Model):
@@ -823,6 +824,7 @@ class MPageFetchHistory(mongo.Document):
     meta = {
         'collection': 'page_fetch_history',
         'allow_inheritance': False,
+        'indexes': ['feed_id', ('feed_id', 'status_code'), ('feed_id', 'fetch_date')],
     }
 
 class DuplicateFeed(models.Model):
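Note on PATCH 8/8: in MongoEngine, the 'indexes' list inside a Document's meta declares the indexes to ensure on the underlying collection; a plain string is a single-field index and a tuple is a compound index over the listed fields in order. The sketch below illustrates the same pattern with a made-up model and database name; it is not NewsBlur's actual class.

    # Illustration only: hypothetical model showing the 'indexes' meta option.
    import mongoengine as mongo

    mongo.connect('example_db')   # placeholder database name

    class MFetchHistoryExample(mongo.Document):
        feed_id     = mongo.IntField()
        status_code = mongo.IntField()
        fetch_date  = mongo.DateTimeField()
        meta = {
            'collection': 'fetch_history_example',
            'allow_inheritance': False,
            # One single-field index plus two compound indexes, so queries
            # like "history for feed X by status" or "latest fetches for
            # feed X" avoid a full collection scan.
            'indexes': ['feed_id', ('feed_id', 'status_code'), ('feed_id', 'fetch_date')],
        }

    # Served by the (feed_id, fetch_date) compound index, for example:
    # MFetchHistoryExample.objects(feed_id=42).order_by('-fetch_date')[:5]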