diff --git a/fabfile.py b/fabfile.py index c85a37574..d3a39b2b3 100644 --- a/fabfile.py +++ b/fabfile.py @@ -79,10 +79,15 @@ env.roledefs ={ 'db20.newsblur.com', 'db21.newsblur.com', 'db22.newsblur.com', + 'db23.newsblur.com', ], - 'dbdo':['198.211.115.113', + 'dbdo':['198.211.109.225', + '198.211.109.224', + '198.211.110.164', + '198.211.115.113', '198.211.115.153', '198.211.115.8', + '198.211.117.116', ], 'task': ['task01.newsblur.com', 'task02.newsblur.com', @@ -521,7 +526,7 @@ def setup_imaging(): def setup_supervisor(): sudo('apt-get -y install supervisor') -@parallel +# @parallel def setup_hosts(): put('../secrets-newsblur/configs/hosts', '/etc/hosts', use_sudo=True) diff --git a/utils/feed_fetcher.py b/utils/feed_fetcher.py index f1570714a..0f766f351 100644 --- a/utils/feed_fetcher.py +++ b/utils/feed_fetcher.py @@ -18,7 +18,7 @@ from apps.statistics.models import MAnalyticsFetcher from utils import feedparser from utils.story_functions import pre_process_story from utils import log as logging -from utils.feed_functions import timelimit, TimeoutError, utf8encode +from utils.feed_functions import timelimit, TimeoutError, utf8encode, cache_bust_url # from utils.feed_functions import mail_feed_error_to_admin @@ -54,11 +54,18 @@ class FetchFeed: etag=self.feed.etag modified = self.feed.last_modified.utctimetuple()[:7] if self.feed.last_modified else None + address = self.feed.feed_address - if self.options.get('force') or not self.feed.fetched_once or not self.feed.known_good: + if (self.options.get('force') or random.random() <= .01): modified = None etag = None - + address = cache_bust_url(address) + logging.debug(u' ---> [%-30s] ~FBForcing fetch: %s' % ( + self.feed.title[:30], address)) + elif (not self.feed.fetched_once or not self.feed.known_good): + modified = None + etag = None + USER_AGENT = 'NewsBlur Feed Fetcher - %s subscriber%s - %s (Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/536.2.3 (KHTML, like Gecko) Version/5.2)' % ( self.feed.num_subscribers, 's' if self.feed.num_subscribers != 1 else '', @@ -75,7 +82,7 @@ class FetchFeed: return FEED_OK, self.fpf try: - self.fpf = feedparser.parse(self.feed.feed_address, + self.fpf = feedparser.parse(address, agent=USER_AGENT, etag=etag, modified=modified) @@ -83,7 +90,7 @@ class FetchFeed: logging.debug(u' ***> [%-30s] ~FR%s, turning off microformats.' % (self.feed.title[:30], e)) feedparser.PARSE_MICROFORMATS = False - self.fpf = feedparser.parse(self.feed.feed_address, + self.fpf = feedparser.parse(address, agent=USER_AGENT, etag=etag, modified=modified) diff --git a/utils/feed_functions.py b/utils/feed_functions.py index e787e80d7..efbef9ca9 100644 --- a/utils/feed_functions.py +++ b/utils/feed_functions.py @@ -3,9 +3,11 @@ import threading import sys import traceback import pprint +import urllib +import urlparse +import random from django.core.mail import mail_admins from django.utils.translation import ungettext -from django.conf import settings from utils import log as logging class TimeoutError(Exception): pass @@ -56,6 +58,18 @@ def utf8encode(tstr): except UnicodeDecodeError: return u'' +def append_query_string_to_url(url, **kwargs): + url_parts = list(urlparse.urlparse(url)) + query = dict(urlparse.parse_qsl(url_parts[4])) + query.update(kwargs) + + url_parts[4] = urllib.urlencode(query) + + return urlparse.urlunparse(url_parts) + +def cache_bust_url(url): + return append_query_string_to_url(url, _=random.randint(0, 10000)) + # From: http://www.poromenos.org/node/87 def levenshtein_distance(first, second): """Find the Levenshtein distance between two strings."""