From 586ff3f572d2a39c7d5cbfc61782a454128a2fea Mon Sep 17 00:00:00 2001 From: Samuel Clay Date: Tue, 8 Mar 2016 13:26:34 -0800 Subject: [PATCH] Fixing unicode issues in feed fetcher. Also fixing broken last-modified cache header. --- utils/feed_fetcher.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/utils/feed_fetcher.py b/utils/feed_fetcher.py index 713ebd505..12921155d 100644 --- a/utils/feed_fetcher.py +++ b/utils/feed_fetcher.py @@ -40,12 +40,6 @@ from celery.exceptions import SoftTimeLimitExceeded # http://feedjack.googlecode.com FEED_OK, FEED_SAME, FEED_ERRPARSE, FEED_ERRHTTP, FEED_ERREXC = range(5) - - -def mtime(ttime): - """ datetime auxiliar function. - """ - return datetime.datetime.fromtimestamp(time.mktime(ttime)) class FetchFeed: @@ -131,9 +125,10 @@ class FetchFeed: headers['If-Modified-Since'] = modified_header raw_feed = requests.get(address, headers=headers) if raw_feed.content: - self.fpf = feedparser.parse(raw_feed.content, response_headers={ - 'Content-Location': raw_feed.url, - }) + response_headers = raw_feed.headers + response_headers['Content-Location'] = raw_feed.url + self.fpf = feedparser.parse(smart_unicode(raw_feed.content), + response_headers=response_headers) except Exception, e: logging.debug(" ---> [%-30s] ~FRFeed failed to fetch with request, trying feedparser: %s" % (self.feed.title[:30], e)) @@ -413,9 +408,10 @@ class ProcessFeed: original_last_modified = self.feed.last_modified try: - self.feed.last_modified = mtime(self.fpf.modified) - except: + self.feed.last_modified = datetime.datetime.strptime(self.fpf.modified, '%a, %d %b %Y %H:%M:%S %Z') + except Exception, e: self.feed.last_modified = None + logging.debug("Broken mtime %s: %s" % (self.feed.last_modified, e)) pass if self.feed.last_modified != original_last_modified: self.feed.save(update_fields=['last_modified'])