diff --git a/apps/rss_feeds/icon_importer.py b/apps/rss_feeds/icon_importer.py
index abe8add59..84adea21f 100644
--- a/apps/rss_feeds/icon_importer.py
+++ b/apps/rss_feeds/icon_importer.py
@@ -25,6 +25,9 @@ class IconImporter(object):
         if not self.force and self.feed.icon.not_found:
             print 'Not found, skipping...'
             return
+        if not self.force and not self.feed.icon.not_found and self.feed.icon.icon_url:
+            print 'Found, but skipping...'
+            return
         image, image_file, icon_url = self.fetch_image_from_page_data()
         if not image:
             image, image_file, icon_url = self.fetch(force=self.force)
@@ -59,12 +62,10 @@ class IconImporter(object):
             image_file.seek(0)
             header = struct.unpack('<3H', image_file.read(6))
         except Exception, e:
-            print 'No on struct: %s'% e
             return
 
         # Check magic
         if header[:2] != (0, 1):
-            print 'No on header', header
             return
 
         # Collect icon directories
@@ -157,11 +158,11 @@ class IconImporter(object):
             image, image_file = self.get_image_from_url(url)
         except(urllib2.HTTPError, urllib2.URLError):
             return None, None, None
-        print 'Found: %s - %s' % (url, image)
+        # print 'Found: %s - %s' % (url, image)
         return image, image_file, url
 
     def get_image_from_url(self, url):
-        print 'Requesting: %s' % url
+        # print 'Requesting: %s' % url
         try:
             request = urllib2.Request(url, headers=HEADERS)
             icon = urllib2.urlopen(request).read()
@@ -186,7 +187,6 @@ class IconImporter(object):
     def normalize_image(self, image):
         # if image.size != (16, 16):
         #     image = image.resize((16, 16), Image.BICUBIC)
-        print image
 
         if image.mode != 'RGBA':
             image = image.convert('RGBA')
@@ -201,7 +201,7 @@ class IconImporter(object):
         ar = ar.reshape(scipy.product(shape[:2]), shape[2])
 
         codes, _ = scipy.cluster.vq.kmeans(ar, NUM_CLUSTERS)
-        print "Before: %s" % codes
+        # print "Before: %s" % codes
         original_codes = codes
         for low, hi in [(60, 200), (35, 230), (10, 250)]:
             codes = scipy.array([code for code in codes
@@ -209,18 +209,17 @@ class IconImporter(object):
                                  (code[0] > hi and code[1] > hi and code[2] > hi))])
             if not len(codes): codes = original_codes
             else: break
 
-        print "After: %s" % codes
-        colors = [''.join(chr(c) for c in code).encode('hex') for code in codes]
+        # print "After: %s" % codes
 
         vecs, _ = scipy.cluster.vq.vq(ar, codes)         # assign codes
         counts, bins = scipy.histogram(vecs, len(codes)) # count occurrences
-        print counts
-        total = scipy.sum(counts)
-        print dict(zip(colors, [count/float(total) for count in counts]))
+        # colors = [''.join(chr(c) for c in code).encode('hex') for code in codes]
+        # total = scipy.sum(counts)
+        # print dict(zip(colors, [count/float(total) for count in counts]))
 
         index_max = scipy.argmax(counts)                 # find most frequent
         peak = codes[index_max]
         color = ''.join(chr(c) for c in peak).encode('hex')
-        print 'most frequent is %s (#%s)' % (peak, color)
+        # print 'most frequent is %s (#%s)' % (peak, color)
         return color[:6]
diff --git a/apps/rss_feeds/models.py b/apps/rss_feeds/models.py
index b02ceb491..a8c72e3c5 100644
--- a/apps/rss_feeds/models.py
+++ b/apps/rss_feeds/models.py
@@ -727,7 +727,7 @@ class FeedData(models.Model):
 
 class FeedIcon(models.Model):
     feed = AutoOneToOneField(Feed, primary_key=True, related_name='icon')
-    color = models.CharField(max_length=6, default="000000")
+    color = models.CharField(max_length=6, blank=True, null=True)
     data = models.TextField()
     icon_url = models.CharField(max_length=2000, blank=True, null=True)
     not_found = models.BooleanField(default=False)
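The hunks above strip debug prints out of `determine_favicon_color`, whose technique is worth spelling out: k-means over the icon's RGBA pixels, discard clusters near pure black or pure white (relaxing the cutoffs until at least one cluster survives), then report the busiest remaining cluster as a hex color. The `models.py` hunk makes `color` nullable to match, and the `reader.js` hunk further down guards against the missing value. Below is a minimal standalone sketch of the same approach, using NumPy/Pillow in place of the module's older `scipy.*` array aliases; the function name and the direct `convert('RGBA')` call are illustrative, not the module's API.

```python
import numpy as np
import scipy.cluster.vq
from PIL import Image

NUM_CLUSTERS = 5  # mirrors the constant the module passes to kmeans

def dominant_color(image):
    # Flatten the icon to a (pixels, RGBA) float array, as the diff does
    # with ar.reshape(scipy.product(shape[:2]), shape[2]).
    ar = np.asarray(image.convert('RGBA'), dtype=float)
    ar = ar.reshape(-1, ar.shape[-1])

    codes, _ = scipy.cluster.vq.kmeans(ar, NUM_CLUSTERS)

    # Same threshold ladder as the diff: prefer saturated clusters, and
    # fall back to the unfiltered set if every ladder rejects everything.
    original_codes = codes
    for low, hi in [(60, 200), (35, 230), (10, 250)]:
        rgb = codes[:, :3]
        keep = ~(np.all(rgb < low, axis=1) | np.all(rgb > hi, axis=1))
        if keep.any():
            codes = codes[keep]
            break
    else:
        codes = original_codes

    vecs, _ = scipy.cluster.vq.vq(ar, codes)     # assign pixels to clusters
    counts, _ = np.histogram(vecs, len(codes))   # count occurrences
    peak = codes[np.argmax(counts)]              # most frequent cluster
    return '%02x%02x%02x' % tuple(int(c) for c in peak[:3])
```

Note that the module clusters 4-byte RGBA vectors and hex-encodes all four bytes of the peak, which is why it ends with `return color[:6]`: the alpha byte is trimmed off the end. The sketch gets the same result by formatting only `peak[:3]`.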
diff --git a/apps/rss_feeds/page_importer.py b/apps/rss_feeds/page_importer.py
index 80a5486c8..b0407a039 100644
--- a/apps/rss_feeds/page_importer.py
+++ b/apps/rss_feeds/page_importer.py
@@ -5,6 +5,12 @@ import traceback
 import feedparser
 from utils import log as logging
 from apps.rss_feeds.models import MFeedPage
+from utils.feed_functions import timelimit
+
+HEADERS = {
+    'User-Agent': 'NewsBlur Page Fetcher - http://www.newsblur.com',
+    'Connection': 'close',
+}
 
 class PageImporter(object):
 
@@ -12,12 +18,13 @@ class PageImporter(object):
         self.url = url
         self.feed = feed
 
+    @timelimit(30)
     def fetch_page(self):
         if not self.url:
             return
 
         try:
-            request = urllib2.Request(self.url)
+            request = urllib2.Request(self.url, headers=HEADERS)
             response = urllib2.urlopen(request)
             data = response.read()
             html = self.rewrite_page(data)
diff --git a/media/css/reader.css b/media/css/reader.css
index ed0e90575..4c79bc22d 100644
--- a/media/css/reader.css
+++ b/media/css/reader.css
@@ -1497,8 +1497,8 @@ background: transparent;
         linear,
         left bottom,
         left top,
-        color-stop(0.36, rgba(248, 221,105, 250)),
-        color-stop(0.84, rgba(268, 241, 125, 250))
+        color-stop(0.36, rgba(76, 76, 76, 250)),
+        color-stop(0.84, rgba(55, 55, 55, 250))
     );
     background-image: -moz-linear-gradient(
         center bottom,
diff --git a/media/js/newsblur/reader.js b/media/js/newsblur/reader.js
index 7e32c2c8d..749de59cb 100644
--- a/media/js/newsblur/reader.js
+++ b/media/js/newsblur/reader.js
@@ -2173,6 +2173,9 @@
     generate_gradient: function(feed, type) {
         var color = feed.favicon_color;
 
+        NEWSBLUR.log(['generate_gradient', feed.feed_title, color]);
+        if (!color) return '';
+
         var r = parseInt(color.substr(0, 2), 16);
         var g = parseInt(color.substr(2, 2), 16);
         var b = parseInt(color.substr(4, 2), 16);
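The `@timelimit(30)` guard added above comes from `utils.feed_functions`, whose implementation is not part of this diff; the `feed_fetcher.py` hunks below catch the `TimeoutError` it raises around `fetch_page()`. A rough sketch of one common way to build such a decorator, assuming (not shown in this diff) a thread-based approach: run the wrapped call in a daemon thread and give up after `join(timeout)`.

```python
import threading
from functools import wraps

class TimeoutError(Exception):
    """Raised when the wrapped call exceeds its time budget."""

def timelimit(timeout):
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            outcome = {}

            def worker():
                try:
                    outcome['result'] = func(*args, **kwargs)
                except Exception as e:
                    outcome['error'] = e

            thread = threading.Thread(target=worker)
            thread.daemon = True  # a hung fetch must not block interpreter exit
            thread.start()
            thread.join(timeout)
            if thread.is_alive():
                raise TimeoutError('%s took longer than %s seconds'
                                   % (func.__name__, timeout))
            if 'error' in outcome:
                raise outcome['error']
            return outcome.get('result')
        return wrapper
    return decorator
```

One caveat with this style of timeout: the worker thread cannot be killed, so a hung `urllib2` read keeps running in the background after the dispatcher moves on; the daemon flag only keeps it from blocking shutdown, and the new `Connection: close` header helps the socket wind down sooner.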
diff --git a/utils/feed_fetcher.py b/utils/feed_fetcher.py
index b7d17e4e0..c1360c252 100644
--- a/utils/feed_fetcher.py
+++ b/utils/feed_fetcher.py
@@ -22,14 +22,11 @@ import xml.sax
 
 # Refresh feed code adapted from Feedjack.
 # http://feedjack.googlecode.com
 
-VERSION = '1.0'
 URL = 'http://www.newsblur.com/'
-USER_AGENT = 'NewsBlur Fetcher %s - %s' % (VERSION, URL)
 SLOWFEED_WARNING = 10
 ENTRY_NEW, ENTRY_UPDATED, ENTRY_SAME, ENTRY_ERR = range(4)
 FEED_OK, FEED_SAME, FEED_ERRPARSE, FEED_ERRHTTP, FEED_ERREXC = range(5)
 
-
 def mtime(ttime):
     """ datetime auxiliar function. """
@@ -61,6 +58,12 @@ class FetchFeed:
         modified = None
         etag = None
 
+        USER_AGENT = 'NewsBlur Feed Fetcher (%s subscriber%s) - %s' % (
+            self.feed.num_subscribers,
+            's' if self.feed.num_subscribers != 1 else '',
+            URL
+        )
+        print USER_AGENT
         self.fpf = feedparser.parse(self.feed.feed_address,
                                     agent=USER_AGENT,
                                     etag=etag,
@@ -210,9 +213,6 @@ class ProcessFeed:
             story_feed_id=self.feed.pk
         ).limit(len(story_guids))
 
-        logging.info(u' ---> [%-30s] Parsing: %s existing stories' % (
-            unicode(self.feed)[:30],
-            len(existing_stories)))
         # MStory.objects(
         #     (Q(story_date__gte=start_date) & Q(story_date__lte=end_date))
         #     | (Q(story_guid__in=story_guids)),
@@ -328,19 +328,23 @@ class Dispatcher:
                           (ret_feed == FEED_OK or
                            (ret_feed == FEED_SAME and feed.stories_last_month > 10)))):
 
-                logging.debug(u' ---> [%-30s] Fetching page' % (unicode(feed)[:30]))
+                logging.debug(u' ---> [%-30s] Fetching page: %s' % (unicode(feed)[:30], feed.feed_link))
                 page_importer = PageImporter(feed.feed_link, feed)
                 try:
                     page_importer.fetch_page()
+                except TimeoutError, e:
+                    logging.debug(' ---> [%-30s] Page fetch timed out...' % (unicode(feed)[:30]))
+                    feed.save_page_history(555, 'Timeout', '')
                 except Exception, e:
                     logging.debug('[%d] ! -------------------------' % (feed_id,))
                     tb = traceback.format_exc()
                     logging.error(tb)
                     logging.debug('[%d] ! -------------------------' % (feed_id,))
                     ret_feed = FEED_ERREXC
-                    feed.save_feed_history(550, "Page Error", tb)
+                    feed.save_page_history(550, "Page Error", tb)
                     fetched_feed = None
 
+            logging.debug(u' ---> [%-30s] Fetching icon: %s' % (unicode(feed)[:30], feed.feed_link))
             icon_importer = IconImporter(feed, force=self.options['force'])
             try:
                 icon_importer.save()
@@ -361,9 +365,9 @@ class Dispatcher:
             except IntegrityError:
                 logging.debug(" ---> [%-30s] IntegrityError on feed: %s" % (unicode(feed)[:30], feed.feed_address,))
 
-            done_msg = (u'%2s ---> [%-30s] Processed in %s [%s]' % (
+            done_msg = (u'%2s ---> [%-30s] Processed in %s (%s) [%s]' % (
                 identity, feed.feed_title[:30], unicode(delta),
-                self.feed_trans[ret_feed],))
+                feed.pk, self.feed_trans[ret_feed],))
             logging.debug(done_msg)
 
             self.feed_stats[ret_feed] += 1
@@ -384,8 +388,9 @@ class Dispatcher:
                                                  active=True,
                                                  user__profile__last_seen_on__gte=UNREAD_CUTOFF)\
                                          .order_by('-last_read_date')
-        logging.debug(u' ---> [%-30s] Computing scores for all feed subscribers: %s subscribers' % (
-            unicode(feed)[:30], user_subs.count()))
+        logging.debug(u' ---> [%-30s] Computing scores: %s (%s/%s/%s) subscribers' % (
+            unicode(feed)[:30], user_subs.count(),
+            feed.num_subscribers, feed.active_subscribers, feed.premium_subscribers))
         stories_db = MStory.objects(story_feed_id=feed.pk,
                                     story_date__gte=UNREAD_CUTOFF)
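A note on the user-agent change in the `@@ -61,6 +58,12 @@` hunk above: the module-level `USER_AGENT` constant is replaced by a per-feed string that advertises the feed's subscriber count, presumably so publishers can see their NewsBlur readership in their access logs. The pluralization ternary keeps "1 subscriber" grammatical. Extracted as a standalone helper for illustration (the function name is hypothetical, not part of the codebase):

```python
URL = 'http://www.newsblur.com/'

def user_agent(num_subscribers):
    # Same format string the diff builds inline in FetchFeed.
    return 'NewsBlur Feed Fetcher (%s subscriber%s) - %s' % (
        num_subscribers,
        's' if num_subscribers != 1 else '',
        URL,
    )

assert user_agent(1) == 'NewsBlur Feed Fetcher (1 subscriber) - http://www.newsblur.com/'
assert user_agent(2) == 'NewsBlur Feed Fetcher (2 subscribers) - http://www.newsblur.com/'
```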