Correctly downloading, storing, retrieving, and showing favicons and gradients for favicons. This commit includes numerous fixes for the Feed view, too. Loaded.

Samuel Clay 2011-01-29 19:16:40 -05:00
parent 696d0d6deb
commit b938e95124
7 changed files with 193 additions and 93 deletions
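
The summary above covers the whole round trip: the importer stores each feed's favicon as base64-encoded 16x16 PNG data plus a dominant color, and the client renders it as a data: URI (see get_favicon in the reader JS below). As orientation only, here is a minimal sketch of that round trip, assuming modern Python 3 and Pillow (the commit itself targets Python 2 and PIL); favicon_payload is an illustrative name, not part of the commit.

import base64
from io import BytesIO

from PIL import Image

def favicon_payload(image):
    # Normalize to the same 16x16 RGBA target as normalize_image(), then return
    # the base64 PNG the server stores and the data: URI the client displays.
    image = image.convert('RGBA').resize((16, 16))
    buf = BytesIO()
    image.save(buf, format='PNG')
    b64 = base64.b64encode(buf.getvalue()).decode('ascii')
    return b64, 'data:image/png;base64,' + b64

# Example: payload, uri = favicon_payload(Image.open('favicon.ico'))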

@@ -367,18 +367,7 @@ def load_feed_page(request):
if feed_id == 0:
raise Http404
feed_page = MFeedPage.objects.filter(feed_id=feed_id)
data = None
if feed_page:
data = feed_page[0].page_data and zlib.decompress(feed_page[0].page_data)
else:
dupe_feed = DuplicateFeed.objects.filter(duplicate_feed_id=feed_id)
if dupe_feed:
feed = dupe_feed[0].feed
feed_page = MFeedPage.objects.filter(feed_id=feed.pk)
if feed_page:
data = feed_page[0].page_data and zlib.decompress(feed_page[0].page_data)
data = MFeedPage.get_data(feed_id=feed_id)
if not data:
data = "Fetching feed..."

@@ -3,12 +3,18 @@ import lxml.html
import scipy
import scipy.misc
import scipy.cluster
import Image
import urlparse
import operator
import struct
from StringIO import StringIO
from PIL import ImageFile
import ImageChops, Image
from django.conf import settings
from apps.rss_feeds.models import MFeedPage
from PIL import BmpImagePlugin, PngImagePlugin, ImageFile
class BadImage(Exception): pass
HEADERS = {
'User-Agent': 'NewsBlur Favicon Fetcher - http://www.newsblur.com',
'Connection': 'close',
}
class IconImporter(object):
@@ -20,110 +26,126 @@ class IconImporter(object):
if not self.force and self.feed.icon.not_found:
print 'Not found, skipping...'
return
image, icon_url = self.fetch(force=self.force)
image, icon_url = self.fetch_image_from_page_data()
if not image:
image, icon_url = self.fetch(force=self.force)
if image:
image = self.normalize_image(image)
color = self.determine_dominant_color_in_image(image)
image_str = self.string_from_image(image)
self.feed.icon.save()
self.feed.icon.data = image_str
self.feed.icon.icon_url = icon_url
self.feed.icon.color = color
self.feed.icon.not_found = False
else:
self.feed.icon.save()
self.feed.icon.not_found = True
self.feed.icon.save()
return not self.feed.icon.not_found
def fetch_image_from_page_data(self):
image = None
content = MFeedPage.get_data(feed_id=self.feed.pk)
url = self._url_from_html(content)
if url:
image = self.get_image_from_url(url)
return image, url
def fetch(self, path='favicon.ico', force=False):
HEADERS = {
'User-Agent': 'NewsBlur Favicon Fetcher - http://www.newsblur.com',
'Connection': 'close',
}
image = None
url = None
if not force:
url = self.feed.icon.icon_url
if not url:
url = self.feed.feed_link
url = urlparse.urljoin(self.feed.feed_link, 'favicon.ico')
if not url.endswith('/') and not url.endswith('favicon.ico'):
url += '/favicon.ico'
if url.endswith('/'):
url += 'favicon.ico'
def request_image(url):
print 'Requesting: %s' % url
request = urllib2.Request(url, headers=HEADERS)
icon = urllib2.urlopen(request)
parser = ImageFile.Parser()
s = icon.read()
if s:
parser.feed(s)
try:
image = parser.close()
return image
except IOError:
raise BadImage
try:
image = request_image(url)
except (urllib2.HTTPError, urllib2.URLError, BadImage):
request = urllib2.Request(self.feed.feed_link, headers=HEADERS)
try:
# 2048 bytes should be enough for most of websites
content = urllib2.urlopen(request).read(2048)
except(urllib2.HTTPError, urllib2.URLError):
return None, None
icon_path = lxml.html.fromstring(content).xpath(
'//link[@rel="icon" or @rel="shortcut icon"]/@href'
)
if icon_path:
if str(icon_path[0]).startswith('http'):
url = icon_path[0]
else:
url = self.feed.feed_link + icon_path[0]
image = self.get_image_from_url(url)
if not image:
url = urlparse.urljoin(self.feed.feed_link, '/favicon.ico')
image = self.get_image_from_url(url)
if not image:
request = urllib2.Request(self.feed.feed_link, headers=HEADERS)
try:
image = request_image(url)
except(urllib2.HTTPError, urllib2.URLError, BadImage):
# 2048 bytes should be enough for most of websites
content = urllib2.urlopen(request).read(2048)
except(urllib2.HTTPError, urllib2.URLError):
return None, None
url = self._url_from_html(content)
if url:
try:
image = self.get_image_from_url(url)
except(urllib2.HTTPError, urllib2.URLError):
return None, None
print 'Found: %s - %s' % (url, image)
return image, url
def get_image_from_url(self, url):
print 'Requesting: %s' % url
try:
request = urllib2.Request(url, headers=HEADERS)
icon = urllib2.urlopen(request)
except (urllib2.HTTPError, urllib2.URLError), e:
return None
parser = ImageFile.Parser()
s = icon.read()
if s:
parser.feed(s)
try:
image = parser.close()
return image
except IOError, e:
return None
def _url_from_html(self, content):
url = None
icon_path = lxml.html.fromstring(content).xpath(
'//link[@rel="icon" or @rel="shortcut icon"]/@href'
)
if icon_path:
if str(icon_path[0]).startswith('http'):
url = icon_path[0]
else:
url = urlparse.urljoin(self.feed.feed_link, icon_path[0])
return url
def normalize_image(self, image):
image = image.resize((16, 16), Image.ANTIALIAS)
print image.size
# if image.size != (16, 16):
# image = image.resize((16, 16), Image.BICUBIC)
print image
if image.mode != 'RGBA':
image = image.convert('RGBA')
# mask = Image.open(settings.IMAGE_MASK)
print image
print image.mode
print image.size
# mask = mask.convert('L')
# print mask
# image.paste(Image.new('RGBA', image.size, '#FFFFFF'), (0, 0), ImageChops.invert(mask))
# image.putalpha(mask)
return image
def determine_dominant_color_in_image(self, image):
NUM_CLUSTERS = 5
# if image.mode == 'P':
# image.putalpha(0)
ar = scipy.misc.fromimage(image)
shape = ar.shape
if len(shape) > 2:
ar = ar.reshape(scipy.product(shape[:2]), shape[2])
codes, dist = scipy.cluster.vq.kmeans(ar, NUM_CLUSTERS)
codes, _ = scipy.cluster.vq.kmeans(ar, NUM_CLUSTERS)
print "Before: %s" % codes
original_codes = codes
for low, hi in [(60, 200), (35, 230), (10, 250)]:
codes = scipy.array([code for code in codes
if not ((code[0] < low and code[1] < low and code[2] < low) or
(code[0] > hi and code[1] > hi and code[2] > hi))])
if not len(codes): codes = original_codes
else: break
print "After: %s" % codes
colors = [''.join(chr(c) for c in code).encode('hex') for code in codes]
vecs, dist = scipy.cluster.vq.vq(ar, codes) # assign codes
vecs, _ = scipy.cluster.vq.vq(ar, codes) # assign codes
counts, bins = scipy.histogram(vecs, len(codes)) # count occurrences
print counts
total = scipy.sum(counts)
print dict(zip(colors, [count/float(total) for count in counts]))
index_max = scipy.argmax(counts) # find most frequent
@@ -138,6 +160,5 @@ class IconImporter(object):
image.save(output, 'png', quality=95)
contents = output.getvalue()
output.close()
print contents.encode('base64')
return contents.encode('base64')
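
determine_dominant_color_in_image clusters the icon's pixels with k-means and discards clusters that are nearly pure black or pure white before picking the most frequent remaining one. Below is a hedged restatement using current NumPy/SciPy APIs (scipy.misc.fromimage no longer exists); a single filter threshold stands in for the staged (60, 200) / (35, 230) / (10, 250) relaxation in the commit.

import numpy as np
from scipy.cluster.vq import kmeans, vq

def dominant_color(image, clusters=5):
    # Flatten the RGB pixels into an (N, 3) float array for k-means.
    pixels = np.asarray(image.convert('RGB'), dtype=float).reshape(-1, 3)
    codes, _ = kmeans(pixels, clusters)
    # Discard near-black and near-white clusters (single threshold here;
    # the committed code retries with looser bounds if nothing survives).
    kept = np.array([c for c in codes if not (c.max() < 60 or c.min() > 200)])
    if len(kept):
        codes = kept
    # Assign every pixel to its nearest surviving cluster and count them.
    counts = np.bincount(vq(pixels, codes)[0], minlength=len(codes))
    r, g, b = codes[int(np.argmax(counts))]
    return '%02x%02x%02x' % (int(r), int(g), int(b))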

@@ -726,7 +726,7 @@ class FeedData(models.Model):
class FeedIcon(models.Model):
feed = AutoOneToOneField(Feed, related_name='icon')
feed = AutoOneToOneField(Feed, primary_key=True, related_name='icon')
color = models.CharField(max_length=6, default="000000")
data = models.TextField()
icon_url = models.CharField(max_length=2000, blank=True, null=True)
@@ -735,7 +735,8 @@ class FeedIcon(models.Model):
def save(self, *args, **kwargs):
try:
super(FeedIcon, self).save(*args, **kwargs)
except (IntegrityError, OperationError):
except (IntegrityError, OperationError), e:
print "Error on Icon: %s" % e
if self.id: self.delete()
@@ -752,8 +753,25 @@ class MFeedPage(mongo.Document):
if self.page_data:
self.page_data = zlib.compress(self.page_data)
super(MFeedPage, self).save(*args, **kwargs)
@classmethod
def get_data(cls, feed_id):
data = None
feed_page = cls.objects(feed_id=feed_id)
if feed_page:
data = feed_page[0].page_data and zlib.decompress(feed_page[0].page_data)
if not data:
dupe_feed = DuplicateFeed.objects.filter(duplicate_feed_id=feed_id)
if dupe_feed:
feed = dupe_feed[0].feed
feed_page = MFeedPage.objects.filter(feed_id=feed.pk)
if feed_page:
data = feed_page[0].page_data and zlib.decompress(feed_page[0].page_data)
return data
class MStory(mongo.Document):
'''A feed item'''
story_feed_id = mongo.IntField()
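
With this change, MFeedPage.save() zlib-compresses page_data and the new get_data() classmethod decompresses it on the way out, also following DuplicateFeed records so a merged feed's old id still resolves. A tiny illustration of the compression round trip; Python 3 bytes are assumed here, while the committed code is Python 2 and works on str.

import zlib

page_html = b"<html><head><link rel='icon' href='/favicon.ico'></head></html>"
stored = zlib.compress(page_html)             # what MFeedPage.save() writes to Mongo
assert zlib.decompress(stored) == page_html   # what MFeedPage.get_data() returns to callers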

@@ -912,6 +912,8 @@ background: transparent;
margin: 2px 4px 0 2px;
vertical-align: top;
float: left;
width: 16px;
height: 16px;
}
#story_titles .NB-feedbar .feed .feed_title {
@@ -1247,6 +1249,8 @@ background: transparent;
left: 0;
vertical-align: text-bottom;
opacity: .6;
height: 16px;
width: 16px;
}
#story_titles .story .NB-story-feed .feed_title {
display: block;
@@ -1488,11 +1492,12 @@ background: transparent;
}
#story_pane .NB-feed-story-header-feed {
background: #404040 url('../img/reader/feed_view_feed_background.png') repeat-x 0 0;
background-image: -webkit-gradient(
linear,
left bottom,
left top,
color-stop(0.36, rgba(248,221,105, 250)),
color-stop(0.36, rgba(248, 221,105, 250)),
color-stop(0.84, rgba(268, 241, 125, 250))
);
background-image: -moz-linear-gradient(
@@ -1500,10 +1505,10 @@ background: transparent;
rgb(76,76,76) 36%,
rgb(55,55,55) 84%
);
background: #404040 url('../img/reader/feed_view_feed_background.png') repeat-x 0 0;
padding: 2px 200px 2px 28px;
position: relative;
border-bottom: 1px solid #000;
border-top: 1px solid #707070;
z-index: 2;
}
#story_pane .NB-feed-story-header-feed.NB-feed-story-river-same-feed {

BIN media/img/icons/mini/icon_world.gif: Executable file → Normal file; binary not shown (184 B before, 172 B after).

@@ -821,8 +821,7 @@
$.make('div', { className: 'feed_counts' }, [
this.make_feed_counts_floater(feed.ps, feed.nt, feed.ng)
]),
// $.make('img', { className: 'feed_favicon', src: NEWSBLUR.Globals.google_favicon_url + feed.feed_link }),
$.make('img', { className: 'feed_favicon', src: 'data:image/png;base64,' + feed.favicon }),
$.make('img', { className: 'feed_favicon', src: this.get_favicon(feed.favicon) }),
$.make('span', { className: 'feed_title' }, [
feed.feed_title,
(type == 'story' && $.make('span', { className: 'NB-feedbar-train-feed', title: 'Train Intelligence' })),
@@ -846,6 +845,11 @@
return $feed;
},
get_favicon: function(feed_favicon) {
if (feed_favicon) return 'data:image/png;base64,' + feed_favicon;
return NEWSBLUR.Globals.MEDIA_URL + '/img/icons/silk/world.png';
},
make_feed_counts_floater: function(positive_count, neutral_count, negative_count) {
var unread_class = "";
if (positive_count) {
@@ -2135,7 +2139,7 @@
$.make('a', { href: story.story_permalink, className: 'story_title' }, [
(options['river_stories'] && feed &&
$.make('div', { className: 'NB-story-feed' }, [
$.make('img', { className: 'feed_favicon', src: NEWSBLUR.Globals.google_favicon_url + feed.feed_link }),
$.make('img', { className: 'feed_favicon', src: this.get_favicon(feed.favicon) }),
$.make('span', { className: 'feed_title' }, feed.feed_title)
])),
$.make('div', { className: 'NB-storytitles-star'}),
@@ -2167,6 +2171,58 @@
return $story_title;
},
generate_gradient: function(feed, type) {
var color = feed.favicon_color;
var r = parseInt(color.substr(0, 2), 16);
var g = parseInt(color.substr(2, 2), 16);
var b = parseInt(color.substr(4, 2), 16);
NEWSBLUR.log(['generate', color, r, g, b]);
if (type == 'webkit') {
return [
'-webkit-gradient(',
'linear,',
'left bottom,',
'left top,',
'color-stop(0.36, rgba(',
[
r,
g,
b,
255
].join(','),
')),',
'color-stop(0.84, rgba(',
[
r+35,
g+35,
b+35,
255
].join(','),
')))'
].join('');
} else if (type == 'moz') {
return [
'-moz-linear-gradient(',
'center bottom,',
'rgb(',
[
r,
g,
b
].join(','),
') 36%,',
'rgb(',
[
r+35,
g+35,
b+35
].join(','),
') 84%)'
].join('');
}
},
story_titles_clear_loading_endbar: function() {
var $story_titles = this.$s.$story_titles;
@@ -2601,6 +2657,7 @@
var self = this;
var unread_view = this.model.preference('unread_view');
var river_same_feed;
var feed = this.model.get_feed(this.active_feed);
options = options || {};
@@ -2610,7 +2667,7 @@
for (var s in stories) {
var story = stories[s];
if (options.river_stories) var feed = this.model.get_feed(story.story_feed_id);
if (options.river_stories) feed = this.model.get_feed(story.story_feed_id);
var read = story.read_status
? ' read '
: '';
@@ -2632,11 +2689,12 @@
$.make('div', { className: 'NB-feed-story-header-feed ' + river_same_feed }, [
(options.river_stories && feed && // !river_same_feed
$.make('div', { className: 'NB-feed-story-feed' }, [
$.make('img', { className: 'feed_favicon', src: NEWSBLUR.Globals.google_favicon_url + feed.feed_link }),
$.make('img', { className: 'feed_favicon', src: this.get_favicon(feed.favicon) }),
$.make('span', { className: 'feed_title' }, feed.feed_title)
])
)
]),
]).css('background-image', this.generate_gradient(feed, 'webkit'))
.css('background-image', this.generate_gradient(feed, 'moz')),
$.make('div', { className: 'NB-feed-story-header-info' }, [
(story.story_authors &&
$.make('div', { className: 'NB-feed-story-author' }, story.story_authors)),
@@ -2708,7 +2766,7 @@
if (!$new_header.find('.NB-feed-story-feed').length) {
var feed = this.model.get_feed(story.story_feed_id);
feed && $new_header.append($.make('div', { className: 'NB-feed-story-feed' }, [
$.make('img', { className: 'feed_favicon', src: NEWSBLUR.Globals.google_favicon_url + feed.feed_link }),
$.make('img', { className: 'feed_favicon', src: this.get_favicon(feed.favicon) }),
$.make('span', { className: 'feed_title' }, feed.feed_title)
]));
}
@@ -2782,14 +2840,23 @@
},
fetch_story_locations_in_feed_view: function() {
if (!this.model.stories || !this.model.stories.length) return;
this.flags['feed_view_positions_calculated'] = true;
NEWSBLUR.log(['Feed view entirely loaded', this.model.stories.length + " stories"]);
var stories = this.model.stories;
if (!stories || !stories.length) return;
var $feed_view = this.$s.$feed_view;
var $stories = this.$s.$feed_stories;
var $endbar = $.make('div', { className: 'NB-feed-story-endbar' });
$stories.find('.NB-feed-story-endbar').remove();
$stories.append($endbar);
for (var s in stories) {
var story = stories[s];
var $story = this.cache.feed_view_stories[story.id];
this.determine_feed_view_story_position($story, story);
}
this.flags['feed_view_positions_calculated'] = true;
NEWSBLUR.log(['Feed view entirely loaded', this.model.stories.length + " stories"]);
},
determine_feed_view_story_position: function($story, story) {
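
generate_gradient above turns the stored favicon_color into the vendor-prefixed -webkit-gradient and -moz-linear-gradient strings that override the static header background from the CSS hunk, using the dominant color as the bottom stop and a +35-per-channel lighter shade as the top. Below is a hedged Python equivalent, hypothetical only (for example, to produce the same strings server-side); the clamping at 255 is added here and is not in the JS.

def gradient_css(favicon_color, flavor='webkit'):
    # favicon_color is the stored 6-character hex dominant color, e.g. 'f8dd69'.
    r, g, b = (int(favicon_color[i:i + 2], 16) for i in (0, 2, 4))
    # Lighter top stop; the JS adds 35 per channel without clamping.
    lr, lg, lb = (min(c + 35, 255) for c in (r, g, b))
    if flavor == 'webkit':
        # Mirrors the rgba(..., 255) form the JS emits.
        return ('-webkit-gradient(linear, left bottom, left top, '
                'color-stop(0.36, rgba(%d,%d,%d,255)), '
                'color-stop(0.84, rgba(%d,%d,%d,255)))' % (r, g, b, lr, lg, lb))
    return ('-moz-linear-gradient(center bottom, '
            'rgb(%d,%d,%d) 36%%, rgb(%d,%d,%d) 84%%)' % (r, g, b, lr, lg, lb))

# Example: gradient_css('f8dd69', 'moz')
# -> '-moz-linear-gradient(center bottom, rgb(248,221,105) 36%, rgb(255,255,140) 84%)'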

@@ -180,7 +180,7 @@ class ProcessFeed:
self.feed.feed_title = self.fpf.feed.get('title', self.feed.feed_title)
self.feed.data.feed_tagline = self.fpf.feed.get('tagline', self.feed.data.feed_tagline)
self.feed.feed_link = self.fpf.feed.get('link', self.feed.feed_link)
self.feed.feed_link = self.fpf.feed.get('link') or self.fpf.feed.get('id') or self.feed.feed_link
self.feed.last_update = datetime.datetime.utcnow()
guids = []
@@ -341,7 +341,7 @@ class Dispatcher:
feed.save_feed_history(550, "Page Error", tb)
fetched_feed = None
icon_importer = IconImporter(feed)
icon_importer = IconImporter(feed, force=self.options['force'])
try:
icon_importer.save()
except Exception, e: