mirror of
https://github.com/samuelclay/NewsBlur.git
synced 2025-08-05 16:58:59 +00:00
108 lines
3.6 KiB
Python
108 lines
3.6 KiB
Python
![]() |
import logging
import StringIO
import urllib2
import urlparse

import lxml.html
import scipy
import scipy.cluster
import scipy.misc
from PIL import ImageFile
|
||
|
|
||
|
class IconImporter(object):
    """Fetch a feed's favicon and store it, with its dominant color, on the feed.

    The importer reads ``feed.feed_link`` and ``feed.icon.icon_url`` and, on
    success, writes ``data``, ``icon_url`` and ``color`` on ``feed.icon``
    before calling ``feed.icon.save()``.
    """

    # Size the fetched icon is resized to before it is stored.
    ICON_SIZE = (16, 16)
    # Number of k-means clusters used to find the dominant color.
    NUM_CLUSTERS = 5
    # How much of the feed's page is read when looking for an icon link.
    # 2048 bytes should be enough for most websites.
    PAGE_HEAD_BYTES = 2048
    # Chunk size used when streaming an icon into the image parser.
    CHUNK_BYTES = 1024
    ICON_XPATH = '//link[@rel="icon" or @rel="shortcut icon"]/@href'

    _log = logging.getLogger(__name__)

    def __init__(self, feed):
        """Remember the feed whose icon will be fetched and saved."""
        self.feed = feed

    def save(self):
        """Fetch the icon and persist it on ``self.feed.icon``.

        Does nothing when no icon could be fetched.
        """
        fetched = self.fetch()
        # fetch() returns None (not a pair) when nothing could be retrieved,
        # so it must be checked before unpacking.
        if not fetched:
            return
        image, icon_url = fetched
        if not image:
            return
        color = self.determine_dominant_color_in_image(image)
        image_str = self.string_from_image(image)
        self.feed.icon.data = image_str
        self.feed.icon.icon_url = icon_url
        self.feed.icon.color = color
        self.feed.icon.save()

    def fetch(self, path='favicon.ico'):
        """Download the feed's icon.

        The stored ``feed.icon.icon_url`` is tried first; when there is none,
        ``path`` is looked up under ``feed.feed_link``. If that request fails
        or does not yield a decodable image, the first ``PAGE_HEAD_BYTES`` of
        the feed's page are searched for an icon ``<link>`` and that URL is
        tried instead.

        Args:
            path: File name appended to the feed link when no icon URL is
                stored yet.

        Returns:
            An ``(image, url)`` pair, where ``image`` has been resized to
            ``ICON_SIZE`` and ``url`` is the address it was fetched from, or
            ``None`` when no icon could be retrieved.
        """
        headers = {
            'User-Agent': 'NewsBlur Favicon Fetcher - http://www.newsblur.com',
            'Connection': 'close',
        }
        feed_link = self.feed.feed_link
        url = self.feed.icon.icon_url

        if not url:
            if not feed_link:
                # Nothing to build a URL from.
                return None
            # Only a bare feed link gets the favicon path appended; a stored
            # icon URL is used exactly as it was saved.
            url = self._favicon_url(feed_link, path)

        image = self._request_image(url, headers)
        if image is None:
            url = self._discover_icon_url(feed_link, headers)
            if url:
                image = self._request_image(url, headers)

        if image is None:
            return None
        return image.resize(self.ICON_SIZE), url

    @staticmethod
    def _favicon_url(link, path):
        """Return ``link`` with ``path`` appended, unless it already ends with it."""
        if link.endswith(path):
            return link
        if not link.endswith('/'):
            link += '/'
        return link + path

    def _request_image(self, url, headers):
        """Download ``url`` and parse it as an image; return None on failure."""
        try:
            response = urllib2.urlopen(urllib2.Request(url, headers=headers))
            parser = ImageFile.Parser()
            try:
                while True:
                    chunk = response.read(self.CHUNK_BYTES)
                    if not chunk:
                        break
                    parser.feed(chunk)
            finally:
                response.close()
            return parser.close()
        except IOError:
            # urllib2.URLError and urllib2.HTTPError derive from IOError, and
            # the PIL parser raises IOError for data it cannot decode, so a
            # single handler covers both network and decoding failures.
            return None

    def _discover_icon_url(self, feed_link, headers):
        """Return the icon URL advertised by the feed's page, or None."""
        if not feed_link:
            return None
        try:
            response = urllib2.urlopen(
                urllib2.Request(feed_link, headers=headers)
            )
            try:
                content = response.read(self.PAGE_HEAD_BYTES)
            finally:
                response.close()
        except IOError:
            return None
        # NOTE(review): lxml is expected to reject an empty document, so an
        # empty page is skipped here rather than parsed - confirm.
        if not content or not content.strip():
            return None
        hrefs = lxml.html.fromstring(content).xpath(self.ICON_XPATH)
        if not hrefs:
            return None
        # The href may be absolute, root-relative or relative; urljoin
        # resolves each of those against the feed link.
        return urlparse.urljoin(feed_link, hrefs[0])

    @staticmethod
    def _hex_color(code):
        """Return the components of ``code`` as concatenated two-digit hex."""
        return ''.join('%02x' % int(component) for component in code)

    def determine_dominant_color_in_image(self, image):
        """Return the most common color of ``image`` as a hex string.

        The pixels are grouped into ``NUM_CLUSTERS`` clusters with k-means and
        the centroid with the most pixels assigned to it is returned, two hex
        digits per component and without a leading ``#``.
        """
        # NOTE(review): presumably this turns a palette image into one with
        # explicit color channels so each pixel is a color vector - confirm.
        if image.mode == 'P':
            image.putalpha(0)

        pixels = scipy.misc.fromimage(image)
        shape = pixels.shape
        if len(shape) > 2:
            # Flatten rows x columns into one list of per-pixel vectors.
            pixels = pixels.reshape(scipy.product(shape[:2]), shape[2])

        codes, _ = scipy.cluster.vq.kmeans(pixels, self.NUM_CLUSTERS)
        assignments, _ = scipy.cluster.vq.vq(pixels, codes)  # assign codes
        counts, _ = scipy.histogram(assignments, len(codes))  # occurrences

        peak = codes[scipy.argmax(counts)]  # most frequent cluster
        color = self._hex_color(peak)

        if self._log.isEnabledFor(logging.DEBUG):
            total = float(scipy.sum(counts))
            shares = dict(
                (self._hex_color(code), count / total)
                for code, count in zip(codes, counts)
            )
            self._log.debug('%s', shares)
            self._log.debug('most frequent is %s (#%s)', peak, color)

        # NOTE(review): the string has one hex pair per image channel, so an
        # image with an alpha channel yields 8 digits - confirm that callers
        # expect that.
        return color

    def string_from_image(self, image):
        """Return ``image`` encoded as PNG and then base64."""
        output = StringIO.StringIO()
        try:
            image.save(output, format="PNG")
            contents = output.getvalue()
        finally:
            output.close()
        return contents.encode('base64')
|
||
|
|