# NewsBlur/apps/rss_feeds/icon_importer.py

import urllib2
import lxml.html
import scipy
import scipy.misc
import scipy.cluster
import urlparse
import struct
import operator
import BmpImagePlugin, PngImagePlugin, Image
from StringIO import StringIO
from apps.rss_feeds.models import MFeedPage
from utils.feed_functions import timelimit

# HTTP request headers passed to urllib2.Request for every favicon download.
HEADERS = {
    # Ask the server to close the connection once the response is sent.
    'Connection': 'close',
    # Identifies this fetcher to the remote site.
    'User-Agent': 'NewsBlur Favicon Fetcher - http://www.newsblur.com',
}

class IconImporter(object):
    '''
    Locate, download and store the favicon of a single feed.

    The icon is looked up first through a ``<link rel="icon">`` element in
    the feed's stored page HTML and then through ``favicon.ico`` paths
    resolved against ``feed.feed_link``.  The result (base64 PNG data, source
    URL and dominant colour) is written to ``feed.icon``.
    '''

    def __init__(self, feed, force=False):
        '''
        feed  -- object exposing ``icon``, ``feed_link`` and ``pk``.
        force -- when true, re-fetch even if an earlier run already stored an
                 icon or recorded that none exists.
        '''
        self.feed = feed
        self.force = force

    def save(self):
        '''
        Fetch the feed's icon and persist the outcome on ``feed.icon``.

        Returns None when the fetch is skipped (not forced and either a
        previous run found nothing or an icon URL is already stored),
        otherwise True if an icon was stored and False if none was found.
        '''
        icon = self.feed.icon
        if not self.force:
            if icon.not_found:
                # A previous attempt found nothing; do not retry.
                return
            if icon.icon_url:
                # An icon is already stored.
                return

        image, image_file, icon_url = self.fetch_image_from_page_data()
        if not image:
            image, image_file, icon_url = self.fetch_image_from_path(force=self.force)

        if image:
            # Prefer our own ICO decoder (it handles alpha masks); fall back
            # to the image PIL already opened when the data is not a usable ICO.
            try:
                ico_image = self.load_icon(image_file)
            except ValueError:
                ico_image = None
            if ico_image:
                image = ico_image

            image = self.normalize_image(image)
            color = self.determine_dominant_color_in_image(image)
            image_str = self.string_from_image(image)

            icon.data = image_str
            icon.icon_url = icon_url
            icon.color = color
            icon.not_found = False
        else:
            icon.not_found = True

        # Single write once all attributes hold their final values.
        icon.save()
        return not icon.not_found

    def load_icon(self, image_file, index=None):
        '''
        Load Windows ICO image.

        See http://en.wikipedia.org/w/index.php?oldid=264332061 for file format
        description.

        image_file -- seekable file-like object holding the raw download.
        index      -- directory entry to load; by default the entry with the
                      largest (width, height, colour count) is chosen.

        Returns the decoded image, or None when the data is not an ICO file
        or its header / directory table is missing or truncated.
        '''
        try:
            image_file.seek(0)
            header = struct.unpack('<3H', image_file.read(6))
        except Exception:
            return None

        # Check magic: reserved word 0 followed by type 1 (icon).
        if header[:2] != (0, 1):
            return None

        # Collect icon directories
        directories = []
        for _ in range(header[2]):
            try:
                directory = list(struct.unpack('<4B2H2I', image_file.read(16)))
            except struct.error:
                # The header promises more entries than the file contains.
                return None
            # Width, height and colour count store 256 as 0.
            for field in range(3):
                if not directory[field]:
                    directory[field] = 256
            directories.append(directory)

        if not directories:
            return None

        if index is None:
            # Select best icon
            directory = max(directories, key=operator.itemgetter(slice(0, 3)))
        else:
            directory = directories[index]

        # Peek at the start of the bitmap data, then rewind to it.  The
        # rewind is absolute so that a short read cannot misplace the cursor.
        data_offset = directory[7]
        image_file.seek(data_offset)
        prefix = image_file.read(16)
        image_file.seek(data_offset)

        if PngImagePlugin._accept(prefix):
            # Windows Vista icon with PNG inside
            return PngImagePlugin.PngImageFile(image_file)

        # Load XOR bitmap
        image = BmpImagePlugin.DibImageFile(image_file)
        if image.mode == 'RGBA':
            # Windows XP 32-bit color depth icon without AND bitmap
            return image

        # Patch up the bitmap height: the DIB header counts the XOR and AND
        # bitmaps together, so the visible image is half as tall.
        image.size = image.size[0], image.size[1] >> 1
        decoder, _extents, tile_offset, args = image.tile[0]
        image.tile[0] = decoder, (0, 0) + image.size, tile_offset, args

        # Calculate AND bitmap dimensions. See
        # http://en.wikipedia.org/w/index.php?oldid=264236948#Pixel_storage
        # for description
        mask_offset = tile_offset + args[1] * image.size[1]
        stride = ((image.size[0] + 31) >> 5) << 2
        mask_size = stride * image.size[1]

        # Load AND bitmap
        image_file.seek(mask_offset)
        mask_data = image_file.read(mask_size)
        mask = Image.fromstring('1', image.size, mask_data, 'raw',
                                ('1;I', stride, -1))

        image = image.convert('RGBA')
        image.putalpha(mask)
        return image

    def fetch_image_from_page_data(self):
        '''
        Fetch the icon referenced by the feed's stored page HTML.

        Returns ``(image, image_file, url)``; ``image`` and ``image_file`` are
        None when no icon link exists or the download failed.
        '''
        image = None
        image_file = None
        content = MFeedPage.get_data(feed_id=self.feed.pk)
        url = self._url_from_html(content)
        if url:
            image, image_file = self.get_image_from_url(url)
        return image, image_file, url

    def fetch_image_from_path(self, path='favicon.ico', force=False):
        '''
        Fetch the icon from a conventional location.

        path  -- icon file name, tried relative to ``feed.feed_link`` and
                 then at the root of that site.
        force -- when false, the URL already stored on ``feed.icon`` is tried
                 before any path derived from ``feed_link``.

        Returns ``(image, image_file, url)``, or ``(None, None, None)`` when
        there is no URL to try.
        '''
        url = None
        feed_link = self.feed.feed_link

        if not force:
            url = self.feed.icon.icon_url
        if not url and feed_link and len(feed_link) > 6:
            url = urlparse.urljoin(feed_link, path)
        if not url:
            return None, None, None

        image, image_file = self.get_image_from_url(url)
        if not image:
            root_url = urlparse.urljoin(feed_link, '/' + path.lstrip('/'))
            # Skip the retry when the first attempt already hit the root URL.
            if root_url != url:
                image, image_file = self.get_image_from_url(root_url)
            url = root_url
        return image, image_file, url

    def get_image_from_url(self, url):
        '''
        Download ``url`` and open it as an image.

        Returns ``(image, icon_file)`` where ``icon_file`` is an in-memory
        file holding the raw bytes, or ``(None, None)`` on any failure.
        '''
        # NOTE(review): timelimit comes from utils.feed_functions; presumably
        # it aborts the call after 30 seconds -- confirm against that helper.
        @timelimit(30)
        def _fetch(url):
            request = urllib2.Request(url, headers=HEADERS)
            response = urllib2.urlopen(request)
            try:
                payload = response.read()
            finally:
                # Release the connection even when the read fails.
                response.close()
            icon_file = StringIO(payload)
            return Image.open(icon_file), icon_file

        try:
            return _fetch(url)
        except Exception:
            # Deliberately broad: network, HTTP, timeout and image-format
            # errors all mean the same thing here -- no usable icon.
            return None, None

    def _url_from_html(self, content):
        '''
        Return the absolute URL of the first ``<link rel="icon">`` or
        ``<link rel="shortcut icon">`` in ``content``, or None when the HTML
        is empty, unparsable or declares no icon.
        '''
        if not content:
            return None
        try:
            icon_path = lxml.html.fromstring(content).xpath(
                '//link[@rel="icon" or @rel="shortcut icon"]/@href'
            )
        except (lxml.etree.ParserError, TypeError, ValueError):
            # ValueError: lxml rejects unicode input that carries an XML
            # encoding declaration.
            return None

        if not icon_path:
            return None

        # Use the href as-is; coercing it with str() fails on non-ASCII
        # characters under Python 2.
        href = icon_path[0]
        if href.startswith('http'):
            return href
        return urlparse.urljoin(self.feed.feed_link, href)

    def normalize_image(self, image):
        '''
        Return ``image`` in RGBA mode, converting it when necessary.

        The image is not resized.
        '''
        if image.mode != 'RGBA':
            image = image.convert('RGBA')
        return image

    def determine_dominant_color_in_image(self, image):
        '''
        Return the dominant colour of ``image`` as six lowercase hex digits.

        Pixels are grouped into at most NUM_CLUSTERS colours with k-means.
        Near-black and near-white clusters are dropped, using progressively
        looser thresholds, unless that would discard every cluster.  The
        cluster with the most pixels among those remaining wins.
        '''
        NUM_CLUSTERS = 5

        ar = scipy.misc.fromimage(image)
        shape = ar.shape
        if len(shape) > 2:
            # Flatten rows x columns into one list of pixels.
            ar = ar.reshape(scipy.product(shape[:2]), shape[2])

        original_codes, _ = scipy.cluster.vq.kmeans(ar, NUM_CLUSTERS)

        codes = original_codes
        for low, hi in [(60, 200), (35, 230), (10, 250)]:
            kept = [code for code in original_codes
                    if not ((code[0] < low and code[1] < low and code[2] < low) or
                            (code[0] > hi and code[1] > hi and code[2] > hi))]
            if kept:
                codes = scipy.array(kept)
                break

        vecs, _ = scipy.cluster.vq.vq(ar, codes)         # assign codes
        # Explicit integer bin edges keep counts[i] tied to codes[i] even
        # when some code is assigned no pixels; a plain bin count would
        # spread the bins over min(vecs)..max(vecs) instead.
        counts, _ = scipy.histogram(vecs, bins=scipy.arange(len(codes) + 1))
        peak = codes[scipy.argmax(counts)]               # find most frequent

        # Only the first three channels (R, G, B) make up the colour.
        return ''.join('%02x' % int(channel) for channel in peak[:3])

    def string_from_image(self, image):
        '''Return ``image`` encoded as PNG and then as a base64 string.'''
        output = StringIO()
        try:
            image.save(output, 'png', quality=95)
            contents = output.getvalue()
        finally:
            output.close()
        return contents.encode('base64')
Finally saving proper base64 encoded favicons, along with their URLs, autodetecting when not found at /favicon.ico, and now all thats left is serving icons and their colors. Woohoo! 2011-01-27 19:05:50 -05:00			`import urllib2`
			`import lxml.html`
			`import scipy`
			`import scipy.misc`
			`import scipy.cluster`
Correctly downloading, storing, retrieving, and showing favicons and gradients for favicons. This commit includes numerous fixes for the Feed view, too. Loaded. 2011-01-29 19:16:40 -05:00			`import urlparse`
			`import struct`
Handling Win32 ICOs with alpha. Holy god, this was fucking insane to handle. My saturday evenings, people. 2011-01-29 20:20:35 -05:00			`import operator`
Using global PIL imports rather than namespaced PIL imports. (import Image vs. from PIL import Image) 2011-01-30 11:07:15 -05:00			`import BmpImagePlugin, PngImagePlugin, Image`
Success on saving all images. Still some issues with transparency and resizing large images and having weird cutoffs. 2011-01-29 11:24:27 -05:00			`from StringIO import StringIO`
Correctly downloading, storing, retrieving, and showing favicons and gradients for favicons. This commit includes numerous fixes for the Feed view, too. Loaded. 2011-01-29 19:16:40 -05:00			`from apps.rss_feeds.models import MFeedPage`
Screw it. Too many different exceptions come from fetching URLs. If anything goes wrong, not much we can do. 2011-01-30 14:58:14 -05:00			`from utils.feed_functions import timelimit`
Success on saving all images. Still some issues with transparency and resizing large images and having weird cutoffs. 2011-01-29 11:24:27 -05:00
Correctly downloading, storing, retrieving, and showing favicons and gradients for favicons. This commit includes numerous fixes for the Feed view, too. Loaded. 2011-01-29 19:16:40 -05:00			`HEADERS = {`
			`'User-Agent': 'NewsBlur Favicon Fetcher - http://www.newsblur.com',`
			`'Connection': 'close',`
			`}`
Finally saving proper base64 encoded favicons, along with their URLs, autodetecting when not found at /favicon.ico, and now all thats left is serving icons and their colors. Woohoo! 2011-01-27 19:05:50 -05:00
			`class IconImporter(object):`

Success on saving all images. Still some issues with transparency and resizing large images and having weird cutoffs. 2011-01-29 11:24:27 -05:00			`def __init__(self, feed, force=False):`
Finally saving proper base64 encoded favicons, along with their URLs, autodetecting when not found at /favicon.ico, and now all thats left is serving icons and their colors. Woohoo! 2011-01-27 19:05:50 -05:00			`self.feed = feed`
Success on saving all images. Still some issues with transparency and resizing large images and having weird cutoffs. 2011-01-29 11:24:27 -05:00			`self.force = force`
Finally saving proper base64 encoded favicons, along with their URLs, autodetecting when not found at /favicon.ico, and now all thats left is serving icons and their colors. Woohoo! 2011-01-27 19:05:50 -05:00
			`def save(self):`
Success on saving all images. Still some issues with transparency and resizing large images and having weird cutoffs. 2011-01-29 11:24:27 -05:00			`if not self.force and self.feed.icon.not_found:`
Adding timeout to icon fetching, and handling bad .ICOs 2011-01-30 12:53:05 -05:00			`# print 'Not found, skipping...'`
Success on saving all images. Still some issues with transparency and resizing large images and having weird cutoffs. 2011-01-29 11:24:27 -05:00			`return`
Cleaning up logging, adding several errors and timeout checks on page and icon fetching. Standardizing fetcher headers. 2011-01-29 22:01:09 -05:00			`if not self.force and not self.feed.icon.not_found and self.feed.icon.icon_url:`
Adding timeout to icon fetching, and handling bad .ICOs 2011-01-30 12:53:05 -05:00			`# print 'Found, but skipping...'`
Cleaning up logging, adding several errors and timeout checks on page and icon fetching. Standardizing fetcher headers. 2011-01-29 22:01:09 -05:00			`return`
Handling Win32 ICOs with alpha. Holy god, this was fucking insane to handle. My saturday evenings, people. 2011-01-29 20:20:35 -05:00			`image, image_file, icon_url = self.fetch_image_from_page_data()`
Correctly downloading, storing, retrieving, and showing favicons and gradients for favicons. This commit includes numerous fixes for the Feed view, too. Loaded. 2011-01-29 19:16:40 -05:00			`if not image:`
Adding a bit for finding the favicon. Also handling all errors on fetching the HTML of the site for finding the icon. 2011-01-30 20:39:37 -05:00			`image, image_file, icon_url = self.fetch_image_from_path(force=self.force)`
Success on saving all images. Still some issues with transparency and resizing large images and having weird cutoffs. 2011-01-29 11:24:27 -05:00
			`if image:`
Adding timeout to icon fetching, and handling bad .ICOs 2011-01-30 12:53:05 -05:00			`try:`
			`ico_image = self.load_icon(image_file)`
			`if ico_image: image = ico_image`
			`except ValueError:`
			`# print "Bad .ICO"`
			`pass`
Success on saving all images. Still some issues with transparency and resizing large images and having weird cutoffs. 2011-01-29 11:24:27 -05:00			`image = self.normalize_image(image)`
			`color = self.determine_dominant_color_in_image(image)`
			`image_str = self.string_from_image(image)`

Correctly downloading, storing, retrieving, and showing favicons and gradients for favicons. This commit includes numerous fixes for the Feed view, too. Loaded. 2011-01-29 19:16:40 -05:00			`self.feed.icon.save()`
Success on saving all images. Still some issues with transparency and resizing large images and having weird cutoffs. 2011-01-29 11:24:27 -05:00			`self.feed.icon.data = image_str`
			`self.feed.icon.icon_url = icon_url`
			`self.feed.icon.color = color`
			`self.feed.icon.not_found = False`
			`else:`
Correctly downloading, storing, retrieving, and showing favicons and gradients for favicons. This commit includes numerous fixes for the Feed view, too. Loaded. 2011-01-29 19:16:40 -05:00			`self.feed.icon.save()`
Success on saving all images. Still some issues with transparency and resizing large images and having weird cutoffs. 2011-01-29 11:24:27 -05:00			`self.feed.icon.not_found = True`

Finally saving proper base64 encoded favicons, along with their URLs, autodetecting when not found at /favicon.ico, and now all thats left is serving icons and their colors. Woohoo! 2011-01-27 19:05:50 -05:00			`self.feed.icon.save()`
Success on saving all images. Still some issues with transparency and resizing large images and having weird cutoffs. 2011-01-29 11:24:27 -05:00			`return not self.feed.icon.not_found`
Correctly downloading, storing, retrieving, and showing favicons and gradients for favicons. This commit includes numerous fixes for the Feed view, too. Loaded. 2011-01-29 19:16:40 -05:00
Handling Win32 ICOs with alpha. Holy god, this was fucking insane to handle. My saturday evenings, people. 2011-01-29 20:20:35 -05:00			`def load_icon(self, image_file, index=None):`
			`'''`
			`Load Windows ICO image.`

			`See http://en.wikipedia.org/w/index.php?oldid=264332061 for file format`
			`description.`
			`'''`
			`try:`
			`image_file.seek(0)`
			`header = struct.unpack('<3H', image_file.read(6))`
			`except Exception, e:`
			`return`

			`# Check magic`
			`if header[:2] != (0, 1):`
			`return`

			`# Collect icon directories`
			`directories = []`
			`for i in xrange(header[2]):`
			`directory = list(struct.unpack('<4B2H2I', image_file.read(16)))`
			`for j in xrange(3):`
			`if not directory[j]:`
			`directory[j] = 256`

			`directories.append(directory)`

			`if index is None:`
			`# Select best icon`
			`directory = max(directories, key=operator.itemgetter(slice(0, 3)))`
			`else:`
			`directory = directories[index]`

			`# Seek to the bitmap data`
			`image_file.seek(directory[7])`

			`prefix = image_file.read(16)`
			`image_file.seek(-16, 1)`

			`if PngImagePlugin._accept(prefix):`
			`# Windows Vista icon with PNG inside`
			`image = PngImagePlugin.PngImageFile(image_file)`
			`else:`
			`# Load XOR bitmap`
			`image = BmpImagePlugin.DibImageFile(image_file)`
			`if image.mode == 'RGBA':`
			`# Windows XP 32-bit color depth icon without AND bitmap`
			`pass`
			`else:`
			`# Patch up the bitmap height`
			`image.size = image.size[0], image.size[1] >> 1`
			`d, e, o, a = image.tile[0]`
			`image.tile[0] = d, (0, 0) + image.size, o, a`

			`# Calculate AND bitmap dimensions. See`
			`# http://en.wikipedia.org/w/index.php?oldid=264236948#Pixel_storage`
			`# for description`
			`offset = o + a[1] * image.size[1]`
			`stride = ((image.size[0] + 31) >> 5) << 2`
			`size = stride * image.size[1]`

			`# Load AND bitmap`
			`image_file.seek(offset)`
			`string = image_file.read(size)`
			`mask = Image.fromstring('1', image.size, string, 'raw',`
			`('1;I', stride, -1))`

			`image = image.convert('RGBA')`
			`image.putalpha(mask)`

			`return image`

Correctly downloading, storing, retrieving, and showing favicons and gradients for favicons. This commit includes numerous fixes for the Feed view, too. Loaded. 2011-01-29 19:16:40 -05:00			`def fetch_image_from_page_data(self):`
			`image = None`
Handling Win32 ICOs with alpha. Holy god, this was fucking insane to handle. My saturday evenings, people. 2011-01-29 20:20:35 -05:00			`image_file = None`
Correctly downloading, storing, retrieving, and showing favicons and gradients for favicons. This commit includes numerous fixes for the Feed view, too. Loaded. 2011-01-29 19:16:40 -05:00			`content = MFeedPage.get_data(feed_id=self.feed.pk)`
			`url = self._url_from_html(content)`
			`if url:`
Handling Win32 ICOs with alpha. Holy god, this was fucking insane to handle. My saturday evenings, people. 2011-01-29 20:20:35 -05:00			`image, image_file = self.get_image_from_url(url)`
			`return image, image_file, url`
Correctly downloading, storing, retrieving, and showing favicons and gradients for favicons. This commit includes numerous fixes for the Feed view, too. Loaded. 2011-01-29 19:16:40 -05:00
Adding a bit for finding the favicon. Also handling all errors on fetching the HTML of the site for finding the icon. 2011-01-30 20:39:37 -05:00			`def fetch_image_from_path(self, path='favicon.ico', force=False):`
Finally saving proper base64 encoded favicons, along with their URLs, autodetecting when not found at /favicon.ico, and now all thats left is serving icons and their colors. Woohoo! 2011-01-27 19:05:50 -05:00			`image = None`
Success on saving all images. Still some issues with transparency and resizing large images and having weird cutoffs. 2011-01-29 11:24:27 -05:00			`url = None`

			`if not force:`
			`url = self.feed.icon.icon_url`
Forcing useful feed_links. 2011-01-30 13:07:55 -05:00			`if not url and self.feed.feed_link and len(self.feed.feed_link) > 6:`
Correctly downloading, storing, retrieving, and showing favicons and gradients for favicons. This commit includes numerous fixes for the Feed view, too. Loaded. 2011-01-29 19:16:40 -05:00			`url = urlparse.urljoin(self.feed.feed_link, 'favicon.ico')`
Don't bother fetching an icon if we don't even have the feed_link. 2011-01-30 12:44:34 -05:00			`if not url: return None, None, None`
Finally saving proper base64 encoded favicons, along with their URLs, autodetecting when not found at /favicon.ico, and now all thats left is serving icons and their colors. Woohoo! 2011-01-27 19:05:50 -05:00
Handling Win32 ICOs with alpha. Holy god, this was fucking insane to handle. My saturday evenings, people. 2011-01-29 20:20:35 -05:00			`image, image_file = self.get_image_from_url(url)`
Correctly downloading, storing, retrieving, and showing favicons and gradients for favicons. This commit includes numerous fixes for the Feed view, too. Loaded. 2011-01-29 19:16:40 -05:00			`if not image:`
			`url = urlparse.urljoin(self.feed.feed_link, '/favicon.ico')`
Handling Win32 ICOs with alpha. Holy god, this was fucking insane to handle. My saturday evenings, people. 2011-01-29 20:20:35 -05:00			`image, image_file = self.get_image_from_url(url)`
Cleaning up logging, adding several errors and timeout checks on page and icon fetching. Standardizing fetcher headers. 2011-01-29 22:01:09 -05:00			`# print 'Found: %s - %s' % (url, image)`
Handling Win32 ICOs with alpha. Holy god, this was fucking insane to handle. My saturday evenings, people. 2011-01-29 20:20:35 -05:00			`return image, image_file, url`
Success on saving all images. Still some issues with transparency and resizing large images and having weird cutoffs. 2011-01-29 11:24:27 -05:00
Correctly downloading, storing, retrieving, and showing favicons and gradients for favicons. This commit includes numerous fixes for the Feed view, too. Loaded. 2011-01-29 19:16:40 -05:00			`def get_image_from_url(self, url):`
Cleaning up logging, adding several errors and timeout checks on page and icon fetching. Standardizing fetcher headers. 2011-01-29 22:01:09 -05:00			`# print 'Requesting: %s' % url`
Handling timeouts from inside the icon_importer. 2011-01-30 13:06:31 -05:00			`@timelimit(30)`
			`def _1(url):`
Correctly downloading, storing, retrieving, and showing favicons and gradients for favicons. This commit includes numerous fixes for the Feed view, too. Loaded. 2011-01-29 19:16:40 -05:00			`request = urllib2.Request(url, headers=HEADERS)`
Handling Win32 ICOs with alpha. Holy god, this was fucking insane to handle. My saturday evenings, people. 2011-01-29 20:20:35 -05:00			`icon = urllib2.urlopen(request).read()`
			`icon_file = StringIO(icon)`
			`image = Image.open(icon_file)`
Handling timeouts from inside the icon_importer. 2011-01-30 13:06:31 -05:00			`return image, icon_file`
			`try:`
			`image, icon_file = _1(url)`
Screw it. Too many different exceptions come from fetching URLs. If anything goes wrong, not much we can do. 2011-01-30 14:58:14 -05:00			`# except (urllib2.HTTPError, urllib2.URLError, IOError, TimeoutError, ValueError):`
			`except (Exception):`
Handling Win32 ICOs with alpha. Holy god, this was fucking insane to handle. My saturday evenings, people. 2011-01-29 20:20:35 -05:00			`return None, None`
			`return image, icon_file`
Correctly downloading, storing, retrieving, and showing favicons and gradients for favicons. This commit includes numerous fixes for the Feed view, too. Loaded. 2011-01-29 19:16:40 -05:00
			`def _url_from_html(self, content):`
			`url = None`
Short circuiting feeds with no pages during icon importing. 2011-01-30 12:40:29 -05:00			`if not content: return url`
Handling parser errors from lxml in icon fetching. 2011-01-30 13:23:20 -05:00			`try:`
			`icon_path = lxml.html.fromstring(content).xpath(`
			`'//link[@rel="icon" or @rel="shortcut icon"]/@href'`
			`)`
Handling a weird TypeError on feed parsing. 2011-01-30 20:16:34 -05:00			`except (lxml.etree.ParserError, TypeError):`
Handling parser errors from lxml in icon fetching. 2011-01-30 13:23:20 -05:00			`return url`

Correctly downloading, storing, retrieving, and showing favicons and gradients for favicons. This commit includes numerous fixes for the Feed view, too. Loaded. 2011-01-29 19:16:40 -05:00			`if icon_path:`
			`if str(icon_path[0]).startswith('http'):`
			`url = icon_path[0]`
			`else:`
			`url = urlparse.urljoin(self.feed.feed_link, icon_path[0])`
			`return url`

Success on saving all images. Still some issues with transparency and resizing large images and having weird cutoffs. 2011-01-29 11:24:27 -05:00			`def normalize_image(self, image):`
Correctly downloading, storing, retrieving, and showing favicons and gradients for favicons. This commit includes numerous fixes for the Feed view, too. Loaded. 2011-01-29 19:16:40 -05:00			`# if image.size != (16, 16):`
			`# image = image.resize((16, 16), Image.BICUBIC)`
Success on saving all images. Still some issues with transparency and resizing large images and having weird cutoffs. 2011-01-29 11:24:27 -05:00			`if image.mode != 'RGBA':`
			`image = image.convert('RGBA')`

			`return image`
Finally saving proper base64 encoded favicons, along with their URLs, autodetecting when not found at /favicon.ico, and now all thats left is serving icons and their colors. Woohoo! 2011-01-27 19:05:50 -05:00
			`def determine_dominant_color_in_image(self, image):`
			`NUM_CLUSTERS = 5`

			`ar = scipy.misc.fromimage(image)`
			`shape = ar.shape`
			`if len(shape) > 2:`
			`ar = ar.reshape(scipy.product(shape[:2]), shape[2])`

Correctly downloading, storing, retrieving, and showing favicons and gradients for favicons. This commit includes numerous fixes for the Feed view, too. Loaded. 2011-01-29 19:16:40 -05:00			`codes, _ = scipy.cluster.vq.kmeans(ar, NUM_CLUSTERS)`
Cleaning up logging, adding several errors and timeout checks on page and icon fetching. Standardizing fetcher headers. 2011-01-29 22:01:09 -05:00			`# print "Before: %s" % codes`
Correctly downloading, storing, retrieving, and showing favicons and gradients for favicons. This commit includes numerous fixes for the Feed view, too. Loaded. 2011-01-29 19:16:40 -05:00			`original_codes = codes`
			`for low, hi in [(60, 200), (35, 230), (10, 250)]:`
			`codes = scipy.array([code for code in codes`
			`if not ((code[0] < low and code[1] < low and code[2] < low) or`
			`(code[0] > hi and code[1] > hi and code[2] > hi))])`
			`if not len(codes): codes = original_codes`
			`else: break`
Cleaning up logging, adding several errors and timeout checks on page and icon fetching. Standardizing fetcher headers. 2011-01-29 22:01:09 -05:00			`# print "After: %s" % codes`
Finally saving proper base64 encoded favicons, along with their URLs, autodetecting when not found at /favicon.ico, and now all thats left is serving icons and their colors. Woohoo! 2011-01-27 19:05:50 -05:00
Correctly downloading, storing, retrieving, and showing favicons and gradients for favicons. This commit includes numerous fixes for the Feed view, too. Loaded. 2011-01-29 19:16:40 -05:00			`vecs, _ = scipy.cluster.vq.vq(ar, codes) # assign codes`
Finally saving proper base64 encoded favicons, along with their URLs, autodetecting when not found at /favicon.ico, and now all thats left is serving icons and their colors. Woohoo! 2011-01-27 19:05:50 -05:00			`counts, bins = scipy.histogram(vecs, len(codes)) # count occurrences`
Cleaning up logging, adding several errors and timeout checks on page and icon fetching. Standardizing fetcher headers. 2011-01-29 22:01:09 -05:00			`# colors = [''.join(chr(c) for c in code).encode('hex') for code in codes]`
			`# total = scipy.sum(counts)`
			`# print dict(zip(colors, [count/float(total) for count in counts]))`
Finally saving proper base64 encoded favicons, along with their URLs, autodetecting when not found at /favicon.ico, and now all thats left is serving icons and their colors. Woohoo! 2011-01-27 19:05:50 -05:00			`index_max = scipy.argmax(counts) # find most frequent`
			`peak = codes[index_max]`
			`color = ''.join(chr(c) for c in peak).encode('hex')`
Cleaning up logging, adding several errors and timeout checks on page and icon fetching. Standardizing fetcher headers. 2011-01-29 22:01:09 -05:00			`# print 'most frequent is %s (#%s)' % (peak, color)`
Finally saving proper base64 encoded favicons, along with their URLs, autodetecting when not found at /favicon.ico, and now all thats left is serving icons and their colors. Woohoo! 2011-01-27 19:05:50 -05:00
Success on saving all images. Still some issues with transparency and resizing large images and having weird cutoffs. 2011-01-29 11:24:27 -05:00			`return color[:6]`
Finally saving proper base64 encoded favicons, along with their URLs, autodetecting when not found at /favicon.ico, and now all thats left is serving icons and their colors. Woohoo! 2011-01-27 19:05:50 -05:00
			`def string_from_image(self, image):`
Success on saving all images. Still some issues with transparency and resizing large images and having weird cutoffs. 2011-01-29 11:24:27 -05:00			`output = StringIO()`
			`image.save(output, 'png', quality=95)`
Finally saving proper base64 encoded favicons, along with their URLs, autodetecting when not found at /favicon.ico, and now all thats left is serving icons and their colors. Woohoo! 2011-01-27 19:05:50 -05:00			`contents = output.getvalue()`
			`output.close()`
			`return contents.encode('base64')`