mirror of
https://github.com/samuelclay/NewsBlur.git
synced 2025-09-18 21:50:56 +00:00
Fixing a bunch of feed fetch errors.
This commit is contained in:
parent
ee72253695
commit
4a7516cb83
3 changed files with 17 additions and 3 deletions
|
@ -170,7 +170,10 @@ class IconImporter(object):
|
|||
compressed_content = key.get_contents_as_string()
|
||||
stream = StringIO(compressed_content)
|
||||
gz = gzip.GzipFile(fileobj=stream)
|
||||
content = gz.read()
|
||||
try:
|
||||
content = gz.read()
|
||||
except IOError:
|
||||
content = None
|
||||
else:
|
||||
content = MFeedPage.get_data(feed_id=self.feed.pk)
|
||||
url = self._url_from_html(content)
|
||||
|
@ -197,6 +200,9 @@ class IconImporter(object):
|
|||
|
||||
def get_image_from_url(self, url):
|
||||
# print 'Requesting: %s' % url
|
||||
if not url:
|
||||
return None, None
|
||||
|
||||
@timelimit(30)
|
||||
def _1(url):
|
||||
try:
|
||||
|
|
|
@ -80,6 +80,9 @@ class PageImporter(object):
|
|||
response = requests.get(feed_link, headers=self.headers)
|
||||
except requests.exceptions.TooManyRedirects:
|
||||
response = requests.get(feed_link)
|
||||
except AttributeError:
|
||||
self.save_no_page()
|
||||
return
|
||||
try:
|
||||
data = response.text
|
||||
except (LookupError, TypeError):
|
||||
|
|
|
@ -2,6 +2,7 @@ import datetime
|
|||
import struct
|
||||
from HTMLParser import HTMLParser
|
||||
from lxml.html.diff import tokenize, fixup_ins_del_tags, htmldiff_tokens
|
||||
from lxml.etree import ParserError
|
||||
from itertools import chain
|
||||
from django.utils.dateformat import DateFormat
|
||||
from django.utils.html import strip_tags as strip_tags_django
|
||||
|
@ -250,8 +251,12 @@ def image_size(datastream):
|
|||
return content_type, width, height
|
||||
|
||||
def htmldiff(old_html, new_html):
|
||||
old_html_tokens = tokenize(old_html, include_hrefs=False)
|
||||
new_html_tokens = tokenize(new_html, include_hrefs=False)
|
||||
try:
|
||||
old_html_tokens = tokenize(old_html, include_hrefs=False)
|
||||
new_html_tokens = tokenize(new_html, include_hrefs=False)
|
||||
except (KeyError, ParserError):
|
||||
return new_html
|
||||
|
||||
result = htmldiff_tokens(old_html_tokens, new_html_tokens)
|
||||
result = ''.join(result).strip()
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue