mirror of
https://github.com/samuelclay/NewsBlur.git
synced 2025-09-18 21:50:56 +00:00
Fixing a bunch of feed fetch errors.
This commit is contained in:
parent
ee72253695
commit
4a7516cb83
3 changed files with 17 additions and 3 deletions
|
@@ -170,7 +170,10 @@ class IconImporter(object):
|
||||||
compressed_content = key.get_contents_as_string()
|
compressed_content = key.get_contents_as_string()
|
||||||
stream = StringIO(compressed_content)
|
stream = StringIO(compressed_content)
|
||||||
gz = gzip.GzipFile(fileobj=stream)
|
gz = gzip.GzipFile(fileobj=stream)
|
||||||
content = gz.read()
|
try:
|
||||||
|
content = gz.read()
|
||||||
|
except IOError:
|
||||||
|
content = None
|
||||||
else:
|
else:
|
||||||
content = MFeedPage.get_data(feed_id=self.feed.pk)
|
content = MFeedPage.get_data(feed_id=self.feed.pk)
|
||||||
url = self._url_from_html(content)
|
url = self._url_from_html(content)
|
||||||
|
@@ -197,6 +200,9 @@ class IconImporter(object):
|
||||||
|
|
||||||
def get_image_from_url(self, url):
|
def get_image_from_url(self, url):
|
||||||
# print 'Requesting: %s' % url
|
# print 'Requesting: %s' % url
|
||||||
|
if not url:
|
||||||
|
return None, None
|
||||||
|
|
||||||
@timelimit(30)
|
@timelimit(30)
|
||||||
def _1(url):
|
def _1(url):
|
||||||
try:
|
try:
|
||||||
|
|
|
@@ -80,6 +80,9 @@ class PageImporter(object):
|
||||||
response = requests.get(feed_link, headers=self.headers)
|
response = requests.get(feed_link, headers=self.headers)
|
||||||
except requests.exceptions.TooManyRedirects:
|
except requests.exceptions.TooManyRedirects:
|
||||||
response = requests.get(feed_link)
|
response = requests.get(feed_link)
|
||||||
|
except AttributeError:
|
||||||
|
self.save_no_page()
|
||||||
|
return
|
||||||
try:
|
try:
|
||||||
data = response.text
|
data = response.text
|
||||||
except (LookupError, TypeError):
|
except (LookupError, TypeError):
|
||||||
|
|
|
@@ -2,6 +2,7 @@ import datetime
|
||||||
import struct
|
import struct
|
||||||
from HTMLParser import HTMLParser
|
from HTMLParser import HTMLParser
|
||||||
from lxml.html.diff import tokenize, fixup_ins_del_tags, htmldiff_tokens
|
from lxml.html.diff import tokenize, fixup_ins_del_tags, htmldiff_tokens
|
||||||
|
from lxml.etree import ParserError
|
||||||
from itertools import chain
|
from itertools import chain
|
||||||
from django.utils.dateformat import DateFormat
|
from django.utils.dateformat import DateFormat
|
||||||
from django.utils.html import strip_tags as strip_tags_django
|
from django.utils.html import strip_tags as strip_tags_django
|
||||||
|
@@ -250,8 +251,12 @@ def image_size(datastream):
|
||||||
return content_type, width, height
|
return content_type, width, height
|
||||||
|
|
||||||
def htmldiff(old_html, new_html):
|
def htmldiff(old_html, new_html):
|
||||||
old_html_tokens = tokenize(old_html, include_hrefs=False)
|
try:
|
||||||
new_html_tokens = tokenize(new_html, include_hrefs=False)
|
old_html_tokens = tokenize(old_html, include_hrefs=False)
|
||||||
|
new_html_tokens = tokenize(new_html, include_hrefs=False)
|
||||||
|
except (KeyError, ParserError):
|
||||||
|
return new_html
|
||||||
|
|
||||||
result = htmldiff_tokens(old_html_tokens, new_html_tokens)
|
result = htmldiff_tokens(old_html_tokens, new_html_tokens)
|
||||||
result = ''.join(result).strip()
|
result = ''.join(result).strip()
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue