Adding necessary exception handling to text view.

This commit is contained in:
Samuel Clay 2013-07-15 11:06:50 -07:00
parent fe1d7177ec
commit 679195aadd
4 changed files with 42 additions and 13 deletions

View file

@ -1848,9 +1848,10 @@ class MStory(mongo.Document):
def fetch_original_text(self, force=False, request=None):
original_text_z = self.original_text_z
feed = Feed.get_by_id(self.story_feed_id)
if not original_text_z or force:
ti = TextImporter(self, request=request)
ti = TextImporter(self, feed=feed, request=request)
original_text = ti.fetch()
else:
logging.user(request, "~FYFetching ~FGoriginal~FY story text, ~SBfound.")
@ -1958,9 +1959,10 @@ class MStarredStory(mongo.Document):
def fetch_original_text(self, force=False, request=None):
original_text_z = self.original_text_z
feed = Feed.get_by_id(self.story_feed_id)
if not original_text_z or force:
ti = TextImporter(self, request=request)
ti = TextImporter(self, feed, request=request)
original_text = ti.fetch()
else:
logging.user(request, "~FYFetching ~FGoriginal~FY story text, ~SBfound.")

View file

@ -1,33 +1,50 @@
import requests
import zlib
from django.conf import settings
from socket import error as SocketError
from vendor.readability import readability
from utils import log as logging
from utils.feed_functions import timelimit, TimeoutError
class TextImporter:
def __init__(self, story, request=None):
def __init__(self, story, feed, request=None):
self.story = story
self.feed = feed
self.request = request
@property
def headers(self):
return {
'User-Agent': 'NewsBlur Content Fetcher - %s '
'User-Agent': 'NewsBlur Content Fetcher - %s subscriber%s - %s '
'(Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_1) '
'AppleWebKit/534.48.3 (KHTML, like Gecko) Version/5.1 '
'Safari/534.48.3)' % (
settings.NEWSBLUR_URL
self.feed.num_subscribers,
's' if self.feed.num_subscribers != 1 else '',
self.feed.permalink,
),
'Connection': 'close',
}
def fetch(self, skip_save=False):
html = requests.get(self.story.story_permalink, headers=self.headers, verify=False)
text = html.text
if html.encoding and html.encoding != 'utf-8':
text = text.encode(html.encoding)
original_text_doc = readability.Document(text, url=html.url, debug=settings.DEBUG)
try:
resp = self.fetch_request()
except TimeoutError:
logging.user(self.request, "~SN~FRFailed~FY to fetch ~FGoriginal text~FY: timed out")
resp = None
if not resp:
return
try:
text = resp.text
except (LookupError, TypeError):
text = resp.content
if resp.encoding and resp.encoding != 'utf-8':
text = text.encode(resp.encoding)
original_text_doc = readability.Document(text, url=resp.url, debug=settings.DEBUG)
content = original_text_doc.summary(html_partial=True)
if content:
@ -43,4 +60,13 @@ class TextImporter:
self.story.story_content_z and len(zlib.decompress(self.story.story_content_z))
)), warn_color=False)
return content
return content
@timelimit(10)
def fetch_request(self):
    """Issue the HTTP GET for the story's permalink.

    Sends ``self.headers`` with the request. Connection-level failures
    (``AttributeError``, ``SocketError``, ``requests.ConnectionError``)
    are logged against ``self.request`` and swallowed.

    Returns:
        The ``requests`` response object on success, or ``None`` when the
        request failed with one of the handled errors.

    NOTE(review): the ``timelimit(10)`` decorator presumably raises
    ``TimeoutError`` once the call exceeds 10 seconds; the caller in
    ``fetch`` handles that exception -- confirm against
    ``utils.feed_functions``.
    """
    try:
        # NOTE(review): verify=False disables TLS certificate validation.
        # Kept as-is to preserve behaviour for feeds with broken certs.
        r = requests.get(self.story.story_permalink, headers=self.headers, verify=False)
    # ``except ... as e`` is valid on Python 2.6+ and Python 3, unlike the
    # legacy ``except ..., e`` comma form.
    except (AttributeError, SocketError, requests.ConnectionError) as e:
        logging.user(self.request, "~SN~FRFailed~FY to fetch ~FGoriginal text~FY: %s" % e)
        return None
    return r

View file

@ -444,4 +444,4 @@ def original_text(request):
'story_id': story_id,
'original_text': original_text,
'failed': not original_text or len(original_text) < 100,
}
}

View file

@ -1984,9 +1984,10 @@ class MSharedStory(mongo.Document):
def fetch_original_text(self, force=False, request=None):
original_text_z = self.original_text_z
feed = Feed.get_by_id(self.story_feed_id)
if not original_text_z or force:
ti = TextImporter(self, request=request)
ti = TextImporter(self, feed, request=request)
original_text = ti.fetch()
else:
logging.user(request, "~FYFetching ~FGoriginal~FY story text, ~SBfound.")