Handling lxml parser errors for original text.

This commit is contained in:
Samuel Clay 2016-06-28 16:11:46 -07:00
parent a7ceebcb0f
commit e43733ce30

View file

@ -4,6 +4,7 @@ from requests.packages.urllib3.exceptions import LocationParseError
from socket import error as SocketError from socket import error as SocketError
from mongoengine.queryset import NotUniqueError from mongoengine.queryset import NotUniqueError
from vendor.readability import readability from vendor.readability import readability
from lxml.etree import ParserError
from utils import log as logging from utils import log as logging
from utils.feed_functions import timelimit, TimeoutError from utils.feed_functions import timelimit, TimeoutError
from OpenSSL.SSL import Error as OpenSSLError from OpenSSL.SSL import Error as OpenSSLError
@ -61,7 +62,8 @@ class TextImporter:
positive_keywords=["postContent", "postField"]) positive_keywords=["postContent", "postField"])
try: try:
content = original_text_doc.summary(html_partial=True) content = original_text_doc.summary(html_partial=True)
except readability.Unparseable: except (readability.Unparseable, ParserError), e:
logging.user(self.request, "~SN~FRFailed~FY to fetch ~FGoriginal text~FY: %s" % e)
return return
try: try: