From e43733ce30d9c672a7cdeb57ba8e3da277b843a2 Mon Sep 17 00:00:00 2001
From: Samuel Clay
Date: Tue, 28 Jun 2016 16:11:46 -0700
Subject: [PATCH] Handling lxml parser errors for original text.

---
 apps/rss_feeds/text_importer.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/apps/rss_feeds/text_importer.py b/apps/rss_feeds/text_importer.py
index 6548f0a7f..941181070 100644
--- a/apps/rss_feeds/text_importer.py
+++ b/apps/rss_feeds/text_importer.py
@@ -4,6 +4,7 @@ from requests.packages.urllib3.exceptions import LocationParseError
 from socket import error as SocketError
 from mongoengine.queryset import NotUniqueError
 from vendor.readability import readability
+from lxml.etree import ParserError
 from utils import log as logging
 from utils.feed_functions import timelimit, TimeoutError
 from OpenSSL.SSL import Error as OpenSSLError
@@ -61,7 +62,8 @@ class TextImporter:
                                                  positive_keywords=["postContent", "postField"])
         try:
             content = original_text_doc.summary(html_partial=True)
-        except readability.Unparseable:
+        except (readability.Unparseable, ParserError), e:
+            logging.user(self.request, "~SN~FRFailed~FY to fetch ~FGoriginal text~FY: %s" % e)
             return
 
         try: