Update the class-name keywords passed to readability as positive_keywords.

This commit is contained in:
Samuel Clay 2017-09-29 10:50:08 -07:00
parent ae6bf532f2
commit ef51152bcd

View file

@ -76,7 +76,7 @@ class TextImporter:
text = text.replace("\u00a0", " ") # Non-breaking space, is mangled when encoding is not utf-8
original_text_doc = readability.Document(text, url=resp.url,
positive_keywords="postContent, postField")
positive_keywords="post, entry, postProp, article, postContent, postField")
try:
content = original_text_doc.summary(html_partial=True)
except (readability.Unparseable, ParserError), e: