mirror of
https://github.com/viq/NewsBlur.git
synced 2025-09-18 21:43:31 +00:00
Sanitizing text/plain stories.
This commit is contained in:
parent
fb03d62610
commit
c1d1534d67
2 changed files with 6 additions and 2 deletions
|
@@ -462,7 +462,7 @@ class ProcessFeed:
|
|||
story_hashes = []
|
||||
stories = []
|
||||
for entry in self.fpf.entries:
|
||||
story = pre_process_story(entry)
|
||||
story = pre_process_story(entry, self.fpf.encoding)
|
||||
if story.get('published') < start_date:
|
||||
start_date = story.get('published')
|
||||
if replace_guids:
|
||||
|
|
|
@@ -13,6 +13,7 @@ from django.utils.html import strip_tags as strip_tags_django
|
|||
from utils.tornado_escape import linkify as linkify_tornado
|
||||
from utils.tornado_escape import xhtml_unescape as xhtml_unescape_tornado
|
||||
from vendor import reseekfile
|
||||
from utils import feedparser
|
||||
|
||||
# COMMENTS_RE = re.compile('\<![ \r\n\t]*(--([^\-]|[\r\n]|-[^\-])*--[ \r\n\t]*)\>')
|
||||
COMMENTS_RE = re.compile('\<!--.*?--\>')
|
||||
|
@@ -67,7 +68,7 @@ def _extract_date_tuples(date):
|
|||
|
||||
return parsed_date, date_tuple, today_tuple, yesterday_tuple
|
||||
|
||||
def pre_process_story(entry):
|
||||
def pre_process_story(entry, encoding):
|
||||
publish_date = entry.get('published_parsed') or entry.get('updated_parsed')
|
||||
if publish_date:
|
||||
publish_date = datetime.datetime(*publish_date[:6])
|
||||
|
@@ -110,6 +111,9 @@ def pre_process_story(entry):
|
|||
else:
|
||||
entry['story_content'] = summary.strip()
|
||||
|
||||
if 'summary_detail' in entry and entry['summary_detail'].get('type', None) == 'text/plain':
|
||||
entry['story_content'] = feedparser._sanitizeHTML(entry['story_content'], encoding, 'text/plain')
|
||||
|
||||
# Add each media enclosure as a Download link
|
||||
for media_content in chain(entry.get('media_content', [])[:5], entry.get('links', [])[:5]):
|
||||
media_url = media_content.get('url', '')
|
||||
|
|
Loading…
Add table
Reference in a new issue