Strip HTML comments from the content of all stories. This fixes the weird bug where stray '>more>' text shows up in stories.

This commit is contained in:
Samuel Clay 2012-10-19 12:49:39 -07:00
parent c9ce818d96
commit 5ce475428f
3 changed files with 33 additions and 4 deletions

View file

@ -29,7 +29,7 @@ from utils.feed_functions import levenshtein_distance
from utils.feed_functions import timelimit, TimeoutError
from utils.feed_functions import relative_timesince
from utils.feed_functions import seconds_timesince
from utils.story_functions import strip_tags, htmldiff
from utils.story_functions import strip_tags, htmldiff, strip_comments
ENTRY_NEW, ENTRY_UPDATED, ENTRY_SAME, ENTRY_ERR = range(4)
@ -747,6 +747,7 @@ class Feed(models.Model):
continue
story_content = story.get('story_content')
story_content = strip_comments(story_content)
story_tags = self.get_tags(story)
story_link = self.get_permalink(story)

View file

@ -1,11 +1,11 @@
<table style="border: 1px solid #E0E0E0; margin: 0; padding: 0; background-color: #F0F0F0" valign="top" align="left" cellpadding="0" width="100%">
<tr>
<td rowspan="2" style="padding: 0 6px;width: 36px;white-space:nowrap" width="36"><img src="{{ social_profile.photo_url }}" style="width: 36px; height: 36px; border-radius: 4px; vertical-align: middle;"></td>
<td width="100%">
<td rowspan="2" style="padding: 6px;width: 36px;white-space:nowrap" width="36"><img src="{{ social_profile.photo_url }}" style="width: 36px; height: 36px; border-radius: 4px;"></td>
<td width="100%" style="padding: 6px;">
<b>
{{ user.username }}
<a href="{{ shared_story.blurblog_permalink }}">shared this story</a>
{% if feed %}from <img src="{{ feed.favicon_url_fqdn }}"> {{ feed.feed_title }}{% endif %}{% if shared_story.comments %}:{% else %}.{% endif %}</b>
{% if feed %}from <img src="{{ feed.favicon_url_fqdn }}" style="vertical-align: middle;"> {{ feed.feed_title }}{% endif %}{% if shared_story.comments %}:{% else %}.{% endif %}</b>
</td>
</tr>
{% if shared_story.comments %}

View file

@ -3,6 +3,8 @@ import struct
from HTMLParser import HTMLParser
from lxml.html.diff import tokenize, fixup_ins_del_tags, htmldiff_tokens
from lxml.etree import ParserError
import lxml.html, lxml.etree
from lxml.html.clean import Cleaner
from itertools import chain
from django.utils.dateformat import DateFormat
from django.utils.html import strip_tags as strip_tags_django
@ -175,6 +177,32 @@ def strip_tags(html):
s.feed(html)
return s.get_data()
def strip_comments(html_string):
    """Return ``html_string`` with its HTML comments removed.

    The markup is parsed with ``lxml.html.fromstring`` and run through an
    lxml ``Cleaner`` whose only explicitly enabled cleaning switch is
    ``comments``; scripts, styles, links, forms, frames, embedded content
    and attributes are all configured to be left alone
    (``remove_unknown_tags`` stays enabled, as before).

    Falsy input (``None`` or an empty string) is returned unchanged, and
    markup that lxml cannot parse is returned as-is rather than raising, so
    a story with missing or broken content does not abort processing in the
    caller.
    """
    # Nothing to clean; lxml refuses to parse an empty document and cannot
    # handle None at all.
    if not html_string:
        return html_string

    params = {
        'comments': True,
        'scripts': False,
        'javascript': False,
        'style': False,
        'links': False,
        'meta': False,
        'page_structure': False,
        'processing_instructions': False,
        'embedded': False,
        'frames': False,
        'forms': False,
        'annoying_tags': False,
        'remove_tags': None,
        'allow_tags': None,
        'kill_tags': None,
        'remove_unknown_tags': True,
        'safe_attrs_only': False,
    }
    cleaner = Cleaner(**params)

    try:
        html = lxml.html.fromstring(html_string)
        clean_html = cleaner.clean_html(html)
        return lxml.etree.tostring(clean_html)
    except (lxml.etree.ParserError, lxml.etree.XMLSyntaxError):
        # Best effort: keep the original content when it cannot be parsed
        # (e.g. whitespace-only or badly malformed markup).
        return html_string
def linkify(*args, **kwargs):
    """Run ``linkify_tornado`` on the given arguments and XHTML-unescape the result.

    All positional and keyword arguments are forwarded untouched to
    ``linkify_tornado``; its return value is passed through
    ``xhtml_unescape_tornado`` before being returned.
    """
    linked = linkify_tornado(*args, **kwargs)
    return xhtml_unescape_tornado(linked)