Stripping HTML comments from the content of all stories. This fixes the weird bug where stray '>more>' text shows up.

2025-09-18 21:50:56 +00:00 · 2012-10-19 12:49:39 -07:00 · 2012-10-19 12:49:39 -07:00 · 5ce475428f
commit 5ce475428f
parent c9ce818d96
3 changed files with 33 additions and 4 deletions
--- a/apps/rss_feeds/models.py
+++ b/apps/rss_feeds/models.py
@ -29,7 +29,7 @@ from utils.feed_functions import levenshtein_distance
 from utils.feed_functions import timelimit, TimeoutError
 from utils.feed_functions import relative_timesince
 from utils.feed_functions import seconds_timesince
-from utils.story_functions import strip_tags, htmldiff
+from utils.story_functions import strip_tags, htmldiff, strip_comments

 ENTRY_NEW, ENTRY_UPDATED, ENTRY_SAME, ENTRY_ERR = range(4)

@ -747,6 +747,7 @@ class Feed(models.Model):
                continue
                
            story_content = story.get('story_content')
+            story_content = strip_comments(story_content)
            story_tags = self.get_tags(story)
            story_link = self.get_permalink(story)
                
--- a/templates/social/rss_story.xhtml
+++ b/templates/social/rss_story.xhtml
@ -1,11 +1,11 @@
 <table style="border: 1px solid #E0E0E0; margin: 0; padding: 0; background-color: #F0F0F0" valign="top" align="left" cellpadding="0" width="100%">
    <tr>
-        <td rowspan="2" style="padding: 0 6px;width: 36px;white-space:nowrap" width="36"><img src="{{ social_profile.photo_url }}" style="width: 36px; height: 36px; border-radius: 4px; vertical-align: middle;"></td>
-        <td width="100%">
+        <td rowspan="2" style="padding: 6px;width: 36px;white-space:nowrap" width="36"><img src="{{ social_profile.photo_url }}" style="width: 36px; height: 36px; border-radius: 4px;"></td>
+        <td width="100%" style="padding: 6px;">
            <b>
                {{ user.username }} 
                <a href="{{ shared_story.blurblog_permalink }}">shared this story</a>
-            {% if feed %}from <img src="{{ feed.favicon_url_fqdn }}"> {{ feed.feed_title }}{% endif %}{% if shared_story.comments %}:{% else %}.{% endif %}</b>
+            {% if feed %}from <img src="{{ feed.favicon_url_fqdn }}" style="vertical-align: middle;"> {{ feed.feed_title }}{% endif %}{% if shared_story.comments %}:{% else %}.{% endif %}</b>
        </td>
    </tr>
    {% if shared_story.comments %}
--- a/utils/story_functions.py
+++ b/utils/story_functions.py
@ -3,6 +3,8 @@ import struct
 from HTMLParser import HTMLParser
 from lxml.html.diff import tokenize, fixup_ins_del_tags, htmldiff_tokens
 from lxml.etree import ParserError
+import lxml.html, lxml.etree
+from lxml.html.clean import Cleaner
 from itertools import chain
 from django.utils.dateformat import DateFormat
 from django.utils.html import strip_tags as strip_tags_django
@ -175,6 +177,32 @@ def strip_tags(html):
    s.feed(html)
    return s.get_data()

+def strip_comments(html_string):
+    """Return ``html_string`` with HTML comments removed.
+
+    The markup is parsed with ``lxml.html.fromstring`` and passed through an
+    ``lxml.html.clean.Cleaner`` whose only enabled cleanups are ``comments``
+    and ``remove_unknown_tags``; every other Cleaner option is switched off.
+
+    Args:
+        html_string: HTML markup to clean. May be ``None`` or empty.
+
+    Returns:
+        The cleaned markup as serialized by ``lxml.etree.tostring``. Falsy
+        input (``None``, ``""``) is returned unchanged, and markup that lxml
+        cannot parse is returned unmodified instead of raising.
+    """
+    # lxml.html.fromstring() raises on None and on empty documents, so
+    # input with no content is handed back untouched.
+    if not html_string:
+        return html_string
+
+    params = {
+        'comments': True,
+        'scripts': False,
+        'javascript': False,
+        'style': False,
+        'links': False,
+        'meta': False,
+        'page_structure': False,
+        'processing_instructions': False,
+        'embedded': False,
+        'frames': False,
+        'forms': False,
+        'annoying_tags': False,
+        'remove_tags': None,
+        'allow_tags': None,
+        'kill_tags': None,
+        # NOTE(review): per the lxml docs this also removes tags that are not
+        # standard HTML -- confirm that is intended for a comment stripper.
+        'remove_unknown_tags': True,
+        'safe_attrs_only': False,
+    }
+
+    try:
+        html = lxml.html.fromstring(html_string)
+        clean_html = Cleaner(**params).clean_html(html)
+    except (lxml.etree.ParserError, lxml.etree.XMLSyntaxError):
+        # Best effort: whitespace-only or badly broken markup cannot be
+        # parsed; keep the original content rather than fail the caller.
+        return html_string
+
+    # NOTE(review): lxml.etree.tostring serializes as XML by default, so empty
+    # elements come out self-closed; lxml.html.tostring may be a better fit.
+    return lxml.etree.tostring(clean_html)
+
 def linkify(*args, **kwargs):
    return xhtml_unescape_tornado(linkify_tornado(*args, **kwargs))