Mirror of https://github.com/samuelclay/NewsBlur.git

Commit 18428a32a1 (parent 1872b0a091)
Auto-linkifying comments and replies, and stripping HTML from comments, replies, and story titles, tags, and authors.
9 changed files with 396 additions and 12 deletions
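
In rough terms, the commit routes user-visible text through a strip-then-linkify pass. A minimal, self-contained sketch of that flow, using regex stand-ins for the real helpers (in the diff below, strip_tags wraps Django's strip_tags and linkify wraps Tornado's linkify):

    import re

    def strip_tags(html):
        # Stand-in for utils.story_functions.strip_tags; a crude regex
        # removal is enough for illustration.
        return re.sub(r'<[^>]*>', '', html) if html else ''

    def linkify(text):
        # Stand-in for utils.tornado_escape.linkify: wrap bare URLs in <a> tags.
        return re.sub(r'(https?://[^\s<]+)', r'<a href="\1">\1</a>', text)

    comment = 'See <b>this</b>: http://example.com/story'
    print(linkify(strip_tags(comment)))
    # -> See this: <a href="http://example.com/story">http://example.com/story</a>
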
@@ -44,6 +44,7 @@ from utils.story_functions import format_story_link_date__short
 from utils.story_functions import format_story_link_date__long
 from utils.story_functions import bunch
 from utils.story_functions import story_score
+from utils.story_functions import strip_tags
 from utils import log as logging
 from utils.view_functions import get_argument_or_404, render_to, is_true
 from utils.ratelimit import ratelimit

@@ -478,7 +479,7 @@ def load_single_feed(request, feed_id):
             story['shared'] = True
             shared_date = localtime_for_timezone(shared_stories[story['id']]['shared_date'], user.profile.timezone)
             story['shared_date'] = format_story_link_date__long(shared_date, now)
-            story['shared_comments'] = shared_stories[story['id']]['comments']
+            story['shared_comments'] = strip_tags(shared_stories[story['id']]['comments'])
         else:
             story['read_status'] = 1
         story['intelligence'] = {

@@ -27,7 +27,7 @@ from utils.feed_functions import levenshtein_distance
 from utils.feed_functions import timelimit, TimeoutError
 from utils.feed_functions import relative_timesince
 from utils.feed_functions import seconds_timesince
-from utils.story_functions import pre_process_story
+from utils.story_functions import strip_tags
 from utils.diff import HTMLDiff

 ENTRY_NEW, ENTRY_UPDATED, ENTRY_SAME, ENTRY_ERR = range(4)

@@ -724,8 +724,6 @@ class Feed(models.Model):
         }

         for story in stories:
-            story = pre_process_story(story)
-
             if not story.get('title'):
                 continue

@@ -979,8 +977,8 @@ class Feed(models.Model):
                 if not tagname or tagname == ' ':
                     continue
                 fcat.append(tagname)
-        fcat = [t[:250] for t in fcat]
-        return fcat[:12]
+        fcat = [strip_tags(t)[:250] for t in fcat[:12]]
+        return fcat

     def get_permalink(self, entry):
         link = entry.get('link')

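The get_tags change is subtle: the old code truncated every tag to 250 characters and then returned the first 12, while the new code slices to 12 tags first, then strips markup and truncates each one. A small sketch using Django's strip_tags, which the commit's helper wraps:

    from django.utils.html import strip_tags

    fcat = ['<b>python</b>', 'n' * 300, '<a href="#">django</a>']
    fcat = [strip_tags(t)[:250] for t in fcat[:12]]
    # -> ['python', 250 * 'n', 'django']
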
@@ -24,7 +24,7 @@ from vendor import facebook
 from vendor import tweepy
 from utils import log as logging
 from utils.feed_functions import relative_timesince
-from utils.story_functions import truncate_chars
+from utils.story_functions import truncate_chars, strip_tags, linkify
 from utils import json_functions as json

 RECOMMENDATIONS_LIMIT = 5

@@ -475,6 +475,10 @@ class MSocialProfile(mongo.Document):
     def send_email_for_new_follower(self, follower_user_id):
         user = User.objects.get(pk=self.user_id)
         if not user.email or not user.profile.send_emails or self.user_id == follower_user_id:
+            if not user.email:
+                logging.user(user, "~BB~FMNo email to send to, skipping.")
+            elif not user.profile.send_emails:
+                logging.user(user, "~BB~FMDisabled emails, skipping.")
             return

         emails_sent = MSentEmail.objects.filter(receiver_user_id=user.pk,

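The guard itself is unchanged; each skip reason now logs before bailing out. Note that the self-follow case still returns silently, since neither logging branch matches it. A self-contained sketch of the pattern:

    def notify_follower(email, send_emails, is_self_follow):
        # Same condition as before; logging added per reason.
        if not email or not send_emails or is_self_follow:
            if not email:
                print("No email to send to, skipping.")
            elif not send_emails:
                print("Disabled emails, skipping.")
            return
        print("sending email to %s" % email)

    notify_follower("", True, False)          # logs "No email..."
    notify_follower("a@b.com", False, False)  # logs "Disabled emails..."
    notify_follower("a@b.com", True, True)    # skips silently
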
@@ -988,6 +992,10 @@ class MSharedStory(mongo.Document):
             self.story_original_content_z = zlib.compress(self.story_original_content)
             self.story_original_content = None

+        self.comments = linkify(strip_tags(self.comments))
+        for reply in self.replies:
+            reply.comments = linkify(strip_tags(reply.comments))
+
         r = redis.Redis(connection_pool=settings.REDIS_POOL)
         share_key = "S:%s:%s" % (self.story_feed_id, self.guid_hash)
         r.sadd(share_key, self.user_id)

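Order matters in linkify(strip_tags(...)): author-supplied markup, including <a> tags, is removed first, and only then are bare URLs left in the text turned into links. A stand-alone illustration using the building blocks this commit wraps (the vendored tornado_escape module appears at the end of this diff):

    from django.utils.html import strip_tags
    from utils.tornado_escape import linkify  # vendored below in this commit

    comment = 'Nice <a href="javascript:evil()">read</a>: http://example.com/a'
    print(linkify(strip_tags(comment)))
    # The javascript: link is stripped away; the bare http URL that remains
    # becomes a plain, safe <a> tag.
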
@@ -1363,6 +1371,10 @@ class MSharedStory(mongo.Document):
             user = User.objects.get(pk=user_id)

             if not user.email or not user.profile.send_emails:
+                if not user.email:
+                    logging.user(user, "~BB~FMNo email to send to, skipping.")
+                elif not user.profile.send_emails:
+                    logging.user(user, "~BB~FMDisabled emails, skipping.")
                 continue

             mute_url = "http://%s%s" % (

@@ -1403,6 +1415,10 @@ class MSharedStory(mongo.Document):
                                  story_guid=self.story_guid)

         if not original_user.email or not original_user.profile.send_emails:
+            if not original_user.email:
+                logging.user(original_user, "~BB~FMNo email to send to, skipping.")
+            elif not original_user.profile.send_emails:
+                logging.user(original_user, "~BB~FMDisabled emails, skipping.")
             return

         story_feed = Feed.objects.get(pk=self.story_feed_id)

@@ -25,6 +25,7 @@ from utils.user_functions import get_user, ajax_login_required
 from utils.view_functions import render_to
 from utils.story_functions import format_story_link_date__short
 from utils.story_functions import format_story_link_date__long
+from utils.story_functions import strip_tags, linkify
 from utils import jennyholzer
 from vendor.timezones.utilities import localtime_for_timezone

@@ -139,7 +140,7 @@ def load_social_stories(request, user_id, username=None):
             shared_date = localtime_for_timezone(shared_stories[story['id']]['shared_date'],
                                                  user.profile.timezone)
             story['shared_date'] = format_story_link_date__long(shared_date, now)
-            story['shared_comments'] = shared_stories[story['id']]['comments']
+            story['shared_comments'] = strip_tags(shared_stories[story['id']]['comments'])

         story['intelligence'] = {
             'feed': apply_classifier_feeds(classifier_feeds, story['story_feed_id'],

@@ -343,7 +344,7 @@ def mark_story_as_shared(request):
     stories, profiles = MSharedStory.stories_with_comments_and_profiles([story], request.user.pk,
                                                                         check_all=check_all)
     story = stories[0]
-    story['shared_comments'] = shared_story['comments'] or ""
+    story['shared_comments'] = strip_tags(shared_story['comments'] or "")

     if post_to_services:
         for service in post_to_services:

@@ -438,7 +439,7 @@ def save_comment_reply(request):
     replies = []
     for story_reply in shared_story.replies:
         if (story_reply.user_id == reply.user_id and
-            story_reply.comments == original_message):
+            strip_tags(story_reply.comments) == original_message):
             reply.publish_date = story_reply.publish_date
             replies.append(reply)
         else:

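The reply-matching tweak compares stripped text because a stored reply may already contain markup (for instance, auto-linkified URLs from the save() hook above) while the client sends the original message as plain text; without stripping, an edited reply containing a link would never match. A toy illustration with hypothetical values:

    from django.utils.html import strip_tags

    stored_reply = 'see <a href="http://a.com">http://a.com</a>'
    original_message = 'see http://a.com'
    print(stored_reply == original_message)              # False
    print(strip_tags(stored_reply) == original_message)  # True
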
@@ -900,7 +901,7 @@ def load_activities(request):

     public = user_id != request.user.pk
     page = max(1, int(request.REQUEST.get('page', 1)))
-    limit = request.REQUEST.get('limit')
+    limit = request.REQUEST.get('limit', 4)
     activities, has_next_page = MActivity.user(user_id, page=page, limit=limit, public=public)
     format = request.REQUEST.get('format', None)

@@ -2,6 +2,7 @@ NEWSBLUR.Models.Story = Backbone.Model.extend({

     initialize: function() {
         this.bind('change:selected', this.change_selected);
+        this.bind('change:shared_comments', this.populate_comments);
         this.bind('change:comments', this.populate_comments);
         this.bind('change:comment_count', this.populate_comments);
         this.populate_comments();

@@ -215,6 +215,7 @@ NEWSBLUR.Views.StoryShareView = Backbone.View.extend({
         $share_button.removeClass('NB-saving').removeClass('NB-disabled').text('Share');
         $unshare_button.removeClass('NB-saving').removeClass('NB-disabled').text('Delete Share');
         $share_sideoption.text(shared_text).closest('.NB-sideoption');
+        $comments_sideoptions.val(this.model.get('shared_comments'));

         if (this.options.on_social_page) {
             this.model.social_page_story.$el.toggleClass('NB-story-shared', this.model.get('shared'));

@@ -217,12 +217,14 @@ class ProcessFeed:
         start_date = datetime.datetime.utcnow()
         # end_date = datetime.datetime.utcnow()
         story_guids = []
+        stories = []
         for entry in self.fpf.entries:
             story = pre_process_story(entry)
             if story.get('published') < start_date:
                 start_date = story.get('published')
             # if story.get('published') > end_date:
             #     end_date = story.get('published')
+            stories.append(story)
             story_guids.append(story.get('guid') or story.get('link'))

         existing_stories = list(MStory.objects(

@@ -236,7 +238,8 @@ class ProcessFeed:
         #     | (Q(story_guid__in=story_guids)),
         #     story_feed=self.feed
         # ).order_by('-story_date')
-        ret_values = self.feed.add_update_stories(self.fpf.entries, existing_stories, verbose=self.options['verbose'])
+        ret_values = self.feed.add_update_stories(stories, existing_stories,
+                                                  verbose=self.options['verbose'])

         if ((not self.feed.is_push or self.options.get('force'))
                 and hasattr(self.fpf, 'feed') and

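The net effect of the two ProcessFeed hunks: each feedparser entry is pre-processed exactly once, the results are collected into stories, and that sanitized list (rather than the raw self.fpf.entries) is handed to Feed.add_update_stories, whose own pre_process_story call was removed earlier in this diff. A self-contained sketch with stand-in helpers:

    def pre_process_story(entry):       # stand-in for the real helper
        entry['title'] = entry.get('title', '').replace('<b>', '').replace('</b>', '')
        return entry

    def add_update_stories(stories):    # stand-in for Feed.add_update_stories
        return [s['title'] for s in stories]

    entries = [{'title': '<b>Hello</b>'}, {'title': 'World'}]
    stories = [pre_process_story(entry) for entry in entries]
    print(add_update_stories(stories))  # ['Hello', 'World']
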
@@ -2,7 +2,9 @@ import datetime
 from HTMLParser import HTMLParser
 from itertools import chain
 from django.utils.dateformat import DateFormat
+from django.utils.html import strip_tags as strip_tags_django
 from django.conf import settings
+from utils.tornado_escape import linkify as linkify_tornado

 def story_score(story, bottom_delta=None):
     # A) Date - Assumes story is unread and within unread range

@@ -110,6 +112,9 @@ def pre_process_story(entry):
         story_title = story_title[:80] + '...'
     entry['title'] = story_title

+    entry['title'] = strip_tags(entry.get('title'))
+    entry['author'] = strip_tags(entry.get('author'))
+
     return entry

 class bunch(dict):

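With the two added lines, every story's title and author are stripped of HTML at pre-processing time. For example, using the helper defined in this same module:

    from utils.story_functions import strip_tags

    entry = {'title': '<b>Breaking</b> news', 'author': '<i>jane</i>'}
    entry['title'] = strip_tags(entry.get('title'))    # 'Breaking news'
    entry['author'] = strip_tags(entry.get('author'))  # 'jane'
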
@@ -156,9 +161,16 @@ class MLStripper(HTMLParser):
         return ' '.join(self.fed)

 def strip_tags(html):
+    if not html:
+        return ''
+    return strip_tags_django(html)
+
     s = MLStripper()
     s.feed(html)
     return s.get_data()

+def linkify(*args, **kwargs):
+    return linkify_tornado(*args, **kwargs)
+
 def truncate_chars(value, max_length):
     if len(value) <= max_length:

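Two things to notice about the new strip_tags: empty input now short-circuits to an empty string, and Django's implementation handles everything else; the original MLStripper lines sit below the early return and are now unreachable. Expected behavior, assuming utils.story_functions is importable:

    from utils.story_functions import strip_tags

    print(repr(strip_tags(None)))                  # ''
    print(repr(strip_tags('')))                    # ''
    print(repr(strip_tags('<p>Hi <b>there</b>')))  # 'Hi there' (via Django)
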
utils/tornado_escape.py (new file, 351 lines)

@@ -0,0 +1,351 @@
#!/usr/bin/env python
#
# Copyright 2009 Facebook
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

"""Escaping/unescaping methods for HTML, JSON, URLs, and others.

Also includes a few other miscellaneous string manipulation functions that
have crept in over time.
"""

from __future__ import absolute_import, division, with_statement

import htmlentitydefs
import re
import sys
import urllib

# Python3 compatibility: On python2.5, introduce the bytes alias from 2.6
try:
    bytes
except Exception:
    bytes = str

try:
    from urlparse import parse_qs  # Python 2.6+
except ImportError:
    from cgi import parse_qs

# json module is in the standard library as of python 2.6; fall back to
# simplejson if present for older versions.
try:
    import json
    assert hasattr(json, "loads") and hasattr(json, "dumps")
    _json_decode = json.loads
    _json_encode = json.dumps
except Exception:
    try:
        import simplejson
        _json_decode = lambda s: simplejson.loads(_unicode(s))
        _json_encode = lambda v: simplejson.dumps(v)
    except ImportError:
        try:
            # For Google AppEngine
            from django.utils import simplejson
            _json_decode = lambda s: simplejson.loads(_unicode(s))
            _json_encode = lambda v: simplejson.dumps(v)
        except ImportError:
            def _json_decode(s):
                raise NotImplementedError(
                    "A JSON parser is required, e.g., simplejson at "
                    "http://pypi.python.org/pypi/simplejson/")
            _json_encode = _json_decode


_XHTML_ESCAPE_RE = re.compile('[&<>"]')
_XHTML_ESCAPE_DICT = {'&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;'}


def xhtml_escape(value):
    """Escapes a string so it is valid within XML or XHTML."""
    return _XHTML_ESCAPE_RE.sub(lambda match: _XHTML_ESCAPE_DICT[match.group(0)],
                                to_basestring(value))


def xhtml_unescape(value):
    """Un-escapes an XML-escaped string."""
    return re.sub(r"&(#?)(\w+?);", _convert_entity, _unicode(value))


def json_encode(value):
    """JSON-encodes the given Python object."""
    # JSON permits but does not require forward slashes to be escaped.
    # This is useful when json data is emitted in a <script> tag
    # in HTML, as it prevents </script> tags from prematurely terminating
    # the javascript. Some json libraries do this escaping by default,
    # although python's standard library does not, so we do it here.
    # http://stackoverflow.com/questions/1580647/json-why-are-forward-slashes-escaped
    return _json_encode(recursive_unicode(value)).replace("</", "<\\/")


def json_decode(value):
    """Returns Python objects for the given JSON string."""
    return _json_decode(to_basestring(value))


def squeeze(value):
    """Replace all sequences of whitespace chars with a single space."""
    return re.sub(r"[\x00-\x20]+", " ", value).strip()


def url_escape(value):
    """Returns a valid URL-encoded version of the given value."""
    return urllib.quote_plus(utf8(value))

# python 3 changed things around enough that we need two separate
# implementations of url_unescape. We also need our own implementation
# of parse_qs since python 3's version insists on decoding everything.
if sys.version_info[0] < 3:
    def url_unescape(value, encoding='utf-8'):
        """Decodes the given value from a URL.

        The argument may be either a byte or unicode string.

        If encoding is None, the result will be a byte string. Otherwise,
        the result is a unicode string in the specified encoding.
        """
        if encoding is None:
            return urllib.unquote_plus(utf8(value))
        else:
            return unicode(urllib.unquote_plus(utf8(value)), encoding)

    parse_qs_bytes = parse_qs
else:
    def url_unescape(value, encoding='utf-8'):
        """Decodes the given value from a URL.

        The argument may be either a byte or unicode string.

        If encoding is None, the result will be a byte string. Otherwise,
        the result is a unicode string in the specified encoding.
        """
        if encoding is None:
            return urllib.parse.unquote_to_bytes(value)
        else:
            return urllib.unquote_plus(to_basestring(value), encoding=encoding)

    def parse_qs_bytes(qs, keep_blank_values=False, strict_parsing=False):
        """Parses a query string like urlparse.parse_qs, but returns the
        values as byte strings.

        Keys still become type str (interpreted as latin1 in python3!)
        because it's too painful to keep them as byte strings in
        python3 and in practice they're nearly always ascii anyway.
        """
        # This is gross, but python3 doesn't give us another way.
        # Latin1 is the universal donor of character encodings.
        result = parse_qs(qs, keep_blank_values, strict_parsing,
                          encoding='latin1', errors='strict')
        encoded = {}
        for k, v in result.iteritems():
            encoded[k] = [i.encode('latin1') for i in v]
        return encoded


_UTF8_TYPES = (bytes, type(None))


def utf8(value):
    """Converts a string argument to a byte string.

    If the argument is already a byte string or None, it is returned unchanged.
    Otherwise it must be a unicode string and is encoded as utf8.
    """
    if isinstance(value, _UTF8_TYPES):
        return value
    assert isinstance(value, unicode)
    return value.encode("utf-8")

_TO_UNICODE_TYPES = (unicode, type(None))


def to_unicode(value):
    """Converts a string argument to a unicode string.

    If the argument is already a unicode string or None, it is returned
    unchanged. Otherwise it must be a byte string and is decoded as utf8.
    """
    if isinstance(value, _TO_UNICODE_TYPES):
        return value
    assert isinstance(value, bytes)
    return value.decode("utf-8")

# to_unicode was previously named _unicode not because it was private,
# but to avoid conflicts with the built-in unicode() function/type
_unicode = to_unicode

# When dealing with the standard library across python 2 and 3 it is
# sometimes useful to have a direct conversion to the native string type
if str is unicode:
    native_str = to_unicode
else:
    native_str = utf8

_BASESTRING_TYPES = (basestring, type(None))


def to_basestring(value):
    """Converts a string argument to a subclass of basestring.

    In python2, byte and unicode strings are mostly interchangeable,
    so functions that deal with a user-supplied argument in combination
    with ascii string constants can use either and should return the type
    the user supplied. In python3, the two types are not interchangeable,
    so this method is needed to convert byte strings to unicode.
    """
    if isinstance(value, _BASESTRING_TYPES):
        return value
    assert isinstance(value, bytes)
    return value.decode("utf-8")


def recursive_unicode(obj):
    """Walks a simple data structure, converting byte strings to unicode.

    Supports lists, tuples, and dictionaries.
    """
    if isinstance(obj, dict):
        return dict((recursive_unicode(k), recursive_unicode(v)) for (k, v) in obj.iteritems())
    elif isinstance(obj, list):
        return list(recursive_unicode(i) for i in obj)
    elif isinstance(obj, tuple):
        return tuple(recursive_unicode(i) for i in obj)
    elif isinstance(obj, bytes):
        return to_unicode(obj)
    else:
        return obj

# I originally used the regex from
# http://daringfireball.net/2010/07/improved_regex_for_matching_urls
# but it gets all exponential on certain patterns (such as too many trailing
# dots), causing the regex matcher to never return.
# This regex should avoid those problems.
_URL_RE = re.compile(ur"""\b((?:([\w-]+):(/{1,3})|www[.])(?:(?:(?:[^\s&()]|&amp;|&quot;)*(?:[^!"#$%&'()*+,.:;<=>?@\[\]^`{|}~\s]))|(?:\((?:[^\s&()]|&amp;|&quot;)*\)))+)""")


def linkify(text, shorten=False, extra_params="",
            require_protocol=False, permitted_protocols=["http", "https"]):
    """Converts plain text into HTML with links.

    For example: ``linkify("Hello http://tornadoweb.org!")`` would return
    ``Hello <a href="http://tornadoweb.org">http://tornadoweb.org</a>!``

    Parameters:

    shorten: Long urls will be shortened for display.

    extra_params: Extra text to include in the link tag, or a callable
        taking the link as an argument and returning the extra text
        e.g. ``linkify(text, extra_params='rel="nofollow" class="external"')``,
        or::

            def extra_params_cb(url):
                if url.startswith("http://example.com"):
                    return 'class="internal"'
                else:
                    return 'class="external" rel="nofollow"'
            linkify(text, extra_params=extra_params_cb)

    require_protocol: Only linkify urls which include a protocol. If this is
        False, urls such as www.facebook.com will also be linkified.

    permitted_protocols: List (or set) of protocols which should be linkified,
        e.g. linkify(text, permitted_protocols=["http", "ftp", "mailto"]).
        It is very unsafe to include protocols such as "javascript".
    """
    if extra_params and not callable(extra_params):
        extra_params = " " + extra_params.strip()

    def make_link(m):
        url = m.group(1)
        proto = m.group(2)
        if require_protocol and not proto:
            return url  # no protocol, no linkify

        if proto and proto not in permitted_protocols:
            return url  # bad protocol, no linkify

        href = m.group(1)
        if not proto:
            href = "http://" + href  # no proto specified, use http

        if callable(extra_params):
            params = " " + extra_params(href).strip()
        else:
            params = extra_params

        # clip long urls. max_len is just an approximation
        max_len = 30
        if shorten and len(url) > max_len:
            before_clip = url
            if proto:
                proto_len = len(proto) + 1 + len(m.group(3) or "")  # +1 for :
            else:
                proto_len = 0

            parts = url[proto_len:].split("/")
            if len(parts) > 1:
                # Grab the whole host part plus the first bit of the path
                # The path is usually not that interesting once shortened
                # (no more slug, etc), so it really just provides a little
                # extra indication of shortening.
                url = url[:proto_len] + parts[0] + "/" + \
                    parts[1][:8].split('?')[0].split('.')[0]

            if len(url) > max_len * 1.5:  # still too long
                url = url[:max_len]

            if url != before_clip:
                amp = url.rfind('&')
                # avoid splitting html char entities
                if amp > max_len - 5:
                    url = url[:amp]
                url += "..."

                if len(url) >= len(before_clip):
                    url = before_clip
                else:
                    # full url is visible on mouse-over (for those who don't
                    # have a status bar, such as Safari by default)
                    params += ' title="%s"' % href

        return u'<a href="%s"%s>%s</a>' % (href, params, url)

    # First HTML-escape so that our strings are all safe.
    # The regex is modified to avoid character entities other than &amp; so
    # that we won't pick up &quot;, etc.
    text = _unicode(xhtml_escape(text))
    return _URL_RE.sub(make_link, text)


def _convert_entity(m):
    if m.group(1) == "#":
        try:
            return unichr(int(m.group(2)))
        except ValueError:
            return "&#%s;" % m.group(2)
    try:
        return _HTML_UNICODE_MAP[m.group(2)]
    except KeyError:
        return "&%s;" % m.group(2)


def _build_unicode_map():
    unicode_map = {}
    for name, value in htmlentitydefs.name2codepoint.iteritems():
        unicode_map[name] = unichr(value)
    return unicode_map

_HTML_UNICODE_MAP = _build_unicode_map()
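
For reference, the linkify behavior NewsBlur now relies on, per the docstring above (a sketch assuming the module is importable as utils.tornado_escape):

    from utils.tornado_escape import linkify

    print(linkify(u"Hello http://tornadoweb.org!"))
    # u'Hello <a href="http://tornadoweb.org">http://tornadoweb.org</a>!'

    print(linkify(u"www.facebook.com", require_protocol=True))
    # u'www.facebook.com'  (no protocol given, so it stays plain text)

    print(linkify(u"javascript:alert(1)"))
    # not linkified: "javascript" is neither matched by _URL_RE's
    # protocol-plus-slashes form nor in permitted_protocols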