mirror of
https://github.com/viq/NewsBlur.git
synced 2025-08-05 16:49:45 +00:00
Using latest feedparser.
This commit is contained in:
parent
58ec30804e
commit
ee5c00bce5
2 changed files with 31 additions and 26 deletions
|
@ -16,7 +16,8 @@ from apps.rss_feeds.page_importer import PageImporter
|
|||
from apps.rss_feeds.icon_importer import IconImporter
|
||||
from apps.push.models import PushSubscription
|
||||
from apps.statistics.models import MAnalyticsFetcher
|
||||
from utils import feedparser
|
||||
# from utils import feedparser
|
||||
from utils import feedparser_trunk as feedparser
|
||||
from utils.story_functions import pre_process_story
|
||||
from utils import log as logging
|
||||
from utils.feed_functions import timelimit, TimeoutError, utf8encode, cache_bust_url
|
||||
|
@ -255,7 +256,12 @@ class ProcessFeed:
|
|||
hub_url = link['href']
|
||||
elif link['rel'] == 'self':
|
||||
self_url = link['href']
|
||||
push_expired = self.feed.is_push and self.feed.push.lease_expires < datetime.datetime.now()
|
||||
push_expired = False
|
||||
if self.feed.is_push:
|
||||
try:
|
||||
push_expired = self.feed.push.lease_expires < datetime.datetime.now()
|
||||
except PushSubscription.DoesNotExist:
|
||||
self.feed.is_push = False
|
||||
if (hub_url and self_url and not settings.DEBUG and
|
||||
self.feed.active_subscribers > 0 and
|
||||
(push_expired or not self.feed.is_push or self.options.get('force'))):
|
||||
|
|
|
@ -429,16 +429,15 @@ _cp1252 = {
|
|||
_urifixer = re.compile('^([A-Za-z][A-Za-z0-9+-.]*://)(/*)(.*?)')
|
||||
def _urljoin(base, uri):
|
||||
uri = _urifixer.sub(r'\1\3', uri)
|
||||
#try:
|
||||
if not isinstance(uri, unicode):
|
||||
uri = uri.decode('utf-8', 'ignore')
|
||||
uri = urlparse.urljoin(base, uri)
|
||||
try:
|
||||
uri = urlparse.urljoin(base, uri)
|
||||
except ValueError:
|
||||
uri = u''
|
||||
if not isinstance(uri, unicode):
|
||||
return uri.decode('utf-8', 'ignore')
|
||||
return uri
|
||||
#except:
|
||||
# uri = urlparse.urlunparse([urllib.quote(part) for part in urlparse.urlparse(uri)])
|
||||
# return urlparse.urljoin(base, uri)
|
||||
|
||||
class _FeedParserMixin:
|
||||
namespaces = {
|
||||
|
@ -553,7 +552,11 @@ class _FeedParserMixin:
|
|||
self.svgOK = 0
|
||||
self.title_depth = -1
|
||||
self.depth = 0
|
||||
self.psc_chapters_counter = 0
|
||||
# psc_chapters_flag prevents multiple psc_chapters from being
|
||||
# captured in a single entry or item. The transition states are
|
||||
# None -> True -> False. psc_chapter elements will only be
|
||||
# captured while it is True.
|
||||
self.psc_chapters_flag = None
|
||||
if baselang:
|
||||
self.feeddata['language'] = baselang.replace('_','-')
|
||||
|
||||
|
@ -878,7 +881,9 @@ class _FeedParserMixin:
|
|||
|
||||
# resolve relative URIs
|
||||
if (element in self.can_be_relative_uri) and output:
|
||||
output = self.resolveURI(output)
|
||||
# do not resolve guid elements with isPermalink="false"
|
||||
if not element == 'id' or self.guidislink:
|
||||
output = self.resolveURI(output)
|
||||
|
||||
# decode entities within embedded markup
|
||||
if not self.contentparams.get('base64', 0):
|
||||
|
@ -1344,7 +1349,7 @@ class _FeedParserMixin:
|
|||
self.inentry = 1
|
||||
self.guidislink = 0
|
||||
self.title_depth = -1
|
||||
self.psc_chapters_counter = 0
|
||||
self.psc_chapters_flag = None
|
||||
id = self._getAttribute(attrsD, 'rdf:about')
|
||||
if id:
|
||||
context = self._getContext()
|
||||
|
@ -1894,19 +1899,18 @@ class _FeedParserMixin:
|
|||
context['newlocation'] = _makeSafeAbsoluteURI(self.baseuri, url.strip())
|
||||
|
||||
def _start_psc_chapters(self, attrsD):
|
||||
version = self._getAttribute(attrsD, 'version')
|
||||
if version == '1.1' and self.psc_chapters_counter == 0:
|
||||
self.psc_chapters_counter += 1
|
||||
if self.psc_chapters_flag is None:
|
||||
# Transition from None -> True
|
||||
self.psc_chapters_flag = True
|
||||
attrsD['chapters'] = []
|
||||
self._getContext()['psc_chapters'] = FeedParserDict(attrsD)
|
||||
|
||||
def _end_psc_chapters(self):
|
||||
version = self._getContext()['psc_chapters']['version']
|
||||
if version == '1.1':
|
||||
self.psc_chapters_counter += 1
|
||||
# Transition from True -> False
|
||||
self.psc_chapters_flag = False
|
||||
|
||||
def _start_psc_chapter(self, attrsD):
|
||||
if self.psc_chapters_counter == 1:
|
||||
if self.psc_chapters_flag:
|
||||
start = self._getAttribute(attrsD, 'start')
|
||||
attrsD['start_parsed'] = _parse_psc_chapter_start(start)
|
||||
|
||||
|
@ -2280,10 +2284,7 @@ def _resolveRelativeURIs(htmlSource, baseURI, encoding, _type):
|
|||
def _makeSafeAbsoluteURI(base, rel=None):
|
||||
# bail if ACCEPTABLE_URI_SCHEMES is empty
|
||||
if not ACCEPTABLE_URI_SCHEMES:
|
||||
try:
|
||||
return _urljoin(base, rel or u'')
|
||||
except ValueError:
|
||||
return u''
|
||||
return _urljoin(base, rel or u'')
|
||||
if not base:
|
||||
return rel or u''
|
||||
if not rel:
|
||||
|
@ -2294,10 +2295,7 @@ def _makeSafeAbsoluteURI(base, rel=None):
|
|||
if not scheme or scheme in ACCEPTABLE_URI_SCHEMES:
|
||||
return base
|
||||
return u''
|
||||
try:
|
||||
uri = _urljoin(base, rel)
|
||||
except ValueError:
|
||||
return u''
|
||||
uri = _urljoin(base, rel)
|
||||
if uri.strip().split(':', 1)[0] not in ACCEPTABLE_URI_SCHEMES:
|
||||
return u''
|
||||
return uri
|
||||
|
@ -2315,7 +2313,8 @@ class _HTMLSanitizer(_BaseHTMLProcessor):
|
|||
'p', 'pre', 'progress', 'q', 's', 'samp', 'section', 'select',
|
||||
'small', 'sound', 'source', 'spacer', 'span', 'strike', 'strong',
|
||||
'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot',
|
||||
'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video', 'noscript'])
|
||||
'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video', 'noscript',
|
||||
'object', 'embed', 'iframe', 'param'])
|
||||
|
||||
acceptable_attributes = set(['abbr', 'accept', 'accept-charset', 'accesskey',
|
||||
'action', 'align', 'alt', 'autocomplete', 'autofocus', 'axis',
|
||||
|
|
Loading…
Add table
Reference in a new issue