mirror of
https://github.com/viq/NewsBlur.git
synced 2025-09-18 21:43:31 +00:00
Fixing a dozen text and feed fetching bugs.
This commit is contained in:
parent
0d32ae0623
commit
88f2a69a93
8 changed files with 28 additions and 39 deletions
|
@ -1831,7 +1831,11 @@ class MStory(mongo.Document):
|
|||
if not story_content:
|
||||
return
|
||||
|
||||
soup = BeautifulSoup(story_content)
|
||||
try:
|
||||
soup = BeautifulSoup(story_content)
|
||||
except ValueError:
|
||||
return
|
||||
|
||||
images = soup.findAll('img')
|
||||
if not images:
|
||||
return
|
||||
|
|
|
@ -2,6 +2,7 @@ import requests
|
|||
import zlib
|
||||
from django.conf import settings
|
||||
from socket import error as SocketError
|
||||
from mongoengine.queryset import NotUniqueError
|
||||
from vendor.readability import readability
|
||||
from utils import log as logging
|
||||
from utils.feed_functions import timelimit, TimeoutError
|
||||
|
@ -45,15 +46,21 @@ class TextImporter:
|
|||
if resp.encoding and resp.encoding != 'utf-8':
|
||||
try:
|
||||
text = text.encode(resp.encoding)
|
||||
except LookupError:
|
||||
except (LookupError, UnicodeEncodeError):
|
||||
pass
|
||||
original_text_doc = readability.Document(text, url=resp.url, debug=settings.DEBUG)
|
||||
content = original_text_doc.summary(html_partial=True)
|
||||
try:
|
||||
content = original_text_doc.summary(html_partial=True)
|
||||
except readability.Unparseable:
|
||||
return
|
||||
|
||||
if content:
|
||||
if not skip_save:
|
||||
self.story.original_text_z = zlib.compress(content)
|
||||
self.story.save()
|
||||
try:
|
||||
self.story.save()
|
||||
except NotUniqueError:
|
||||
pass
|
||||
logging.user(self.request, ("~SN~FYFetched ~FGoriginal text~FY: now ~SB%s bytes~SN vs. was ~SB%s bytes" % (
|
||||
len(unicode(content)),
|
||||
self.story.story_content_z and len(zlib.decompress(self.story.story_content_z))
|
||||
|
@ -69,7 +76,8 @@ class TextImporter:
|
|||
def fetch_request(self):
|
||||
try:
|
||||
r = requests.get(self.story.story_permalink, headers=self.headers, verify=False)
|
||||
except (AttributeError, SocketError, requests.ConnectionError), e:
|
||||
except (AttributeError, SocketError, requests.ConnectionError,
|
||||
requests.models.MissingSchema, requests.sessions.InvalidSchema), e:
|
||||
logging.user(self.request, "~SN~FRFailed~FY to fetch ~FGoriginal text~FY: %s" % e)
|
||||
return
|
||||
return r
|
||||
|
|
|
@ -1873,10 +1873,11 @@ class MSharedStory(mongo.Document):
|
|||
'story_feed': story_feed,
|
||||
'mute_url': mute_url,
|
||||
}
|
||||
|
||||
story_title = self.story_title.replace('\n', ' ')
|
||||
|
||||
text = render_to_string('mail/email_reply.txt', data)
|
||||
html = pynliner.fromString(render_to_string('mail/email_reply.xhtml', data))
|
||||
subject = "%s replied to you on \"%s\" on NewsBlur" % (reply_user.username, self.story_title)
|
||||
subject = "%s replied to you on \"%s\" on NewsBlur" % (reply_user.username, story_title)
|
||||
msg = EmailMultiAlternatives(subject, text,
|
||||
from_email='NewsBlur <%s>' % settings.HELLO_EMAIL,
|
||||
to=['%s <%s>' % (user.username, user.email)])
|
||||
|
@ -1936,10 +1937,11 @@ class MSharedStory(mongo.Document):
|
|||
'story_feed': story_feed,
|
||||
'mute_url': mute_url,
|
||||
}
|
||||
|
||||
story_title = self.story_title.replace('\n', ' ')
|
||||
|
||||
text = render_to_string('mail/email_reshare.txt', data)
|
||||
html = pynliner.fromString(render_to_string('mail/email_reshare.xhtml', data))
|
||||
subject = "%s re-shared \"%s\" from you on NewsBlur" % (reshare_user.username, self.story_title)
|
||||
subject = "%s re-shared \"%s\" from you on NewsBlur" % (reshare_user.username, story_title)
|
||||
msg = EmailMultiAlternatives(subject, text,
|
||||
from_email='NewsBlur <%s>' % settings.HELLO_EMAIL,
|
||||
to=['%s <%s>' % (original_user.username, original_user.email)])
|
||||
|
|
2
fabfile.py
vendored
2
fabfile.py
vendored
|
@ -1172,7 +1172,7 @@ def staging_full():
|
|||
run('curl -s http://dev.newsblur.com > /dev/null')
|
||||
run('curl -s http://dev.newsblur.com/m/ > /dev/null')
|
||||
|
||||
@parallel
|
||||
# @parallel
|
||||
def celery():
|
||||
celery_slow()
|
||||
|
||||
|
|
|
@ -1,9 +0,0 @@
|
|||
#!/bin/sh
|
||||
|
||||
ps aux | grep refresh_feeds | egrep -v grep | awk '{print $2}' | xargs kill > /dev/null 2>&1
|
||||
python /home/conesus/newsblur/manage.py refresh_feeds -s &
|
||||
python /home/conesus/newsblur/manage.py refresh_feeds -s &
|
||||
python /home/conesus/newsblur/manage.py refresh_feeds -s &
|
||||
python /home/conesus/newsblur/manage.py refresh_feeds -s &
|
||||
python /home/conesus/newsblur/manage.py refresh_feeds -s &
|
||||
python /home/conesus/newsblur/manage.py refresh_feeds -s &
|
|
@ -1,9 +0,0 @@
|
|||
#!/bin/sh
|
||||
|
||||
ps aux | grep refresh_feeds | egrep -v grep | awk '{print $2}' | xargs kill > /dev/null 2>&1
|
||||
python /home/conesus/newsblur/manage.py refresh_feeds -s > /dev/null 2>&1 &
|
||||
python /home/conesus/newsblur/manage.py refresh_feeds -s > /dev/null 2>&1 &
|
||||
python /home/conesus/newsblur/manage.py refresh_feeds -s > /dev/null 2>&1 &
|
||||
python /home/conesus/newsblur/manage.py refresh_feeds -s > /dev/null 2>&1 &
|
||||
|
||||
|
|
@ -91,15 +91,10 @@ class FetchFeed:
|
|||
agent=USER_AGENT,
|
||||
etag=etag,
|
||||
modified=modified)
|
||||
except (TypeError, ValueError), e:
|
||||
logging.debug(u' ***> [%-30s] ~FR%s, turning off microformats.' %
|
||||
except (TypeError, ValueError, KeyError), e:
|
||||
logging.debug(u' ***> [%-30s] ~FR%s, turning off headers.' %
|
||||
(self.feed.title[:30], e))
|
||||
feedparser.PARSE_MICROFORMATS = False
|
||||
self.fpf = feedparser.parse(address,
|
||||
agent=USER_AGENT,
|
||||
etag=etag,
|
||||
modified=modified)
|
||||
feedparser.PARSE_MICROFORMATS = True
|
||||
self.fpf = feedparser.parse(address, agent=USER_AGENT)
|
||||
|
||||
logging.debug(u' ---> [%-30s] ~FYFeed fetch in ~FM%.4ss' % (
|
||||
self.feed.title[:30], time.time() - start))
|
||||
|
@ -396,8 +391,6 @@ class Dispatcher:
|
|||
if self.options['verbose']:
|
||||
logging.debug(u' ---> [%-30s] ~FBTIME: unread count in ~FM%.4ss' % (
|
||||
feed.title[:30], time.time() - start))
|
||||
except KeyboardInterrupt:
|
||||
break
|
||||
except urllib2.HTTPError, e:
|
||||
logging.debug(' ---> [%-30s] ~FRFeed throws HTTP error: ~SB%s' % (unicode(feed_id)[:30], e.fp.read()))
|
||||
feed.save_feed_history(e.code, e.msg, e.fp.read())
|
||||
|
|
|
@ -78,7 +78,7 @@ def pre_process_story(entry):
|
|||
entry['guid'] = unicode(entry['guid'])
|
||||
|
||||
# Normalize story content/summary
|
||||
summary = entry.get('summary', '')
|
||||
summary = entry.get('summary') or ""
|
||||
content = ""
|
||||
if not summary and 'summary_detail' in entry:
|
||||
summary = entry['summary_detail'].get('value', '')
|
||||
|
|
Loading…
Add table
Reference in a new issue