Fixing a dozen text and feed fetching bugs.

This commit is contained in:
Samuel Clay 2013-08-06 13:18:55 -07:00
parent 0d32ae0623
commit 88f2a69a93
8 changed files with 28 additions and 39 deletions

View file

@@ -1831,7 +1831,11 @@ class MStory(mongo.Document):
if not story_content:
return
soup = BeautifulSoup(story_content)
try:
soup = BeautifulSoup(story_content)
except ValueError:
return
images = soup.findAll('img')
if not images:
return

View file

@@ -2,6 +2,7 @@ import requests
import zlib
from django.conf import settings
from socket import error as SocketError
from mongoengine.queryset import NotUniqueError
from vendor.readability import readability
from utils import log as logging
from utils.feed_functions import timelimit, TimeoutError
@@ -45,15 +46,21 @@ class TextImporter:
if resp.encoding and resp.encoding != 'utf-8':
try:
text = text.encode(resp.encoding)
except LookupError:
except (LookupError, UnicodeEncodeError):
pass
original_text_doc = readability.Document(text, url=resp.url, debug=settings.DEBUG)
content = original_text_doc.summary(html_partial=True)
try:
content = original_text_doc.summary(html_partial=True)
except readability.Unparseable:
return
if content:
if not skip_save:
self.story.original_text_z = zlib.compress(content)
self.story.save()
try:
self.story.save()
except NotUniqueError:
pass
logging.user(self.request, ("~SN~FYFetched ~FGoriginal text~FY: now ~SB%s bytes~SN vs. was ~SB%s bytes" % (
len(unicode(content)),
self.story.story_content_z and len(zlib.decompress(self.story.story_content_z))
@@ -69,7 +76,8 @@ class TextImporter:
def fetch_request(self):
try:
r = requests.get(self.story.story_permalink, headers=self.headers, verify=False)
except (AttributeError, SocketError, requests.ConnectionError), e:
except (AttributeError, SocketError, requests.ConnectionError,
requests.models.MissingSchema, requests.sessions.InvalidSchema), e:
logging.user(self.request, "~SN~FRFailed~FY to fetch ~FGoriginal text~FY: %s" % e)
return
return r

View file

@@ -1873,10 +1873,11 @@ class MSharedStory(mongo.Document):
'story_feed': story_feed,
'mute_url': mute_url,
}
story_title = self.story_title.replace('\n', ' ')
text = render_to_string('mail/email_reply.txt', data)
html = pynliner.fromString(render_to_string('mail/email_reply.xhtml', data))
subject = "%s replied to you on \"%s\" on NewsBlur" % (reply_user.username, self.story_title)
subject = "%s replied to you on \"%s\" on NewsBlur" % (reply_user.username, story_title)
msg = EmailMultiAlternatives(subject, text,
from_email='NewsBlur <%s>' % settings.HELLO_EMAIL,
to=['%s <%s>' % (user.username, user.email)])
@@ -1936,10 +1937,11 @@ class MSharedStory(mongo.Document):
'story_feed': story_feed,
'mute_url': mute_url,
}
story_title = self.story_title.replace('\n', ' ')
text = render_to_string('mail/email_reshare.txt', data)
html = pynliner.fromString(render_to_string('mail/email_reshare.xhtml', data))
subject = "%s re-shared \"%s\" from you on NewsBlur" % (reshare_user.username, self.story_title)
subject = "%s re-shared \"%s\" from you on NewsBlur" % (reshare_user.username, story_title)
msg = EmailMultiAlternatives(subject, text,
from_email='NewsBlur <%s>' % settings.HELLO_EMAIL,
to=['%s <%s>' % (original_user.username, original_user.email)])

2
fabfile.py vendored
View file

@@ -1172,7 +1172,7 @@ def staging_full():
run('curl -s http://dev.newsblur.com > /dev/null')
run('curl -s http://dev.newsblur.com/m/ > /dev/null')
@parallel
# @parallel
def celery():
celery_slow()

View file

@@ -1,9 +0,0 @@
#!/bin/sh
ps aux | grep refresh_feeds | egrep -v grep | awk '{print $2}' | xargs kill > /dev/null 2>&1
python /home/conesus/newsblur/manage.py refresh_feeds -s &
python /home/conesus/newsblur/manage.py refresh_feeds -s &
python /home/conesus/newsblur/manage.py refresh_feeds -s &
python /home/conesus/newsblur/manage.py refresh_feeds -s &
python /home/conesus/newsblur/manage.py refresh_feeds -s &
python /home/conesus/newsblur/manage.py refresh_feeds -s &

View file

@@ -1,9 +0,0 @@
#!/bin/sh
ps aux | grep refresh_feeds | egrep -v grep | awk '{print $2}' | xargs kill > /dev/null 2>&1
python /home/conesus/newsblur/manage.py refresh_feeds -s > /dev/null 2>&1 &
python /home/conesus/newsblur/manage.py refresh_feeds -s > /dev/null 2>&1 &
python /home/conesus/newsblur/manage.py refresh_feeds -s > /dev/null 2>&1 &
python /home/conesus/newsblur/manage.py refresh_feeds -s > /dev/null 2>&1 &

View file

@@ -91,15 +91,10 @@ class FetchFeed:
agent=USER_AGENT,
etag=etag,
modified=modified)
except (TypeError, ValueError), e:
logging.debug(u' ***> [%-30s] ~FR%s, turning off microformats.' %
except (TypeError, ValueError, KeyError), e:
logging.debug(u' ***> [%-30s] ~FR%s, turning off headers.' %
(self.feed.title[:30], e))
feedparser.PARSE_MICROFORMATS = False
self.fpf = feedparser.parse(address,
agent=USER_AGENT,
etag=etag,
modified=modified)
feedparser.PARSE_MICROFORMATS = True
self.fpf = feedparser.parse(address, agent=USER_AGENT)
logging.debug(u' ---> [%-30s] ~FYFeed fetch in ~FM%.4ss' % (
self.feed.title[:30], time.time() - start))
@@ -396,8 +391,6 @@ class Dispatcher:
if self.options['verbose']:
logging.debug(u' ---> [%-30s] ~FBTIME: unread count in ~FM%.4ss' % (
feed.title[:30], time.time() - start))
except KeyboardInterrupt:
break
except urllib2.HTTPError, e:
logging.debug(' ---> [%-30s] ~FRFeed throws HTTP error: ~SB%s' % (unicode(feed_id)[:30], e.fp.read()))
feed.save_feed_history(e.code, e.msg, e.fp.read())

View file

@@ -78,7 +78,7 @@ def pre_process_story(entry):
entry['guid'] = unicode(entry['guid'])
# Normalize story content/summary
summary = entry.get('summary', '')
summary = entry.get('summary') or ""
content = ""
if not summary and 'summary_detail' in entry:
summary = entry['summary_detail'].get('value', '')