Mirror of https://github.com/samuelclay/NewsBlur.git (synced 2025-09-18 21:50:56 +00:00)
Gotta get rid of threading until I can figure out how to thread-safe the Django ORM. It's corrupting my tables!
Parent: fa123e7f3d
Commit: 9c522416a1
1 changed file with 54 additions and 17 deletions
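Context (editor's note, not part of the commit): the message above points at Django ORM writes from multiple fetcher threads as the source of table corruption. Below is a minimal sketch, for illustration only, of the mitigation commonly used when threading is reintroduced: give every worker thread its own database connection and close it on exit. The model import path and helper name are hypothetical, not taken from this commit; the assumed backend constraint is MySQLdb's threadsafety level of 1 (the module may be shared across threads, a single connection may not).

import threading
from django.db import connection

def refresh_one_feed(feed_id):
    # 'Feed' stands in for the project's feed model; this import path is
    # hypothetical, not taken from the commit.
    from apps.rss_feeds.models import Feed
    try:
        feed = Feed.objects.get(pk=feed_id)
        # ... fetch, process, and feed.save() would happen here ...
    finally:
        # django.db.connection is thread-local, so each worker closes only
        # its own connection and never touches one owned by another thread.
        connection.close()

workers = [threading.Thread(target=refresh_one_feed, args=(feed_id,))
           for feed_id in (1, 2, 3)]
for w in workers:
    w.start()
for w in workers:
    w.join()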
@@ -16,6 +16,8 @@ import threading
 import traceback
 import socket
 
+threadpool = None
+
 # Refresh feed code adapted from Feedjack.
 # http://feedjack.googlecode.com
 
@@ -40,7 +42,7 @@ def mtime(ttime):
 
 class Command(BaseCommand):
     option_list = BaseCommand.option_list + (
-        make_option("-f", "--feed", dest="feed", default=None),
+        make_option("-f", "--feed", default=None),
         make_option("-d", "--daemon", dest="daemonize", action="store_true"),
         make_option('-t', '--timeout', type='int', default=10,
             help='Wait timeout in seconds when connecting to feeds.'),
@@ -67,12 +69,51 @@ class Command(BaseCommand):
 
 
-class ProcessFeed:
+class FetchFeed:
     def __init__(self, feed, options):
         self.feed = feed
         self.options = options
         self.fpf = None
 
+    def fetch(self):
+        """ Downloads and parses a feed.
+        """
+        logging.debug(u'[%d] Fetching %s' % (self.feed.id,
+                                             self.feed.feed_title))
+
+        # we check the etag and the modified time to save bandwidth and
+        # avoid bans
+        try:
+            self.fpf = feedparser.parse(self.feed.feed_address,
+                                        agent=USER_AGENT,
+                                        etag=self.feed.etag)
+        except:
+            logging.error('! ERROR: feed cannot be parsed')
+            return FEED_ERRPARSE
+
+        return self.fpf
+
+class FetchPage:
+    def __init__(self, feed, options):
+        self.feed = feed
+        self.options = options
+
+    def fetch(self):
+        logging.debug(u'[%d] Fetching page from %s' % (self.feed.id,
+                                                       self.feed.feed_title))
+
+        page_importer = PageImporter(self.feed.feed_link, self.feed)
+        self.feed.page = page_importer.fetch_page()
+
+        self.feed.save()
+
+class ProcessFeed:
+    def __init__(self, feed, fpf, options):
+        self.feed = feed
+        self.options = options
+        self.fpf = fpf
+
     def process(self):
         """ Downloads and parses a feed.
         """
@@ -86,16 +127,6 @@ class ProcessFeed:
         logging.debug(u'[%d] Processing %s' % (self.feed.id,
                                                self.feed.feed_title))
 
-        # we check the etag and the modified time to save bandwith and
-        # avoid bans
-        try:
-            self.fpf = feedparser.parse(self.feed.feed_address,
-                                        agent=USER_AGENT,
-                                        etag=self.feed.etag)
-        except:
-            logging.error('! ERROR: feed cannot be parsed')
-            return FEED_ERRPARSE, ret_values
-
         if hasattr(self.fpf, 'status'):
             if self.options['verbose']:
                 logging.debug(u'[%d] HTTP status %d: %s' % (self.feed.id,
@@ -157,9 +188,6 @@ class ProcessFeed:
             guids.append(entry.link)
 
 
-        page_importer = PageImporter(self.feed.feed_link, self.feed)
-        self.feed.page = page_importer.fetch_page()
-
         self.feed.save()
 
         # Compare new stories to existing stories, adding and updating
@@ -219,9 +247,18 @@ class Dispatcher:
         """
         start_time = datetime.datetime.now()
         try:
-            pfeed = ProcessFeed(feed, self.options)
+            ffeed = FetchFeed(feed, self.options)
+            fetched_feed = ffeed.fetch()
+
+            pfeed = ProcessFeed(feed, fetched_feed, self.options)
             ret_feed, ret_entries = pfeed.process()
+
+            fpage = FetchPage(feed, self.options)
+            fpage.fetch()
+
+            del ffeed
             del pfeed
+            del fpage
         except:
             (etype, eobj, etb) = sys.exc_info()
             print '[%d] ! -------------------------' % (feed.id,)
@@ -285,7 +322,7 @@ class Dispatcher:
                                 for key in self.entry_keys)
                 ))
                 break
-            except:
+            except Exception:
                 logging.error(u'I DONT KNOW')
 
 class FeedFetcher(threading.Thread):
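Read together, the hunks above split the old ProcessFeed into a strictly sequential three-stage pipeline per feed, matching the Dispatcher hunk. A minimal sketch of the resulting flow, using only names that appear in this diff and assuming the module's existing imports plus a feed instance and an options dict:

ffeed = FetchFeed(feed, options)                  # etag-aware feedparser fetch
fetched_feed = ffeed.fetch()

pfeed = ProcessFeed(feed, fetched_feed, options)  # compare new stories to existing ones
ret_feed, ret_entries = pfeed.process()

fpage = FetchPage(feed, options)                  # PageImporter fetches feed_link's page
fpage.fetch()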