mirror of
https://github.com/samuelclay/NewsBlur.git
synced 2025-08-05 16:58:59 +00:00
Moving from threadpool to a Queue. Much simpler.
This commit is contained in:
parent
0caf49e9a4
commit
02a4d292e8
3 changed files with 77 additions and 47 deletions
|
@ -31,6 +31,9 @@ class Command(BaseCommand):
|
||||||
disp = feed_fetcher.Dispatcher(options, options['workerthreads'])
|
disp = feed_fetcher.Dispatcher(options, options['workerthreads'])
|
||||||
|
|
||||||
feeds = Feed.objects.all()
|
feeds = Feed.objects.all()
|
||||||
|
|
||||||
|
disp.run_jobs()
|
||||||
|
|
||||||
for feed in feeds:
|
for feed in feeds:
|
||||||
disp.add_job(feed)
|
disp.add_job(feed)
|
||||||
|
|
||||||
|
|
|
@ -183,6 +183,7 @@ class Feed(models.Model):
|
||||||
|
|
||||||
# import pdb
|
# import pdb
|
||||||
# pdb.set_trace()
|
# pdb.set_trace()
|
||||||
|
|
||||||
# Title distance + content distance, checking if story changed
|
# Title distance + content distance, checking if story changed
|
||||||
story_title_difference = levenshtein_distance(story.get('title'),
|
story_title_difference = levenshtein_distance(story.get('title'),
|
||||||
existing_story['story_title'])
|
existing_story['story_title'])
|
||||||
|
|
|
@ -10,8 +10,9 @@ import logging
|
||||||
import datetime
|
import datetime
|
||||||
import threading
|
import threading
|
||||||
import traceback
|
import traceback
|
||||||
|
from Queue import Queue
|
||||||
|
|
||||||
# threadpool = None
|
threadpool = None
|
||||||
|
|
||||||
# Refresh feed code adapted from Feedjack.
|
# Refresh feed code adapted from Feedjack.
|
||||||
# http://feedjack.googlecode.com
|
# http://feedjack.googlecode.com
|
||||||
|
@ -55,8 +56,11 @@ class FetchFeed:
|
||||||
self.fpf = feedparser.parse(self.feed.feed_address,
|
self.fpf = feedparser.parse(self.feed.feed_address,
|
||||||
agent=USER_AGENT,
|
agent=USER_AGENT,
|
||||||
etag=self.feed.etag)
|
etag=self.feed.etag)
|
||||||
except:
|
except Exception, e:
|
||||||
logging.error('! ERROR: feed cannot be parsed')
|
log_msg = '! ERROR: feed cannot be parsed: %s' % e
|
||||||
|
logging.error(log_msg)
|
||||||
|
print(log_msg)
|
||||||
|
|
||||||
return FEED_ERRPARSE
|
return FEED_ERRPARSE
|
||||||
|
|
||||||
return self.fpf
|
return self.fpf
|
||||||
|
@ -82,7 +86,6 @@ class ProcessFeed:
|
||||||
self.options = options
|
self.options = options
|
||||||
self.fpf = fpf
|
self.fpf = fpf
|
||||||
|
|
||||||
@transaction.commit_on_success
|
|
||||||
def process(self):
|
def process(self):
|
||||||
""" Downloads and parses a feed.
|
""" Downloads and parses a feed.
|
||||||
"""
|
"""
|
||||||
|
@ -204,6 +207,7 @@ class Dispatcher:
|
||||||
FEED_ERREXC:'exception'}
|
FEED_ERREXC:'exception'}
|
||||||
self.entry_keys = sorted(self.entry_trans.keys())
|
self.entry_keys = sorted(self.entry_trans.keys())
|
||||||
self.feed_keys = sorted(self.feed_trans.keys())
|
self.feed_keys = sorted(self.feed_trans.keys())
|
||||||
|
self.num_threads = num_threads
|
||||||
if threadpool:
|
if threadpool:
|
||||||
self.tpool = threadpool.ThreadPool(num_threads)
|
self.tpool = threadpool.ThreadPool(num_threads)
|
||||||
else:
|
else:
|
||||||
|
@ -211,9 +215,11 @@ class Dispatcher:
|
||||||
self.time_start = datetime.datetime.now()
|
self.time_start = datetime.datetime.now()
|
||||||
|
|
||||||
|
|
||||||
def process_feed_wrapper(self, feed):
|
def process_feed_wrapper(self, feed_queue):
|
||||||
""" wrapper for ProcessFeed
|
""" wrapper for ProcessFeed
|
||||||
"""
|
"""
|
||||||
|
while True:
|
||||||
|
feed = feed_queue.get()
|
||||||
start_time = datetime.datetime.now()
|
start_time = datetime.datetime.now()
|
||||||
|
|
||||||
### Uncomment to test feed fetcher
|
### Uncomment to test feed fetcher
|
||||||
|
@ -237,7 +243,7 @@ class Dispatcher:
|
||||||
except:
|
except:
|
||||||
(etype, eobj, etb) = sys.exc_info()
|
(etype, eobj, etb) = sys.exc_info()
|
||||||
print '[%d] ! -------------------------' % (feed.id,)
|
print '[%d] ! -------------------------' % (feed.id,)
|
||||||
print traceback.format_exception(etype, eobj, etb)
|
# print traceback.format_exception(etype, eobj, etb)
|
||||||
traceback.print_exception(etype, eobj, etb)
|
traceback.print_exception(etype, eobj, etb)
|
||||||
print '[%d] ! -------------------------' % (feed.id,)
|
print '[%d] ! -------------------------' % (feed.id,)
|
||||||
ret_feed = FEED_ERREXC
|
ret_feed = FEED_ERREXC
|
||||||
|
@ -260,8 +266,7 @@ class Dispatcher:
|
||||||
for key, val in ret_entries.items():
|
for key, val in ret_entries.items():
|
||||||
self.entry_stats[key] += val
|
self.entry_stats[key] += val
|
||||||
|
|
||||||
return ret_feed, ret_entries
|
feed_queue.task_done()
|
||||||
|
|
||||||
|
|
||||||
def add_job(self, feed):
|
def add_job(self, feed):
|
||||||
""" adds a feed processing job to the pool
|
""" adds a feed processing job to the pool
|
||||||
|
@ -272,13 +277,34 @@ class Dispatcher:
|
||||||
self.tpool.putRequest(req)
|
self.tpool.putRequest(req)
|
||||||
else:
|
else:
|
||||||
# no threadpool module, just run the job
|
# no threadpool module, just run the job
|
||||||
self.process_feed_wrapper(feed)
|
self.feed_queue.put(feed)
|
||||||
|
# self.process_feed_wrapper(feed)
|
||||||
|
|
||||||
|
def run_jobs(self):
|
||||||
|
self.feed_queue = Queue()
|
||||||
|
|
||||||
|
for i in range(self.num_threads):
|
||||||
|
worker = threading.Thread(target=self.process_feed_wrapper, args=(self.feed_queue,))
|
||||||
|
worker.setDaemon(True)
|
||||||
|
worker.start()
|
||||||
|
|
||||||
def poll(self):
|
def poll(self):
|
||||||
""" polls the active threads
|
""" polls the active threads
|
||||||
"""
|
"""
|
||||||
if not self.tpool:
|
if not self.tpool:
|
||||||
# no thread pool, nothing to poll
|
# no thread pool, nothing to poll
|
||||||
|
self.feed_queue.join()
|
||||||
|
done = (u'* DONE in %s\n* Feeds: %s\n* Entries: %s' % (
|
||||||
|
unicode(datetime.datetime.now() - self.time_start),
|
||||||
|
u' '.join(u'%s=%d' % (self.feed_trans[key],
|
||||||
|
self.feed_stats[key])
|
||||||
|
for key in self.feed_keys),
|
||||||
|
u' '.join(u'%s=%d' % (self.entry_trans[key],
|
||||||
|
self.entry_stats[key])
|
||||||
|
for key in self.entry_keys)
|
||||||
|
))
|
||||||
|
print done
|
||||||
|
logging.info(done)
|
||||||
return
|
return
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
|
|
Loading…
Add table
Reference in a new issue