mirror of
https://github.com/viq/NewsBlur.git
synced 2025-09-18 21:43:31 +00:00
OK. Turning on threading for feed fetching. Enough logging to catch the bugs. Fixed the segmentation fault on exit.
This commit is contained in:
parent
dd95ef5a37
commit
0caf49e9a4
4 changed files with 49 additions and 21 deletions
|
@ -6,6 +6,7 @@ from utils import feed_fetcher
|
|||
from utils.management_functions import daemonize
|
||||
import logging
|
||||
import socket
|
||||
import os
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
|
@ -16,7 +17,7 @@ class Command(BaseCommand):
|
|||
help='Wait timeout in seconds when connecting to feeds.'),
|
||||
make_option('-V', '--verbose', action='store_true',
|
||||
dest='verbose', default=False, help='Verbose output.'),
|
||||
make_option('-w', '--workerthreads', type='int', default=4,
|
||||
make_option('-w', '--workerthreads', type='int', default=20,
|
||||
help='Worker threads that will fetch feeds in parallel.'),
|
||||
)
|
||||
|
||||
|
@ -35,4 +36,6 @@ class Command(BaseCommand):
|
|||
|
||||
disp.poll()
|
||||
|
||||
os._exit(1)
|
||||
|
||||
|
|
@ -40,7 +40,6 @@ NEWSBLUR.AssetModel.Reader.prototype = {
|
|||
data: data,
|
||||
type: 'POST',
|
||||
success: function(o) {
|
||||
var log_regex = /\s+<div id="django_log"([\s|\S])*$/m;
|
||||
var log_index = o.indexOf('<div id="django_log"');
|
||||
var data;
|
||||
|
||||
|
@ -51,10 +50,16 @@ NEWSBLUR.AssetModel.Reader.prototype = {
|
|||
if (log) {
|
||||
var log_js_index_begin = log.indexOf('<script type=\"text\/javascript\">');
|
||||
var log_js_index_end = log.indexOf('</script>');
|
||||
var log_html = log.substring(0, log_js_index_begin);
|
||||
var log_js = log.substring(log_js_index_begin+31, log_js_index_end);
|
||||
$('#django_log').replaceWith(log_html);
|
||||
var js = eval(log_js);
|
||||
var log_html, log_js;
|
||||
|
||||
if (log_js_index_begin != -1) {
|
||||
log_html = log.substring(0, log_js_index_begin);
|
||||
log_js = log.substring(log_js_index_begin+31, log_js_index_end);
|
||||
$('#django_log').replaceWith(log_html);
|
||||
var js = eval(log_js);
|
||||
} else {
|
||||
$('#django_log').replaceWith(log);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
try {
|
||||
|
|
11
settings.py
11
settings.py
|
@ -67,10 +67,11 @@ if PRODUCTION:
|
|||
MEDIA_URL = 'http://www.newsblur.com/media/'
|
||||
DEBUG = False
|
||||
CACHE_BACKEND = 'file:///var/tmp/django_cache'
|
||||
logging.basicConfig(level=logging.WARN,
|
||||
logging.basicConfig(level=logging.INFO,
|
||||
format='%(asctime)s %(levelname)s %(message)s',
|
||||
filename=LOG_FILE,
|
||||
filemode='w')
|
||||
PREPEND_WWW = True
|
||||
elif STAGING:
|
||||
DATABASE_ENGINE = 'mysql'
|
||||
DATABASE_NAME = 'newsblur'
|
||||
|
@ -117,7 +118,8 @@ elif DEV_SERVER2:
|
|||
# Example: "/Users/media/media.lawrence.com/"
|
||||
MEDIA_URL = '/media/'
|
||||
DEBUG = True
|
||||
CACHE_BACKEND = 'dummy:///'
|
||||
# CACHE_BACKEND = 'dummy:///'
|
||||
CACHE_BACKEND = 'locmem:///'
|
||||
logging.basicConfig(level=logging.DEBUG,
|
||||
format='%(asctime)s %(levelname)s %(message)s',
|
||||
filename=LOG_FILE,
|
||||
|
@ -142,9 +144,10 @@ TEMPLATE_CONTEXT_PROCESSORS = (
|
|||
|
||||
MIDDLEWARE_CLASSES = (
|
||||
'django.middleware.gzip.GZipMiddleware',
|
||||
'django.contrib.sessions.middleware.SessionMiddleware',
|
||||
'django.middleware.common.CommonMiddleware',
|
||||
'django.middleware.cache.CacheMiddleware',
|
||||
'django.contrib.sessions.middleware.SessionMiddleware',
|
||||
'django.middleware.transaction.TransactionMiddleware',
|
||||
'django.contrib.auth.middleware.AuthenticationMiddleware',
|
||||
'djangologging.middleware.LoggingMiddleware',
|
||||
)
|
||||
|
@ -206,6 +209,8 @@ TEST_DATABASE_COLLATION = 'utf8_general_ci'
|
|||
ROOT_URLCONF = 'urls'
|
||||
INTERNAL_IPS = ('127.0.0.1',)
|
||||
LOGGING_LOG_SQL = True
|
||||
APPEND_SLASH = True
|
||||
SESSION_ENGINE = "django.contrib.sessions.backends.cache"
|
||||
|
||||
# ===============
|
||||
# = Django Apps =
|
||||
|
|
|
@ -3,6 +3,7 @@ from django.core.cache import cache
|
|||
from apps.reader.models import UserSubscription, UserSubscriptionFolders, UserStory
|
||||
from apps.rss_feeds.importer import PageImporter
|
||||
from utils import feedparser, threadpool
|
||||
from django.db import transaction
|
||||
import sys
|
||||
import time
|
||||
import logging
|
||||
|
@ -10,12 +11,12 @@ import datetime
|
|||
import threading
|
||||
import traceback
|
||||
|
||||
threadpool = None
|
||||
# threadpool = None
|
||||
|
||||
# Refresh feed code adapted from Feedjack.
|
||||
# http://feedjack.googlecode.com
|
||||
|
||||
VERSION = '0.2'
|
||||
VERSION = '0.8'
|
||||
URL = 'http://www.newsblur.com/'
|
||||
USER_AGENT = 'NewsBlur %s - %s' % (VERSION, URL)
|
||||
SLOWFEED_WARNING = 10
|
||||
|
@ -44,11 +45,12 @@ class FetchFeed:
|
|||
""" Downloads and parses a feed.
|
||||
"""
|
||||
|
||||
logging.debug(u'[%d] Fetching %s' % (self.feed.id,
|
||||
self.feed.feed_title))
|
||||
|
||||
# we check the etag and the modified time to save bandwidth and
|
||||
# avoid bans
|
||||
log_msg = u'[%d] Fetching %s' % (self.feed.id,
|
||||
self.feed.feed_title)
|
||||
logging.info(log_msg)
|
||||
print(log_msg)
|
||||
|
||||
# we check the etag and the modified time to save bandwidth and avoid bans
|
||||
try:
|
||||
self.fpf = feedparser.parse(self.feed.feed_address,
|
||||
agent=USER_AGENT,
|
||||
|
@ -63,7 +65,8 @@ class FetchPage:
|
|||
def __init__(self, feed, options):
|
||||
self.feed = feed
|
||||
self.options = options
|
||||
|
||||
|
||||
@transaction.autocommit
|
||||
def fetch(self):
|
||||
logging.debug(u'[%d] Fetching page from %s' % (self.feed.id,
|
||||
self.feed.feed_title))
|
||||
|
@ -79,6 +82,7 @@ class ProcessFeed:
|
|||
self.options = options
|
||||
self.fpf = fpf
|
||||
|
||||
@transaction.commit_on_success
|
||||
def process(self):
|
||||
""" Downloads and parses a feed.
|
||||
"""
|
||||
|
@ -211,6 +215,12 @@ class Dispatcher:
|
|||
""" wrapper for ProcessFeed
|
||||
"""
|
||||
start_time = datetime.datetime.now()
|
||||
|
||||
### Uncomment to test feed fetcher
|
||||
# from random import randint
|
||||
# if randint(0,10) < 10:
|
||||
# return 5, {}
|
||||
|
||||
try:
|
||||
ffeed = FetchFeed(feed, self.options)
|
||||
fetched_feed = ffeed.fetch()
|
||||
|
@ -238,13 +248,14 @@ class Dispatcher:
|
|||
comment = u' (SLOW FEED!)'
|
||||
else:
|
||||
comment = u''
|
||||
logging.debug(u'[%d] Processed %s in %s [%s] [%s]%s' % (
|
||||
done = (u'[%d] Processed %s in %s [%s] [%s]%s' % (
|
||||
feed.id, feed.feed_title, unicode(delta),
|
||||
self.feed_trans[ret_feed],
|
||||
u' '.join(u'%s=%d' % (self.entry_trans[key],
|
||||
ret_entries[key]) for key in self.entry_keys),
|
||||
comment))
|
||||
|
||||
logging.debug(done)
|
||||
print(done)
|
||||
self.feed_stats[ret_feed] += 1
|
||||
for key, val in ret_entries.items():
|
||||
self.entry_stats[key] += val
|
||||
|
@ -277,7 +288,7 @@ class Dispatcher:
|
|||
logging.debug('! Cancelled by user')
|
||||
break
|
||||
except threadpool.NoResultsPending:
|
||||
logging.info(u'* DONE in %s\n* Feeds: %s\n* Entries: %s' % (
|
||||
done = (u'* DONE in %s\n* Feeds: %s\n* Entries: %s' % (
|
||||
unicode(datetime.datetime.now() - self.time_start),
|
||||
u' '.join(u'%s=%d' % (self.feed_trans[key],
|
||||
self.feed_stats[key])
|
||||
|
@ -286,9 +297,13 @@ class Dispatcher:
|
|||
self.entry_stats[key])
|
||||
for key in self.entry_keys)
|
||||
))
|
||||
print done
|
||||
logging.info(done)
|
||||
break
|
||||
except Exception, e:
|
||||
logging.error(u'I DONT KNOW')
|
||||
print(u'I DONT KNOW: %s - %s' % (e, locals()))
|
||||
except:
|
||||
print(u'I REALLY DONT KNOW: %s - %s' % (e, locals()))
|
||||
|
||||
class FeedFetcher(threading.Thread):
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue