OK. Turning on threading for feed fetching. Enough logging to catch the bugs. Fixed the segmentation fault on exit.

Samuel Clay 2009-09-10 02:31:55 +00:00
parent dd95ef5a37
commit 0caf49e9a4
4 changed files with 49 additions and 21 deletions

View file

@@ -6,6 +6,7 @@ from utils import feed_fetcher
 from utils.management_functions import daemonize
 import logging
 import socket
+import os
 class Command(BaseCommand):
@@ -16,7 +17,7 @@ class Command(BaseCommand):
             help='Wait timeout in seconds when connecting to feeds.'),
         make_option('-V', '--verbose', action='store_true',
             dest='verbose', default=False, help='Verbose output.'),
-        make_option('-w', '--workerthreads', type='int', default=4,
+        make_option('-w', '--workerthreads', type='int', default=20,
             help='Worker threads that will fetch feeds in parallel.'),
     )
@@ -35,4 +36,6 @@ class Command(BaseCommand):
         disp.poll()
+        os._exit(1)
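
The os._exit(1) call is the segfault fix named in the commit message. A normal sys.exit() raises SystemExit and runs full interpreter teardown, during which worker threads still executing inside C extension code can crash the process; os._exit() ends the process immediately and skips teardown. A minimal sketch of the distinction (hard_exit is illustrative, not this command's code):

    import os
    import sys

    def hard_exit(code):
        # sys.exit(code) would unwind and run interpreter teardown, where
        # live fetcher threads can segfault inside C extension code.
        # os._exit() terminates at once; note it does not flush buffers.
        sys.stdout.flush()
        os._exit(code)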

View file

@@ -40,7 +40,6 @@ NEWSBLUR.AssetModel.Reader.prototype = {
             data: data,
             type: 'POST',
             success: function(o) {
-                var log_regex = /\s+<div id="django_log"([\s|\S])*$/m;
                 var log_index = o.indexOf('<div id="django_log"');
                 var data;
@@ -51,10 +50,16 @@ NEWSBLUR.AssetModel.Reader.prototype = {
                 if (log) {
                     var log_js_index_begin = log.indexOf('<script type=\"text\/javascript\">');
                     var log_js_index_end = log.indexOf('</script>');
-                    var log_html = log.substring(0, log_js_index_begin);
-                    var log_js = log.substring(log_js_index_begin+31, log_js_index_end);
-                    $('#django_log').replaceWith(log_html);
-                    var js = eval(log_js);
+                    var log_html, log_js;
+                    if (log_js_index_begin != -1) {
+                        log_html = log.substring(0, log_js_index_begin);
+                        log_js = log.substring(log_js_index_begin+31, log_js_index_end);
+                        $('#django_log').replaceWith(log_html);
+                        var js = eval(log_js);
+                    } else {
+                        $('#django_log').replaceWith(log);
+                    }
                 }
             } else {
                 try {
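
The new guard covers AJAX responses whose django_log div carries no inline script: indexOf() returns -1 in that case, and the unguarded code would slice a nonsense substring and eval() it. The magic 31, kept in the fix, is just the length of '<script type="text/javascript">'. A minimal Python rendering of the same guard, using len(marker) instead of the hard-coded offset (split_log is an illustrative name, not a function in this codebase):

    def split_log(log):
        # Split a rendered log fragment into HTML and inline JS, tolerating
        # fragments with no <script> block (the case the old code mishandled).
        marker = '<script type="text/javascript">'   # len(marker) == 31
        start = log.find(marker)
        if start == -1:
            return log, None                         # no inline script
        end = log.find('</script>')
        return log[:start], log[start + len(marker):end]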

View file

@@ -67,10 +67,11 @@ if PRODUCTION:
     MEDIA_URL = 'http://www.newsblur.com/media/'
     DEBUG = False
     CACHE_BACKEND = 'file:///var/tmp/django_cache'
-    logging.basicConfig(level=logging.WARN,
+    logging.basicConfig(level=logging.INFO,
                         format='%(asctime)s %(levelname)s %(message)s',
                         filename=LOG_FILE,
                         filemode='w')
+    PREPEND_WWW = True
 elif STAGING:
     DATABASE_ENGINE = 'mysql'
     DATABASE_NAME = 'newsblur'
@@ -117,7 +118,8 @@ elif DEV_SERVER2:
     # Example: "/Users/media/media.lawrence.com/"
     MEDIA_URL = '/media/'
     DEBUG = True
-    CACHE_BACKEND = 'dummy:///'
+    # CACHE_BACKEND = 'dummy:///'
+    CACHE_BACKEND = 'locmem:///'
     logging.basicConfig(level=logging.DEBUG,
                         format='%(asctime)s %(levelname)s %(message)s',
                         filename=LOG_FILE,
@@ -142,9 +144,10 @@ TEMPLATE_CONTEXT_PROCESSORS = (
 MIDDLEWARE_CLASSES = (
     'django.middleware.gzip.GZipMiddleware',
+    'django.contrib.sessions.middleware.SessionMiddleware',
     'django.middleware.common.CommonMiddleware',
     'django.middleware.cache.CacheMiddleware',
     'django.contrib.sessions.middleware.SessionMiddleware',
     'django.middleware.transaction.TransactionMiddleware',
     'django.contrib.auth.middleware.AuthenticationMiddleware',
     'djangologging.middleware.LoggingMiddleware',
 )
@@ -206,6 +209,8 @@ TEST_DATABASE_COLLATION = 'utf8_general_ci'
 ROOT_URLCONF = 'urls'
 INTERNAL_IPS = ('127.0.0.1',)
 LOGGING_LOG_SQL = True
+APPEND_SLASH = True
+SESSION_ENGINE = "django.contrib.sessions.backends.cache"
 # ===============
 # = Django Apps =
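
Two threads tie the settings hunks together. SESSION_ENGINE moves session storage into the cache, so CACHE_BACKEND has to be a real cache everywhere: locmem:/// (per-process, in-memory) replaces the no-op dummy:/// backend in development, and production already has the file cache. SessionMiddleware also gains an entry near the top of MIDDLEWARE_CLASSES, ahead of CommonMiddleware and the cache middleware; the original entry below CacheMiddleware appears to have been left in place, so it now runs twice. A sketch of the resulting wiring, in Django 1.x-era names:

    # Cache-backed sessions live and die with the cache, so restarting a
    # locmem-backed dev server logs everyone out.
    CACHE_BACKEND = 'locmem:///'
    SESSION_ENGINE = 'django.contrib.sessions.backends.cache'
    MIDDLEWARE_CLASSES = (
        'django.middleware.gzip.GZipMiddleware',
        'django.contrib.sessions.middleware.SessionMiddleware',  # before the cache
        'django.middleware.common.CommonMiddleware',
        'django.middleware.cache.CacheMiddleware',
        # ...
    )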

View file

@@ -3,6 +3,7 @@ from django.core.cache import cache
 from apps.reader.models import UserSubscription, UserSubscriptionFolders, UserStory
 from apps.rss_feeds.importer import PageImporter
 from utils import feedparser, threadpool
+from django.db import transaction
 import sys
 import time
 import logging
@@ -10,12 +11,12 @@ import datetime
 import threading
 import traceback
-threadpool = None
+# threadpool = None
 # Refresh feed code adapted from Feedjack.
 # http://feedjack.googlecode.com
-VERSION = '0.2'
+VERSION = '0.8'
 URL = 'http://www.newsblur.com/'
 USER_AGENT = 'NewsBlur %s - %s' % (VERSION, URL)
 SLOWFEED_WARNING = 10
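
Commenting out threadpool = None is what actually turns threading on: the module-level rebinding had been shadowing the threadpool imported from utils above, presumably forcing the dispatcher down its serial path. A minimal sketch of how the classic threadpool module is driven (fetch, feeds, and run are illustrative stand-ins, not this file's API):

    import threadpool

    def fetch(feed):
        pass                                 # download and process one feed

    def run(feeds, workers=20):              # 20 matches the new --workerthreads default
        pool = threadpool.ThreadPool(workers)
        for req in threadpool.makeRequests(fetch, feeds):
            pool.putRequest(req)
        try:
            while True:
                pool.poll(True)              # block per result; raises when all are in
        except threadpool.NoResultsPending:
            pass                             # every feed has been processed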
@@ -44,11 +45,12 @@ class FetchFeed:
         """ Downloads and parses a feed.
         """
-        logging.debug(u'[%d] Fetching %s' % (self.feed.id,
-                                             self.feed.feed_title))
-        # we check the etag and the modified time to save bandwith and
-        # avoid bans
+        log_msg = u'[%d] Fetching %s' % (self.feed.id,
+                                         self.feed.feed_title)
+        logging.info(log_msg)
+        print(log_msg)
+        # we check the etag and the modified time to save bandwith and avoid bans
         try:
             self.fpf = feedparser.parse(self.feed.feed_address,
                                         agent=USER_AGENT,
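
The etag/modified comment refers to feedparser's conditional-GET support. A minimal sketch; feed.etag and feed.last_modified are assumed fields holding the values saved from the previous fetch, not necessarily this model's names:

    import feedparser

    def fetch_conditionally(feed, user_agent):
        # feedparser turns etag/modified into If-None-Match / If-Modified-Since
        # headers; servers answer 304 with no body when the feed is unchanged.
        fpf = feedparser.parse(feed.feed_address,
                               agent=user_agent,
                               etag=feed.etag,
                               modified=feed.last_modified)
        if getattr(fpf, 'status', None) == 304:
            return None                      # unchanged: bandwidth saved, bans avoided
        return fpf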
@@ -63,7 +65,8 @@ class FetchPage:
     def __init__(self, feed, options):
         self.feed = feed
         self.options = options
+    @transaction.autocommit
     def fetch(self):
         logging.debug(u'[%d] Fetching page from %s' % (self.feed.id,
                                                        self.feed.feed_title))
@@ -79,6 +82,7 @@ class ProcessFeed:
         self.options = options
         self.fpf = fpf
+    @transaction.commit_on_success
     def process(self):
         """ Downloads and parses a feed.
         """
@@ -211,6 +215,12 @@ class Dispatcher:
         """ wrapper for ProcessFeed
         """
         start_time = datetime.datetime.now()
+        ### Uncomment to test feed fetcher
+        # from random import randint
+        # if randint(0,10) < 10:
+        #     return 5, {}
         try:
             ffeed = FetchFeed(feed, self.options)
             fetched_feed = ffeed.fetch()
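
A note on the commented-out test hook: randint(0,10) < 10 is true ten times out of eleven, so enabling it short-circuits roughly 91% of fetches with a canned return value, exercising the dispatcher's threading and bookkeeping without hammering real feed servers.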
@@ -238,13 +248,14 @@ class Dispatcher:
             comment = u' (SLOW FEED!)'
         else:
             comment = u''
-        logging.debug(u'[%d] Processed %s in %s [%s] [%s]%s' % (
+        done = (u'[%d] Processed %s in %s [%s] [%s]%s' % (
             feed.id, feed.feed_title, unicode(delta),
             self.feed_trans[ret_feed],
             u' '.join(u'%s=%d' % (self.entry_trans[key],
                                   ret_entries[key]) for key in self.entry_keys),
             comment))
+        logging.debug(done)
+        print(done)
         self.feed_stats[ret_feed] += 1
         for key, val in ret_entries.items():
             self.entry_stats[key] += val
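
Emitting each status line through both logging and print mirrors the FetchFeed change above: a daemonized fetcher writes the line to LOG_FILE, while a foreground run still shows progress on the console.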
@@ -277,7 +288,7 @@ class Dispatcher:
                 logging.debug('! Cancelled by user')
                 break
             except threadpool.NoResultsPending:
-                logging.info(u'* DONE in %s\n* Feeds: %s\n* Entries: %s' % (
+                done = (u'* DONE in %s\n* Feeds: %s\n* Entries: %s' % (
                     unicode(datetime.datetime.now() - self.time_start),
                     u' '.join(u'%s=%d' % (self.feed_trans[key],
                                           self.feed_stats[key])
@@ -286,9 +297,13 @@ class Dispatcher:
                                           self.entry_stats[key])
                     for key in self.entry_keys)
                 ))
+                print done
+                logging.info(done)
                 break
             except Exception, e:
-                logging.error(u'I DONT KNOW')
+                print(u'I DONT KNOW: %s - %s' % (e, locals()))
+            except:
+                print(u'I REALLY DONT KNOW: %s - %s' % (e, locals()))
 class FeedFetcher(threading.Thread):
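
One caveat in the new handlers: the bare except interpolates e, but e is only bound when the preceding except Exception, e: clause matched, so an exception that reaches the bare clause directly (under Python 2.5+, anything deriving from BaseException but not Exception, such as KeyboardInterrupt) can raise a NameError inside the handler. A safer era-appropriate shape pulls the active exception from sys.exc_info(); run_dispatcher is an illustrative stand-in:

    import sys
    import traceback

    def run_dispatcher():
        raise KeyboardInterrupt              # stand-in for the poll loop blowing up

    try:
        run_dispatcher()
    except Exception, e:                     # Python 2 syntax, matching the file
        print(u'I DONT KNOW: %s' % e)
    except:                                  # BaseException and old-style raises
        # 'e' was never bound on this path; sys.exc_info() always works here.
        print(u'I REALLY DONT KNOW: %s' % (sys.exc_info()[1],))
        traceback.print_exc()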