Crazy refactor of feedfinder to extract its timelimits, so timeouts can be caught by a lower-level API instead of messing up the feed fetcher.

Samuel Clay 2011-02-02 13:07:12 -05:00
parent cd2ec335b7
commit 22cf5ede6c
8 changed files with 102 additions and 89 deletions
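The pattern behind this commit: feedfinder used to enforce its own @timelimit deep inside its fetching code, so a timeout blew up in the middle of a fetch. After this change the thread-based timelimit decorator lives only in utils.feed_functions, the slow work is wrapped in a small inner function, and the caller catches TimeoutError itself. A minimal, self-contained Python 3 sketch of that decorator and call-site pattern (a simplification of the Python 2 code in the diffs below; the 2-second limit and function names are illustrative):

import threading
import time

class TimeoutError(Exception):
    pass

def timelimit(timeout):
    # Run the decorated function in a daemon thread; raise TimeoutError in
    # the calling thread if the work has not finished within `timeout` seconds.
    def decorator(function):
        def wrapper(*args, **kwargs):
            result = {}
            def target():
                try:
                    result['value'] = function(*args, **kwargs)
                except Exception as exc:
                    result['error'] = exc
            worker = threading.Thread(target=target)
            worker.daemon = True
            worker.start()
            worker.join(timeout)
            if worker.is_alive():
                raise TimeoutError('took too long')
            if 'error' in result:
                raise result['error']
            return result.get('value')
        return wrapper
    return decorator

@timelimit(2)
def slow_feed_lookup():
    time.sleep(5)  # stands in for feedfinder's network probing

try:
    slow_feed_lookup()
except TimeoutError:
    # The caller decides what a timeout means (NewsBlur logs it and files
    # a 505 entry in the feed's history) instead of the fetch dying mid-run.
    print('feed address check timed out')

Because the join happens in the caller's thread, the TimeoutError surfaces exactly where the Feed model can handle it, which is what the first hunk below does.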

View file

@@ -21,7 +21,7 @@ from utils import json_functions as json
 from utils import feedfinder
 from utils.fields import AutoOneToOneField
 from utils.feed_functions import levenshtein_distance
-from utils.feed_functions import timelimit
+from utils.feed_functions import timelimit, TimeoutError
 from utils.story_functions import pre_process_story
 from utils.diff import HTMLDiff
 from utils import log as logging
@@ -106,32 +106,40 @@ class Feed(models.Model):
         self.count_subscribers()
         self.set_next_scheduled_update()

-    @timelimit(20)
     def check_feed_address_for_feed_link(self):
-        feed_address = None
-        if not feedfinder.isFeed(self.feed_address):
-            feed_address = feedfinder.feed(self.feed_address)
-            if not feed_address:
-                feed_address = feedfinder.feed(self.feed_link)
-        else:
-            feed_address_from_link = feedfinder.feed(self.feed_link)
-            if feed_address_from_link != self.feed_address:
-                feed_address = feed_address_from_link
+        @timelimit(10)
+        def _1():
+            feed_address = None
+            if not feedfinder.isFeed(self.feed_address):
+                feed_address = feedfinder.feed(self.feed_address)
+                if not feed_address and self.feed_link:
+                    feed_address = feedfinder.feed(self.feed_link)
+            else:
+                feed_address_from_link = feedfinder.feed(self.feed_link)
+                if feed_address_from_link != self.feed_address:
+                    feed_address = feed_address_from_link

-        if feed_address:
-            try:
-                self.feed_address = feed_address
-                self.next_scheduled_update = datetime.datetime.utcnow()
-                self.has_feed_exception = False
-                self.active = True
-                self.save()
-            except IntegrityError:
-                original_feed = Feed.objects.get(feed_address=feed_address)
-                original_feed.has_feed_exception = False
-                original_feed.active = True
-                original_feed.save()
-                merge_feeds(original_feed.pk, self.pk)
+            if feed_address:
+                try:
+                    self.feed_address = feed_address
+                    self.next_scheduled_update = datetime.datetime.utcnow()
+                    self.has_feed_exception = False
+                    self.active = True
+                    self.save()
+                except IntegrityError:
+                    original_feed = Feed.objects.get(feed_address=feed_address)
+                    original_feed.has_feed_exception = False
+                    original_feed.active = True
+                    original_feed.save()
+                    merge_feeds(original_feed.pk, self.pk)
+            return feed_address
+
+        try:
+            feed_address = _1()
+        except TimeoutError:
+            logging.debug(' ---> [%-30s] Feed address check timed out...' % (unicode(self.feed_title)[:30]))
+            self.save_feed_history(505, 'Timeout', '')
+            feed_address = None
+
         return not not feed_address

View file

@@ -18,7 +18,7 @@ class PageImporter(object):
         self.url = url
         self.feed = feed

-    @timelimit(30)
+    @timelimit(15)
     def fetch_page(self):
         if not self.url:
             return

View file

@@ -2,12 +2,13 @@
  * project-site: http://plugins.jquery.com/project/AjaxManager
  * repository: http://github.com/aFarkas/Ajaxmanager
  * @author Alexander Farkas
- * @version 3.06
+ * @version 3.10
  * Copyright 2010, Alexander Farkas
  * Dual licensed under the MIT or GPL Version 2 licenses.
  */
 (function($){
+    "use strict";
     var managed = {},
         cache = {}
     ;
@@ -58,7 +59,14 @@
             that = this,
             ajaxFn = this._createAjax(xhrID, o, origSuc, origCom)
         ;
+        if(o.preventDoubbleRequests && o.queueDuplicateRequests){
+            if(o.preventDoubbleRequests){
+                o.queueDuplicateRequests = false;
+            }
+            setTimeout(function(){
+                throw("preventDoubbleRequests and queueDuplicateRequests can't be both true");
+            }, 0);
+        }
         if(this.requests[xhrID] && o.preventDoubbleRequests){
             return;
         }
@@ -106,9 +114,9 @@
             $(document).clearQueue(this.qName);
         }

-        if(o.queue){
+        if(o.queue || (o.queueDuplicateRequests && this.requests[xhrID])){
             $.queue(document, this.qName, ajaxFn);
-            if(this.inProgress < o.maxRequests){
+            if(this.inProgress < o.maxRequests && (!this.requests[xhrID] || !o.queueDuplicateRequests)){
                 $.dequeue(document, this.qName);
             }
             return xhrID;
@@ -124,12 +132,17 @@
                 $.event.trigger(that.name +'AjaxStart');
             }
             if(o.cacheResponse && cache[id]){
-                that.requests[id] = {};
-                setTimeout(function(){
-                    that._complete.call(that, o.context || o, origCom, cache[id], 'success', id, o);
-                    that._success.call(that, o.context || o, origSuc, cache[id]._successData, 'success', cache[id], o);
-                }, 0);
-            } else {
+                if(!cache[id].cacheTTL || cache[id].cacheTTL < 0 || ((new Date().getTime() - cache[id].timestamp) < cache[id].cacheTTL)){
+                    that.requests[id] = {};
+                    setTimeout(function(){
+                        that._success.call(that, o.context || o, origSuc, cache[id]._successData, 'success', cache[id], o);
+                        that._complete.call(that, o.context || o, origCom, cache[id], 'success', id, o);
+                    }, 0);
+                } else {
+                    delete cache[id];
+                }
+            }
+            if(!o.cacheResponse || !cache[id]) {
                 if (o.async) {
                     that.requests[id] = $.ajax(o);
                 } else {
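The hunk above gives the plugin's response cache TTL semantics: a missing or negative cacheTTL means the cached response never expires; otherwise an entry older than its TTL is deleted and the request is issued again. The same freshness rule as a hedged Python sketch (field names mirror the JavaScript cache entries; this is not the plugin's API):

import time

def is_fresh(entry, now_ms=None):
    # Freshness rule mirrored from the hunk above: a missing or negative
    # cacheTTL means the entry never expires; otherwise compare its age.
    if now_ms is None:
        now_ms = int(time.time() * 1000)  # milliseconds, like Date.getTime()
    ttl = entry.get('cacheTTL')
    if not ttl or ttl < 0:
        return True
    return (now_ms - entry['timestamp']) < ttl

# A cache hit either replays the stored response or evicts the stale entry:
cache = {'id1': {'cacheTTL': 5000, 'timestamp': int(time.time() * 1000) - 6000}}
if 'id1' in cache and not is_fresh(cache['id1']):
    del cache['id1']  # stale: fall through and issue a real request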
@@ -140,20 +153,26 @@
             };
         },
         _removeXHR: function(xhrID){
-            if(this.opts.queue){
+            if(this.opts.queue || this.opts.queueDuplicateRequests){
                 $.dequeue(document, this.qName);
             }
             this.inProgress--;
-            this.requests[xhrID] = null;
+            delete this.requests[xhrID];
         },
-        _isAbort: function(xhr, o){
-            var ret = !!( o.abortIsNoSuccess && ( !xhr || xhr.readyState === 0 || this.lastAbort === o.xhrID ) );
+        clearCache: function () {
+            cache = {};
+        },
+        _isAbort: function(xhr, status, o){
+            if(!o.abortIsNoSuccess || (!xhr && !status)){
+                return false;
+            }
+            var ret = !!( ( !xhr || xhr.readyState === 0 || this.lastAbort === o.xhrID ) );
             xhr = null;
             return ret;
         },
         _complete: function(context, origFn, xhr, status, xhrID, o){
-            if(this._isAbort(xhr, o)){
+            if(this._isAbort(xhr, status, o)){
                 status = 'abort';
                 o.abort.call(context, xhr, status, o);
             }
@@ -176,7 +195,7 @@
         },
         _success: function(context, origFn, data, status, xhr, o){
             var that = this;
-            if(this._isAbort(xhr, o)){
+            if(this._isAbort(xhr, status, o)){
                 xhr = null;
                 return;
             }
@@ -189,25 +208,35 @@
                 });
             }
             if(o.cacheResponse && !cache[o.xhrID]){
+                if(!xhr){
+                    xhr = {};
+                }
                 cache[o.xhrID] = {
                     status: xhr.status,
                     statusText: xhr.statusText,
                     responseText: xhr.responseText,
                     responseXML: xhr.responseXML,
-                    _successData: data
+                    _successData: data,
+                    cacheTTL: o.cacheTTL,
+                    timestamp: new Date().getTime()
                 };
-                if(xhr.getAllResponseHeaders){
+                if('getAllResponseHeaders' in xhr){
                     var responseHeaders = xhr.getAllResponseHeaders();
+                    var parsedHeaders;
+                    var parseHeaders = function(){
+                        if(parsedHeaders){return;}
+                        parsedHeaders = {};
+                        $.each(responseHeaders.split("\n"), function(i, headerLine){
+                            var delimiter = headerLine.indexOf(":");
+                            parsedHeaders[headerLine.substr(0, delimiter)] = headerLine.substr(delimiter + 2);
+                        });
+                    };
                     $.extend(cache[o.xhrID], {
                         getAllResponseHeaders: function() {return responseHeaders;},
-                        getResponseHeader: (function(){
-                            var parsedHeaders = {};
-                            $.each(responseHeaders.split("\n"), function(i, headerLine){
-                                var delimiter = headerLine.indexOf(":");
-                                parsedHeaders[headerLine.substr(0, delimiter)] = headerLine.substr(delimiter + 2);
-                            });
-                            return function(name) {return parsedHeaders[name];};
-                        }())
+                        getResponseHeader: function(name) {
+                            parseHeaders();
+                            return (name in parsedHeaders) ? parsedHeaders[name] : null;
+                        }
                     });
                 }
             }
@@ -284,6 +313,8 @@
         domCompleteTrigger: false,
         domSuccessTrigger: false,
         preventDoubbleRequests: true,
+        queueDuplicateRequests: false,
+        cacheTTL: -1,
         queue: false // true, false, clear
     };

View file

@@ -85,8 +85,8 @@ NEWSBLUR.AssetModel.Reader.prototype = {
                     callback(o);
                 }
             },
-            error: function(e) {
-                // NEWSBLUR.log(['AJAX Error', e]);
+            error: function(e, textStatus, errorThrown) {
+                NEWSBLUR.log(['AJAX Error', textStatus, errorThrown]);
                 if ($.isFunction(error_callback)) {
                     error_callback();
                 } else if ($.isFunction(callback)) {
@@ -187,6 +187,7 @@ NEWSBLUR.AssetModel.Reader.prototype = {
         var self = this;
         var pre_callback = function(subscriptions) {
+            NEWSBLUR.log(['subscriptions', subscriptions]);
             var flat_feeds = function(feeds) {
                 var flattened = _.flatten(_.map(feeds, _.values));
                 return _.flatten(_.map(flattened, function(feed) {
View file

@@ -246,19 +246,20 @@ INSTALLED_APPS = (
     'apps.analyzer',
     'apps.feed_import',
     'apps.profile',
-    'devserver',
     'south',
     # 'test_utils',
     'utils',
     'utils.typogrify',
     'utils.paypal.standard.ipn',
     # 'debug_toolbar'
 )

 if not DEVELOPMENT:
     INSTALLED_APPS += (
         'gunicorn',
     )
+elif DEVELOPMENT:
+    INSTALLED_APPS += (
+        'devserver',
+    )

 DEVSERVER_MODULES = (
     'devserver.modules.sql.SQLRealTimeModule',
View file

@@ -39,7 +39,7 @@ class FetchFeed:
         self.options = options
         self.fpf = None

-    @timelimit(30)
+    @timelimit(20)
     def fetch(self):
         """
        Uses feedparser to download the feed. Will be parsed later.

View file

@@ -1,8 +1,10 @@
 import datetime
 import threading
 import sys
+import traceback
 from django.utils.translation import ungettext
 from utils import feedfinder
+from utils import log as logging

 class TimeoutError(Exception): pass
 def timelimit(timeout):
@@ -29,6 +31,8 @@ def timelimit(timeout):
             if c.isAlive():
                 raise TimeoutError, 'took too long'
             if c.error:
+                tb = ''.join(traceback.format_exception(c.error[0], c.error[1], c.error[2]))
+                logging.debug(tb)
                 raise c.error[0], c.error[1]
             return c.result
         return _2
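The two added lines above matter because the Python 2 two-argument raise in `raise c.error[0], c.error[1]` re-raises the worker thread's exception without its original traceback; capturing sys.exc_info() inside the thread and rendering it with traceback.format_exception preserves the stack for the log. A small Python illustration of that mechanism:

import sys
import traceback

captured = None
try:
    1 / 0  # stand-in for the work done inside the timed thread
except Exception:
    captured = sys.exc_info()  # (type, value, traceback) triple

# Render the full stack trace as a string, exactly what gets logged above
tb = ''.join(traceback.format_exception(captured[0], captured[1], captured[2]))
print(tb)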

View file

@@ -48,37 +48,6 @@ _debug = 0
 import sgmllib, urllib, urlparse, re, sys, robotparser
 import threading

-class TimeoutError(Exception): pass
-def timelimit(timeout):
-    """borrowed from web.py"""
-    def _1(function):
-        def _2(*args, **kw):
-            class Dispatch(threading.Thread):
-                def __init__(self):
-                    threading.Thread.__init__(self)
-                    self.result = None
-                    self.error = None
-
-                    self.setDaemon(True)
-                    self.start()
-
-                def run(self):
-                    try:
-                        self.result = function(*args, **kw)
-                    except:
-                        self.error = sys.exc_info()
-
-            c = Dispatch()
-            c.join(timeout)
-            if c.isAlive():
-                raise TimeoutError, 'took too long'
-            if c.error:
-                raise c.error[0], c.error[1]
-            return c.result
-        return _2
-    return _1
-
 # XML-RPC support allows feedfinder to query Syndic8 for possible matches.
 # Python 2.3 now comes with this module by default, otherwise you can download it
 try:
@@ -128,7 +97,6 @@ class URLGatekeeper:
         _debuglog("gatekeeper of %s says %s" % (url, allow))
         return allow

-    @timelimit(10)
     def get(self, url, check=True):
         if check and not self.can_fetch(url): return ''
         try: