Mirror of https://github.com/samuelclay/NewsBlur.git
Synced 2025-09-18 21:50:56 +00:00
Crazy refactor of feedfinder to extract timelimits, so they can be caught by a lower-level API instead of messing up the feed fetcher.
parent cd2ec335b7
commit 22cf5ede6c
8 changed files with 102 additions and 89 deletions
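Before the per-file changes, it helps to see the mechanism being moved around. The `timelimit` helper (its full web.py-derived implementation appears near the bottom of this diff, where it is deleted from feedfinder) runs the decorated function on a daemon thread and abandons it after a deadline. A minimal self-contained sketch of that pattern, written in modern Python rather than the repository's Python 2 idiom:

```python
import sys
import threading

class TimeoutError(Exception):
    pass

def timelimit(timeout):
    """Run the decorated function in a daemon thread; if it hasn't
    finished after `timeout` seconds, abandon it and raise TimeoutError
    in the caller's thread instead."""
    def _decorator(function):
        def _wrapper(*args, **kwargs):
            outcome = {}
            def _target():
                try:
                    outcome['result'] = function(*args, **kwargs)
                except Exception:
                    outcome['error'] = sys.exc_info()
            worker = threading.Thread(target=_target)
            worker.daemon = True  # don't keep the process alive for an abandoned call
            worker.start()
            worker.join(timeout)
            if worker.is_alive():
                raise TimeoutError('took too long')
            if 'error' in outcome:
                raise outcome['error'][1]  # re-raise the worker's exception here
            return outcome['result']
        return _wrapper
    return _decorator
```

The commit's point is where this decorator lives: buried inside feedfinder it killed whole fetches from below; applied by the caller, the resulting TimeoutError can be caught and handled like any other feed error.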
@@ -21,7 +21,7 @@ from utils import json_functions as json
 from utils import feedfinder
 from utils.fields import AutoOneToOneField
 from utils.feed_functions import levenshtein_distance
-from utils.feed_functions import timelimit
+from utils.feed_functions import timelimit, TimeoutError
 from utils.story_functions import pre_process_story
 from utils.diff import HTMLDiff
 from utils import log as logging
@@ -106,32 +106,40 @@ class Feed(models.Model):
         self.count_subscribers()
         self.set_next_scheduled_update()

-    @timelimit(20)
     def check_feed_address_for_feed_link(self):
-        feed_address = None
-
-        if not feedfinder.isFeed(self.feed_address):
-            feed_address = feedfinder.feed(self.feed_address)
-            if not feed_address:
-                feed_address = feedfinder.feed(self.feed_link)
-        else:
-            feed_address_from_link = feedfinder.feed(self.feed_link)
-            if feed_address_from_link != self.feed_address:
-                feed_address = feed_address_from_link
+        @timelimit(10)
+        def _1():
+            feed_address = None
+            if not feedfinder.isFeed(self.feed_address):
+                feed_address = feedfinder.feed(self.feed_address)
+                if not feed_address and self.feed_link:
+                    feed_address = feedfinder.feed(self.feed_link)
+            else:
+                feed_address_from_link = feedfinder.feed(self.feed_link)
+                if feed_address_from_link != self.feed_address:
+                    feed_address = feed_address_from_link

-        if feed_address:
-            try:
-                self.feed_address = feed_address
-                self.next_scheduled_update = datetime.datetime.utcnow()
-                self.has_feed_exception = False
-                self.active = True
-                self.save()
-            except IntegrityError:
-                original_feed = Feed.objects.get(feed_address=feed_address)
-                original_feed.has_feed_exception = False
-                original_feed.active = True
-                original_feed.save()
-                merge_feeds(original_feed.pk, self.pk)
+            if feed_address:
+                try:
+                    self.feed_address = feed_address
+                    self.next_scheduled_update = datetime.datetime.utcnow()
+                    self.has_feed_exception = False
+                    self.active = True
+                    self.save()
+                except IntegrityError:
+                    original_feed = Feed.objects.get(feed_address=feed_address)
+                    original_feed.has_feed_exception = False
+                    original_feed.active = True
+                    original_feed.save()
+                    merge_feeds(original_feed.pk, self.pk)
+            return feed_address
+
+        try:
+            feed_address = _1()
+        except TimeoutError:
+            logging.debug(' ---> [%-30s] Feed address check timed out...' % (unicode(self.feed_title)[:30]))
+            self.save_feed_history(505, 'Timeout', '')
+            feed_address = None

         return not not feed_address
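The hunk above replaces a `@timelimit(20)` on the whole method with a tighter `@timelimit(10)` on a local helper, so the timeout can be caught inside the method instead of killing it from outside. Reduced to its shape (a sketch; `find_address` is a hypothetical stand-in for the feedfinder calls):

```python
from utils.feed_functions import timelimit, TimeoutError

def check_feed_address_for_feed_link(feed):
    @timelimit(10)
    def _1():
        # all of the potentially slow feedfinder work lives in here
        return find_address(feed)  # hypothetical stand-in

    try:
        feed_address = _1()
    except TimeoutError:
        feed.save_feed_history(505, 'Timeout', '')
        feed_address = None
    return not not feed_address  # i.e. bool(feed_address)
```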
@@ -18,7 +18,7 @@ class PageImporter(object):
         self.url = url
         self.feed = feed

-    @timelimit(30)
+    @timelimit(15)
     def fetch_page(self):
         if not self.url:
             return
@@ -2,12 +2,13 @@
 * project-site: http://plugins.jquery.com/project/AjaxManager
 * repository: http://github.com/aFarkas/Ajaxmanager
 * @author Alexander Farkas
-* @version 3.06
+* @version 3.10
 * Copyright 2010, Alexander Farkas
 * Dual licensed under the MIT or GPL Version 2 licenses.
 */

(function($){
+    "use strict";
     var managed = {},
         cache = {}
     ;
@@ -58,7 +59,14 @@
             that = this,
             ajaxFn = this._createAjax(xhrID, o, origSuc, origCom)
         ;
-
+        if(o.preventDoubbleRequests && o.queueDuplicateRequests){
+            if(o.preventDoubbleRequests){
+                o.queueDuplicateRequests = false;
+            }
+            setTimeout(function(){
+                throw("preventDoubbleRequests and queueDuplicateRequests can't be both true");
+            }, 0);
+        }
         if(this.requests[xhrID] && o.preventDoubbleRequests){
             return;
         }
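The new guard resolves a contradictory configuration by forcing one option off, then reports the mistake from inside `setTimeout` so the error surfaces without aborting the request in flight. The policy itself, restated as a hedged Python sketch (`report_error` is a hypothetical stand-in for the deferred `throw`):

```python
def normalize_request_options(options, report_error):
    """preventDoubbleRequests wins: duplicate requests are dropped,
    not queued, and the bad combination is reported out-of-band."""
    if options.get('preventDoubbleRequests') and options.get('queueDuplicateRequests'):
        options['queueDuplicateRequests'] = False
        report_error("preventDoubbleRequests and queueDuplicateRequests "
                     "can't be both true")
    return options
```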
@@ -106,9 +114,9 @@
             $(document).clearQueue(this.qName);
         }

-        if(o.queue){
+        if(o.queue || (o.queueDuplicateRequests && this.requests[xhrID])){
             $.queue(document, this.qName, ajaxFn);
-            if(this.inProgress < o.maxRequests){
+            if(this.inProgress < o.maxRequests && (!this.requests[xhrID] || !o.queueDuplicateRequests)){
                 $.dequeue(document, this.qName);
             }
             return xhrID;
@@ -124,12 +132,17 @@
                 $.event.trigger(that.name +'AjaxStart');
             }
             if(o.cacheResponse && cache[id]){
-                that.requests[id] = {};
-                setTimeout(function(){
-                    that._complete.call(that, o.context || o, origCom, cache[id], 'success', id, o);
-                    that._success.call(that, o.context || o, origSuc, cache[id]._successData, 'success', cache[id], o);
-                }, 0);
-            } else {
+                if(!cache[id].cacheTTL || cache[id].cacheTTL < 0 || ((new Date().getTime() - cache[id].timestamp) < cache[id].cacheTTL)){
+                    that.requests[id] = {};
+                    setTimeout(function(){
+                        that._success.call(that, o.context || o, origSuc, cache[id]._successData, 'success', cache[id], o);
+                        that._complete.call(that, o.context || o, origCom, cache[id], 'success', id, o);
+                    }, 0);
+                } else {
+                    delete cache[id];
+                }
+            }
+            if(!o.cacheResponse || !cache[id]) {
                 if (o.async) {
                     that.requests[id] = $.ajax(o);
                 } else {
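The new branch only replays a cached response while it is still fresh; a `cacheTTL` that is unset or negative means "never expires", and ages are measured in milliseconds because the plugin uses `Date().getTime()`. The same rule as a small Python sketch:

```python
import time

def cache_entry_is_fresh(entry):
    """Mirror of the expiry test added above: no TTL (or a negative
    one) keeps the entry forever; otherwise compare its age, in
    milliseconds, against cacheTTL."""
    ttl = entry.get('cacheTTL')
    if not ttl or ttl < 0:
        return True
    age_ms = time.time() * 1000 - entry['timestamp']
    return age_ms < ttl
```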
@@ -140,20 +153,26 @@
             };
         },
         _removeXHR: function(xhrID){
-            if(this.opts.queue){
+            if(this.opts.queue || this.opts.queueDuplicateRequests){
                 $.dequeue(document, this.qName);
             }
             this.inProgress--;
             this.requests[xhrID] = null;
             delete this.requests[xhrID];
         },
-        _isAbort: function(xhr, o){
-            var ret = !!( o.abortIsNoSuccess && ( !xhr || xhr.readyState === 0 || this.lastAbort === o.xhrID ) );
+        clearCache: function () {
+            cache = {};
+        },
+        _isAbort: function(xhr, status, o){
+            if(!o.abortIsNoSuccess || (!xhr && !status)){
+                return false;
+            }
+            var ret = !!( ( !xhr || xhr.readyState === 0 || this.lastAbort === o.xhrID ) );
             xhr = null;
             return ret;
         },
         _complete: function(context, origFn, xhr, status, xhrID, o){
-            if(this._isAbort(xhr, o)){
+            if(this._isAbort(xhr, status, o)){
                 status = 'abort';
                 o.abort.call(context, xhr, status, o);
             }
@@ -176,7 +195,7 @@
         },
         _success: function(context, origFn, data, status, xhr, o){
             var that = this;
-            if(this._isAbort(xhr, o)){
+            if(this._isAbort(xhr, status, o)){
                 xhr = null;
                 return;
             }
@@ -189,25 +208,35 @@
             });
         }
         if(o.cacheResponse && !cache[o.xhrID]){
+            if(!xhr){
+                xhr = {};
+            }
             cache[o.xhrID] = {
                 status: xhr.status,
                 statusText: xhr.statusText,
                 responseText: xhr.responseText,
                 responseXML: xhr.responseXML,
-                _successData: data
+                _successData: data,
+                cacheTTL: o.cacheTTL,
+                timestamp: new Date().getTime()
             };
-            if(xhr.getAllResponseHeaders){
+            if('getAllResponseHeaders' in xhr){
                 var responseHeaders = xhr.getAllResponseHeaders();
+                var parsedHeaders;
+                var parseHeaders = function(){
+                    if(parsedHeaders){return;}
+                    parsedHeaders = {};
+                    $.each(responseHeaders.split("\n"), function(i, headerLine){
+                        var delimiter = headerLine.indexOf(":");
+                        parsedHeaders[headerLine.substr(0, delimiter)] = headerLine.substr(delimiter + 2);
+                    });
+                };
                 $.extend(cache[o.xhrID], {
                     getAllResponseHeaders: function() {return responseHeaders;},
-                    getResponseHeader: (function(){
-                        var parsedHeaders = {};
-                        $.each(responseHeaders.split("\n"), function(i, headerLine){
-                            var delimiter = headerLine.indexOf(":");
-                            parsedHeaders[headerLine.substr(0, delimiter)] = headerLine.substr(delimiter + 2);
-                        });
-                        return function(name) {return parsedHeaders[name];};
-                    }())
+                    getResponseHeader: function(name) {
+                        parseHeaders();
+                        return (name in parsedHeaders) ? parsedHeaders[name] : null;
+                    }
                 });
             }
         }
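The rewritten cache entry also swaps an eagerly built header map for a `parseHeaders` closure that parses on first lookup and memoizes the result, and it now returns `null` for missing headers instead of `undefined`. A hedged Python equivalent of that memoization:

```python
def make_header_reader(raw_headers):
    """Parse 'Name: value' lines on the first lookup only, then reuse
    the parsed dict (the same memoization as parseHeaders above)."""
    parsed = {}
    done = []  # non-empty once parsing has run

    def get_response_header(name):
        if not done:
            for line in raw_headers.split('\n'):
                delimiter = line.find(':')
                if delimiter > 0:
                    # skip the ': ' between name and value, hence the +2
                    parsed[line[:delimiter]] = line[delimiter + 2:]
            done.append(True)
        return parsed.get(name)  # None when the header is absent

    return get_response_header
```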
@@ -284,6 +313,8 @@
         domCompleteTrigger: false,
         domSuccessTrigger: false,
         preventDoubbleRequests: true,
+        queueDuplicateRequests: false,
+        cacheTTL: -1,
         queue: false // true, false, clear
     };
@@ -85,8 +85,8 @@ NEWSBLUR.AssetModel.Reader.prototype = {
                 callback(o);
             }
         },
-        error: function(e) {
-            // NEWSBLUR.log(['AJAX Error', e]);
+        error: function(e, textStatus, errorThrown) {
+            NEWSBLUR.log(['AJAX Error', textStatus, errorThrown]);
             if ($.isFunction(error_callback)) {
                 error_callback();
             } else if ($.isFunction(callback)) {
@@ -187,6 +187,7 @@ NEWSBLUR.AssetModel.Reader.prototype = {
         var self = this;

         var pre_callback = function(subscriptions) {
+            NEWSBLUR.log(['subscriptions', subscriptions]);
             var flat_feeds = function(feeds) {
                 var flattened = _.flatten(_.map(feeds, _.values));
                 return _.flatten(_.map(flattened, function(feed) {
@@ -246,19 +246,20 @@ INSTALLED_APPS = (
     'apps.analyzer',
     'apps.feed_import',
     'apps.profile',
-    'devserver',
-    'south',
     # 'test_utils',
     'utils',
     'utils.typogrify',
     'utils.paypal.standard.ipn',
-    # 'debug_toolbar'
 )

 if not DEVELOPMENT:
     INSTALLED_APPS += (
         'gunicorn',
     )
+elif DEVELOPMENT:
+    INSTALLED_APPS += (
+        'devserver',
+    )

 DEVSERVER_MODULES = (
     'devserver.modules.sql.SQLRealTimeModule',
@@ -39,7 +39,7 @@ class FetchFeed:
         self.options = options
         self.fpf = None

-    @timelimit(30)
+    @timelimit(20)
     def fetch(self):
         """
         Uses feedparser to download the feed. Will be parsed later.
@@ -1,8 +1,10 @@
 import datetime
 import threading
 import sys
+import traceback
 from django.utils.translation import ungettext
 from utils import feedfinder
+from utils import log as logging

 class TimeoutError(Exception): pass
 def timelimit(timeout):
@@ -29,6 +31,8 @@ def timelimit(timeout):
             if c.isAlive():
                 raise TimeoutError, 'took too long'
             if c.error:
+                tb = ''.join(traceback.format_exception(c.error[0], c.error[1], c.error[2]))
+                logging.debug(tb)
                 raise c.error[0], c.error[1]
             return c.result
         return _2
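The two added lines are the reason for the new `traceback` and `log` imports: an exception captured on the worker thread via `sys.exc_info()` is rendered to a full stack trace and logged before being re-raised in the caller's thread. The mechanics in isolation:

```python
import sys
import traceback

def capture_and_format():
    try:
        raise ValueError('boom inside the worker thread')
    except Exception:
        error = sys.exc_info()  # what the Dispatch thread stores in c.error
    # exactly what the added lines do before re-raising:
    tb = ''.join(traceback.format_exception(error[0], error[1], error[2]))
    return tb

print(capture_and_format())
```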
@@ -48,37 +48,6 @@ _debug = 0

 import sgmllib, urllib, urlparse, re, sys, robotparser

-import threading
-class TimeoutError(Exception): pass
-def timelimit(timeout):
-    """borrowed from web.py"""
-    def _1(function):
-        def _2(*args, **kw):
-            class Dispatch(threading.Thread):
-                def __init__(self):
-                    threading.Thread.__init__(self)
-                    self.result = None
-                    self.error = None
-
-                    self.setDaemon(True)
-                    self.start()
-
-                def run(self):
-                    try:
-                        self.result = function(*args, **kw)
-                    except:
-                        self.error = sys.exc_info()
-
-            c = Dispatch()
-            c.join(timeout)
-            if c.isAlive():
-                raise TimeoutError, 'took too long'
-            if c.error:
-                raise c.error[0], c.error[1]
-            return c.result
-        return _2
-    return _1
-
 # XML-RPC support allows feedfinder to query Syndic8 for possible matches.
 # Python 2.3 now comes with this module by default, otherwise you can download it
 try:
@@ -128,7 +97,6 @@ class URLGatekeeper:
         _debuglog("gatekeeper of %s says %s" % (url, allow))
         return allow

-    @timelimit(10)
     def get(self, url, check=True):
         if check and not self.can_fetch(url): return ''
         try:
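With the embedded helper and the `@timelimit(10)` on `URLGatekeeper.get` both removed, feedfinder no longer enforces any deadline of its own. Callers are expected to bound it with the shared decorator instead, roughly like this (a sketch, not code from this commit):

```python
from utils import feedfinder
from utils.feed_functions import timelimit, TimeoutError

@timelimit(10)
def find_feed_address(url):
    # bound the whole discovery step at the call site,
    # instead of deep inside feedfinder's HTTP layer
    return feedfinder.feed(url)

try:
    address = find_feed_address('http://example.com/')
except TimeoutError:
    address = None  # handled here, not blowing up the feed fetcher
```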