From 22cf5ede6caa1d5739c493ea9ace5a34c67c2e0d Mon Sep 17 00:00:00 2001 From: Samuel Clay Date: Wed, 2 Feb 2011 13:07:12 -0500 Subject: [PATCH] Crazy refactor of feedfinder to extract timelimits, so they can be caught by a lower-level api instead of messing up the feed fetcher. --- apps/rss_feeds/models.py | 58 +++++++++++++---------- apps/rss_feeds/page_importer.py | 2 +- media/js/jquery.ajaxmanager.3.js | 81 ++++++++++++++++++++++---------- media/js/newsblur/assetmodel.js | 5 +- settings.py | 7 +-- utils/feed_fetcher.py | 2 +- utils/feed_functions.py | 4 ++ utils/feedfinder.py | 32 ------------- 8 files changed, 102 insertions(+), 89 deletions(-) diff --git a/apps/rss_feeds/models.py b/apps/rss_feeds/models.py index a9b8c83f5..2653f83c7 100644 --- a/apps/rss_feeds/models.py +++ b/apps/rss_feeds/models.py @@ -21,7 +21,7 @@ from utils import json_functions as json from utils import feedfinder from utils.fields import AutoOneToOneField from utils.feed_functions import levenshtein_distance -from utils.feed_functions import timelimit +from utils.feed_functions import timelimit, TimeoutError from utils.story_functions import pre_process_story from utils.diff import HTMLDiff from utils import log as logging @@ -106,32 +106,40 @@ class Feed(models.Model): self.count_subscribers() self.set_next_scheduled_update() - @timelimit(20) def check_feed_address_for_feed_link(self): - feed_address = None - - if not feedfinder.isFeed(self.feed_address): - feed_address = feedfinder.feed(self.feed_address) - if not feed_address: - feed_address = feedfinder.feed(self.feed_link) - else: - feed_address_from_link = feedfinder.feed(self.feed_link) - if feed_address_from_link != self.feed_address: - feed_address = feed_address_from_link + @timelimit(10) + def _1(): + feed_address = None + if not feedfinder.isFeed(self.feed_address): + feed_address = feedfinder.feed(self.feed_address) + if not feed_address and self.feed_link: + feed_address = feedfinder.feed(self.feed_link) + else: + feed_address_from_link = feedfinder.feed(self.feed_link) + if feed_address_from_link != self.feed_address: + feed_address = feed_address_from_link - if feed_address: - try: - self.feed_address = feed_address - self.next_scheduled_update = datetime.datetime.utcnow() - self.has_feed_exception = False - self.active = True - self.save() - except IntegrityError: - original_feed = Feed.objects.get(feed_address=feed_address) - original_feed.has_feed_exception = False - original_feed.active = True - original_feed.save() - merge_feeds(original_feed.pk, self.pk) + if feed_address: + try: + self.feed_address = feed_address + self.next_scheduled_update = datetime.datetime.utcnow() + self.has_feed_exception = False + self.active = True + self.save() + except IntegrityError: + original_feed = Feed.objects.get(feed_address=feed_address) + original_feed.has_feed_exception = False + original_feed.active = True + original_feed.save() + merge_feeds(original_feed.pk, self.pk) + return feed_address + + try: + feed_address = _1() + except TimeoutError: + logging.debug(' ---> [%-30s] Feed address check timed out...' % (unicode(self.feed_title)[:30])) + self.save_feed_history(505, 'Timeout', '') + feed_address = None return not not feed_address diff --git a/apps/rss_feeds/page_importer.py b/apps/rss_feeds/page_importer.py index b0407a039..451745c13 100644 --- a/apps/rss_feeds/page_importer.py +++ b/apps/rss_feeds/page_importer.py @@ -18,7 +18,7 @@ class PageImporter(object): self.url = url self.feed = feed - @timelimit(30) + @timelimit(15) def fetch_page(self): if not self.url: return diff --git a/media/js/jquery.ajaxmanager.3.js b/media/js/jquery.ajaxmanager.3.js index ffc6f44e3..4b70fe315 100644 --- a/media/js/jquery.ajaxmanager.3.js +++ b/media/js/jquery.ajaxmanager.3.js @@ -2,12 +2,13 @@ * project-site: http://plugins.jquery.com/project/AjaxManager * repository: http://github.com/aFarkas/Ajaxmanager * @author Alexander Farkas - * @version 3.06 + * @version 3.10 * Copyright 2010, Alexander Farkas * Dual licensed under the MIT or GPL Version 2 licenses. */ (function($){ + "use strict"; var managed = {}, cache = {} ; @@ -58,7 +59,14 @@ that = this, ajaxFn = this._createAjax(xhrID, o, origSuc, origCom) ; - + if(o.preventDoubbleRequests && o.queueDuplicateRequests){ + if(o.preventDoubbleRequests){ + o.queueDuplicateRequests = false; + } + setTimeout(function(){ + throw("preventDoubbleRequests and queueDuplicateRequests can't be both true"); + }, 0); + } if(this.requests[xhrID] && o.preventDoubbleRequests){ return; } @@ -106,9 +114,9 @@ $(document).clearQueue(this.qName); } - if(o.queue){ + if(o.queue || (o.queueDuplicateRequests && this.requests[xhrID])){ $.queue(document, this.qName, ajaxFn); - if(this.inProgress < o.maxRequests){ + if(this.inProgress < o.maxRequests && (!this.requests[xhrID] || !o.queueDuplicateRequests)){ $.dequeue(document, this.qName); } return xhrID; @@ -124,12 +132,17 @@ $.event.trigger(that.name +'AjaxStart'); } if(o.cacheResponse && cache[id]){ - that.requests[id] = {}; - setTimeout(function(){ - that._complete.call(that, o.context || o, origCom, cache[id], 'success', id, o); - that._success.call(that, o.context || o, origSuc, cache[id]._successData, 'success', cache[id], o); - }, 0); - } else { + if(!cache[id].cacheTTL || cache[id].cacheTTL < 0 || ((new Date().getTime() - cache[id].timestamp) < cache[id].cacheTTL)){ + that.requests[id] = {}; + setTimeout(function(){ + that._success.call(that, o.context || o, origSuc, cache[id]._successData, 'success', cache[id], o); + that._complete.call(that, o.context || o, origCom, cache[id], 'success', id, o); + }, 0); + } else { + delete cache[id]; + } + } + if(!o.cacheResponse || !cache[id]) { if (o.async) { that.requests[id] = $.ajax(o); } else { @@ -140,20 +153,26 @@ }; }, _removeXHR: function(xhrID){ - if(this.opts.queue){ + if(this.opts.queue || this.opts.queueDuplicateRequests){ $.dequeue(document, this.qName); } this.inProgress--; this.requests[xhrID] = null; delete this.requests[xhrID]; }, - _isAbort: function(xhr, o){ - var ret = !!( o.abortIsNoSuccess && ( !xhr || xhr.readyState === 0 || this.lastAbort === o.xhrID ) ); + clearCache: function () { + cache = {}; + }, + _isAbort: function(xhr, status, o){ + if(!o.abortIsNoSuccess || (!xhr && !status)){ + return false; + } + var ret = !!( ( !xhr || xhr.readyState === 0 || this.lastAbort === o.xhrID ) ); xhr = null; return ret; }, _complete: function(context, origFn, xhr, status, xhrID, o){ - if(this._isAbort(xhr, o)){ + if(this._isAbort(xhr, status, o)){ status = 'abort'; o.abort.call(context, xhr, status, o); } @@ -176,7 +195,7 @@ }, _success: function(context, origFn, data, status, xhr, o){ var that = this; - if(this._isAbort(xhr, o)){ + if(this._isAbort(xhr, status, o)){ xhr = null; return; } @@ -189,25 +208,35 @@ }); } if(o.cacheResponse && !cache[o.xhrID]){ + if(!xhr){ + xhr = {}; + } cache[o.xhrID] = { status: xhr.status, statusText: xhr.statusText, responseText: xhr.responseText, responseXML: xhr.responseXML, - _successData: data + _successData: data, + cacheTTL: o.cacheTTL, + timestamp: new Date().getTime() }; - if(xhr.getAllResponseHeaders){ + if('getAllResponseHeaders' in xhr){ var responseHeaders = xhr.getAllResponseHeaders(); + var parsedHeaders; + var parseHeaders = function(){ + if(parsedHeaders){return;} + parsedHeaders = {}; + $.each(responseHeaders.split("\n"), function(i, headerLine){ + var delimiter = headerLine.indexOf(":"); + parsedHeaders[headerLine.substr(0, delimiter)] = headerLine.substr(delimiter + 2); + }); + }; $.extend(cache[o.xhrID], { getAllResponseHeaders: function() {return responseHeaders;}, - getResponseHeader: (function(){ - var parsedHeaders = {}; - $.each(responseHeaders.split("\n"), function(i, headerLine){ - var delimiter = headerLine.indexOf(":"); - parsedHeaders[headerLine.substr(0, delimiter)] = headerLine.substr(delimiter + 2); - }); - return function(name) {return parsedHeaders[name];}; - }()) + getResponseHeader: function(name) { + parseHeaders(); + return (name in parsedHeaders) ? parsedHeaders[name] : null; + } }); } } @@ -284,6 +313,8 @@ domCompleteTrigger: false, domSuccessTrigger: false, preventDoubbleRequests: true, + queueDuplicateRequests: false, + cacheTTL: -1, queue: false // true, false, clear }; diff --git a/media/js/newsblur/assetmodel.js b/media/js/newsblur/assetmodel.js index 35ef2be37..31b39678d 100644 --- a/media/js/newsblur/assetmodel.js +++ b/media/js/newsblur/assetmodel.js @@ -85,8 +85,8 @@ NEWSBLUR.AssetModel.Reader.prototype = { callback(o); } }, - error: function(e) { - // NEWSBLUR.log(['AJAX Error', e]); + error: function(e, textStatus, errorThrown) { + NEWSBLUR.log(['AJAX Error', textStatus, errorThrown]); if ($.isFunction(error_callback)) { error_callback(); } else if ($.isFunction(callback)) { @@ -187,6 +187,7 @@ NEWSBLUR.AssetModel.Reader.prototype = { var self = this; var pre_callback = function(subscriptions) { + NEWSBLUR.log(['subscriptions', subscriptions]); var flat_feeds = function(feeds) { var flattened = _.flatten(_.map(feeds, _.values)); return _.flatten(_.map(flattened, function(feed) { diff --git a/settings.py b/settings.py index 8f4664bbf..dcf2665cf 100644 --- a/settings.py +++ b/settings.py @@ -246,19 +246,20 @@ INSTALLED_APPS = ( 'apps.analyzer', 'apps.feed_import', 'apps.profile', - 'devserver', 'south', - # 'test_utils', 'utils', 'utils.typogrify', 'utils.paypal.standard.ipn', - # 'debug_toolbar' ) if not DEVELOPMENT: INSTALLED_APPS += ( 'gunicorn', ) +elif DEVELOPMENT: + INSTALLED_APPS += ( + 'devserver', + ) DEVSERVER_MODULES = ( 'devserver.modules.sql.SQLRealTimeModule', diff --git a/utils/feed_fetcher.py b/utils/feed_fetcher.py index 75b26c14b..0a0167741 100644 --- a/utils/feed_fetcher.py +++ b/utils/feed_fetcher.py @@ -39,7 +39,7 @@ class FetchFeed: self.options = options self.fpf = None - @timelimit(30) + @timelimit(20) def fetch(self): """ Uses feedparser to download the feed. Will be parsed later. diff --git a/utils/feed_functions.py b/utils/feed_functions.py index 334a31814..8d6bb209e 100644 --- a/utils/feed_functions.py +++ b/utils/feed_functions.py @@ -1,8 +1,10 @@ import datetime import threading import sys +import traceback from django.utils.translation import ungettext from utils import feedfinder +from utils import log as logging class TimeoutError(Exception): pass def timelimit(timeout): @@ -29,6 +31,8 @@ def timelimit(timeout): if c.isAlive(): raise TimeoutError, 'took too long' if c.error: + tb = ''.join(traceback.format_exception(c.error[0], c.error[1], c.error[2])) + logging.debug(tb) raise c.error[0], c.error[1] return c.result return _2 diff --git a/utils/feedfinder.py b/utils/feedfinder.py index a3abe95c5..13a3373e7 100644 --- a/utils/feedfinder.py +++ b/utils/feedfinder.py @@ -48,37 +48,6 @@ _debug = 0 import sgmllib, urllib, urlparse, re, sys, robotparser -import threading -class TimeoutError(Exception): pass -def timelimit(timeout): - """borrowed from web.py""" - def _1(function): - def _2(*args, **kw): - class Dispatch(threading.Thread): - def __init__(self): - threading.Thread.__init__(self) - self.result = None - self.error = None - - self.setDaemon(True) - self.start() - - def run(self): - try: - self.result = function(*args, **kw) - except: - self.error = sys.exc_info() - - c = Dispatch() - c.join(timeout) - if c.isAlive(): - raise TimeoutError, 'took too long' - if c.error: - raise c.error[0], c.error[1] - return c.result - return _2 - return _1 - # XML-RPC support allows feedfinder to query Syndic8 for possible matches. # Python 2.3 now comes with this module by default, otherwise you can download it try: @@ -128,7 +97,6 @@ class URLGatekeeper: _debuglog("gatekeeper of %s says %s" % (url, allow)) return allow - @timelimit(10) def get(self, url, check=True): if check and not self.can_fetch(url): return '' try: