From 08ef889e09267e652e56f012bf7ca6cdf373eceb Mon Sep 17 00:00:00 2001
From: Samuel Clay
Date: Tue, 6 Jul 2010 13:21:12 -0400
Subject: [PATCH] Feed fetch history instrumentation.

---
Review notes (text between the "---" line and the first "diff --git" line
is not part of the commit message or of the diff):

* The line structure of this patch was restored from a copy whose
  whitespace had been collapsed. Indentation, blank context lines and the
  whitespace-only removed line in models.py are a best-effort
  reconstruction and may need adjusting before the patch applies cleanly.
* Amended relative to the recorded commit:
  - Feed.save_history() now orders the history by newest first before
    trimming. FeedFetchHistory has no default ordering, so the unordered
    [10:] slice deleted an unspecified set of rows.
  - FeedFetchHistory.__unicode__() no longer slices None when a row was
    stored without exception text (e.g. the "OK" rows).
  - Docstrings/comments were added to the new method and model.
  Hunk headers and the diffstat were updated to match; the post-image blob
  id on the models.py "index" line is no longer accurate.
* Not changed, needs confirmation: the FetchFeed hunks call
  feed.save_history(...) while the surrounding context uses self.feed;
  verify that a local name "feed" is bound at those points.

 .../migrations/0006_feed_fetch_history.py     | 111 ++++++++++++++++++
 apps/rss_feeds/models.py                      |  42 ++++++-
 utils/feed_fetcher.py                         |  10 +-
 3 files changed, 159 insertions(+), 4 deletions(-)
 create mode 100644 apps/rss_feeds/migrations/0006_feed_fetch_history.py

diff --git a/apps/rss_feeds/migrations/0006_feed_fetch_history.py b/apps/rss_feeds/migrations/0006_feed_fetch_history.py
new file mode 100644
index 000000000..97fd500d6
--- /dev/null
+++ b/apps/rss_feeds/migrations/0006_feed_fetch_history.py
@@ -0,0 +1,111 @@
+# encoding: utf-8
+import datetime
+from south.db import db
+from south.v2 import SchemaMigration
+from django.db import models
+
+class Migration(SchemaMigration):
+
+    def forwards(self, orm):
+
+        # Adding model 'FeedFetchHistory'
+        db.create_table('rss_feeds_feedfetchhistory', (
+            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
+            ('feed', self.gf('django.db.models.fields.related.ForeignKey')(related_name='fetch_history', to=orm['rss_feeds.Feed'])),
+            ('status_code', self.gf('django.db.models.fields.CharField')(max_length=10, null=True, blank=True)),
+            ('message', self.gf('django.db.models.fields.CharField')(max_length=255, null=True, blank=True)),
+            ('exception', self.gf('django.db.models.fields.TextField')(null=True, blank=True)),
+            ('fetch_date', self.gf('django.db.models.fields.DateTimeField')(default=datetime.datetime.now)),
+        ))
+        db.send_create_signal('rss_feeds', ['FeedFetchHistory'])
+
+
+    def backwards(self, orm):
+
+        # Deleting model 'FeedFetchHistory'
+        db.delete_table('rss_feeds_feedfetchhistory')
+
+
+    models = {
+        'rss_feeds.feed': {
+            'Meta': {'object_name': 'Feed', 'db_table': "'feeds'"},
+            'active': ('django.db.models.fields.BooleanField', [], {'default': 'True', 'blank': 'True'}),
+            'creation': ('django.db.models.fields.DateField', [], {'auto_now_add': 'True', 'blank': 'True'}),
+            'days_to_trim': ('django.db.models.fields.IntegerField', [], {'default': '90'}),
+            'etag': ('django.db.models.fields.CharField', [], {'max_length': '50', 'null': 'True', 'blank': 'True'}),
+            'feed_address': ('django.db.models.fields.URLField', [], {'unique': 'True', 'max_length': '255'}),
+            'feed_link': ('django.db.models.fields.URLField', [], {'default': "''", 'max_length': '200', 'null': 'True', 'blank': 'True'}),
+            'feed_tagline': ('django.db.models.fields.CharField', [], {'default': "''", 'max_length': '1024', 'null': 'True', 'blank': 'True'}),
+            'feed_title': ('django.db.models.fields.CharField', [], {'default': "''", 'max_length': '255', 'null': 'True', 'blank': 'True'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'last_load_time': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
+            'last_modified': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'blank': 'True'}),
+            'last_update': ('django.db.models.fields.DateTimeField', [], {'default': '0', 'auto_now': 'True', 'blank': 'True'}),
+            'min_to_decay': ('django.db.models.fields.IntegerField', [], {'default': '15'}),
+            'next_scheduled_update': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
+            'num_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
+            'popular_authors': ('django.db.models.fields.CharField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}),
+            'popular_tags': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
+            'stories_per_month': ('django.db.models.fields.IntegerField', [], {'default': '0'})
+        },
+        'rss_feeds.feedfetchhistory': {
+            'Meta': {'object_name': 'FeedFetchHistory'},
+            'exception': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
+            'feed': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'fetch_history'", 'to': "orm['rss_feeds.Feed']"}),
+            'fetch_date': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'message': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True', 'blank': 'True'}),
+            'status_code': ('django.db.models.fields.CharField', [], {'max_length': '10', 'null': 'True', 'blank': 'True'})
+        },
+        'rss_feeds.feedpage': {
+            'Meta': {'object_name': 'FeedPage'},
+            'feed': ('django.db.models.fields.related.OneToOneField', [], {'related_name': "'feed_page'", 'unique': 'True', 'to': "orm['rss_feeds.Feed']"}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'page_data': ('utils.compressed_textfield.StoryField', [], {'null': 'True', 'blank': 'True'})
+        },
+        'rss_feeds.feedupdatehistory': {
+            'Meta': {'object_name': 'FeedUpdateHistory'},
+            'average_per_feed': ('django.db.models.fields.DecimalField', [], {'max_digits': '4', 'decimal_places': '1'}),
+            'fetch_date': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'number_of_feeds': ('django.db.models.fields.IntegerField', [], {}),
+            'seconds_taken': ('django.db.models.fields.IntegerField', [], {})
+        },
+        'rss_feeds.feedxml': {
+            'Meta': {'object_name': 'FeedXML'},
+            'feed': ('django.db.models.fields.related.OneToOneField', [], {'related_name': "'feed_xml'", 'unique': 'True', 'to': "orm['rss_feeds.Feed']"}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'rss_xml': ('utils.compressed_textfield.StoryField', [], {'null': 'True', 'blank': 'True'})
+        },
+        'rss_feeds.story': {
+            'Meta': {'object_name': 'Story', 'db_table': "'stories'"},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'story_author': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['rss_feeds.StoryAuthor']"}),
+            'story_content': ('utils.compressed_textfield.StoryField', [], {'null': 'True', 'blank': 'True'}),
+            'story_content_type': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True', 'blank': 'True'}),
+            'story_date': ('django.db.models.fields.DateTimeField', [], {}),
+            'story_feed': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'stories'", 'to': "orm['rss_feeds.Feed']"}),
+            'story_guid': ('django.db.models.fields.CharField', [], {'max_length': '1000'}),
+            'story_guid_hash': ('django.db.models.fields.CharField', [], {'max_length': '40'}),
+            'story_original_content': ('utils.compressed_textfield.StoryField', [], {'null': 'True', 'blank': 'True'}),
+            'story_past_trim_date': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'blank': 'True'}),
+            'story_permalink': ('django.db.models.fields.CharField', [], {'max_length': '1000'}),
+            'story_tags': ('django.db.models.fields.CharField', [], {'max_length': '2000'}),
+            'story_title': ('django.db.models.fields.CharField', [], {'max_length': '255'}),
+            'tags': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['rss_feeds.Tag']", 'symmetrical': 'False'})
+        },
+        'rss_feeds.storyauthor': {
+            'Meta': {'object_name': 'StoryAuthor'},
+            'author_name': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True', 'blank': 'True'}),
+            'feed': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['rss_feeds.Feed']"}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'})
+        },
+        'rss_feeds.tag': {
+            'Meta': {'object_name': 'Tag'},
+            'feed': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['rss_feeds.Feed']"}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'name': ('django.db.models.fields.CharField', [], {'max_length': '255'})
+        }
+    }
+
+    complete_apps = ['rss_feeds']
diff --git a/apps/rss_feeds/models.py b/apps/rss_feeds/models.py
index d0ea1981e..8692af196 100644
--- a/apps/rss_feeds/models.py
+++ b/apps/rss_feeds/models.py
@@ -42,6 +42,26 @@ class Feed(models.Model):
     def __unicode__(self):
         return self.feed_title
 
+    def save_history(self, status_code, message, exception=None):
+        """Record the outcome of one fetch and trim this feed's history.
+
+        Creates a FeedFetchHistory row for this feed, then deletes every
+        row of this feed's history beyond the 10 most recent ones.
+
+        status_code -- code describing the outcome, chosen by the caller
+        message     -- short description of the outcome
+        exception   -- optional exception/traceback information, or None
+        """
+        FeedFetchHistory.objects.create(feed=self,
+                                        status_code=status_code,
+                                        message=message,
+                                        exception=exception)
+        # Order explicitly (newest first, id as tie-breaker): the model has
+        # no default ordering, so a bare [10:] would drop arbitrary rows.
+        old_fetch_histories = self.fetch_history.order_by('-fetch_date', '-id')[10:]
+        for history in old_fetch_histories:
+            history.delete()
+
     def count_subscribers(self, verbose=False, lock=None):
         from apps.reader.models import UserSubscription
         subs = UserSubscription.objects.filter(feed=self)
@@ -495,4 +515,24 @@ class FeedUpdateHistory(models.Model):
     def save(self, *args, **kwargs):
         self.average_per_feed = str(self.seconds_taken / float(max(1.0,self.number_of_feeds)))
         super(FeedUpdateHistory, self).save(*args, **kwargs)
-        
\ No newline at end of file
+
+class FeedFetchHistory(models.Model):
+    """One recorded fetch attempt for a feed (see Feed.save_history)."""
+    feed = models.ForeignKey(Feed, related_name='fetch_history')
+    status_code = models.CharField(max_length=10, null=True, blank=True)
+    message = models.CharField(max_length=255, null=True, blank=True)
+    exception = models.TextField(null=True, blank=True)
+    fetch_date = models.DateTimeField(default=datetime.datetime.now)
+
+    def __unicode__(self):
+        """Return a one-line summary: feed id, feed, fetch date, status
+        code, message and at most 50 characters of the exception text."""
+        return "[%s] %s (%s): %s %s: %s" % (
+            self.feed.id,
+            self.feed,
+            self.fetch_date,
+            self.status_code,
+            self.message,
+            # exception is nullable; rows saved without one hold None.
+            (self.exception or '')[:50]
+        )
\ No newline at end of file
diff --git a/utils/feed_fetcher.py b/utils/feed_fetcher.py
index a9a96c372..2caeed271 100644
--- a/utils/feed_fetcher.py
+++ b/utils/feed_fetcher.py
@@ -64,6 +64,7 @@ class FetchFeed:
                                                       self.feed.id)
             logging.info(log_msg)
             print(log_msg)
+            feed.save_history(201, "Already fetched")
             return FEED_SAME, None
 
         # we check the etag and the modified time to save bandwith and avoid bans
@@ -75,6 +76,7 @@ class FetchFeed:
             log_msg = '! ERROR: TIMEOUT: %s' % e
             logging.error(log_msg)
             print(log_msg)
+            feed.save_history(300, "Timeout", e)
             return FEED_ERRPARSE, None
 
 
@@ -86,9 +88,11 @@ class FetchFeed:
             log_msg = '! ERROR: feed cannot be parsed: %s' % e
             logging.error(log_msg)
             print(log_msg)
+            feed.save_history(301, "Parse error", e)
             return FEED_ERRPARSE, None
 
 
+        feed.save_history(200, "OK")
         return FEED_OK, self.fpf
 
 class ProcessFeed:
@@ -285,12 +289,12 @@ class Dispatcher:
                     page_importer = PageImporter(feed.feed_link, feed)
                     page_importer.fetch_page()
             except:
-                (etype, eobj, etb) = sys.exc_info()
                 print '[%d] ! -------------------------' % (feed.id,)
-                # print traceback.format_exception(etype, eobj, etb)
-                traceback.print_exception(etype, eobj, etb)
+                tb = traceback.format_exc()
+                print tb
                 print '[%d] ! -------------------------' % (feed.id,)
                 ret_feed = FEED_ERREXC
+                feed.save_history(500, "Error", tb)
 
             delta = datetime.datetime.now() - start_time
             if delta.seconds > SLOWFEED_WARNING: