Adding a duplicate feed table so future duplicates will be caught and corrected.

This commit is contained in:
Samuel Clay 2010-08-19 10:43:07 -04:00
parent 5e438ac439
commit 7e40103a4c
6 changed files with 176 additions and 9 deletions

View file

@ -1,7 +1,7 @@
from collections import defaultdict
from django.db import models
from django.contrib.auth.models import User
from apps.rss_feeds.models import Feed
from apps.rss_feeds.models import Feed, DuplicateFeed
from apps.reader.models import UserSubscription, UserSubscriptionFolders
import datetime
import lxml.etree
@ -60,7 +60,15 @@ class OPMLImporter(Importer):
logging.info(' ---> \t%s - %s - %s' % (feed.title, feed_link, feed_address,))
feed_data = dict(feed_address=feed_address, feed_link=feed_link, feed_title=feed.title)
# feeds.append(feed_data)
feed_db, _ = Feed.objects.get_or_create(feed_address=feed_address, defaults=dict(**feed_data))
# See if it exists as a duplicate first
duplicate_feed = DuplicateFeed.objects.filter(duplicate_address=feed_address)
if duplicate_feed:
feed_db = duplicate_feed[0].feed
else:
feed_db, _ = Feed.objects.get_or_create(feed_address=feed_address,
defaults=dict(**feed_data))
us, _ = UserSubscription.objects.get_or_create(
feed=feed_db,
user=self.user,
@ -113,8 +121,15 @@ class GoogleReaderImporter(Importer):
feed_link = urlnorm.normalize(feed_link)
feed_address = urlnorm.normalize(feed_address)
feed_data = dict(feed_address=feed_address, feed_link=feed_link, feed_title=feed_title)
feed_db, _ = Feed.objects.get_or_create(feed_address=feed_address, defaults=dict(**feed_data))
# See if it exists as a duplicate first
duplicate_feed = DuplicateFeed.objects.filter(duplicate_address=feed_address)
if duplicate_feed:
feed_db = duplicate_feed[0].feed
else:
feed_data = dict(feed_address=feed_address, feed_link=feed_link, feed_title=feed_title)
feed_db, _ = Feed.objects.get_or_create(feed_address=feed_address,
defaults=dict(**feed_data))
us, _ = UserSubscription.objects.get_or_create(
feed=feed_db,
user=self.user,
@ -135,6 +150,6 @@ class GoogleReaderImporter(Importer):
if folder == 'Root':
self.subscription_folders += items
else:
folder_parents = folder.split(u' \u2014 ')
# folder_parents = folder.split(u' \u2014 ')
self.subscription_folders.append({folder: items})

View file

@ -9,6 +9,7 @@ from apps.analyzer.models import apply_classifier_titles, apply_classifier_feeds
from utils.compressed_textfield import StoryField
DAYS_OF_UNREAD = 14
MONTH_AGO = datetime.datetime.now() - datetime.timedelta(days=30)
class UserSubscription(models.Model):
"""
@ -56,6 +57,11 @@ class UserSubscription(models.Model):
self.save()
def calculate_feed_scores(self, silent=False):
if self.user.profile.last_seen_on < MONTH_AGO:
if not silent:
logging.info(' ---> [%s] SKIPPING Computing scores: %s' % (self.user, self.feed))
return
if not self.feed.fetched_once:
if not silent:
logging.info(' ---> [%s] NOT Computing scores: %s' % (self.user, self.feed))

View file

@ -17,7 +17,7 @@ from apps.analyzer.models import get_classifiers_for_user
from apps.reader.models import UserSubscription, UserSubscriptionFolders, UserStory, Feature
from apps.reader.forms import SignupForm, LoginForm, FeatureForm
try:
from apps.rss_feeds.models import Feed, Story, FeedPage
from apps.rss_feeds.models import Feed, Story, FeedPage, DuplicateFeed
except:
pass
from utils import json, urlnorm
@ -407,8 +407,13 @@ def add_url(request):
if url:
url = urlnorm.normalize(url)
feed = Feed.objects.filter(Q(feed_address=url)
| Q(feed_link__icontains=url))
# See if it exists as a duplicate first
duplicate_feed = DuplicateFeed.objects.filter(duplicate_address=url)
if duplicate_feed:
feed = [duplicate_feed[0].feed]
else:
feed = Feed.objects.filter(Q(feed_address=url)
| Q(feed_link__icontains=url))
if feed:
feed = feed[0]

View file

@ -1,5 +1,5 @@
from django.core.management.base import BaseCommand
from apps.rss_feeds.models import Feed, Story, Tag, StoryAuthor
from apps.rss_feeds.models import Feed, Story, Tag, StoryAuthor, DuplicateFeed
from apps.reader.models import UserSubscription, UserStory, UserSubscriptionFolders
from apps.analyzer.models import FeatureCategory, Category, ClassifierTitle
from apps.analyzer.models import ClassifierAuthor, ClassifierFeed, ClassifierTag
@ -121,6 +121,14 @@ class Command(BaseCommand):
switch_feed(ClassifierFeed)
switch_feed(ClassifierTag)
try:
DuplicateFeed.objects.create(
duplicate_address=duplicate_feed.feed_address,
feed=original_feed
)
except IntegrityError:
pass
duplicate_feed.delete()
def rewrite_folders(self, folders, original_feed, duplicate_feed):

View file

@ -0,0 +1,128 @@
# encoding: utf-8
import datetime
from south.db import db
from south.v2 import SchemaMigration
from django.db import models
class Migration(SchemaMigration):
def forwards(self, orm):
# Adding model 'DuplicateFeed'
db.create_table('rss_feeds_duplicatefeed', (
('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
('duplicate_address', self.gf('django.db.models.fields.CharField')(unique=True, max_length=255)),
('feed', self.gf('django.db.models.fields.related.ForeignKey')(related_name='duplicate_addresses', to=orm['rss_feeds.Feed'])),
))
db.send_create_signal('rss_feeds', ['DuplicateFeed'])
def backwards(self, orm):
# Deleting model 'DuplicateFeed'
db.delete_table('rss_feeds_duplicatefeed')
models = {
'rss_feeds.duplicatefeed': {
'Meta': {'object_name': 'DuplicateFeed'},
'duplicate_address': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '255'}),
'feed': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'duplicate_addresses'", 'to': "orm['rss_feeds.Feed']"}),
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'})
},
'rss_feeds.feed': {
'Meta': {'object_name': 'Feed', 'db_table': "'feeds'"},
'active': ('django.db.models.fields.BooleanField', [], {'default': 'True', 'blank': 'True'}),
'average_stories_per_month': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
'creation': ('django.db.models.fields.DateField', [], {'auto_now_add': 'True', 'blank': 'True'}),
'days_to_trim': ('django.db.models.fields.IntegerField', [], {'default': '90'}),
'etag': ('django.db.models.fields.CharField', [], {'max_length': '50', 'null': 'True', 'blank': 'True'}),
'feed_address': ('django.db.models.fields.URLField', [], {'unique': 'True', 'max_length': '255'}),
'feed_link': ('django.db.models.fields.URLField', [], {'default': "''", 'max_length': '1000', 'null': 'True', 'blank': 'True'}),
'feed_tagline': ('django.db.models.fields.CharField', [], {'default': "''", 'max_length': '1024', 'null': 'True', 'blank': 'True'}),
'feed_title': ('django.db.models.fields.CharField', [], {'default': "''", 'max_length': '255', 'null': 'True', 'blank': 'True'}),
'fetched_once': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'blank': 'True'}),
'has_exception': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'blank': 'True'}),
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
'last_load_time': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
'last_modified': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'blank': 'True'}),
'last_update': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'}),
'min_to_decay': ('django.db.models.fields.IntegerField', [], {'default': '15'}),
'next_scheduled_update': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
'num_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
'popular_authors': ('django.db.models.fields.CharField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}),
'popular_tags': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
'stories_last_month': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
'story_count_history': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'})
},
'rss_feeds.feedfetchhistory': {
'Meta': {'object_name': 'FeedFetchHistory'},
'exception': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
'feed': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'feed_fetch_history'", 'to': "orm['rss_feeds.Feed']"}),
'fetch_date': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
'message': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True', 'blank': 'True'}),
'status_code': ('django.db.models.fields.CharField', [], {'max_length': '10', 'null': 'True', 'blank': 'True'})
},
'rss_feeds.feedpage': {
'Meta': {'object_name': 'FeedPage'},
'feed': ('django.db.models.fields.related.OneToOneField', [], {'related_name': "'feed_page'", 'unique': 'True', 'to': "orm['rss_feeds.Feed']"}),
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
'page_data': ('utils.compressed_textfield.StoryField', [], {'null': 'True', 'blank': 'True'})
},
'rss_feeds.feedupdatehistory': {
'Meta': {'object_name': 'FeedUpdateHistory'},
'average_per_feed': ('django.db.models.fields.DecimalField', [], {'max_digits': '4', 'decimal_places': '1'}),
'fetch_date': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
'number_of_feeds': ('django.db.models.fields.IntegerField', [], {}),
'seconds_taken': ('django.db.models.fields.IntegerField', [], {})
},
'rss_feeds.feedxml': {
'Meta': {'object_name': 'FeedXML'},
'feed': ('django.db.models.fields.related.OneToOneField', [], {'related_name': "'feed_xml'", 'unique': 'True', 'to': "orm['rss_feeds.Feed']"}),
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
'rss_xml': ('utils.compressed_textfield.StoryField', [], {'null': 'True', 'blank': 'True'})
},
'rss_feeds.pagefetchhistory': {
'Meta': {'object_name': 'PageFetchHistory'},
'exception': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
'feed': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'page_fetch_history'", 'to': "orm['rss_feeds.Feed']"}),
'fetch_date': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
'message': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True', 'blank': 'True'}),
'status_code': ('django.db.models.fields.CharField', [], {'max_length': '10', 'null': 'True', 'blank': 'True'})
},
'rss_feeds.story': {
'Meta': {'unique_together': "(('story_feed', 'story_guid_hash'),)", 'object_name': 'Story', 'db_table': "'stories'"},
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
'story_author': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['rss_feeds.StoryAuthor']"}),
'story_author_name': ('django.db.models.fields.CharField', [], {'max_length': '500', 'null': 'True', 'blank': 'True'}),
'story_content': ('utils.compressed_textfield.StoryField', [], {'null': 'True', 'blank': 'True'}),
'story_content_type': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True', 'blank': 'True'}),
'story_date': ('django.db.models.fields.DateTimeField', [], {}),
'story_feed': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'stories'", 'to': "orm['rss_feeds.Feed']"}),
'story_guid': ('django.db.models.fields.CharField', [], {'max_length': '1000'}),
'story_guid_hash': ('django.db.models.fields.CharField', [], {'max_length': '40'}),
'story_original_content': ('utils.compressed_textfield.StoryField', [], {'null': 'True', 'blank': 'True'}),
'story_past_trim_date': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'blank': 'True'}),
'story_permalink': ('django.db.models.fields.CharField', [], {'max_length': '1000'}),
'story_tags': ('django.db.models.fields.CharField', [], {'max_length': '2000', 'null': 'True', 'blank': 'True'}),
'story_title': ('django.db.models.fields.CharField', [], {'max_length': '255'}),
'tags': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['rss_feeds.Tag']", 'symmetrical': 'False'})
},
'rss_feeds.storyauthor': {
'Meta': {'object_name': 'StoryAuthor'},
'author_name': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True', 'blank': 'True'}),
'feed': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['rss_feeds.Feed']"}),
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'})
},
'rss_feeds.tag': {
'Meta': {'object_name': 'Tag'},
'feed': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['rss_feeds.Feed']"}),
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
'name': ('django.db.models.fields.CharField', [], {'max_length': '255'})
}
}
complete_apps = ['rss_feeds']

View file

@ -698,3 +698,8 @@ class PageFetchHistory(models.Model):
self.message,
self.exception[:50]
)
class DuplicateFeed(models.Model):
duplicate_address = models.CharField(max_length=255, unique=True)
feed = models.ForeignKey(Feed, related_name='duplicate_addresses')