mirror of
https://github.com/viq/NewsBlur.git
synced 2025-09-18 21:43:31 +00:00
Adding a duplicate feed table so future duplicates will be caught and corrected.
This commit is contained in:
parent
5e438ac439
commit
7e40103a4c
6 changed files with 176 additions and 9 deletions
|
@ -1,7 +1,7 @@
|
|||
from collections import defaultdict
|
||||
from django.db import models
|
||||
from django.contrib.auth.models import User
|
||||
from apps.rss_feeds.models import Feed
|
||||
from apps.rss_feeds.models import Feed, DuplicateFeed
|
||||
from apps.reader.models import UserSubscription, UserSubscriptionFolders
|
||||
import datetime
|
||||
import lxml.etree
|
||||
|
@ -60,7 +60,15 @@ class OPMLImporter(Importer):
|
|||
logging.info(' ---> \t%s - %s - %s' % (feed.title, feed_link, feed_address,))
|
||||
feed_data = dict(feed_address=feed_address, feed_link=feed_link, feed_title=feed.title)
|
||||
# feeds.append(feed_data)
|
||||
feed_db, _ = Feed.objects.get_or_create(feed_address=feed_address, defaults=dict(**feed_data))
|
||||
|
||||
# See if it exists as a duplicate first
|
||||
duplicate_feed = DuplicateFeed.objects.filter(duplicate_address=feed_address)
|
||||
if duplicate_feed:
|
||||
feed_db = duplicate_feed[0].feed
|
||||
else:
|
||||
feed_db, _ = Feed.objects.get_or_create(feed_address=feed_address,
|
||||
defaults=dict(**feed_data))
|
||||
|
||||
us, _ = UserSubscription.objects.get_or_create(
|
||||
feed=feed_db,
|
||||
user=self.user,
|
||||
|
@ -113,8 +121,15 @@ class GoogleReaderImporter(Importer):
|
|||
feed_link = urlnorm.normalize(feed_link)
|
||||
feed_address = urlnorm.normalize(feed_address)
|
||||
|
||||
feed_data = dict(feed_address=feed_address, feed_link=feed_link, feed_title=feed_title)
|
||||
feed_db, _ = Feed.objects.get_or_create(feed_address=feed_address, defaults=dict(**feed_data))
|
||||
# See if it exists as a duplicate first
|
||||
duplicate_feed = DuplicateFeed.objects.filter(duplicate_address=feed_address)
|
||||
if duplicate_feed:
|
||||
feed_db = duplicate_feed[0].feed
|
||||
else:
|
||||
feed_data = dict(feed_address=feed_address, feed_link=feed_link, feed_title=feed_title)
|
||||
feed_db, _ = Feed.objects.get_or_create(feed_address=feed_address,
|
||||
defaults=dict(**feed_data))
|
||||
|
||||
us, _ = UserSubscription.objects.get_or_create(
|
||||
feed=feed_db,
|
||||
user=self.user,
|
||||
|
@ -135,6 +150,6 @@ class GoogleReaderImporter(Importer):
|
|||
if folder == 'Root':
|
||||
self.subscription_folders += items
|
||||
else:
|
||||
folder_parents = folder.split(u' \u2014 ')
|
||||
# folder_parents = folder.split(u' \u2014 ')
|
||||
self.subscription_folders.append({folder: items})
|
||||
|
|
@ -9,6 +9,7 @@ from apps.analyzer.models import apply_classifier_titles, apply_classifier_feeds
|
|||
from utils.compressed_textfield import StoryField
|
||||
|
||||
DAYS_OF_UNREAD = 14
# Cutoff compared against a user's profile.last_seen_on in
# UserSubscription.calculate_feed_scores to skip score computation.
# NOTE(review): this is evaluated once, when the module is imported, so in a
# long-running process "30 days ago" is measured from process start rather
# than from the time of the call. Consider computing the cutoff at call time.
MONTH_AGO = datetime.datetime.now() - datetime.timedelta(days=30)
|
||||
|
||||
class UserSubscription(models.Model):
|
||||
"""
|
||||
|
@ -56,6 +57,11 @@ class UserSubscription(models.Model):
|
|||
self.save()
|
||||
|
||||
def calculate_feed_scores(self, silent=False):
|
||||
if self.user.profile.last_seen_on < MONTH_AGO:
|
||||
if not silent:
|
||||
logging.info(' ---> [%s] SKIPPING Computing scores: %s' % (self.user, self.feed))
|
||||
return
|
||||
|
||||
if not self.feed.fetched_once:
|
||||
if not silent:
|
||||
logging.info(' ---> [%s] NOT Computing scores: %s' % (self.user, self.feed))
|
||||
|
|
|
@ -17,7 +17,7 @@ from apps.analyzer.models import get_classifiers_for_user
|
|||
from apps.reader.models import UserSubscription, UserSubscriptionFolders, UserStory, Feature
|
||||
from apps.reader.forms import SignupForm, LoginForm, FeatureForm
|
||||
try:
|
||||
from apps.rss_feeds.models import Feed, Story, FeedPage
|
||||
from apps.rss_feeds.models import Feed, Story, FeedPage, DuplicateFeed
|
||||
except:
|
||||
pass
|
||||
from utils import json, urlnorm
|
||||
|
@ -407,8 +407,13 @@ def add_url(request):
|
|||
|
||||
if url:
|
||||
url = urlnorm.normalize(url)
|
||||
feed = Feed.objects.filter(Q(feed_address=url)
|
||||
| Q(feed_link__icontains=url))
|
||||
# See if it exists as a duplicate first
|
||||
duplicate_feed = DuplicateFeed.objects.filter(duplicate_address=url)
|
||||
if duplicate_feed:
|
||||
feed = [duplicate_feed[0].feed]
|
||||
else:
|
||||
feed = Feed.objects.filter(Q(feed_address=url)
|
||||
| Q(feed_link__icontains=url))
|
||||
|
||||
if feed:
|
||||
feed = feed[0]
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
from django.core.management.base import BaseCommand
|
||||
from apps.rss_feeds.models import Feed, Story, Tag, StoryAuthor
|
||||
from apps.rss_feeds.models import Feed, Story, Tag, StoryAuthor, DuplicateFeed
|
||||
from apps.reader.models import UserSubscription, UserStory, UserSubscriptionFolders
|
||||
from apps.analyzer.models import FeatureCategory, Category, ClassifierTitle
|
||||
from apps.analyzer.models import ClassifierAuthor, ClassifierFeed, ClassifierTag
|
||||
|
@ -121,6 +121,14 @@ class Command(BaseCommand):
|
|||
switch_feed(ClassifierFeed)
|
||||
switch_feed(ClassifierTag)
|
||||
|
||||
try:
|
||||
DuplicateFeed.objects.create(
|
||||
duplicate_address=duplicate_feed.feed_address,
|
||||
feed=original_feed
|
||||
)
|
||||
except IntegrityError:
|
||||
pass
|
||||
|
||||
duplicate_feed.delete()
|
||||
|
||||
def rewrite_folders(self, folders, original_feed, duplicate_feed):
|
||||
|
|
128
apps/rss_feeds/migrations/0016_duplicate_feeds.py
Normal file
128
apps/rss_feeds/migrations/0016_duplicate_feeds.py
Normal file
|
@ -0,0 +1,128 @@
|
|||
# encoding: utf-8
|
||||
import datetime
|
||||
from south.db import db
|
||||
from south.v2 import SchemaMigration
|
||||
from django.db import models
|
||||
|
||||
class Migration(SchemaMigration):
|
||||
|
||||
def forwards(self, orm):
    """Apply the migration: create the ``rss_feeds_duplicatefeed`` table.

    ``orm`` is used only to resolve the ``rss_feeds.Feed`` model that the
    ``feed`` foreign key points at.
    """

    # Adding model 'DuplicateFeed'
    db.create_table('rss_feeds_duplicatefeed', (
        ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
        # unique=True: a given duplicate address can be recorded only once.
        ('duplicate_address', self.gf('django.db.models.fields.CharField')(unique=True, max_length=255)),
        # The feed that the duplicate address resolves to.
        ('feed', self.gf('django.db.models.fields.related.ForeignKey')(related_name='duplicate_addresses', to=orm['rss_feeds.Feed'])),
    ))
    db.send_create_signal('rss_feeds', ['DuplicateFeed'])
|
||||
|
||||
|
||||
def backwards(self, orm):
    """Revert the migration by dropping the ``rss_feeds_duplicatefeed`` table.

    ``orm`` is accepted but not used here.
    """

    # Deleting model 'DuplicateFeed'
    db.delete_table('rss_feeds_duplicatefeed')
|
||||
|
||||
|
||||
models = {
|
||||
'rss_feeds.duplicatefeed': {
|
||||
'Meta': {'object_name': 'DuplicateFeed'},
|
||||
'duplicate_address': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '255'}),
|
||||
'feed': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'duplicate_addresses'", 'to': "orm['rss_feeds.Feed']"}),
|
||||
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'})
|
||||
},
|
||||
'rss_feeds.feed': {
|
||||
'Meta': {'object_name': 'Feed', 'db_table': "'feeds'"},
|
||||
'active': ('django.db.models.fields.BooleanField', [], {'default': 'True', 'blank': 'True'}),
|
||||
'average_stories_per_month': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
|
||||
'creation': ('django.db.models.fields.DateField', [], {'auto_now_add': 'True', 'blank': 'True'}),
|
||||
'days_to_trim': ('django.db.models.fields.IntegerField', [], {'default': '90'}),
|
||||
'etag': ('django.db.models.fields.CharField', [], {'max_length': '50', 'null': 'True', 'blank': 'True'}),
|
||||
'feed_address': ('django.db.models.fields.URLField', [], {'unique': 'True', 'max_length': '255'}),
|
||||
'feed_link': ('django.db.models.fields.URLField', [], {'default': "''", 'max_length': '1000', 'null': 'True', 'blank': 'True'}),
|
||||
'feed_tagline': ('django.db.models.fields.CharField', [], {'default': "''", 'max_length': '1024', 'null': 'True', 'blank': 'True'}),
|
||||
'feed_title': ('django.db.models.fields.CharField', [], {'default': "''", 'max_length': '255', 'null': 'True', 'blank': 'True'}),
|
||||
'fetched_once': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'blank': 'True'}),
|
||||
'has_exception': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'blank': 'True'}),
|
||||
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
|
||||
'last_load_time': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
|
||||
'last_modified': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'blank': 'True'}),
|
||||
'last_update': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'}),
|
||||
'min_to_decay': ('django.db.models.fields.IntegerField', [], {'default': '15'}),
|
||||
'next_scheduled_update': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
|
||||
'num_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
|
||||
'popular_authors': ('django.db.models.fields.CharField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}),
|
||||
'popular_tags': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
|
||||
'stories_last_month': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
|
||||
'story_count_history': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'})
|
||||
},
|
||||
'rss_feeds.feedfetchhistory': {
|
||||
'Meta': {'object_name': 'FeedFetchHistory'},
|
||||
'exception': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
|
||||
'feed': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'feed_fetch_history'", 'to': "orm['rss_feeds.Feed']"}),
|
||||
'fetch_date': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
|
||||
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
|
||||
'message': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True', 'blank': 'True'}),
|
||||
'status_code': ('django.db.models.fields.CharField', [], {'max_length': '10', 'null': 'True', 'blank': 'True'})
|
||||
},
|
||||
'rss_feeds.feedpage': {
|
||||
'Meta': {'object_name': 'FeedPage'},
|
||||
'feed': ('django.db.models.fields.related.OneToOneField', [], {'related_name': "'feed_page'", 'unique': 'True', 'to': "orm['rss_feeds.Feed']"}),
|
||||
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
|
||||
'page_data': ('utils.compressed_textfield.StoryField', [], {'null': 'True', 'blank': 'True'})
|
||||
},
|
||||
'rss_feeds.feedupdatehistory': {
|
||||
'Meta': {'object_name': 'FeedUpdateHistory'},
|
||||
'average_per_feed': ('django.db.models.fields.DecimalField', [], {'max_digits': '4', 'decimal_places': '1'}),
|
||||
'fetch_date': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
|
||||
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
|
||||
'number_of_feeds': ('django.db.models.fields.IntegerField', [], {}),
|
||||
'seconds_taken': ('django.db.models.fields.IntegerField', [], {})
|
||||
},
|
||||
'rss_feeds.feedxml': {
|
||||
'Meta': {'object_name': 'FeedXML'},
|
||||
'feed': ('django.db.models.fields.related.OneToOneField', [], {'related_name': "'feed_xml'", 'unique': 'True', 'to': "orm['rss_feeds.Feed']"}),
|
||||
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
|
||||
'rss_xml': ('utils.compressed_textfield.StoryField', [], {'null': 'True', 'blank': 'True'})
|
||||
},
|
||||
'rss_feeds.pagefetchhistory': {
|
||||
'Meta': {'object_name': 'PageFetchHistory'},
|
||||
'exception': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
|
||||
'feed': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'page_fetch_history'", 'to': "orm['rss_feeds.Feed']"}),
|
||||
'fetch_date': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
|
||||
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
|
||||
'message': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True', 'blank': 'True'}),
|
||||
'status_code': ('django.db.models.fields.CharField', [], {'max_length': '10', 'null': 'True', 'blank': 'True'})
|
||||
},
|
||||
'rss_feeds.story': {
|
||||
'Meta': {'unique_together': "(('story_feed', 'story_guid_hash'),)", 'object_name': 'Story', 'db_table': "'stories'"},
|
||||
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
|
||||
'story_author': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['rss_feeds.StoryAuthor']"}),
|
||||
'story_author_name': ('django.db.models.fields.CharField', [], {'max_length': '500', 'null': 'True', 'blank': 'True'}),
|
||||
'story_content': ('utils.compressed_textfield.StoryField', [], {'null': 'True', 'blank': 'True'}),
|
||||
'story_content_type': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True', 'blank': 'True'}),
|
||||
'story_date': ('django.db.models.fields.DateTimeField', [], {}),
|
||||
'story_feed': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'stories'", 'to': "orm['rss_feeds.Feed']"}),
|
||||
'story_guid': ('django.db.models.fields.CharField', [], {'max_length': '1000'}),
|
||||
'story_guid_hash': ('django.db.models.fields.CharField', [], {'max_length': '40'}),
|
||||
'story_original_content': ('utils.compressed_textfield.StoryField', [], {'null': 'True', 'blank': 'True'}),
|
||||
'story_past_trim_date': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'blank': 'True'}),
|
||||
'story_permalink': ('django.db.models.fields.CharField', [], {'max_length': '1000'}),
|
||||
'story_tags': ('django.db.models.fields.CharField', [], {'max_length': '2000', 'null': 'True', 'blank': 'True'}),
|
||||
'story_title': ('django.db.models.fields.CharField', [], {'max_length': '255'}),
|
||||
'tags': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['rss_feeds.Tag']", 'symmetrical': 'False'})
|
||||
},
|
||||
'rss_feeds.storyauthor': {
|
||||
'Meta': {'object_name': 'StoryAuthor'},
|
||||
'author_name': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True', 'blank': 'True'}),
|
||||
'feed': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['rss_feeds.Feed']"}),
|
||||
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'})
|
||||
},
|
||||
'rss_feeds.tag': {
|
||||
'Meta': {'object_name': 'Tag'},
|
||||
'feed': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['rss_feeds.Feed']"}),
|
||||
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
|
||||
'name': ('django.db.models.fields.CharField', [], {'max_length': '255'})
|
||||
}
|
||||
}
|
||||
|
||||
complete_apps = ['rss_feeds']
|
|
@ -698,3 +698,8 @@ class PageFetchHistory(models.Model):
|
|||
self.message,
|
||||
self.exception[:50]
|
||||
)
|
||||
|
||||
class DuplicateFeed(models.Model):
    """Record that a feed address is a duplicate of an existing ``Feed``.

    Each row pairs one address with the ``Feed`` it should resolve to, so a
    lookup by ``duplicate_address`` yields the feed to use in its place.
    """
    # Address of the duplicate; unique, so it can be recorded only once
    # (a second insert of the same address violates the constraint).
    duplicate_address = models.CharField(max_length=255, unique=True)
    # Feed that this address maps to; reachable from the Feed side through
    # the ``duplicate_addresses`` reverse accessor.
    feed = models.ForeignKey(Feed, related_name='duplicate_addresses')
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue