From 477d928d70a37ffb9d24dcc7decb5f9a3146c80b Mon Sep 17 00:00:00 2001 From: Samuel Clay Date: Tue, 7 May 2013 11:37:01 -0700 Subject: [PATCH 1/2] Disallowing branched feeds from feed search. --- apps/rss_feeds/models.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/apps/rss_feeds/models.py b/apps/rss_feeds/models.py index f60b85e70..99c8eb17d 100644 --- a/apps/rss_feeds/models.py +++ b/apps/rss_feeds/models.py @@ -306,13 +306,17 @@ class Feed(models.Model): return {'%s' % key: value} def by_url(address): - feed = cls.objects.filter(**criteria('feed_address', address)).order_by('-num_subscribers') + feed = cls.objects.filter( + branch_from_feed=None + ).filter(**criteria('feed_address', address)).order_by('-num_subscribers') if not feed: duplicate_feed = DuplicateFeed.objects.filter(**criteria('duplicate_address', address)) if duplicate_feed and len(duplicate_feed) > offset: feed = [duplicate_feed[offset].feed] if not feed and aggressive: - feed = cls.objects.filter(**criteria('feed_link', address)).order_by('-num_subscribers') + feed = cls.objects.filter( + branch_from_feed=None + ).filter(**criteria('feed_link', address)).order_by('-num_subscribers') return feed From 5ceb0c45bfee63e5bc5fb579b4b1244e6684ce90 Mon Sep 17 00:00:00 2001 From: Samuel Clay Date: Tue, 7 May 2013 15:37:36 -0700 Subject: [PATCH 2/2] Migrating comment feeds to non-comment feeds. --- apps/reader/models.py | 5 +- apps/rss_feeds/migrations/0070_commentless.py | 99 +++++++++++++++++++ .../rss_feeds/migrations/0071_commentless2.py | 99 +++++++++++++++++++ apps/rss_feeds/models.py | 8 +- 4 files changed, 208 insertions(+), 3 deletions(-) create mode 100644 apps/rss_feeds/migrations/0070_commentless.py create mode 100644 apps/rss_feeds/migrations/0071_commentless2.py diff --git a/apps/reader/models.py b/apps/reader/models.py index 4302bd6b7..0a98e9919 100644 --- a/apps/reader/models.py +++ b/apps/reader/models.py @@ -493,9 +493,12 @@ class UserSubscription(models.Model): self.feed = new_feed self.needs_unread_recalc = True try: + new_sub = UserSubscription.objects.get(user=self.user, feed=new_feed) + except UserSubscription.DoesNotExist: self.save() user_sub_folders.rewrite_feed(new_feed, old_feed) - except (IntegrityError, OperationError): + else: + # except (IntegrityError, OperationError): logging.info(" !!!!> %s already subscribed" % self.user) self.delete() return diff --git a/apps/rss_feeds/migrations/0070_commentless.py b/apps/rss_feeds/migrations/0070_commentless.py new file mode 100644 index 000000000..d3e3aac0c --- /dev/null +++ b/apps/rss_feeds/migrations/0070_commentless.py @@ -0,0 +1,99 @@ +# -*- coding: utf-8 -*- +import hashlib +from south.v2 import DataMigration +from apps.rss_feeds.models import Feed +from django.db import transaction + +class Migration(DataMigration): + + @transaction.autocommit + def forwards(self, orm): + feeds = Feed.objects.filter( + feed_address__contains='?feed=comments-rss2' + ) + feeds_count = feeds.count() + print " ---> Found %s feeds" % feeds_count + + for f, feed in enumerate(feeds): + print " ---> %4s/%s: %s - %s" % (f, feeds_count, feed, feed.feed_address) + feed.feed_address = feed.feed_address.replace('?feed=comments-rss2', '?feed=rss2') + feed_address = feed.feed_address or "" + feed_link = feed.feed_link or "" + hash_address_and_link = hashlib.sha1(feed_address+feed_link).hexdigest() + duplicate_feeds = Feed.objects.filter(hash_address_and_link=hash_address_and_link) + if not duplicate_feeds: + feed.save() + elif feed.pk != duplicate_feeds[0].pk: + print(" ---> Found different feed (%s), merging..." % duplicate_feeds[0]) + try: + Feed.merge_feeds(duplicate_feeds[0].pk, feed.pk, force=True) + except Exception, e: + print " ***> Exception in merge: %s" % e + else: + feed.save() + + def backwards(self, orm): + "Write your backwards methods here." + + models = { + u'rss_feeds.duplicatefeed': { + 'Meta': {'object_name': 'DuplicateFeed'}, + 'duplicate_address': ('django.db.models.fields.CharField', [], {'max_length': '764', 'db_index': 'True'}), + 'duplicate_feed_id': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True', 'db_index': 'True'}), + 'duplicate_link': ('django.db.models.fields.CharField', [], {'max_length': '764', 'null': 'True', 'db_index': 'True'}), + 'feed': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'duplicate_addresses'", 'to': u"orm['rss_feeds.Feed']"}), + u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}) + }, + u'rss_feeds.feed': { + 'Meta': {'ordering': "['feed_title']", 'object_name': 'Feed', 'db_table': "'feeds'"}, + 'active': ('django.db.models.fields.BooleanField', [], {'default': 'True', 'db_index': 'True'}), + 'active_premium_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1'}), + 'active_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1', 'db_index': 'True'}), + 'average_stories_per_month': ('django.db.models.fields.IntegerField', [], {'default': '0'}), + 'branch_from_feed': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['rss_feeds.Feed']", 'null': 'True', 'blank': 'True'}), + 'creation': ('django.db.models.fields.DateField', [], {'auto_now_add': 'True', 'blank': 'True'}), + 'days_to_trim': ('django.db.models.fields.IntegerField', [], {'default': '90'}), + 'errors_since_good': ('django.db.models.fields.IntegerField', [], {'default': '0'}), + 'etag': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True', 'blank': 'True'}), + 'exception_code': ('django.db.models.fields.IntegerField', [], {'default': '0'}), + 'favicon_color': ('django.db.models.fields.CharField', [], {'max_length': '6', 'null': 'True', 'blank': 'True'}), + 'favicon_not_found': ('django.db.models.fields.BooleanField', [], {'default': 'False'}), + 'feed_address': ('django.db.models.fields.URLField', [], {'max_length': '764', 'db_index': 'True'}), + 'feed_address_locked': ('django.db.models.fields.NullBooleanField', [], {'default': 'False', 'null': 'True', 'blank': 'True'}), + 'feed_link': ('django.db.models.fields.URLField', [], {'default': "''", 'max_length': '1000', 'null': 'True', 'blank': 'True'}), + 'feed_link_locked': ('django.db.models.fields.BooleanField', [], {'default': 'False'}), + 'feed_title': ('django.db.models.fields.CharField', [], {'default': "'[Untitled]'", 'max_length': '255', 'null': 'True', 'blank': 'True'}), + 'fetched_once': ('django.db.models.fields.BooleanField', [], {'default': 'False'}), + 'has_feed_exception': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}), + 'has_page': ('django.db.models.fields.BooleanField', [], {'default': 'True'}), + 'has_page_exception': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}), + 'hash_address_and_link': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '64'}), + u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'is_push': ('django.db.models.fields.NullBooleanField', [], {'default': 'False', 'null': 'True', 'blank': 'True'}), + 'known_good': ('django.db.models.fields.BooleanField', [], {'default': 'False'}), + 'last_load_time': ('django.db.models.fields.IntegerField', [], {'default': '0'}), + 'last_modified': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'blank': 'True'}), + 'last_story_date': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'blank': 'True'}), + 'last_update': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}), + 'min_to_decay': ('django.db.models.fields.IntegerField', [], {'default': '0'}), + 'next_scheduled_update': ('django.db.models.fields.DateTimeField', [], {}), + 'num_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1'}), + 'premium_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1'}), + 's3_icon': ('django.db.models.fields.NullBooleanField', [], {'default': 'False', 'null': 'True', 'blank': 'True'}), + 's3_page': ('django.db.models.fields.NullBooleanField', [], {'default': 'False', 'null': 'True', 'blank': 'True'}), + 'stories_last_month': ('django.db.models.fields.IntegerField', [], {'default': '0'}) + }, + u'rss_feeds.feeddata': { + 'Meta': {'object_name': 'FeedData'}, + 'feed': ('utils.fields.AutoOneToOneField', [], {'related_name': "'data'", 'unique': 'True', 'to': u"orm['rss_feeds.Feed']"}), + 'feed_classifier_counts': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}), + 'feed_tagline': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}), + u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'popular_authors': ('django.db.models.fields.CharField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}), + 'popular_tags': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}), + 'story_count_history': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}) + } + } + + complete_apps = ['rss_feeds'] + symmetrical = True diff --git a/apps/rss_feeds/migrations/0071_commentless2.py b/apps/rss_feeds/migrations/0071_commentless2.py new file mode 100644 index 000000000..78717aa64 --- /dev/null +++ b/apps/rss_feeds/migrations/0071_commentless2.py @@ -0,0 +1,99 @@ +# -*- coding: utf-8 -*- +import hashlib +from south.v2 import DataMigration +from apps.rss_feeds.models import Feed +from django.db import transaction + +class Migration(DataMigration): + + @transaction.autocommit + def forwards(self, orm): + feeds = Feed.objects.filter( + feed_address__contains='/comments/feed' + ) + feeds_count = feeds.count() + print " ---> Found %s feeds" % feeds_count + + for f, feed in enumerate(feeds): + print " ---> %4s/%s: %s - %s" % (f, feeds_count, feed, feed.feed_address) + feed.feed_address = feed.feed_address.replace('/comments/feed', '/feed') + feed_address = feed.feed_address or "" + feed_link = feed.feed_link or "" + hash_address_and_link = hashlib.sha1(feed_address+feed_link).hexdigest() + duplicate_feeds = Feed.objects.filter(hash_address_and_link=hash_address_and_link) + if not duplicate_feeds: + feed.save() + elif feed.pk != duplicate_feeds[0].pk: + print(" ---> Found different feed (%s), merging..." % duplicate_feeds[0]) + try: + Feed.merge_feeds(duplicate_feeds[0].pk, feed.pk, force=True) + except Exception, e: + print " ***> Exception in merge: %s" % e + else: + feed.save() + + def backwards(self, orm): + "Write your backwards methods here." + + models = { + u'rss_feeds.duplicatefeed': { + 'Meta': {'object_name': 'DuplicateFeed'}, + 'duplicate_address': ('django.db.models.fields.CharField', [], {'max_length': '764', 'db_index': 'True'}), + 'duplicate_feed_id': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True', 'db_index': 'True'}), + 'duplicate_link': ('django.db.models.fields.CharField', [], {'max_length': '764', 'null': 'True', 'db_index': 'True'}), + 'feed': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'duplicate_addresses'", 'to': u"orm['rss_feeds.Feed']"}), + u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}) + }, + u'rss_feeds.feed': { + 'Meta': {'ordering': "['feed_title']", 'object_name': 'Feed', 'db_table': "'feeds'"}, + 'active': ('django.db.models.fields.BooleanField', [], {'default': 'True', 'db_index': 'True'}), + 'active_premium_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1'}), + 'active_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1', 'db_index': 'True'}), + 'average_stories_per_month': ('django.db.models.fields.IntegerField', [], {'default': '0'}), + 'branch_from_feed': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['rss_feeds.Feed']", 'null': 'True', 'blank': 'True'}), + 'creation': ('django.db.models.fields.DateField', [], {'auto_now_add': 'True', 'blank': 'True'}), + 'days_to_trim': ('django.db.models.fields.IntegerField', [], {'default': '90'}), + 'errors_since_good': ('django.db.models.fields.IntegerField', [], {'default': '0'}), + 'etag': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True', 'blank': 'True'}), + 'exception_code': ('django.db.models.fields.IntegerField', [], {'default': '0'}), + 'favicon_color': ('django.db.models.fields.CharField', [], {'max_length': '6', 'null': 'True', 'blank': 'True'}), + 'favicon_not_found': ('django.db.models.fields.BooleanField', [], {'default': 'False'}), + 'feed_address': ('django.db.models.fields.URLField', [], {'max_length': '764', 'db_index': 'True'}), + 'feed_address_locked': ('django.db.models.fields.NullBooleanField', [], {'default': 'False', 'null': 'True', 'blank': 'True'}), + 'feed_link': ('django.db.models.fields.URLField', [], {'default': "''", 'max_length': '1000', 'null': 'True', 'blank': 'True'}), + 'feed_link_locked': ('django.db.models.fields.BooleanField', [], {'default': 'False'}), + 'feed_title': ('django.db.models.fields.CharField', [], {'default': "'[Untitled]'", 'max_length': '255', 'null': 'True', 'blank': 'True'}), + 'fetched_once': ('django.db.models.fields.BooleanField', [], {'default': 'False'}), + 'has_feed_exception': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}), + 'has_page': ('django.db.models.fields.BooleanField', [], {'default': 'True'}), + 'has_page_exception': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}), + 'hash_address_and_link': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '64'}), + u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'is_push': ('django.db.models.fields.NullBooleanField', [], {'default': 'False', 'null': 'True', 'blank': 'True'}), + 'known_good': ('django.db.models.fields.BooleanField', [], {'default': 'False'}), + 'last_load_time': ('django.db.models.fields.IntegerField', [], {'default': '0'}), + 'last_modified': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'blank': 'True'}), + 'last_story_date': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'blank': 'True'}), + 'last_update': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}), + 'min_to_decay': ('django.db.models.fields.IntegerField', [], {'default': '0'}), + 'next_scheduled_update': ('django.db.models.fields.DateTimeField', [], {}), + 'num_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1'}), + 'premium_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1'}), + 's3_icon': ('django.db.models.fields.NullBooleanField', [], {'default': 'False', 'null': 'True', 'blank': 'True'}), + 's3_page': ('django.db.models.fields.NullBooleanField', [], {'default': 'False', 'null': 'True', 'blank': 'True'}), + 'stories_last_month': ('django.db.models.fields.IntegerField', [], {'default': '0'}) + }, + u'rss_feeds.feeddata': { + 'Meta': {'object_name': 'FeedData'}, + 'feed': ('utils.fields.AutoOneToOneField', [], {'related_name': "'data'", 'unique': 'True', 'to': u"orm['rss_feeds.Feed']"}), + 'feed_classifier_counts': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}), + 'feed_tagline': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}), + u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'popular_authors': ('django.db.models.fields.CharField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}), + 'popular_tags': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}), + 'story_count_history': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}) + } + } + + complete_apps = ['rss_feeds'] + symmetrical = True diff --git a/apps/rss_feeds/models.py b/apps/rss_feeds/models.py index 99c8eb17d..7893f0be6 100644 --- a/apps/rss_feeds/models.py +++ b/apps/rss_feeds/models.py @@ -1933,6 +1933,9 @@ def merge_feeds(original_feed_id, duplicate_feed_id, force=False): duplicate_feed.feed_link, " [B: %s]" % duplicate_feed.branch_from_feed.pk if duplicate_feed.branch_from_feed else "")) + original_feed.branch_from_feed = None + original_feed.save() + user_subs = UserSubscription.objects.filter(feed=duplicate_feed).order_by('-pk') for user_sub in user_subs: user_sub.switch_feed(original_feed, duplicate_feed) @@ -1963,8 +1966,9 @@ def merge_feeds(original_feed_id, duplicate_feed_id, force=False): dupe_feed.duplicate_feed_id = duplicate_feed.pk dupe_feed.save() - logging.debug(' ---> Dupe subscribers: %s, Original subscribers: %s' % - (duplicate_feed.num_subscribers, original_feed.num_subscribers)) + logging.debug(' ---> Dupe subscribers (%s): %s, Original subscribers (%s): %s' % + (duplicate_feed.pk, duplicate_feed.num_subscribers, + original_feed.pk, original_feed.num_subscribers)) if duplicate_feed.pk != original_feed.pk: duplicate_feed.delete() else: