diff --git a/apps/rss_feeds/management/commands/merge_feeds.py b/apps/rss_feeds/management/commands/merge_feeds.py index ce0f904f8..b758c4b5c 100644 --- a/apps/rss_feeds/management/commands/merge_feeds.py +++ b/apps/rss_feeds/management/commands/merge_feeds.py @@ -1,12 +1,7 @@ from django.core.management.base import BaseCommand -from apps.rss_feeds.models import Feed, Story, DuplicateFeed -from apps.reader.models import UserSubscription, UserStory, UserSubscriptionFolders -from apps.analyzer.models import FeatureCategory, Category, ClassifierTitle -from apps.analyzer.models import ClassifierAuthor, ClassifierFeed, ClassifierTag +from apps.rss_feeds.models import merge_feeds, MStory from optparse import make_option from django.db import connection -from django.db.utils import IntegrityError -from utils import json class Command(BaseCommand): option_list = BaseCommand.option_list + ( @@ -18,21 +13,15 @@ class Command(BaseCommand): cursor = connection.cursor() cursor.execute("""SELECT DISTINCT f.id AS original_id, f2.id AS duplicate_id, f.feed_address AS original_feed_address, - f2.feed_address AS duplicate_feed_address - """ - # f.feed_title AS original_feed_title, - # f2.feed_title AS duplicate_feed_title, - # f.feed_link AS original_feed_link, - # f2.feed_link AS duplicate_feed_link, - # f2.feed_tagline AS original_feed_tagline, - # f.feed_tagline AS duplicate_feed_tagline - """ - FROM stories s1 - INNER JOIN stories s2 ON s1.story_guid_hash = s2.story_guid_hash - INNER JOIN feeds f ON f.id = s1.story_feed_id - INNER JOIN feeds f2 ON f2.id = s2.story_feed_id - WHERE s1.story_feed_id != s2.story_feed_id - AND f2.id > f.id + f2.feed_address AS duplicate_feed_address, + f.feed_title AS original_feed_title, + f2.feed_title AS duplicate_feed_title, + f.feed_link AS original_feed_link, + f2.feed_link AS duplicate_feed_link, + f2.feed_tagline AS original_feed_tagline, + f.feed_tagline AS duplicate_feed_tagline + FROM feeds f, feeds f2 + WHERE f2.id > f.id AND f.feed_tagline = f2.feed_tagline AND f.feed_link = f2.feed_link AND f.feed_title = f2.feed_title @@ -41,107 +30,13 @@ class Command(BaseCommand): feed_fields = ('original_id', 'duplicate_id', 'original_feed_address', 'duplicate_feed_address') for feeds_values in cursor.fetchall(): feeds = dict(zip(feed_fields, feeds_values)) - try: - original_feed = Feed.objects.get(pk=feeds['original_id']) - duplicate_feed = Feed.objects.get(pk=feeds['duplicate_id']) - except Feed.DoesNotExist: - print " ***> Already deleted feed: %s" % feeds['duplicate_id'] + + original_stories = MStory.objects(story_feed_id=feeds['original_id']).only('story_guid')[10:12] + original_story_ids = [story.id for story in original_stories] + if MStory.objects(story_feed_id=feeds['duplicate_id'], story_guid__in=original_story_ids).count() != 3: + print "Skipping: %s" % (feeds) continue - - print " ---> Feed: [%s - %s] %s - %s" % (feeds['original_id'], feeds['duplicate_id'], - original_feed, original_feed.feed_link) - print " --> %s" % feeds['original_feed_address'] - print " --> %s" % feeds['duplicate_feed_address'] - - user_subs = UserSubscription.objects.filter(feed=duplicate_feed) - for user_sub in user_subs: - # Rewrite feed in subscription folders - try: - user_sub_folders = UserSubscriptionFolders.objects.get(user=user_sub.user) - except Exception, e: - print " *** ---> UserSubscriptionFolders error: %s" % e - continue - folders = json.decode(user_sub_folders.folders) - folders = self.rewrite_folders(folders, original_feed, duplicate_feed) - user_sub_folders.folders = json.encode(folders) - user_sub_folders.save() - - # Switch to original feed for the user subscription - print " ===> %s " % user_sub.user - user_sub.feed = original_feed - user_sub.needs_unread_recalc = True - try: - user_sub.save() - pass - except IntegrityError: - print " !!!!> %s already subscribed" % user_sub.user - user_sub.delete() - - # Switch read stories - user_stories = UserStory.objects.filter(feed=duplicate_feed) - print " ---> %s read stories" % user_stories.count() - for user_story in user_stories: - user_story.feed = original_feed - duplicate_story = user_story.story - original_story = Story.objects.filter(story_guid_hash=duplicate_story.story_guid_hash, - story_feed=original_feed) - if original_story: - user_story.story = original_story[0] - else: - print " ***> Can't find original story: %s" % duplicate_story - try: - user_story.save() - except IntegrityError: - print " ***> Story already saved: %s" % user_story - - def delete_story_feed(model, feed_field='feed'): - duplicate_stories = model.objects.filter(**{feed_field: duplicate_feed}) - # if duplicate_stories.count(): - # print " ---> Deleting %s %s" % (duplicate_stories.count(), model) - duplicate_stories.delete() - def switch_feed(model): - duplicates = model.objects.filter(feed=duplicate_feed) - if duplicates.count(): - print " ---> Switching %s %s" % (duplicates.count(), model) - for duplicate in duplicates: - duplicate.feed = original_feed - try: - duplicate.save() - pass - except IntegrityError: - print " !!!!> %s already exists" % duplicate - duplicates.delete() - delete_story_feed(Story, 'story_feed') - switch_feed(FeatureCategory) - switch_feed(Category) - switch_feed(ClassifierTitle) - switch_feed(ClassifierAuthor) - switch_feed(ClassifierFeed) - switch_feed(ClassifierTag) - - try: - DuplicateFeed.objects.create( - duplicate_address=duplicate_feed.feed_address, - feed=original_feed - ) - except IntegrityError: - pass - - duplicate_feed.delete() - - def rewrite_folders(self, folders, original_feed, duplicate_feed): - new_folders = [] - - for k, folder in enumerate(folders): - if isinstance(folder, int): - if folder == duplicate_feed.pk: - # print " ===> Rewrote %s'th item: %s" % (k+1, folders) - new_folders.append(original_feed.pk) - else: - new_folders.append(folder) - elif isinstance(folder, dict): - for f_k, f_v in folder.items(): - new_folders.append({f_k: self.rewrite_folders(f_v, original_feed, duplicate_feed)}) - - return new_folders - \ No newline at end of file + else: + print "Merging: %s" % feeds + # merge_feeds(feeds['original_id'], feeds['duplicate_id']) + \ No newline at end of file