from optparse import make_option

from django.core.management.base import BaseCommand
from django.db import connection
from django.db.utils import IntegrityError

from apps.analyzer.models import FeatureCategory, Category, ClassifierTitle
from apps.analyzer.models import ClassifierAuthor, ClassifierFeed, ClassifierTag
from apps.reader.models import UserSubscription, UserStory, UserSubscriptionFolders
from apps.rss_feeds.models import Feed, Story, DuplicateFeed
from utils import json

class Command(BaseCommand):
|
|
|
|
option_list = BaseCommand.option_list + (
|
|
|
|
make_option("-f", "--feed", dest="feed", default=None),
|
|
|
|
make_option("-V", "--verbose", dest="verbose", action="store_true"),
|
|
|
|
)
|
|
|
|
|
|
|
|
def handle(self, *args, **options):
|
2010-07-20 22:57:18 -04:00
|
|
|
cursor = connection.cursor()
|
|
|
|
cursor.execute("""SELECT DISTINCT f.id AS original_id, f2.id AS duplicate_id,
|
|
|
|
f.feed_address AS original_feed_address,
|
|
|
|
f2.feed_address AS duplicate_feed_address
|
2010-07-20 23:21:10 -04:00
|
|
|
"""
|
2010-07-20 22:57:18 -04:00
|
|
|
# f.feed_title AS original_feed_title,
|
|
|
|
# f2.feed_title AS duplicate_feed_title,
|
|
|
|
# f.feed_link AS original_feed_link,
|
|
|
|
# f2.feed_link AS duplicate_feed_link,
|
|
|
|
# f2.feed_tagline AS original_feed_tagline,
|
|
|
|
# f.feed_tagline AS duplicate_feed_tagline
|
2010-07-20 23:21:10 -04:00
|
|
|
"""
|
2010-07-20 22:57:18 -04:00
|
|
|
FROM stories s1
|
|
|
|
INNER JOIN stories s2 ON s1.story_guid_hash = s2.story_guid_hash
|
|
|
|
INNER JOIN feeds f ON f.id = s1.story_feed_id
|
|
|
|
INNER JOIN feeds f2 ON f2.id = s2.story_feed_id
|
|
|
|
WHERE s1.story_feed_id != s2.story_feed_id
|
|
|
|
AND f2.id > f.id
|
|
|
|
AND f.feed_tagline = f2.feed_tagline
|
|
|
|
AND f.feed_link = f2.feed_link
|
2010-07-20 23:40:09 -04:00
|
|
|
AND f.feed_title = f2.feed_title
|
|
|
|
ORDER BY original_id ASC;""")
|
2010-06-27 22:40:22 -04:00
|
|
|
|
2010-07-20 22:57:18 -04:00
|
|
|
feed_fields = ('original_id', 'duplicate_id', 'original_feed_address', 'duplicate_feed_address')
|
|
|
|
for feeds_values in cursor.fetchall():
|
|
|
|
feeds = dict(zip(feed_fields, feeds_values))
|
2010-07-20 23:40:09 -04:00
|
|
|
try:
|
|
|
|
original_feed = Feed.objects.get(pk=feeds['original_id'])
|
|
|
|
duplicate_feed = Feed.objects.get(pk=feeds['duplicate_id'])
|
|
|
|
except Feed.DoesNotExist:
|
|
|
|
print " ***> Already deleted feed: %s" % feeds['duplicate_id']
|
|
|
|
continue
|
2010-07-20 22:57:18 -04:00
|
|
|
|
|
|
|
print " ---> Feed: [%s - %s] %s - %s" % (feeds['original_id'], feeds['duplicate_id'],
|
|
|
|
original_feed, original_feed.feed_link)
|
|
|
|
print " --> %s" % feeds['original_feed_address']
|
|
|
|
print " --> %s" % feeds['duplicate_feed_address']
|
|
|
|
|
|
|
|
user_subs = UserSubscription.objects.filter(feed=duplicate_feed)
|
|
|
|
for user_sub in user_subs:
|
|
|
|
# Rewrite feed in subscription folders
|
2010-08-11 17:13:39 -04:00
|
|
|
try:
|
|
|
|
user_sub_folders = UserSubscriptionFolders.objects.get(user=user_sub.user)
|
|
|
|
except Exception, e:
|
|
|
|
print " *** ---> UserSubscriptionFolders error: %s" % e
|
|
|
|
continue
|
2010-07-20 22:57:18 -04:00
|
|
|
folders = json.decode(user_sub_folders.folders)
|
|
|
|
folders = self.rewrite_folders(folders, original_feed, duplicate_feed)
|
|
|
|
user_sub_folders.folders = json.encode(folders)
|
2010-07-20 23:30:31 -04:00
|
|
|
user_sub_folders.save()
|
2010-07-20 22:57:18 -04:00
|
|
|
|
|
|
|
# Switch to original feed for the user subscription
|
|
|
|
print " ===> %s " % user_sub.user
|
|
|
|
user_sub.feed = original_feed
|
2010-07-30 20:04:02 -04:00
|
|
|
user_sub.needs_unread_recalc = True
|
2010-07-20 22:57:18 -04:00
|
|
|
try:
|
2010-07-20 23:30:31 -04:00
|
|
|
user_sub.save()
|
2010-07-20 22:57:18 -04:00
|
|
|
pass
|
|
|
|
except IntegrityError:
|
|
|
|
print " !!!!> %s already subscribed" % user_sub.user
|
2010-07-20 23:30:31 -04:00
|
|
|
user_sub.delete()
|
2010-07-20 22:57:18 -04:00
|
|
|
|
|
|
|
# Switch read stories
|
|
|
|
user_stories = UserStory.objects.filter(feed=duplicate_feed)
|
|
|
|
print " ---> %s read stories" % user_stories.count()
|
|
|
|
for user_story in user_stories:
|
|
|
|
user_story.feed = original_feed
|
|
|
|
duplicate_story = user_story.story
|
|
|
|
original_story = Story.objects.filter(story_guid_hash=duplicate_story.story_guid_hash,
|
|
|
|
story_feed=original_feed)
|
|
|
|
if original_story:
|
|
|
|
user_story.story = original_story[0]
|
|
|
|
else:
|
|
|
|
print " ***> Can't find original story: %s" % duplicate_story
|
2010-07-20 23:46:29 -04:00
|
|
|
try:
|
|
|
|
user_story.save()
|
|
|
|
except IntegrityError:
|
|
|
|
print " ***> Story already saved: %s" % user_story
|
2010-07-20 22:57:18 -04:00
|
|
|
|
2010-07-20 23:18:38 -04:00
|
|
|
def delete_story_feed(model, feed_field='feed'):
|
|
|
|
duplicate_stories = model.objects.filter(**{feed_field: duplicate_feed})
|
2010-07-24 15:54:25 -04:00
|
|
|
# if duplicate_stories.count():
|
|
|
|
# print " ---> Deleting %s %s" % (duplicate_stories.count(), model)
|
2010-07-20 23:30:31 -04:00
|
|
|
duplicate_stories.delete()
|
2010-07-20 23:18:38 -04:00
|
|
|
def switch_feed(model):
|
|
|
|
duplicates = model.objects.filter(feed=duplicate_feed)
|
|
|
|
if duplicates.count():
|
|
|
|
print " ---> Switching %s %s" % (duplicates.count(), model)
|
|
|
|
for duplicate in duplicates:
|
|
|
|
duplicate.feed = original_feed
|
|
|
|
try:
|
2010-07-20 23:30:31 -04:00
|
|
|
duplicate.save()
|
2010-07-20 23:18:38 -04:00
|
|
|
pass
|
|
|
|
except IntegrityError:
|
|
|
|
print " !!!!> %s already exists" % duplicate
|
2010-07-20 23:30:31 -04:00
|
|
|
duplicates.delete()
|
2010-07-20 23:18:38 -04:00
|
|
|
delete_story_feed(Story, 'story_feed')
|
|
|
|
switch_feed(FeatureCategory)
|
|
|
|
switch_feed(Category)
|
|
|
|
switch_feed(ClassifierTitle)
|
|
|
|
switch_feed(ClassifierAuthor)
|
|
|
|
switch_feed(ClassifierFeed)
|
|
|
|
switch_feed(ClassifierTag)
|
2010-07-20 23:30:31 -04:00
|
|
|
|
2010-08-19 10:43:07 -04:00
|
|
|
try:
|
|
|
|
DuplicateFeed.objects.create(
|
|
|
|
duplicate_address=duplicate_feed.feed_address,
|
|
|
|
feed=original_feed
|
|
|
|
)
|
|
|
|
except IntegrityError:
|
|
|
|
pass
|
|
|
|
|
2010-07-20 23:30:31 -04:00
|
|
|
duplicate_feed.delete()
|
2010-07-20 22:57:18 -04:00
|
|
|
|
|
|
|
def rewrite_folders(self, folders, original_feed, duplicate_feed):
|
|
|
|
new_folders = []
|
2010-06-27 22:40:22 -04:00
|
|
|
|
2010-07-20 22:57:18 -04:00
|
|
|
for k, folder in enumerate(folders):
|
|
|
|
if isinstance(folder, int):
|
|
|
|
if folder == duplicate_feed.pk:
|
2010-07-20 23:30:31 -04:00
|
|
|
# print " ===> Rewrote %s'th item: %s" % (k+1, folders)
|
2010-07-20 22:57:18 -04:00
|
|
|
new_folders.append(original_feed.pk)
|
|
|
|
else:
|
|
|
|
new_folders.append(folder)
|
|
|
|
elif isinstance(folder, dict):
|
|
|
|
for f_k, f_v in folder.items():
|
|
|
|
new_folders.append({f_k: self.rewrite_folders(f_v, original_feed, duplicate_feed)})
|
|
|
|
|
|
|
|
return new_folders
|
|
|
|
|