NewsBlur-viq/apps/rss_feeds/management/commands/merge_feeds.py

50 lines
No EOL
2.6 KiB
Python

from django.core.management.base import BaseCommand
from apps.rss_feeds.models import merge_feeds, MStory
from optparse import make_option
from django.db import connection
class Command(BaseCommand):
    """Management command that finds likely-duplicate feeds and merges them.

    Candidate pairs are two rows of the ``feeds`` table that share the same
    title, link and tagline.  The row with the lower id is treated as the
    original.  A pair is merged only when a small sample of the duplicate
    feed's stories is also present, by GUID, in the original feed.
    """

    option_list = BaseCommand.option_list + (
        # NOTE(review): --feed is accepted but nothing in handle() reads it.
        make_option("-f", "--feed", dest="feed", default=None),
        make_option("-V", "--verbose", dest="verbose", action="store_true"),
    )

    def handle(self, *args, **options):
        """Scan for duplicate feed pairs, merge the confirmed ones, print totals.

        Options read:
            verbose: when set, also print every candidate pair that is skipped.
        """
        verbose = options.get('verbose')

        cursor = connection.cursor()
        try:
            # The joins are written as one explicit chain.  Mixing a comma
            # join with INNER JOIN puts ``f`` out of scope in the first ON
            # clause, because JOIN binds tighter than the comma.
            # NOTE(review): ``f.feed_id`` is kept from the original query, but
            # the rest of the query uses ``f.id`` as the feed's key -- confirm
            # that ``feeds`` really has a ``feed_id`` column.
            cursor.execute("""SELECT DISTINCT f.id AS original_id, f2.id AS duplicate_id,
                                     f.feed_address AS original_feed_address,
                                     f2.feed_address AS duplicate_feed_address,
                                     f.feed_title AS original_feed_title,
                                     f2.feed_title AS duplicate_feed_title,
                                     f.feed_link AS original_feed_link,
                                     f2.feed_link AS duplicate_feed_link,
                                     fd.feed_tagline AS original_feed_tagline,
                                     fd2.feed_tagline AS duplicate_feed_tagline
                              FROM feeds f
                              INNER JOIN feeds f2 ON f2.id > f.id
                              INNER JOIN rss_feeds_feeddata fd ON fd.feed_id = f.feed_id
                              INNER JOIN rss_feeds_feeddata fd2 ON fd2.feed_id = f2.feed_id
                              WHERE fd.feed_tagline = fd2.feed_tagline
                                  AND f.feed_link = f2.feed_link
                                  AND f.feed_title = f2.feed_title
                              ORDER BY original_id ASC;""")
            rows = cursor.fetchall()
        finally:
            cursor.close()

        # Only the first four selected columns are used below; zip() drops
        # the remaining ones.
        feed_fields = ('original_id', 'duplicate_id',
                       'original_feed_address', 'duplicate_feed_address')
        skips = 0
        merges = 0
        for feeds_values in rows:
            feeds = dict(zip(feed_fields, feeds_values))

            # Sample up to three stories (queryset positions 5-7) from the
            # duplicate feed and look their GUIDs up in the original feed.
            duplicate_stories = MStory.objects(
                story_feed_id=feeds['duplicate_id']).only('story_guid')[5:8]
            duplicate_story_ids = [story.story_guid for story in duplicate_stories]
            original_stories = MStory.objects(
                story_feed_id=feeds['original_id'],
                story_guid__in=duplicate_story_ids)

            # Use the length of the already-fetched sample instead of calling
            # count() on the sliced queryset: it saves a query and does not
            # depend on whether count() honours skip/limit.
            # NOTE(review): a duplicate feed with too few stories yields an
            # empty sample, so 0 == 0 and the pair is merged -- confirm that
            # this is intended.
            if len(duplicate_story_ids) == original_stories.count():
                merges += 1
                merge_feeds(feeds['original_id'], feeds['duplicate_id'])
            else:
                skips += 1
                if verbose:
                    print("Skipping: %s" % feeds)

        print("Skips: %s, Merges: %s" % (skips, merges))