Rudimentary sorting of global keywords based on subscribers.

This commit is contained in:
Samuel Clay 2016-10-06 15:24:53 -07:00
parent ce3cdd0919
commit 4a334a50b0
2 changed files with 63 additions and 0 deletions

View file

@ -0,0 +1,21 @@
from django.core.management.base import BaseCommand
from apps.reader.models import UserSubscription
from django.conf import settings
from optparse import make_option
from django.contrib.auth.models import User
from apps.rss_feeds.models import Feed
import os
import errno
import re
import datetime
class Command(BaseCommand):
option_list = BaseCommand.option_list + (
make_option("-q", "--query", dest="query", help="Search query"),
make_option("-l", "--limit", dest="limit", type="int", default=1000, help="Limit of stories"),
)
def handle(self, *args, **options):
settings.LOG_TO_STREAM = True
Feed.query_popularity(options['query'], limit=options['limit'])

View file

@ -11,6 +11,7 @@ import redis
import pymongo
import HTMLParser
from collections import defaultdict
from pprint import pprint
from operator import itemgetter
from bson.objectid import ObjectId
from BeautifulSoup import BeautifulSoup
@ -1449,6 +1450,47 @@ class Feed(models.Model):
stories = cls.format_stories(stories_db)
return stories
@classmethod
def query_popularity(cls, query, limit=5000, order='newest'):
popularity = {}
# Collect stories, sort by feed
story_ids = SearchStory.global_query(query, order=order, offset=0, limit=limit)
for story_hash in story_ids:
feed_id, story_id = MStory.split_story_hash(story_hash)
if feed_id not in popularity:
feed = Feed.get_by_id(feed_id)
popularity[feed_id] = {
'feed_title': feed.feed_title,
'num_subscribers': feed.num_subscribers,
'feed_id': feed.pk,
'story_ids': [],
'authors': {},
}
popularity[feed_id]['story_ids'].append(story_hash)
sorted_popularity = sorted(popularity.values(), key=lambda x: x['num_subscribers'],
reverse=True)
# Extract story authors from feeds
for feed in sorted_popularity:
story_ids = feed['story_ids']
stories_db = MStory.objects(story_hash__in=story_ids)
stories = cls.format_stories(stories_db)
for story in stories:
if story['story_authors'] not in feed['authors']:
feed['authors'][story['story_authors']] = {
'count': 0,
'tags': {}
}
feed['authors'][story['story_authors']]['count'] += 1
for tag in story['story_tags']:
if tag not in feed['authors'][story['story_authors']]['tags']:
feed['authors'][story['story_authors']]['tags'][tag] = 0
feed['authors'][story['story_authors']]['tags'][tag] += 1
pprint(sorted_popularity)
def find_stories(self, query, order="newest", offset=0, limit=25):
story_ids = SearchStory.query(feed_ids=[self.pk], query=query, order=order,