2010-08-22 18:34:40 -04:00
|
|
|
import mongoengine as mongo
|
2012-05-29 11:48:40 -07:00
|
|
|
from collections import defaultdict
|
2009-06-16 03:08:55 +00:00
|
|
|
from django.db import models
|
|
|
|
from django.contrib.auth.models import User
|
2011-01-17 23:24:30 -05:00
|
|
|
from apps.rss_feeds.models import Feed
|
2009-11-03 03:52:03 +00:00
|
|
|
|
|
|
|
class FeatureCategory(models.Model):
    """Django model tying a (feature, category) pair and a count to a user and a feed."""

    # Foreign keys to the Django auth user and to the Feed model.
    user = models.ForeignKey(User)
    feed = models.ForeignKey(Feed)

    # Both text columns are capped at 255 characters; count starts at 0.
    feature = models.CharField(max_length=255)
    category = models.CharField(max_length=255)
    count = models.IntegerField(default=0)

    def __unicode__(self):
        """Return ``<feature> - <category> (<count>)``."""
        parts = (self.feature, self.category, self.count)
        return '%s - %s (%s)' % parts
|
|
|
|
|
|
|
|
class Category(models.Model):
    """Django model storing a category name and a count for a user and a feed."""

    # Foreign keys to the Django auth user and to the Feed model.
    user = models.ForeignKey(User)
    feed = models.ForeignKey(Feed)

    # Category text is capped at 255 characters; count starts at 0.
    category = models.CharField(max_length=255)
    count = models.IntegerField(default=0)

    def __unicode__(self):
        """Return ``<category> (<count>)``."""
        parts = (self.category, self.count)
        return '%s (%s)' % parts
|
2011-01-17 23:24:30 -05:00
|
|
|
|
2010-08-22 18:34:40 -04:00
|
|
|
class MClassifierTitle(mongo.Document):
    """Mongo document holding a scored title string for a user.

    Documents live in the ``classifier_title`` collection and carry both a
    ``feed_id`` and a ``social_user_id`` next to the title and its score.
    """

    user_id = mongo.IntField()
    feed_id = mongo.IntField()
    social_user_id = mongo.IntField()
    title = mongo.StringField(max_length=255)
    score = mongo.IntField()
    creation_date = mongo.DateTimeField()

    meta = {
        'collection': 'classifier_title',
        'indexes': [
            ('user_id', 'feed_id'),
            'feed_id',
            ('user_id', 'social_user_id'),
            'social_user_id',
        ],
        'allow_inheritance': False,
    }

    def __unicode__(self):
        """Return a one-line summary; fetches the Django user by primary key."""
        owner = User.objects.get(pk=self.user_id)
        # Only the first 30 characters of the title are shown.
        fields = (owner, self.feed_id, self.social_user_id, self.score, self.title[:30])
        return "%s - %s/%s: (%s) %s" % fields
|
|
|
|
|
2010-08-22 18:34:40 -04:00
|
|
|
|
|
|
|
class MClassifierAuthor(mongo.Document):
    """Mongo document holding a scored author name for a user.

    Documents live in the ``classifier_author`` collection.
    """

    # user_id is declared unique together with feed_id, social_user_id and author.
    user_id = mongo.IntField(unique_with=('feed_id', 'social_user_id', 'author'))
    feed_id = mongo.IntField()
    social_user_id = mongo.IntField()
    author = mongo.StringField(max_length=255)
    score = mongo.IntField()
    creation_date = mongo.DateTimeField()

    meta = {
        'collection': 'classifier_author',
        'indexes': [
            ('user_id', 'feed_id'),
            'feed_id',
            ('user_id', 'social_user_id'),
            'social_user_id',
        ],
        'allow_inheritance': False,
    }

    def __unicode__(self):
        """Return a one-line summary; fetches the Django user by primary key."""
        owner = User.objects.get(pk=self.user_id)
        # Only the first 30 characters of the author name are shown.
        fields = (owner, self.feed_id, self.social_user_id, self.score, self.author[:30])
        return "%s - %s/%s: (%s) %s" % fields
|
2010-01-21 13:12:29 -05:00
|
|
|
|
2012-02-14 10:34:10 -08:00
|
|
|
class MClassifierTag(mongo.Document):
    """Mongo document holding a scored tag for a user.

    Documents live in the ``classifier_tag`` collection.
    """

    # user_id is declared unique together with feed_id, social_user_id and tag.
    user_id = mongo.IntField(unique_with=('feed_id', 'social_user_id', 'tag'))
    feed_id = mongo.IntField()
    social_user_id = mongo.IntField()
    tag = mongo.StringField(max_length=255)
    score = mongo.IntField()
    creation_date = mongo.DateTimeField()

    meta = {
        'collection': 'classifier_tag',
        'indexes': [
            ('user_id', 'feed_id'),
            'feed_id',
            ('user_id', 'social_user_id'),
            'social_user_id',
        ],
        'allow_inheritance': False,
    }

    def __unicode__(self):
        """Return a one-line summary; fetches the Django user by primary key."""
        owner = User.objects.get(pk=self.user_id)
        # Only the first 30 characters of the tag are shown.
        fields = (owner, self.feed_id, self.social_user_id, self.score, self.tag[:30])
        return "%s - %s/%s: (%s) %s" % fields
|
|
|
|
|
2012-02-14 10:34:10 -08:00
|
|
|
|
|
|
|
class MClassifierFeed(mongo.Document):
    """Mongo document holding a score for a feed or a social user, per user.

    Documents live in the ``classifier_feed`` collection.
    """

    # user_id is declared unique together with feed_id and social_user_id.
    user_id = mongo.IntField(unique_with=('feed_id', 'social_user_id'))
    feed_id = mongo.IntField()
    social_user_id = mongo.IntField()
    score = mongo.IntField()
    creation_date = mongo.DateTimeField()

    meta = {
        'collection': 'classifier_feed',
        'indexes': [
            ('user_id', 'feed_id'),
            'feed_id',
            ('user_id', 'social_user_id'),
            'social_user_id',
        ],
        'allow_inheritance': False,
    }

    def __unicode__(self):
        """Return a one-line summary; fetches the owner and the scored target."""
        owner = User.objects.get(pk=self.user_id)
        # With a truthy feed_id the target is a Feed; otherwise it is the
        # Django user identified by social_user_id.
        target = (Feed.get_by_id(self.feed_id) if self.feed_id
                  else User.objects.get(pk=self.social_user_id))
        fields = (owner, self.feed_id, self.social_user_id, self.score, target)
        return "%s - %s/%s: (%s) %s" % fields
|
2012-08-29 18:07:54 -07:00
|
|
|
|
2014-01-27 17:16:34 -08:00
|
|
|
|
|
|
|
def compute_story_score(story, classifier_titles, classifier_authors, classifier_tags, classifier_feeds):
    """Collapse the feed, author, tag and title classifier scores into one score.

    The largest of the title/author/tag scores is returned when it is
    positive; failing that, the smallest is returned when it is negative;
    when neither applies the feed score is returned.

    ``story`` must be indexable by ``'story_feed_id'``; it is also handed
    unchanged to the author, tag and title helpers.
    """
    # Helpers are invoked in the order feed, author, tags, title.
    feed_score = apply_classifier_feeds(classifier_feeds, story['story_feed_id'])
    author_score = apply_classifier_authors(classifier_authors, story)
    tag_score = apply_classifier_tags(classifier_tags, story)
    title_score = apply_classifier_titles(classifier_titles, story)

    content_scores = (title_score, author_score, tag_score)

    highest = max(content_scores)
    if highest > 0:
        return highest

    lowest = min(content_scores)
    if lowest < 0:
        return lowest

    # No content classifier had an opinion: fall back to the feed score.
    return feed_score
|
2010-08-22 18:34:40 -04:00
|
|
|
|
2010-01-21 13:12:29 -05:00
|
|
|
def apply_classifier_titles(classifiers, story):
    """Score a story's title against title classifiers for the same feed.

    A classifier matches when its title occurs, case-insensitively, inside
    ``story['story_title']``. The first matching classifier with a positive
    score wins immediately; otherwise the score of the last match is
    returned, or 0 when nothing matched.
    """
    last_match = 0
    for candidate in classifiers:
        # Only classifiers for the story's own feed are considered.
        if candidate.feed_id == story['story_feed_id']:
            needle = candidate.title.lower()
            haystack = story['story_title'].lower()
            if needle in haystack:
                last_match = candidate.score
                if last_match > 0:
                    return last_match
    return last_match
|
2010-01-21 13:12:29 -05:00
|
|
|
|
|
|
|
def apply_classifier_authors(classifiers, story):
    """Score a story's author against author classifiers for the same feed.

    A classifier matches when the story has a truthy ``'story_authors'``
    value equal to the classifier's author. The first matching classifier
    with a positive score wins immediately; otherwise the score of the last
    match is returned, or 0 when nothing matched.
    """
    result = 0
    for entry in classifiers:
        same_feed = entry.feed_id == story['story_feed_id']
        if not same_feed:
            continue

        story_author = story.get('story_authors')
        if not story_author:
            continue
        if not (entry.author == story_author):
            continue

        result = entry.score
        if result > 0:
            return result
    return result
|
2010-01-21 13:12:29 -05:00
|
|
|
|
|
|
|
def apply_classifier_tags(classifiers, story):
    """Score a story's tags against tag classifiers for the same feed.

    A classifier matches when its tag is contained in the story's
    ``'story_tags'`` value. The first matching classifier with a positive
    score wins immediately; otherwise the score of the last match is
    returned, or 0 when nothing matched.

    A story with no ``'story_tags'`` key is treated like a story with no
    tags (score 0) instead of raising ``KeyError``, mirroring how
    ``'story_authors'`` is read with ``.get`` for author classifiers.
    """
    # Loop-invariant: read the tags once, tolerating a missing key.
    story_tags = story.get('story_tags')

    score = 0
    for classifier in classifiers:
        # Only classifiers for the story's own feed are considered.
        if classifier.feed_id != story['story_feed_id']:
            continue
        if story_tags and classifier.tag in story_tags:
            score = classifier.score
            if score > 0:
                return score
    return score
|
2010-03-23 20:03:40 -04:00
|
|
|
|
2012-09-24 17:38:46 -07:00
|
|
|
def apply_classifier_feeds(classifiers, feed, social_user_ids=None):
    """Return the score of the first classifier matching a feed or social user.

    Args:
        classifiers: iterable of objects exposing ``feed_id``,
            ``social_user_id`` and ``score``.
        feed: an ``int`` feed id, or an object whose ``pk`` is the feed id.
            A falsy value means "no feed".
        social_user_ids: a single id or a list/tuple/set of ids; optional.

    Returns:
        The matching classifier's score, or 0 when nothing matches or when
        neither ``feed`` nor ``social_user_ids`` is given.
    """
    if not feed and not social_user_ids:
        return 0

    feed_id = None
    if feed:
        feed_id = feed if isinstance(feed, int) else feed.pk

    # Normalise a lone id into a list; real collections are used as they are.
    if social_user_ids and not isinstance(social_user_ids, (list, tuple, set, frozenset)):
        social_user_ids = [social_user_ids]

    for classifier in classifiers:
        # Without a feed, feed_id is None. It must not be compared against
        # classifier.feed_id, or every classifier lacking a feed_id would
        # match and skip the social_user_ids membership test below.
        if feed_id is not None and classifier.feed_id == feed_id:
            return classifier.score
        if (social_user_ids and not classifier.feed_id and
                classifier.social_user_id in social_user_ids):
            return classifier.score
    return 0
|
|
|
|
|
2012-02-14 10:34:10 -08:00
|
|
|
def get_classifiers_for_user(user, feed_id=None, social_user_id=None, classifier_feeds=None, classifier_authors=None,
                             classifier_titles=None, classifier_tags=None):
    """Build the classifier payload (feeds, authors, titles, tags) for a user.

    Any ``classifier_*`` argument left as ``None`` is loaded from its Mongo
    document class, filtered by ``user.pk`` plus the optional feed and
    social-user filters. ``feed_id`` may be one id or a list of ids;
    ``social_user_id`` may be a number or a string such as ``'social:<id>'``.

    Returns:
        A dict with keys ``'feeds'``, ``'authors'``, ``'titles'`` and
        ``'tags'``, each mapping an identifier to a score. Feed classifiers
        that have a social user id and no feed id are keyed as
        ``'social:<social_user_id>'``; all others by ``feed_id``.
    """
    query = {'user_id': user.pk}

    if isinstance(feed_id, list):
        query['feed_id__in'] = feed_id
    elif feed_id:
        query['feed_id'] = feed_id

    if social_user_id:
        if isinstance(social_user_id, basestring):
            # Drop the 'social:' marker and keep the numeric id.
            social_user_id = int(social_user_id.replace('social:', ''))
        query['social_user_id'] = social_user_id

    if classifier_authors is None:
        classifier_authors = list(MClassifierAuthor.objects(**query))
    if classifier_titles is None:
        classifier_titles = list(MClassifierTitle.objects(**query))
    if classifier_tags is None:
        classifier_tags = list(MClassifierTag.objects(**query))
    if classifier_feeds is None:
        # Applied to the feed query only, and only after the other three
        # queries have run: pin social_user_id to 0 for a plain feed lookup.
        if not social_user_id and feed_id:
            query['social_user_id'] = 0
        classifier_feeds = list(MClassifierFeed.objects(**query))

    feed_scores = {}
    for feed_classifier in classifier_feeds:
        if feed_classifier.social_user_id and not feed_classifier.feed_id:
            key = 'social:%s' % feed_classifier.social_user_id
        else:
            key = feed_classifier.feed_id
        feed_scores[key] = feed_classifier.score

    return {
        'feeds': feed_scores,
        'authors': dict((a.author, a.score) for a in classifier_authors),
        'titles': dict((t.title, t.score) for t in classifier_titles),
        'tags': dict((t.tag, t.score) for t in classifier_tags),
    }
|
|
|
|
|
|
|
|
def sort_classifiers_by_feed(user, feed_ids=None,
                             classifier_feeds=None,
                             classifier_authors=None,
                             classifier_titles=None,
                             classifier_tags=None):
    """Return a ``{feed_id: payload}`` dict of classifier payloads per feed.

    Each of the four classifier collections is grouped by ``feed_id`` and
    the per-feed slices are passed to ``get_classifiers_for_user``. An empty
    or missing ``feed_ids`` yields an empty dict.

    NOTE: when ``feed_ids`` is non-empty, all four ``classifier_*``
    arguments are iterated, so they must be iterables; the ``None``
    defaults are not usable in that case.
    """
    if not feed_ids:
        return {}

    def group_by_feed(items):
        # Bucket classifiers under their feed_id; unseen feeds give [].
        grouped = defaultdict(list)
        for item in items:
            grouped[item.feed_id].append(item)
        return grouped

    feeds_by_id = group_by_feed(classifier_feeds)
    authors_by_id = group_by_feed(classifier_authors)
    titles_by_id = group_by_feed(classifier_titles)
    tags_by_id = group_by_feed(classifier_tags)

    payloads = {}
    for feed_id in feed_ids:
        payloads[feed_id] = get_classifiers_for_user(
            user,
            feed_id=feed_id,
            classifier_feeds=feeds_by_id[feed_id],
            classifier_authors=authors_by_id[feed_id],
            classifier_titles=titles_by_id[feed_id],
            classifier_tags=tags_by_id[feed_id],
        )
    return payloads
|