NewsBlur/apps/analyzer/models.py
2024-04-24 09:50:42 -04:00

327 lines
11 KiB
Python

import datetime
from collections import defaultdict
import mongoengine as mongo
from django.conf import settings
from django.contrib.auth.models import User
from django.core.mail import EmailMultiAlternatives
from django.db import models
from django.template.loader import render_to_string
from apps.analyzer.tasks import EmailPopularityQuery
from apps.rss_feeds.models import Feed
from utils import log as logging
class FeatureCategory(models.Model):
    """Count recorded for a (feature, category) pair, scoped to one user and one feed."""

    user = models.ForeignKey(User, on_delete=models.CASCADE)
    feed = models.ForeignKey(Feed, on_delete=models.CASCADE)
    feature = models.CharField(max_length=255)
    category = models.CharField(max_length=255)
    count = models.IntegerField(default=0)

    def __str__(self):
        """Render as ``<feature> - <category> (<count>)``."""
        fields = (self.feature, self.category, self.count)
        return "%s - %s (%s)" % fields
class Category(models.Model):
    """Count recorded for a category, scoped to one user and one feed."""

    user = models.ForeignKey(User, on_delete=models.CASCADE)
    feed = models.ForeignKey(Feed, on_delete=models.CASCADE)
    category = models.CharField(max_length=255)
    count = models.IntegerField(default=0)

    def __str__(self):
        """Render as ``<category> (<count>)``."""
        fields = (self.category, self.count)
        return "%s (%s)" % fields
class MPopularityQuery(mongo.Document):
    """A keyword popularity query whose spreadsheet result is emailed to the requester.

    Stored in the ``popularity_query`` collection. ``is_emailed`` is set once
    ``send_email`` has completed, so repeated ``ensure_sent`` calls do not
    re-send the same query.
    """

    email = mongo.StringField()
    query = mongo.StringField()
    is_emailed = mongo.BooleanField()
    creation_date = mongo.DateTimeField(default=datetime.datetime.now)

    meta = {
        "collection": "popularity_query",
        "allow_inheritance": False,
    }

    def __str__(self):
        """Render as ``<email> - "<query>"``."""
        return '%s - "%s"' % (self.email, self.query)

    def queue_email(self):
        """Hand this query to ``EmailPopularityQuery.delay``, identified by its primary key."""
        EmailPopularityQuery.delay(pk=str(self.pk))

    @classmethod
    def ensure_all_sent(cls, queue=True):
        """Call ``ensure_sent`` on every stored query, oldest ``creation_date`` first."""
        for query in cls.objects.all().order_by("creation_date"):
            query.ensure_sent(queue=queue)

    def ensure_sent(self, queue=True):
        """Send (or queue) the email unless it has already been sent.

        Args:
            queue: When true, defer via ``queue_email``; otherwise send
                synchronously with ``send_email``.
        """
        if self.is_emailed:
            logging.debug(" ---> Already sent %s" % self)
            return

        if queue:
            self.queue_email()
        else:
            self.send_email()

    def send_email(self, limit=5000):
        """Build the popularity spreadsheet, email it as an attachment, and mark the query sent.

        Args:
            limit: Passed through to ``Feed.xls_query_popularity``.
        """
        filename = Feed.xls_query_popularity(self.query, limit=limit)
        # The attachment is a binary spreadsheet: read it as bytes (text mode would try to
        # decode it) and close the handle as soon as the contents are in memory.
        with open(filename, "rb") as xlsx:
            spreadsheet = xlsx.read()

        params = {"query": self.query}
        text = render_to_string("mail/email_popularity_query.txt", params)
        html = render_to_string("mail/email_popularity_query.xhtml", params)
        subject = 'Keyword popularity spreadsheet: "%s"' % self.query
        msg = EmailMultiAlternatives(
            subject, text, from_email="NewsBlur <%s>" % settings.HELLO_EMAIL, to=["<%s>" % (self.email)]
        )
        msg.attach_alternative(html, "text/html")
        msg.attach(filename, spreadsheet, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
        msg.send()

        # Only flag as sent after the send call has returned without raising.
        self.is_emailed = True
        self.save()

        logging.debug(" -> ~BB~FM~SBSent email for popularity query: %s" % self)
class MClassifierTitle(mongo.Document):
    """A user's score for a title phrase, stored in the ``classifier_title`` collection.

    Each record carries both a ``feed_id`` and a ``social_user_id``; the
    collection is indexed by user together with either of them.
    """

    user_id = mongo.IntField()
    feed_id = mongo.IntField()
    social_user_id = mongo.IntField()
    title = mongo.StringField(max_length=255)
    score = mongo.IntField()
    creation_date = mongo.DateTimeField()

    meta = {
        "collection": "classifier_title",
        "indexes": [
            ("user_id", "feed_id"),
            "feed_id",
            ("user_id", "social_user_id"),
            "social_user_id",
        ],
        "allow_inheritance": False,
    }

    def __str__(self):
        """Describe the classifier; performs a ``User`` lookup for ``user_id``."""
        owner = User.objects.get(pk=self.user_id)
        fields = (owner, self.feed_id, self.social_user_id, self.score, self.title[:30])
        return "%s - %s/%s: (%s) %s" % fields
class MClassifierAuthor(mongo.Document):
    """A user's score for an author, stored in the ``classifier_author`` collection.

    ``user_id`` is declared unique together with ``feed_id``,
    ``social_user_id`` and ``author``.
    """

    user_id = mongo.IntField(unique_with=("feed_id", "social_user_id", "author"))
    feed_id = mongo.IntField()
    social_user_id = mongo.IntField()
    author = mongo.StringField(max_length=255)
    score = mongo.IntField()
    creation_date = mongo.DateTimeField()

    meta = {
        "collection": "classifier_author",
        "indexes": [
            ("user_id", "feed_id"),
            "feed_id",
            ("user_id", "social_user_id"),
            "social_user_id",
        ],
        "allow_inheritance": False,
    }

    def __str__(self):
        """Describe the classifier; performs a ``User`` lookup for ``user_id``."""
        owner = User.objects.get(pk=self.user_id)
        fields = (owner, self.feed_id, self.social_user_id, self.score, self.author[:30])
        return "%s - %s/%s: (%s) %s" % fields
class MClassifierTag(mongo.Document):
    """A user's score for a tag, stored in the ``classifier_tag`` collection.

    ``user_id`` is declared unique together with ``feed_id``,
    ``social_user_id`` and ``tag``.
    """

    user_id = mongo.IntField(unique_with=("feed_id", "social_user_id", "tag"))
    feed_id = mongo.IntField()
    social_user_id = mongo.IntField()
    tag = mongo.StringField(max_length=255)
    score = mongo.IntField()
    creation_date = mongo.DateTimeField()

    meta = {
        "collection": "classifier_tag",
        "indexes": [
            ("user_id", "feed_id"),
            "feed_id",
            ("user_id", "social_user_id"),
            "social_user_id",
        ],
        "allow_inheritance": False,
    }

    def __str__(self):
        """Describe the classifier; performs a ``User`` lookup for ``user_id``."""
        owner = User.objects.get(pk=self.user_id)
        fields = (owner, self.feed_id, self.social_user_id, self.score, self.tag[:30])
        return "%s - %s/%s: (%s) %s" % fields
class MClassifierFeed(mongo.Document):
    """A user's score for a whole feed or social user, stored in ``classifier_feed``.

    ``user_id`` is declared unique together with ``feed_id`` and
    ``social_user_id``.
    """

    user_id = mongo.IntField(unique_with=("feed_id", "social_user_id"))
    feed_id = mongo.IntField()
    social_user_id = mongo.IntField()
    score = mongo.IntField()
    creation_date = mongo.DateTimeField()

    meta = {
        "collection": "classifier_feed",
        "indexes": [
            ("user_id", "feed_id"),
            "feed_id",
            ("user_id", "social_user_id"),
            "social_user_id",
        ],
        "allow_inheritance": False,
    }

    def __str__(self):
        """Describe the classifier, resolving its owner and its target.

        The target is the ``Feed`` when ``feed_id`` is truthy, otherwise the
        ``User`` identified by ``social_user_id``.
        """
        owner = User.objects.get(pk=self.user_id)
        target = Feed.get_by_id(self.feed_id) if self.feed_id else User.objects.get(pk=self.social_user_id)
        fields = (owner, self.feed_id, self.social_user_id, self.score, target)
        return "%s - %s/%s: (%s) %s" % fields
def compute_story_score(story, classifier_titles, classifier_authors, classifier_tags, classifier_feeds):
    """Combine the feed, author, tag and title classifier scores for ``story`` into one score.

    Precedence: a positive title/author/tag score wins; otherwise a negative
    one wins; only when those three leave the score at zero does the
    feed-level score apply.
    """
    # Evaluated in the same order as the keys of the original lookup table.
    feed_score = apply_classifier_feeds(classifier_feeds, story["story_feed_id"])
    author_score = apply_classifier_authors(classifier_authors, story)
    tag_score = apply_classifier_tags(classifier_tags, story)
    title_score = apply_classifier_titles(classifier_titles, story)

    content_scores = (title_score, author_score, tag_score)
    highest = max(content_scores)
    lowest = min(content_scores)

    if highest > 0:
        return highest
    if lowest < 0:
        return lowest
    return feed_score
def apply_classifier_titles(classifiers, story):
    """Score ``story`` against the title classifiers that belong to its feed.

    A classifier matches when its title, lowercased, is a substring of the
    lowercased story title. The first matching classifier with a positive
    score is returned immediately; otherwise the score of the last matching
    classifier is returned, or 0 when nothing matched.

    Args:
        classifiers: Iterable of objects exposing ``feed_id``, ``title`` and ``score``.
        story: Mapping with ``story_feed_id`` and ``story_title`` keys.

    Returns:
        The resulting integer score.
    """
    score = 0
    story_title = None  # lowercased on first use, then reused for every classifier
    for classifier in classifiers:
        if classifier.feed_id != story["story_feed_id"]:
            continue
        if story_title is None:
            # A story whose title is None is treated as having an empty title
            # instead of failing on ``None.lower()``.
            story_title = (story["story_title"] or "").lower()
        if classifier.title.lower() in story_title:
            score = classifier.score
            if score > 0:
                return score
    return score
def apply_classifier_authors(classifiers, story):
    """Score ``story`` against the author classifiers that belong to its feed.

    A classifier matches when the story has a truthy ``story_authors`` value
    equal to the classifier's author. The first positive match is returned
    at once; otherwise the last matching score, or 0 when nothing matched.
    """
    result = 0
    for candidate in classifiers:
        if candidate.feed_id == story["story_feed_id"]:
            story_author = story.get("story_authors")
            if story_author and candidate.author == story_author:
                result = candidate.score
                if result > 0:
                    return result
    return result
def apply_classifier_tags(classifiers, story):
    """Score ``story`` against the tag classifiers that belong to its feed.

    A classifier matches when ``story["story_tags"]`` is truthy and contains
    the classifier's tag. The first positive match is returned at once;
    otherwise the last matching score, or 0 when nothing matched.
    """
    result = 0
    for candidate in classifiers:
        if candidate.feed_id == story["story_feed_id"]:
            story_tags = story["story_tags"]
            if story_tags and candidate.tag in story_tags:
                result = candidate.score
                if result > 0:
                    return result
    return result
def apply_classifier_feeds(classifiers, feed, social_user_ids=None):
    """Return the score of the first feed-level classifier matching ``feed`` or a social user.

    Args:
        classifiers: Iterable of objects exposing ``feed_id``, ``social_user_id`` and ``score``.
        feed: An integer feed id, or an object whose ``pk`` is the feed id.
        social_user_ids: A single id or a list of ids; a non-list value is wrapped in a list.

    Returns:
        The matching classifier's score, or 0 when nothing matches or when
        neither ``feed`` nor ``social_user_ids`` is given.
    """
    if not (feed or social_user_ids):
        return 0

    if feed:
        feed_id = feed if isinstance(feed, int) else feed.pk
    else:
        feed_id = None

    social_ids = social_user_ids
    if social_ids and not isinstance(social_ids, list):
        social_ids = [social_ids]

    for candidate in classifiers:
        # Feed match is checked first; the social match only applies to
        # classifiers that carry no feed id of their own.
        if candidate.feed_id == feed_id or (
            social_ids and not candidate.feed_id and candidate.social_user_id in social_ids
        ):
            return candidate.score
    return 0
def get_classifiers_for_user(
    user,
    feed_id=None,
    social_user_id=None,
    classifier_feeds=None,
    classifier_authors=None,
    classifier_titles=None,
    classifier_tags=None,
):
    """Build the ``{"feeds", "authors", "titles", "tags"}`` score mapping for ``user``.

    Any classifier list left as ``None`` is loaded from its Mongo collection,
    filtered by the user and by ``feed_id`` / ``social_user_id`` when given.
    Lists that are supplied are used as-is.

    Args:
        user: Object whose ``pk`` identifies the user.
        feed_id: A single feed id, or a list of ids (queried with ``feed_id__in``).
        social_user_id: An integer id or a ``"social:<id>"`` string.
        classifier_feeds, classifier_authors, classifier_titles, classifier_tags:
            Optional pre-fetched classifier lists.

    Returns:
        Dict with keys ``feeds``, ``authors``, ``titles`` and ``tags``, each
        mapping the classified value to its score. Feed-level entries for a
        social user are keyed ``"social:<id>"``; all others by feed id.
    """
    query = {"user_id": user.pk}
    if isinstance(feed_id, list):
        query["feed_id__in"] = feed_id
    elif feed_id:
        query["feed_id"] = feed_id

    if social_user_id:
        if isinstance(social_user_id, str):
            social_user_id = int(social_user_id.replace("social:", ""))
        query["social_user_id"] = social_user_id

    if classifier_authors is None:
        classifier_authors = list(MClassifierAuthor.objects(**query))
    if classifier_titles is None:
        classifier_titles = list(MClassifierTitle.objects(**query))
    if classifier_tags is None:
        classifier_tags = list(MClassifierTag.objects(**query))
    if classifier_feeds is None:
        # Must stay last: this narrows the shared filter for the feed query only.
        if feed_id and not social_user_id:
            query["social_user_id"] = 0
        classifier_feeds = list(MClassifierFeed.objects(**query))

    feed_scores = {}
    for entry in classifier_feeds:
        is_social = entry.social_user_id and not entry.feed_id
        key = "social:%s" % entry.social_user_id if is_social else entry.feed_id
        feed_scores[key] = entry.score

    return {
        "feeds": feed_scores,
        "authors": {entry.author: entry.score for entry in classifier_authors},
        "titles": {entry.title: entry.score for entry in classifier_titles},
        "tags": {entry.tag: entry.score for entry in classifier_tags},
    }
def sort_classifiers_by_feed(
    user,
    feed_ids=None,
    classifier_feeds=None,
    classifier_authors=None,
    classifier_titles=None,
    classifier_tags=None,
):
    """Group pre-fetched classifiers by feed and build one classifier payload per feed.

    Args:
        user: Passed through to ``get_classifiers_for_user``.
        feed_ids: Feed ids to build payloads for; falsy yields an empty result.
        classifier_feeds, classifier_authors, classifier_titles, classifier_tags:
            Classifier iterables covering any of the feeds. A value left as
            ``None`` is forwarded as ``None`` for each feed, so
            ``get_classifiers_for_user`` loads that classifier type itself
            rather than this function failing while iterating ``None``.

    Returns:
        Dict mapping each id in ``feed_ids`` to the payload returned by
        ``get_classifiers_for_user`` for that feed.
    """

    def sort_by_feed(classifiers):
        """Bucket classifiers by ``feed_id``; ``None`` stays ``None``."""
        if classifiers is None:
            return None
        feed_classifiers = defaultdict(list)
        for classifier in classifiers:
            feed_classifiers[classifier.feed_id].append(classifier)
        return feed_classifiers

    def for_feed(grouped, feed_id):
        """Return the bucket for ``feed_id``, or ``None`` when nothing was supplied."""
        return None if grouped is None else grouped[feed_id]

    classifiers = {}

    if feed_ids:
        classifier_feeds = sort_by_feed(classifier_feeds)
        classifier_authors = sort_by_feed(classifier_authors)
        classifier_titles = sort_by_feed(classifier_titles)
        classifier_tags = sort_by_feed(classifier_tags)

        for feed_id in feed_ids:
            classifiers[feed_id] = get_classifiers_for_user(
                user,
                feed_id=feed_id,
                classifier_feeds=for_feed(classifier_feeds, feed_id),
                classifier_authors=for_feed(classifier_authors, feed_id),
                classifier_titles=for_feed(classifier_titles, feed_id),
                classifier_tags=for_feed(classifier_tags, feed_id),
            )

    return classifiers