NewsBlur/apps/reader/models.py
2010-10-13 18:39:49 -04:00

297 lines
No EOL
13 KiB
Python

import datetime
import mongoengine as mongo
from utils import log as logging
from utils import json
from django.db import models
from django.conf import settings
from django.contrib.auth.models import User
from django.core.cache import cache
from apps.rss_feeds.models import Feed, Story, MStory
from apps.analyzer.models import MClassifierFeed, MClassifierAuthor, MClassifierTag, MClassifierTitle
from apps.analyzer.models import apply_classifier_titles, apply_classifier_feeds, apply_classifier_authors, apply_classifier_tags
def _unread_cutoff():
    """Return the UTC datetime `settings.DAYS_OF_UNREAD` days before now."""
    return datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)


class UserSubscription(models.Model):
    """
    A feed which a user has subscribed to. Carries all of the cached information
    about the subscription, including unread counts of the three primary scores.
    Also has a dirty flag (needs_unread_recalc) which means that the unread counts
    are not accurate and need to be calculated with `self.calculate_feed_scores()`.
    """
    # Snapshot taken once at import time. Kept so existing references to
    # `UserSubscription.UNREAD_CUTOFF` keep working, but it goes stale in a
    # long-running process; the field defaults and the methods below call
    # `_unread_cutoff()` so they always get a fresh value.
    UNREAD_CUTOFF = _unread_cutoff()

    user = models.ForeignKey(User, related_name='subscriptions')
    feed = models.ForeignKey(Feed, related_name='subscribers')
    active = models.BooleanField(default=False)
    # Callable defaults are evaluated each time a row is created.
    last_read_date = models.DateTimeField(default=_unread_cutoff)
    mark_read_date = models.DateTimeField(default=_unread_cutoff)
    unread_count_neutral = models.IntegerField(default=0)
    unread_count_positive = models.IntegerField(default=0)
    unread_count_negative = models.IntegerField(default=0)
    unread_count_updated = models.DateTimeField(default=datetime.datetime.now)
    needs_unread_recalc = models.BooleanField(default=False)
    feed_opens = models.IntegerField(default=0)
    is_trained = models.BooleanField(default=False)

    def __unicode__(self):
        return '[' + self.feed.feed_title + '] '

    def mark_feed_read(self):
        """
        Move the read markers past the newest stored story and zero the
        cached unread counts, then save.

        The markers are set to one minute after the newest MStory date for
        this feed, or to the current UTC time when the feed has no stories.
        """
        # One ordered query answers both "are there any stories?" and
        # "what is the newest story date?".
        latest_story = (MStory.objects(story_feed_id=self.feed.pk)
                        .order_by('-story_date')
                        .only('story_date')
                        .first())
        if latest_story is not None:
            latest_story_date = latest_story['story_date'] + datetime.timedelta(minutes=1)
        else:
            latest_story_date = datetime.datetime.utcnow()

        self.last_read_date = latest_story_date
        self.mark_read_date = latest_story_date
        self.unread_count_negative = 0
        self.unread_count_positive = 0
        self.unread_count_neutral = 0
        self.unread_count_updated = latest_story_date
        self.needs_unread_recalc = False
        self.save()

    def calculate_feed_scores(self, silent=False, stories_db=None):
        """
        Recompute and save the positive/neutral/negative unread counts.

        Does nothing when the user's `last_seen_on` is older than the unread
        cutoff. When the feed has never been fetched, only the dirty flag is
        cleared.

        Args:
            silent: when true, the progress log lines are suppressed.
            stories_db: optional iterable of stories to score. When empty or
                omitted, the feed's MStory documents dated on or after the
                effective cutoff are queried instead.
        """
        unread_cutoff = _unread_cutoff()

        if self.user.profile.last_seen_on < unread_cutoff:
            # The user has not been seen within the unread window; skip.
            return

        if not self.feed.fetched_once:
            if not silent:
                logging.info(' ---> [%s] NOT Computing scores: %s' % (self.user, self.feed))
            self.needs_unread_recalc = False
            self.save()
            return

        if not silent:
            logging.info(' ---> [%s] Computing scores: %s' % (self.user, self.feed))

        feed_scores = dict(negative=0, neutral=0, positive=0)

        # The effective cutoff is the later of the unread window start and the
        # subscription's mark_read_date. If mark_read_date was older than the
        # window, it is pulled forward so older stories count as read.
        date_delta = max(unread_cutoff, self.mark_read_date)
        self.mark_read_date = date_delta

        read_stories = MUserStory.objects(user_id=self.user.pk,
                                          feed_id=self.feed.pk,
                                          read_date__gte=self.mark_read_date)

        # A set keeps the membership tests in the loop below O(1) each.
        read_stories_ids = set()
        for us in read_stories:
            read_story = us.story
            if hasattr(read_story, 'story_guid') and isinstance(read_story.story_guid, unicode):
                read_stories_ids.add(read_story.story_guid)
            elif hasattr(read_story, 'id') and isinstance(read_story.id, unicode):
                read_stories_ids.add(read_story.id)  # TODO: Remove me after migration from story.id->guid

        stories_db = stories_db or MStory.objects(story_feed_id=self.feed.pk,
                                                  story_date__gte=date_delta)

        unread_stories_db = []
        for story in stories_db:
            # Caller-supplied stories may include ones older than the cutoff.
            if story.story_date < date_delta:
                continue
            if hasattr(story, 'story_guid') and story.story_guid not in read_stories_ids:
                unread_stories_db.append(story)
            elif isinstance(story.id, unicode) and story.id not in read_stories_ids:
                unread_stories_db.append(story)

        stories = self.feed.format_stories(unread_stories_db)

        classifier_feeds = MClassifierFeed.objects(user_id=self.user.pk, feed_id=self.feed.pk)
        classifier_authors = MClassifierAuthor.objects(user_id=self.user.pk, feed_id=self.feed.pk)
        classifier_titles = MClassifierTitle.objects(user_id=self.user.pk, feed_id=self.feed.pk)
        classifier_tags = MClassifierTag.objects(user_id=self.user.pk, feed_id=self.feed.pk)

        # The feed-level score is the same for every story, so compute it once.
        scores = {
            'feed': apply_classifier_feeds(classifier_feeds, self.feed),
        }

        for story in stories:
            # Reset the classifier querysets before each story (presumably
            # because the apply_* helpers iterate them -- confirm).
            classifier_authors.rewind()
            classifier_tags.rewind()
            classifier_titles.rewind()

            scores.update({
                'author': apply_classifier_authors(classifier_authors, story),
                'tags': apply_classifier_tags(classifier_tags, story),
                'title': apply_classifier_titles(classifier_titles, story),
            })

            max_score = max(scores['author'], scores['tags'], scores['title'])
            min_score = min(scores['author'], scores['tags'], scores['title'])

            # Any positive story-level classifier wins, then any negative one;
            # the feed-level score only decides when all three are zero.
            if max_score > 0:
                feed_scores['positive'] += 1
            elif min_score < 0:
                feed_scores['negative'] += 1
            else:
                if scores['feed'] > 0:
                    feed_scores['positive'] += 1
                elif scores['feed'] < 0:
                    feed_scores['negative'] += 1
                else:
                    feed_scores['neutral'] += 1

        self.unread_count_positive = feed_scores['positive']
        self.unread_count_neutral = feed_scores['neutral']
        self.unread_count_negative = feed_scores['negative']
        self.needs_unread_recalc = False
        self.save()

        # Nothing positive or neutral left unread: advance the read markers.
        if (self.unread_count_positive == 0 and
            self.unread_count_neutral == 0):
            self.mark_feed_read()

        cache.delete('usersub:%s' % self.user.id)

    class Meta:
        unique_together = ("user", "feed")
class UserStory(models.Model):
    """
    A story that a user has read. Per the original note, rows are deleted
    once the UserSubscription's mark_read_date passes the row's read date.
    """
    user = models.ForeignKey(User)
    feed = models.ForeignKey(Feed)
    story = models.ForeignKey(Story)
    read_date = models.DateTimeField(auto_now=True)
    opinion = models.IntegerField(default=0)

    class Meta:
        unique_together = ("user", "feed", "story")
        verbose_name = "user story"
        verbose_name_plural = "user stories"

    def __unicode__(self):
        # "[<feed title>] <story title>"
        pieces = ['[', self.feed.feed_title, '] ', self.story.story_title]
        return ''.join(pieces)
class MUserStory(mongo.Document):
    """
    MongoDB record of a story read by a user, stored in the `userstories`
    collection. Records dated at or before the unread cutoff are removed by
    `delete_old_stories`.
    """
    user_id = mongo.IntField()
    feed_id = mongo.IntField()
    read_date = mongo.DateTimeField()
    story = mongo.ReferenceField(MStory, unique_with=('user_id', 'feed_id'))

    meta = {
        'allow_inheritance': False,
        'collection': 'userstories',
        'indexes': ['feed_id', ('user_id', 'feed_id')],
    }

    @classmethod
    def delete_old_stories(cls, feed_id):
        """Delete the feed's read-story records dated at or before the unread cutoff."""
        unread_window = datetime.timedelta(days=settings.DAYS_OF_UNREAD)
        cutoff = datetime.datetime.utcnow() - unread_window
        expired = cls.objects(feed_id=feed_id, read_date__lte=cutoff)
        expired.delete()
class UserSubscriptionFolders(models.Model):
    """
    A JSON list of the folders and feeds to which a user has subscribed. The
    list is a recursive descent of feeds and folders in folders. Used to lay
    out the feeds and folders in the Reader's feed navigation pane.

    In the decoded structure an int is a feed id and a dict maps a folder
    name to that folder's own list.
    """
    user = models.ForeignKey(User)
    folders = models.TextField(default="[]")

    def __unicode__(self):
        return "[%s]: %s" % (self.user, len(self.folders),)

    class Meta:
        verbose_name_plural = "folders"
        verbose_name = "folder"

    def delete_feed(self, feed_id, in_folder):
        """
        Remove one occurrence of `feed_id` from the folder named `in_folder`
        and save the updated folder list.

        The subscription and the user's read-story records for the feed are
        deleted only when the feed was actually removed and no other
        occurrence of it was seen in the folder tree.

        Args:
            feed_id: integer id of the feed to remove.
            in_folder: name of the folder holding the feed; '' is the top level.
        """
        def _find_feed_in_folders(old_folders, folder_name='', multiples_found=False, deleted=False):
            # Rebuild `old_folders` without the first matching feed, threading
            # the multiples_found/deleted flags through the recursion.
            new_folders = []
            for k, folder in enumerate(old_folders):
                if isinstance(folder, int):
                    # Same feed in a different folder, or a second copy in the
                    # target folder after the first was already removed.
                    if (folder == feed_id and (
                        (folder_name != in_folder) or
                        (folder_name == in_folder and deleted))):
                        multiples_found = True
                        logging.info(" ---> [%s] Deleting feed, and a multiple has been found in '%s'" % (self.user, folder_name))
                    if folder == feed_id and folder_name == in_folder and not deleted:
                        logging.info(" ---> [%s] Delete feed: %s'th item: %s folders/feeds" % (
                            self.user, k, len(old_folders)
                        ))
                        deleted = True
                    else:
                        new_folders.append(folder)
                elif isinstance(folder, dict):
                    for f_k, f_v in folder.items():
                        nf, multiples_found, deleted = _find_feed_in_folders(f_v, f_k, multiples_found, deleted)
                        new_folders.append({f_k: nf})

            return new_folders, multiples_found, deleted

        user_sub_folders = json.decode(self.folders)
        user_sub_folders, multiples_found, deleted = _find_feed_in_folders(user_sub_folders)
        self.folders = json.encode(user_sub_folders)
        self.save()

        if not multiples_found and deleted:
            # The folder list is already saved at this point, so a missing
            # subscription row must not abort the remaining cleanup.
            try:
                user_sub = UserSubscription.objects.get(user=self.user, feed=feed_id)
            except UserSubscription.DoesNotExist:
                logging.info(" ---> [%s] Deleting feed: no subscription found for feed %s" % (self.user, feed_id))
            else:
                user_sub.delete()
            MUserStory.objects(user_id=self.user.pk, feed_id=feed_id).delete()

    def delete_folder(self, folder_to_delete, in_folder, feed_ids_in_folder):
        """
        Remove the folder `folder_to_delete` located in `in_folder`, save the
        updated folder list, and delete the subscriptions for the feeds that
        were only in that folder.

        Args:
            folder_to_delete: name of the folder to remove.
            in_folder: name of its parent folder; '' is the top level.
            feed_ids_in_folder: feed ids contained in the folder being
                removed. The caller's list is not modified.
        """
        def _find_folder_in_folders(old_folders, folder_name, feeds_to_delete):
            # Rebuild `old_folders` without the target folder. Any feed still
            # present elsewhere is struck from `feeds_to_delete` so that its
            # subscription is kept.
            new_folders = []
            for folder in old_folders:
                if isinstance(folder, int):
                    new_folders.append(folder)
                    if folder in feeds_to_delete:
                        feeds_to_delete.remove(folder)
                elif isinstance(folder, dict):
                    for f_k, f_v in folder.items():
                        if f_k == folder_to_delete and folder_name == in_folder:
                            logging.info(" ---> [%s] Deleting folder '%s' in '%s': %s" % (self.user, f_k, folder_name, folder))
                        else:
                            nf, feeds_to_delete = _find_folder_in_folders(f_v, f_k, feeds_to_delete)
                            new_folders.append({f_k: nf})

            return new_folders, feeds_to_delete

        # Work on a copy: the helper removes entries as it walks the tree.
        pending_feed_ids = list(feed_ids_in_folder or [])

        user_sub_folders = json.decode(self.folders)
        user_sub_folders, feeds_to_delete = _find_folder_in_folders(user_sub_folders, '', pending_feed_ids)
        self.folders = json.encode(user_sub_folders)
        self.save()

        UserSubscription.objects.filter(user=self.user, feed__in=feeds_to_delete).delete()
class Feature(models.Model):
    """
    A short, blog-style entry on the feature board that is shown to all
    users on the home page.
    """
    description = models.TextField(default="")
    date = models.DateTimeField(default=datetime.datetime.now)

    class Meta:
        ordering = ["-date"]

    def __unicode__(self):
        # Only the first 50 characters of the description are included.
        summary = self.description[:50]
        return "[%s] %s" % (self.date, summary)