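Speeding up score calculation by storing each story's tag names as a JSON-encoded `story_tags` field on Story and loading unread stories with `select_related('story_author')`.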

This commit is contained in:
Samuel Clay 2010-04-05 02:42:43 -04:00
parent 5cf7c3798b
commit fc6672bab0
2 changed files with 20 additions and 10 deletions

View file

@ -3,7 +3,7 @@ from django.contrib.auth.models import User
import datetime import datetime
import random import random
from django.core.cache import cache from django.core.cache import cache
from apps.rss_feeds.models import Feed, Story from apps.rss_feeds.models import Feed, Story, Tag
from utils import feedparser, object_manager, json from utils import feedparser, object_manager, json
from apps.analyzer.models import ClassifierFeed, ClassifierAuthor, ClassifierTag, ClassifierTitle from apps.analyzer.models import ClassifierFeed, ClassifierAuthor, ClassifierTag, ClassifierTitle
from apps.analyzer.models import apply_classifier_titles, apply_classifier_feeds, apply_classifier_authors, apply_classifier_tags from apps.analyzer.models import apply_classifier_titles, apply_classifier_feeds, apply_classifier_authors, apply_classifier_tags
@ -101,9 +101,15 @@ class UserSubscription(models.Model):
feed=self.feed, feed=self.feed,
story__story_date__gte=date_delta) story__story_date__gte=date_delta)
read_stories_ids = [rs.story.id for rs in read_stories] read_stories_ids = [rs.story.id for rs in read_stories]
stories_db = Story.objects.filter(story_feed=self.feed, # print "Read Stories IDs: %s" % read_stories_ids
story_date__gte=date_delta)\ # print "Date delta: %s" % date_delta
.exclude(id__in=read_stories_ids) from django.db import connection
connection.queries = []
stories_db = Story.objects.select_related('story_author')\
.exclude(id__in=read_stories_ids)\
.filter(story_feed=self.feed,
story_date__gte=date_delta)
# print "Stories_db: %s" % stories_db.count()
stories = self.feed.format_stories(stories_db) stories = self.feed.format_stories(stories_db)
# print ' Stories: %s\t' % stories_db.count(), # print ' Stories: %s\t' % stories_db.count(),
# if read_stories.count(): print '(%s read)' % (read_stories.count()) # if read_stories.count(): print '(%s read)' % (read_stories.count())

View file

@ -4,7 +4,7 @@ from django.contrib.auth.models import User
from django.contrib.contenttypes.models import ContentType from django.contrib.contenttypes.models import ContentType
from django.core import serializers from django.core import serializers
from django.core.cache import cache from django.core.cache import cache
from utils import feedparser, object_manager from utils import feedparser, object_manager, json
from utils.dateutil.parser import parse as dateutil_parse from utils.dateutil.parser import parse as dateutil_parse
from utils.feed_functions import encode, prints, mtime, levenshtein_distance from utils.feed_functions import encode, prints, mtime, levenshtein_distance
import time, datetime, random import time, datetime, random
@ -104,7 +104,8 @@ class Feed(models.Model):
story_content = story_content, story_content = story_content,
story_author = story_author, story_author = story_author,
story_permalink = story.get('link'), story_permalink = story.get('link'),
story_guid = story.get('guid') or story.get('id') or story.get('link') story_guid = story.get('guid') or story.get('id') or story.get('link'),
story_tags = json.encode([t.name for t in story_tags])
) )
try: try:
ret_values[ENTRY_NEW] += 1 ret_values[ENTRY_NEW] += 1
@ -138,7 +139,8 @@ class Feed(models.Model):
story_original_content = original_content, story_original_content = original_content,
story_author = story_author, story_author = story_author,
story_permalink = story.get('link'), story_permalink = story.get('link'),
story_guid = story.get('guid') or story.get('id') or story.get('link') story_guid = story.get('guid') or story.get('id') or story.get('link'),
story_tags = json.encode([t.name for t in story_tags])
) )
s.tags.clear() s.tags.clear()
[s.tags.add(tcat) for tcat in story_tags] [s.tags.add(tcat) for tcat in story_tags]
@ -194,11 +196,12 @@ class Feed(models.Model):
def format_stories(self, stories_db): def format_stories(self, stories_db):
stories = [] stories = []
# from django.db import connection
# print "Formatting Stories: %s" % stories_db.count()
for story_db in stories_db: for story_db in stories_db:
story = {} story = {}
story_tags = story_db.tags.all() story_tags = story_db.tags.all()
story['story_tags'] = [tag.name for tag in story_tags] story['story_tags'] = story_db.story_tags
story['short_parsed_date'] = format_story_link_date__short(story_db.story_date) story['short_parsed_date'] = format_story_link_date__short(story_db.story_date)
story['long_parsed_date'] = format_story_link_date__long(story_db.story_date) story['long_parsed_date'] = format_story_link_date__long(story_db.story_date)
story['story_date'] = story_db.story_date story['story_date'] = story_db.story_date
@ -206,7 +209,7 @@ class Feed(models.Model):
story['story_title'] = story_db.story_title story['story_title'] = story_db.story_title
story['story_content'] = story_db.story_content story['story_content'] = story_db.story_content
story['story_permalink'] = story_db.story_permalink story['story_permalink'] = story_db.story_permalink
story['story_feed_id'] = story_db.story_feed.id story['story_feed_id'] = self
story['id'] = story_db.id story['id'] = story_db.id
stories.append(story) stories.append(story)
@ -325,6 +328,7 @@ class Story(models.Model):
story_permalink = models.CharField(max_length=1000) story_permalink = models.CharField(max_length=1000)
story_guid = models.CharField(max_length=1000) story_guid = models.CharField(max_length=1000)
story_past_trim_date = models.BooleanField(default=False) story_past_trim_date = models.BooleanField(default=False)
story_tags = models.CharField(max_length=1000)
tags = models.ManyToManyField(Tag) tags = models.ManyToManyField(Tag)
def __unicode__(self): def __unicode__(self):