# NewsBlur/apps/analyzer/tests.py
#
# 97 lines
# No EOL
# 4 KiB
# Python

from utils import json
from django.test.client import Client
from django.contrib.auth.models import User
from apps.rss_feeds.models import Feed, Story
from django.test import TestCase
from django.core import management
from pprint import pprint
# from apps.analyzer.classifier import FisherClassifier
from apps.analyzer.tokenizer import Tokenizer
from utils.reverend.thomas import Bayes
from apps.analyzer.phrase_filter import PhraseFilter
class ClassifierTest(TestCase):
    """Tests phrase extraction and Bayes classification of story titles.

    The fixtures listed in ``fixtures`` are loaded by the Django test runner
    before each test; ``test_train`` loads further fixtures itself.
    """

    fixtures = ['classifiers.json', 'brownstoner.json']

    def setUp(self):
        """Create the HTTP test client used to hit the reader endpoints."""
        self.client = Client()

    # Disabled test, kept for reference:
    #
    # def test_filter(self):
    #     user = User.objects.all()
    #     feed = Feed.objects.all()
    #
    #     management.call_command('loaddata', 'brownstoner.json', verbosity=0)
    #     response = self.client.get('/reader/refresh_feed', { "feed_id": 1, "force": True })
    #     management.call_command('loaddata', 'brownstoner2.json', verbosity=0)
    #     response = self.client.get('/reader/refresh_feed', { "feed_id": 1, "force": True })
    #     management.call_command('loaddata', 'gothamist1.json', verbosity=0)
    #     response = self.client.get('/reader/refresh_feed', { "feed_id": 4, "force": True })
    #     management.call_command('loaddata', 'gothamist2.json', verbosity=0)
    #     response = self.client.get('/reader/refresh_feed', { "feed_id": 4, "force": True })
    #
    #     stories = Story.objects.filter(story_feed=feed[1]).order_by('-story_date')[:100]
    #
    #     phrasefilter = PhraseFilter()
    #     for story in stories:
    #         # print story.story_title, story.id
    #         phrasefilter.run(story.story_title, story.id)
    #
    #     phrasefilter.pare_phrases()
    #     phrasefilter.print_phrases()

    def test_train(self):
        """Train a Bayes classifier on sample titles and check its top guess.

        Steps:
          1. Load the two brownstoner fixtures, requesting a forced refresh
             of feed 1 after each load.
          2. Run ``PhraseFilter`` over the titles of the first 53 stories of
             feed 1 and pare the result down to a phrase list.
          3. Train ``Bayes`` (tokenizing with those phrases) on labelled
             titles, then assert the label of the first entry returned by
             ``guess`` for three unseen titles.
        """
        refresh_params = {"feed_id": 1, "force": True}
        for fixture in ('brownstoner.json', 'brownstoner2.json'):
            management.call_command('loaddata', fixture, verbosity=0)
            # The response is not inspected; the request is made only for
            # whatever the refresh endpoint does server-side.
            self.client.get('/reader/refresh_feed', refresh_params)

        # NOTE(review): 53 is an unexplained cap on the number of stories
        # fed to the phrase filter -- confirm against the fixture contents.
        stories = Story.objects.filter(story_feed=1)[:53]

        phrasefilter = PhraseFilter()
        for story in stories:
            phrasefilter.run(story.story_title, story.id)
        phrasefilter.pare_phrases()
        phrases = phrasefilter.get_phrases()
        # Single-argument print(...) emits the same output on Python 2 and 3.
        print(phrases)

        tokenizer = Tokenizer(phrases)
        classifier = Bayes(tokenizer)

        # (label, title) pairs, trained in this exact order. Repeated rows
        # are intentional: each repetition is a separate train() call.
        training_data = (
            ('good', 'House of the Day: 393 Pacific St.'),
            ('good', 'House of the Day: 393 Pacific St.'),
            ('good', 'Condo of the Day: 393 Pacific St.'),
            ('good', 'Co-op of the Day: 393 Pacific St. #3'),
            ('good', 'Co-op of the Day: 393 Pacific St. #3'),
            ('good', 'Development Watch: 393 Pacific St. #3'),
            ('bad', 'Development Watch: 393 Pacific St. #3'),
            ('bad', 'Development Watch: 393 Pacific St. #3'),
        )
        for label, title in training_data:
            classifier.train(label, title)

        # Titles whose top guess is asserted. The guess is printed before the
        # assertion so it is visible in the output when the assertion fails.
        expected_guesses = (
            ('Co-op of the Day: 413 Atlantic', "good"),
            ('House of the Day: 413 Atlantic', "good"),
            ('Development Watch: Yatta', "bad"),
        )
        for title, expected_label in expected_guesses:
            guess = classifier.guess(title)
            print(guess)
            self.assertEqual(guess[0][0], expected_label)

        # Titles that are only printed for inspection; nothing is asserted.
        unchecked_titles = (
            'Development Watch: 393 Pacific St.',
            'Extra, Extra',
            'Nothing doing: 393 Pacific St.',
        )
        for title in unchecked_titles:
            print(classifier.guess(title))