2009-11-03 03:52:03 +00:00
|
|
|
from utils import json
|
|
|
|
from django.test.client import Client
|
|
|
|
from django.contrib.auth.models import User
|
2009-11-15 18:57:53 +00:00
|
|
|
from apps.rss_feeds.models import Feed, Story
|
2009-11-03 03:52:03 +00:00
|
|
|
from django.test import TestCase
|
|
|
|
from django.core import management
|
|
|
|
from pprint import pprint
|
|
|
|
from apps.analyzer.classifier import FisherClassifier
|
2009-11-15 18:57:53 +00:00
|
|
|
from apps.analyzer.phrase_filter import PhraseFilter
|
2009-11-03 03:52:03 +00:00
|
|
|
|
|
|
|
class ClassifierTest(TestCase):
    """Exercise PhraseFilter and FisherClassifier against fixture feed data."""

    fixtures = ['classifiers.json', 'brownstoner.json']

    def setUp(self):
        """Give every test its own test client."""
        self.client = Client()

    def _load_and_refresh(self, fixture, feed_id):
        """Load `fixture` via `loaddata`, then request a forced refresh of `feed_id`."""
        management.call_command('loaddata', fixture, verbosity=0)
        # The response is not inspected; the request is made only for its
        # effect on the feed.
        self.client.get('/reader/refresh_feed', { "feed_id": feed_id, "force": True })

    def test_filter(self):
        """Run PhraseFilter over the newest story titles of the second feed.

        This test makes no assertions: it passes as long as nothing raises,
        and prints the story titles and the resulting phrases for inspection.
        """
        feed = Feed.objects.all()

        # Order matters: each fixture is loaded and its feed refreshed before
        # the next fixture is loaded.
        fixture_feed_ids = (
            ('brownstoner.json', 1),
            ('brownstoner2.json', 1),
            ('gothamist1.json', 4),
            ('gothamist2.json', 4),
        )
        for fixture, feed_id in fixture_feed_ids:
            self._load_and_refresh(fixture, feed_id)

        stories = Story.objects.filter(story_feed=feed[1]).order_by('-story_date')[:100]

        phrasefilter = PhraseFilter()
        for story in stories:
            # Single-argument print() emits the same text on Python 2 and 3.
            print("%s %s" % (story.story_title, story.id))
            phrasefilter.run(story.story_title, story.id)

        phrasefilter.pare_phrases()
        phrasefilter.print_phrases()

    def test_train(self):
        """Train a FisherClassifier on sample titles and check its verdicts."""
        user = User.objects.all()
        feed = Feed.objects.all()

        self._load_and_refresh('brownstoner.json', 1)

        phrases = [
            "House of the Day",
            "of the Day",
            "Coop of the Day",
            "Condo of the Day",
            "Development Watch",
            "Atlantic Yards",
            "Streetlevel",
        ]

        classifier = FisherClassifier(user[0], feed[0], phrases)

        # (title, category, number of times the title is trained).
        # No title containing 'Streetlevel' is trained.
        training = (
            ('House of the Day: 393 Pacific St.', 'good', 2),
            ('Condo of the Day: 393 Pacific St.', 'good', 5),
            ('Coop of the Day: 393 Pacific St. #3', 'good', 2),
            ('Development Watch: 393 Pacific St. #3', 'bad', 3),
        )
        for title, category, repeats in training:
            for _ in range(repeats):
                classifier.train(title, category)

        # A title containing a phrase trained only as 'good'.
        c1 = classifier.classify('Condo of the Day: 413 Atlantic')
        self.assertEqual(c1.category, "good")
        c1_prob = classifier.fisher_probability('Condo of the Day: 413 Atlantic', 'good')
        print(c1_prob)

        # Titles containing a phrase trained only as 'bad'.
        c2 = classifier.classify('Development Watch: Yatta')
        self.assertEqual(c2.category, "bad")
        c2 = classifier.classify('Development Watch: 393 Pacific St.')
        self.assertEqual(c2.category, "bad")
        c2_prob = classifier.fisher_probability('Development Watch: Yatta', 'good')
        self.assertTrue(c2_prob < .5)
        print(c2_prob)

        # A title containing none of the configured phrases.
        c4 = classifier.classify('Nothing doing: 393 Pacific St.')
        c4_prob = classifier.fisher_probability('Nothing doing: 393 Pacific St.', 'good')
        print(c4_prob)
        self.assertEqual(c4.category, "good")
        # assertEqual reports the actual value on failure; the comparison is
        # still exact.
        self.assertEqual(c4_prob, .5)
|
2009-11-03 03:52:03 +00:00
|
|
|
|
|
|
|
|