NewsBlur-viq/apps/analyzer/tests.py

from django.test.client import Client
from apps.rss_feeds.models import MStory
from django.test import TestCase
from django.core import management
# from apps.analyzer.classifier import FisherClassifier
import nltk
from itertools import groupby
from apps.analyzer.tokenizer import Tokenizer
from vendor.reverend.thomas import Bayes
from apps.analyzer.phrase_filter import PhraseFilter


class QuadgramCollocationFinder(nltk.collocations.AbstractCollocationFinder):
    """Find and rank quadgram (four-word) collocations.

    It is usually more convenient to call ``from_words()`` than to construct
    an instance directly, because ``from_words()`` builds every frequency
    distribution the finder needs from a single token sequence.
    """

    def __init__(self, word_fd, quadgram_fd, trigram_fd, bigram_fd, wildcard_fd):
        """Construct a QuadgramCollocationFinder from precomputed FreqDists.

        Args:
            word_fd: counts of single words.
            quadgram_fd: counts of contiguous ``(w1, w2, w3, w4)`` tuples.
            trigram_fd: counts of contiguous ``(w1, w2, w3)`` tuples.
            bigram_fd: counts of contiguous ``(w1, w2)`` tuples.
            wildcard_fd: counts of three-word tuples taken from a four-word
                window with one inner word skipped, i.e. both
                ``(w1, w3, w4)`` and ``(w1, w2, w4)``.
        """
        nltk.collocations.AbstractCollocationFinder.__init__(self, word_fd, quadgram_fd)
        self.trigram_fd = trigram_fd
        self.bigram_fd = bigram_fd
        self.wildcard_fd = wildcard_fd

    @classmethod
    def from_words(cls, words):
        """Build a finder by sliding a four-token window over ``words``.

        Args:
            words: an iterable of tokens.

        Returns:
            A new finder whose distributions count every word, contiguous
            bigram, contiguous trigram, quadgram and gapped triple seen.
        """
        new_fd = nltk.probability.FreqDist
        wfd, qfd, tfd, bfd, wildfd = new_fd(), new_fd(), new_fd(), new_fd(), new_fd()

        # FreqDist.inc() and nltk.util.ingrams() were removed in NLTK 3;
        # item increment and ngrams() are the replacements.
        for w1, w2, w3, w4 in nltk.util.ngrams(words, 4, pad_right=True):
            wfd[w1] += 1
            # Right padding fills the last windows with None. Stop at the
            # first missing word, but still count the shorter n-grams that
            # are complete so bigrams/trigrams at the end of the text are
            # not lost.
            if w2 is None:
                continue
            bfd[(w1, w2)] += 1
            if w3 is None:
                continue
            tfd[(w1, w2, w3)] += 1
            if w4 is None:
                continue
            qfd[(w1, w2, w3, w4)] += 1
            wildfd[(w1, w3, w4)] += 1
            wildfd[(w1, w2, w4)] += 1

        return cls(wfd, qfd, tfd, bfd, wildfd)

    def score_ngram(self, score_fn, w1, w2, w3, w4):
        """Score the quadgram ``(w1, w2, w3, w4)`` with ``score_fn``.

        ``score_fn`` is called as ``score_fn(n_iiii, trigram_counts,
        bigram_counts, unigram_counts, n_all)``. In the count names an ``i``
        marks a position that must match and an ``x`` a position that is
        free.

        Returns:
            The value returned by ``score_fn``, or None when the quadgram
            was never observed.
        """
        n_all = self.word_fd.N()
        n_iiii = self.ngram_fd[(w1, w2, w3, w4)]
        if not n_iiii:
            return None

        # Three-word marginals: the two contiguous trigrams plus the two
        # gapped triples.
        # NOTE(review): both gapped patterns share wildcard_fd, so a lookup
        # can also pick up counts recorded for the other pattern when the
        # tuples coincide. Separating them needs a constructor change.
        n_iiix = self.trigram_fd[(w1, w2, w3)]
        n_iixi = self.wildcard_fd[(w1, w2, w4)]
        n_ixii = self.wildcard_fd[(w1, w3, w4)]
        n_xiii = self.trigram_fd[(w2, w3, w4)]

        # Two-word marginals: only contiguous pairs are tracked, so the
        # gapped pairs (w1, w3), (w1, w4) and (w2, w4) are not available.
        n_iixx = self.bigram_fd[(w1, w2)]
        n_xiix = self.bigram_fd[(w2, w3)]
        n_xxii = self.bigram_fd[(w3, w4)]

        # Single-word marginals. They go second-to-last because NLTK's
        # generic n-gram measures (e.g. pmi) read unigram counts from there.
        n_ixxx = self.word_fd[w1]
        n_xixx = self.word_fd[w2]
        n_xxix = self.word_fd[w3]
        n_xxxi = self.word_fd[w4]

        return score_fn(n_iiii,
                        (n_iiix, n_iixi, n_ixii, n_xiii),
                        (n_iixx, n_xiix, n_xxii),
                        (n_ixxx, n_xixx, n_xxix, n_xxxi),
                        n_all)

    
class CollocationTest(TestCase):
    """Checks n-gram collocation extraction on hand-written title tokens."""

    fixtures = ['brownstoner.json']

    def setUp(self):
        self.client = Client()

    def test_bigrams(self):
        """Repeated title phrases surface as frequent trigrams and 4-grams."""
        trigram_measures = nltk.collocations.TrigramAssocMeasures()

        tokens = [
            'Co-op', 'of', 'the', 'day',
            'House', 'of', 'the', 'day',
            'Condo', 'of', 'the', 'day',
            'Development', 'Watch',
            'Co-op', 'of', 'the', 'day',
        ]
        finder = nltk.collocations.TrigramCollocationFinder.from_words(tokens)

        # Drop trigrams that occur fewer than twice.
        finder.apply_freq_filter(2)

        # The (up to) 10 surviving trigrams with the highest PMI. The two
        # survivors tie on PMI, so compare as a set rather than by order.
        best = finder.nbest(trigram_measures.pmi, 10)
        print(best)
        self.assertEqual(set(best), {
            ('Co-op', 'of', 'the'),
            ('of', 'the', 'day'),
        })

        titles = [
            'Co-op of the day',
            'Condo of the day',
            'Co-op of the day',
            'House of the day',
            'Development Watch',
            'Streetlevel',
        ]

        # nltk.tokenize has no ``word`` function. Whitespace tokenization
        # needs no corpus download and keeps 'Co-op' as a single token,
        # matching the hand-written token list above.
        tokens = nltk.tokenize.WhitespaceTokenizer().tokenize(' '.join(titles))
        ngrams = nltk.ngrams(tokens, 4)
        # groupby only merges adjacent equal items, hence the sort first.
        d = [key for key, group in groupby(sorted(ngrams)) if len(list(group)) >= 2]
        print(d)
        self.assertEqual(d, [('Co-op', 'of', 'the', 'day')])

class ClassifierTest(TestCase):
    """Trains the Bayes classifier on story titles and checks its guesses."""

    fixtures = ['classifiers.json', 'brownstoner.json']

    def setUp(self):
        self.client = Client()
    #
    # Disabled test, kept for reference.
    #
    # def test_filter(self):
    #     user = User.objects.all()
    #     feed = Feed.objects.all()
    #
    #     management.call_command('loaddata', 'brownstoner.json', verbosity=0)
    #     response = self.client.get('/reader/refresh_feed', { "feed_id": 1, "force": True })
    #     management.call_command('loaddata', 'brownstoner2.json', verbosity=0)
    #     response = self.client.get('/reader/refresh_feed', { "feed_id": 1, "force": True })
    #     management.call_command('loaddata', 'gothamist1.json', verbosity=0)
    #     response = self.client.get('/reader/refresh_feed', { "feed_id": 4, "force": True })
    #     management.call_command('loaddata', 'gothamist2.json', verbosity=0)
    #     response = self.client.get('/reader/refresh_feed', { "feed_id": 4, "force": True })
    #
    #     stories = Story.objects.filter(story_feed=feed[1]).order_by('-story_date')[:100]
    #
    #     phrasefilter = PhraseFilter()
    #     for story in stories:
    #         # print story.story_title, story.id
    #         phrasefilter.run(story.story_title, story.id)
    #
    #     phrasefilter.pare_phrases()
    #     phrasefilter.print_phrases()
    #
    def test_train(self):
        """Train on labelled titles, then check guesses for new titles."""
        # Load each fixture and refresh feed 1 after it.
        management.call_command('loaddata', 'brownstoner.json', verbosity=0, commit=False, skip_checks=False)
        management.call_command('refresh_feed', force=1, feed=1, single_threaded=True, daemonize=False, skip_checks=False)
        management.call_command('loaddata', 'brownstoner2.json', verbosity=0, commit=False, skip_checks=False)
        management.call_command('refresh_feed', force=1, feed=1, single_threaded=True, daemonize=False, skip_checks=False)

        stories = MStory.objects(story_feed_id=1)[:53]

        # Feed the story titles to the phrase filter; the phrases it returns
        # are handed to the tokenizer used by the classifier.
        phrasefilter = PhraseFilter()
        for story in stories:
            phrasefilter.run(story.story_title, story.id)

        phrasefilter.pare_phrases()
        phrases = phrasefilter.get_phrases()
        print(phrases)

        tokenizer = Tokenizer(phrases)
        classifier = Bayes(tokenizer) # FisherClassifier(user[0], feed[0], phrases)

        # Repeated entries are deliberate: each one is a separate training
        # call for that pool.
        training_titles = [
            ('good', 'House of the Day: 393 Pacific St.'),
            ('good', 'House of the Day: 393 Pacific St.'),
            ('good', 'Condo of the Day: 393 Pacific St.'),
            ('good', 'Co-op of the Day: 393 Pacific St. #3'),
            ('good', 'Co-op of the Day: 393 Pacific St. #3'),
            ('good', 'Development Watch: 393 Pacific St. #3'),
            ('bad', 'Development Watch: 393 Pacific St. #3'),
            ('bad', 'Development Watch: 393 Pacific St. #3'),
            ('bad', 'Development Watch: 393 Pacific St. #3'),
            ('bad', 'Streetlevel: 393 Pacific St. #3'),
        ]
        for pool, title in training_titles:
            classifier.train(pool, title)

        # Every result is converted to a dict so the membership checks look
        # at pool names. Without dict() a check such as ``'bad' not in guess``
        # compares a string against the raw result and is always true
        # (presumably a list of (pool, probability) pairs, given that the
        # other cases already pass it to dict()).
        guess = dict(classifier.guess('Co-op of the Day: 413 Atlantic'))
        self.assertGreater(guess['good'], .99)
        self.assertNotIn('bad', guess)

        guess = dict(classifier.guess('House of the Day: 413 Atlantic'))
        self.assertGreater(guess['good'], .99)
        self.assertNotIn('bad', guess)

        guess = dict(classifier.guess('Development Watch: Yatta'))
        self.assertGreater(guess['bad'], .7)
        self.assertLess(guess['good'], .3)

        guess = dict(classifier.guess('Development Watch: 393 Pacific St.'))
        self.assertGreater(guess['bad'], .7)
        self.assertLess(guess['good'], .3)

        guess = dict(classifier.guess('Streetlevel: 123 Carlton St.'))
        self.assertGreater(guess['bad'], .99)
        self.assertNotIn('good', guess)

        # Titles with no trained phrase should produce no guess for either pool.
        guess = dict(classifier.guess('Extra, Extra'))
        self.assertNotIn('bad', guess)
        self.assertNotIn('good', guess)

        guess = dict(classifier.guess('Nothing doing: 393 Pacific St.'))
        self.assertNotIn('bad', guess)
        self.assertNotIn('good', guess)
Naive Fisher Bayesian Classifier. Stubbed in some boilerplate for making the analyzer do its job. Still have to connect it to feeds and train on real data, but it's looking good as a starting point. 2009-11-03 03:52:03 +00:00			`from django.test.client import Client`
Trying out a new quadrigram collocator on for size. Needs some tokenizer lovin'. 2011-09-05 22:06:24 -07:00			`from apps.rss_feeds.models import MStory`
Naive Fisher Bayesian Classifier. Stubbed in some boilerplate for making the analyzer do its job. Still have to connect it to feeds and train on real data, but it's looking good as a starting point. 2009-11-03 03:52:03 +00:00			`from django.test import TestCase`
			`from django.core import management`
Adding a tokenizer to the classifier, as well as using Divmod's Reverend Bayesian classifier. 2009-12-18 18:29:34 +00:00			`# from apps.analyzer.classifier import FisherClassifier`
Trying out a new quadrigram collocator on for size. Needs some tokenizer lovin'. 2011-09-05 22:06:24 -07:00			`import nltk`
			`from itertools import groupby`
Adding a tokenizer to the classifier, as well as using Divmod's Reverend Bayesian classifier. 2009-12-18 18:29:34 +00:00			`from apps.analyzer.tokenizer import Tokenizer`
Utils -> vendor 2011-04-11 21:57:45 -04:00			`from vendor.reverend.thomas import Bayes`
Adding Phrase Filter to Analyzer. Just need to hook up the phrase filter to the story classifier and then save the results to be used for later processing. 2009-11-15 18:57:53 +00:00			`from apps.analyzer.phrase_filter import PhraseFilter`
Naive Fisher Bayesian Classifier. Stubbed in some boilerplate for making the analyzer do its job. Still have to connect it to feeds and train on real data, but it's looking good as a starting point. 2009-11-03 03:52:03 +00:00
Trying out a new quadrigram collocator on for size. Needs some tokenizer lovin'. 2011-09-05 22:06:24 -07:00
			`class QuadgramCollocationFinder(nltk.collocations.AbstractCollocationFinder):`
			`"""A tool for the finding and ranking of quadgram collocations or other association measures.`
			`It is often useful to use from_words() rather thanconstructing an instance directly.`
			`"""`
			`def __init__(self, word_fd, quadgram_fd, trigram_fd, bigram_fd, wildcard_fd):`
			`"""Construct a TrigramCollocationFinder, given FreqDists for appearances of words, bigrams, two words with any word between them,and trigrams."""`
			`nltk.collocations.AbstractCollocationFinder.__init__(self, word_fd, quadgram_fd)`
			`self.trigram_fd = trigram_fd`
			`self.bigram_fd = bigram_fd`
			`self.wildcard_fd = wildcard_fd`

			`@classmethod`
			`def from_words(cls, words):`
			`wfd = nltk.probability.FreqDist()`
			`qfd = nltk.probability.FreqDist()`
			`tfd = nltk.probability.FreqDist()`
			`bfd = nltk.probability.FreqDist()`
			`wildfd = nltk.probability.FreqDist()`

			`for w1, w2, w3 ,w4 in nltk.util.ingrams(words, 4, pad_right=True):`
			`wfd.inc(w1)`
			`if w4 is None:`
			`continue`
			`else:`
			`qfd.inc((w1,w2,w3,w4))`
			`bfd.inc((w1,w2))`
			`tfd.inc((w1,w2,w3))`
			`wildfd.inc((w1,w3,w4))`
			`wildfd.inc((w1,w2,w4))`

			`return cls(wfd, qfd, tfd, bfd, wildfd)`

			`def score_ngram(self, score_fn, w1, w2, w3, w4):`
			`n_all = self.word_fd.N()`
			`n_iiii = self.ngram_fd[(w1, w2, w3, w4)]`
			`if not n_iiii:`
			`return`
			`n_iiix = self.bigram_fd[(w1, w2)]`
			`n_iixi = self.bigram_fd[(w2, w3)]`
			`n_ixii = self.bigram_fd[(w3, w4)]`
			`n_xiii = self.bigram_fd[(w3, w4)]`
			`n_iixx = self.word_fd[w1]`
			`n_ixix = self.word_fd[w2]`
			`n_ixxi = self.word_fd[w3]`
			`n_ixxx = self.word_fd[w4]`
			`n_xiix = self.trigram_fd[(w1, w2)]`
			`n_xixi = self.trigram_fd[(w2, w3)]`
			`n_xxii = self.trigram_fd[(w3, w4)]`
			`n_xxxi = self.trigram_fd[(w3, w4)]`
			`return score_fn(n_iiii,`
			`(n_iiix, n_iixi, n_ixii, n_xiii),`
			`(n_iixx, n_ixix, n_ixxi, n_ixxx),`
			`(n_xiix, n_xixi, n_xxii, n_xxxi),`
			`n_all)`


			`class CollocationTest(TestCase):`

			`fixtures = ['brownstoner.json']`

			`def setUp(self):`
			`self.client = Client()`

			`def test_bigrams(self):`
			`# bigram_measures = nltk.collocations.BigramAssocMeasures()`
			`trigram_measures = nltk.collocations.TrigramAssocMeasures()`

			`tokens = [`
			`'Co-op', 'of', 'the', 'day',`
			`'House', 'of', 'the', 'day',`
			`'Condo', 'of', 'the', 'day',`
			`'Development', 'Watch',`
			`'Co-op', 'of', 'the', 'day',`
			`]`
			`finder = nltk.collocations.TrigramCollocationFinder.from_words(tokens)`

			`finder.apply_freq_filter(2)`

			`# return the 10 n-grams with the highest PMI`
python2 to python3 conversion for analyzer and push 2020-10-13 22:01:32 +07:00			`print(finder.nbest(trigram_measures.pmi, 10))`
Trying out a new quadrigram collocator on for size. Needs some tokenizer lovin'. 2011-09-05 22:06:24 -07:00
			`titles = [`
			`'Co-op of the day',`
			`'Condo of the day',`
			`'Co-op of the day',`
			`'House of the day',`
			`'Development Watch',`
			`'Streetlevel',`
			`]`

			`tokens = nltk.tokenize.word(' '.join(titles))`
			`ngrams = nltk.ngrams(tokens, 4)`
			`d = [key for key, group in groupby(sorted(ngrams)) if len(list(group)) >= 2]`
python2 to python3 conversion for analyzer and push 2020-10-13 22:01:32 +07:00			`print(d)`
Trying out a new quadrigram collocator on for size. Needs some tokenizer lovin'. 2011-09-05 22:06:24 -07:00
Naive Fisher Bayesian Classifier. Stubbed in some boilerplate for making the analyzer do its job. Still have to connect it to feeds and train on real data, but it's looking good as a starting point. 2009-11-03 03:52:03 +00:00			`class ClassifierTest(TestCase):`

			`fixtures = ['classifiers.json', 'brownstoner.json']`

			`def setUp(self):`
			`self.client = Client()`
Adding a tokenizer to the classifier, as well as using Divmod's Reverend Bayesian classifier. 2009-12-18 18:29:34 +00:00			`#`
			`# def test_filter(self):`
			`# user = User.objects.all()`
			`# feed = Feed.objects.all()`
			`#`
			`# management.call_command('loaddata', 'brownstoner.json', verbosity=0)`
			`# response = self.client.get('/reader/refresh_feed', { "feed_id": 1, "force": True })`
			`# management.call_command('loaddata', 'brownstoner2.json', verbosity=0)`
			`# response = self.client.get('/reader/refresh_feed', { "feed_id": 1, "force": True })`
			`# management.call_command('loaddata', 'gothamist1.json', verbosity=0)`
			`# response = self.client.get('/reader/refresh_feed', { "feed_id": 4, "force": True })`
			`# management.call_command('loaddata', 'gothamist2.json', verbosity=0)`
			`# response = self.client.get('/reader/refresh_feed', { "feed_id": 4, "force": True })`
			`#`
			`# stories = Story.objects.filter(story_feed=feed[1]).order_by('-story_date')[:100]`
			`#`
			`# phrasefilter = PhraseFilter()`
			`# for story in stories:`
			`# # print story.story_title, story.id`
			`# phrasefilter.run(story.story_title, story.id)`
			`#`
			`# phrasefilter.pare_phrases()`
			`# phrasefilter.print_phrases()`
			`#`
			`def test_train(self):`
Trying out a new quadrigram collocator on for size. Needs some tokenizer lovin'. 2011-09-05 22:06:24 -07:00			`# user = User.objects.all()`
			`# feed = Feed.objects.all()`
Adding Phrase Filter to Analyzer. Just need to hook up the phrase filter to the story classifier and then save the results to be used for later processing. 2009-11-15 18:57:53 +00:00
Merge branch 'django1.7' into django1.8 * django1.7: (102 commits) Beginning fix of rss_feeds unit tests. Fixing unit test for profile app and signup. Remove highlights count when it reaches zero. Android v10.1b1. Stubbing in profile tests. Adding nginx.local.conf Adding original text and original story to API docs. #1282 Adding feed to root folder #1319 In app and external browser options Adding a smarter wakeup for real-time to handle cases where a laptop is re-opened but real-time is not immediately reestablished. #1348 (scroll indicators theme) #1344 (search loses focus) #1335 Auto theme option for OS level dark mode Fixing signup flow. #1347 Show pager with stories after using the intel trainer and refreshing #1272 Load HTML in comments New icon for Infrequent Site Stories. Allowing selection in private notes. Autoresizing private notes field. For #1035: Adding private notes to saved stories. ... 2020-08-10 17:51:25 -04:00			`management.call_command('loaddata', 'brownstoner.json', verbosity=0, commit=False, skip_checks=False)`
add skip_checks kwarg to call_command to maintain old functionality that was lost in django upgrade 2020-06-08 06:04:07 -04:00			`management.call_command('refresh_feed', force=1, feed=1, single_threaded=True, daemonize=False, skip_checks=False)`
Merge branch 'django1.7' into django1.8 * django1.7: (102 commits) Beginning fix of rss_feeds unit tests. Fixing unit test for profile app and signup. Remove highlights count when it reaches zero. Android v10.1b1. Stubbing in profile tests. Adding nginx.local.conf Adding original text and original story to API docs. #1282 Adding feed to root folder #1319 In app and external browser options Adding a smarter wakeup for real-time to handle cases where a laptop is re-opened but real-time is not immediately reestablished. #1348 (scroll indicators theme) #1344 (search loses focus) #1335 Auto theme option for OS level dark mode Fixing signup flow. #1347 Show pager with stories after using the intel trainer and refreshing #1272 Load HTML in comments New icon for Infrequent Site Stories. Allowing selection in private notes. Autoresizing private notes field. For #1035: Adding private notes to saved stories. ... 2020-08-10 17:51:25 -04:00			`management.call_command('loaddata', 'brownstoner2.json', verbosity=0, commit=False, skip_checks=False)`
add skip_checks kwarg to call_command to maintain old functionality that was lost in django upgrade 2020-06-08 06:04:07 -04:00			`management.call_command('refresh_feed', force=1, feed=1, single_threaded=True, daemonize=False, skip_checks=False)`
Adding Phrase Filter to Analyzer. Just need to hook up the phrase filter to the story classifier and then save the results to be used for later processing. 2009-11-15 18:57:53 +00:00
Paging from 1 and not 0. Moving the JS assetmodel to use new paging and API endpoints. 2011-04-21 10:44:50 -04:00			`stories = MStory.objects(story_feed_id=1)[:53]`
Adding Phrase Filter to Analyzer. Just need to hook up the phrase filter to the story classifier and then save the results to be used for later processing. 2009-11-15 18:57:53 +00:00
			`phrasefilter = PhraseFilter()`
			`for story in stories:`
Adding a tokenizer to the classifier, as well as using Divmod's Reverend Bayesian classifier. 2009-12-18 18:29:34 +00:00			`# print story.story_title, story.id`
Adding Phrase Filter to Analyzer. Just need to hook up the phrase filter to the story classifier and then save the results to be used for later processing. 2009-11-15 18:57:53 +00:00			`phrasefilter.run(story.story_title, story.id)`

			`phrasefilter.pare_phrases()`
Adding a tokenizer to the classifier, as well as using Divmod's Reverend Bayesian classifier. 2009-12-18 18:29:34 +00:00			`phrases = phrasefilter.get_phrases()`
python2 to python3 conversion for analyzer and push 2020-10-13 22:01:32 +07:00			`print(phrases)`
Adding Phrase Filter to Analyzer. Just need to hook up the phrase filter to the story classifier and then save the results to be used for later processing. 2009-11-15 18:57:53 +00:00
Adding a tokenizer to the classifier, as well as using Divmod's Reverend Bayesian classifier. 2009-12-18 18:29:34 +00:00			`tokenizer = Tokenizer(phrases)`
			`classifier = Bayes(tokenizer) # FisherClassifier(user[0], feed[0], phrases)`
Adding Phrase Filter to Analyzer. Just need to hook up the phrase filter to the story classifier and then save the results to be used for later processing. 2009-11-15 18:57:53 +00:00
Adding a tokenizer to the classifier, as well as using Divmod's Reverend Bayesian classifier. 2009-12-18 18:29:34 +00:00			`classifier.train('good', 'House of the Day: 393 Pacific St.')`
			`classifier.train('good', 'House of the Day: 393 Pacific St.')`
			`classifier.train('good', 'Condo of the Day: 393 Pacific St.')`
			`classifier.train('good', 'Co-op of the Day: 393 Pacific St. #3')`
			`classifier.train('good', 'Co-op of the Day: 393 Pacific St. #3')`
			`classifier.train('good', 'Development Watch: 393 Pacific St. #3')`
			`classifier.train('bad', 'Development Watch: 393 Pacific St. #3')`
			`classifier.train('bad', 'Development Watch: 393 Pacific St. #3')`
All tests fixed and now pass. 2010-04-09 18:30:25 -04:00			`classifier.train('bad', 'Development Watch: 393 Pacific St. #3')`
			`classifier.train('bad', 'Streetlevel: 393 Pacific St. #3')`
Naive Fisher Bayesian Classifier. Stubbed in some boilerplate for making the analyzer do its job. Still have to connect it to feeds and train on real data, but it's looking good as a starting point. 2009-11-03 03:52:03 +00:00
All tests fixed and now pass. 2010-04-09 18:30:25 -04:00			`guess = dict(classifier.guess('Co-op of the Day: 413 Atlantic'))`
			`self.assertTrue(guess['good'] > .99)`
			`self.assertTrue('bad' not in guess)`
Adding a tokenizer to the classifier, as well as using Divmod's Reverend Bayesian classifier. 2009-12-18 18:29:34 +00:00
All tests fixed and now pass. 2010-04-09 18:30:25 -04:00			`guess = dict(classifier.guess('House of the Day: 413 Atlantic'))`
			`self.assertTrue(guess['good'] > .99)`
			`self.assertTrue('bad' not in guess)`
Adding Phrase Filter to Analyzer. Just need to hook up the phrase filter to the story classifier and then save the results to be used for later processing. 2009-11-15 18:57:53 +00:00
All tests fixed and now pass. 2010-04-09 18:30:25 -04:00			`guess = dict(classifier.guess('Development Watch: Yatta'))`
			`self.assertTrue(guess['bad'] > .7)`
			`self.assertTrue(guess['good'] < .3)`
Adding a tokenizer to the classifier, as well as using Divmod's Reverend Bayesian classifier. 2009-12-18 18:29:34 +00:00
All tests fixed and now pass. 2010-04-09 18:30:25 -04:00			`guess = dict(classifier.guess('Development Watch: 393 Pacific St.'))`
			`self.assertTrue(guess['bad'] > .7)`
			`self.assertTrue(guess['good'] < .3)`

			`guess = dict(classifier.guess('Streetlevel: 123 Carlton St.'))`
			`self.assertTrue(guess['bad'] > .99)`
			`self.assertTrue('good' not in guess)`
Adding Phrase Filter to Analyzer. Just need to hook up the phrase filter to the story classifier and then save the results to be used for later processing. 2009-11-15 18:57:53 +00:00
All tests fixed and now pass. 2010-04-09 18:30:25 -04:00			`guess = classifier.guess('Extra, Extra')`
			`self.assertTrue('bad' not in guess)`
			`self.assertTrue('good' not in guess)`
Naive Fisher Bayesian Classifier. Stubbed in some boilerplate for making the analyzer do its job. Still have to connect it to feeds and train on real data, but it's looking good as a starting point. 2009-11-03 03:52:03 +00:00
All tests fixed and now pass. 2010-04-09 18:30:25 -04:00			`guess = classifier.guess('Nothing doing: 393 Pacific St.')`
			`self.assertTrue('bad' not in guess)`
			`self.assertTrue('good' not in guess)`
Naive Fisher Bayesian Classifier. Stubbed in some boilerplate for making the analyzer do its job. Still have to connect it to feeds and train on real data, but it's looking good as a starting point. 2009-11-03 03:52:03 +00:00