Utils -> vendor

This commit is contained in:
Samuel Clay 2011-04-11 21:57:45 -04:00
parent 795573ccc2
commit 8e2936fc1b
145 changed files with 34 additions and 1699 deletions

View file

@ -7,7 +7,7 @@ from django.core import management
from pprint import pprint
# from apps.analyzer.classifier import FisherClassifier
from apps.analyzer.tokenizer import Tokenizer
from utils.reverend.thomas import Bayes
from vendor.reverend.thomas import Bayes
from apps.analyzer.phrase_filter import PhraseFilter
class ClassifierTest(TestCase):

View file

@ -7,7 +7,7 @@ import datetime
from StringIO import StringIO
from lxml import etree
from utils import json_functions as json, urlnorm
import utils.opml as opml
import vendor.opml as opml
from utils import log as logging
from xml.etree.ElementTree import Element, SubElement, Comment, tostring
# import minidom

View file

@ -11,7 +11,7 @@ from paypal.standard.ipn.signals import subscription_signup
from apps.rss_feeds.tasks import NewFeeds
from celery.task import Task
from utils import log as logging
from utils.timezones.fields import TimeZoneField
from vendor.timezones.fields import TimeZoneField
from utils.user_functions import generate_secret_token
class Profile(models.Model):

View file

@ -36,7 +36,7 @@ from utils.story_functions import format_story_link_date__long
from utils.story_functions import bunch
from utils.story_functions import story_score
from utils import log as logging
from utils.timezones.utilities import localtime_for_timezone
from vendor.timezones.utilities import localtime_for_timezone
SINGLE_DAY = 60*60*24

View file

@ -7,6 +7,7 @@ import datetime
class Command(BaseCommand):
option_list = BaseCommand.option_list + (
make_option("-f", "--feed", default=None),
make_option("-a", "--all", default=False, action='store_true'),
make_option('-V', '--verbose', action='store_true',
dest='verbose', default=False, help='Verbose output.'),
)
@ -23,6 +24,8 @@ class Command(BaseCommand):
).exclude(
active_subscribers=0
).order_by('?')
if options['all']:
feeds = Feed.objects.all()
Feed.task_feeds(feeds)
# Mistakenly inactive feeds

View file

@ -1,5 +1,5 @@
from celery.task import Task
# from utils import log as logging
from utils import log as logging
class UpdateFeeds(Task):
name = 'update-feeds'
@ -12,9 +12,12 @@ class UpdateFeeds(Task):
feed_pks = [feed_pks]
for feed_pk in feed_pks:
feed = Feed.objects.get(pk=feed_pk)
try:
feed = Feed.objects.get(pk=feed_pk)
feed.update()
except Feed.DoesNotExist:
logging.info(" ---> Feed doesn't exist: [%s]" % feed_pk)
# logging.debug(' Updating: [%s] %s' % (feed_pks, feed))
feed.update()
class NewFeeds(Task):
name = 'new-feeds'

View file

@ -3038,7 +3038,8 @@
(story.long_parsed_date &&
$.make('span', { className: 'NB-feed-story-date' }, story.long_parsed_date)),
(story.starred_date &&
$.make('span', { className: 'NB-feed-story-starred-date' }, story.starred_date))
$.make('span', { className: 'NB-feed-story-starred-date' }, story.starred_date)),
(!this.model.get_preference('hide_story_changes') && $.make('div', { className: 'NB-feed-story-hide-changes', title: 'Hide story modifications' }))
])
]),
$.make('div', { className: 'NB-feed-story-content' }, story.story_content)

View file

@ -12,6 +12,7 @@ NEWSBLUR_DIR = CURRENT_DIR
TEMPLATE_DIRS = (''.join([CURRENT_DIR, '/templates']),)
MEDIA_ROOT = ''.join([CURRENT_DIR, '/media'])
UTILS_ROOT = ''.join([CURRENT_DIR, '/utils'])
VENDOR_ROOT = ''.join([CURRENT_DIR, '/vendor'])
LOG_FILE = ''.join([CURRENT_DIR, '/logs/newsblur.log'])
IMAGE_MASK = ''.join([CURRENT_DIR, '/media/img/mask.png'])
@ -19,9 +20,10 @@ IMAGE_MASK = ''.join([CURRENT_DIR, '/media/img/mask.png'])
# = PYTHONPATH =
# ==============
UTILS_DIR = ''.join([CURRENT_DIR, '/utils'])
if '/utils' not in ' '.join(sys.path):
sys.path.append(UTILS_DIR)
sys.path.append(UTILS_ROOT)
if '/vendor' not in ' '.join(sys.path):
sys.path.append(VENDOR_ROOT)
# ===================
# = Global Settings =
@ -252,8 +254,9 @@ INSTALLED_APPS = (
'apps.recommendations',
'south',
'utils',
'utils.typogrify',
'utils.paypal.standard.ipn',
'vendor',
'vendor.typogrify',
'vendor.paypal.standard.ipn',
)
if not DEVELOPMENT:

View file

@ -1,6 +1,6 @@
import logging
from django.conf import settings
from utils.colorama import Fore, Back, Style
from vendor.colorama import Fore, Back, Style
import re
def getlogger():

View file

@ -1,17 +0,0 @@
K 25
svn:wc:ra_dav:version-url
V 50
/svn/Divmod/!svn/ver/17655/trunk/Reverend/reverend
END
thomas.py
K 25
svn:wc:ra_dav:version-url
V 59
/svn/Divmod/!svn/ver/6111/trunk/Reverend/reverend/thomas.py
END
__init__.py
K 25
svn:wc:ra_dav:version-url
V 61
/svn/Divmod/!svn/ver/2573/trunk/Reverend/reverend/__init__.py
END

View file

@ -1,105 +0,0 @@
10
dir
17937
http://divmod.org/svn/Divmod/trunk/Reverend/reverend
http://divmod.org/svn/Divmod
2009-07-03T21:31:34.117160Z
17655
exarkun
866e43f7-fbfc-0310-8f2a-ec88d1da2979
test
dir
guessers
dir
thomas.py
file
2010-04-09T21:05:36.000000Z
f938743a245eb3f0bb190092b37bbc5f
2006-04-14T18:23:46.881754Z
6111
mithrandi
has-props
10468
__init__.py
file
2010-04-09T21:05:36.000000Z
d41d8cd98f00b204e9800998ecf8427e
2005-10-25T19:49:27.727286Z
2573
washort
0
ui
dir

View file

@ -1,5 +0,0 @@
K 14
svn:executable
V 1
*
END

View file

@ -1,324 +0,0 @@
# This module is part of the Divmod project and is Copyright 2003 Amir Bakhtiar:
# amir@divmod.org. This is free software; you can redistribute it and/or
# modify it under the terms of version 2.1 of the GNU Lesser General Public
# License as published by the Free Software Foundation.
#
import operator
import re
import math
from sets import Set
class BayesData(dict):
    """Token -> count mapping for one pool, plus its training bookkeeping.

    Attributes set on construction:
      name        -- the pool's name
      pool        -- optional caller-supplied reference, stored untouched
      training    -- list of uids this pool has been trained on
      tokenCount  -- running token total (maintained by the owner)
      trainCount  -- number of training calls (maintained by the owner)
    """

    def __init__(self, name='', pool=None):
        self.name, self.pool = name, pool
        self.tokenCount = self.trainCount = 0
        self.training = []

    def trainedOn(self, item):
        """Return True when ``item`` is among the recorded training uids."""
        seen = self.training
        return item in seen

    def __repr__(self):
        details = (self.name, self.tokenCount)
        return '<BayesDict: %s, %s tokens>' % details
class Bayes(object):
    """Bayesian classifier that files tokenized items into named pools.

    Token counts live in ``self.pools`` (name -> ``dataClass`` instance).
    The reserved ``'__Corpus__'`` pool accumulates every trained token.
    Per-pool token probabilities are rebuilt lazily by ``buildCache()``
    whenever ``self.dirty`` is set.
    """

    def __init__(self, tokenizer=None, combiner=None, dataClass=None):
        """Create an empty classifier.

        tokenizer -- object with a ``tokenize(obj)`` method returning an
                     iterable of tokens; defaults to ``Tokenizer()``.
        combiner  -- callable ``(probs, poolName) -> score``; defaults to
                     ``self.robinson``.
        dataClass -- factory for pool containers; defaults to ``BayesData``.
        """
        if dataClass is None:
            self.dataClass = BayesData
        else:
            self.dataClass = dataClass
        self.corpus = self.dataClass('__Corpus__')
        self.pools = {}
        self.pools['__Corpus__'] = self.corpus
        self.trainCount = 0
        self.dirty = True
        # Always present so attribute access is safe before the first
        # buildCache(); poolProbs() still rebuilds it while dirty.
        self.cache = {}
        # The tokenizer takes an object and returns a list of strings.
        if tokenizer is None:
            self._tokenizer = Tokenizer()
        else:
            self._tokenizer = tokenizer
        # The combiner combines per-token probabilities into one score.
        if combiner is None:
            self.combiner = self.robinson
        else:
            self.combiner = combiner

    def commit(self):
        """Persist the pools via ``save()`` using its default file name."""
        self.save()

    def newPool(self, poolName):
        """Create a new pool without training it and return it.

        An existing pool of that name is returned unchanged.
        """
        self.dirty = True  # not always true, but it's simple
        return self.pools.setdefault(poolName, self.dataClass(poolName))

    def removePool(self, poolName):
        """Delete the named pool; raises KeyError if it does not exist."""
        del self.pools[poolName]
        self.dirty = True

    def renamePool(self, poolName, newName):
        """Re-register the pool ``poolName`` under ``newName``."""
        if newName == poolName:
            # Renaming onto itself would otherwise delete the pool below.
            return
        self.pools[newName] = self.pools[poolName]
        self.pools[newName].name = newName
        self.removePool(poolName)
        self.dirty = True

    def mergePools(self, destPool, sourcePool):
        """Merge an existing pool into another.

        The data from sourcePool is merged into destPool.  The arguments
        are the names of the pools to be merged.  The pool named
        sourcePool is left intact and you may want to call removePool()
        to get rid of it.
        """
        sp = self.pools[sourcePool]
        dp = self.pools[destPool]
        for tok, count in sp.items():
            dp[tok] = dp.get(tok, 0) + count
            # tokenCount is the sum of all counts (see _train/_untrain),
            # so every merged occurrence is added, not just new tokens.
            dp.tokenCount += count
        self.dirty = True

    def poolData(self, poolName):
        """Return a list of the (token, count) tuples of a pool."""
        return list(self.pools[poolName].items())

    def poolTokens(self, poolName):
        """Return a list of the tokens in this pool."""
        return [tok for tok, count in self.poolData(poolName)]

    def save(self, fname='bayesdata.dat'):
        """Pickle ``self.pools`` to ``fname``."""
        try:
            from cPickle import dump
        except ImportError:
            from pickle import dump
        fp = open(fname, 'wb')
        try:
            dump(self.pools, fp)
        finally:
            # Close the handle even when pickling fails.
            fp.close()

    def load(self, fname='bayesdata.dat'):
        """Replace ``self.pools`` with the pickle stored in ``fname``.

        NOTE(review): unpickling executes arbitrary code; only load files
        that this application wrote itself.
        """
        try:
            from cPickle import load
        except ImportError:
            from pickle import load
        fp = open(fname, 'rb')
        try:
            self.pools = load(fp)
        finally:
            fp.close()
        self.corpus = self.pools['__Corpus__']
        self.dirty = True

    def poolNames(self):
        """Return a sorted list of pool names.

        Does not include the system pool '__Corpus__'.
        """
        names = [name for name in self.pools if name != '__Corpus__']
        names.sort()
        return names

    def buildCache(self):
        """Merge corpora and compute the per-pool token probabilities."""
        self.cache = {}
        for pname, pool in self.pools.items():
            # skip our special pool
            if pname == '__Corpus__':
                continue

            poolCount = pool.tokenCount
            themCount = max(self.corpus.tokenCount - poolCount, 1)
            cacheDict = self.cache.setdefault(pname, self.dataClass(pname))

            for word, totCount in self.corpus.items():
                # for every word in the corpus
                # check to see if this pool contains this word
                thisCount = float(pool.get(word, 0.0))
                if thisCount == 0.0:
                    continue
                otherCount = float(totCount) - thisCount

                if not poolCount:
                    goodMetric = 1.0
                else:
                    goodMetric = min(1.0, otherCount / poolCount)
                badMetric = min(1.0, thisCount / themCount)
                f = badMetric / (goodMetric + badMetric)

                # PROBABILITY_THRESHOLD
                if abs(f - 0.5) >= 0.1:
                    # GOOD_PROB, BAD_PROB
                    cacheDict[word] = max(0.0001, min(0.9999, f))

    def poolProbs(self):
        """Return the probability cache, rebuilding it first if dirty."""
        if self.dirty:
            self.buildCache()
            self.dirty = False
        return self.cache

    def getTokens(self, obj):
        """Return the tokens of ``obj`` as produced by the tokenizer.

        Note that the default tokenizer does not change the case.  In
        some applications you may want to lowercase everything so that
        "king" and "King" generate the same token.

        Override this in your subclass for objects other than text, or
        pass in a tokenizer as part of instance creation.
        """
        return self._tokenizer.tokenize(obj)

    def getProbs(self, pool, words):
        """Extract the probabilities of the tokens in a message.

        Returns at most 2048 (word, probability) pairs, highest
        probability first.
        """
        probs = [(word, pool[word]) for word in words if word in pool]
        probs.sort(key=operator.itemgetter(1), reverse=True)
        return probs[:2048]

    def train(self, pool, item, uid=None):
        """Train Bayes by telling him that item belongs in pool.

        uid is optional and may be used to uniquely identify the item
        that is being trained on.
        """
        tokens = self.getTokens(item)
        pool = self.pools.setdefault(pool, self.dataClass(pool))
        self._train(pool, tokens)
        self.corpus.trainCount += 1
        pool.trainCount += 1
        if uid:
            pool.training.append(uid)
        self.dirty = True

    def untrain(self, pool, item, uid=None):
        """Remove item's tokens from pool; a missing/empty pool is a no-op."""
        tokens = self.getTokens(item)
        pool = self.pools.get(pool, None)
        if not pool:
            return
        self._untrain(pool, tokens)
        # I guess we want to count this as additional training?
        self.corpus.trainCount += 1
        pool.trainCount += 1
        if uid and uid in pool.training:
            # An unknown uid must not abort after the counts were changed.
            pool.training.remove(uid)
        self.dirty = True

    def _train(self, pool, tokens):
        """Add one occurrence of every token to pool and to the corpus."""
        wc = 0
        for token in tokens:
            pool[token] = pool.get(token, 0) + 1
            self.corpus[token] = self.corpus.get(token, 0) + 1
            wc += 1
        pool.tokenCount += wc
        self.corpus.tokenCount += wc

    def _untrain(self, pool, tokens):
        """Remove one occurrence of every token from pool and the corpus."""
        for token in tokens:
            count = pool.get(token, 0)
            if count:
                if count == 1:
                    del pool[token]
                else:
                    pool[token] = count - 1
                pool.tokenCount -= 1

            count = self.corpus.get(token, 0)
            if count:
                if count == 1:
                    del self.corpus[token]
                else:
                    self.corpus[token] = count - 1
                self.corpus.tokenCount -= 1

    def trainedOn(self, msg):
        """Return True if any pool recorded ``msg`` as a training uid."""
        # The uids are stored on the pools themselves (see train()); the
        # probability cache holds fresh containers without any uids.
        for pool in self.pools.values():
            if msg in pool.training:
                return True
        return False

    def guess(self, msg):
        """Return [(poolName, score), ...] for msg, best score first.

        Pools that share no scored token with the message are omitted.
        """
        tokens = set(self.getTokens(msg))
        pools = self.poolProbs()

        res = {}
        for pname, pprobs in pools.items():
            p = self.getProbs(pprobs, tokens)
            if len(p) != 0:
                res[pname] = self.combiner(p, pname)
        ranked = list(res.items())
        ranked.sort(key=operator.itemgetter(1), reverse=True)
        return ranked

    def robinson(self, probs, ignore):
        """Compute the probability of a message being spam (Robinson's method).

            P = 1 - prod(1-p)^(1/n)
            Q = 1 - prod(p)^(1/n)
            S = (1 + (P-Q)/(P+Q)) / 2

        ``probs`` must be a non-empty list of (word, probability) pairs.
        Courtesy of http://christophe.delord.free.fr/en/index.html
        """
        nth = 1. / len(probs)
        prodP = 1.0
        prodNotP = 1.0
        for word, p in probs:
            prodP *= p
            prodNotP *= 1.0 - p
        P = 1.0 - prodNotP ** nth
        Q = 1.0 - prodP ** nth
        S = (P - Q) / (P + Q)
        return (1 + S) / 2

    def robinsonFisher(self, probs, ignore):
        """Compute the probability of a message being spam (Robinson-Fisher).

            H = C-1( -2.ln(prod(p)), 2*n )
            S = C-1( -2.ln(prod(1-p)), 2*n )
            I = (1 + H - S) / 2

        Courtesy of http://christophe.delord.free.fr/en/index.html
        """
        n = len(probs)
        # Sum the logarithms instead of taking the log of the product: the
        # product of many small probabilities underflows to 0.0, and
        # math.log(0.0) raises ValueError.
        try:
            H = chi2P(-2.0 * sum([math.log(p) for word, p in probs]), 2 * n)
        except (OverflowError, ValueError):
            H = 0.0
        try:
            S = chi2P(-2.0 * sum([math.log(1.0 - p) for word, p in probs]),
                      2 * n)
        except (OverflowError, ValueError):
            S = 0.0
        return (1 + H - S) / 2

    def __repr__(self):
        return '<Bayes: %s>' % [self.pools[p] for p in self.poolNames()]

    def __len__(self):
        """Number of distinct tokens in the corpus."""
        return len(self.corpus)
class Tokenizer:
    """Regex tokenizer yielding every run of word characters in a string.

    Tokens come back either in their existing case or, when ``lower`` is
    true, lower-cased.
    """

    WORD_RE = re.compile('\\w+', re.U)

    def __init__(self, lower=False):
        self.lower = lower

    def tokenize(self, obj):
        """Lazily yield the word tokens found in ``obj``."""
        for found in self.WORD_RE.finditer(obj):
            word = found.group()
            yield word.lower() if self.lower else word
def chi2P(chi, df):
    """Return P(chisq >= chi) for ``df`` degrees of freedom.

    ``df`` must be an even integer; the result is capped at 1.0.
    """
    assert df & 1 == 0
    m = chi / 2.0
    total = term = math.exp(-m)
    # Floor division keeps the bound an int under true division
    # (Python 3 or ``from __future__ import division``).
    for i in range(1, df // 2):
        term *= m / i
        total += term
    return min(total, 1.0)

View file

@ -1,17 +0,0 @@
K 25
svn:wc:ra_dav:version-url
V 59
/svn/Divmod/!svn/ver/17655/trunk/Reverend/reverend/guessers
END
__init__.py
K 25
svn:wc:ra_dav:version-url
V 70
/svn/Divmod/!svn/ver/2573/trunk/Reverend/reverend/guessers/__init__.py
END
email.py
K 25
svn:wc:ra_dav:version-url
V 68
/svn/Divmod/!svn/ver/17655/trunk/Reverend/reverend/guessers/email.py
END

View file

@ -1,96 +0,0 @@
10
dir
17937
http://divmod.org/svn/Divmod/trunk/Reverend/reverend/guessers
http://divmod.org/svn/Divmod
2009-07-03T21:31:34.117160Z
17655
exarkun
866e43f7-fbfc-0310-8f2a-ec88d1da2979
__init__.py
file
2010-04-09T21:05:35.000000Z
d41d8cd98f00b204e9800998ecf8427e
2005-10-25T19:49:27.727286Z
2573
washort
0
email.py
file
2010-04-09T21:05:35.000000Z
852c557941154a2f0bed11640429d8bd
2009-07-03T21:31:34.117160Z
17655
exarkun
has-props
3249

View file

@ -1,5 +0,0 @@
K 14
svn:executable
V 1
*
END

View file

@ -1,104 +0,0 @@
# This module is part of the Divmod project and is Copyright 2003 Amir Bakhtiar:
# amir@divmod.org. This is free software; you can redistribute it and/or
# modify it under the terms of version 2.1 of the GNU Lesser General Public
# License as published by the Free Software Foundation.
#
from rfc822 import AddressList
from reverend.thomas import Bayes
class EmailClassifier(Bayes):
    """Bayes classifier whose items are email message objects.

    Tokens are drawn from selected headers, the first text/plain part and
    a handful of synthetic "meta" tokens describing the message.
    """

    def getTokens(self, msg):
        """Return the list of token strings for ``msg``.

        Overrides the parent; the tokens are used as keys into the table
        of token counts.
        """
        tokens = self.getHeaderTokens(msg)
        tokens += self.getBodyTokens(msg)
        # Get some tokens that are generated from the
        # header and the structure
        tokens += self.getMetaTokens(msg)
        return tokens

    def getBodyTokens(self, msg):
        """Tokenize the first text/plain part ('' when there is none)."""
        text = self.getTextPlain(msg)
        if text is None:
            text = ''
        return list(self._tokenizer.tokenize(text))

    def getHeaderTokens(self, msg):
        """Tokenize subject, from, to and cc, with placeholder defaults."""
        text = msg.get('subject', 'nosubject') + ' '
        text += msg.get('from', 'fromnoone') + ' '
        text += msg.get('to', 'tonoone') + ' '
        text += msg.get('cc', 'ccnoone') + ' '
        return list(self._tokenizer.tokenize(text))

    def getTextPlain(self, msg):
        """Return the decoded payload of the first text/plain part, or None."""
        for part in msg.walk():
            typ = part.get_content_type()
            if typ and typ.lower() == "text/plain":
                return part.get_payload(decode=True)
        return None

    def getTextHtml(self, msg):
        """Return the raw payload of the first text/html part, or None."""
        for part in msg.walk():
            typ = part.get_content_type()
            if typ and typ.lower() == "text/html":
                return part.get_payload(decode=False)
        return None

    def getMetaTokens(self, msg):
        """Return synthetic tokens describing headers and message shape."""
        r = []
        for f in ['Content-type', 'X-Priority', 'X-Mailer',
                  'content-transfer-encoding', 'X-MSMail-Priority']:
            r.append(f + ':' + msg.get(f, 'None'))

        text = self.getTextPlain(msg)
        html = self.getTextHtml(msg)

        for stem, part in zip(['text', 'html'], [text, html]):
            if part is None:
                r.append(stem + '_None')
                continue
            r.append(stem + '_True')

            wordCount = len(part.split())
            # Equality, not identity: ``is 0`` only works by accident of
            # small-integer caching.
            if wordCount == 0:
                r.append(stem + 'zero')
            # Size buckets are cumulative, largest first.
            for limit in (10000, 1000, 100):
                if wordCount > limit:
                    r.append(stem + 'more_than_%d' % limit)

        for header in ('to', 'cc'):
            # Compare the NUMBER of addresses; comparing the list itself
            # to an int is always true on Python 2.
            recipients = len(AddressList(msg.get(header, '')).addresslist)
            for limit in (5, 10):
                if recipients > limit:
                    r.append('%s_more_than_%d' % (header, limit))
        return r

View file

@ -1,17 +0,0 @@
K 25
svn:wc:ra_dav:version-url
V 55
/svn/Divmod/!svn/ver/17655/trunk/Reverend/reverend/test
END
__init__.py
K 25
svn:wc:ra_dav:version-url
V 67
/svn/Divmod/!svn/ver/17655/trunk/Reverend/reverend/test/__init__.py
END
test_email.py
K 25
svn:wc:ra_dav:version-url
V 69
/svn/Divmod/!svn/ver/17655/trunk/Reverend/reverend/test/test_email.py
END

View file

@ -1,96 +0,0 @@
10
dir
17937
http://divmod.org/svn/Divmod/trunk/Reverend/reverend/test
http://divmod.org/svn/Divmod
2009-07-03T21:31:34.117160Z
17655
exarkun
866e43f7-fbfc-0310-8f2a-ec88d1da2979
__init__.py
file
2010-04-09T21:05:35.000000Z
d41d8cd98f00b204e9800998ecf8427e
2009-07-03T21:31:34.117160Z
17655
exarkun
0
test_email.py
file
2010-04-09T21:05:35.000000Z
458da3b3036588912b1a673fb190021f
2009-07-03T21:31:34.117160Z
17655
exarkun
878

View file

@ -1,38 +0,0 @@
"""
Tests for L{reverend.guessers.email}.
"""
import email
from unittest import TestCase
from reverend.guessers.email import EmailClassifier
class EmailClassifierTests(TestCase):
    """
    Smoke tests for L{EmailClassifier}.
    """

    def setUp(self):
        """
        Build a fresh, empty L{Message} and an untrained L{EmailClassifier}.
        """
        self.message = email.Message.Message()
        self.classifier = EmailClassifier()

    def test_training(self):
        """
        L{EmailClassifier.train} can be called with a pool name and a
        L{Message} instance without raising.
        """
        classifier, message = self.classifier, self.message
        classifier.train("test", message)

    def test_guessing(self):
        """
        L{EmailClassifier.guess} can be called with a L{Message} without
        raising.
        """
        classifier, message = self.classifier, self.message
        classifier.guess(message)

View file

@ -1,29 +0,0 @@
K 25
svn:wc:ra_dav:version-url
V 52
/svn/Divmod/!svn/ver/2573/trunk/Reverend/reverend/ui
END
util.py
K 25
svn:wc:ra_dav:version-url
V 60
/svn/Divmod/!svn/ver/2573/trunk/Reverend/reverend/ui/util.py
END
__init__.py
K 25
svn:wc:ra_dav:version-url
V 64
/svn/Divmod/!svn/ver/2573/trunk/Reverend/reverend/ui/__init__.py
END
trainer.py
K 25
svn:wc:ra_dav:version-url
V 63
/svn/Divmod/!svn/ver/2573/trunk/Reverend/reverend/ui/trainer.py
END
tester.py
K 25
svn:wc:ra_dav:version-url
V 62
/svn/Divmod/!svn/ver/2573/trunk/Reverend/reverend/ui/tester.py
END

View file

@ -1,164 +0,0 @@
10
dir
17937
http://divmod.org/svn/Divmod/trunk/Reverend/reverend/ui
http://divmod.org/svn/Divmod
2005-10-25T19:49:27.727286Z
2573
washort
866e43f7-fbfc-0310-8f2a-ec88d1da2979
util.py
file
2010-04-09T21:05:36.000000Z
6fb32ec747139aae00a39b92c40cfdb1
2005-10-25T19:49:27.727286Z
2573
washort
3097
__init__.py
file
2010-04-09T21:05:36.000000Z
d41d8cd98f00b204e9800998ecf8427e
2005-10-25T19:49:27.727286Z
2573
washort
0
trainer.py
file
2010-04-09T21:05:36.000000Z
231ad4977253c217db8bd9131cb547ca
2005-10-25T19:49:27.727286Z
2573
washort
has-props
12967
tester.py
file
2010-04-09T21:05:36.000000Z
f14706ea2409bae821f910766c4790b9
2005-10-25T19:49:27.727286Z
2573
washort
5382

View file

@ -1,5 +0,0 @@
K 14
svn:executable
V 1
*
END

View file

@ -1,152 +0,0 @@
# This module is part of the Divmod project and is Copyright 2003 Amir Bakhtiar:
# amir@divmod.org. This is free software; you can redistribute it and/or
# modify it under the terms of version 2.1 of the GNU Lesser General Public
# License as published by the Free Software Foundation.
#
from __future__ import generators
from Tkinter import *
import tkFileDialog
import tkSimpleDialog
import tkMessageBox
import os
import time
class TestView(Frame):
    """Tk panel that runs a guesser over a directory and plots the results.

    Shows running counters (guesses, right, wrong, accuracy) and a canvas
    with one coloured square per examined item.
    """

    def __init__(self, parent=None, guesser=None, app=None):
        Frame.__init__(self, parent)
        self.pack()
        self.guesser = guesser
        self.app = app
        self.size = 300  # canvas edge length in pixels
        self.setupViews()

    def setupViews(self):
        """Build the heading row, the counter row, the button and canvas."""
        line = Frame(self, relief=RAISED, borderwidth=1)
        line.pack(side=TOP, padx=2, pady=1)
        colHeadings = [('Guesses', 8), ('Right', 8), ('Wrong', 8), ('Accuracy %', 10)]
        currCol = 0
        for cHdr, width in colHeadings:
            l = Label(line, text=cHdr, width=width, bg='lightblue')
            l.grid(row=0, column=currCol)
            currCol += 1
        line = Frame(self)
        line.pack(fill=X)

        iGuess = IntVar()
        iRight = IntVar()
        iWrong = IntVar()
        iAcc = IntVar()
        self.model = (iGuess, iRight, iWrong, iAcc)

        l = Label(line, textvariable=iGuess, anchor=E, width=8, relief=SUNKEN)
        l.grid(row=0, column=0)
        l = Label(line, textvariable=iRight, anchor=E, width=8, relief=SUNKEN)
        l.grid(row=0, column=1)
        l = Label(line, textvariable=iWrong, anchor=E, width=8, relief=SUNKEN)
        l.grid(row=0, column=2)
        l = Label(line, textvariable=iAcc, anchor=E, width=8, relief=SUNKEN)
        l.grid(row=0, column=3)
        bp = Button(self, text="Run Test", command=self.runTest)
        bp.pack(side=BOTTOM)
        canvas = Canvas(self, width=self.size, height=self.size, bg='lightyellow')
        canvas.pack(expand=YES, fill=BOTH, side=BOTTOM)
        self.canvas = canvas

    def runTest(self):
        """Ask for a directory and its expected pool, then score the guesser.

        Returns silently when the guesser is untrained (after a warning),
        when a dialog is cancelled, or when the pool name is unknown.
        """
        # TODO - This is nasty re-write
        if len(self.guesser) == 0:
            tkMessageBox.showwarning('Underprepared for examination!',
                                     'Your guesser has had no training. Please train and retry.')
            return
        path = tkFileDialog.askdirectory()
        if not path:
            return
        answer = tkSimpleDialog.askstring('Which Pool do these items belong to?', 'Pool name?',
                                          parent=self.app)
        if not answer:
            return
        if answer not in self.guesser.pools:
            return

        de = DirectoryExam(path, answer, self.app.itemClass)
        testCount = len(de)
        scale = self.calcScale(testCount)
        perRow = int(self.size/scale)  # squares per canvas row
        x = 0
        y = 0
        cumTime = 0
        iGuess, iRight, iWrong, iAcc = self.model
        for m, ans in de:
            then = time.time()
            g = self.guesser.guess(m)
            cumTime += time.time() - then
            if g:
                # Keep only the name of the best-scoring pool.
                g = g[0][0]
            iGuess.set(iGuess.get()+1)
            if g == ans:
                col = 'green'
                iRight.set(iRight.get()+1)
            else:
                col = 'red'
                iWrong.set(iWrong.get()+1)
            iAcc.set(round(100 * iRight.get()/float(iGuess.get()), 3))
            # Plot squares
            self.canvas.create_rectangle(x*scale, y*scale, (x+1)*scale, (y+1)*scale, fill=col)
            if not divmod(iGuess.get(), perRow)[1]:
                # wrap
                x = 0
                y += 1
            else:
                x += 1
            self.update_idletasks()
        guesses = iGuess.get()
        # An empty directory (or a clock too coarse to register) leaves
        # cumTime at zero; report a rate of 0 instead of dividing by it.
        if cumTime:
            rate = round(guesses/cumTime, 2)
        else:
            rate = 0.0
        self.app.status.log('%r guesses in %.2f seconds. Avg: %.2f/sec.' % (guesses, cumTime, rate))

    def calcScale(self, testCount):
        """Return the edge length (>= 1 pixel) of one result square."""
        import math
        # Never return 0: callers divide by the scale.
        return max(1, int(self.size/(math.sqrt(testCount)+1)))
class DirectoryExam(object):
    """Iterable exam built from the files of one directory.

    Iteration yields ``(item, correctAnswer)`` pairs: each item is read
    from a file via ``itemClass.fromFile`` and every pair carries the same
    answer.  Files for which ``fromFile`` returns None are left out.
    """

    def __init__(self, path, answer, itemClass):
        self.path, self.answer, self.itemClass = path, answer, itemClass

    def __iter__(self):
        for entry in os.listdir(self.path):
            handle = open(os.path.join(self.path, entry), 'rb')
            try:
                item = self.itemClass.fromFile(handle)
            finally:
                handle.close()
            if item is not None:
                yield (item, self.answer)

    def __len__(self):
        """Number of directory entries (including ones iteration skips)."""
        return len(os.listdir(self.path))

View file

@ -1,403 +0,0 @@
# This module is part of the Divmod project and is Copyright 2003 Amir Bakhtiar:
# amir@divmod.org. This is free software; you can redistribute it and/or
# modify it under the terms of version 2.1 of the GNU Lesser General Public
# License as published by the Free Software Foundation.
#
from Tkinter import *
import tkFileDialog
import tkSimpleDialog
import tkMessageBox
import os
from util import Command, StatusBar, Notebook
from tester import TestView
class PoolView(Frame):
    """Tk panel listing every pool of a guesser with its counters.

    One row per pool (plus a 'Total' row for the corpus) shows the name,
    a colour stripe, the training count, the unique-token count and the
    token count.  Buttons create pools and save/load the guesser.
    """

    def __init__(self, master=None, guesser=None, app=None):
        Frame.__init__(self, master, bg='lightblue3')
        self.pack()
        self.listView = Frame(self)
        self.listView.pack()
        bp = Button(self, text="New Pool", command=self.newPool)
        bp.pack(side=LEFT, anchor=SE)
        self.addLoadSave()
        self.columnHeadings()
        self.model = {}
        self.guesser = guesser
        self.app = app
        self.reload()

    def reload(self):
        """Rebuild the row list from the guesser's current pools."""
        self.listView.destroy()
        self.listView = Frame(self)
        self.listView.pack()
        # Drop rows of pools that no longer exist (e.g. after load()), so
        # refresh() does not keep updating variables of destroyed labels.
        self.model = {}
        for pool in self.guesser.poolNames():
            self.addPool(self.guesser.pools[pool])
        self.addPool(self.guesser.corpus, 'Total')

    def upload(self):
        """Placeholder; the Upload button is created disabled."""
        pass

    def addLoadSave(self):
        """Create the Upload / Save / Load button column."""
        frame = Frame(self)
        frame.pack(side=RIGHT)
        bp = Button(frame, text="Upload", command=self.upload, state=DISABLED)
        bp.pack(side=BOTTOM, fill=X)
        bp = Button(frame, text="Save", command=self.save)
        bp.pack(side=BOTTOM, fill=X)
        bp = Button(frame, text="Load", command=self.load)
        bp.pack(side=BOTTOM, fill=X)

    def addPool(self, pool, name=None):
        """Append a row for ``pool``.

        When ``name`` is omitted the pool's own name is used and the row
        gets that pool's colour; an explicit name (used for the corpus
        'Total' row) gets no colour.
        """
        col = None
        line = Frame(self.listView)
        line.pack()
        if name is None:
            name = pool.name
            idx = self.guesser.poolNames().index(name)
            colours = self.defaultColours()
            # Cycle through the palette instead of raising IndexError
            # once there are more pools than colours.
            col = colours[idx % len(colours)]
        l = Label(line, text=name, anchor=W, width=10)
        l.grid(row=0, column=0)
        colourStripe = Label(line, text=' ', width=1, bg=col, anchor=W, relief=GROOVE)
        colourStripe.grid(row=0, column=1)
        train = IntVar()
        train.set(pool.trainCount)
        l = Label(line, textvariable=train, anchor=E, width=10, relief=SUNKEN)
        l.grid(row=0, column=2)
        uTok = IntVar()
        uTok.set(len(pool))
        l = Label(line, textvariable=uTok, anchor=E, width=12, relief=SUNKEN)
        l.grid(row=0, column=3)
        tTok = IntVar()
        tTok.set(pool.tokenCount)
        l = Label(line, textvariable=tTok, anchor=E, width=10, relief=SUNKEN)
        l.grid(row=0, column=4)
        self.model[name] = (pool, uTok, tTok, train)

    def refresh(self):
        """Copy the current pool counters into the displayed variables."""
        for pool, ut, tt, train in self.model.values():
            ut.set(len(pool))
            tt.set(pool.tokenCount)
            train.set(pool.trainCount)

    def save(self):
        """Ask for a file name and save the guesser there."""
        path = tkFileDialog.asksaveasfilename()
        if not path:
            return
        self.guesser.save(path)
        self.app.dirty = False

    def load(self):
        """Ask for a file, load the guesser from it and rebuild the rows."""
        path = tkFileDialog.askopenfilename()
        if not path:
            return
        self.guesser.load(path)
        self.reload()
        self.app.dirty = False

    def newPool(self):
        """Ask for a name and create a pool; duplicates are rejected."""
        p = tkSimpleDialog.askstring('Create Pool', 'Name for new pool?')
        if not p:
            return
        if p in self.guesser.pools:
            tkMessageBox.showwarning('Bad pool name!', 'Pool %s already exists.' % p)
            # Stop here: nothing was created, so nothing must be logged
            # or marked dirty.
            return
        self.guesser.newPool(p)
        self.reload()
        self.app.poolAdded()
        self.app.status.log('New pool created: %s.' % p, clear=3)

    def defaultColours(self):
        """Palette used for the per-pool colour stripes."""
        return ['green', 'yellow', 'lightblue', 'red', 'blue', 'orange', 'purple', 'pink']

    def columnHeadings(self):
        """Create the 'Pools' title and the column heading row."""
        # FIXME factor out and generalize
        title = Label(self, text='Pools', relief=RAISED, borderwidth=1)
        title.pack(side=TOP, fill=X)
        msgLine = Frame(self, relief=RAISED, borderwidth=1)
        msgLine.pack(side=TOP)
        currCol = 0
        colHeadings = [('Name', 10), ('', 1), ('Trained', 10), ('Unique Tokens', 12), ('Tokens', 10)]
        for cHdr, width in colHeadings:
            l = Label(msgLine, text=cHdr, width=width, bg='lightblue')
            l.grid(row=0, column=currCol)
            currCol += 1
class Trainer(Frame):
    """Main Tk window: trains and tests a guesser on files of a directory.

    Hosts a status bar and a notebook with a training page (pool list plus
    a page of item rows with one radio button per pool), a testing page
    and a quit page.
    """

    def __init__(self, parent, guesser=None, itemClass=None):
        """Build the window.

        guesser   -- classifier to train; defaults to a new ``Bayes()``.
        itemClass -- class providing ``fromFile`` and ``columnDefs``;
                     defaults to ``TextItem``.
        """
        self.status = StatusBar(parent)
        self.status.pack(side=BOTTOM, fill=X)
        Frame.__init__(self, parent)
        self.pack(side=TOP, fill=BOTH)
        self.itemsPerPage = 20
        self.rows = []
        for i in range(self.itemsPerPage):
            self.rows.append(ItemRow())
        self.items = []
        self.files = []
        self.cursor = 0
        self.dirty = False
        if guesser is None:
            from reverend.thomas import Bayes
            self.guesser = Bayes()
        else:
            self.guesser = guesser
        if itemClass is None:
            self.itemClass = TextItem
        else:
            self.itemClass = itemClass
        for row in self.rows:
            row.summary.set('foo')
        self.initViews()

    def initViews(self):
        """Create the notebook and its Training / Testing / Quit screens."""
        self.nb = Notebook(self)
        frame2 = Frame(self.nb())
        self.poolView = PoolView(frame2, guesser=self.guesser, app=self)
        self.poolView.pack(side=TOP)
        self.listView = Canvas(frame2, relief=GROOVE)
        self.listView.pack(padx=3)
        bn = Button(self.listView, text="Load training", command=self.loadCorpus)
        bn.pack(side=RIGHT, anchor=NE, fill=X)
        self.columnHeadings()
        self.addNextPrev()

        frame3 = Frame(self.nb())
        self.testView = TestView(frame3, guesser=self.guesser, app=self)
        self.testView.pack()

        frame4 = Frame(self.nb())
        bp = Button(frame4, text="Quit", command=self.quitNow)
        bp.pack(side=BOTTOM)

        self.nb.add_screen(frame2, 'Training')
        self.nb.add_screen(frame3, 'Testing')
        self.nb.add_screen(frame4, 'Quit')

    def addNextPrev(self):
        """Add the Prev Page / Next Page buttons below the item list."""
        npFrame = Frame(self.listView)
        npFrame.pack(side=BOTTOM, fill=X)
        bn = Button(npFrame, text="Prev Page", command=self.prevPage)
        bn.grid(row=0, column=0)
        bn = Button(npFrame, text="Next Page", command=self.nextPage)
        bn.grid(row=0, column=1)

    def loadCorpus(self):
        """Ask for a directory and show its first page of items."""
        path = tkFileDialog.askdirectory()
        if not path:
            return
        self.loadFileList(path)
        self.displayItems()
        self.displayRows()

    def bulkTest(self):
        """Ask for one directory per pool and print the pairs."""
        dirs = []
        for pool in self.guesser.poolNames():
            path = tkFileDialog.askdirectory()
            dirs.append((pool, path))
        for pool, path in dirs:
            # Same output as the former ``print pool, path`` statement,
            # but valid on Python 2 and 3 alike.
            print('%s %s' % (pool, path))

    def displayList(self):
        # NOTE(review): no itemRow method is defined on this class in the
        # visible source; calling this looks like it would raise
        # AttributeError -- confirm whether it is dead code.
        for item in self.items:
            self.itemRow(item)

    def displayRows(self):
        """Create the widgets for every row."""
        for row in self.rows:
            self.displayRow(row)

    def loadFileList(self, path):
        """Remember the full paths of the entries of ``path``; rewind."""
        listing = os.listdir(path)
        self.files = [os.path.join(path, entry) for entry in listing]
        self.cursor = 0

    def prevPage(self):
        """Step one page back (not before the first file) and redisplay."""
        self.cursor = max(0, self.cursor - self.itemsPerPage)
        self.displayItems()

    def nextPage(self):
        """Step one page forward (not past the file list) and redisplay."""
        self.cursor = min(len(self.files), self.cursor + self.itemsPerPage)
        self.displayItems()

    def displayItems(self):
        """Load the current page of files into the rows and guess each."""
        theseFiles = self.files[self.cursor:self.cursor + self.itemsPerPage]
        items = []
        for fname, row in zip(theseFiles, self.rows):
            fp = open(fname, 'rb')
            try:
                item = self.itemClass.fromFile(fp)
            finally:
                fp.close()
            if item is None:
                continue
            items.append(item)
            guesses = self.guesser.guess(item)
            summary = item.summary()
            cols = item.columnDefs()
            s = ''
            for c, ignore in cols:
                s += summary[c] + ' '
            row.initialize(item, s, guesses, self.guesser.poolNames())
        self.items = items

    def quitNow(self):
        """Quit, asking for confirmation first when changes are unsaved."""
        if self.dirty and not tkMessageBox.askyesno(
                "You have unsaved changes!", "Quit without saving?"):
            # The user declined: keep the application running.
            return
        self.quit()

    def columnHeadings(self):
        """Create the heading row from the item class's column definitions."""
        # FIXME - Something better for columns and rows in general
        line = Frame(self.listView, relief=RAISED, borderwidth=1)
        line.pack(side=TOP, padx=2, pady=1)
        colHeadings = self.itemClass.columnDefs()
        currCol = 0
        for cHdr, width in colHeadings:
            l = Label(line, text=cHdr, width=width, bg='lightblue')
            l.grid(row=0, column=currCol)
            currCol += 1
        line = Frame(self)
        line.pack(fill=X)

    def training(self, row):
        """Train the guesser with the row's selected pool, then re-guess."""
        # NOTE(review): training does not set self.dirty, so quitNow()
        # will not warn about unsaved training -- confirm whether that is
        # intended.
        sel = row.selection.get()
        self.guesser.train(sel, row.original)
        row.current = sel
        self.guessAll()

    def guessAll(self):
        """Refresh the pool counters and every row's guess."""
        self.poolView.refresh()
        pools = self.guesser.poolNames()
        for row in self.rows:
            row.setGuess(self.guesser.guess(row.original), pools)

    def displayRow(self, row, bgc=None):
        """Create radios, summary label and colour stripe for one row."""
        # UGH - REWRITE!
        line = Frame(self.listView, bg=bgc)
        line.pack(pady=1)
        row.line = line
        self.insertRadios(row)
        Label(line, text=row.summary.get(), textvariable=row.summary, width=60, bg=bgc,
              anchor=W).grid(row=0, column=2)
        colourStripe = Label(line, text=' ', width=1, bg=bgc, anchor=W, relief=GROOVE)
        colourStripe.grid(row=0, column=1)
        line.colourStripe = colourStripe
        pools = self.guesser.poolNames()
        row.refreshColour(pools)

    def poolAdded(self):
        """Rebuild every row's radio buttons after a pool was created."""
        if not self.items:
            return
        pools = self.guesser.poolNames()
        for row in self.rows:
            for r in row.radios:
                r.destroy()
            self.insertRadios(row)
            row.refreshColour(pools)
        self.dirty = True

    def insertRadios(self, row):
        """Give ``row`` one radio button per pool, wired to training()."""
        radioFrame = Frame(row.line)
        radioFrame.grid(row=0, column=0)
        radios = []
        v = row.selection
        for currCol, pool in enumerate(self.guesser.poolNames()):
            rb = Radiobutton(radioFrame, text=pool, variable=v, value=pool,
                             command=Command(self.training, row), bg=None)
            rb.grid(row=0, column=currCol)
            radios.append(rb)
        row.radios = radios
class TextItem(object):
    """Trainable item wrapping a single piece of text."""

    def __init__(self, text):
        self.text = text

    def summary(self):
        """Return the column-name -> display-text mapping for this item."""
        return {'Text': self.text}

    def columnNames(self):
        """Return the names of the display columns."""
        return ['Text']

    def columnDefs(cls):
        """Return the (column name, width) pairs of the display columns.

        Trainer calls this on the item class and on items; without it the
        default item class fails with AttributeError.  The width matches
        the summary label Trainer creates for each row.
        """
        return [('Text', 60)]
    columnDefs = classmethod(columnDefs)

    def lower(self):
        """Return the text lower-cased."""
        return self.text.lower()

    def fromFile(cls, fp):
        """Return an item holding the first line of the file."""
        return cls(fp.readline())
    fromFile = classmethod(fromFile)
class ItemRow(object):
    """State of one row in the trainer's item list.

    Holds the original item, the current guess list, the Tk variables
    backing the summary label and the radio selection, and (once
    displayed) the row's frame in ``line``.
    """

    def __init__(self, orig=None):
        self.line = None
        self.radios = []
        self.original = orig
        self.current = ''
        self.guess = []
        self.summary = StringVar()
        self.selection = StringVar()

    def initialize(self, item=None, summary='', guess=None, pools=None):
        """Point the row at a new item and show its summary and guess."""
        if pools is None:
            # Fresh list per call instead of a shared mutable default.
            pools = []
        self.selection.set('')
        self.original = item
        self.summary.set(summary)
        self.setGuess(guess, pools)

    def setGuess(self, guess, pools):
        """Store ``guess`` and select its best pool ('' when empty)."""
        if not guess:
            guess = [['']]
        self.guess = guess
        best = self.bestGuess()
        self.selection.set(best)
        self.current = best
        self.refreshColour(pools)

    def refreshColour(self, pools):
        """Colour the row's stripe after the best guess's pool, if shown."""
        col = None
        best = self.guess[0][0]
        if best in pools:
            colours = self.defaultColours()
            # Cycle through the palette instead of raising IndexError
            # once there are more pools than colours.
            col = colours[pools.index(best) % len(colours)]
        if self.line:
            self.line.colourStripe.config(bg=col)

    def __repr__(self):
        return repr(self.original)

    def defaultColours(self):
        """Palette used for the guess colour stripe."""
        return ['green', 'yellow', 'lightblue', 'red', 'blue', 'orange', 'purple', 'pink']

    def bestGuess(self):
        """Return the pool name of the first guess, or None without one."""
        if self.guess:
            return self.guess[0][0]
        else:
            return None
if __name__ == "__main__":
    # Script entry point: open the trainer in a top-level window and hand
    # control to the Tk event loop.
    root = Tk()
    root.minsize(width=300, height=300)
    root.title('Reverend Trainer')
    #root.maxsize(width=600, height=600)
    display = Trainer(root)
    root.mainloop()

View file

@ -1,98 +0,0 @@
# This module is part of the Divmod project and is Copyright 2003 Amir Bakhtiar:
# amir@divmod.org. This is free software; you can redistribute it and/or
# modify it under the terms of version 2.1 of the GNU Lesser General Public
# License as published by the Free Software Foundation.
#
from Tkinter import *
class StatusBar(Frame):
    """Single-line status display.  Courtesy of Fredrik Lundh."""

    def __init__(self, master):
        Frame.__init__(self, master)
        self.label = Label(self, bd=1, relief=SUNKEN, anchor=W)
        self.label.pack(fill=X)

    def _show(self, message):
        # Put the message on the label and repaint it right away.
        self.label.config(text=message)
        self.label.update_idletasks()

    def set(self, format, *args):
        """Display ``format % args``."""
        self._show(format % args)

    def clear(self):
        """Blank the status line."""
        self._show("")

    def log(self, text, clear=0):
        """Display ``text``; when ``clear`` is non-zero, blank the line
        again after that many seconds."""
        self.set('%s', text)
        if not clear:
            return
        delay_ms = clear * 1000
        self.label.after(delay_ms, self.clear)
class Command:
    """Callable that invokes a callback with pre-bound arguments.

    Courtesy of Danny Yoo
    http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66521
    """

    def __init__(self, callback, *args, **kwargs):
        self.callback = callback
        self.args = args
        self.kwargs = kwargs

    def __call__(self):
        """Call the callback with the stored arguments; return its result."""
        # Extended call syntax replaces the deprecated apply(), which no
        # longer exists on Python 3.
        return self.callback(*self.args, **self.kwargs)
class Notebook:
    """Minimal tabbed notebook built from radio buttons and frames.

    Courtesy of Iuri Wickert
    http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/188537
    """

    def __init__(self, master, side=LEFT):
        """Create the notebook inside ``master``.

        side -- where the row/column of selector buttons is packed.
        """
        self.active_fr = None
        self.count = 0
        # No positional argument: the first positional parameter of a Tk
        # variable is its master widget, not its initial value (the value
        # already defaults to 0).
        self.choice = IntVar()

        # allows the TOP and BOTTOM
        # radiobuttons' positioning.
        if side in (TOP, BOTTOM):
            self.side = LEFT
        else:
            self.side = TOP

        # creates notebook's frames structure
        self.rb_fr = Frame(master, borderwidth=2, relief=RIDGE)
        self.rb_fr.pack(side=side, fill=BOTH)
        self.screen_fr = Frame(master, borderwidth=2, relief=RIDGE)
        self.screen_fr.pack(fill=BOTH)

    def __call__(self):
        """Return the master frame that external screens must be built in."""
        return self.screen_fr

    def add_screen(self, fr, title):
        """Add frame ``fr`` as a new screen selected by a ``title`` button."""
        b = Radiobutton(self.rb_fr, text=title, indicatoron=0,
                        variable=self.choice, value=self.count,
                        command=lambda: self.display(fr))
        b.pack(fill=BOTH, side=self.side)

        # ensures the first frame will be
        # the first selected/enabled
        if self.active_fr is None:
            fr.pack(fill=BOTH, expand=1)
            self.active_fr = fr
        self.count += 1

    def display(self, fr):
        """Hide the active screen and show ``fr`` instead."""
        self.active_fr.forget()
        fr.pack(fill=BOTH, expand=1)
        self.active_fr = fr

Some files were not shown because too many files have changed in this diff Show more