mirror of
https://github.com/samuelclay/NewsBlur.git
synced 2025-08-05 16:58:59 +00:00
Utils -> vendor
This commit is contained in:
parent
795573ccc2
commit
8e2936fc1b
145 changed files with 34 additions and 1699 deletions
|
@ -7,7 +7,7 @@ from django.core import management
|
|||
from pprint import pprint
|
||||
# from apps.analyzer.classifier import FisherClassifier
|
||||
from apps.analyzer.tokenizer import Tokenizer
|
||||
from utils.reverend.thomas import Bayes
|
||||
from vendor.reverend.thomas import Bayes
|
||||
from apps.analyzer.phrase_filter import PhraseFilter
|
||||
|
||||
class ClassifierTest(TestCase):
|
||||
|
|
|
@ -7,7 +7,7 @@ import datetime
|
|||
from StringIO import StringIO
|
||||
from lxml import etree
|
||||
from utils import json_functions as json, urlnorm
|
||||
import utils.opml as opml
|
||||
import vendor.opml as opml
|
||||
from utils import log as logging
|
||||
from xml.etree.ElementTree import Element, SubElement, Comment, tostring
|
||||
# import minidom
|
||||
|
|
|
@ -11,7 +11,7 @@ from paypal.standard.ipn.signals import subscription_signup
|
|||
from apps.rss_feeds.tasks import NewFeeds
|
||||
from celery.task import Task
|
||||
from utils import log as logging
|
||||
from utils.timezones.fields import TimeZoneField
|
||||
from vendor.timezones.fields import TimeZoneField
|
||||
from utils.user_functions import generate_secret_token
|
||||
|
||||
class Profile(models.Model):
|
||||
|
|
|
@ -36,7 +36,7 @@ from utils.story_functions import format_story_link_date__long
|
|||
from utils.story_functions import bunch
|
||||
from utils.story_functions import story_score
|
||||
from utils import log as logging
|
||||
from utils.timezones.utilities import localtime_for_timezone
|
||||
from vendor.timezones.utilities import localtime_for_timezone
|
||||
|
||||
SINGLE_DAY = 60*60*24
|
||||
|
||||
|
|
|
@ -7,6 +7,7 @@ import datetime
|
|||
class Command(BaseCommand):
|
||||
option_list = BaseCommand.option_list + (
|
||||
make_option("-f", "--feed", default=None),
|
||||
make_option("-a", "--all", default=False, action='store_true'),
|
||||
make_option('-V', '--verbose', action='store_true',
|
||||
dest='verbose', default=False, help='Verbose output.'),
|
||||
)
|
||||
|
@ -23,6 +24,8 @@ class Command(BaseCommand):
|
|||
).exclude(
|
||||
active_subscribers=0
|
||||
).order_by('?')
|
||||
if options['all']:
|
||||
feeds = Feed.objects.all()
|
||||
Feed.task_feeds(feeds)
|
||||
|
||||
# Mistakenly inactive feeds
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
from celery.task import Task
|
||||
# from utils import log as logging
|
||||
from utils import log as logging
|
||||
|
||||
class UpdateFeeds(Task):
|
||||
name = 'update-feeds'
|
||||
|
@ -12,9 +12,12 @@ class UpdateFeeds(Task):
|
|||
feed_pks = [feed_pks]
|
||||
|
||||
for feed_pk in feed_pks:
|
||||
feed = Feed.objects.get(pk=feed_pk)
|
||||
try:
|
||||
feed = Feed.objects.get(pk=feed_pk)
|
||||
feed.update()
|
||||
except Feed.DoesNotExist:
|
||||
logging.info(" ---> Feed doesn't exist: [%s]" % feed_pk)
|
||||
# logging.debug(' Updating: [%s] %s' % (feed_pks, feed))
|
||||
feed.update()
|
||||
|
||||
class NewFeeds(Task):
|
||||
name = 'new-feeds'
|
||||
|
|
|
@ -3038,7 +3038,8 @@
|
|||
(story.long_parsed_date &&
|
||||
$.make('span', { className: 'NB-feed-story-date' }, story.long_parsed_date)),
|
||||
(story.starred_date &&
|
||||
$.make('span', { className: 'NB-feed-story-starred-date' }, story.starred_date))
|
||||
$.make('span', { className: 'NB-feed-story-starred-date' }, story.starred_date)),
|
||||
(!this.model.get_preference('hide_story_changes') && $.make('div', { className: 'NB-feed-story-hide-changes', title: 'Hide story modifications' }))
|
||||
])
|
||||
]),
|
||||
$.make('div', { className: 'NB-feed-story-content' }, story.story_content)
|
||||
|
|
11
settings.py
11
settings.py
|
@ -12,6 +12,7 @@ NEWSBLUR_DIR = CURRENT_DIR
|
|||
TEMPLATE_DIRS = (''.join([CURRENT_DIR, '/templates']),)
|
||||
MEDIA_ROOT = ''.join([CURRENT_DIR, '/media'])
|
||||
UTILS_ROOT = ''.join([CURRENT_DIR, '/utils'])
|
||||
VENDOR_ROOT = ''.join([CURRENT_DIR, '/vendor'])
|
||||
LOG_FILE = ''.join([CURRENT_DIR, '/logs/newsblur.log'])
|
||||
IMAGE_MASK = ''.join([CURRENT_DIR, '/media/img/mask.png'])
|
||||
|
||||
|
@ -19,9 +20,10 @@ IMAGE_MASK = ''.join([CURRENT_DIR, '/media/img/mask.png'])
|
|||
# = PYTHONPATH =
|
||||
# ==============
|
||||
|
||||
UTILS_DIR = ''.join([CURRENT_DIR, '/utils'])
|
||||
if '/utils' not in ' '.join(sys.path):
|
||||
sys.path.append(UTILS_DIR)
|
||||
sys.path.append(UTILS_ROOT)
|
||||
if '/vendor' not in ' '.join(sys.path):
|
||||
sys.path.append(VENDOR_ROOT)
|
||||
|
||||
# ===================
|
||||
# = Global Settings =
|
||||
|
@ -252,8 +254,9 @@ INSTALLED_APPS = (
|
|||
'apps.recommendations',
|
||||
'south',
|
||||
'utils',
|
||||
'utils.typogrify',
|
||||
'utils.paypal.standard.ipn',
|
||||
'vendor',
|
||||
'vendor.typogrify',
|
||||
'vendor.paypal.standard.ipn',
|
||||
)
|
||||
|
||||
if not DEVELOPMENT:
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
import logging
|
||||
from django.conf import settings
|
||||
from utils.colorama import Fore, Back, Style
|
||||
from vendor.colorama import Fore, Back, Style
|
||||
import re
|
||||
|
||||
def getlogger():
|
||||
|
|
|
@ -1,17 +0,0 @@
|
|||
K 25
|
||||
svn:wc:ra_dav:version-url
|
||||
V 50
|
||||
/svn/Divmod/!svn/ver/17655/trunk/Reverend/reverend
|
||||
END
|
||||
thomas.py
|
||||
K 25
|
||||
svn:wc:ra_dav:version-url
|
||||
V 59
|
||||
/svn/Divmod/!svn/ver/6111/trunk/Reverend/reverend/thomas.py
|
||||
END
|
||||
__init__.py
|
||||
K 25
|
||||
svn:wc:ra_dav:version-url
|
||||
V 61
|
||||
/svn/Divmod/!svn/ver/2573/trunk/Reverend/reverend/__init__.py
|
||||
END
|
|
@ -1,105 +0,0 @@
|
|||
10
|
||||
|
||||
dir
|
||||
17937
|
||||
http://divmod.org/svn/Divmod/trunk/Reverend/reverend
|
||||
http://divmod.org/svn/Divmod
|
||||
|
||||
|
||||
|
||||
2009-07-03T21:31:34.117160Z
|
||||
17655
|
||||
exarkun
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
866e43f7-fbfc-0310-8f2a-ec88d1da2979
|
||||
|
||||
test
|
||||
dir
|
||||
|
||||
guessers
|
||||
dir
|
||||
|
||||
thomas.py
|
||||
file
|
||||
|
||||
|
||||
|
||||
|
||||
2010-04-09T21:05:36.000000Z
|
||||
f938743a245eb3f0bb190092b37bbc5f
|
||||
2006-04-14T18:23:46.881754Z
|
||||
6111
|
||||
mithrandi
|
||||
has-props
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
10468
|
||||
|
||||
__init__.py
|
||||
file
|
||||
|
||||
|
||||
|
||||
|
||||
2010-04-09T21:05:36.000000Z
|
||||
d41d8cd98f00b204e9800998ecf8427e
|
||||
2005-10-25T19:49:27.727286Z
|
||||
2573
|
||||
washort
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
0
|
||||
|
||||
ui
|
||||
dir
|
||||
|
|
@ -1,5 +0,0 @@
|
|||
K 14
|
||||
svn:executable
|
||||
V 1
|
||||
*
|
||||
END
|
|
@ -1,324 +0,0 @@
|
|||
# This module is part of the Divmod project and is Copyright 2003 Amir Bakhtiar:
|
||||
# amir@divmod.org. This is free software; you can redistribute it and/or
|
||||
# modify it under the terms of version 2.1 of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation.
|
||||
#
|
||||
|
||||
import operator
|
||||
import re
|
||||
import math
|
||||
from sets import Set
|
||||
|
||||
class BayesData(dict):
    """Token -> count mapping for one pool, plus bookkeeping attributes.

    The dict contents are the per-token counts; the attributes below are
    maintained by the classifier that owns the pool.
    """

    def __init__(self, name='', pool=None):
        """Create an empty pool record called *name*.

        *pool* is stored as given; this class does not interpret it.
        """
        # Counters start at zero; the owning classifier updates them.
        self.tokenCount = 0
        self.trainCount = 0
        # Identifiers (uids) of the items this pool has been trained on.
        self.training = []
        self.name = name
        self.pool = pool

    def trainedOn(self, item):
        """Return True when *item* is recorded in this pool's training list."""
        seen = self.training
        return item in seen

    def __repr__(self):
        """Short debugging representation: pool name and token count."""
        details = (self.name, self.tokenCount)
        return '<BayesDict: %s, %s tokens>' % details
|
||||
|
||||
class Bayes(object):
    """Bayesian classifier that sorts items into named pools.

    Every trained token is counted both in its pool and in the special
    '__Corpus__' pool, which holds the totals across all pools.  Token
    probabilities are derived lazily (see buildCache/poolProbs) and are
    rebuilt whenever ``self.dirty`` is set.
    """

    def __init__(self, tokenizer=None, combiner=None, dataClass=None):
        """Set up an untrained classifier.

        tokenizer -- object with a ``tokenize(obj)`` method returning an
                     iterable of strings; defaults to ``Tokenizer()``.
        combiner  -- callable ``(probs, poolName)`` that combines the token
                     probabilities into a score; defaults to
                     ``self.robinson``.
        dataClass -- class used for pool storage; defaults to ``BayesData``.
        """
        if dataClass is None:
            self.dataClass = BayesData
        else:
            self.dataClass = dataClass
        self.corpus = self.dataClass('__Corpus__')
        self.pools = {}
        self.pools['__Corpus__'] = self.corpus
        self.trainCount = 0
        self.dirty = True
        # The tokenizer takes an object and returns
        # a list of strings
        if tokenizer is None:
            self._tokenizer = Tokenizer()
        else:
            self._tokenizer = tokenizer
        # The combiner combines probabilities
        if combiner is None:
            self.combiner = self.robinson
        else:
            self.combiner = combiner

    @staticmethod
    def _product(values):
        """Return the left-to-right float product of *values* (1.0 if empty)."""
        result = 1.0
        for value in values:
            result *= value
        return result

    def commit(self):
        """Persist the pools by calling save() with its default file name."""
        self.save()

    def newPool(self, poolName):
        """Create a new pool, without actually doing any
        training.

        Returns the pool; an existing pool of that name is returned as is.
        """
        self.dirty = True  # not always true, but it's simple
        return self.pools.setdefault(poolName, self.dataClass(poolName))

    def removePool(self, poolName):
        """Delete the pool named *poolName*; raises KeyError if it is unknown."""
        del(self.pools[poolName])
        self.dirty = True

    def renamePool(self, poolName, newName):
        """Register the pool *poolName* under *newName* instead.

        Raises KeyError if *poolName* is unknown.
        """
        pool = self.pools[poolName]
        if newName == poolName:
            # Renaming a pool to itself used to fall through to removePool()
            # and delete it; treat it as a no-op instead.
            return
        self.pools[newName] = pool
        pool.name = newName
        self.removePool(poolName)
        self.dirty = True

    def mergePools(self, destPool, sourcePool):
        """Merge an existing pool into another.
        The data from sourcePool is merged into destPool.
        The arguments are the names of the pools to be merged.
        The pool named sourcePool is left intact and you may
        want to call removePool() to get rid of it.
        """
        sp = self.pools[sourcePool]
        dp = self.pools[destPool]
        for tok, count in sp.items():
            dp[tok] = dp.get(tok, 0) + count
            # tokenCount tracks token occurrences (see _train/_untrain), so
            # it grows by the merged count, not by one per distinct token.
            dp.tokenCount += count
        self.dirty = True

    def poolData(self, poolName):
        """Return a list of the (token, count) tuples.
        """
        return self.pools[poolName].items()

    def poolTokens(self, poolName):
        """Return a list of the tokens in this pool.
        """
        return [tok for tok, count in self.poolData(poolName)]

    def save(self, fname='bayesdata.dat'):
        """Pickle ``self.pools`` into the file *fname*."""
        try:
            from cPickle import dump
        except ImportError:  # no cPickle module on this interpreter
            from pickle import dump
        fp = open(fname, 'wb')
        try:
            dump(self.pools, fp)
        finally:
            # Close the handle even when pickling fails.
            fp.close()

    def load(self, fname='bayesdata.dat'):
        """Replace the pools with the ones pickled in the file *fname*.

        NOTE(review): unpickling executes code chosen by whoever wrote the
        file -- only load data files from a trusted source.
        """
        try:
            from cPickle import load
        except ImportError:  # no cPickle module on this interpreter
            from pickle import load
        fp = open(fname, 'rb')
        try:
            self.pools = load(fp)
        finally:
            fp.close()
        self.corpus = self.pools['__Corpus__']
        self.dirty = True

    def poolNames(self):
        """Return a sorted list of Pool names.
        Does not include the system pool '__Corpus__'.
        """
        return sorted(name for name in self.pools if name != '__Corpus__')

    def buildCache(self):
        """ merges corpora and computes probabilities
        """
        self.cache = {}
        for pname, pool in self.pools.items():
            # skip our special pool
            if pname == '__Corpus__':
                continue

            poolCount = pool.tokenCount
            themCount = max(self.corpus.tokenCount - poolCount, 1)
            cacheDict = self.cache.setdefault(pname, self.dataClass(pname))

            for word, totCount in self.corpus.items():
                # for every word in the corpus
                # check to see if this pool contains this word
                thisCount = float(pool.get(word, 0.0))
                if (thisCount == 0.0):
                    continue
                otherCount = float(totCount) - thisCount

                if not poolCount:
                    goodMetric = 1.0
                else:
                    goodMetric = min(1.0, otherCount/poolCount)
                badMetric = min(1.0, thisCount/themCount)
                f = badMetric / (goodMetric + badMetric)

                # PROBABILITY_THRESHOLD
                if abs(f-0.5) >= 0.1:
                    # GOOD_PROB, BAD_PROB
                    cacheDict[word] = max(0.0001, min(0.9999, f))

    def poolProbs(self):
        """Return the per-pool probability cache, rebuilding it if dirty."""
        if self.dirty:
            self.buildCache()
            self.dirty = False
        return self.cache

    def getTokens(self, obj):
        """By default, we expect obj to be a string and split
        it on whitespace.

        Note that this does not change the case.
        In some applications you may want to lowercase everything
        so that "king" and "King" generate the same token.

        Override this in your subclass for objects other
        than text.

        Alternatively, you can pass in a tokenizer as part of
        instance creation.
        """
        return self._tokenizer.tokenize(obj)

    def getProbs(self, pool, words):
        """ extracts the probabilities of tokens in a message

        Returns at most 2048 (word, probability) pairs, highest
        probability first.
        """
        probs = [(word, pool[word]) for word in words if word in pool]
        # Stable descending sort on the probability, like the former
        # cmp-based sort.
        probs.sort(key=lambda pair: pair[1], reverse=True)
        return probs[:2048]

    def train(self, pool, item, uid=None):
        """Train Bayes by telling him that item belongs
        in pool. uid is optional and may be used to uniquely
        identify the item that is being trained on.
        """
        tokens = self.getTokens(item)
        pool = self.pools.setdefault(pool, self.dataClass(pool))
        self._train(pool, tokens)
        self.corpus.trainCount += 1
        pool.trainCount += 1
        if uid:
            pool.training.append(uid)
        self.dirty = True

    def untrain(self, pool, item, uid=None):
        """Remove the tokens of *item* from the pool named *pool*.

        Does nothing when the pool is unknown or empty.  *uid*, when given
        and recorded, is dropped from the pool's training list.
        """
        tokens = self.getTokens(item)
        pool = self.pools.get(pool, None)
        if not pool:
            return
        self._untrain(pool, tokens)
        # I guess we want to count this as additional training?
        self.corpus.trainCount += 1
        pool.trainCount += 1
        # Only remove a uid that is really recorded: list.remove() would
        # otherwise raise after the counts were already changed and leave
        # the probability cache stale.
        if uid and uid in pool.training:
            pool.training.remove(uid)
        self.dirty = True

    def _train(self, pool, tokens):
        """Count every token of *tokens* in both *pool* and the corpus."""
        wc = 0
        for token in tokens:
            count = pool.get(token, 0)
            pool[token] = count + 1
            count = self.corpus.get(token, 0)
            self.corpus[token] = count + 1
            wc += 1
        pool.tokenCount += wc
        self.corpus.tokenCount += wc

    def _untrain(self, pool, tokens):
        """Decrement the counts of *tokens* in *pool* and in the corpus.

        Entries whose count reaches zero are deleted.
        """
        for token in tokens:
            count = pool.get(token, 0)
            if count:
                if count == 1:
                    del(pool[token])
                else:
                    pool[token] = count - 1
                pool.tokenCount -= 1

            count = self.corpus.get(token, 0)
            if count:
                if count == 1:
                    del(self.corpus[token])
                else:
                    self.corpus[token] = count - 1
                self.corpus.tokenCount -= 1

    def trainedOn(self, msg):
        """Return True if any pool lists *msg* among its training uids."""
        # The uids live on the pools themselves.  The probability cache
        # (used previously) holds fresh data objects with empty training
        # lists and does not exist before the first poolProbs() call.
        for p in self.pools.values():
            if msg in p.training:
                return True
        return False

    def guess(self, msg):
        """Score *msg* against every pool.

        Returns a list of (poolName, score) pairs, best score first; pools
        with no matching token are left out.
        """
        tokens = set(self.getTokens(msg))
        pools = self.poolProbs()

        res = {}
        for pname, pprobs in pools.items():
            p = self.getProbs(pprobs, tokens)
            if len(p) != 0:
                res[pname] = self.combiner(p, pname)
        return sorted(res.items(), key=lambda pair: pair[1], reverse=True)

    def robinson(self, probs, ignore):
        """ computes the probability of a message being spam (Robinson's method)
            P = 1 - prod(1-p)^(1/n)
            Q = 1 - prod(p)^(1/n)
            S = (1 + (P-Q)/(P+Q)) / 2
            Courtesy of http://christophe.delord.free.fr/en/index.html

        probs is a non-empty list of (token, probability) pairs; the second
        argument is unused.
        """
        nth = 1./len(probs)
        P = 1.0 - self._product(1.0 - p[1] for p in probs) ** nth
        Q = 1.0 - self._product(p[1] for p in probs) ** nth
        S = (P - Q) / (P + Q)
        return (1 + S) / 2

    def robinsonFisher(self, probs, ignore):
        """ computes the probability of a message being spam (Robinson-Fisher method)
            H = C-1( -2.ln(prod(p)), 2*n )
            S = C-1( -2.ln(prod(1-p)), 2*n )
            I = (1 + H - S) / 2
            Courtesy of http://christophe.delord.free.fr/en/index.html

        probs is a list of (token, probability) pairs; the second argument
        is unused.
        """
        n = len(probs)
        # math.log() raises ValueError (not only OverflowError) when the
        # product has underflowed to 0.0, so both are mapped to 0.0.
        try:
            H = chi2P(-2.0 * math.log(self._product(p[1] for p in probs)), 2*n)
        except (OverflowError, ValueError):
            H = 0.0
        try:
            S = chi2P(-2.0 * math.log(self._product(1.0 - p[1] for p in probs)), 2*n)
        except (OverflowError, ValueError):
            S = 0.0
        return (1 + H - S) / 2

    def __repr__(self):
        """Debugging representation listing the non-corpus pools."""
        return '<Bayes: %s>' % [self.pools[p] for p in self.poolNames()]

    def __len__(self):
        """Number of distinct tokens in the corpus."""
        return len(self.corpus)
|
||||
|
||||
class Tokenizer:
    """Regex-based word tokenizer.

    Yields every run of word characters found in a string, either as
    written or lower-cased, depending on the ``lower`` flag.
    """

    WORD_RE = re.compile('\\w+', re.U)

    def __init__(self, lower=False):
        """Remember whether tokens should be lower-cased."""
        self.lower = lower

    def tokenize(self, obj):
        """Generate the word tokens of the string *obj*, in order."""
        for found in self.WORD_RE.finditer(obj):
            word = found.group()
            # The flag is consulted per token, as the generator advances.
            yield word.lower() if self.lower else word
|
||||
|
||||
def chi2P(chi, df):
    """ return P(chisq >= chi, with df degree of freedom)

    df must be even

    The result is capped at 1.0.
    """
    assert df & 1 == 0
    m = chi / 2.0
    total = term = math.exp(-m)
    # Floor division keeps the bound an integer under true division as
    # well; df is even, so no precision is lost.
    for i in range(1, df // 2):
        term *= m / i
        total += term
    return min(total, 1.0)
|
||||
|
|
@ -1,17 +0,0 @@
|
|||
K 25
|
||||
svn:wc:ra_dav:version-url
|
||||
V 59
|
||||
/svn/Divmod/!svn/ver/17655/trunk/Reverend/reverend/guessers
|
||||
END
|
||||
__init__.py
|
||||
K 25
|
||||
svn:wc:ra_dav:version-url
|
||||
V 70
|
||||
/svn/Divmod/!svn/ver/2573/trunk/Reverend/reverend/guessers/__init__.py
|
||||
END
|
||||
email.py
|
||||
K 25
|
||||
svn:wc:ra_dav:version-url
|
||||
V 68
|
||||
/svn/Divmod/!svn/ver/17655/trunk/Reverend/reverend/guessers/email.py
|
||||
END
|
|
@ -1,96 +0,0 @@
|
|||
10
|
||||
|
||||
dir
|
||||
17937
|
||||
http://divmod.org/svn/Divmod/trunk/Reverend/reverend/guessers
|
||||
http://divmod.org/svn/Divmod
|
||||
|
||||
|
||||
|
||||
2009-07-03T21:31:34.117160Z
|
||||
17655
|
||||
exarkun
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
866e43f7-fbfc-0310-8f2a-ec88d1da2979
|
||||
|
||||
__init__.py
|
||||
file
|
||||
|
||||
|
||||
|
||||
|
||||
2010-04-09T21:05:35.000000Z
|
||||
d41d8cd98f00b204e9800998ecf8427e
|
||||
2005-10-25T19:49:27.727286Z
|
||||
2573
|
||||
washort
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
0
|
||||
|
||||
email.py
|
||||
file
|
||||
|
||||
|
||||
|
||||
|
||||
2010-04-09T21:05:35.000000Z
|
||||
852c557941154a2f0bed11640429d8bd
|
||||
2009-07-03T21:31:34.117160Z
|
||||
17655
|
||||
exarkun
|
||||
has-props
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
3249
|
||||
|
|
@ -1,5 +0,0 @@
|
|||
K 14
|
||||
svn:executable
|
||||
V 1
|
||||
*
|
||||
END
|
|
@ -1,104 +0,0 @@
|
|||
# This module is part of the Divmod project and is Copyright 2003 Amir Bakhtiar:
|
||||
# amir@divmod.org. This is free software; you can redistribute it and/or
|
||||
# modify it under the terms of version 2.1 of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation.
|
||||
#
|
||||
|
||||
from rfc822 import AddressList
|
||||
|
||||
from reverend.thomas import Bayes
|
||||
|
||||
|
||||
class EmailClassifier(Bayes):
    """Bayes classifier whose tokens are extracted from e-mail messages.

    NOTE(review): the message objects are assumed to offer get(), walk(),
    get_content_type() and get_payload(), as the code below calls them --
    confirm against the callers.
    """

    def getTokens(self, msg):
        """Return the list of tokens describing *msg*.

        Overrides the parent: the strings returned here are used as the
        keys into the table of token counts.
        """
        tokens = self.getHeaderTokens(msg)
        tokens += self.getBodyTokens(msg)

        # Get some tokens that are generated from the
        # header and the structure
        tokens += self.getMetaTokens(msg)
        return tokens

    def getBodyTokens(self, msg):
        """Tokenize the first text/plain part ('' when there is none)."""
        text = self.getTextPlain(msg)
        if text is None:
            text = ''
        tl = list(self._tokenizer.tokenize(text))
        return tl

    def getHeaderTokens(self, msg):
        """Tokenize the subject, from, to and cc headers.

        A placeholder word stands in for each missing header.
        """
        subj = msg.get('subject', 'nosubject')
        text = subj + ' '
        text += msg.get('from', 'fromnoone') + ' '
        text += msg.get('to', 'tonoone') + ' '
        text += msg.get('cc', 'ccnoone') + ' '
        tl = list(self._tokenizer.tokenize(text))
        return tl

    def getTextPlain(self, msg):
        """Return the decoded payload of the first text/plain part, or None."""
        for part in msg.walk():
            typ = part.get_content_type()
            if typ and typ.lower() == "text/plain":
                text = part.get_payload(decode=True)
                return text
        return None

    def getTextHtml(self, msg):
        """Return the undecoded payload of the first text/html part, or None."""
        for part in msg.walk():
            typ = part.get_content_type()
            if typ and typ.lower() == "text/html":
                text = part.get_payload(decode=False)
                return text
        return None

    def getMetaTokens(self, msg):
        """Return tokens derived from selected headers and message structure.

        Covers a fixed set of headers, the presence and rough word count of
        the plain-text and HTML parts, and the number of To/Cc addresses.
        """
        r = []
        for f in ['Content-type', 'X-Priority', 'X-Mailer',
                  'content-transfer-encoding', 'X-MSMail-Priority']:
            r.append(f + ':' + msg.get(f, 'None'))

        text = self.getTextPlain(msg)
        html = self.getTextHtml(msg)

        for stem, part in zip(['text', 'html'], [text, html]):
            if part is None:
                r.append(stem + '_None')
                continue
            r.append(stem + '_True')

            # Word count of the part; the size buckets are cumulative, so a
            # very long part gets every "more_than" token.
            l = len(part.split())
            if l == 0:
                r.append(stem + 'zero')
            if l > 10000:
                r.append(stem + 'more_than_10000')
            if l > 1000:
                r.append(stem + 'more_than_1000')
            if l > 100:
                r.append(stem + 'more_than_100')

        t = msg.get('to', '')
        at = AddressList(t).addresslist
        c = msg.get('cc', '')
        ac = AddressList(c).addresslist

        # Compare the NUMBER of addresses.  The lists themselves used to be
        # compared with the integers, which is always true on Python 2 (so
        # every message received all four tokens) and a TypeError on
        # Python 3.
        if len(at) > 5:
            r.append('to_more_than_5')
        if len(at) > 10:
            r.append('to_more_than_10')
        if len(ac) > 5:
            r.append('cc_more_than_5')
        if len(ac) > 10:
            r.append('cc_more_than_10')

        return r
|
||||
|
|
@ -1,17 +0,0 @@
|
|||
K 25
|
||||
svn:wc:ra_dav:version-url
|
||||
V 55
|
||||
/svn/Divmod/!svn/ver/17655/trunk/Reverend/reverend/test
|
||||
END
|
||||
__init__.py
|
||||
K 25
|
||||
svn:wc:ra_dav:version-url
|
||||
V 67
|
||||
/svn/Divmod/!svn/ver/17655/trunk/Reverend/reverend/test/__init__.py
|
||||
END
|
||||
test_email.py
|
||||
K 25
|
||||
svn:wc:ra_dav:version-url
|
||||
V 69
|
||||
/svn/Divmod/!svn/ver/17655/trunk/Reverend/reverend/test/test_email.py
|
||||
END
|
|
@ -1,96 +0,0 @@
|
|||
10
|
||||
|
||||
dir
|
||||
17937
|
||||
http://divmod.org/svn/Divmod/trunk/Reverend/reverend/test
|
||||
http://divmod.org/svn/Divmod
|
||||
|
||||
|
||||
|
||||
2009-07-03T21:31:34.117160Z
|
||||
17655
|
||||
exarkun
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
866e43f7-fbfc-0310-8f2a-ec88d1da2979
|
||||
|
||||
__init__.py
|
||||
file
|
||||
|
||||
|
||||
|
||||
|
||||
2010-04-09T21:05:35.000000Z
|
||||
d41d8cd98f00b204e9800998ecf8427e
|
||||
2009-07-03T21:31:34.117160Z
|
||||
17655
|
||||
exarkun
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
0
|
||||
|
||||
test_email.py
|
||||
file
|
||||
|
||||
|
||||
|
||||
|
||||
2010-04-09T21:05:35.000000Z
|
||||
458da3b3036588912b1a673fb190021f
|
||||
2009-07-03T21:31:34.117160Z
|
||||
17655
|
||||
exarkun
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
878
|
||||
|
|
@ -1,38 +0,0 @@
|
|||
|
||||
"""
|
||||
Tests for L{reverend.guessers.email}.
|
||||
"""
|
||||
|
||||
import email
|
||||
from unittest import TestCase
|
||||
|
||||
from reverend.guessers.email import EmailClassifier
|
||||
|
||||
|
||||
class EmailClassifierTests(TestCase):
    """
    Smoke tests for L{EmailClassifier}.
    """
    def setUp(self):
        """
        Build the fixtures every test shares: an empty L{Message} and a
        fresh L{EmailClassifier}.
        """
        self.message = email.Message.Message()
        self.classifier = EmailClassifier()


    def test_training(self):
        """
        L{EmailClassifier.train} can be called with a pool name and a
        L{Message} instance; the test passes when no exception is raised.
        """
        pool, sample = "test", self.message
        self.classifier.train(pool, sample)


    def test_guessing(self):
        """
        L{EmailClassifier.guess} can be called with a L{Message}; the test
        passes when no exception is raised.
        """
        sample = self.message
        self.classifier.guess(sample)
|
|
@ -1,29 +0,0 @@
|
|||
K 25
|
||||
svn:wc:ra_dav:version-url
|
||||
V 52
|
||||
/svn/Divmod/!svn/ver/2573/trunk/Reverend/reverend/ui
|
||||
END
|
||||
util.py
|
||||
K 25
|
||||
svn:wc:ra_dav:version-url
|
||||
V 60
|
||||
/svn/Divmod/!svn/ver/2573/trunk/Reverend/reverend/ui/util.py
|
||||
END
|
||||
__init__.py
|
||||
K 25
|
||||
svn:wc:ra_dav:version-url
|
||||
V 64
|
||||
/svn/Divmod/!svn/ver/2573/trunk/Reverend/reverend/ui/__init__.py
|
||||
END
|
||||
trainer.py
|
||||
K 25
|
||||
svn:wc:ra_dav:version-url
|
||||
V 63
|
||||
/svn/Divmod/!svn/ver/2573/trunk/Reverend/reverend/ui/trainer.py
|
||||
END
|
||||
tester.py
|
||||
K 25
|
||||
svn:wc:ra_dav:version-url
|
||||
V 62
|
||||
/svn/Divmod/!svn/ver/2573/trunk/Reverend/reverend/ui/tester.py
|
||||
END
|
|
@ -1,164 +0,0 @@
|
|||
10
|
||||
|
||||
dir
|
||||
17937
|
||||
http://divmod.org/svn/Divmod/trunk/Reverend/reverend/ui
|
||||
http://divmod.org/svn/Divmod
|
||||
|
||||
|
||||
|
||||
2005-10-25T19:49:27.727286Z
|
||||
2573
|
||||
washort
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
866e43f7-fbfc-0310-8f2a-ec88d1da2979
|
||||
|
||||
util.py
|
||||
file
|
||||
|
||||
|
||||
|
||||
|
||||
2010-04-09T21:05:36.000000Z
|
||||
6fb32ec747139aae00a39b92c40cfdb1
|
||||
2005-10-25T19:49:27.727286Z
|
||||
2573
|
||||
washort
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
3097
|
||||
|
||||
__init__.py
|
||||
file
|
||||
|
||||
|
||||
|
||||
|
||||
2010-04-09T21:05:36.000000Z
|
||||
d41d8cd98f00b204e9800998ecf8427e
|
||||
2005-10-25T19:49:27.727286Z
|
||||
2573
|
||||
washort
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
0
|
||||
|
||||
trainer.py
|
||||
file
|
||||
|
||||
|
||||
|
||||
|
||||
2010-04-09T21:05:36.000000Z
|
||||
231ad4977253c217db8bd9131cb547ca
|
||||
2005-10-25T19:49:27.727286Z
|
||||
2573
|
||||
washort
|
||||
has-props
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
12967
|
||||
|
||||
tester.py
|
||||
file
|
||||
|
||||
|
||||
|
||||
|
||||
2010-04-09T21:05:36.000000Z
|
||||
f14706ea2409bae821f910766c4790b9
|
||||
2005-10-25T19:49:27.727286Z
|
||||
2573
|
||||
washort
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
5382
|
||||
|
|
@ -1,5 +0,0 @@
|
|||
K 14
|
||||
svn:executable
|
||||
V 1
|
||||
*
|
||||
END
|
|
@ -1,152 +0,0 @@
|
|||
# This module is part of the Divmod project and is Copyright 2003 Amir Bakhtiar:
|
||||
# amir@divmod.org. This is free software; you can redistribute it and/or
|
||||
# modify it under the terms of version 2.1 of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation.
|
||||
#
|
||||
|
||||
from __future__ import generators
|
||||
from Tkinter import *
|
||||
import tkFileDialog
|
||||
import tkSimpleDialog
|
||||
import tkMessageBox
|
||||
import os
|
||||
import time
|
||||
|
||||
class TestView(Frame):
    """Tk frame that runs a guesser over a directory of items.

    It shows running totals (guesses, right, wrong, accuracy) and plots one
    coloured square per tested item on a canvas.
    """

    def __init__(self, parent=None, guesser=None, app=None):
        """Build the view.

        guesser -- trained classifier; the code uses len(), .pools and
                   .guess() on it.
        app     -- owning application; the code uses .itemClass and
                   .status.log() on it.
        """
        Frame.__init__(self, parent)
        self.pack()
        self.guesser = guesser
        self.app = app
        self.size = 300  # width and height of the plotting canvas
        self.setupViews()

    def setupViews(self):
        """Create the heading row, the counter labels, the button and the canvas."""
        line = Frame(self, relief=RAISED, borderwidth=1)
        line.pack(side=TOP, padx=2, pady=1)
        colHeadings = [('Guesses', 8), ('Right', 8), ('Wrong', 8), ('Accuracy %', 10)]
        currCol = 0
        for cHdr, width in colHeadings:
            l = Label(line, text=cHdr, width=width, bg='lightblue')
            l.grid(row=0, column=currCol)
            currCol += 1
        line = Frame(self)
        line.pack(fill=X)

        iGuess = IntVar()
        iRight = IntVar()
        iWrong = IntVar()
        iAcc = IntVar()
        # runTest() unpacks the counters in this order.
        self.model = (iGuess, iRight, iWrong, iAcc)

        l = Label(line, textvariable=iGuess, anchor=E, width=8, relief=SUNKEN)
        l.grid(row=0, column=0)
        l = Label(line, textvariable=iRight, anchor=E, width=8, relief=SUNKEN)
        l.grid(row=0, column=1)
        l = Label(line, textvariable=iWrong, anchor=E, width=8, relief=SUNKEN)
        l.grid(row=0, column=2)
        l = Label(line, textvariable=iAcc, anchor=E, width=8, relief=SUNKEN)
        l.grid(row=0, column=3)
        bp = Button(self, text="Run Test", command=self.runTest)
        bp.pack(side=BOTTOM)

        canvas = Canvas(self, width=self.size, height=self.size, bg='lightyellow')
        canvas.pack(expand=YES, fill=BOTH, side=BOTTOM)
        self.canvas = canvas

##        slid = Scale(self, label='Wrong', variable=iWrong, to=400, orient=HORIZONTAL, bg='red')
##        slid.pack(side=BOTTOM)
##        slid = Scale(self, label='Right', variable=iRight, to=400, orient=HORIZONTAL, bg='green')
##        slid.pack(side=BOTTOM)

    def runTest(self):
        """Ask for a directory and a pool name, then grade the guesser.

        Every item of the directory is guessed; the counters and the canvas
        are updated per item and a timing summary is logged at the end.
        Returns early when the guesser is untrained, a dialog is cancelled
        or the pool name is unknown.
        """
        # TODO - This is nasty re-write
        if len(self.guesser) == 0:
            tkMessageBox.showwarning('Underprepared for examination!',
                                     'Your guesser has had no training. Please train and retry.')
            return
        path = tkFileDialog.askdirectory()
        if not path:
            return
        answer = tkSimpleDialog.askstring('Which Pool do these items belong to?', 'Pool name?',
                                          parent=self.app)

        if not answer:
            return
        if answer not in self.guesser.pools:
            return

        de = DirectoryExam(path, answer, self.app.itemClass)
        testCount = len(de)
        scale = self.calcScale(testCount)
        x = 0
        y = 0
        cumTime = 0
        iGuess, iRight, iWrong, iAcc = self.model
        for m, ans in de:
            then = time.time()
            g = self.guesser.guess(m)
            cumTime += time.time() - then
            if g:
                # Keep only the name of the best-scoring pool.
                g = g[0][0]
            iGuess.set(iGuess.get()+1)
            if g == ans:
                col = 'green'
                iRight.set(iRight.get()+1)
            else:
                col = 'red'
                iWrong.set(iWrong.get()+1)
            iAcc.set(round(100 * iRight.get()/float(iGuess.get()), 3))

            # Plot squares
            self.canvas.create_rectangle(x*scale, y*scale, (x+1)*scale, (y+1)*scale, fill=col)
            if not divmod(iGuess.get(), (int(self.size/scale)))[1]:
                # wrap
                x = 0
                y += 1
            else:
                x += 1

            self.update_idletasks()
        guesses = iGuess.get()
        # An empty directory (or a clock too coarse to register the run)
        # leaves cumTime at zero; report a rate of 0 instead of dividing
        # by zero.
        if cumTime > 0:
            rate = round(guesses/cumTime, 2)
        else:
            rate = 0.0
        self.app.status.log('%r guesses in %.2f seconds. Avg: %.2f/sec.' % (guesses, cumTime,
                                                                            rate))

    def calcScale(self, testCount):
        """Return the side length of one plotted square for *testCount* items.

        Never smaller than 1: a zero scale would make runTest() divide by
        zero when it computes the row width.
        """
        import math
        scale = int(self.size/(math.sqrt(testCount)+1))
        return max(1, scale)
|
||||
|
||||
|
||||
|
||||
class DirectoryExam(object):
    """Iterable exam built from the files of one directory.

    Iterating yields (item, correctAnswer) pairs, one per file that
    ``itemClass.fromFile`` could turn into an item; the answer is the same
    for every pair.
    """

    def __init__(self, path, answer, itemClass):
        """Remember the directory, the expected answer and the item factory."""
        self.itemClass = itemClass
        self.answer = answer
        self.path = path

    def __iter__(self):
        """Yield (item, answer) for each directory entry that parses."""
        for entry in os.listdir(self.path):
            handle = open(os.path.join(self.path, entry), 'rb')
            try:
                parsed = self.itemClass.fromFile(handle)
            finally:
                handle.close()
            # fromFile() returning None means "no item"; skip the entry.
            if parsed is not None:
                yield (parsed, self.answer)

    def __len__(self):
        """Number of directory entries, including ones iteration would skip."""
        return len(os.listdir(self.path))
|
||||
|
||||
|
||||
|
|
@ -1,403 +0,0 @@
|
|||
# This module is part of the Divmod project and is Copyright 2003 Amir Bakhtiar:
|
||||
# amir@divmod.org. This is free software; you can redistribute it and/or
|
||||
# modify it under the terms of version 2.1 of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation.
|
||||
#
|
||||
|
||||
from Tkinter import *
|
||||
import tkFileDialog
|
||||
import tkSimpleDialog
|
||||
import tkMessageBox
|
||||
|
||||
import os
|
||||
|
||||
from util import Command, StatusBar, Notebook
|
||||
from tester import TestView
|
||||
|
||||
class PoolView(Frame):
    """Frame that lists the guesser's pools with their training counters.

    One row is shown per pool plus a 'Total' row for the guesser's corpus.
    ``self.model`` maps a row name to ``(pool, uniqueTokensVar,
    totalTokensVar, trainCountVar)`` so ``refresh`` can update the labels.
    """

    def __init__(self, master=None, guesser=None, app=None):
        """Build the static widgets and populate the pool rows."""
        Frame.__init__(self, master, bg='lightblue3')
        self.pack()
        self.listView = Frame(self)
        self.listView.pack()
        bp = Button(self, text="New Pool", command=self.newPool)
        bp.pack(side=LEFT, anchor=SE)
        self.addLoadSave()
        self.columnHeadings()
        self.model = {}
        self.guesser = guesser
        self.app = app
        self.reload()

    def reload(self):
        """Rebuild the row list from the guesser's current pools."""
        self.listView.destroy()
        self.listView = Frame(self)
        self.listView.pack()
        # Every current pool is re-added below, so drop entries that belong
        # to pools which no longer exist (e.g. after loading another file).
        self.model = {}
        for pool in self.guesser.poolNames():
            self.addPool(self.guesser.pools[pool])
        self.addPool(self.guesser.corpus, 'Total')

    def upload(self):
        """Placeholder; the Upload button is created disabled."""
        pass

    def addLoadSave(self):
        """Add the Upload / Save / Load button column on the right."""
        frame = Frame(self)
        frame.pack(side=RIGHT)
        bp = Button(frame, text="Upload", command=self.upload, state=DISABLED)
        bp.pack(side=BOTTOM, fill=X)
        bp = Button(frame, text="Save", command=self.save)
        bp.pack(side=BOTTOM, fill=X)
        bp = Button(frame, text="Load", command=self.load)
        bp.pack(side=BOTTOM, fill=X)

    def addPool(self, pool, name=None):
        """Append one row for ``pool`` and register it in ``self.model``.

        When ``name`` is omitted the pool's own name is used and the row
        gets a colour stripe chosen by the pool's position in
        ``guesser.poolNames()``; an explicitly named row gets no colour.
        """
        col = None
        line = Frame(self.listView)
        line.pack()
        if name is None:
            name = pool.name
            idx = self.guesser.poolNames().index(name)
            colours = self.defaultColours()
            # Wrap around so more pools than colours does not raise IndexError.
            col = colours[idx % len(colours)]
        l = Label(line, text=name, anchor=W, width=10)
        l.grid(row=0, column=0)
        colourStripe = Label(line, text=' ', width=1, bg=col, anchor=W, relief=GROOVE)
        colourStripe.grid(row=0, column=1)
        train = IntVar()
        train.set(pool.trainCount)
        l = Label(line, textvariable=train, anchor=E, width=10, relief=SUNKEN)
        l.grid(row=0, column=2)
        uTok = IntVar()
        uTok.set(len(pool))
        l = Label(line, textvariable=uTok, anchor=E, width=12, relief=SUNKEN)
        l.grid(row=0, column=3)
        tTok = IntVar()
        tTok.set(pool.tokenCount)
        l = Label(line, textvariable=tTok, anchor=E, width=10, relief=SUNKEN)
        l.grid(row=0, column=4)
        self.model[name] = (pool, uTok, tTok, train)

    def refresh(self):
        """Copy the current counters of every registered pool into its labels."""
        for pool, ut, tt, train in self.model.values():
            ut.set(len(pool))
            tt.set(pool.tokenCount)
            train.set(pool.trainCount)

    def save(self):
        """Ask for a file name and save the guesser there; clears the dirty flag."""
        path = tkFileDialog.asksaveasfilename()
        if not path:
            return
        self.guesser.save(path)
        self.app.dirty = False

    def load(self):
        """Ask for a file, load the guesser from it and rebuild the rows."""
        path = tkFileDialog.askopenfilename()
        if not path:
            return
        self.guesser.load(path)
        self.reload()
        self.app.dirty = False

    def newPool(self):
        """Ask for a pool name and create that pool in the guesser."""
        p = tkSimpleDialog.askstring('Create Pool', 'Name for new pool?')
        if not p:
            return
        if p in self.guesser.pools:
            tkMessageBox.showwarning('Bad pool name!', 'Pool %s already exists.' % p)
            # Stop here: creating the pool again would act on the existing one.
            return
        self.guesser.newPool(p)
        self.reload()
        self.app.poolAdded()
        self.app.status.log('New pool created: %s.' % p, clear=3)

    def defaultColours(self):
        """Return the colour names used for pool stripes, in pool order."""
        return ['green', 'yellow', 'lightblue', 'red', 'blue', 'orange', 'purple', 'pink']

    def columnHeadings(self):
        """Create the 'Pools' title and the heading row above the pool list."""
        # FIXME factor out and generalize
        title = Label(self, text='Pools', relief=RAISED, borderwidth=1)
        title.pack(side=TOP, fill=X)
        msgLine = Frame(self, relief=RAISED, borderwidth=1)
        msgLine.pack(side=TOP)
        colHeadings = [('Name', 10), ('', 1), ('Trained', 10), ('Unique Tokens', 12), ('Tokens', 10)]
        for currCol, (cHdr, width) in enumerate(colHeadings):
            l = Label(msgLine, text=cHdr, width=width, bg='lightblue')
            l.grid(row=0, column=currCol)
|
||||
|
||||
|
||||
class Trainer(Frame):
    """Main trainer window: pool overview, pageable item list and test view.

    Files of a chosen directory are turned into items with
    ``itemClass.fromFile``, shown ``itemsPerPage`` at a time, and can be
    assigned to a pool through one radio button per pool.
    """

    def __init__(self, parent, guesser=None, itemClass=None):
        """Create the status bar, the row models and all views.

        ``guesser`` defaults to a fresh ``reverend.thomas.Bayes`` and
        ``itemClass`` defaults to ``TextItem``.
        """
        self.status = StatusBar(parent)
        self.status.pack(side=BOTTOM, fill=X)
        Frame.__init__(self, parent)
        self.pack(side=TOP, fill=BOTH)
        self.itemsPerPage = 20
        self.rows = [ItemRow() for i in range(self.itemsPerPage)]
        self.items = []
        self.files = []
        self.cursor = 0
        self.dirty = False
        if guesser is None:
            from reverend.thomas import Bayes
            self.guesser = Bayes()
        else:
            self.guesser = guesser
        if itemClass is None:
            self.itemClass = TextItem
        else:
            self.itemClass = itemClass
        for row in self.rows:
            row.summary.set('foo')
        self.initViews()

    def initViews(self):
        """Build the notebook with its Training, Testing and Quit screens."""
        self.nb = Notebook(self)
        frame2 = Frame(self.nb())
        self.poolView = PoolView(frame2, guesser=self.guesser, app=self)
        self.poolView.pack(side=TOP)
        self.listView = Canvas(frame2, relief=GROOVE)
        self.listView.pack(padx=3)
        bn = Button(self.listView, text="Load training", command=self.loadCorpus)
        bn.pack(side=RIGHT, anchor=NE, fill=X)
        self.columnHeadings()
        self.addNextPrev()

        frame3 = Frame(self.nb())
        self.testView = TestView(frame3, guesser=self.guesser, app=self)
        self.testView.pack()

        frame4 = Frame(self.nb())
        bp = Button(frame4, text="Quit", command=self.quitNow)
        bp.pack(side=BOTTOM)

        self.nb.add_screen(frame2, 'Training')
        self.nb.add_screen(frame3, 'Testing')
        self.nb.add_screen(frame4, 'Quit')

    def addNextPrev(self):
        """Add the 'Prev Page' / 'Next Page' buttons below the item list."""
        npFrame = Frame(self.listView)
        npFrame.pack(side=BOTTOM, fill=X)
        bn = Button(npFrame, text="Prev Page", command=self.prevPage)
        bn.grid(row=0, column=0)
        bn = Button(npFrame, text="Next Page", command=self.nextPage)
        bn.grid(row=0, column=1)

    def loadCorpus(self):
        """Ask for a directory and show its first page of items."""
        path = tkFileDialog.askdirectory()
        if not path:
            return
        self.loadFileList(path)
        self.displayItems()
        self.displayRows()

    def bulkTest(self):
        """Ask for one directory per pool and print each (pool, path) pair."""
        dirs = []
        for pool in self.guesser.poolNames():
            path = tkFileDialog.askdirectory()
            dirs.append((pool, path))
        for pool, path in dirs:
            # Formatted print: same output under the Python 2 print statement
            # and the Python 3 print function.
            print('%s %s' % (pool, path))

    def displayList(self):
        """Call ``self.itemRow`` for every loaded item."""
        # NOTE(review): no ``itemRow`` method is defined on this class in the
        # visible source, so this looks like dead code -- confirm before use.
        for item in self.items:
            self.itemRow(item)

    def displayRows(self):
        """Create the widgets for every row model."""
        for row in self.rows:
            self.displayRow(row)

    def loadFileList(self, path):
        """Remember the full paths of all entries in ``path`` and rewind."""
        listing = os.listdir(path)
        self.files = [os.path.join(path, name) for name in listing]
        self.cursor = 0

    def prevPage(self):
        """Move one page back (never before the first file) and redisplay."""
        self.cursor = max(0, self.cursor - self.itemsPerPage)
        self.displayItems()

    def nextPage(self):
        """Move one page forward when another page exists and redisplay."""
        nextCursor = self.cursor + self.itemsPerPage
        if nextCursor >= len(self.files):
            # Already on the last page; advancing would show an empty page
            # and leave the cursor off the page grid for prevPage.
            return
        self.cursor = nextCursor
        self.displayItems()

    def displayItems(self):
        """Load the files of the current page into the row models."""
        theseFiles = self.files[self.cursor:self.cursor + self.itemsPerPage]
        items = []
        for path, row in zip(theseFiles, self.rows):
            fp = open(path, 'rb')
            try:
                item = self.itemClass.fromFile(fp)
            finally:
                fp.close()
            if item is None:
                continue
            items.append(item)
            guesses = self.guesser.guess(item)
            summary = item.summary()
            cols = item.columnDefs()
            # One summary field per column, each followed by a space.
            s = ''.join([summary[c] + ' ' for c, ignore in cols])
            row.initialize(item, s, guesses, self.guesser.poolNames())
        self.items = items

    def quitNow(self):
        """Quit, asking for confirmation first when there are unsaved changes."""
        if self.dirty:
            if not tkMessageBox.askyesno("You have unsaved changes!", "Quit without saving?"):
                # The user chose not to discard the changes: keep running.
                return
        self.quit()

    def columnHeadings(self):
        """Create the heading row for the item list from the item class columns."""
        # FIXME - Something better for columns and rows in general
        line = Frame(self.listView, relief=RAISED, borderwidth=1)
        line.pack(side=TOP, padx=2, pady=1)
        colHeadings = self.itemClass.columnDefs()
        for currCol, (cHdr, width) in enumerate(colHeadings):
            l = Label(line, text=cHdr, width=width, bg='lightblue')
            l.grid(row=0, column=currCol)
        line = Frame(self)
        line.pack(fill=X)

    def training(self, row):
        """Train the guesser with the row's item for the selected pool."""
        sel = row.selection.get()
        self.guesser.train(sel, row.original)
        row.current = sel
        self.guessAll()

    def guessAll(self):
        """Refresh the pool counters and re-guess every row."""
        self.poolView.refresh()
        pools = self.guesser.poolNames()
        for row in self.rows:
            row.setGuess(self.guesser.guess(row.original), pools)

    def displayRow(self, row, bgc=None):
        """Create the widgets of one row: radios, colour stripe and summary."""
        # UGH - REWRITE!
        line = Frame(self.listView, bg=bgc)
        line.pack(pady=1)
        row.line = line
        self.insertRadios(row)
        Label(line, text=row.summary.get(), textvariable=row.summary, width=60, bg=bgc,
              anchor=W).grid(row=0, column=2)
        colourStripe = Label(line, text=' ', width=1, bg=bgc, anchor=W, relief=GROOVE)
        colourStripe.grid(row=0, column=1)
        line.colourStripe = colourStripe
        pools = self.guesser.poolNames()
        row.refreshColour(pools)

    def poolAdded(self):
        """Mark the session dirty and rebuild the radios of displayed rows."""
        # A new pool changes the guesser whether or not items are displayed.
        self.dirty = True
        if not self.items:
            return
        pools = self.guesser.poolNames()
        for row in self.rows:
            for r in row.radios:
                r.destroy()
            self.insertRadios(row)
            row.refreshColour(pools)

    def insertRadios(self, row):
        """Create one radio button per pool in column 0 of the row's line."""
        radioFrame = Frame(row.line)
        radioFrame.grid(row=0, column=0)
        radios = []
        v = row.selection
        pools = self.guesser.poolNames()
        for currCol, pool in enumerate(pools):
            rb = Radiobutton(radioFrame, text=pool, variable=v, value=pool,
                             command=Command(self.training, row), bg=None)
            rb.grid(row=0, column=currCol)
            radios.append(rb)
        row.radios = radios
|
||||
|
||||
|
||||
class TextItem(object):
    """Minimal trainable item that wraps a single piece of text."""

    def __init__(self, text):
        self.text = text

    def summary(self):
        """Return the displayable fields of the item keyed by column name."""
        return {'Text': self.text}

    def columnNames(self):
        """Return the list of column names."""
        return ['Text']

    @classmethod
    def columnDefs(cls):
        """Return ``(columnName, width)`` pairs for the trainer's list view.

        The trainer calls this on both the class and on instances, so it
        has to be a classmethod. The width of 60 matches the width the
        trainer gives to its summary label.
        """
        return [('Text', 60)]

    def lower(self):
        """Return the text in lower case."""
        return self.text.lower()

    @classmethod
    def fromFile(cls, fp):
        """Return an item built from the first line of the open file ``fp``."""
        return cls(fp.readline())
|
||||
|
||||
|
||||
class ItemRow(object):
    """State of one line in the trainer's item list.

    Holds the original item, the latest guess list, the Tk variables
    behind the summary label and the pool radio buttons, and (once the
    widgets exist) the line frame with its colour stripe.
    """

    def __init__(self, orig=None):
        self.line = None
        self.radios = []
        self.original = orig
        self.current = ''
        self.guess = []
        self.summary = StringVar()
        self.selection = StringVar()

    def initialize(self, item=None, summary='', guess=None, pools=None):
        """Point the row at a new item and show its summary and guess."""
        if pools is None:
            pools = []
        self.selection.set('')
        self.original = item
        self.summary.set(summary)
        self.setGuess(guess, pools)

    def setGuess(self, guess, pools):
        """Store ``guess`` and select its best entry.

        An empty or missing guess is replaced by ``[['']]`` so that the
        best guess becomes the empty string.
        """
        if not guess:
            guess = [['']]
        self.guess = guess
        best = self.bestGuess()
        self.selection.set(best)
        self.current = best
        self.refreshColour(pools)

    def refreshColour(self, pools):
        """Colour the row's stripe according to the best-guess pool.

        The stripe gets no colour when there is no guess or the guessed
        name is not in ``pools``. Nothing happens before the row has
        widgets (``self.line`` is unset).
        """
        col = None
        # bestGuess() copes with a row that has not received a guess yet,
        # where indexing self.guess directly would raise IndexError.
        best = self.bestGuess()
        if best in pools:
            colours = self.defaultColours()
            # Wrap around so more pools than colours does not raise IndexError.
            col = colours[pools.index(best) % len(colours)]
        if self.line:
            self.line.colourStripe.config(bg=col)

    def __repr__(self):
        return repr(self.original)

    def defaultColours(self):
        """Return the colour names used for pool stripes, in pool order."""
        return ['green', 'yellow', 'lightblue', 'red', 'blue', 'orange', 'purple', 'pink']

    def bestGuess(self):
        """Return the first element of the first guess, or None without guesses."""
        if self.guess:
            return self.guess[0][0]
        return None
|
||||
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Stand-alone launch: create the top-level window, attach the trainer
    # UI to it and enter the Tk event loop.
    window = Tk()
    window.title('Reverend Trainer')
    window.minsize(width=300, height=300)
    # No maximum window size is set.
    trainer = Trainer(window)
    window.mainloop()
|
|
@ -1,98 +0,0 @@
|
|||
# This module is part of the Divmod project and is Copyright 2003 Amir Bakhtiar:
|
||||
# amir@divmod.org. This is free software; you can redistribute it and/or
|
||||
# modify it under the terms of version 2.1 of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation.
|
||||
#
|
||||
|
||||
from Tkinter import *
|
||||
|
||||
class StatusBar(Frame):
    """One-line status strip backed by a sunken label.

    Courtesy of Fredrik Lundh.
    """

    def __init__(self, master):
        Frame.__init__(self, master)
        self.label = Label(self, bd=1, relief=SUNKEN, anchor=W)
        self.label.pack(fill=X)

    def _show(self, message):
        """Put ``message`` into the label and flush pending redraws."""
        self.label.config(text=message)
        self.label.update_idletasks()

    def set(self, format, *args):
        """Show ``format % args``."""
        self._show(format % args)

    def clear(self):
        """Blank the status text."""
        self._show("")

    def log(self, text, clear=0):
        """Show ``text``; when ``clear`` is non-zero, blank it after that many seconds."""
        self.set('%s', text)
        if not clear:
            return
        self.label.after(clear * 1000, self.clear)
|
||||
|
||||
|
||||
class Command:
    """Callable that invokes ``callback`` with arguments fixed at creation.

    Used to give Tk ``command=`` options a zero-argument callable.

    Courtesy of Danny Yoo
    http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66521
    """

    def __init__(self, callback, *args, **kwargs):
        self.callback = callback
        self.args = args
        self.kwargs = kwargs

    def __call__(self):
        """Call the stored callback and return its result."""
        # Extended call syntax replaces the deprecated apply() builtin,
        # which Python 3 no longer provides.
        return self.callback(*self.args, **self.kwargs)
|
||||
|
||||
class Notebook:
    """Simple tabbed container built from radio buttons and frames.

    A strip of radio buttons selects which screen frame is packed into
    the screen area; only one screen is shown at a time.

    Courtesy of Iuri Wickert
    http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/188537
    """

    def __init__(self, master, side=LEFT):
        """Create the button strip and the screen area inside ``master``.

        ``side`` is where the button strip is packed. For TOP or BOTTOM
        the buttons are packed left to right, otherwise top to bottom.
        """
        self.active_fr = None
        self.count = 0
        # Pass the initial value by keyword: the first positional
        # parameter of IntVar is the master widget, not the value.
        self.choice = IntVar(master=master, value=0)

        # The buttons run across the strip, perpendicular to its side.
        if side in (TOP, BOTTOM):
            self.side = LEFT
        else:
            self.side = TOP

        # Frame structure: radio-button strip plus the screen area.
        self.rb_fr = Frame(master, borderwidth=2, relief=RIDGE)
        self.rb_fr.pack(side=side, fill=BOTH)
        self.screen_fr = Frame(master, borderwidth=2, relief=RIDGE)
        self.screen_fr.pack(fill=BOTH)

    def __call__(self):
        """Return the frame that screens must use as their master."""
        return self.screen_fr

    def add_screen(self, fr, title):
        """Add screen ``fr`` with a selector button labelled ``title``.

        The first screen added is shown immediately.
        """
        b = Radiobutton(self.rb_fr, text=title, indicatoron=0,
                        variable=self.choice, value=self.count,
                        command=lambda: self.display(fr))
        b.pack(fill=BOTH, side=self.side)

        # Make the first screen added the initially visible one.
        if not self.active_fr:
            fr.pack(fill=BOTH, expand=1)
            self.active_fr = fr

        self.count += 1

    def display(self, fr):
        """Hide the active screen and show ``fr`` instead."""
        self.active_fr.forget()
        fr.pack(fill=BOTH, expand=1)
        self.active_fr = fr
|
||||
|
Binary file not shown.
0
utils/opml/README → vendor/opml/README
vendored
0
utils/opml/README → vendor/opml/README
vendored
0
utils/opml/tests.py → vendor/opml/tests.py
vendored
0
utils/opml/tests.py → vendor/opml/tests.py
vendored
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue