2009-06-16 03:08:55 +00:00
|
|
|
import datetime
|
|
|
|
import time
|
2009-08-01 04:26:57 +00:00
|
|
|
import sys
|
2009-06-16 03:08:55 +00:00
|
|
|
|
|
|
|
def encode(tstr):
    """Return *tstr* as a UTF-8 encoded byte string.

    Text (unicode) input is encoded to UTF-8, with any character that
    cannot be encoded replaced by an XML character reference.  Input that
    is already a byte string is decoded as UTF-8 and re-encoded, which
    leaves valid UTF-8 data unchanged.

    Falsy input (``None``, an empty string) yields ``''``.

    Raises:
        UnicodeDecodeError: if a byte string is passed that is not valid
            UTF-8.
    """
    if not tstr:
        return ''
    # Check the type explicitly instead of relying on the implicit ASCII
    # decode failing: the result is the same on Python 2 (where ``bytes``
    # is ``str``) and this also works on Python 3, where byte strings have
    # no ``encode`` method at all.
    if isinstance(tstr, bytes):
        return tstr.decode('utf-8').encode('utf-8')
    return tstr.encode('utf-8', 'xmlcharrefreplace')
|
|
|
|
|
|
|
|
def prints(tstr):
    """Write *tstr* followed by a newline to ``sys.stdout`` and flush.

    The text is encoded with the interpreter's default encoding
    (``sys.getdefaultencoding()``) using the ``'replace'`` error handler,
    so characters that the encoding cannot represent are substituted
    rather than raising.
    """
    encoding = sys.getdefaultencoding()
    data = tstr.encode(encoding, 'replace')
    if not isinstance(data, str):
        # Python 3: encode() produced ``bytes``; formatting that with '%s'
        # would print its repr (b'...').  Decode back to text after the
        # 'replace' pass.  On Python 2 ``data`` is already a native str
        # and is written unchanged.
        data = data.decode(encoding)
    sys.stdout.write('%s\n' % data)
    sys.stdout.flush()
|
|
|
|
|
|
|
|
def mtime(ttime):
    """ Convert a time tuple into a ``datetime.datetime``.

        *ttime* is handed to ``time.mktime`` to obtain a timestamp, which
        is then turned into a datetime with ``datetime.fromtimestamp``.
    """
    timestamp = time.mktime(ttime)
    return datetime.datetime.fromtimestamp(timestamp)
|
2009-08-01 04:26:57 +00:00
|
|
|
|
|
|
|
# From: http://www.poromenos.org/node/87
|
|
|
|
def levenshtein_distance(first, second):
    """Find the Levenshtein distance between two strings.

    The distance is the minimum number of single-item insertions,
    deletions and substitutions needed to turn *first* into *second*.
    The result is symmetric in its arguments.

    Only two rows of the dynamic-programming table are kept at a time,
    since each cell depends solely on the current and the previous row.
    """
    # Keep the shorter sequence in ``first`` so the rows are as short as
    # possible.
    if len(first) > len(second):
        first, second = second, first

    # previous_row[k] is the distance between first[:k] and the prefix of
    # ``second`` processed so far; for the empty prefix that is simply k.
    previous_row = list(range(len(first) + 1))
    for row_index, second_item in enumerate(second):
        # Turning an empty prefix of ``first`` into second[:row_index + 1]
        # takes row_index + 1 insertions.
        current_row = [row_index + 1]
        for col_index, first_item in enumerate(first):
            insertion = current_row[col_index] + 1
            deletion = previous_row[col_index + 1] + 1
            substitution = previous_row[col_index]
            if first_item != second_item:
                substitution += 1
            current_row.append(min(insertion, deletion, substitution))
        previous_row = current_row
    return previous_row[-1]
|