2009-06-16 03:08:55 +00:00
|
|
|
import datetime
|
|
|
|
import time
|
2009-08-01 04:26:57 +00:00
|
|
|
import sys
|
2010-07-21 11:38:33 -04:00
|
|
|
from utils import feedfinder
|
2009-06-16 03:08:55 +00:00
|
|
|
|
|
|
|
def encode(tstr):
|
|
|
|
""" Encodes a unicode string in utf-8
|
|
|
|
"""
|
|
|
|
if not tstr:
|
|
|
|
return ''
|
|
|
|
# this is _not_ pretty, but it works
|
|
|
|
try:
|
|
|
|
return tstr.encode('utf-8', "xmlcharrefreplace")
|
|
|
|
except UnicodeDecodeError:
|
|
|
|
# it's already UTF8.. sigh
|
|
|
|
return tstr.decode('utf-8').encode('utf-8')
|
|
|
|
|
|
|
|
def prints(tstr):
|
|
|
|
""" lovely unicode
|
|
|
|
"""
|
|
|
|
sys.stdout.write('%s\n' % (tstr.encode(sys.getdefaultencoding(),
|
|
|
|
'replace')))
|
|
|
|
sys.stdout.flush()
|
|
|
|
|
|
|
|
def mtime(ttime):
|
|
|
|
""" datetime auxiliar function.
|
|
|
|
"""
|
2009-07-28 02:27:27 +00:00
|
|
|
return datetime.datetime.fromtimestamp(time.mktime(ttime))
|
2009-08-01 04:26:57 +00:00
|
|
|
|
|
|
|
# From: http://www.poromenos.org/node/87
|
|
|
|
def levenshtein_distance(first, second):
|
|
|
|
"""Find the Levenshtein distance between two strings."""
|
|
|
|
if len(first) > len(second):
|
|
|
|
first, second = second, first
|
|
|
|
if len(second) == 0:
|
|
|
|
return len(first)
|
|
|
|
first_length = len(first) + 1
|
|
|
|
second_length = len(second) + 1
|
|
|
|
distance_matrix = [[0] * second_length for x in range(first_length)]
|
|
|
|
for i in range(first_length):
|
|
|
|
distance_matrix[i][0] = i
|
|
|
|
for j in range(second_length):
|
|
|
|
distance_matrix[0][j]=j
|
|
|
|
for i in xrange(1, first_length):
|
|
|
|
for j in range(1, second_length):
|
|
|
|
deletion = distance_matrix[i-1][j] + 1
|
|
|
|
insertion = distance_matrix[i][j-1] + 1
|
|
|
|
substitution = distance_matrix[i-1][j-1]
|
|
|
|
if first[i-1] != second[j-1]:
|
|
|
|
substitution += 1
|
|
|
|
distance_matrix[i][j] = min(insertion, deletion, substitution)
|
2010-07-21 11:38:33 -04:00
|
|
|
return distance_matrix[first_length-1][second_length-1]
|
|
|
|
|
|
|
|
|
|
|
|
def fetch_address_from_page(url, existing_feed=None):
|
|
|
|
from apps.rss_feeds.models import Feed
|
|
|
|
feed_finder_url = feedfinder.feed(url)
|
|
|
|
if feed_finder_url:
|
|
|
|
if existing_feed:
|
|
|
|
existing_feed.feed_address = feed_finder_url
|
|
|
|
existing_feed.save()
|
|
|
|
feed = existing_feed
|
|
|
|
else:
|
|
|
|
try:
|
|
|
|
feed = Feed.objects.get(feed_address=feed_finder_url)
|
|
|
|
except Feed.DoesNotExist:
|
|
|
|
feed = Feed(feed_address=feed_finder_url)
|
|
|
|
feed.save()
|
|
|
|
feed.update()
|
|
|
|
return feed
|