mirror of
https://github.com/samuelclay/NewsBlur.git
synced 2025-08-31 21:41:33 +00:00
277 lines
9.7 KiB
Python
Executable file
277 lines
9.7 KiB
Python
Executable file
import random
|
|
from unittest import TestCase
|
|
|
|
from redis_completion.engine import RedisEngine
|
|
|
|
|
|
stop_words = set(['a', 'an', 'the', 'of'])
|
|
|
|
class RedisCompletionTestCase(TestCase):
|
|
def setUp(self):
|
|
self.engine = self.get_engine()
|
|
self.engine.flush()
|
|
|
|
def get_engine(self):
|
|
return RedisEngine(prefix='testac', db=15)
|
|
|
|
def store_data(self, id=None):
|
|
test_data = (
|
|
(1, 'testing python'),
|
|
(2, 'testing python code'),
|
|
(3, 'web testing python code'),
|
|
(4, 'unit tests with python'),
|
|
)
|
|
for obj_id, title in test_data:
|
|
if id is None or id == obj_id:
|
|
self.engine.store_json(obj_id, title, {
|
|
'obj_id': obj_id,
|
|
'title': title,
|
|
'secret': obj_id % 2 == 0 and 'derp' or 'herp',
|
|
})
|
|
|
|
def sort_results(self, r):
|
|
return sorted(r, key=lambda i:i['obj_id'])
|
|
|
|
def test_search(self):
|
|
self.store_data()
|
|
|
|
results = self.engine.search_json('testing python')
|
|
self.assertEqual(self.sort_results(results), [
|
|
{'obj_id': 1, 'title': 'testing python', 'secret': 'herp'},
|
|
{'obj_id': 2, 'title': 'testing python code', 'secret': 'derp'},
|
|
{'obj_id': 3, 'title': 'web testing python code', 'secret': 'herp'},
|
|
])
|
|
|
|
results = self.engine.search_json('test')
|
|
self.assertEqual(self.sort_results(results), [
|
|
{'obj_id': 1, 'title': 'testing python', 'secret': 'herp'},
|
|
{'obj_id': 2, 'title': 'testing python code', 'secret': 'derp'},
|
|
{'obj_id': 3, 'title': 'web testing python code', 'secret': 'herp'},
|
|
{'obj_id': 4, 'title': 'unit tests with python', 'secret': 'derp'},
|
|
])
|
|
|
|
results = self.engine.search_json('unit')
|
|
self.assertEqual(results, [
|
|
{'obj_id': 4, 'title': 'unit tests with python', 'secret': 'derp'},
|
|
])
|
|
|
|
results = self.engine.search_json('')
|
|
self.assertEqual(results, [])
|
|
|
|
results = self.engine.search_json('missing')
|
|
self.assertEqual(results, [])
|
|
|
|
def test_boosting(self):
|
|
test_data = (
|
|
(1, 'test alpha', 't1'),
|
|
(2, 'test beta', 't1'),
|
|
(3, 'test gamma', 't1'),
|
|
(4, 'test delta', 't1'),
|
|
(5, 'test alpha', 't2'),
|
|
(6, 'test beta', 't2'),
|
|
(7, 'test gamma', 't2'),
|
|
(8, 'test delta', 't2'),
|
|
(9, 'test alpha', 't3'),
|
|
(10, 'test beta', 't3'),
|
|
(11, 'test gamma', 't3'),
|
|
(12, 'test delta', 't3'),
|
|
)
|
|
for obj_id, title, obj_type in test_data:
|
|
self.engine.store_json(obj_id, title, {
|
|
'obj_id': obj_id,
|
|
'title': title,
|
|
}, obj_type)
|
|
|
|
def assertExpected(results, id_list):
|
|
self.assertEqual([r['obj_id'] for r in results], id_list)
|
|
|
|
results = self.engine.search_json('alp')
|
|
assertExpected(results, [1, 5, 9])
|
|
|
|
results = self.engine.search_json('alp', boosts={'t2': 1.1})
|
|
assertExpected(results, [5, 1, 9])
|
|
|
|
results = self.engine.search_json('test', boosts={'t3': 1.5, 't2': 1.1})
|
|
assertExpected(results, [9, 10, 12, 11, 5, 6, 8, 7, 1, 2, 4, 3])
|
|
|
|
results = self.engine.search_json('alp', boosts={'t1': 0.5})
|
|
assertExpected(results, [5, 9, 1])
|
|
|
|
results = self.engine.search_json('alp', boosts={'t1': 1.5, 't3': 1.6})
|
|
assertExpected(results, [9, 1, 5])
|
|
|
|
results = self.engine.search_json('alp', boosts={'t3': 1.5, '5': 1.6})
|
|
assertExpected(results, [5, 9, 1])
|
|
|
|
def test_autoboost(self):
|
|
self.engine.store('t1', 'testing 1')
|
|
self.engine.store('t2', 'testing 2')
|
|
self.engine.store('t3', 'testing 3')
|
|
self.engine.store('t4', 'testing 4')
|
|
self.engine.store('t5', 'testing 5')
|
|
|
|
def assertExpected(results, id_list):
|
|
self.assertEqual(results, ['testing %s' % i for i in id_list])
|
|
|
|
results = self.engine.search('testing', autoboost=True)
|
|
assertExpected(results, [1, 2, 3, 4, 5])
|
|
|
|
self.engine.boost('t3')
|
|
results = self.engine.search('testing', autoboost=True)
|
|
assertExpected(results, [3, 1, 2, 4, 5])
|
|
|
|
self.engine.boost('t2')
|
|
results = self.engine.search('testing', autoboost=True)
|
|
assertExpected(results, [2, 3, 1, 4, 5])
|
|
|
|
self.engine.boost('t1', negative=True)
|
|
results = self.engine.search('testing', autoboost=True)
|
|
assertExpected(results, [2, 3, 4, 5, 1])
|
|
|
|
results = self.engine.search('testing', boosts={'t5': 4.0}, autoboost=True)
|
|
assertExpected(results, [5, 2, 3, 4, 1])
|
|
|
|
results = self.engine.search('testing', boosts={'t3': 1.5}, autoboost=True)
|
|
assertExpected(results, [3, 2, 4, 5, 1])
|
|
|
|
def test_limit(self):
|
|
self.store_data()
|
|
|
|
results = self.engine.search_json('testing', limit=1)
|
|
self.assertEqual(results, [
|
|
{'obj_id': 1, 'title': 'testing python', 'secret': 'herp'},
|
|
])
|
|
|
|
def test_filters(self):
|
|
self.store_data()
|
|
|
|
f = lambda i: i['secret'] == 'herp'
|
|
results = self.engine.search_json('testing python', filters=[f])
|
|
|
|
self.assertEqual(self.sort_results(results), [
|
|
{'obj_id': 1, 'title': 'testing python', 'secret': 'herp'},
|
|
{'obj_id': 3, 'title': 'web testing python code', 'secret': 'herp'},
|
|
])
|
|
|
|
def test_simple(self):
|
|
self.engine.print_scores = True
|
|
self.engine.store('testing python')
|
|
self.engine.store('testing python code')
|
|
self.engine.store('web testing python code')
|
|
self.engine.store('unit tests with python')
|
|
|
|
results = self.engine.search('testing')
|
|
self.assertEqual(results, ['testing python', 'testing python code', 'web testing python code'])
|
|
|
|
results = self.engine.search('code')
|
|
self.assertEqual(results, ['testing python code', 'web testing python code'])
|
|
|
|
def test_correct_sorting(self):
|
|
strings = []
|
|
for i in range(26):
|
|
strings.append('aaaa%s' % chr(i + ord('a')))
|
|
if i > 0:
|
|
strings.append('aaa%sa' % chr(i + ord('a')))
|
|
|
|
random.shuffle(strings)
|
|
|
|
for s in strings:
|
|
self.engine.store(s)
|
|
|
|
results = self.engine.search('aaa')
|
|
self.assertEqual(results, sorted(strings))
|
|
|
|
results = self.engine.search('aaa', limit=30)
|
|
self.assertEqual(results, sorted(strings)[:30])
|
|
|
|
def test_removing_objects(self):
|
|
self.store_data()
|
|
|
|
self.engine.remove(1)
|
|
|
|
results = self.engine.search_json('testing')
|
|
self.assertEqual(self.sort_results(results), [
|
|
{'obj_id': 2, 'title': 'testing python code', 'secret': 'derp'},
|
|
{'obj_id': 3, 'title': 'web testing python code', 'secret': 'herp'},
|
|
])
|
|
|
|
self.store_data(1)
|
|
self.engine.remove(2)
|
|
|
|
results = self.engine.search_json('testing')
|
|
self.assertEqual(self.sort_results(results), [
|
|
{'obj_id': 1, 'title': 'testing python', 'secret': 'herp'},
|
|
{'obj_id': 3, 'title': 'web testing python code', 'secret': 'herp'},
|
|
])
|
|
|
|
def test_clean_phrase(self):
|
|
self.assertEqual(self.engine.clean_phrase('abc def ghi'), ['abc', 'def', 'ghi'])
|
|
|
|
self.assertEqual(self.engine.clean_phrase('a A tHe an a'), [])
|
|
self.assertEqual(self.engine.clean_phrase(''), [])
|
|
|
|
self.assertEqual(
|
|
self.engine.clean_phrase('The Best of times, the blurst of times'),
|
|
['best', 'times', 'blurst', 'times'])
|
|
|
|
def test_exists(self):
|
|
self.assertFalse(self.engine.exists('test'))
|
|
self.engine.store('test')
|
|
self.assertTrue(self.engine.exists('test'))
|
|
|
|
def test_removing_objects_in_depth(self):
|
|
# want to ensure that redis is cleaned up and does not become polluted
|
|
# with spurious keys when objects are removed
|
|
redis_client = self.engine.client
|
|
prefix = self.engine.prefix
|
|
|
|
initial_key_count = len(redis_client.keys())
|
|
|
|
# store the blog "testing python"
|
|
self.store_data(1)
|
|
|
|
# see how many keys we have in the db - check again in a bit
|
|
key_len = len(redis_client.keys())
|
|
|
|
self.store_data(2)
|
|
key_len2 = len(redis_client.keys())
|
|
|
|
self.assertTrue(key_len != key_len2)
|
|
self.engine.remove(2)
|
|
|
|
# back to the original amount of keys
|
|
self.assertEqual(len(redis_client.keys()), key_len)
|
|
|
|
self.engine.remove(1)
|
|
self.assertEqual(len(redis_client.keys()), initial_key_count)
|
|
|
|
def test_updating(self):
|
|
self.engine.store('id1', 'title one', 'd1', 't1')
|
|
self.engine.store('id2', 'title two', 'd2', 't2')
|
|
self.engine.store('id3', 'title three', 'd3', 't3')
|
|
|
|
results = self.engine.search('tit')
|
|
self.assertEqual(results, ['d1', 'd3', 'd2'])
|
|
|
|
# overwrite the data for id1
|
|
self.engine.store('id1', 'title one', 'D1', 't1')
|
|
|
|
results = self.engine.search('tit')
|
|
self.assertEqual(results, ['D1', 'd3', 'd2'])
|
|
|
|
# overwrite the data with a new title, will remove the title one refs
|
|
self.engine.store('id1', 'Herple One', 'done', 't1')
|
|
|
|
results = self.engine.search('tit')
|
|
self.assertEqual(results, ['d3', 'd2'])
|
|
|
|
results = self.engine.search('her')
|
|
self.assertEqual(results, ['done'])
|
|
|
|
self.engine.store('id1', 'title one', 'Done', 't1', False)
|
|
results = self.engine.search('tit')
|
|
self.assertEqual(results, ['Done', 'd3', 'd2'])
|
|
|
|
# this shows that when we don't clean up crap gets left around
|
|
results = self.engine.search('her')
|
|
self.assertEqual(results, ['Done'])
|