diff --git a/apps/rss_feeds/models.py b/apps/rss_feeds/models.py
index 7797f59a2..95761f439 100644
--- a/apps/rss_feeds/models.py
+++ b/apps/rss_feeds/models.py
@@ -8,6 +8,7 @@ import mongoengine as mongo
 import zlib
 import hashlib
 import redis
+from urlparse import urlparse
 from utils.feed_functions import Counter
 from collections import defaultdict
 from operator import itemgetter
@@ -36,6 +37,7 @@ from utils.feed_functions import timelimit, TimeoutError
 from utils.feed_functions import relative_timesince
 from utils.feed_functions import seconds_timesince
 from utils.story_functions import strip_tags, htmldiff, strip_comments
+from vendor.redis_completion.engine import RedisEngine
 
 ENTRY_NEW, ENTRY_UPDATED, ENTRY_SAME, ENTRY_ERR = range(4)
 
@@ -191,7 +193,6 @@ class Feed(models.Model):
 
         try:
             super(Feed, self).save(*args, **kwargs)
-            return self
         except IntegrityError, e:
             logging.debug(" ---> ~FRFeed save collision (%s), checking dupe..." % e)
             duplicate_feeds = Feed.objects.filter(feed_address=self.feed_address,
@@ -209,8 +210,10 @@ class Feed(models.Model):
                 logging.debug(" ---> ~FRFound different feed (%s), merging..." % duplicate_feeds[0])
                 feed = Feed.get_by_id(merge_feeds(duplicate_feeds[0].pk, self.pk))
                 return feed
-
-        return self
+
+        self.sync_autocompletion()
+
+        return self
 
     def index_for_search(self):
         if self.num_subscribers > 1 and not self.branch_from_feed:
@@ -223,6 +226,31 @@ class Feed(models.Model):
 
     def sync_redis(self):
         return MStory.sync_all_redis(self.pk)
+
+    def sync_autocompletion(self):
+        if self.num_subscribers <= 1: return
+        if self.branch_from_feed: return
+        if any(t in self.feed_address for t in ['token', 'private']): return
+
+        engine = RedisEngine(prefix="FT", connection_pool=settings.REDIS_AUTOCOMPLETE_POOL)
+        engine.store(self.pk, title=self.feed_title)
+        engine.boost(self.pk, self.num_subscribers)
+
+        parts = urlparse(self.feed_address)
+        engine = RedisEngine(prefix="FA", connection_pool=settings.REDIS_AUTOCOMPLETE_POOL)
+        engine.store(self.pk, title=parts.hostname)
+        engine.boost(self.pk, self.num_subscribers)
+
+    @classmethod
+    def autocomplete(cls, prefix, limit=5):
+        engine = RedisEngine(prefix="FA", connection_pool=settings.REDIS_AUTOCOMPLETE_POOL)
+        results = engine.search(phrase=prefix, limit=limit, autoboost=True)
+
+        if len(results) < limit:
+            engine = RedisEngine(prefix="FT", connection_pool=settings.REDIS_AUTOCOMPLETE_POOL)
+            results += engine.search(phrase=prefix, limit=limit-len(results), autoboost=True, filters=[lambda f: f not in results])
+
+        return results
 
     @classmethod
     def find_or_create(cls, feed_address, feed_link, *args, **kwargs):
diff --git a/apps/rss_feeds/views.py b/apps/rss_feeds/views.py
index 63adae63a..0c27e3e0b 100644
--- a/apps/rss_feeds/views.py
+++ b/apps/rss_feeds/views.py
@@ -74,29 +74,14 @@ def feed_autocomplete(request):
     query = request.GET.get('term')
     version = int(request.GET.get('v', 1))
 
-    if True or not user.profile.is_premium:
-        return dict(code=-1, message="Overloaded, no autocomplete results.", feeds=[], term=query)
+    # if True or not user.profile.is_premium:
+    #     return dict(code=-1, message="Overloaded, no autocomplete results.", feeds=[], term=query)
 
     if not query:
         return dict(code=-1, message="Specify a search 'term'.", feeds=[], term=query)
 
-    feeds = []
-    for field in ['feed_address', 'feed_title', 'feed_link']:
-        if not feeds:
-            feeds = Feed.objects.filter(**{
-                '%s__icontains' % field: query,
-                'num_subscribers__gt': 1,
-                'branch_from_feed__isnull': True,
-            }).exclude(
-                Q(**{'%s__icontains' % field: 'token'}) |
-                Q(**{'%s__icontains' % field: 'private'})
-            ).only(
-                'id',
-                'feed_title',
-                'feed_address',
-                'num_subscribers'
-            ).select_related("data").order_by('-num_subscribers')[:5]
-
+    feed_ids = Feed.autocomplete(query)
+    feeds = [Feed.get_by_id(feed_id) for feed_id in feed_ids]
     feeds = [{
         'id': feed.pk,
         'value': feed.feed_address,
@@ -104,6 +89,7 @@
         'tagline': feed.data and feed.data.feed_tagline,
         'num_subscribers': feed.num_subscribers,
     } for feed in feeds]
+    feeds = sorted(feeds, key=lambda f: -1 * f['num_subscribers'])
 
     feed_ids = [f['id'] for f in feeds]
     feed_icons = dict((icon.feed_id, icon) for icon in MFeedIcon.objects.filter(feed_id__in=feed_ids))
diff --git a/settings.py b/settings.py
index d48eea6c0..b6aea3f6e 100644
--- a/settings.py
+++ b/settings.py
@@ -549,6 +549,8 @@ REDIS_ANALYTICS_POOL = redis.ConnectionPool(host=REDIS['host'], port=6379, db=2)
 REDIS_STATISTICS_POOL = redis.ConnectionPool(host=REDIS['host'], port=6379, db=3)
 REDIS_FEED_POOL = redis.ConnectionPool(host=REDIS['host'], port=6379, db=4)
 REDIS_SESSION_POOL = redis.ConnectionPool(host=REDIS['host'], port=6379, db=5)
+# DB 6 = Session Store
+REDIS_AUTOCOMPLETE_POOL = redis.ConnectionPool(host=REDIS['host'], port=6379, db=7)
 
 JAMMIT = jammit.JammitAssets(NEWSBLUR_DIR)
 
diff --git a/vendor/redis_completion/__init__.py b/vendor/redis_completion/__init__.py
new file mode 100755
index 000000000..1d3967b95
--- /dev/null
+++ b/vendor/redis_completion/__init__.py
@@ -0,0 +1 @@
+from redis_completion.engine import RedisEngine
diff --git a/vendor/redis_completion/engine.py b/vendor/redis_completion/engine.py
new file mode 100755
index 000000000..5f3c75b21
--- /dev/null
+++ b/vendor/redis_completion/engine.py
@@ -0,0 +1,228 @@
+try:
+    import simplejson as json
+except ImportError:
+    import json
+import re
+from redis import Redis
+
+from redis_completion.stop_words import STOP_WORDS as _STOP_WORDS
+
+
+# aggressive stop words will be better when the length of the document is longer
+AGGRESSIVE_STOP_WORDS = _STOP_WORDS
+
+# default stop words should work fine for titles and things like that
+DEFAULT_STOP_WORDS = set(['a', 'an', 'of', 'the'])
+
+
+class RedisEngine(object):
+    """
+    References
+    ----------
+
+    http://antirez.com/post/autocomplete-with-redis.html
+    http://stackoverflow.com/questions/1958005/redis-autocomplete/1966188#1966188
+    http://patshaughnessy.net/2011/11/29/two-ways-of-using-redis-to-build-a-nosql-autocomplete-search-index
+    """
+    def __init__(self, prefix='ac', stop_words=None, cache_timeout=300, **conn_kwargs):
+        self.prefix = prefix
+        self.stop_words = (stop_words is None) and DEFAULT_STOP_WORDS or stop_words
+
+        self.conn_kwargs = conn_kwargs
+        self.client = self.get_client()
+
+        self.cache_timeout = cache_timeout
+
+        self.boost_key = '%s:b' % self.prefix
+        self.data_key = '%s:d' % self.prefix
+        self.title_key = '%s:t' % self.prefix
+        self.search_key = lambda k: '%s:s:%s' % (self.prefix, k)
+        self.cache_key = lambda pk, bk: '%s:c:%s:%s' % (self.prefix, pk, bk)
+
+        self.kcombine = lambda _id, _type: str(_id)
+        self.ksplit = lambda k: [k]
+
+    def get_client(self):
+        return Redis(**self.conn_kwargs)
+
+    def score_key(self, k, max_size=20):
+        k_len = len(k)
+        a = ord('a') - 2
+        score = 0
+
+        for i in range(max_size):
+            if i < k_len:
+                c = (ord(k[i]) - a)
+                if c < 2 or c > 27:
+                    c = 1
+            else:
+                c = 1
+            score += c*(27**(max_size-i))
+
+        return score
+
+    def clean_phrase(self, phrase):
+        phrase = re.sub('[^a-z0-9_\-\s]', '', phrase.lower())
+        return [w for w in phrase.split() if w not in self.stop_words]
+
+    def create_key(self, phrase):
+        return ' '.join(self.clean_phrase(phrase))
+
+    def autocomplete_keys(self, w):
+        for i in range(1, len(w)):
+            yield w[:i]
+        yield w
+
+    def flush(self, everything=False, batch_size=1000):
+        if everything:
+            return self.client.flushdb()
+
+        # this could be expensive :-(
+        keys = self.client.keys('%s:*' % self.prefix)
+
+        # batch keys
+        for i in range(0, len(keys), batch_size):
+            self.client.delete(*keys[i:i+batch_size])
+
+    def store(self, obj_id, title=None, data=None, obj_type=None, check_exist=True):
+        if title is None:
+            title = obj_id
+        if data is None:
+            data = title
+
+        title_score = self.score_key(self.create_key(title))
+
+        combined_id = self.kcombine(obj_id, obj_type or '')
+
+        if check_exist and self.exists(obj_id, obj_type):
+            stored_title = self.client.hget(self.title_key, combined_id)
+
+            # if the stored title is the same, we can simply update the data key
+            # since everything else will have stayed the same
+            if stored_title == title:
+                self.client.hset(self.data_key, combined_id, data)
+                return
+            else:
+                self.remove(obj_id, obj_type)
+
+        pipe = self.client.pipeline()
+        pipe.hset(self.data_key, combined_id, data)
+        pipe.hset(self.title_key, combined_id, title)
+
+        for word in self.clean_phrase(title):
+            for partial_key in self.autocomplete_keys(word):
+                pipe.zadd(self.search_key(partial_key), combined_id, title_score)
+
+        pipe.execute()
+
+    def store_json(self, obj_id, title, data_dict, obj_type=None):
+        return self.store(obj_id, title, json.dumps(data_dict), obj_type)
+
+    def remove(self, obj_id, obj_type=None):
+        obj_id = self.kcombine(obj_id, obj_type or '')
+        title = self.client.hget(self.title_key, obj_id) or ''
+
+        for word in self.clean_phrase(title):
+            for partial_key in self.autocomplete_keys(word):
+                key = self.search_key(partial_key)
+                if not self.client.zrange(key, 1, 2):
+                    self.client.delete(key)
+                else:
+                    self.client.zrem(key, obj_id)
+
+        self.client.hdel(self.data_key, obj_id)
+        self.client.hdel(self.title_key, obj_id)
+        self.client.hdel(self.boost_key, obj_id)
+
+    def boost(self, obj_id, multiplier=1.1, negative=False):
+        # take the existing boost for this item and increase it by the multiplier
+        current = self.client.hget(self.boost_key, obj_id)
+        current_f = float(current or 1.0)
+        if negative:
+            multiplier = 1.0 / multiplier
+        self.client.hset(self.boost_key, obj_id, current_f * multiplier)
+
+    def exists(self, obj_id, obj_type=None):
+        obj_id = self.kcombine(obj_id, obj_type or '')
+        return self.client.hexists(self.data_key, obj_id)
+
+    def get_cache_key(self, phrases, boosts):
+        if boosts:
+            boost_key = '|'.join('%s:%s' % (k, v) for k, v in sorted(boosts.items()))
+        else:
+            boost_key = ''
+        phrase_key = '|'.join(phrases)
+        return self.cache_key(phrase_key, boost_key)
+
+    def _process_ids(self, id_list, limit, filters, mappers):
+        ct = 0
+        data = []
+
+        for raw_id in id_list:
+            # raw_data = self.client.hget(self.data_key, raw_id)
+            raw_data = raw_id
+            if not raw_data:
+                continue
+
+            if mappers:
+                for m in mappers:
+                    raw_data = m(raw_data)
+
+            if filters:
+                passes = True
+                for f in filters:
+                    if not f(raw_data):
+                        passes = False
+                        break
+
+                if not passes:
+                    continue
+
+            data.append(raw_data)
+            ct += 1
+            if limit and ct == limit:
+                break
+
+        return data
+
+    def search(self, phrase, limit=None, filters=None, mappers=None, boosts=None, autoboost=False):
+        cleaned = self.clean_phrase(phrase)
+        if not cleaned:
+            return []
+
+        if autoboost:
+            boosts = boosts or {}
+            stored = self.client.hgetall(self.boost_key)
+            for obj_id in stored:
+                if obj_id not in boosts:
+                    boosts[obj_id] = float(stored[obj_id])
+
+        if len(cleaned) == 1 and not boosts:
+            new_key = self.search_key(cleaned[0])
+        else:
+            new_key = self.get_cache_key(cleaned, boosts)
+
+            if not self.client.exists(new_key):
+                # zinterstore also takes {k1: wt1, k2: wt2}
+                self.client.zinterstore(new_key, map(self.search_key, cleaned))
+                self.client.expire(new_key, self.cache_timeout)
+
+        if boosts:
+            pipe = self.client.pipeline()
+            for raw_id, score in self.client.zrange(new_key, 0, -1, withscores=True):
+                orig_score = score
+                for part in self.ksplit(raw_id):
+                    if part and part in boosts:
+                        score *= 1 / boosts[part]
+                if orig_score != score:
+                    pipe.zadd(new_key, raw_id, score)
+            pipe.execute()
+
+        id_list = self.client.zrange(new_key, 0, -1)
+        # return id_list
+        return self._process_ids(id_list, limit, filters, mappers)
+
+    def search_json(self, phrase, limit=None, filters=None, mappers=None, boosts=None, autoboost=False):
+        if not mappers:
+            mappers = []
+        mappers.insert(0, json.loads)
+
+        return self.search(phrase, limit, filters, mappers, boosts, autoboost)
diff --git a/vendor/redis_completion/stop_words.py b/vendor/redis_completion/stop_words.py
new file mode 100755
index 000000000..b868beffb
--- /dev/null
+++ b/vendor/redis_completion/stop_words.py
@@ -0,0 +1,594 @@
+words = """a
+a's
+able
+about
+above
+according
+accordingly
+across
+actually
+after
+afterwards
+again
+against
+ain't
+all
+allow
+allows
+almost
+alone
+along
+already
+also
+although
+always
+am
+among
+amongst
+amoungst
+amount
+an
+and
+another
+any
+anybody
+anyhow
+anyone
+anything
+anyway
+anyways
+anywhere
+apart
+appear
+appreciate
+appropriate
+are
+aren't
+around
+as
+aside
+ask
+asking
+associated
+at
+available
+away
+awfully
+back
+be
+became
+because
+become
+becomes
+becoming
+been
+before
+beforehand
+behind
+being
+believe
+below
+beside
+besides
+best
+better
+between
+beyond
+bill
+both
+bottom
+brief
+but
+by
+c'mon
+c's
+call
+came
+can
+can't
+cannot
+cant
+cause
+causes
+certain
+certainly
+changes
+clearly
+co
+com
+come
+comes
+computer
+con
+concerning
+consequently
+consider
+considering
+contain
+containing
+contains
+corresponding
+could
+couldn't
+couldnt
+course
+cry
+currently
+de
+definitely
+describe
+described
+despite
+detail
+did
+didn't
+different
+do
+does
+doesn't
+doing
+don't
+done
+down
+downwards
+due
+during
+each
+edu
+eg
+eight
+either
+eleven
+else
+elsewhere
+empty
+enough
+entirely
+especially
+et
+etc
+even
+ever
+every
+everybody
+everyone
+everything
+everywhere
+ex
+exactly
+example
+except
+far
+few
+fifteen
+fifth
+fify
+fill
+find
+fire
+first
+five
+followed
+following
+follows
+for
+former
+formerly
+forth
+forty
+found
+four
+from
+front
+full
+further
+furthermore
+get
+gets
+getting
+give
+given
+gives
+go
+goes
+going
+gone
+got
+gotten
+greetings
+had
+hadn't
+happens
+hardly
+has
+hasn't
+hasnt
+have
+haven't
+having
+he
+he's
+hello
+help
+hence
+her
+here
+here's
+hereafter
+hereby
+herein
+hereupon
+hers
+herself
+hi
+him
+himself
+his
+hither
+hopefully
+how
+howbeit
+however
+hundred
+i
+i'd
+i'll
+i'm
+i've
+ie
+if
+ignored
+immediate
+in
+inasmuch
+inc
+indeed
+indicate
+indicated
+indicates
+inner
+insofar
+instead
+interest
+into
+inward
+is
+isn't
+it
+it'd
+it'll
+it's
+its
+itself
+just
+keep
+keeps
+kept
+know
+known
+knows
+last
+lately
+later
+latter
+latterly
+least
+less
+lest
+let
+let's
+like
+liked
+likely
+little
+look
+looking
+looks
+ltd
+made
+mainly
+many
+may
+maybe
+me
+mean
+meanwhile
+merely
+might
+mill
+mine
+more
+moreover
+most
+mostly
+move
+much
+must
+my
+myself
+name
+namely
+nd
+near
+nearly
+necessary
+need
+needs
+neither
+never
+nevertheless
+new
+next
+nine
+no
+nobody
+non
+none
+noone
+nor
+normally
+not
+nothing
+novel
+now
+nowhere
+obviously
+of
+off
+often
+oh
+ok
+okay
+old
+on
+once
+one
+ones
+only
+onto
+or
+other
+others
+otherwise
+ought
+our
+ours
+ourselves
+out
+outside
+over
+overall
+own
+part
+particular
+particularly
+per
+perhaps
+placed
+please
+plus
+possible
+presumably
+probably
+provides
+put
+que
+quite
+qv
+rather
+rd
+re
+really
+reasonably
+regarding
+regardless
+regards
+relatively
+respectively
+right
+said
+same
+saw
+say
+saying
+says
+second
+secondly
+see
+seeing
+seem
+seemed
+seeming
+seems
+seen
+self
+selves
+sensible
+sent
+serious
+seriously
+seven
+several
+shall
+she
+should
+shouldn't
+show
+side
+since
+sincere
+six
+sixty
+so
+some
+somebody
+somehow
+someone
+something
+sometime
+sometimes
+somewhat
+somewhere
+soon
+sorry
+specified
+specify
+specifying
+still
+sub
+such
+sup
+sure
+system
+t's
+take
+taken
+tell
+ten
+tends
+th
+than
+thank
+thanks
+thanx
+that
+that's
+thats
+the
+their
+theirs
+them
+themselves
+then
+thence
+there
+there's
+thereafter
+thereby
+therefore
+therein
+theres
+thereupon
+these
+they
+they'd
+they'll
+they're
+they've
+thick
+thin
+think
+third
+this
+thorough
+thoroughly
+those
+though
+three
+through
+throughout
+thru
+thus
+to
+together
+too
+took
+top
+toward
+towards
+tried
+tries
+truly
+try
+trying
+twelve
+twenty
+twice
+two
+un
+under
+unfortunately
+unless
+unlikely
+until
+unto
+up
+upon
+us
+use
+used
+useful
+uses
+using
+usually
+value
+various
+very
+via
+viz
+vs
+want
+wants
+was
+wasn't
+way
+we
+we'd
+we'll
+we're
+we've
+welcome
+well
+went
+were
+weren't
+what
+what's
+whatever
+when
+whence
+whenever
+where
+where's
+whereafter
+whereas
+whereby
+wherein
+whereupon
+wherever
+whether
+which
+while
+whither
+who
+who's
+whoever
+whole
+whom
+whose
+why
+will
+willing
+wish
+with
+within
+without
+won't
+wonder
+would
+wouldn't
+yes
+yet
+you
+you'd
+you'll
+you're
+you've
+your
+yours
+yourself
+yourselves
+zero"""
+STOP_WORDS = set([
+    w.strip() for w in words.splitlines() if w
+])
diff --git a/vendor/redis_completion/tests.py b/vendor/redis_completion/tests.py
new file mode 100755
index 000000000..49a19059c
--- /dev/null
+++ b/vendor/redis_completion/tests.py
@@ -0,0 +1,277 @@
+import random
+from unittest import TestCase
+
+from redis_completion.engine import RedisEngine
+
+
+stop_words = set(['a', 'an', 'the', 'of'])
+
+class RedisCompletionTestCase(TestCase):
+    def setUp(self):
+        self.engine = self.get_engine()
+        self.engine.flush()
+
+    def get_engine(self):
+        return RedisEngine(prefix='testac', db=15)
+
+    def store_data(self, id=None):
+        test_data = (
+            (1, 'testing python'),
+            (2, 'testing python code'),
+            (3, 'web testing python code'),
+            (4, 'unit tests with python'),
+        )
+        for obj_id, title in test_data:
+            if id is None or id == obj_id:
+                self.engine.store_json(obj_id, title, {
+                    'obj_id': obj_id,
+                    'title': title,
+                    'secret': obj_id % 2 == 0 and 'derp' or 'herp',
+                })
+
+    def sort_results(self, r):
+        return sorted(r, key=lambda i:i['obj_id'])
+
+    def test_search(self):
+        self.store_data()
+
+        results = self.engine.search_json('testing python')
+        self.assertEqual(self.sort_results(results), [
+            {'obj_id': 1, 'title': 'testing python', 'secret': 'herp'},
+            {'obj_id': 2, 'title': 'testing python code', 'secret': 'derp'},
+            {'obj_id': 3, 'title': 'web testing python code', 'secret': 'herp'},
+        ])
+
+        results = self.engine.search_json('test')
+        self.assertEqual(self.sort_results(results), [
+            {'obj_id': 1, 'title': 'testing python', 'secret': 'herp'},
+            {'obj_id': 2, 'title': 'testing python code', 'secret': 'derp'},
+            {'obj_id': 3, 'title': 'web testing python code', 'secret': 'herp'},
+            {'obj_id': 4, 'title': 'unit tests with python', 'secret': 'derp'},
+        ])
+
+        results = self.engine.search_json('unit')
+        self.assertEqual(results, [
+            {'obj_id': 4, 'title': 'unit tests with python', 'secret': 'derp'},
+        ])
+
+        results = self.engine.search_json('')
+        self.assertEqual(results, [])
+
+        results = self.engine.search_json('missing')
+        self.assertEqual(results, [])
+
+    def test_boosting(self):
+        test_data = (
+            (1, 'test alpha', 't1'),
+            (2, 'test beta', 't1'),
+            (3, 'test gamma', 't1'),
+            (4, 'test delta', 't1'),
+            (5, 'test alpha', 't2'),
+            (6, 'test beta', 't2'),
+            (7, 'test gamma', 't2'),
+            (8, 'test delta', 't2'),
+            (9, 'test alpha', 't3'),
+            (10, 'test beta', 't3'),
+            (11, 'test gamma', 't3'),
+            (12, 'test delta', 't3'),
+        )
+        for obj_id, title, obj_type in test_data:
+            self.engine.store_json(obj_id, title, {
+                'obj_id': obj_id,
+                'title': title,
+            }, obj_type)
+
+        def assertExpected(results, id_list):
+            self.assertEqual([r['obj_id'] for r in results], id_list)
+
+        results = self.engine.search_json('alp')
+        assertExpected(results, [1, 5, 9])
+
+        results = self.engine.search_json('alp', boosts={'t2': 1.1})
+        assertExpected(results, [5, 1, 9])
+
+        results = self.engine.search_json('test', boosts={'t3': 1.5, 't2': 1.1})
+        assertExpected(results, [9, 10, 12, 11, 5, 6, 8, 7, 1, 2, 4, 3])
+
+        results = self.engine.search_json('alp', boosts={'t1': 0.5})
+        assertExpected(results, [5, 9, 1])
+
+        results = self.engine.search_json('alp', boosts={'t1': 1.5, 't3': 1.6})
+        assertExpected(results, [9, 1, 5])
+
+        results = self.engine.search_json('alp', boosts={'t3': 1.5, '5': 1.6})
+        assertExpected(results, [5, 9, 1])
+
+    def test_autoboost(self):
+        self.engine.store('t1', 'testing 1')
+        self.engine.store('t2', 'testing 2')
+        self.engine.store('t3', 'testing 3')
+        self.engine.store('t4', 'testing 4')
+        self.engine.store('t5', 'testing 5')
+
+        def assertExpected(results, id_list):
+            self.assertEqual(results, ['testing %s' % i for i in id_list])
+
+        results = self.engine.search('testing', autoboost=True)
+        assertExpected(results, [1, 2, 3, 4, 5])
+
+        self.engine.boost('t3')
+        results = self.engine.search('testing', autoboost=True)
+        assertExpected(results, [3, 1, 2, 4, 5])
+
+        self.engine.boost('t2')
+        results = self.engine.search('testing', autoboost=True)
+        assertExpected(results, [2, 3, 1, 4, 5])
+
+        self.engine.boost('t1', negative=True)
+        results = self.engine.search('testing', autoboost=True)
+        assertExpected(results, [2, 3, 4, 5, 1])
+
+        results = self.engine.search('testing', boosts={'t5': 4.0}, autoboost=True)
+        assertExpected(results, [5, 2, 3, 4, 1])
+
+        results = self.engine.search('testing', boosts={'t3': 1.5}, autoboost=True)
+        assertExpected(results, [3, 2, 4, 5, 1])
+
+    def test_limit(self):
+        self.store_data()
+
+        results = self.engine.search_json('testing', limit=1)
+        self.assertEqual(results, [
+            {'obj_id': 1, 'title': 'testing python', 'secret': 'herp'},
+        ])
+
+    def test_filters(self):
+        self.store_data()
+
+        f = lambda i: i['secret'] == 'herp'
+        results = self.engine.search_json('testing python', filters=[f])
+
+        self.assertEqual(self.sort_results(results), [
+            {'obj_id': 1, 'title': 'testing python', 'secret': 'herp'},
+            {'obj_id': 3, 'title': 'web testing python code', 'secret': 'herp'},
+        ])
+
+    def test_simple(self):
+        self.engine.print_scores = True
+        self.engine.store('testing python')
+        self.engine.store('testing python code')
+        self.engine.store('web testing python code')
+        self.engine.store('unit tests with python')
+
+        results = self.engine.search('testing')
+        self.assertEqual(results, ['testing python', 'testing python code', 'web testing python code'])
+
+        results = self.engine.search('code')
+        self.assertEqual(results, ['testing python code', 'web testing python code'])
+
+    def test_correct_sorting(self):
+        strings = []
+        for i in range(26):
+            strings.append('aaaa%s' % chr(i + ord('a')))
+            if i > 0:
+                strings.append('aaa%sa' % chr(i + ord('a')))
+
+        random.shuffle(strings)
+
+        for s in strings:
+            self.engine.store(s)
+
+        results = self.engine.search('aaa')
+        self.assertEqual(results, sorted(strings))
+
+        results = self.engine.search('aaa', limit=30)
+        self.assertEqual(results, sorted(strings)[:30])
+
+    def test_removing_objects(self):
+        self.store_data()
+
+        self.engine.remove(1)
+
+        results = self.engine.search_json('testing')
+        self.assertEqual(self.sort_results(results), [
+            {'obj_id': 2, 'title': 'testing python code', 'secret': 'derp'},
+            {'obj_id': 3, 'title': 'web testing python code', 'secret': 'herp'},
+        ])
+
+        self.store_data(1)
+        self.engine.remove(2)
+
+        results = self.engine.search_json('testing')
+        self.assertEqual(self.sort_results(results), [
+            {'obj_id': 1, 'title': 'testing python', 'secret': 'herp'},
+            {'obj_id': 3, 'title': 'web testing python code', 'secret': 'herp'},
+        ])
+
+    def test_clean_phrase(self):
+        self.assertEqual(self.engine.clean_phrase('abc def ghi'), ['abc', 'def', 'ghi'])
+
+        self.assertEqual(self.engine.clean_phrase('a A tHe an a'), [])
+        self.assertEqual(self.engine.clean_phrase(''), [])
+
+        self.assertEqual(
+            self.engine.clean_phrase('The Best of times, the blurst of times'),
+            ['best', 'times', 'blurst', 'times'])
+
+    def test_exists(self):
+        self.assertFalse(self.engine.exists('test'))
+        self.engine.store('test')
+        self.assertTrue(self.engine.exists('test'))
+
+    def test_removing_objects_in_depth(self):
+        # want to ensure that redis is cleaned up and does not become polluted
+        # with spurious keys when objects are removed
+        redis_client = self.engine.client
+        prefix = self.engine.prefix
+
+        initial_key_count = len(redis_client.keys())
+
+        # store the blog "testing python"
+        self.store_data(1)
+
+        # see how many keys we have in the db - check again in a bit
+        key_len = len(redis_client.keys())
+
+        self.store_data(2)
+        key_len2 = len(redis_client.keys())
+
+        self.assertTrue(key_len != key_len2)
+        self.engine.remove(2)
+
+        # back to the original amount of keys
+        self.assertEqual(len(redis_client.keys()), key_len)
+
+        self.engine.remove(1)
+        self.assertEqual(len(redis_client.keys()), initial_key_count)
+
+    def test_updating(self):
+        self.engine.store('id1', 'title one', 'd1', 't1')
+        self.engine.store('id2', 'title two', 'd2', 't2')
+        self.engine.store('id3', 'title three', 'd3', 't3')
+
+        results = self.engine.search('tit')
+        self.assertEqual(results, ['d1', 'd3', 'd2'])
+
+        # overwrite the data for id1
+        self.engine.store('id1', 'title one', 'D1', 't1')
+
+        results = self.engine.search('tit')
+        self.assertEqual(results, ['D1', 'd3', 'd2'])
+
+        # overwrite the data with a new title, will remove the title one refs
+        self.engine.store('id1', 'Herple One', 'done', 't1')
+
+        results = self.engine.search('tit')
+        self.assertEqual(results, ['d3', 'd2'])
+
+        results = self.engine.search('her')
+        self.assertEqual(results, ['done'])
+
+        self.engine.store('id1', 'title one', 'Done', 't1', False)
+        results = self.engine.search('tit')
+        self.assertEqual(results, ['Done', 'd3', 'd2'])
+
+        # this shows that when we don't clean up crap gets left around
+        results = self.engine.search('her')
+        self.assertEqual(results, ['Done'])
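
A note on the data model, since the key layout is easy to miss in the diff: store() indexes every prefix of every non-stop word in the cleaned title, one sorted set per prefix, and a single-word search() just reads the sorted set for the typed prefix. A minimal sketch against a scratch local redis (db 15, the same database the bundled tests use; the feed id and title are invented):

    from vendor.redis_completion.engine import RedisEngine

    engine = RedisEngine(prefix='FT', db=15)
    engine.flush()  # deletes only FT:* keys

    engine.store(42, title='Daring Fireball')

    print(engine.client.hgetall('FT:t'))   # {'42': 'Daring Fireball'}
    print(sorted(engine.client.keys('FT:s:*')))
    # ['FT:s:d', 'FT:s:da', ..., 'FT:s:daring', 'FT:s:f', ..., 'FT:s:fireball']
    print(engine.search('dar'))            # ['42']

Note the last line: because _process_ids() in this vendored copy returns raw_id rather than the stored data, search() yields ids, which is exactly what Feed.autocomplete() hands to Feed.get_by_id(). The bundled search_json tests still appear to assume upstream's data-returning behavior and would likely need the commented-out hget restored to pass.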
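On ranking: score_key() packs the first 20 characters of the cleaned title into a base-27 number, so an unboosted ZRANGE is simply alphabetical; boost() stores a per-id multiplier, and search(autoboost=True) divides each score by it, so a larger boost means a smaller score and an earlier slot in the ascending range. A sketch, again on scratch db 15 with invented ids and subscriber counts:

    from vendor.redis_completion.engine import RedisEngine

    engine = RedisEngine(prefix='FT', db=15)
    engine.flush()

    for pk, title in ((1, 'alpha feed'), (2, 'beta feed'), (3, 'gamma feed')):
        engine.store(pk, title=title)

    print(engine.search('feed', autoboost=True))   # ['1', '2', '3'] -- alphabetical
    engine.boost(3, 50)                            # say, 50 subscribers
    print(engine.search('feed', autoboost=True))   # ['3', '1', '2'] -- boosted first

One thing worth a second look: boost() multiplies the stored multiplier rather than setting it, so sync_autocompletion() compounds num_subscribers into the boost on every save.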
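Finally, the two indexes: sync_autocompletion() stores the feed title under the "FT" prefix and only the hostname of the feed address under "FA", and Feed.autocomplete() searches "FA" first, topping up from "FT" while filtering out ids it already has. Because clean_phrase() strips dots, a hostname is indexed as a single token, so only prefixes of the whole hostname match. A sketch with an invented feed:

    from urlparse import urlparse
    from vendor.redis_completion.engine import RedisEngine

    engine = RedisEngine(prefix='FA', db=15)
    engine.flush()

    address = 'http://daringfireball.net/feeds/main'
    engine.store(42, title=urlparse(address).hostname)  # 'daringfireball.net'

    print(engine.search('daringf'))   # ['42']
    print(engine.search('net'))       # [] -- not a prefix of 'daringfireballnet'
    print(engine.search('main'))      # [] -- the path is never indexed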