Mirror of https://github.com/samuelclay/NewsBlur.git, synced 2025-08-31 21:41:33 +00:00

commit c25c4478e6 (parent cb90344cda)

    Adding new autocomplete. Let's hope this doesn't destroy performance in Redis.

7 changed files with 1138 additions and 22 deletions
@@ -8,6 +8,7 @@ import mongoengine as mongo
import zlib
import hashlib
import redis
from urlparse import urlparse
from utils.feed_functions import Counter
from collections import defaultdict
from operator import itemgetter

@@ -36,6 +37,7 @@ from utils.feed_functions import timelimit, TimeoutError
from utils.feed_functions import relative_timesince
from utils.feed_functions import seconds_timesince
from utils.story_functions import strip_tags, htmldiff, strip_comments
from vendor.redis_completion.engine import RedisEngine

ENTRY_NEW, ENTRY_UPDATED, ENTRY_SAME, ENTRY_ERR = range(4)

@@ -191,7 +193,6 @@ class Feed(models.Model):
        try:
            super(Feed, self).save(*args, **kwargs)
            return self
        except IntegrityError, e:
            logging.debug(" ---> ~FRFeed save collision (%s), checking dupe..." % e)
            duplicate_feeds = Feed.objects.filter(feed_address=self.feed_address,

@@ -209,8 +210,10 @@
                logging.debug(" ---> ~FRFound different feed (%s), merging..." % duplicate_feeds[0])
                feed = Feed.get_by_id(merge_feeds(duplicate_feeds[0].pk, self.pk))
                return feed

            return self

        self.sync_autocompletion()

        return self

    def index_for_search(self):
        if self.num_subscribers > 1 and not self.branch_from_feed:

@@ -223,6 +226,31 @@
    def sync_redis(self):
        return MStory.sync_all_redis(self.pk)

    def sync_autocompletion(self):
        if self.num_subscribers <= 1: return
        if self.branch_from_feed: return
        if any(t in self.feed_address for t in ['token', 'private']): return

        engine = RedisEngine(prefix="FT", connection_pool=settings.REDIS_AUTOCOMPLETE_POOL)
        engine.store(self.pk, title=self.feed_title)
        engine.boost(self.pk, self.num_subscribers)

        parts = urlparse(self.feed_address)
        engine = RedisEngine(prefix="FA", connection_pool=settings.REDIS_AUTOCOMPLETE_POOL)
        engine.store(self.pk, title=parts.hostname)
        engine.boost(self.pk, self.num_subscribers)

    @classmethod
    def autocomplete(self, prefix, limit=5):
        engine = RedisEngine(prefix="FA", connection_pool=settings.REDIS_AUTOCOMPLETE_POOL)
        results = engine.search(phrase=prefix, limit=limit, autoboost=True)

        if len(results) < limit:
            engine = RedisEngine(prefix="FT", connection_pool=settings.REDIS_AUTOCOMPLETE_POOL)
            results += engine.search(phrase=prefix, limit=limit-len(results), autoboost=True, filters=[lambda f: f not in results])

        return results

    @classmethod
    def find_or_create(cls, feed_address, feed_link, *args, **kwargs):
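A quick orientation sketch, not part of the commit: sync_autocompletion() maintains two indexes in the autocomplete Redis database, "FT" keyed on the feed title and "FA" keyed on the hostname of the feed address, each boosted by subscriber count. Feed.autocomplete() queries the hostname index first and tops the result list up from the title index. A minimal sketch of the intended call, assuming the model and settings changes in this commit are in place:

    # Illustrative only; ids come back as strings straight from Redis.
    feed_ids = Feed.autocomplete('daringfireball', limit=5)   # e.g. ['42']
    feeds = [Feed.get_by_id(feed_id) for feed_id in feed_ids]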
@@ -74,29 +74,14 @@ def feed_autocomplete(request):
    query = request.GET.get('term')
    version = int(request.GET.get('v', 1))

    if True or not user.profile.is_premium:
        return dict(code=-1, message="Overloaded, no autocomplete results.", feeds=[], term=query)
    # if True or not user.profile.is_premium:
    # return dict(code=-1, message="Overloaded, no autocomplete results.", feeds=[], term=query)

    if not query:
        return dict(code=-1, message="Specify a search 'term'.", feeds=[], term=query)

    feeds = []
    for field in ['feed_address', 'feed_title', 'feed_link']:
        if not feeds:
            feeds = Feed.objects.filter(**{
                '%s__icontains' % field: query,
                'num_subscribers__gt': 1,
                'branch_from_feed__isnull': True,
            }).exclude(
                Q(**{'%s__icontains' % field: 'token'}) |
                Q(**{'%s__icontains' % field: 'private'})
            ).only(
                'id',
                'feed_title',
                'feed_address',
                'num_subscribers'
            ).select_related("data").order_by('-num_subscribers')[:5]

    feed_ids = Feed.autocomplete(query)
    feeds = [Feed.get_by_id(feed_id) for feed_id in feed_ids]
    feeds = [{
        'id': feed.pk,
        'value': feed.feed_address,

@@ -104,6 +89,7 @@ def feed_autocomplete(request):
        'tagline': feed.data and feed.data.feed_tagline,
        'num_subscribers': feed.num_subscribers,
    } for feed in feeds]
    feeds = sorted(feeds, key=lambda f: -1 * f['num_subscribers'])

    feed_ids = [f['id'] for f in feeds]
    feed_icons = dict((icon.feed_id, icon) for icon in MFeedIcon.objects.filter(feed_id__in=feed_ids))
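A small aside, not part of the commit: the Redis engines hand back ids in prefix/score order, so the view re-sorts the assembled dicts by subscriber count before responding. Illustrative values only:

    feeds = [{'id': 1, 'num_subscribers': 12}, {'id': 2, 'num_subscribers': 340}]
    feeds = sorted(feeds, key=lambda f: -1 * f['num_subscribers'])
    # -> [{'id': 2, 'num_subscribers': 340}, {'id': 1, 'num_subscribers': 12}]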
@@ -549,6 +549,8 @@ REDIS_ANALYTICS_POOL = redis.ConnectionPool(host=REDIS['host'], port=6379, db=2)
REDIS_STATISTICS_POOL = redis.ConnectionPool(host=REDIS['host'], port=6379, db=3)
REDIS_FEED_POOL = redis.ConnectionPool(host=REDIS['host'], port=6379, db=4)
REDIS_SESSION_POOL = redis.ConnectionPool(host=REDIS['host'], port=6379, db=5)
# DB 6 = Session Store
REDIS_AUTOCOMPLETE_POOL = redis.ConnectionPool(host=REDIS['host'], port=6379, db=7)

JAMMIT = jammit.JammitAssets(NEWSBLUR_DIR)
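How the new pool is meant to be consumed (an illustrative sketch, not part of the commit): RedisEngine passes unrecognized keyword arguments straight through to redis.Redis, so handing it the shared pool avoids opening a fresh connection on every autocomplete call. The host below is a stand-in for REDIS['host']:

    import redis
    from vendor.redis_completion.engine import RedisEngine

    # Mirrors the settings line above; db=7 is the new autocomplete database.
    REDIS_AUTOCOMPLETE_POOL = redis.ConnectionPool(host='127.0.0.1', port=6379, db=7)
    engine = RedisEngine(prefix="FT", connection_pool=REDIS_AUTOCOMPLETE_POOL)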
vendor/redis_completion/__init__.py (new vendored executable file, 1 line)
@@ -0,0 +1 @@
from redis_completion.engine import RedisEngine
vendor/redis_completion/engine.py (new vendored executable file, 228 lines)
@@ -0,0 +1,228 @@
try:
    import simplejson as json
except ImportError:
    import json
import re
from redis import Redis

from redis_completion.stop_words import STOP_WORDS as _STOP_WORDS


# aggressive stop words will be better when the length of the document is longer
AGGRESSIVE_STOP_WORDS = _STOP_WORDS

# default stop words should work fine for titles and things like that
DEFAULT_STOP_WORDS = set(['a', 'an', 'of', 'the'])


class RedisEngine(object):
    """
    References
    ----------

    http://antirez.com/post/autocomplete-with-redis.html
    http://stackoverflow.com/questions/1958005/redis-autocomplete/1966188#1966188
    http://patshaughnessy.net/2011/11/29/two-ways-of-using-redis-to-build-a-nosql-autocomplete-search-index
    """
    def __init__(self, prefix='ac', stop_words=None, cache_timeout=300, **conn_kwargs):
        self.prefix = prefix
        self.stop_words = (stop_words is None) and DEFAULT_STOP_WORDS or stop_words

        self.conn_kwargs = conn_kwargs
        self.client = self.get_client()

        self.cache_timeout = cache_timeout

        self.boost_key = '%s:b' % self.prefix
        self.data_key = '%s:d' % self.prefix
        self.title_key = '%s:t' % self.prefix
        self.search_key = lambda k: '%s:s:%s' % (self.prefix, k)
        self.cache_key = lambda pk, bk: '%s:c:%s:%s' % (self.prefix, pk, bk)

        self.kcombine = lambda _id, _type: str(_id)
        self.ksplit = lambda k: k

    def get_client(self):
        return Redis(**self.conn_kwargs)

    def score_key(self, k, max_size=20):
        k_len = len(k)
        a = ord('a') - 2
        score = 0

        for i in range(max_size):
            if i < k_len:
                c = (ord(k[i]) - a)
                if c < 2 or c > 27:
                    c = 1
            else:
                c = 1
            score += c*(27**(max_size-i))
        return score
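    # Note: score_key() packs the first max_size characters of a key into one
    # number, base-27 digit by digit: 'a' maps to 2, 'z' to 27, and any other
    # character (or padding past the end of the string) maps to 1. Earlier
    # characters get larger powers of 27, so the numeric order of the scores
    # matches the lexicographic order of the keys, e.g.
    # score_key('ab') < score_key('ac') < score_key('b').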
    def clean_phrase(self, phrase):
        phrase = re.sub('[^a-z0-9_\-\s]', '', phrase.lower())
        return [w for w in phrase.split() if w not in self.stop_words]

    def create_key(self, phrase):
        return ' '.join(self.clean_phrase(phrase))

    def autocomplete_keys(self, w):
        for i in range(1, len(w)):
            yield w[:i]
        yield w
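    # Note: autocomplete_keys('python') yields 'p', 'py', 'pyt', 'pyth',
    # 'pytho', 'python'. Keeping one sorted set per prefix is what makes a
    # lookup a single key read at query time, at the cost of one zadd per
    # prefix per word at store time.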
    def flush(self, everything=False, batch_size=1000):
        if everything:
            return self.client.flushdb()

        # this could be expensive :-(
        keys = self.client.keys('%s:*' % self.prefix)

        # batch keys
        for i in range(0, len(keys), batch_size):
            self.client.delete(*keys[i:i+batch_size])

    def store(self, obj_id, title=None, data=None, obj_type=None, check_exist=True):
        if title is None:
            title = obj_id
        if data is None:
            data = title

        title_score = self.score_key(self.create_key(title))

        combined_id = self.kcombine(obj_id, obj_type or '')

        if check_exist and self.exists(obj_id, obj_type):
            stored_title = self.client.hget(self.title_key, combined_id)

            # if the stored title is the same, we can simply update the data key
            # since everything else will have stayed the same
            if stored_title == title:
                self.client.hset(self.data_key, combined_id, data)
                return
            else:
                self.remove(obj_id, obj_type)

        pipe = self.client.pipeline()
        pipe.hset(self.data_key, combined_id, data)
        pipe.hset(self.title_key, combined_id, title)

        for word in self.clean_phrase(title):
            for partial_key in self.autocomplete_keys(word):
                pipe.zadd(self.search_key(partial_key), combined_id, title_score)

        pipe.execute()

    def store_json(self, obj_id, title, data_dict, obj_type=None):
        return self.store(obj_id, title, json.dumps(data_dict), obj_type)

    def remove(self, obj_id, obj_type=None):
        obj_id = self.kcombine(obj_id, obj_type or '')
        title = self.client.hget(self.title_key, obj_id) or ''
        keys = []

        for word in self.clean_phrase(title):
            for partial_key in self.autocomplete_keys(word):
                key = self.search_key(partial_key)
                if not self.client.zrange(key, 1, 2):
                    self.client.delete(key)
                else:
                    self.client.zrem(key, obj_id)

        self.client.hdel(self.data_key, obj_id)
        self.client.hdel(self.title_key, obj_id)
        self.client.hdel(self.boost_key, obj_id)

    def boost(self, obj_id, multiplier=1.1, negative=False):
        # take the existing boost for this item and increase it by the multiplier
        current = self.client.hget(self.boost_key, obj_id)
        current_f = float(current or 1.0)
        if negative:
            multiplier = 1 / multiplier
        self.client.hset(self.boost_key, obj_id, current_f * multiplier)
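    # Note on boost(): calls compound, so with the default multiplier a second
    # call takes an item from 1.0 to roughly 1.21; negative=True divides
    # instead, so a boost/negative pair approximately cancels out. search()
    # multiplies a matched entry's sorted-set score by 1 / boost, and because
    # results come back in ascending score order, boosts above 1.0 move an
    # entry toward the front.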
    def exists(self, obj_id, obj_type=None):
        obj_id = self.kcombine(obj_id, obj_type or '')
        return self.client.hexists(self.data_key, obj_id)

    def get_cache_key(self, phrases, boosts):
        if boosts:
            boost_key = '|'.join('%s:%s' % (k, v) for k, v in sorted(boosts.items()))
        else:
            boost_key = ''
        phrase_key = '|'.join(phrases)
        return self.cache_key(phrase_key, boost_key)

    def _process_ids(self, id_list, limit, filters, mappers):
        ct = 0
        data = []

        for raw_id in id_list:
            # raw_data = self.client.hget(self.data_key, raw_id)
            raw_data = raw_id
            if not raw_data:
                continue

            if mappers:
                for m in mappers:
                    raw_data = m(raw_data)

            if filters:
                passes = True
                for f in filters:
                    if not f(raw_data):
                        passes = False
                        break

                if not passes:
                    continue

            data.append(raw_data)
            ct += 1
            if limit and ct == limit:
                break

        return data

    def search(self, phrase, limit=None, filters=None, mappers=None, boosts=None, autoboost=False):
        cleaned = self.clean_phrase(phrase)
        if not cleaned:
            return []

        if autoboost:
            boosts = boosts or {}
            stored = self.client.hgetall(self.boost_key)
            for obj_id in stored:
                if obj_id not in boosts:
                    boosts[obj_id] = float(stored[obj_id])

        if len(cleaned) == 1 and not boosts:
            new_key = self.search_key(cleaned[0])
        else:
            new_key = self.get_cache_key(cleaned, boosts)
            if not self.client.exists(new_key):
                # zinterstore also takes {k1: wt1, k2: wt2}
                self.client.zinterstore(new_key, map(self.search_key, cleaned))
                self.client.expire(new_key, self.cache_timeout)

        if boosts:
            pipe = self.client.pipeline()
            for raw_id, score in self.client.zrange(new_key, 0, -1, withscores=True):
                orig_score = score
                for part in self.ksplit(raw_id):
                    if part and part in boosts:
                        score *= 1 / boosts[part]
                if orig_score != score:
                    pipe.zadd(new_key, raw_id, score)
            pipe.execute()

        id_list = self.client.zrange(new_key, 0, -1)
        # return id_list
        return self._process_ids(id_list, limit, filters, mappers)

    def search_json(self, phrase, limit=None, filters=None, mappers=None, boosts=None, autoboost=False):
        if not mappers:
            mappers = []
        mappers.insert(0, json.loads)
        return self.search(phrase, limit, filters, mappers, boosts, autoboost)
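For a concrete feel of the engine above, here is an illustrative sketch, not part of the commit, that assumes a local Redis server and a disposable database 9. Key names follow the lambdas in __init__: demo:t and demo:d hashes for titles and data, demo:b for boosts, and one demo:s:<prefix> sorted set per generated prefix.

    # Illustrative sketch only; assumes Redis database 9 is disposable.
    from vendor.redis_completion.engine import RedisEngine

    engine = RedisEngine(prefix='demo', db=9)
    engine.flush()                             # deletes demo:* keys only

    engine.store(42, title='Example Blog')     # indexes every prefix of 'example' and 'blog'
    engine.boost(42, 150)                      # demo:b entry '42' -> 150.0

    results = engine.search('exam', autoboost=True)
    # -> ['42']; this vendored copy of _process_ids returns raw ids, not stored data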
vendor/redis_completion/stop_words.py (new vendored executable file, 594 lines)
@@ -0,0 +1,594 @@
words = """a
a's
able
about
above
according
accordingly
across
actually
after
afterwards
again
against
ain't
all
allow
allows
almost
alone
along
already
also
although
always
am
among
amongst
amoungst
amount
an
and
another
any
anybody
anyhow
anyone
anything
anyway
anyways
anywhere
apart
appear
appreciate
appropriate
are
aren't
around
as
aside
ask
asking
associated
at
available
away
awfully
back
be
became
because
become
becomes
becoming
been
before
beforehand
behind
being
believe
below
beside
besides
best
better
between
beyond
bill
both
bottom
brief
but
by
c'mon
c's
call
came
can
can't
cannot
cant
cause
causes
certain
certainly
changes
clearly
co
com
come
comes
computer
con
concerning
consequently
consider
considering
contain
containing
contains
corresponding
could
couldn't
couldnt
course
cry
currently
de
definitely
describe
described
despite
detail
did
didn't
different
do
does
doesn't
doing
don't
done
down
downwards
due
during
each
edu
eg
eight
either
eleven
else
elsewhere
empty
enough
entirely
especially
et
etc
even
ever
every
everybody
everyone
everything
everywhere
ex
exactly
example
except
far
few
fifteen
fifth
fify
fill
find
fire
first
five
followed
following
follows
for
former
formerly
forth
forty
found
four
from
front
full
further
furthermore
get
gets
getting
give
given
gives
go
goes
going
gone
got
gotten
greetings
had
hadn't
happens
hardly
has
hasn't
hasnt
have
haven't
having
he
he's
hello
help
hence
her
here
here's
hereafter
hereby
herein
hereupon
hers
herself
hi
him
himself
his
hither
hopefully
how
howbeit
however
hundred
i
i'd
i'll
i'm
i've
ie
if
ignored
immediate
in
inasmuch
inc
indeed
indicate
indicated
indicates
inner
insofar
instead
interest
into
inward
is
isn't
it
it'd
it'll
it's
its
itself
just
keep
keeps
kept
know
known
knows
last
lately
later
latter
latterly
least
less
lest
let
let's
like
liked
likely
little
look
looking
looks
ltd
made
mainly
many
may
maybe
me
mean
meanwhile
merely
might
mill
mine
more
moreover
most
mostly
move
much
must
my
myself
name
namely
nd
near
nearly
necessary
need
needs
neither
never
nevertheless
new
next
nine
no
nobody
non
none
noone
nor
normally
not
nothing
novel
now
nowhere
obviously
of
off
often
oh
ok
okay
old
on
once
one
ones
only
onto
or
other
others
otherwise
ought
our
ours
ourselves
out
outside
over
overall
own
part
particular
particularly
per
perhaps
placed
please
plus
possible
presumably
probably
provides
put
que
quite
qv
rather
rd
re
really
reasonably
regarding
regardless
regards
relatively
respectively
right
said
same
saw
say
saying
says
second
secondly
see
seeing
seem
seemed
seeming
seems
seen
self
selves
sensible
sent
serious
seriously
seven
several
shall
she
should
shouldn't
show
side
since
sincere
six
sixty
so
some
somebody
somehow
someone
something
sometime
sometimes
somewhat
somewhere
soon
sorry
specified
specify
specifying
still
sub
such
sup
sure
system
t's
take
taken
tell
ten
tends
th
than
thank
thanks
thanx
that
that's
thats
the
their
theirs
them
themselves
then
thence
there
there's
thereafter
thereby
therefore
therein
theres
thereupon
these
they
they'd
they'll
they're
they've
thick
thin
think
third
this
thorough
thoroughly
those
though
three
through
throughout
thru
thus
to
together
too
took
top
toward
towards
tried
tries
truly
try
trying
twelve
twenty
twice
two
un
under
unfortunately
unless
unlikely
until
unto
up
upon
us
use
used
useful
uses
using
usually
value
various
very
via
viz
vs
want
wants
was
wasn't
way
we
we'd
we'll
we're
we've
welcome
well
went
were
weren't
what
what's
whatever
when
whence
whenever
where
where's
whereafter
whereas
whereby
wherein
whereupon
wherever
whether
which
while
whither
who
who's
whoever
whole
whom
whose
why
will
willing
wish
with
within
without
won't
wonder
would
wouldn't
yes
yet
you
you'd
you'll
you're
you've
your
yours
yourself
yourselves
zero"""
STOP_WORDS = set([
    w.strip() for w in words.splitlines() if w
])
vendor/redis_completion/tests.py (new vendored executable file, 277 lines)
@@ -0,0 +1,277 @@
import random
from unittest import TestCase

from redis_completion.engine import RedisEngine


stop_words = set(['a', 'an', 'the', 'of'])

class RedisCompletionTestCase(TestCase):
    def setUp(self):
        self.engine = self.get_engine()
        self.engine.flush()

    def get_engine(self):
        return RedisEngine(prefix='testac', db=15)

    def store_data(self, id=None):
        test_data = (
            (1, 'testing python'),
            (2, 'testing python code'),
            (3, 'web testing python code'),
            (4, 'unit tests with python'),
        )
        for obj_id, title in test_data:
            if id is None or id == obj_id:
                self.engine.store_json(obj_id, title, {
                    'obj_id': obj_id,
                    'title': title,
                    'secret': obj_id % 2 == 0 and 'derp' or 'herp',
                })

    def sort_results(self, r):
        return sorted(r, key=lambda i:i['obj_id'])

    def test_search(self):
        self.store_data()

        results = self.engine.search_json('testing python')
        self.assertEqual(self.sort_results(results), [
            {'obj_id': 1, 'title': 'testing python', 'secret': 'herp'},
            {'obj_id': 2, 'title': 'testing python code', 'secret': 'derp'},
            {'obj_id': 3, 'title': 'web testing python code', 'secret': 'herp'},
        ])

        results = self.engine.search_json('test')
        self.assertEqual(self.sort_results(results), [
            {'obj_id': 1, 'title': 'testing python', 'secret': 'herp'},
            {'obj_id': 2, 'title': 'testing python code', 'secret': 'derp'},
            {'obj_id': 3, 'title': 'web testing python code', 'secret': 'herp'},
            {'obj_id': 4, 'title': 'unit tests with python', 'secret': 'derp'},
        ])

        results = self.engine.search_json('unit')
        self.assertEqual(results, [
            {'obj_id': 4, 'title': 'unit tests with python', 'secret': 'derp'},
        ])

        results = self.engine.search_json('')
        self.assertEqual(results, [])

        results = self.engine.search_json('missing')
        self.assertEqual(results, [])

    def test_boosting(self):
        test_data = (
            (1, 'test alpha', 't1'),
            (2, 'test beta', 't1'),
            (3, 'test gamma', 't1'),
            (4, 'test delta', 't1'),
            (5, 'test alpha', 't2'),
            (6, 'test beta', 't2'),
            (7, 'test gamma', 't2'),
            (8, 'test delta', 't2'),
            (9, 'test alpha', 't3'),
            (10, 'test beta', 't3'),
            (11, 'test gamma', 't3'),
            (12, 'test delta', 't3'),
        )
        for obj_id, title, obj_type in test_data:
            self.engine.store_json(obj_id, title, {
                'obj_id': obj_id,
                'title': title,
            }, obj_type)

        def assertExpected(results, id_list):
            self.assertEqual([r['obj_id'] for r in results], id_list)

        results = self.engine.search_json('alp')
        assertExpected(results, [1, 5, 9])

        results = self.engine.search_json('alp', boosts={'t2': 1.1})
        assertExpected(results, [5, 1, 9])

        results = self.engine.search_json('test', boosts={'t3': 1.5, 't2': 1.1})
        assertExpected(results, [9, 10, 12, 11, 5, 6, 8, 7, 1, 2, 4, 3])

        results = self.engine.search_json('alp', boosts={'t1': 0.5})
        assertExpected(results, [5, 9, 1])

        results = self.engine.search_json('alp', boosts={'t1': 1.5, 't3': 1.6})
        assertExpected(results, [9, 1, 5])

        results = self.engine.search_json('alp', boosts={'t3': 1.5, '5': 1.6})
        assertExpected(results, [5, 9, 1])

    def test_autoboost(self):
        self.engine.store('t1', 'testing 1')
        self.engine.store('t2', 'testing 2')
        self.engine.store('t3', 'testing 3')
        self.engine.store('t4', 'testing 4')
        self.engine.store('t5', 'testing 5')

        def assertExpected(results, id_list):
            self.assertEqual(results, ['testing %s' % i for i in id_list])

        results = self.engine.search('testing', autoboost=True)
        assertExpected(results, [1, 2, 3, 4, 5])

        self.engine.boost('t3')
        results = self.engine.search('testing', autoboost=True)
        assertExpected(results, [3, 1, 2, 4, 5])

        self.engine.boost('t2')
        results = self.engine.search('testing', autoboost=True)
        assertExpected(results, [2, 3, 1, 4, 5])

        self.engine.boost('t1', negative=True)
        results = self.engine.search('testing', autoboost=True)
        assertExpected(results, [2, 3, 4, 5, 1])

        results = self.engine.search('testing', boosts={'t5': 4.0}, autoboost=True)
        assertExpected(results, [5, 2, 3, 4, 1])

        results = self.engine.search('testing', boosts={'t3': 1.5}, autoboost=True)
        assertExpected(results, [3, 2, 4, 5, 1])

    def test_limit(self):
        self.store_data()

        results = self.engine.search_json('testing', limit=1)
        self.assertEqual(results, [
            {'obj_id': 1, 'title': 'testing python', 'secret': 'herp'},
        ])

    def test_filters(self):
        self.store_data()

        f = lambda i: i['secret'] == 'herp'
        results = self.engine.search_json('testing python', filters=[f])

        self.assertEqual(self.sort_results(results), [
            {'obj_id': 1, 'title': 'testing python', 'secret': 'herp'},
            {'obj_id': 3, 'title': 'web testing python code', 'secret': 'herp'},
        ])

    def test_simple(self):
        self.engine.print_scores = True
        self.engine.store('testing python')
        self.engine.store('testing python code')
        self.engine.store('web testing python code')
        self.engine.store('unit tests with python')

        results = self.engine.search('testing')
        self.assertEqual(results, ['testing python', 'testing python code', 'web testing python code'])

        results = self.engine.search('code')
        self.assertEqual(results, ['testing python code', 'web testing python code'])

    def test_correct_sorting(self):
        strings = []
        for i in range(26):
            strings.append('aaaa%s' % chr(i + ord('a')))
            if i > 0:
                strings.append('aaa%sa' % chr(i + ord('a')))

        random.shuffle(strings)

        for s in strings:
            self.engine.store(s)

        results = self.engine.search('aaa')
        self.assertEqual(results, sorted(strings))

        results = self.engine.search('aaa', limit=30)
        self.assertEqual(results, sorted(strings)[:30])

    def test_removing_objects(self):
        self.store_data()

        self.engine.remove(1)

        results = self.engine.search_json('testing')
        self.assertEqual(self.sort_results(results), [
            {'obj_id': 2, 'title': 'testing python code', 'secret': 'derp'},
            {'obj_id': 3, 'title': 'web testing python code', 'secret': 'herp'},
        ])

        self.store_data(1)
        self.engine.remove(2)

        results = self.engine.search_json('testing')
        self.assertEqual(self.sort_results(results), [
            {'obj_id': 1, 'title': 'testing python', 'secret': 'herp'},
            {'obj_id': 3, 'title': 'web testing python code', 'secret': 'herp'},
        ])

    def test_clean_phrase(self):
        self.assertEqual(self.engine.clean_phrase('abc def ghi'), ['abc', 'def', 'ghi'])

        self.assertEqual(self.engine.clean_phrase('a A tHe an a'), [])
        self.assertEqual(self.engine.clean_phrase(''), [])

        self.assertEqual(
            self.engine.clean_phrase('The Best of times, the blurst of times'),
            ['best', 'times', 'blurst', 'times'])

    def test_exists(self):
        self.assertFalse(self.engine.exists('test'))
        self.engine.store('test')
        self.assertTrue(self.engine.exists('test'))

    def test_removing_objects_in_depth(self):
        # want to ensure that redis is cleaned up and does not become polluted
        # with spurious keys when objects are removed
        redis_client = self.engine.client
        prefix = self.engine.prefix

        initial_key_count = len(redis_client.keys())

        # store the blog "testing python"
        self.store_data(1)

        # see how many keys we have in the db - check again in a bit
        key_len = len(redis_client.keys())

        self.store_data(2)
        key_len2 = len(redis_client.keys())

        self.assertTrue(key_len != key_len2)
        self.engine.remove(2)

        # back to the original amount of keys
        self.assertEqual(len(redis_client.keys()), key_len)

        self.engine.remove(1)
        self.assertEqual(len(redis_client.keys()), initial_key_count)

    def test_updating(self):
        self.engine.store('id1', 'title one', 'd1', 't1')
        self.engine.store('id2', 'title two', 'd2', 't2')
        self.engine.store('id3', 'title three', 'd3', 't3')

        results = self.engine.search('tit')
        self.assertEqual(results, ['d1', 'd3', 'd2'])

        # overwrite the data for id1
        self.engine.store('id1', 'title one', 'D1', 't1')

        results = self.engine.search('tit')
        self.assertEqual(results, ['D1', 'd3', 'd2'])

        # overwrite the data with a new title, will remove the title one refs
        self.engine.store('id1', 'Herple One', 'done', 't1')

        results = self.engine.search('tit')
        self.assertEqual(results, ['d3', 'd2'])

        results = self.engine.search('her')
        self.assertEqual(results, ['done'])

        self.engine.store('id1', 'title one', 'Done', 't1', False)
        results = self.engine.search('tit')
        self.assertEqual(results, ['Done', 'd3', 'd2'])

        # this shows that when we don't clean up crap gets left around
        results = self.engine.search('her')
        self.assertEqual(results, ['Done'])
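A note on running the suite, not part of the commit: the tests talk to a live Redis instance (get_engine points at db 15 and setUp flushes it), so they should never run against a database holding real data. Also, because _process_ids in this vendored engine returns raw ids rather than the stored data (the hget call is commented out above), tests that assert on stored JSON may not pass unmodified. A minimal runner sketch, assuming redis_completion is importable and a local Redis server is listening on the default port:

    import unittest
    from redis_completion import tests

    unittest.TextTestRunner(verbosity=2).run(
        unittest.defaultTestLoader.loadTestsFromModule(tests))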