"""Elasticsearch (pyes) indexing and search helpers for starred stories and feeds."""
import pyes
from pyes.query import FilteredQuery, FuzzyQuery, TextQuery, PrefixQuery
from pyes.filters import RangeFilter
from pyes.utils import ESRange
from django.conf import settings
from django.contrib.auth.models import User
from utils import log as logging


class SearchStarredStory:
    """Index and search starred (saved) stories in the "starred-stories" index."""

    # Connection object is built once, when the class body is executed.
    ES = pyes.ES(settings.ELASTICSEARCH_HOSTS)
    name = "starred-stories"

    @classmethod
    def _index_name(cls):
        """Return the Elasticsearch index name derived from ``cls.name``."""
        return "%s-index" % cls.name

    @classmethod
    def _type_name(cls):
        """Return the Elasticsearch document type name derived from ``cls.name``."""
        return "%s-type" % cls.name

    @classmethod
    def create_elasticsearch_mapping(cls):
        """Create the index and install the field mapping for starred stories."""
        cls.ES.create_index(cls._index_name())
        mapping = {
            'title': {
                'boost': 2.0,
                'index': 'analyzed',
                'store': 'yes',
                'type': 'string',
                'term_vector': 'with_positions_offsets',
            },
            'content': {
                'boost': 1.0,
                'index': 'analyzed',
                'store': 'yes',
                'type': 'string',
                'term_vector': 'with_positions_offsets',
            },
            'author': {
                'boost': 1.0,
                'index': 'analyzed',
                'store': 'yes',
                'type': 'string',
            },
            'db_id': {
                'index': 'not_analyzed',
                'store': 'yes',
                'type': 'string',
            },
            # NOTE(review): feed_id is mapped here but index() below never
            # writes it -- confirm whether callers are expected to supply it.
            'feed_id': {
                'store': 'yes',
                'type': 'integer',
            },
            'date': {
                'store': 'yes',
                'type': 'date',
            },
            'user_ids': {
                'index': 'not_analyzed',
                'store': 'yes',
                'type': 'integer',
                'index_name': 'user_id',
            },
        }
        cls.ES.put_mapping(cls._type_name(), {'properties': mapping},
                           [cls._index_name()])

    @classmethod
    def index(cls, user_id, story_id, story_title, story_content,
              story_author, story_date, db_id):
        """Index one starred story, using ``story_id`` as the document id.

        ``user_id`` is stored in the ``user_ids`` field; the remaining
        arguments map onto the ``title``, ``content``, ``author``, ``date``
        and ``db_id`` fields of the document.
        """
        doc = {
            "content": story_content,
            "title": story_title,
            "author": story_author,
            "date": story_date,
            "user_ids": user_id,
            "db_id": db_id,
        }
        cls.ES.index(doc, cls._index_name(), cls._type_name(), story_id)

    @classmethod
    def query(cls, user_id, text):
        """Search starred stories for ``text`` and return the pyes result set.

        A general string query runs first. While the current result set
        reports no hits, fuzzy queries are tried against ``title``, then
        ``content``, then ``author``. The result of the last search that
        was run is returned.

        Raises ``User.DoesNotExist`` if ``user_id`` matches no user.
        """
        # NOTE(review): user_id is only used to look up the user for logging;
        # results are not restricted to that user's stories. Confirm whether
        # a filter on the user field is intended.
        user = User.objects.get(pk=user_id)
        cls.ES.refresh()

        # Restrict every search to this class's index. Without ``indices``
        # pyes searches all indices, so feed documents could be returned.
        indices = [cls._index_name()]

        results = cls.ES.search(pyes.query.StringQuery(text), indices=indices)
        logging.user(user, "~FGSearch ~FCsaved stories~FG for: ~SB%s" % text)

        for field in ('title', 'content', 'author'):
            if results.total:
                break
            logging.user(user, "~FGSearch ~FCsaved stories~FG by %s: ~SB%s"
                         % (field, text))
            results = cls.ES.search(FuzzyQuery(field, text), indices=indices)

        return results


class SearchFeed:
    """Index and search feeds in the "feeds" index."""

    # Connection object is built once, when the class body is executed.
    ES = pyes.ES(settings.ELASTICSEARCH_HOSTS)
    name = "feeds"

    @classmethod
    def _index_name(cls):
        """Return the Elasticsearch index name derived from ``cls.name``."""
        return "%s-index" % cls.name

    @classmethod
    def _type_name(cls):
        """Return the Elasticsearch document type name derived from ``cls.name``."""
        return "%s-type" % cls.name

    @classmethod
    def create_elasticsearch_mapping(cls):
        """Drop and recreate the feeds index, then install its field mapping.

        The index is created with a custom ``url_analyzer`` that is applied
        to the ``address`` and ``link`` fields.
        """
        try:
            cls.ES.delete_index(cls._index_name())
        except pyes.TypeMissingException:
            # NOTE(review): this guards an *index* deletion but catches a
            # *type*-missing exception -- verify this is what pyes raises
            # when the index does not exist.
            # Parenthesised single-argument form prints identically on
            # Python 2 and is valid syntax on Python 3.
            print("Index missing, can't delete: %s" % cls._index_name())

        # Named index_settings so it does not shadow django.conf.settings.
        index_settings = {
            "index": {
                "analysis": {
                    "analyzer": {
                        "url_analyzer": {
                            "type": "custom",
                            "tokenizer": "urls",
                            "filter": ["stop", "url_stop"],
                        },
                    },
                    "tokenizer": {
                        "urls": {
                            "type": "uax_url_email",
                            "max_token_length": 255,
                        },
                    },
                    "filter": {
                        "url_stop": {
                            "type": "stop",
                            "stopwords": ["http", "https"],
                        },
                        # NOTE(review): url_ngram is defined but no analyzer
                        # in this file references it.
                        "url_ngram": {
                            "type": "nGram",
                            "min_gram": 2,
                            "max_gram": 20,
                        },
                    },
                },
            },
        }
        cls.ES.create_index(cls._index_name(), index_settings)

        mapping = {
            'address': {
                'boost': 3.0,
                'index': 'analyzed',
                'store': 'yes',
                'type': 'string',
                'term_vector': 'with_positions_offsets',
                'analyzer': 'url_analyzer',
            },
            'title': {
                'boost': 2.0,
                'index': 'analyzed',
                'store': 'yes',
                'type': 'string',
                'term_vector': 'with_positions_offsets',
            },
            'link': {
                'boost': 1.0,
                'index': 'analyzed',
                'store': 'yes',
                'type': 'string',
                'term_vector': 'with_positions_offsets',
                'analyzer': 'url_analyzer',
            },
            'num_subscribers': {
                'boost': 1.0,
                'index': 'not_analyzed',
                'store': 'yes',
                'type': 'integer',
            },
            'feed_id': {
                'store': 'yes',
                'type': 'integer',
            },
        }
        cls.ES.put_mapping(cls._type_name(), {'properties': mapping},
                           [cls._index_name()])

    @classmethod
    def index(cls, feed_id, title, address, link, num_subscribers):
        """Index one feed, using ``feed_id`` as the document id."""
        doc = {
            "feed_id": feed_id,
            "title": title,
            "address": address,
            "link": link,
            "num_subscribers": num_subscribers,
        }
        cls.ES.index(doc, cls._index_name(), cls._type_name(), feed_id)

    @classmethod
    def query(cls, text):
        """Search feeds for ``text`` and return the pyes result set.

        Searches are tried in order -- text match on ``address``, prefix
        match on ``title``, text match on ``link.partial`` -- stopping at
        the first one that reports hits. Each search is filtered by a range
        on ``num_subscribers``, sorted by ``num_subscribers`` descending and
        limited to 5 results. The result of the last search run is returned.
        """
        cls.ES.refresh()
        # Range on num_subscribers built with the value 2 (presumably the
        # lower bound -- confirm against the pyes ESRange signature).
        sub_filter = RangeFilter(qrange=ESRange('num_subscribers', 2))

        # (label used in the log line, query class, field queried)
        # NOTE(review): 'link.partial' is queried, but the mapping in
        # create_elasticsearch_mapping defines no 'partial' sub-field on
        # 'link' -- verify this fallback can ever match.
        attempts = (
            ('address', TextQuery, 'address'),
            ('title', PrefixQuery, 'title'),
            ('link', TextQuery, 'link.partial'),
        )

        results = None
        for label, query_class, field in attempts:
            logging.info("~FGSearch ~FCfeeds~FG by %s: ~SB%s" % (label, text))
            # Restrict the search to the feeds index; without ``indices``
            # pyes would search every index.
            results = cls.ES.search(
                FilteredQuery(query_class(field, text), sub_filter),
                indices=[cls._index_name()],
                sort="num_subscribers:desc",
                size=5)
            if results.total:
                break

        return results