NewsBlur-viq/apps/search/models.py

209 lines
6.8 KiB
Python
Raw Normal View History

import pyes
2013-01-04 16:34:27 -08:00
from pyes.query import FilteredQuery, FuzzyQuery, TextQuery, PrefixQuery
from pyes.filters import RangeFilter
from pyes.utils import ESRange
from django.conf import settings
2012-12-21 15:05:38 -08:00
from django.contrib.auth.models import User
from utils import log as logging
class SearchStarredStory:
    """Elasticsearch index of saved (starred) stories.

    The index and document type names are derived from ``name``
    ("starred-stories-index" / "starred-stories-type").
    """

    # One client shared by every classmethod; built when the module is imported.
    ES = pyes.ES(settings.ELASTICSEARCH_HOSTS)
    name = "starred-stories"

    @classmethod
    def _index_name(cls):
        """Return the name of the index holding this class's documents."""
        return "%s-index" % cls.name

    @classmethod
    def _type_name(cls):
        """Return the document type used for this class's documents."""
        return "%s-type" % cls.name

    @classmethod
    def _search(cls, q):
        """Run ``q`` against this class's index and document type only.

        Without explicit ``indices``/``doc_types`` the search is not limited
        to starred stories and can match documents from other indices (for
        example the feeds index).
        """
        return cls.ES.search(q,
                             indices=[cls._index_name()],
                             doc_types=[cls._type_name()])

    @classmethod
    def create_elasticsearch_mapping(cls):
        """Create the starred-stories index and install its field mapping."""
        cls.ES.create_index(cls._index_name())
        mapping = {
            'title': {
                'boost': 2.0,
                'index': 'analyzed',
                'store': 'yes',
                'type': 'string',
                "term_vector": "with_positions_offsets",
            },
            'content': {
                'boost': 1.0,
                'index': 'analyzed',
                'store': 'yes',
                'type': 'string',
                "term_vector": "with_positions_offsets",
            },
            'author': {
                'boost': 1.0,
                'index': 'analyzed',
                'store': 'yes',
                'type': 'string',
            },
            'db_id': {
                'index': 'not_analyzed',
                'store': 'yes',
                'type': 'string',
            },
            # NOTE(review): feed_id is mapped here, but index() below never
            # writes it into the document.
            'feed_id': {
                'store': 'yes',
                'type': 'integer',
            },
            'date': {
                'store': 'yes',
                'type': 'date',
            },
            'user_ids': {
                'index': 'not_analyzed',
                'store': 'yes',
                'type': 'integer',
                'index_name': 'user_id',
            },
        }
        cls.ES.put_mapping(cls._type_name(), {'properties': mapping},
                           [cls._index_name()])

    @classmethod
    def index(cls, user_id, story_id, story_title, story_content,
              story_author, story_date, db_id):
        """Index one starred story.

        ``story_id`` is passed to ``ES.index`` as the document id; the
        remaining arguments become the document's fields (``user_id`` is
        stored under the ``user_ids`` key).
        """
        doc = {
            "content": story_content,
            "title": story_title,
            "author": story_author,
            "date": story_date,
            "user_ids": user_id,
            "db_id": db_id,
        }
        cls.ES.index(doc, cls._index_name(), cls._type_name(), story_id)

    @classmethod
    def query(cls, user_id, text):
        """Search starred stories for ``text`` and return the pyes result set.

        A query-string search is tried first. While the result set is empty,
        fuzzy searches on ``title``, ``content`` and ``author`` are tried in
        that order; the first non-empty result set (or the last, empty one)
        is returned.

        Raises ``User.DoesNotExist`` if ``user_id`` matches no user.

        NOTE(review): ``user_id`` is only used to look up the user for
        logging; the searches are not filtered by user. Confirm whether
        results should be restricted to the requesting user's stories.
        """
        user = User.objects.get(pk=user_id)
        cls.ES.refresh()

        results = cls._search(pyes.query.StringQuery(text))
        logging.user(user, "~FGSearch ~FCsaved stories~FG for: ~SB%s" % text)

        # Progressively looser fallbacks, stopping at the first one that hits.
        for field in ('title', 'content', 'author'):
            if results.total:
                break
            logging.user(user, "~FGSearch ~FCsaved stories~FG by %s: ~SB%s"
                         % (field, text))
            results = cls._search(FuzzyQuery(field, text))

        return results
2013-01-04 16:34:27 -08:00
class SearchFeed:
    """Elasticsearch index of feeds, searchable by address, title and link.

    The index and document type names are derived from ``name``
    ("feeds-index" / "feeds-type").
    """

    # One client shared by every classmethod; built when the module is imported.
    ES = pyes.ES(settings.ELASTICSEARCH_HOSTS)
    name = "feeds"

    @classmethod
    def _index_name(cls):
        """Return the name of the index holding this class's documents."""
        return "%s-index" % cls.name

    @classmethod
    def _type_name(cls):
        """Return the document type used for this class's documents."""
        return "%s-type" % cls.name

    @classmethod
    def create_elasticsearch_mapping(cls):
        """Drop the feeds index if present, then recreate it with its mapping.

        The index is created with a custom ``url_analyzer`` that is applied
        to the ``address`` and ``link`` fields.
        """
        try:
            cls.ES.delete_index(cls._index_name())
        except pyes.TypeMissingException:
            # Single-argument parenthesised form prints the same text under
            # the Python 2 print statement and the Python 3 print function.
            print("Index missing, can't delete: %s-index" % cls.name)

        # Named index_settings so the module-level Django ``settings`` import
        # is not shadowed inside this method.
        index_settings = {
            "index": {
                "analysis": {
                    "analyzer": {
                        "url_analyzer": {
                            "type": "custom",
                            "tokenizer": "urls",
                            "filter": ["stop", "url_stop"],
                        },
                    },
                    "tokenizer": {
                        "urls": {
                            "type": "uax_url_email",
                            "max_token_length": 255,
                        },
                    },
                    "filter": {
                        "url_stop": {
                            "type": "stop",
                            "stopwords": ["http", "https"],
                        },
                        # NOTE(review): url_ngram is defined but no analyzer
                        # in these settings references it.
                        "url_ngram": {
                            "type": "nGram",
                            "min_gram": 2,
                            "max_gram": 20,
                        },
                    },
                },
            },
        }
        cls.ES.create_index(cls._index_name(), index_settings)

        mapping = {
            'address': {
                'boost': 3.0,
                'index': 'analyzed',
                'store': 'yes',
                'type': 'string',
                "term_vector": "with_positions_offsets",
                "analyzer": "url_analyzer",
            },
            'title': {
                'boost': 2.0,
                'index': 'analyzed',
                'store': 'yes',
                'type': 'string',
                "term_vector": "with_positions_offsets",
            },
            'link': {
                'boost': 1.0,
                'index': 'analyzed',
                'store': 'yes',
                'type': 'string',
                "term_vector": "with_positions_offsets",
                "analyzer": "url_analyzer",
            },
            'num_subscribers': {
                'boost': 1.0,
                'index': 'not_analyzed',
                'store': 'yes',
                'type': 'integer',
            },
            'feed_id': {
                'store': 'yes',
                'type': 'integer',
            },
        }
        cls.ES.put_mapping(cls._type_name(), {'properties': mapping},
                           [cls._index_name()])

    @classmethod
    def index(cls, feed_id, title, address, link, num_subscribers):
        """Index one feed; ``feed_id`` is also passed as the document id."""
        doc = {
            "feed_id": feed_id,
            "title": title,
            "address": address,
            "link": link,
            "num_subscribers": num_subscribers,
        }
        cls.ES.index(doc, cls._index_name(), cls._type_name(), feed_id)

    @classmethod
    def query(cls, text):
        """Search feeds for ``text`` and return the pyes result set.

        Tries, in order, a text match on ``address``, a prefix match on
        ``title`` and a text match on ``link``, stopping at the first
        attempt with hits. Every attempt is filtered by a range on
        ``num_subscribers`` starting at 2, sorted by subscriber count
        descending, and limited to 5 results.
        """
        cls.ES.refresh()
        sub_filter = RangeFilter(qrange=ESRange('num_subscribers', 2))

        # The final fallback targets ``link``: the mapping created by this
        # class defines no ``link.partial`` sub-field, so querying that name
        # could not match anything.
        attempts = (
            ('address', TextQuery),
            ('title', PrefixQuery),
            ('link', TextQuery),
        )

        results = None
        for field, query_cls in attempts:
            logging.info("~FGSearch ~FCfeeds~FG by %s: ~SB%s" % (field, text))
            results = cls.ES.search(
                FilteredQuery(query_cls(field, text), sub_filter),
                # Keep the search inside the feeds index and type.
                indices=[cls._index_name()],
                doc_types=[cls._type_name()],
                sort="num_subscribers:desc",
                size=5)
            if results.total:
                break

        return results