Adding broken search for feeds.

This commit is contained in:
Samuel Clay 2013-01-04 16:34:27 -08:00
parent c4381c31af
commit bc0192c3df
3 changed files with 146 additions and 7 deletions

View file

@@ -22,7 +22,7 @@ from mongoengine.queryset import OperationError, Q
from mongoengine.base import ValidationError
from vendor.timezones.utilities import localtime_for_timezone
from apps.rss_feeds.tasks import UpdateFeeds, PushFeeds
from apps.search.models import SearchStarredStory
from apps.search.models import SearchStarredStory, SearchFeed
from utils import json_functions as json
from utils import feedfinder, feedparser
from utils import urlnorm
@@ -83,7 +83,12 @@ class Feed(models.Model):
if not self.feed_title:
self.feed_title = "[Untitled]"
self.save()
return "%s (%s)" % (self.feed_title, self.pk)
return "%s (%s - %s/%s/%s)" % (
self.feed_title,
self.pk,
self.num_subscribers,
self.active_subscribers,
self.premium_subscribers)
@property
def title(self):
@@ -207,6 +212,14 @@ class Feed(models.Model):
return self
def index_for_search(self):
    """Push this feed into the Elasticsearch feed index.

    Feeds are only indexed when they have more than one subscriber
    and are not a branched copy of another feed.
    """
    # Guard clause instead of wrapping the call in a conditional.
    if self.num_subscribers <= 1 or self.branch_from_feed:
        return
    SearchFeed.index(
        feed_id=self.pk,
        title=self.feed_title,
        address=self.feed_address,
        link=self.feed_link,
        num_subscribers=self.num_subscribers,
    )
def sync_redis(self):
    """Re-sync every story of this feed into Redis via MStory."""
    feed_id = self.pk
    return MStory.sync_all_redis(feed_id)
@@ -759,7 +772,17 @@ class Feed(models.Model):
duplicate_feeds = DuplicateFeed.objects.filter(duplicate_address=feed_address)
if duplicate_feeds:
return duplicate_feeds[0].feed
@classmethod
def get_by_name(cls, query, limit=1):
    """Find feeds whose indexed title/address/link match ``query``.

    Resolves feed ids through the SearchFeed Elasticsearch index, then
    loads the corresponding Feed rows.

    Arguments:
        query -- free-text search string.
        limit -- maximum number of feeds to return. With limit == 1 a
                 single Feed (or None when nothing matches) is returned;
                 otherwise a list of at most ``limit`` feeds.
    """
    results = SearchFeed.query(query)
    feed_ids = [result.feed_id for result in results]

    if limit == 1:
        # The original indexed feed_ids[0] unconditionally, raising
        # IndexError on an empty result set; return None instead.
        return Feed.get_by_id(feed_ids[0]) if feed_ids else None
    # Slice the ids *before* hitting the database so we never load
    # more feeds than the caller asked for.
    return [Feed.get_by_id(feed_id) for feed_id in feed_ids[:limit]]
def add_update_stories(self, stories, existing_stories, verbose=False):
ret_values = dict(new=0, updated=0, same=0, error=0)

View file

@@ -75,7 +75,7 @@ def feed_autocomplete(request):
return dict(code=-1, message="Specify a search 'term'.")
feeds = []
for field in ['feed_address', 'feed_link', 'feed_title']:
for field in ['feed_address', 'feed_title', 'feed_link']:
if not feeds:
feeds = Feed.objects.filter(**{
'%s__icontains' % field: query,

View file

@@ -1,4 +1,7 @@
import pyes
from pyes.query import FilteredQuery, FuzzyQuery, TextQuery, PrefixQuery
from pyes.filters import RangeFilter
from pyes.utils import ESRange
from django.conf import settings
from django.contrib.auth.models import User
from utils import log as logging
@@ -76,17 +79,130 @@ class SearchStarredStory:
if not results.total:
logging.user(user, "~FGSearch ~FCsaved stories~FG by title: ~SB%s" % text)
q = pyes.query.FuzzyQuery('title', text)
q = FuzzyQuery('title', text)
results = cls.ES.search(q)
if not results.total:
logging.user(user, "~FGSearch ~FCsaved stories~FG by content: ~SB%s" % text)
q = pyes.query.FuzzyQuery('content', text)
q = FuzzyQuery('content', text)
results = cls.ES.search(q)
if not results.total:
logging.user(user, "~FGSearch ~FCsaved stories~FG by author: ~SB%s" % text)
q = pyes.query.FuzzyQuery('author', text)
q = FuzzyQuery('author', text)
results = cls.ES.search(q)
return results
class SearchFeed:
ES = pyes.ES(settings.ELASTICSEARCH_HOSTS)
name = "feeds"
@classmethod
def create_elasticsearch_mapping(cls):
try:
cls.ES.delete_index("%s-index" % cls.name)
except pyes.TypeMissingException:
print "Index missing, can't delete: %s-index" % cls.name
settings = {
"index" : {
"analysis" : {
"analyzer" : {
"url_analyzer" : {
"type" : "custom",
"tokenizer" : "urls",
"filter" : ["stop", "url_stop"]
}
},
"tokenizer": {
"urls": {
"type": "uax_url_email",
"max_token_length": 255,
}
},
"filter" : {
"url_stop" : {
"type" : "stop",
"stopwords" : ["http", "https"]
},
"url_ngram" : {
"type" : "nGram",
"min_gram" : 2,
"max_gram" : 20,
}
}
}
}
}
cls.ES.create_index("%s-index" % cls.name, settings)
mapping = {
'address': {
'boost': 3.0,
'index': 'analyzed',
'store': 'yes',
'type': 'string',
"term_vector" : "with_positions_offsets",
"analyzer": "url_analyzer",
},
'title': {
'boost': 2.0,
'index': 'analyzed',
'store': 'yes',
'type': 'string',
"term_vector" : "with_positions_offsets",
},
'link': {
'boost': 1.0,
'index': 'analyzed',
'store': 'yes',
'type': 'string',
"term_vector" : "with_positions_offsets",
"analyzer": "url_analyzer",
},
'num_subscribers': {
'boost': 1.0,
'index': 'not_analyzed',
'store': 'yes',
'type': 'integer',
},
'feed_id': {
'store': 'yes',
'type': 'integer',
},
}
cls.ES.put_mapping("%s-type" % cls.name, {'properties': mapping}, ["%s-index" % cls.name])
@classmethod
def index(cls, feed_id, title, address, link, num_subscribers):
doc = {
"feed_id": feed_id,
"title": title,
"address": address,
"link": link,
"num_subscribers": num_subscribers,
}
cls.ES.index(doc, "%s-index" % cls.name, "%s-type" % cls.name, feed_id)
@classmethod
def query(cls, text):
cls.ES.refresh()
sub_filter = RangeFilter(qrange=ESRange('num_subscribers', 2))
logging.info("~FGSearch ~FCfeeds~FG by address: ~SB%s" % text)
q = TextQuery('address', text)
results = cls.ES.search(FilteredQuery(q, sub_filter), sort="num_subscribers:desc", size=5)
if not results.total:
logging.info("~FGSearch ~FCfeeds~FG by title: ~SB%s" % text)
q = PrefixQuery('title', text)
results = cls.ES.search(FilteredQuery(q, sub_filter), sort="num_subscribers:desc", size=5)
if not results.total:
logging.info("~FGSearch ~FCfeeds~FG by link: ~SB%s" % text)
q = TextQuery('link.partial', text)
results = cls.ES.search(FilteredQuery(q, sub_filter), sort="num_subscribers:desc", size=5)
return results