mirror of
https://github.com/viq/NewsBlur.git
synced 2025-09-18 21:43:31 +00:00
Adding broken search for feeds.
This commit is contained in:
parent
c4381c31af
commit
bc0192c3df
3 changed files with 146 additions and 7 deletions
|
@ -22,7 +22,7 @@ from mongoengine.queryset import OperationError, Q
|
|||
from mongoengine.base import ValidationError
|
||||
from vendor.timezones.utilities import localtime_for_timezone
|
||||
from apps.rss_feeds.tasks import UpdateFeeds, PushFeeds
|
||||
from apps.search.models import SearchStarredStory
|
||||
from apps.search.models import SearchStarredStory, SearchFeed
|
||||
from utils import json_functions as json
|
||||
from utils import feedfinder, feedparser
|
||||
from utils import urlnorm
|
||||
|
@ -83,7 +83,12 @@ class Feed(models.Model):
|
|||
if not self.feed_title:
|
||||
self.feed_title = "[Untitled]"
|
||||
self.save()
|
||||
return "%s (%s)" % (self.feed_title, self.pk)
|
||||
return "%s (%s - %s/%s/%s)" % (
|
||||
self.feed_title,
|
||||
self.pk,
|
||||
self.num_subscribers,
|
||||
self.active_subscribers,
|
||||
self.premium_subscribers)
|
||||
|
||||
@property
|
||||
def title(self):
|
||||
|
@ -207,6 +212,14 @@ class Feed(models.Model):
|
|||
|
||||
return self
|
||||
|
||||
def index_for_search(self):
    """Send this feed's searchable fields to ``SearchFeed.index``.

    Nothing is indexed when ``num_subscribers`` is 1 or less, or when
    ``branch_from_feed`` is truthy.
    """
    # Guard clause: skip feeds that fail the subscriber / branch check.
    if self.num_subscribers <= 1 or self.branch_from_feed:
        return

    SearchFeed.index(
        feed_id=self.pk,
        title=self.feed_title,
        address=self.feed_address,
        link=self.feed_link,
        num_subscribers=self.num_subscribers,
    )
|
||||
|
||||
|
||||
def sync_redis(self):
    """Call ``MStory.sync_all_redis`` with this feed's primary key and return its result."""
    feed_pk = self.pk
    return MStory.sync_all_redis(feed_pk)
|
||||
|
@ -759,7 +772,17 @@ class Feed(models.Model):
|
|||
duplicate_feeds = DuplicateFeed.objects.filter(duplicate_address=feed_address)
|
||||
if duplicate_feeds:
|
||||
return duplicate_feeds[0].feed
|
||||
|
||||
|
||||
@classmethod
def get_by_name(cls, query, limit=1):
    """Find feeds whose indexed fields match ``query``.

    The text is passed to ``SearchFeed.query``; the ``feed_id`` of each
    hit is then resolved through ``Feed.get_by_id``.

    Args:
        query: Search text handed to ``SearchFeed.query``.
        limit: Maximum number of feeds to return. With the default of 1 a
            single feed (not a list) is returned.

    Returns:
        When ``limit == 1``: the result of ``Feed.get_by_id`` for the first
        hit, or ``None`` if the search produced no hits.
        Otherwise: a list of at most ``limit`` ``Feed.get_by_id`` results.
    """
    results = SearchFeed.query(query)
    feed_ids = [result.feed_id for result in results]

    if limit == 1:
        # An empty result set previously raised IndexError on feed_ids[0].
        if not feed_ids:
            return None
        return Feed.get_by_id(feed_ids[0])

    # Trim the id list before resolving so that no more than `limit`
    # lookups are performed.
    return [Feed.get_by_id(feed_id) for feed_id in feed_ids[:limit]]
|
||||
|
||||
def add_update_stories(self, stories, existing_stories, verbose=False):
|
||||
ret_values = dict(new=0, updated=0, same=0, error=0)
|
||||
|
||||
|
|
|
@ -75,7 +75,7 @@ def feed_autocomplete(request):
|
|||
return dict(code=-1, message="Specify a search 'term'.")
|
||||
|
||||
feeds = []
|
||||
for field in ['feed_address', 'feed_link', 'feed_title']:
|
||||
for field in ['feed_address', 'feed_title', 'feed_link']:
|
||||
if not feeds:
|
||||
feeds = Feed.objects.filter(**{
|
||||
'%s__icontains' % field: query,
|
||||
|
|
|
@ -1,4 +1,7 @@
|
|||
import pyes
|
||||
from pyes.query import FilteredQuery, FuzzyQuery, TextQuery, PrefixQuery
|
||||
from pyes.filters import RangeFilter
|
||||
from pyes.utils import ESRange
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.models import User
|
||||
from utils import log as logging
|
||||
|
@ -76,17 +79,130 @@ class SearchStarredStory:
|
|||
|
||||
if not results.total:
|
||||
logging.user(user, "~FGSearch ~FCsaved stories~FG by title: ~SB%s" % text)
|
||||
q = pyes.query.FuzzyQuery('title', text)
|
||||
q = FuzzyQuery('title', text)
|
||||
results = cls.ES.search(q)
|
||||
|
||||
if not results.total:
|
||||
logging.user(user, "~FGSearch ~FCsaved stories~FG by content: ~SB%s" % text)
|
||||
q = pyes.query.FuzzyQuery('content', text)
|
||||
q = FuzzyQuery('content', text)
|
||||
results = cls.ES.search(q)
|
||||
|
||||
if not results.total:
|
||||
logging.user(user, "~FGSearch ~FCsaved stories~FG by author: ~SB%s" % text)
|
||||
q = pyes.query.FuzzyQuery('author', text)
|
||||
q = FuzzyQuery('author', text)
|
||||
results = cls.ES.search(q)
|
||||
|
||||
return results
|
||||
|
||||
|
||||
class SearchFeed:
    """Elasticsearch index of feeds, searchable by address, title and link."""

    # Shared pyes connection, created once when the class body is executed.
    ES = pyes.ES(settings.ELASTICSEARCH_HOSTS)
    # Base name; the index is "<name>-index" and the doc type "<name>-type".
    name = "feeds"

    @classmethod
    def create_elasticsearch_mapping(cls):
        """Drop and recreate the feed index, then install its field mapping.

        Deleting the index discards everything previously indexed in it.
        A ``pyes.TypeMissingException`` raised by the delete is reported on
        stdout and otherwise ignored.
        """
        index_name = "%s-index" % cls.name
        type_name = "%s-type" % cls.name

        try:
            cls.ES.delete_index(index_name)
        except pyes.TypeMissingException:
            # Parenthesised single-argument form prints the same text under
            # Python 2 and is also valid Python 3 syntax.
            print("Index missing, can't delete: %s" % index_name)

        # Named `index_settings` so the imported Django `settings` module is
        # not shadowed inside this method.
        index_settings = {
            "index": {
                "analysis": {
                    "analyzer": {
                        "url_analyzer": {
                            "type": "custom",
                            "tokenizer": "urls",
                            "filter": ["stop", "url_stop"],
                        },
                    },
                    "tokenizer": {
                        "urls": {
                            "type": "uax_url_email",
                            "max_token_length": 255,
                        },
                    },
                    "filter": {
                        "url_stop": {
                            "type": "stop",
                            "stopwords": ["http", "https"],
                        },
                        # NOTE(review): declared but not referenced by any
                        # analyzer above, so it currently has no effect.
                        "url_ngram": {
                            "type": "nGram",
                            "min_gram": 2,
                            "max_gram": 20,
                        },
                    },
                },
            },
        }
        cls.ES.create_index(index_name, index_settings)

        mapping = {
            'address': {
                'boost': 3.0,
                'index': 'analyzed',
                'store': 'yes',
                'type': 'string',
                "term_vector": "with_positions_offsets",
                "analyzer": "url_analyzer",
            },
            'title': {
                'boost': 2.0,
                'index': 'analyzed',
                'store': 'yes',
                'type': 'string',
                "term_vector": "with_positions_offsets",
            },
            'link': {
                'boost': 1.0,
                'index': 'analyzed',
                'store': 'yes',
                'type': 'string',
                "term_vector": "with_positions_offsets",
                "analyzer": "url_analyzer",
            },
            'num_subscribers': {
                'boost': 1.0,
                'index': 'not_analyzed',
                'store': 'yes',
                'type': 'integer',
            },
            'feed_id': {
                'store': 'yes',
                'type': 'integer',
            },
        }
        cls.ES.put_mapping(type_name, {'properties': mapping}, [index_name])

    @classmethod
    def index(cls, feed_id, title, address, link, num_subscribers):
        """Index one feed document.

        Args:
            feed_id: Stored in the document and also passed to ``ES.index``
                as its fourth argument (presumably the document id, so
                re-indexing a feed would overwrite its entry; confirm
                against the installed pyes version).
            title: Feed title.
            address: Feed address (URL of the feed itself).
            link: Feed link.
            num_subscribers: Subscriber count; ``query`` filters and sorts
                on this field.
        """
        doc = {
            "feed_id": feed_id,
            "title": title,
            "address": address,
            "link": link,
            "num_subscribers": num_subscribers,
        }
        cls.ES.index(doc, "%s-index" % cls.name, "%s-type" % cls.name, feed_id)

    @classmethod
    def query(cls, text):
        """Search feeds for ``text``, trying address, then title, then link.

        Each attempt is filtered on ``num_subscribers``, sorted by
        ``num_subscribers`` descending and capped at 5 hits. The first
        attempt with a non-zero ``total`` wins; if none matches, the
        (empty) result of the last attempt is returned.

        Args:
            text: The search text.

        Returns:
            The result object returned by ``ES.search``.
        """
        cls.ES.refresh()

        # presumably a lower bound of 2 on num_subscribers - confirm against
        # the ESRange signature of the installed pyes version.
        sub_filter = RangeFilter(qrange=ESRange('num_subscribers', 2))

        # (log label, query class, field) in order of preference.
        # The last entry previously targeted 'link.partial', but the mapping
        # declares no `partial` sub-field on `link`, so that fallback could
        # never return a hit; it now targets the mapped `link` field.
        attempts = (
            ('address', TextQuery, 'address'),
            ('title', PrefixQuery, 'title'),
            ('link', TextQuery, 'link'),
        )

        results = None
        for label, query_class, field in attempts:
            logging.info("~FGSearch ~FCfeeds~FG by %s: ~SB%s" % (label, text))
            q = query_class(field, text)
            results = cls.ES.search(FilteredQuery(q, sub_filter),
                                    sort="num_subscribers:desc", size=5)
            if results.total:
                break

        return results
|
||||
|
|
Loading…
Add table
Reference in a new issue