diff --git a/apps/rss_feeds/models.py b/apps/rss_feeds/models.py index 25a36433d..c5d023988 100644 --- a/apps/rss_feeds/models.py +++ b/apps/rss_feeds/models.py @@ -40,6 +40,7 @@ from utils.feed_functions import timelimit, TimeoutError from utils.feed_functions import relative_timesince from utils.feed_functions import seconds_timesince from utils.story_functions import strip_tags, htmldiff, strip_comments, strip_comments__lxml +from utils.story_functions import prep_for_search ENTRY_NEW, ENTRY_UPDATED, ENTRY_SAME, ENTRY_ERR = range(4) @@ -1179,25 +1180,34 @@ class Feed(models.Model): @classmethod def find_feed_stories(cls, feed_ids, query, offset=0, limit=25): + story_ids = SearchStory.query(feed_ids=feed_ids, query=query) stories_db = MStory.objects( - Q(story_feed_id__in=feed_ids) & - (Q(story_title__icontains=query) | - Q(story_author_name__icontains=query) | - Q(story_tags__icontains=query)) + story_hash__in=story_ids ).order_by('-story_date')[offset:offset+limit] + + # stories_db = MStory.objects( + # Q(story_feed_id__in=feed_ids) & + # (Q(story_title__icontains=query) | + # Q(story_author_name__icontains=query) | + # Q(story_tags__icontains=query)) + # ).order_by('-story_date')[offset:offset+limit] stories = cls.format_stories(stories_db) return stories def find_stories(self, query, offset=0, limit=25): - SearchStory.query(feed_ids=[self.pk], query=query) - + story_ids = SearchStory.query(feed_ids=[self.pk], query=query) stories_db = MStory.objects( - Q(story_feed_id=self.pk) & - (Q(story_title__icontains=query) | - Q(story_author_name__icontains=query) | - Q(story_tags__icontains=query)) + story_hash__in=story_ids ).order_by('-story_date')[offset:offset+limit] + + # stories_db = MStory.objects( + # Q(story_feed_id=self.pk) & + # (Q(story_title__icontains=query) | + # Q(story_author_name__icontains=query) | + # Q(story_tags__icontains=query)) + # ).order_by('-story_date')[offset:offset+limit] + stories = self.format_stories(stories_db, self.pk) return stories @@ -1722,7 +1732,8 @@ class MStory(mongo.Document): @classmethod def index_all_for_search(cls, offset=0): - SearchStory.create_elasticsearch_mapping() + if not offset: + SearchStory.create_elasticsearch_mapping() last_pk = Feed.objects.latest('pk').pk for f in xrange(offset, last_pk, 1000): @@ -1741,8 +1752,9 @@ class MStory(mongo.Document): story_content = zlib.decompress(self.story_content_z) SearchStory.index(story_hash=self.story_hash, story_title=self.story_title, - story_content=story_content, + story_content=prep_for_search(story_content), story_author=self.story_author_name, + story_feed_id=self.story_feed_id, story_date=self.story_date) @classmethod diff --git a/apps/search/models.py b/apps/search/models.py index b1507eb79..5d035254b 100644 --- a/apps/search/models.py +++ b/apps/search/models.py @@ -1,5 +1,5 @@ import pyes -from pyes.query import FuzzyQuery, MatchQuery +from pyes.query import MatchQuery from django.conf import settings from utils import log as logging @@ -24,46 +24,43 @@ class SearchStory: 'title': { 'boost': 2.0, 'index': 'analyzed', - 'store': 'yes', + 'store': 'no', 'type': 'string', - "term_vector" : "with_positions_offsets" + 'analyzer': 'snowball', }, 'content': { 'boost': 1.0, 'index': 'analyzed', - 'store': 'yes', + 'store': 'no', 'type': 'string', - "term_vector" : "with_positions_offsets" + 'analyzer': 'snowball', }, 'author': { 'boost': 1.0, 'index': 'analyzed', - 'store': 'yes', + 'store': 'no', 'type': 'string', - }, - 'story_hash': { - 'index': 'not_analyzed', - 'store': 'yes', - 'type': 'string', + 'analyzer': 'keyword', }, 'feed_id': { - 'store': 'yes', + 'store': 'no', 'type': 'integer' }, 'date': { - 'store': 'yes', + 'store': 'no', 'type': 'date', } } cls.ES.indices.put_mapping("%s-type" % cls.name, {'properties': mapping}, ["%s-index" % cls.name]) @classmethod - def index(cls, story_hash, story_title, story_content, story_author, story_date): + def index(cls, story_hash, story_title, story_content, story_author, story_feed_id, + story_date): doc = { - "story_hash": story_hash, "content": story_content, "title": story_title, "author": story_author, + "feed_id": story_feed_id, "date": story_date, } cls.ES.index(doc, "%s-index" % cls.name, "%s-type" % cls.name, story_hash) @@ -71,26 +68,15 @@ class SearchStory: @classmethod def query(cls, feed_ids, query): cls.ES.indices.refresh() - q = pyes.query.StringQuery(query) - results = cls.ES.search(q, indices=cls.index_name, doc_types=[cls.type_name]) - logging.info("~FGSearch ~FCstories~FG for: ~SB%s" % query) - - if not results.total: - logging.info("~FGSearch ~FCstories~FG by title: ~SB%s" % query) - q = FuzzyQuery('title', query) - results = cls.ES.search(q) - - if not results.total: - logging.info("~FGSearch ~FCstories~FG by content: ~SB%s" % query) - q = FuzzyQuery('content', query) - results = cls.ES.search(q) - - if not results.total: - logging.info("~FGSearch ~FCstories~FG by author: ~SB%s" % query) - q = FuzzyQuery('author', query) - results = cls.ES.search(q) - - return results + + string_q = pyes.query.StringQuery(query, default_operator="AND") + feed_q = pyes.query.TermsQuery('feed_id', feed_ids) + q = pyes.query.BoolQuery(must=[string_q, feed_q]) + results = cls.ES.search(q, indices=cls.index_name(), doc_types=[cls.type_name()]) + logging.info("~FGSearch ~FCstories~FG for: ~SB%s (across %s feed%s)" % + (query, len(feed_ids), 's' if len(feed_ids) != 1 else '')) + + return [r.get_id() for r in results] class SearchFeed: diff --git a/media/js/newsblur/views/folder_view.js b/media/js/newsblur/views/folder_view.js index aa08649a9..84219f711 100644 --- a/media/js/newsblur/views/folder_view.js +++ b/media/js/newsblur/views/folder_view.js @@ -165,8 +165,6 @@ NEWSBLUR.Views.Folder = Backbone.View.extend({ if (this.options.feedbar) { this.show_collapsed_folder_count(); - } - if (this.options.feedbar && NEWSBLUR.Globals.is_staff) { this.search_view = new NEWSBLUR.Views.FeedSearchView({ feedbar_view: this }).render(); diff --git a/media/js/newsblur/views/story_titles_header_view.js b/media/js/newsblur/views/story_titles_header_view.js index e9ba05ff9..313895709 100644 --- a/media/js/newsblur/views/story_titles_header_view.js +++ b/media/js/newsblur/views/story_titles_header_view.js @@ -51,6 +51,7 @@ NEWSBLUR.Views.StoryTitlesHeader = Backbone.View.extend({ } else if (this.showing_fake_folder) { $view = $(_.template('\