Mirror of https://github.com/samuelclay/NewsBlur.git, synced 2025-09-18 21:50:56 +00:00
No need to reindex stories once indexed.
parent 0654024194
commit 5b9f6d402f

2 changed files with 21 additions and 15 deletions
```diff
@@ -380,7 +380,9 @@ class Feed(models.Model):
             self.save()
 
         if not self.discover_indexed:
-            for story in stories:
+            for index, story in enumerate(stories):
+                if index % 100 == 0:
+                    logging.debug(f" ---> ~FBIndexing discover story {index} of {len(stories)} in {self}")
                 story.index_story_for_discover()
 
             self.discover_indexed = True
```
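The guard added above turns discover indexing into a one-time pass per feed: stories are only walked while `discover_indexed` is still false, with a progress line every 100 stories. A minimal, self-contained sketch of that pattern; the `Feed`/`Story` stubs, names, and method bodies here are illustrative, and only the `discover_indexed` flag, the modulo-100 logging, and `index_story_for_discover()` come from the diff:

```python
import logging
from dataclasses import dataclass, field

logging.basicConfig(level=logging.DEBUG)


@dataclass
class Story:
    story_hash: str

    def index_story_for_discover(self):
        pass  # stand-in for the real Elasticsearch indexing call


@dataclass
class Feed:
    title: str
    discover_indexed: bool = False
    stories: list = field(default_factory=list)

    def index_stories_for_discover(self):
        # Index each story once; after the first full pass, later calls are no-ops.
        if self.discover_indexed:
            return
        for index, story in enumerate(self.stories):
            # Periodic progress logging keeps long backfills visible without flooding the log.
            if index % 100 == 0:
                logging.debug(f" ---> Indexing discover story {index} of {len(self.stories)} in {self.title}")
            story.index_story_for_discover()
        self.discover_indexed = True


feed = Feed("Example Feed", stories=[Story(f"story:{i}") for i in range(250)])
feed.index_stories_for_discover()  # logs progress at stories 0, 100, 200
feed.index_stories_for_discover()  # already indexed, returns immediately
```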
```diff
@@ -538,7 +540,9 @@ class Feed(models.Model):
             duplicate_feed = DuplicateFeed.objects.filter(**criteria("duplicate_address", address))
             if duplicate_feed and len(duplicate_feed) > offset:
                 feed = [duplicate_feed[offset].feed]
-                logging.debug(f" ---> Feeds found by duplicate address: {duplicate_feed} {feed} (offset: {offset})")
+                logging.debug(
+                    f" ---> Feeds found by duplicate address: {duplicate_feed} {feed} (offset: {offset})"
+                )
         if not feed and aggressive:
             feed = (
                 cls.objects.filter(branch_from_feed=None)
```
```diff
@@ -546,7 +550,7 @@ class Feed(models.Model):
                 .order_by("-num_subscribers")
             )
             logging.debug(f" ---> Feeds found by link: {feed}")
 
 
         return feed
 
     @timelimit(10)
```
```diff
@@ -623,6 +623,17 @@ class DiscoverStory:
     ):
         cls.create_elasticsearch_mapping()
 
+        try:
+            record = cls.ES().get(index=cls.index_name(), id=story_hash, doc_type=cls.doc_type())
+            # logging.debug(f" ---> ~FBStory already indexed: {story_hash}")
+            return
+        except elasticsearch.exceptions.NotFoundError:
+            record = None
+        except (elasticsearch.exceptions.ConnectionError, urllib3.exceptions.NewConnectionError) as e:
+            logging.debug(f" ***> ~FRNo search server available for discover story indexing: {e}")
+        except elasticsearch.exceptions.ConflictError as e:
+            logging.debug(f" ***> ~FBAlready indexed discover story: {e}")
+
         if not story_content_vector:
             story_content_vector = cls.generate_story_content_vector(story_hash)
 
```
```diff
@@ -632,18 +643,9 @@ class DiscoverStory:
             "content_vector": story_content_vector,
         }
         try:
-            record = cls.ES().get(index=cls.index_name(), id=story_hash, doc_type=cls.doc_type())
-            # Check if the content vector has changed
-            if record and record["_source"]["content_vector"] != story_content_vector:
-                cls.ES().update(
-                    index=cls.index_name(),
-                    id=story_hash,
-                    body={"doc": doc},  # Wrap the document in a "doc" field for updates
-                    doc_type=cls.doc_type(),
-                )
-                logging.debug(f" ---> ~FBStory already indexed, new content vector: {story_hash}")
-            else:
-                logging.debug(f" ---> ~FBStory already indexed, no change: {story_hash}")
+            if not record:
+                logging.debug(f" ---> ~FCIndexing discover story: {story_hash}")
+                cls.ES().create(index=cls.index_name(), id=story_hash, body=doc, doc_type=cls.doc_type())
         except elasticsearch.exceptions.NotFoundError:
             cls.ES().create(index=cls.index_name(), id=story_hash, body=doc, doc_type=cls.doc_type())
             logging.debug(f" ---> ~FCIndexing discover story: {story_hash}")
```
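Combined with the lookup added in the previous hunk, the new flow checks for the document first, returns early when the story is already in the index, and only computes the content vector and creates the document on a miss. A rough standalone sketch of that check-then-create pattern, assuming the elasticsearch 7.x Python client that the diff's `doc_type` usage implies and a reachable cluster; the index name, `embed` callable, and function name are hypothetical:

```python
import logging

import elasticsearch
from elasticsearch import Elasticsearch

logging.basicConfig(level=logging.DEBUG)

INDEX = "discover-stories"  # hypothetical index name


def index_story_once(es: Elasticsearch, story_hash: str, embed) -> None:
    """Create the story document only if it is not already indexed."""
    try:
        es.get(index=INDEX, id=story_hash)
        return  # already indexed: skip recomputing the vector entirely
    except elasticsearch.exceptions.NotFoundError:
        pass  # not indexed yet, fall through and create it
    except elasticsearch.exceptions.ConnectionError as e:
        logging.debug(f" ***> No search server available: {e}")
        return

    # Only pay for the embedding when the story is actually missing.
    doc = {"content_vector": embed(story_hash)}
    try:
        es.create(index=INDEX, id=story_hash, body=doc)
        logging.debug(f" ---> Indexed discover story: {story_hash}")
    except elasticsearch.exceptions.ConflictError:
        logging.debug(f" ---> Already indexed concurrently: {story_hash}")


# Example usage (requires a running Elasticsearch node):
# es = Elasticsearch("http://localhost:9200")
# index_story_once(es, "123:abcd", embed=lambda story_hash: [0.0] * 768)
```

Catching `ConflictError` on create keeps two workers that race to index the same story from failing; the diff logs the same condition as benign.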