No need to reindex stories once indexed.

Samuel Clay committed 2024-12-01 12:32:18 -05:00
parent 0654024194
commit 5b9f6d402f
2 changed files with 21 additions and 15 deletions


@@ -380,7 +380,9 @@ class Feed(models.Model):
         self.save()
         if not self.discover_indexed:
-            for story in stories:
+            for index, story in enumerate(stories):
+                if index % 100 == 0:
+                    logging.debug(f" ---> ~FBIndexing discover story {index} of {len(stories)} in {self}")
                 story.index_story_for_discover()
             self.discover_indexed = True
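The Feed-side change swaps a bare loop for enumerate() so that progress is logged once per 100 stories instead of not at all. A minimal standalone sketch of this throttled-progress pattern; the logging setup and the story objects here are illustrative stand-ins, not NewsBlur's code:

    import logging

    logging.basicConfig(level=logging.DEBUG)

    def index_all(stories):
        # Log every 100th story so long runs show progress without
        # flooding the log with one line per story.
        total = len(stories)
        for index, story in enumerate(stories):
            if index % 100 == 0:
                logging.debug(f"Indexing discover story {index} of {total}")
            story.index_story_for_discover()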
@@ -538,7 +540,9 @@ class Feed(models.Model):
         duplicate_feed = DuplicateFeed.objects.filter(**criteria("duplicate_address", address))
         if duplicate_feed and len(duplicate_feed) > offset:
             feed = [duplicate_feed[offset].feed]
-            logging.debug(f" ---> Feeds found by duplicate address: {duplicate_feed} {feed} (offset: {offset})")
+            logging.debug(
+                f" ---> Feeds found by duplicate address: {duplicate_feed} {feed} (offset: {offset})"
+            )
         if not feed and aggressive:
             feed = (
                 cls.objects.filter(branch_from_feed=None)
@@ -546,7 +550,7 @@ class Feed(models.Model):
                 .order_by("-num_subscribers")
             )
             logging.debug(f" ---> Feeds found by link: {feed}")
         return feed

     @timelimit(10)


@@ -623,6 +623,17 @@ class DiscoverStory:
     ):
         cls.create_elasticsearch_mapping()
+        try:
+            record = cls.ES().get(index=cls.index_name(), id=story_hash, doc_type=cls.doc_type())
+            # logging.debug(f" ---> ~FBStory already indexed: {story_hash}")
+            return
+        except elasticsearch.exceptions.NotFoundError:
+            record = None
+        except (elasticsearch.exceptions.ConnectionError, urllib3.exceptions.NewConnectionError) as e:
+            logging.debug(f" ***> ~FRNo search server available for discover story indexing: {e}")
+        except elasticsearch.exceptions.ConflictError as e:
+            logging.debug(f" ***> ~FBAlready indexed discover story: {e}")
         if not story_content_vector:
             story_content_vector = cls.generate_story_content_vector(story_hash)
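This new guard is the heart of the commit: look the story up before doing any work and return immediately if it is already indexed, so the content vector is never regenerated for an indexed story. A sketch of the same create-if-absent pattern against a bare elasticsearch-py client (the index name, document, and client are placeholders, and the doc_type argument in the diff belongs to the older 6.x-era API, so it is omitted here):

    import logging

    import elasticsearch

    def create_if_absent(es, index, doc_id, doc):
        # Cheap existence check first: a GET is far cheaper than
        # regenerating an embedding and re-submitting the document.
        try:
            es.get(index=index, id=doc_id)
            return False  # already indexed, nothing to do
        except elasticsearch.exceptions.NotFoundError:
            pass  # not indexed yet, fall through to create
        except elasticsearch.exceptions.ConnectionError as e:
            logging.debug(f"No search server available: {e}")
            return False
        es.create(index=index, id=doc_id, body=doc)
        return True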
@@ -632,18 +643,9 @@ class DiscoverStory:
             "content_vector": story_content_vector,
         }
         try:
-            record = cls.ES().get(index=cls.index_name(), id=story_hash, doc_type=cls.doc_type())
-            # Check if the content vector has changed
-            if record and record["_source"]["content_vector"] != story_content_vector:
-                cls.ES().update(
-                    index=cls.index_name(),
-                    id=story_hash,
-                    body={"doc": doc},  # Wrap the document in a "doc" field for updates
-                    doc_type=cls.doc_type(),
-                )
-                logging.debug(f" ---> ~FBStory already indexed, new content vector: {story_hash}")
-            else:
-                logging.debug(f" ---> ~FBStory already indexed, no change: {story_hash}")
+            if not record:
+                logging.debug(f" ---> ~FCIndexing discover story: {story_hash}")
+                cls.ES().create(index=cls.index_name(), id=story_hash, body=doc, doc_type=cls.doc_type())
         except elasticsearch.exceptions.NotFoundError:
             cls.ES().create(index=cls.index_name(), id=story_hash, body=doc, doc_type=cls.doc_type())
             logging.debug(f" ---> ~FCIndexing discover story: {story_hash}")