No need to reindex stories once indexed.

Samuel Clay committed 2024-12-01 12:32:18 -05:00
parent 0654024194
commit 5b9f6d402f
2 changed files with 21 additions and 15 deletions


@@ -380,7 +380,9 @@ class Feed(models.Model):
         self.save()
         if not self.discover_indexed:
-            for story in stories:
+            for index, story in enumerate(stories):
+                if index % 100 == 0:
+                    logging.debug(f" ---> ~FBIndexing discover story {index} of {len(stories)} in {self}")
                 story.index_story_for_discover()
             self.discover_indexed = True
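The Feed-side change swaps a bare loop for enumerate() so that progress is logged once per 100 stories instead of not at all. A minimal standalone sketch of this throttled-progress pattern; the logging setup and the story objects here are illustrative stand-ins, not NewsBlur's code:

    import logging

    logging.basicConfig(level=logging.DEBUG)

    def index_all(stories):
        # Log every 100th story so long runs show progress without
        # flooding the log with one line per story.
        total = len(stories)
        for index, story in enumerate(stories):
            if index % 100 == 0:
                logging.debug(f"Indexing discover story {index} of {total}")
            story.index_story_for_discover()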
@@ -538,7 +540,9 @@ class Feed(models.Model):
         duplicate_feed = DuplicateFeed.objects.filter(**criteria("duplicate_address", address))
         if duplicate_feed and len(duplicate_feed) > offset:
             feed = [duplicate_feed[offset].feed]
-            logging.debug(f" ---> Feeds found by duplicate address: {duplicate_feed} {feed} (offset: {offset})")
+            logging.debug(
+                f" ---> Feeds found by duplicate address: {duplicate_feed} {feed} (offset: {offset})"
+            )
         if not feed and aggressive:
             feed = (
                 cls.objects.filter(branch_from_feed=None)
@@ -546,7 +550,7 @@ class Feed(models.Model):
                 .order_by("-num_subscribers")
             )
             logging.debug(f" ---> Feeds found by link: {feed}")
         return feed

     @timelimit(10)


@@ -623,6 +623,17 @@ class DiscoverStory:
     ):
         cls.create_elasticsearch_mapping()
+        try:
+            record = cls.ES().get(index=cls.index_name(), id=story_hash, doc_type=cls.doc_type())
+            # logging.debug(f" ---> ~FBStory already indexed: {story_hash}")
+            return
+        except elasticsearch.exceptions.NotFoundError:
+            record = None
+        except (elasticsearch.exceptions.ConnectionError, urllib3.exceptions.NewConnectionError) as e:
+            logging.debug(f" ***> ~FRNo search server available for discover story indexing: {e}")
+        except elasticsearch.exceptions.ConflictError as e:
+            logging.debug(f" ***> ~FBAlready indexed discover story: {e}")
         if not story_content_vector:
             story_content_vector = cls.generate_story_content_vector(story_hash)
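This new guard is the heart of the commit: look the story up before doing any work and return immediately if it is already indexed, so the content vector is never regenerated for an indexed story. A sketch of the same create-if-absent pattern against a bare elasticsearch-py client (the index name, document, and client are placeholders, and the doc_type argument in the diff belongs to the older 6.x-era API, so it is omitted here):

    import logging

    import elasticsearch

    def create_if_absent(es, index, doc_id, doc):
        # Cheap existence check first: a GET is far cheaper than
        # regenerating an embedding and re-submitting the document.
        try:
            es.get(index=index, id=doc_id)
            return False  # already indexed, nothing to do
        except elasticsearch.exceptions.NotFoundError:
            pass  # not indexed yet, fall through to create
        except elasticsearch.exceptions.ConnectionError as e:
            logging.debug(f"No search server available: {e}")
            return False
        es.create(index=index, id=doc_id, body=doc)
        return True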
@@ -632,18 +643,9 @@ class DiscoverStory:
             "content_vector": story_content_vector,
         }
         try:
-            record = cls.ES().get(index=cls.index_name(), id=story_hash, doc_type=cls.doc_type())
-            # Check if the content vector has changed
-            if record and record["_source"]["content_vector"] != story_content_vector:
-                cls.ES().update(
-                    index=cls.index_name(),
-                    id=story_hash,
-                    body={"doc": doc},  # Wrap the document in a "doc" field for updates
-                    doc_type=cls.doc_type(),
-                )
-                logging.debug(f" ---> ~FBStory already indexed, new content vector: {story_hash}")
-            else:
-                logging.debug(f" ---> ~FBStory already indexed, no change: {story_hash}")
+            if not record:
+                logging.debug(f" ---> ~FCIndexing discover story: {story_hash}")
+                cls.ES().create(index=cls.index_name(), id=story_hash, body=doc, doc_type=cls.doc_type())
         except elasticsearch.exceptions.NotFoundError:
             cls.ES().create(index=cls.index_name(), id=story_hash, body=doc, doc_type=cls.doc_type())
             logging.debug(f" ---> ~FCIndexing discover story: {story_hash}")