From 1fd724293b71beeebe7edb62c0c77e0fbdf0ea12 Mon Sep 17 00:00:00 2001
From: Stefano Lottini <hemidactylus@users.noreply.github.com>
Date: Tue, 28 Nov 2023 22:25:44 +0100
Subject: [PATCH] Astra DB vector store, move constructor docstring to class
 docstring (#13784)

This PR rearranges the docstring for the `AstraDB` vector store class so
as to have all useful information in the _class_ docstring for ease of
reading.

(Incidentally, due to an oversight, the constructor docstring had ended up
below some lines of code, where Python no longer treats it as a docstring,
so it was invisible to `help()` and documentation tools. Apologies.)
---
 .../langchain/vectorstores/astradb.py         | 83 +++++++++----------
 1 file changed, 41 insertions(+), 42 deletions(-)
diff --git a/libs/langchain/langchain/vectorstores/astradb.py b/libs/langchain/langchain/vectorstores/astradb.py
index 3716fa854cc9e..0eacbd2d9264a 100644
--- a/libs/langchain/langchain/vectorstores/astradb.py
+++ b/libs/langchain/langchain/vectorstores/astradb.py
@@ -77,6 +77,44 @@ class AstraDB(VectorStore):
 
                 vectorstore.add_texts(["Giraffes", "All good here"])
                 results = vectorstore.similarity_search("Everything's ok", k=1)
+
+    Constructor args (only keyword-arguments accepted):
+        embedding (Embeddings): embedding function to use.
+        collection_name (str): name of the Astra DB collection to create/use.
+        token (Optional[str]): API token for Astra DB usage.
+        api_endpoint (Optional[str]): full URL to the API endpoint,
+            such as "https://<DB-ID>-us-east1.apps.astra.datastax.com".
+        astra_db_client (Optional[Any]): *alternative to token+api_endpoint*,
+            you can pass an already-created 'astrapy.db.AstraDB' instance.
+        namespace (Optional[str]): namespace (aka keyspace) where the
+            collection is created. Defaults to the database's "default namespace".
+        metric (Optional[str]): similarity function to use out of those
+            available in Astra DB. If left out, it will use Astra DB API's
+            defaults (i.e. "cosine" - but, for performance reasons,
+            "dot_product" is suggested if embeddings are normalized to one).
+
+    Advanced arguments (coming with sensible defaults):
+        batch_size (Optional[int]): Size of batches for bulk insertions.
+        bulk_insert_batch_concurrency (Optional[int]): Number of threads
+            to insert batches concurrently.
+        bulk_insert_overwrite_concurrency (Optional[int]): Number of
+            threads in a batch to insert pre-existing entries.
+        bulk_delete_concurrency (Optional[int]): Number of threads
+            (for deleting multiple rows concurrently).
+
+    A note on concurrency: as a rule of thumb, on a typical client machine
+    it is suggested to keep the quantity
+        bulk_insert_batch_concurrency * bulk_insert_overwrite_concurrency
+    much below 1000 to avoid exhausting the client multithreading/networking
+    resources. The hardcoded defaults are somewhat conservative to meet
+    most machines' specs, but a sensible choice to test may be:
+        bulk_insert_batch_concurrency = 80
+        bulk_insert_overwrite_concurrency = 10
+    A bit of experimentation is required to nail the best results here,
+    depending on both the machine/network specs and the expected workload
+    (specifically, how often a write is an update of an existing id).
+    Remember you can pass concurrency settings to individual calls to
+    add_texts and add_documents as well.
     """
 
     @staticmethod
@@ -101,6 +139,9 @@ def __init__(
         bulk_insert_overwrite_concurrency: Optional[int] = None,
         bulk_delete_concurrency: Optional[int] = None,
     ) -> None:
+        """
+        Create an AstraDB vector store object. See class docstring for help.
+        """
         try:
             from astrapy.db import (
                 AstraDB as LibAstraDB,
@@ -113,48 +154,6 @@ def __init__(
                 "Could not import a recent astrapy python package. "
                 "Please install it with `pip install --upgrade astrapy`."
             )
-        """
-        Create an AstraDB vector store object.
-
-        Args (only keyword-arguments accepted):
-            embedding (Embeddings): embedding function to use.
-            collection_name (str): name of the Astra DB collection to create/use.
-            token (Optional[str]): API token for Astra DB usage.
-            api_endpoint (Optional[str]): full URL to the API endpoint,
-                such as "https://<DB-ID>-us-east1.apps.astra.datastax.com".
-            astra_db_client (Optional[Any]): *alternative to token+api_endpoint*,
-                you can pass an already-created 'astrapy.db.AstraDB' instance.
-            namespace (Optional[str]): namespace (aka keyspace) where the
-                collection is created. Defaults to the database's "default namespace".
-            metric (Optional[str]): similarity function to use out of those
-                available in Astra DB. If left out, it will use Astra DB API's
-                defaults (i.e. "cosine" - but, for performance reasons,
-                "dot_product" is suggested if embeddings are normalized to one).
-
-        Advanced arguments (coming with sensible defaults):
-            batch_size (Optional[int]): Size of batches for bulk insertions.
-            bulk_insert_batch_concurrency (Optional[int]): Number of threads
-                to insert batches concurrently.
-            bulk_insert_overwrite_concurrency (Optional[int]): Number of
-                threads in a batch to insert pre-existing entries.
-            bulk_delete_concurrency (Optional[int]): Number of threads
-                (for deleting multiple rows concurrently).
-
-        A note on concurrency: as a rule of thumb, on a typical client machine
-        it is suggested to keep the quantity
-            bulk_insert_batch_concurrency * bulk_insert_overwrite_concurrency
-        much below 1000 to avoid exhausting the client multithreading/networking
-        resources. The hardcoded defaults are somewhat conservative to meet
-        most machines' specs, but a sensible choice to test may be:
-            bulk_insert_batch_concurrency = 80
-            bulk_insert_overwrite_concurrency = 10
-        A bit of experimentation is required to nail the best results here,
-        depending on both the machine/network specs and the expected workload
-        (specifically, how often a write is an update of an existing id).
-        Remember you can pass concurrency settings to individual calls to
-        add_texts and add_documents as well.
-        """
-
         # Conflicting-arg checks:
         if astra_db_client is not None:
             if token is not None or api_endpoint is not None: