From 02096a6aa5e3b78b237b51bcba923bb33f348f09 Mon Sep 17 00:00:00 2001
From: Stefano Fiorucci <stefanofiorucci@gmail.com>
Date: Wed, 31 Jan 2024 17:02:56 +0100
Subject: [PATCH 1/5] Update
 integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py

Co-authored-by: Massimiliano Pippi <mpippi@gmail.com>
---
 .../document_stores/pgvector/document_store.py              | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py
index 73da14bdc..40612a135 100644
--- a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py
+++ b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py
@@ -89,9 +89,9 @@ def __init__(
         :param table_name: The name of the table to use to store Haystack documents. Defaults to "haystack_documents".
         :param embedding_dimension: The dimension of the embedding. Defaults to 768.
         :param vector_function: The similarity function to use when searching for similar embeddings.
-            Defaults to "cosine_similarity". "cosine_similarity" and "inner_product" are similarity functions,
-            so the most similar documents are the ones with the lowest score.
-            "l2_distance" is a distance function, so the most similar documents are the ones with the smallest score.
+            Defaults to "cosine_similarity". "cosine_similarity" and "inner_product" are similarity functions and
+            higher scores indicate greater similarity between the documents.
+            "l2_distance" returns the straight-line distance between vectors, and the most similar documents are the ones with the smallest score.
             When using the "hnsw" search strategy, the vector_function value is used to build an appropriate index.
         :type vector_function: Literal["cosine_similarity", "inner_product", "l2_distance"]
         :param recreate_table: Whether to recreate the table if it already exists. Defaults to False.

From 7dd59799beaa661b91ab08225fdd88da2d06c6e2 Mon Sep 17 00:00:00 2001
From: Stefano Fiorucci <stefanofiorucci@gmail.com>
Date: Wed, 31 Jan 2024 17:03:14 +0100
Subject: [PATCH 2/5] Update
 integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py

Co-authored-by: Massimiliano Pippi <mpippi@gmail.com>
---
 .../document_stores/pgvector/document_store.py                 | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py
index 40612a135..5e24168ea 100644
--- a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py
+++ b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py
@@ -92,7 +92,8 @@ def __init__(
             Defaults to "cosine_similarity". "cosine_similarity" and "inner_product" are similarity functions and
             higher scores indicate greater similarity between the documents.
             "l2_distance" returns the straight-line distance between vectors, and the most similar documents are the ones with the smallest score.
-            When using the "hnsw" search strategy, the vector_function value is used to build an appropriate index.
+            
+            Important: when using the "hnsw" search strategy, an index will be created that depends on the `vector_function` passed here. Make sure subsequent queries will keep using the same vector similarity function in order to take advantage of the index.
         :type vector_function: Literal["cosine_similarity", "inner_product", "l2_distance"]
         :param recreate_table: Whether to recreate the table if it already exists. Defaults to False.
         :param search_strategy: The search strategy to use when searching for similar embeddings.

From 9b7ee61cc4aade84abb837ac9d790d4a0bf015d0 Mon Sep 17 00:00:00 2001
From: Stefano Fiorucci <stefanofiorucci@gmail.com>
Date: Wed, 31 Jan 2024 17:03:22 +0100
Subject: [PATCH 3/5] Update
 integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py

Co-authored-by: Massimiliano Pippi <mpippi@gmail.com>
---
 .../document_stores/pgvector/document_store.py                  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py
index 5e24168ea..033f867a2 100644
--- a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py
+++ b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py
@@ -99,7 +99,7 @@ def __init__(
         :param search_strategy: The search strategy to use when searching for similar embeddings.
             Defaults to "exact_nearest_neighbor". "hnsw" is an approximate nearest neighbor search strategy,
             which trades off some accuracy for speed; it is recommended for large numbers of documents.
-            When using the "hnsw" search strategy, the vector_function value is used to build an appropriate index.
+            Important: when using the "hnsw" search strategy, an index will be created that depends on the `vector_function` passed here. Make sure subsequent queries will keep using the same vector similarity function in             order to take advantage of the index.
         :type search_strategy: Literal["exact_nearest_neighbor", "hnsw"]
         :param hnsw_recreate_index_if_exists: Whether to recreate the HNSW index if it already exists.
             Defaults to False. Only used if search_strategy is set to "hnsw".

From eacb97f90a7754b06f1401087a6607d08c6247a0 Mon Sep 17 00:00:00 2001
From: Stefano Fiorucci <stefanofiorucci@gmail.com>
Date: Wed, 31 Jan 2024 17:03:54 +0100
Subject: [PATCH 4/5] Update
 integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py

Co-authored-by: Massimiliano Pippi <mpippi@gmail.com>
---
 .../pgvector/document_store.py                | 21 ++-----------------
 1 file changed, 2 insertions(+), 19 deletions(-)

diff --git a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py
index 033f867a2..eba2f5c21 100644
--- a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py
+++ b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py
@@ -445,26 +445,9 @@ def _embedding_retrieval(
         """
         Retrieves documents that are most similar to the query embedding using a vector similarity metric.
 
-        This method is not mean to be part of the public interface of
-        `PgvectorDocumentStore` nor called directly.
+        This method is not meant to be part of the public interface of
+        `PgvectorDocumentStore` and it should not be called directly.
         `PgvectorEmbeddingRetriever` uses this method directly and is the public interface for it.
-
-        :param query_embedding: Embedding of the query.
-        :param filters: Filters applied to the retrieved Documents. Defaults to None.
-            When using the "hnsw" search strategy, filters are applied after the most similar Documents are retrieved,
-            so the number of results may be less than `top_k`.
-            To better understand HNSW index creation and configuration, refer to the pgvector documentation:
-            https://github.com/pgvector/pgvector?tab=readme-ov-file#hnsw
-        :param top_k: Maximum number of Documents to return, defaults to 10
-        :param vector_function: The similarity function to use when searching for similar embeddings.
-            Defaults to the PgvectorDocumentStore's vector_function.
-            Since vector_function is used to build the HNSW index (when using the "hnsw" search strategy),
-            if a vector_function other than the one used to build the index is chosen,
-            the index will not be used and the search will be slower.
-            "cosine_similarity" and "inner_product" are similarity functions,
-            so the most similar documents are the ones with the lowest score.
-            "l2_distance" is a distance function, so the most similar documents are the ones with the smallest score.
-        :type vector_function: Literal["cosine_similarity", "inner_product", "l2_distance"]
         :raises ValueError
         :return: List of Documents that are most similar to `query_embedding`
         """

From 68b7a11985fa0268df635c93d02175347affad95 Mon Sep 17 00:00:00 2001
From: anakin87 <stefanofiorucci@gmail.com>
Date: Wed, 31 Jan 2024 17:07:44 +0100
Subject: [PATCH 5/5] Fix formatting: wrap over-long docstring lines

---
 .../document_stores/pgvector/document_store.py     | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py
index eba2f5c21..0abaaecce 100644
--- a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py
+++ b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py
@@ -91,15 +91,21 @@ def __init__(
         :param vector_function: The similarity function to use when searching for similar embeddings.
             Defaults to "cosine_similarity". "cosine_similarity" and "inner_product" are similarity functions and
             higher scores indicate greater similarity between the documents.
-            "l2_distance" returns the straight-line distance between vectors, and the most similar documents are the ones with the smallest score.
-            
-            Important: when using the "hnsw" search strategy, an index will be created that depends on the `vector_function` passed here. Make sure subsequent queries will keep using the same vector similarity function in order to take advantage of the index.
+            "l2_distance" returns the straight-line distance between vectors,
+            and the most similar documents are the ones with the smallest score.
+
+            Important: when using the "hnsw" search strategy, an index will be created that depends on the
+            `vector_function` passed here. Make sure subsequent queries will keep using the same
+            vector similarity function in order to take advantage of the index.
         :type vector_function: Literal["cosine_similarity", "inner_product", "l2_distance"]
         :param recreate_table: Whether to recreate the table if it already exists. Defaults to False.
         :param search_strategy: The search strategy to use when searching for similar embeddings.
             Defaults to "exact_nearest_neighbor". "hnsw" is an approximate nearest neighbor search strategy,
             which trades off some accuracy for speed; it is recommended for large numbers of documents.
-            Important: when using the "hnsw" search strategy, an index will be created that depends on the `vector_function` passed here. Make sure subsequent queries will keep using the same vector similarity function in             order to take advantage of the index.
+
+            Important: when using the "hnsw" search strategy, an index will be created that depends on the
+            `vector_function` parameter. Make sure subsequent queries will keep using the same
+            vector similarity function in order to take advantage of the index.
         :type search_strategy: Literal["exact_nearest_neighbor", "hnsw"]
         :param hnsw_recreate_index_if_exists: Whether to recreate the HNSW index if it already exists.
             Defaults to False. Only used if search_strategy is set to "hnsw".