Skip to content

Commit

Permalink
Rename the `embedding_similarity_function` parameter to `vector_function`
Browse files: browse the repository at this point in the history
  • Loading branch information
anakin87 committed Jan 23, 2024
1 parent ef442c2 commit 5c3d1ec
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@

COLUMNS = [el.split()[0] for el in CREATE_TABLE_STATEMENT.splitlines()[2:-1]]

SIMILARITY_FUNCTION_TO_POSTGRESQL_OPS = {
VECTOR_FUNCTION_TO_POSTGRESQL_OPS = {
"cosine_distance": "vector_cosine_ops",
"inner_product": "vector_ip_ops",
"l2_distance": "vector_l2_ops",
Expand All @@ -52,7 +52,7 @@ def __init__(
connection_string: str,
table_name: str = "haystack_documents",
embedding_dimension: int = 768,
embedding_similarity_function: Literal["cosine_distance", "inner_product", "l2_distance"] = "cosine_distance",
vector_function: Literal["cosine_distance", "inner_product", "l2_distance"] = "cosine_distance",
recreate_table: bool = False,
search_strategy: Literal["exact_nearest_neighbor", "hnsw"] = "exact_nearest_neighbor",
hnsw_recreate_index_if_exists: bool = False,
Expand All @@ -68,9 +68,9 @@ def __init__(
e.g. "postgresql://USER:PASSWORD@HOST:PORT/DB_NAME"
:param table_name: The name of the table to use to store Haystack documents. Defaults to "haystack_documents".
:param embedding_dimension: The dimension of the embedding. Defaults to 768.
:param embedding_similarity_function: The similarity function to use when searching for similar embeddings.
:param vector_function: The similarity function to use when searching for similar embeddings.
Defaults to "cosine_distance". Set it to one of the following values:
:type embedding_similarity_function: Literal["cosine_distance", "inner_product", "l2_distance"]
:type vector_function: Literal["cosine_distance", "inner_product", "l2_distance"]
:param recreate_table: Whether to recreate the table if it already exists. Defaults to False.
:param search_strategy: The search strategy to use when searching for similar embeddings.
Defaults to "exact_nearest_neighbor". "hnsw" is an approximate nearest neighbor search strategy,
Expand All @@ -89,7 +89,7 @@ def __init__(
self.connection_string = connection_string
self.table_name = table_name
self.embedding_dimension = embedding_dimension
self.embedding_similarity_function = embedding_similarity_function
self.vector_function = vector_function
self.recreate_table = recreate_table
self.search_strategy = search_strategy
self.hnsw_recreate_index_if_exists = hnsw_recreate_index_if_exists
Expand Down Expand Up @@ -120,7 +120,7 @@ def to_dict(self) -> Dict[str, Any]:
connection_string=self.connection_string,
table_name=self.table_name,
embedding_dimension=self.embedding_dimension,
embedding_similarity_function=self.embedding_similarity_function,
vector_function=self.vector_function,
recreate_table=self.recreate_table,
search_strategy=self.search_strategy,
hnsw_recreate_index_if_exists=self.hnsw_recreate_index_if_exists,
Expand Down Expand Up @@ -208,7 +208,7 @@ def _create_hnsw_index(self):
Internal method to create the HNSW index.
"""

pg_ops = SIMILARITY_FUNCTION_TO_POSTGRESQL_OPS[self.embedding_similarity_function]
pg_ops = VECTOR_FUNCTION_TO_POSTGRESQL_OPS[self.vector_function]
actual_hnsw_index_creation_kwargs = {
key: value
for key, value in self.hnsw_index_creation_kwargs.items()
Expand Down
12 changes: 6 additions & 6 deletions integrations/pgvector/tests/test_document_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@ def document_store(self, request):
connection_string = "postgresql://postgres:postgres@localhost:5432/postgres"
table_name = f"haystack_{request.node.name}"
embedding_dimension = 768
embedding_similarity_function = "cosine_distance"
vector_function = "cosine_distance"
recreate_table = True
search_strategy = "exact_nearest_neighbor"

store = PgvectorDocumentStore(
connection_string=connection_string,
table_name=table_name,
embedding_dimension=embedding_dimension,
embedding_similarity_function=embedding_similarity_function,
vector_function=vector_function,
recreate_table=recreate_table,
search_strategy=search_strategy,
)
Expand Down Expand Up @@ -64,7 +64,7 @@ def test_init(self):
connection_string="postgresql://postgres:postgres@localhost:5432/postgres",
table_name="my_table",
embedding_dimension=512,
embedding_similarity_function="l2_distance",
vector_function="l2_distance",
recreate_table=True,
search_strategy="hnsw",
hnsw_recreate_index_if_exists=True,
Expand All @@ -75,7 +75,7 @@ def test_init(self):
assert document_store.connection_string == "postgresql://postgres:postgres@localhost:5432/postgres"
assert document_store.table_name == "my_table"
assert document_store.embedding_dimension == 512
assert document_store.embedding_similarity_function == "l2_distance"
assert document_store.vector_function == "l2_distance"
assert document_store.recreate_table
assert document_store.search_strategy == "hnsw"
assert document_store.hnsw_recreate_index_if_exists
Expand All @@ -87,7 +87,7 @@ def test_to_dict(self):
connection_string="postgresql://postgres:postgres@localhost:5432/postgres",
table_name="my_table",
embedding_dimension=512,
embedding_similarity_function="l2_distance",
vector_function="l2_distance",
recreate_table=True,
search_strategy="hnsw",
hnsw_recreate_index_if_exists=True,
Expand All @@ -101,7 +101,7 @@ def test_to_dict(self):
"connection_string": "postgresql://postgres:postgres@localhost:5432/postgres",
"table_name": "my_table",
"embedding_dimension": 512,
"embedding_similarity_function": "l2_distance",
"vector_function": "l2_distance",
"recreate_table": True,
"search_strategy": "hnsw",
"hnsw_recreate_index_if_exists": True,
Expand Down

0 comments on commit 5c3d1ec

Please sign in to comment.