Skip to content

Commit

Permalink
small refinements
Browse files Browse the repository at this point in the history
  • Loading branch information
anakin87 committed Nov 22, 2024
1 parent d1e52f7 commit 02b4134
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ def __init__(
self,
*,
connection_string: Secret = Secret.from_env_var("PG_CONN_STR"),
create_extension: bool = True,
schema_name: str = "public",
table_name: str = "haystack_documents",
language: str = "english",
Expand All @@ -90,7 +91,6 @@ def __init__(
hnsw_index_name: str = "haystack_hnsw_index",
hnsw_ef_search: Optional[int] = None,
keyword_index_name: str = "haystack_keyword_index",
create_extension: bool = True,
):
"""
Creates a new PgvectorDocumentStore instance.
Expand All @@ -103,6 +103,10 @@ def __init__(
e.g.: `PG_CONN_STR="host=HOST port=PORT dbname=DBNAME user=USER password=PASSWORD"`
See [PostgreSQL Documentation](https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNSTRING)
for more details.
:param create_extension: Whether to create the pgvector extension if it doesn't exist.
Set this to `True` (default) to automatically create the extension if it is missing.
Creating the extension may require superuser privileges.
If set to `False`, ensure the extension is already installed; otherwise, an error will be raised.
:param schema_name: The name of the schema the table is created in. The schema must already exist.
:param table_name: The name of the table to use to store Haystack documents.
:param language: The language to be used to parse query and document content in keyword retrieval.
Expand Down Expand Up @@ -136,10 +140,10 @@ def __init__(
`"hnsw"`. You can find more information about this parameter in the
[pgvector documentation](https://github.com/pgvector/pgvector?tab=readme-ov-file#hnsw).
:param keyword_index_name: Index name for the Keyword index.
:param create_extension: create pgvector extension if it doesn't exist.
"""

self.connection_string = connection_string
self.create_extension = create_extension
self.table_name = table_name
self.schema_name = schema_name
self.embedding_dimension = embedding_dimension
Expand All @@ -155,7 +159,6 @@ def __init__(
self.hnsw_ef_search = hnsw_ef_search
self.keyword_index_name = keyword_index_name
self.language = language
self.create_extension = create_extension
self._connection = None
self._cursor = None
self._dict_cursor = None
Expand Down Expand Up @@ -250,6 +253,7 @@ def to_dict(self) -> Dict[str, Any]:
return default_to_dict(
self,
connection_string=self.connection_string.to_dict(),
create_extension=self.create_extension,
schema_name=self.schema_name,
table_name=self.table_name,
embedding_dimension=self.embedding_dimension,
Expand Down
4 changes: 4 additions & 0 deletions integrations/pgvector/tests/test_document_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ def test_init(monkeypatch):
monkeypatch.setenv("PG_CONN_STR", "some_connection_string")

document_store = PgvectorDocumentStore(
create_extension=True,
schema_name="my_schema",
table_name="my_table",
embedding_dimension=512,
Expand All @@ -79,6 +80,7 @@ def test_init(monkeypatch):
keyword_index_name="my_keyword_index",
)

assert document_store.create_extension
assert document_store.schema_name == "my_schema"
assert document_store.table_name == "my_table"
assert document_store.embedding_dimension == 512
Expand All @@ -97,6 +99,7 @@ def test_to_dict(monkeypatch):
monkeypatch.setenv("PG_CONN_STR", "some_connection_string")

document_store = PgvectorDocumentStore(
create_extension=False,
table_name="my_table",
embedding_dimension=512,
vector_function="l2_distance",
Expand All @@ -113,6 +116,7 @@ def test_to_dict(monkeypatch):
"type": "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore",
"init_parameters": {
"connection_string": {"env_vars": ["PG_CONN_STR"], "strict": True, "type": "env_var"},
"create_extension": False,
"table_name": "my_table",
"schema_name": "public",
"embedding_dimension": 512,
Expand Down
6 changes: 6 additions & 0 deletions integrations/pgvector/tests/test_retrievers.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def test_to_dict(self, mock_store):
"type": "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore",
"init_parameters": {
"connection_string": {"env_vars": ["PG_CONN_STR"], "strict": True, "type": "env_var"},
"create_extension": True,
"schema_name": "public",
"table_name": "haystack",
"embedding_dimension": 768,
Expand Down Expand Up @@ -82,6 +83,7 @@ def test_from_dict(self, monkeypatch):
"type": "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore",
"init_parameters": {
"connection_string": {"env_vars": ["PG_CONN_STR"], "strict": True, "type": "env_var"},
"create_extension": False,
"table_name": "haystack_test_to_dict",
"embedding_dimension": 768,
"vector_function": "cosine_similarity",
Expand All @@ -106,6 +108,7 @@ def test_from_dict(self, monkeypatch):

assert isinstance(document_store, PgvectorDocumentStore)
assert isinstance(document_store.connection_string, EnvVarSecret)
assert not document_store.create_extension
assert document_store.table_name == "haystack_test_to_dict"
assert document_store.embedding_dimension == 768
assert document_store.vector_function == "cosine_similarity"
Expand Down Expand Up @@ -176,6 +179,7 @@ def test_to_dict(self, mock_store):
"type": "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore",
"init_parameters": {
"connection_string": {"env_vars": ["PG_CONN_STR"], "strict": True, "type": "env_var"},
"create_extension": True,
"schema_name": "public",
"table_name": "haystack",
"embedding_dimension": 768,
Expand Down Expand Up @@ -207,6 +211,7 @@ def test_from_dict(self, monkeypatch):
"type": "haystack_integrations.document_stores.pgvector.document_store.PgvectorDocumentStore",
"init_parameters": {
"connection_string": {"env_vars": ["PG_CONN_STR"], "strict": True, "type": "env_var"},
"create_extension": False,
"table_name": "haystack_test_to_dict",
"embedding_dimension": 768,
"vector_function": "cosine_similarity",
Expand All @@ -230,6 +235,7 @@ def test_from_dict(self, monkeypatch):

assert isinstance(document_store, PgvectorDocumentStore)
assert isinstance(document_store.connection_string, EnvVarSecret)
assert not document_store.create_extension
assert document_store.table_name == "haystack_test_to_dict"
assert document_store.embedding_dimension == 768
assert document_store.vector_function == "cosine_similarity"
Expand Down

0 comments on commit 02b4134

Please sign in to comment.