From 0c9b4e4f82b8cdc732d1b5cfa02610ae28f81712 Mon Sep 17 00:00:00 2001 From: hsm207 Date: Wed, 7 Feb 2024 11:38:09 +0000 Subject: [PATCH 1/4] remove by_text parm --- langchain_weaviate/vectorstores.py | 5 ----- tests/integration_tests/test_vectorstores.py | 6 ++++-- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/langchain_weaviate/vectorstores.py b/langchain_weaviate/vectorstores.py index 19eca88..34b02ed 100644 --- a/langchain_weaviate/vectorstores.py +++ b/langchain_weaviate/vectorstores.py @@ -94,7 +94,6 @@ def __init__( relevance_score_fn: Optional[ Callable[[float], float] ] = _default_score_normalizer, - by_text: bool = True, use_multi_tenancy: bool = False, ): """Initialize with Weaviate client.""" @@ -110,7 +109,6 @@ def __init__( self._text_key = text_key self._query_attrs = [self._text_key] self.relevance_score_fn = relevance_score_fn - self._by_text = by_text if attributes is not None: self._query_attrs.extend(attributes) @@ -410,7 +408,6 @@ def from_texts( *, index_name: Optional[str] = None, text_key: str = "text", - by_text: bool = False, relevance_score_fn: Optional[ Callable[[float], float] ] = _default_score_normalizer, @@ -433,7 +430,6 @@ def from_texts( tenant: The tenant name. Defaults to None. index_name: Index name. text_key: Key to use for uploading/retrieving text to/from vectorstore. - by_text: Whether to search by text or by embedding. relevance_score_fn: Function for converting whatever distance function the vector store uses to a relevance score, which is a normalized similarity score (0 means dissimilar, 1 means similar). 
@@ -462,7 +458,6 @@ def from_texts( embedding=embedding, attributes=attributes, relevance_score_fn=relevance_score_fn, - by_text=by_text, use_multi_tenancy=tenant is not None, ) diff --git a/tests/integration_tests/test_vectorstores.py b/tests/integration_tests/test_vectorstores.py index 471adaf..c0c9f3e 100644 --- a/tests/integration_tests/test_vectorstores.py +++ b/tests/integration_tests/test_vectorstores.py @@ -165,7 +165,9 @@ def test_similarity_search_by_text( """Test end to end construction and search by text.""" docsearch = WeaviateVectorStore.from_texts( - texts, embedding_openai, client=weaviate_client, by_text=True + texts, + embedding_openai, + client=weaviate_client, ) output = docsearch.similarity_search("foo", k=1) @@ -361,7 +363,7 @@ def test_similarity_search_with_score( # now create an instance with an embedding docsearch = WeaviateVectorStore.from_texts( - texts, embedding_openai, client=weaviate_client, by_text=False + texts, embedding_openai, client=weaviate_client ) results = docsearch.similarity_search_with_score("kitty", k=1) From 8f5b8bfb3a59a4ca1682a569b3b1badd0c5815cf Mon Sep 17 00:00:00 2001 From: hsm207 Date: Wed, 7 Feb 2024 12:01:56 +0000 Subject: [PATCH 2/4] test invalid client type --- langchain_weaviate/vectorstores.py | 3 +-- tests/integration_tests/test_vectorstores.py | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/langchain_weaviate/vectorstores.py b/langchain_weaviate/vectorstores.py index 34b02ed..c56e2e7 100644 --- a/langchain_weaviate/vectorstores.py +++ b/langchain_weaviate/vectorstores.py @@ -100,8 +100,7 @@ def __init__( if not isinstance(client, weaviate.WeaviateClient): raise ValueError( - "client should be an instance of " - "weaviate.WeaviateClient, got {type(client)}" + f"client should be an instance of weaviate.WeaviateClient, got {type(client)}" ) self._client = client self._index_name = index_name or f"LangChain_{uuid4().hex}" diff --git 
a/tests/integration_tests/test_vectorstores.py b/tests/integration_tests/test_vectorstores.py index c0c9f3e..0923e20 100644 --- a/tests/integration_tests/test_vectorstores.py +++ b/tests/integration_tests/test_vectorstores.py @@ -590,3 +590,21 @@ def test_search_with_multi_tenancy( ValueError, match="has multi-tenancy enabled, but request was without tenant" ): docsearch.similarity_search("foo", k=1) + + +def test_invalid_client_type(): + with pytest.raises(ValueError) as excinfo: + invalid_client = "invalid_client" + index_name = "test_index" + text_key = "text" + + WeaviateVectorStore( + client=invalid_client, + index_name=index_name, + text_key=text_key, + ) + + assert ( + str(excinfo.value) + == "client should be an instance of weaviate.WeaviateClient, got <class 'str'>" + ) From bbdc33c4b7758ce65c7707c2d7f0fe2c20603684 Mon Sep 17 00:00:00 2001 From: hsm207 Date: Wed, 7 Feb 2024 12:29:40 +0000 Subject: [PATCH 3/4] fix long line --- langchain_weaviate/vectorstores.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/langchain_weaviate/vectorstores.py b/langchain_weaviate/vectorstores.py index c56e2e7..ad1f6d5 100644 --- a/langchain_weaviate/vectorstores.py +++ b/langchain_weaviate/vectorstores.py @@ -100,7 +100,8 @@ def __init__( if not isinstance(client, weaviate.WeaviateClient): raise ValueError( - f"client should be an instance of weaviate.WeaviateClient, got {type(client)}" + "client should be an instance of" + f" weaviate.WeaviateClient, got {type(client)}" ) self._client = client self._index_name = index_name or f"LangChain_{uuid4().hex}" From 4ad55c8834d3a4a3c80194dceead9c4dbfd85153 Mon Sep 17 00:00:00 2001 From: hsm207 Date: Wed, 7 Feb 2024 12:29:57 +0000 Subject: [PATCH 4/4] test embeddings property --- tests/integration_tests/test_vectorstores.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/integration_tests/test_vectorstores.py b/tests/integration_tests/test_vectorstores.py index 0923e20..4f7ee50 100644 ---
a/tests/integration_tests/test_vectorstores.py +++ b/tests/integration_tests/test_vectorstores.py @@ -608,3 +608,17 @@ def test_invalid_client_type(): str(excinfo.value) == "client should be an instance of weaviate.WeaviateClient, got <class 'str'>" ) + + +def test_embedding_property(weaviate_client, embedding_openai): + index_name = "test_index" + text_key = "text" + + docsearch = WeaviateVectorStore( + client=weaviate_client, + index_name=index_name, + text_key=text_key, + embedding=embedding_openai, + ) + + assert type(docsearch.embeddings) == OpenAIEmbeddings