From 7219a58bf0aa0cccc5753d0fbe8eee0a5b47f32a Mon Sep 17 00:00:00 2001 From: anakin87 Date: Thu, 30 Nov 2023 15:36:05 +0100 Subject: [PATCH 01/10] improve deserialization --- .../components/writers/document_writer.py | 25 ++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/haystack/components/writers/document_writer.py b/haystack/components/writers/document_writer.py index 297aeb1c48..83e22e6968 100644 --- a/haystack/components/writers/document_writer.py +++ b/haystack/components/writers/document_writer.py @@ -1,7 +1,13 @@ +from pydoc import doc from typing import List, Optional, Dict, Any from haystack import component, Document, default_from_dict, default_to_dict, DeserializationError from haystack.document_stores import DocumentStore, DuplicatePolicy, document_store +import importlib + +import logging + +logger = logging.getLogger(__name__) @component @@ -41,9 +47,22 @@ def from_dict(cls, data: Dict[str, Any]) -> "DocumentWriter": raise DeserializationError("Missing 'document_store' in serialization data") if "type" not in init_params["document_store"]: raise DeserializationError("Missing 'type' in document store's serialization data") - if init_params["document_store"]["type"] not in document_store.registry: - raise DeserializationError(f"DocumentStore of type '{init_params['document_store']['type']}' not found.") - docstore_class = document_store.registry[init_params["document_store"]["type"]] + + try: + # Import the module first... + module, type_ = init_params["document_store"]["type"].rsplit(".", 1) + logger.debug("Trying to import %s", module) + module = importlib.import_module(module) + # ...then try again + # if init_params["document_store"]["type"] not in component.registry: + # raise DeserializationError( + # f"Successfully imported module {module} but can't find it in the component registry." + # "This is unexpected and most likely a bug." + # ) + except (ImportError, DeserializationError) as e: + raise DeserializationError(f"Component {module} not imported.") from e + + docstore_class = getattr(module, type_) docstore = docstore_class.from_dict(init_params["document_store"]) data["init_parameters"]["document_store"] = docstore From 5bb57ed519d235ad6ae6f5755dd1b021ca046b7d Mon Sep 17 00:00:00 2001 From: anakin87 Date: Fri, 1 Dec 2023 09:55:40 +0100 Subject: [PATCH 02/10] rm ds decorator --- .../components/caching/url_cache_checker.py | 23 +++++++++-- .../retrievers/in_memory_bm25_retriever.py | 18 +++------ .../in_memory_embedding_retriever.py | 18 +++------ .../components/writers/document_writer.py | 20 ++++------ haystack/document_stores/decorator.py | 39 ------------------- test/testing/test_factory.py | 6 --- 6 files changed, 36 insertions(+), 88 deletions(-) delete mode 100644 haystack/document_stores/decorator.py diff --git a/haystack/components/caching/url_cache_checker.py b/haystack/components/caching/url_cache_checker.py index c4c178f91f..e86f3e1f91 100644 --- a/haystack/components/caching/url_cache_checker.py +++ b/haystack/components/caching/url_cache_checker.py @@ -1,7 +1,14 @@ from typing import List, Dict, Any +import importlib + +import logging + from haystack import component, Document, default_from_dict, default_to_dict, DeserializationError -from haystack.document_stores import DocumentStore, document_store +from haystack.document_stores import DocumentStore + + +logger = logging.getLogger(__name__) @component @@ -34,9 +41,17 @@ def from_dict(cls, data: Dict[str, Any]) -> "UrlCacheChecker": raise DeserializationError("Missing 'document_store' in serialization data") if "type" not in init_params["document_store"]: raise DeserializationError("Missing 'type' in document store's serialization data") - if init_params["document_store"]["type"] not in document_store.registry: - raise DeserializationError(f"DocumentStore of type '{init_params['document_store']['type']}' not found.") - docstore_class = document_store.registry[init_params["document_store"]["type"]] + + try: + module_name, type_ = init_params["document_store"]["type"].rsplit(".", 1) + logger.debug("Trying to import %s", module_name) + module = importlib.import_module(module_name) + except (ImportError, DeserializationError) as e: + raise DeserializationError( + f"DocumentStore type '{init_params['document_store']['type']}' not correctly imported" + ) from e + + docstore_class = getattr(module, type_) docstore = docstore_class.from_dict(init_params["document_store"]) data["init_parameters"]["document_store"] = docstore diff --git a/haystack/components/retrievers/in_memory_bm25_retriever.py b/haystack/components/retrievers/in_memory_bm25_retriever.py index bf34c0ebfa..9399ac0a0b 100644 --- a/haystack/components/retrievers/in_memory_bm25_retriever.py +++ b/haystack/components/retrievers/in_memory_bm25_retriever.py @@ -1,7 +1,7 @@ from typing import Dict, List, Any, Optional -from haystack import component, Document, default_to_dict, default_from_dict, DeserializationError -from haystack.document_stores import InMemoryDocumentStore, document_store +from haystack import component, Document, default_to_dict, default_from_dict +from haystack.document_stores import InMemoryDocumentStore @component @@ -62,17 +62,9 @@ def from_dict(cls, data: Dict[str, Any]) -> "InMemoryBM25Retriever": """ Deserialize this component from a dictionary. """ - init_params = data.get("init_parameters", {}) - if "document_store" not in init_params: - raise DeserializationError("Missing 'document_store' in serialization data") - if "type" not in init_params["document_store"]: - raise DeserializationError("Missing 'type' in document store's serialization data") - if init_params["document_store"]["type"] not in document_store.registry: - raise DeserializationError(f"DocumentStore type '{init_params['document_store']['type']}' not found") - - docstore_class = document_store.registry[init_params["document_store"]["type"]] - docstore = docstore_class.from_dict(init_params["document_store"]) - data["init_parameters"]["document_store"] = docstore + data["init_parameters"]["document_store"] = InMemoryDocumentStore.from_dict( + data["init_parameters"]["document_store"] + ) return default_from_dict(cls, data) @component.output_types(documents=List[Document]) diff --git a/haystack/components/retrievers/in_memory_embedding_retriever.py b/haystack/components/retrievers/in_memory_embedding_retriever.py index b7332a53a5..4a06892cf9 100644 --- a/haystack/components/retrievers/in_memory_embedding_retriever.py +++ b/haystack/components/retrievers/in_memory_embedding_retriever.py @@ -1,7 +1,7 @@ from typing import Dict, List, Any, Optional -from haystack import component, Document, default_to_dict, default_from_dict, DeserializationError -from haystack.document_stores import InMemoryDocumentStore, document_store +from haystack import component, Document, default_to_dict, default_from_dict +from haystack.document_stores import InMemoryDocumentStore @component @@ -70,17 +70,9 @@ def from_dict(cls, data: Dict[str, Any]) -> "InMemoryEmbeddingRetriever": """ Deserialize this component from a dictionary. """ - init_params = data.get("init_parameters", {}) - if "document_store" not in init_params: - raise DeserializationError("Missing 'document_store' in serialization data") - if "type" not in init_params["document_store"]: - raise DeserializationError("Missing 'type' in document store's serialization data") - if init_params["document_store"]["type"] not in document_store.registry: - raise DeserializationError(f"DocumentStore type '{init_params['document_store']['type']}' not found") - - docstore_class = document_store.registry[init_params["document_store"]["type"]] - docstore = docstore_class.from_dict(init_params["document_store"]) - data["init_parameters"]["document_store"] = docstore + data["init_parameters"]["document_store"] = InMemoryDocumentStore.from_dict( + data["init_parameters"]["document_store"] + ) return default_from_dict(cls, data) @component.output_types(documents=List[Document]) diff --git a/haystack/components/writers/document_writer.py b/haystack/components/writers/document_writer.py index 83e22e6968..3ee161a28c 100644 --- a/haystack/components/writers/document_writer.py +++ b/haystack/components/writers/document_writer.py @@ -1,8 +1,7 @@ -from pydoc import doc from typing import List, Optional, Dict, Any from haystack import component, Document, default_from_dict, default_to_dict, DeserializationError -from haystack.document_stores import DocumentStore, DuplicatePolicy, document_store +from haystack.document_stores import DocumentStore, DuplicatePolicy import importlib import logging @@ -49,18 +48,13 @@ def from_dict(cls, data: Dict[str, Any]) -> "DocumentWriter": raise DeserializationError("Missing 'type' in document store's serialization data") try: - # Import the module first... - module, type_ = init_params["document_store"]["type"].rsplit(".", 1) - logger.debug("Trying to import %s", module) - module = importlib.import_module(module) - # ...then try again - # if init_params["document_store"]["type"] not in component.registry: - # raise DeserializationError( - # f"Successfully imported module {module} but can't find it in the component registry." - # "This is unexpected and most likely a bug." - # ) + module_name, type_ = init_params["document_store"]["type"].rsplit(".", 1) + logger.debug("Trying to import %s", module_name) + module = importlib.import_module(module_name) except (ImportError, DeserializationError) as e: - raise DeserializationError(f"Component {module} not imported.") from e + raise DeserializationError( + f"DocumentStore type '{init_params['document_store']['type']}' not correctly imported" + ) from e docstore_class = getattr(module, type_) docstore = docstore_class.from_dict(init_params["document_store"]) diff --git a/haystack/document_stores/decorator.py b/haystack/document_stores/decorator.py deleted file mode 100644 index c82ccf91d9..0000000000 --- a/haystack/document_stores/decorator.py +++ /dev/null @@ -1,39 +0,0 @@ -import logging - -logger = logging.getLogger(__name__) - - -class _DocumentStore: - """ - Marks a class as an Haystack _DocumentStore. - All classes decorated with @document_store will be registered here and can be used in Haystack Pipelines. - """ - - def __init__(self): - self.registry = {} - - def _decorate(self, cls): - cls.__haystack_document_store__ = True - - classname = f"{cls.__module__}.{cls.__name__}" - if classname in self.registry: - logger.error( - "DocumentStore %s is already registered. Previous imported from '%s', new imported from '%s'", - classname, - self.registry[classname], - cls, - ) - - self.registry[classname] = cls - logger.debug("Registered DocumentStore %s", cls) - - return cls - - def __call__(self, cls=None): - if cls: - return self._decorate(cls) - - return self._decorate - - -document_store = _DocumentStore() diff --git a/test/testing/test_factory.py b/test/testing/test_factory.py index 3a094f7ad7..04d3ec71c2 100644 --- a/test/testing/test_factory.py +++ b/test/testing/test_factory.py @@ -2,7 +2,6 @@ from haystack.dataclasses import Document from haystack.testing.factory import document_store_class, component_class -from haystack.document_stores.decorator import document_store from haystack.core.component import component @@ -23,11 +22,6 @@ def test_document_store_from_dict(): assert isinstance(store, MyStore) -def test_document_store_class_is_registered(): - MyStore = document_store_class("MyStore") - assert document_store.registry["haystack.testing.factory.MyStore"] == MyStore - - def test_document_store_class_with_documents(): doc = Document(id="fake_id", content="This is a document") MyStore = document_store_class("MyStore", documents=[doc]) From 4a23a5dc801fd33a3ec418f3f5f5e0f5f9e04972 Mon Sep 17 00:00:00 2001 From: anakin87 Date: Fri, 1 Dec 2023 10:07:46 +0100 Subject: [PATCH 03/10] improve tests --- test/components/retrievers/test_in_memory_bm25_retriever.py | 5 +++-- test/components/writers/test_document_writer.py | 5 ++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/test/components/retrievers/test_in_memory_bm25_retriever.py b/test/components/retrievers/test_in_memory_bm25_retriever.py index bf8e0125d6..de2a1e9c4b 100644 --- a/test/components/retrievers/test_in_memory_bm25_retriever.py +++ b/test/components/retrievers/test_in_memory_bm25_retriever.py @@ -74,12 +74,13 @@ def test_to_dict_with_custom_init_parameters(self): }, } + # + def test_from_dict(self): - document_store_class("MyFakeStore", bases=(InMemoryDocumentStore,)) data = { "type": "haystack.components.retrievers.in_memory_bm25_retriever.InMemoryBM25Retriever", "init_parameters": { - "document_store": {"type": "haystack.testing.factory.MyFakeStore", "init_parameters": {}}, + "document_store": {"type": "haystack.document_stores.InMemoryDocumentStore", "init_parameters": {}}, "filters": {"name": "test.txt"}, "top_k": 5, }, diff --git a/test/components/writers/test_document_writer.py b/test/components/writers/test_document_writer.py index 9d873cc15d..cc88ccfaf6 100644 --- a/test/components/writers/test_document_writer.py +++ b/test/components/writers/test_document_writer.py @@ -33,16 +33,15 @@ def test_to_dict_with_custom_init_parameters(self): } def test_from_dict(self): - mocked_docstore_class = document_store_class("MockedDocumentStore") data = { "type": "haystack.components.writers.document_writer.DocumentWriter", "init_parameters": { - "document_store": {"type": "haystack.testing.factory.MockedDocumentStore", "init_parameters": {}}, + "document_store": {"type": "haystack.document_stores.InMemoryDocumentStore", "init_parameters": {}}, "policy": "SKIP", }, } component = DocumentWriter.from_dict(data) - assert isinstance(component.document_store, mocked_docstore_class) + assert isinstance(component.document_store, InMemoryDocumentStore) assert component.policy == DuplicatePolicy.SKIP def test_from_dict_without_docstore(self): From b68256f89ee0e89eb3a7b0c10fc666c85a566f1a Mon Sep 17 00:00:00 2001 From: anakin87 Date: Fri, 1 Dec 2023 10:09:35 +0100 Subject: [PATCH 04/10] fix pylint --- haystack/components/writers/document_writer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/haystack/components/writers/document_writer.py b/haystack/components/writers/document_writer.py index 3ee161a28c..f23570be73 100644 --- a/haystack/components/writers/document_writer.py +++ b/haystack/components/writers/document_writer.py @@ -1,11 +1,11 @@ from typing import List, Optional, Dict, Any -from haystack import component, Document, default_from_dict, default_to_dict, DeserializationError -from haystack.document_stores import DocumentStore, DuplicatePolicy import importlib - import logging +from haystack import component, Document, default_from_dict, default_to_dict, DeserializationError +from haystack.document_stores import DocumentStore, DuplicatePolicy + logger = logging.getLogger(__name__) From f06aae7119f31810ea954dcbc4ac33e8ec0c268e Mon Sep 17 00:00:00 2001 From: anakin87 Date: Fri, 1 Dec 2023 10:13:53 +0100 Subject: [PATCH 05/10] rm decorator from module init --- haystack/document_stores/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/haystack/document_stores/__init__.py b/haystack/document_stores/__init__.py index 1b9b896d3e..7d5080ddb1 100644 --- a/haystack/document_stores/__init__.py +++ b/haystack/document_stores/__init__.py @@ -1,7 +1,6 @@ from haystack.document_stores.protocol import DocumentStore, DuplicatePolicy from haystack.document_stores.in_memory.document_store import InMemoryDocumentStore from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError, MissingDocumentError -from haystack.document_stores.decorator import document_store __all__ = [ "DocumentStore", @@ -10,5 +9,4 @@ "DocumentStoreError", "DuplicateDocumentError", "MissingDocumentError", - "document_store", ] From 4e583844e3cced38202b150f8e35dacb38293733 Mon Sep 17 00:00:00 2001 From: anakin87 Date: Fri, 1 Dec 2023 10:20:30 +0100 Subject: [PATCH 06/10] rm decorator --- haystack/document_stores/in_memory/document_store.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/haystack/document_stores/in_memory/document_store.py b/haystack/document_stores/in_memory/document_store.py index 605ef168e6..1b8878a3e5 100644 --- a/haystack/document_stores/in_memory/document_store.py +++ b/haystack/document_stores/in_memory/document_store.py @@ -8,7 +8,6 @@ from tqdm.auto import tqdm from haystack import default_from_dict, default_to_dict -from haystack.document_stores.decorator import document_store from haystack.dataclasses import Document from haystack.document_stores.protocol import DuplicatePolicy from haystack.utils.filters import document_matches_filter, convert @@ -27,7 +26,6 @@ DOT_PRODUCT_SCALING_FACTOR = 100 -@document_store class InMemoryDocumentStore: """ Stores data in-memory. It's ephemeral and cannot be saved to disk. From 8c5808e0c923d803fab8700a341dda0848aaff36 Mon Sep 17 00:00:00 2001 From: anakin87 Date: Fri, 1 Dec 2023 10:28:09 +0100 Subject: [PATCH 07/10] rm decorator from factory --- haystack/testing/factory.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/haystack/testing/factory.py b/haystack/testing/factory.py index 99dd0c375e..35b937e81d 100644 --- a/haystack/testing/factory.py +++ b/haystack/testing/factory.py @@ -1,7 +1,7 @@ from typing import Any, Dict, Optional, Tuple, Type, List, Union from haystack.dataclasses import Document -from haystack.document_stores import document_store, DocumentStore, DuplicatePolicy +from haystack.document_stores import DocumentStore, DuplicatePolicy from haystack.core.component import component, Component from haystack.core.serialization import default_to_dict, default_from_dict @@ -117,7 +117,7 @@ def to_dict(self) -> Dict[str, Any]: bases = (object,) cls = type(name, bases, fields) - return document_store(cls) + return cls def component_class( From ef15e6bbe603fc3177ce31ca89be801d84f07bdc Mon Sep 17 00:00:00 2001 From: anakin87 Date: Sun, 3 Dec 2023 17:15:52 +0100 Subject: [PATCH 08/10] fix tests --- .../components/caching/url_cache_checker.py | 2 +- .../retrievers/in_memory_bm25_retriever.py | 7 ++++++- .../in_memory_embedding_retriever.py | 7 ++++++- .../components/writers/document_writer.py | 2 +- haystack/core/serialization.py | 2 ++ .../caching/test_url_cache_checker.py | 12 +++++++----- .../test_in_memory_bm25_retriever.py | 19 +++++++++++-------- .../test_in_memory_embedding_retriever.py | 12 +++++++----- .../writers/test_document_writer.py | 9 ++++++--- 9 files changed, 47 insertions(+), 25 deletions(-) diff --git a/haystack/components/caching/url_cache_checker.py b/haystack/components/caching/url_cache_checker.py index e86f3e1f91..7ec740594a 100644 --- a/haystack/components/caching/url_cache_checker.py +++ b/haystack/components/caching/url_cache_checker.py @@ -48,7 +48,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "UrlCacheChecker": module = importlib.import_module(module_name) except (ImportError, DeserializationError) as e: raise DeserializationError( - f"DocumentStore type '{init_params['document_store']['type']}' not correctly imported" + f"DocumentStore of type '{init_params['document_store']['type']}' not correctly imported" ) from e docstore_class = getattr(module, type_) diff --git a/haystack/components/retrievers/in_memory_bm25_retriever.py b/haystack/components/retrievers/in_memory_bm25_retriever.py index 9399ac0a0b..79ced8d0bf 100644 --- a/haystack/components/retrievers/in_memory_bm25_retriever.py +++ b/haystack/components/retrievers/in_memory_bm25_retriever.py @@ -1,6 +1,6 @@ from typing import Dict, List, Any, Optional -from haystack import component, Document, default_to_dict, default_from_dict +from haystack import component, Document, default_to_dict, default_from_dict, DeserializationError from haystack.document_stores import InMemoryDocumentStore @@ -62,6 +62,11 @@ def from_dict(cls, data: Dict[str, Any]) -> "InMemoryBM25Retriever": """ Deserialize this component from a dictionary. """ + init_params = data.get("init_parameters", {}) + if "document_store" not in init_params: + raise DeserializationError("Missing 'document_store' in serialization data") + if "type" not in init_params["document_store"]: + raise DeserializationError("Missing 'type' in document store's serialization data") data["init_parameters"]["document_store"] = InMemoryDocumentStore.from_dict( data["init_parameters"]["document_store"] ) diff --git a/haystack/components/retrievers/in_memory_embedding_retriever.py b/haystack/components/retrievers/in_memory_embedding_retriever.py index 4a06892cf9..f3433703af 100644 --- a/haystack/components/retrievers/in_memory_embedding_retriever.py +++ b/haystack/components/retrievers/in_memory_embedding_retriever.py @@ -1,6 +1,6 @@ from typing import Dict, List, Any, Optional -from haystack import component, Document, default_to_dict, default_from_dict +from haystack import component, Document, default_to_dict, default_from_dict, DeserializationError from haystack.document_stores import InMemoryDocumentStore @@ -70,6 +70,11 @@ def from_dict(cls, data: Dict[str, Any]) -> "InMemoryEmbeddingRetriever": """ Deserialize this component from a dictionary. """ + init_params = data.get("init_parameters", {}) + if "document_store" not in init_params: + raise DeserializationError("Missing 'document_store' in serialization data") + if "type" not in init_params["document_store"]: + raise DeserializationError("Missing 'type' in document store's serialization data") data["init_parameters"]["document_store"] = InMemoryDocumentStore.from_dict( data["init_parameters"]["document_store"] ) diff --git a/haystack/components/writers/document_writer.py b/haystack/components/writers/document_writer.py index f23570be73..f4debb170e 100644 --- a/haystack/components/writers/document_writer.py +++ b/haystack/components/writers/document_writer.py @@ -53,7 +53,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "DocumentWriter": module = importlib.import_module(module_name) except (ImportError, DeserializationError) as e: raise DeserializationError( - f"DocumentStore type '{init_params['document_store']['type']}' not correctly imported" + f"DocumentStore of type '{init_params['document_store']['type']}' not correctly imported" ) from e docstore_class = getattr(module, type_) diff --git a/haystack/core/serialization.py b/haystack/core/serialization.py index 1020375e10..1dde4a6e33 100644 --- a/haystack/core/serialization.py +++ b/haystack/core/serialization.py @@ -99,5 +99,7 @@ def default_from_dict(cls: Type[object], data: Dict[str, Any]) -> Any: if "type" not in data: raise DeserializationError("Missing 'type' in serialization data") if data["type"] != f"{cls.__module__}.{cls.__name__}": + print(data["type"]) + print(f"{cls.__module__}.{cls.__name__}") raise DeserializationError(f"Class '{data['type']}' can't be deserialized as '{cls.__name__}'") return cls(**init_params) diff --git a/test/components/caching/test_url_cache_checker.py b/test/components/caching/test_url_cache_checker.py index 8803a09040..f43f6c9036 100644 --- a/test/components/caching/test_url_cache_checker.py +++ b/test/components/caching/test_url_cache_checker.py @@ -32,16 +32,18 @@ def test_to_dict_with_custom_init_parameters(self): } def test_from_dict(self): - mocked_docstore_class = document_store_class("MockedDocumentStore") data = { "type": "haystack.components.caching.url_cache_checker.UrlCacheChecker", "init_parameters": { - "document_store": {"type": "haystack.testing.factory.MockedDocumentStore", "init_parameters": {}}, + "document_store": { + "type": "haystack.document_stores.in_memory.document_store.InMemoryDocumentStore", + "init_parameters": {}, + }, "url_field": "my_url_field", }, } component = UrlCacheChecker.from_dict(data) - assert isinstance(component.document_store, mocked_docstore_class) + assert isinstance(component.document_store, InMemoryDocumentStore) assert component.url_field == "my_url_field" def test_from_dict_without_docstore(self): @@ -60,9 +62,9 @@ def test_from_dict_without_docstore_type(self): def test_from_dict_nonexisting_docstore(self): data = { "type": "haystack.components.caching.url_cache_checker.UrlCacheChecker", - "init_parameters": {"document_store": {"type": "NonexistingDocumentStore", "init_parameters": {}}}, + "init_parameters": {"document_store": {"type": "Nonexisting.DocumentStore", "init_parameters": {}}}, } - with pytest.raises(DeserializationError, match="DocumentStore of type 'NonexistingDocumentStore' not found."): + with pytest.raises(DeserializationError): UrlCacheChecker.from_dict(data) def test_run(self): diff --git a/test/components/retrievers/test_in_memory_bm25_retriever.py b/test/components/retrievers/test_in_memory_bm25_retriever.py index de2a1e9c4b..1ca21d57b4 100644 --- a/test/components/retrievers/test_in_memory_bm25_retriever.py +++ b/test/components/retrievers/test_in_memory_bm25_retriever.py @@ -57,17 +57,17 @@ def test_to_dict(self): } def test_to_dict_with_custom_init_parameters(self): - MyFakeStore = document_store_class("MyFakeStore", bases=(InMemoryDocumentStore,)) - document_store = MyFakeStore() - document_store.to_dict = lambda: {"type": "MyFakeStore", "init_parameters": {}} + ds = InMemoryDocumentStore() + serialized_ds = ds.to_dict() + component = InMemoryBM25Retriever( - document_store=document_store, filters={"name": "test.txt"}, top_k=5, scale_score=True + document_store=InMemoryDocumentStore(), filters={"name": "test.txt"}, top_k=5, scale_score=True ) data = component.to_dict() assert data == { "type": "haystack.components.retrievers.in_memory_bm25_retriever.InMemoryBM25Retriever", "init_parameters": { - "document_store": {"type": "MyFakeStore", "init_parameters": {}}, + "document_store": serialized_ds, "filters": {"name": "test.txt"}, "top_k": 5, "scale_score": True, @@ -80,7 +80,10 @@ def test_from_dict(self): data = { "type": "haystack.components.retrievers.in_memory_bm25_retriever.InMemoryBM25Retriever", "init_parameters": { - "document_store": {"type": "haystack.document_stores.InMemoryDocumentStore", "init_parameters": {}}, + "document_store": { + "type": "haystack.document_stores.in_memory.document_store.InMemoryDocumentStore", + "init_parameters": {}, + }, "filters": {"name": "test.txt"}, "top_k": 5, }, @@ -104,9 +107,9 @@ def test_from_dict_without_docstore_type(self): def test_from_dict_nonexisting_docstore(self): data = { "type": "haystack.components.retrievers.in_memory_bm25_retriever.InMemoryBM25Retriever", - "init_parameters": {"document_store": {"type": "NonexistingDocstore", "init_parameters": {}}}, + "init_parameters": {"document_store": {"type": "Nonexisting.Docstore", "init_parameters": {}}}, } - with pytest.raises(DeserializationError, match="DocumentStore type 'NonexistingDocstore' not found"): + with pytest.raises(DeserializationError): InMemoryBM25Retriever.from_dict(data) def test_retriever_valid_run(self, mock_docs): diff --git a/test/components/retrievers/test_in_memory_embedding_retriever.py b/test/components/retrievers/test_in_memory_embedding_retriever.py index b344f14e30..e3282cd9b9 100644 --- a/test/components/retrievers/test_in_memory_embedding_retriever.py +++ b/test/components/retrievers/test_in_memory_embedding_retriever.py @@ -71,11 +71,13 @@ def test_to_dict_with_custom_init_parameters(self): } def test_from_dict(self): - document_store_class("MyFakeStore", bases=(InMemoryDocumentStore,)) data = { "type": "haystack.components.retrievers.in_memory_embedding_retriever.InMemoryEmbeddingRetriever", "init_parameters": { - "document_store": {"type": "haystack.testing.factory.MyFakeStore", "init_parameters": {}}, + "document_store": { + "type": "haystack.document_stores.in_memory.document_store.InMemoryDocumentStore", + "init_parameters": {}, + }, "filters": {"name": "test.txt"}, "top_k": 5, }, @@ -99,15 +101,15 @@ def test_from_dict_without_docstore_type(self): "type": "haystack.components.retrievers.in_memory_embedding_retriever.InMemoryEmbeddingRetriever", "init_parameters": {"document_store": {"init_parameters": {}}}, } - with pytest.raises(DeserializationError, match="Missing 'type' in document store's serialization data"): + with pytest.raises(DeserializationError): InMemoryEmbeddingRetriever.from_dict(data) def test_from_dict_nonexisting_docstore(self): data = { "type": "haystack.components.retrievers.in_memory_embedding_retriever.InMemoryEmbeddingRetriever", - "init_parameters": {"document_store": {"type": "NonexistingDocstore", "init_parameters": {}}}, + "init_parameters": {"document_store": {"type": "Nonexisting.Docstore", "init_parameters": {}}}, } - with pytest.raises(DeserializationError, match="DocumentStore type 'NonexistingDocstore' not found"): + with pytest.raises(DeserializationError): InMemoryEmbeddingRetriever.from_dict(data) def test_valid_run(self): diff --git a/test/components/writers/test_document_writer.py b/test/components/writers/test_document_writer.py index cc88ccfaf6..9858538f98 100644 --- a/test/components/writers/test_document_writer.py +++ b/test/components/writers/test_document_writer.py @@ -36,7 +36,10 @@ def test_from_dict(self): data = { "type": "haystack.components.writers.document_writer.DocumentWriter", "init_parameters": { - "document_store": {"type": "haystack.document_stores.InMemoryDocumentStore", "init_parameters": {}}, + "document_store": { + "type": "haystack.document_stores.in_memory.document_store.InMemoryDocumentStore", + "init_parameters": {}, + }, "policy": "SKIP", }, } @@ -57,9 +60,9 @@ def test_from_dict_without_docstore_type(self): def test_from_dict_nonexisting_docstore(self): data = { "type": "DocumentWriter", - "init_parameters": {"document_store": {"type": "NonexistingDocumentStore", "init_parameters": {}}}, + "init_parameters": {"document_store": {"type": "Nonexisting.DocumentStore", "init_parameters": {}}}, } - with pytest.raises(DeserializationError, match="DocumentStore of type 'NonexistingDocumentStore' not found."): + with pytest.raises(DeserializationError): DocumentWriter.from_dict(data) def test_run(self): From 93b2c746c8dcad9dacccf28569be5d8ca5f96d8d Mon Sep 17 00:00:00 2001 From: anakin87 Date: Sun, 3 Dec 2023 22:53:58 +0100 Subject: [PATCH 09/10] release note --- .../notes/rm-docstore-decorator-d8d2ebfdf1d9702e.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 releasenotes/notes/rm-docstore-decorator-d8d2ebfdf1d9702e.yaml diff --git a/releasenotes/notes/rm-docstore-decorator-d8d2ebfdf1d9702e.yaml b/releasenotes/notes/rm-docstore-decorator-d8d2ebfdf1d9702e.yaml new file mode 100644 index 0000000000..b35d3b376d --- /dev/null +++ b/releasenotes/notes/rm-docstore-decorator-d8d2ebfdf1d9702e.yaml @@ -0,0 +1,5 @@ +--- +enhancements: + - | + Improve the deserialization logic for components that use a Document Store. + Remove the @document_store decorator and the registry of Document Stores. From a108b51d94de988463b696eeb0d5021b8576689f Mon Sep 17 00:00:00 2001 From: Stefano Fiorucci <44616784+anakin87@users.noreply.github.com> Date: Mon, 4 Dec 2023 00:14:03 +0100 Subject: [PATCH 10/10] rm print --- haystack/core/serialization.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/haystack/core/serialization.py b/haystack/core/serialization.py index 1dde4a6e33..1020375e10 100644 --- a/haystack/core/serialization.py +++ b/haystack/core/serialization.py @@ -99,7 +99,5 @@ def default_from_dict(cls: Type[object], data: Dict[str, Any]) -> Any: if "type" not in data: raise DeserializationError("Missing 'type' in serialization data") if data["type"] != f"{cls.__module__}.{cls.__name__}": - print(data["type"]) - print(f"{cls.__module__}.{cls.__name__}") raise DeserializationError(f"Class '{data['type']}' can't be deserialized as '{cls.__name__}'") return cls(**init_params)