From 49e800b402f0b4dd0a2278a0b7c613490aa2e76b Mon Sep 17 00:00:00 2001 From: ZanSara Date: Fri, 12 Jan 2024 13:21:51 +0100 Subject: [PATCH 01/17] rename model parameter in the openai doc embedder --- .../components/embedders/openai_document_embedder.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/haystack/components/embedders/openai_document_embedder.py b/haystack/components/embedders/openai_document_embedder.py index eee8d729a6..33df57406f 100644 --- a/haystack/components/embedders/openai_document_embedder.py +++ b/haystack/components/embedders/openai_document_embedder.py @@ -31,7 +31,7 @@ class OpenAIDocumentEmbedder: def __init__( self, api_key: Optional[str] = None, - model_name: str = "text-embedding-ada-002", + model: str = "text-embedding-ada-002", api_base_url: Optional[str] = None, organization: Optional[str] = None, prefix: str = "", @@ -45,7 +45,7 @@ def __init__( Create a OpenAIDocumentEmbedder component. :param api_key: The OpenAI API key. It can be explicitly provided or automatically read from the environment variable OPENAI_API_KEY (recommended). - :param model_name: The name of the model to use. + :param model: The name of the model to use. :param api_base_url: The OpenAI API Base url, defaults to None. For more details, see OpenAI [docs](https://platform.openai.com/docs/api-reference/audio). :param organization: The Organization ID, defaults to `None`. See [production best practices](https://platform.openai.com/docs/guides/production-best-practices/setting-up-your-organization). @@ -57,7 +57,7 @@ def __init__( :param meta_fields_to_embed: List of meta fields that should be embedded along with the Document text. :param embedding_separator: Separator used to concatenate the meta fields to the Document text. """ - self.model_name = model_name + self.model = model self.api_base_url = api_base_url self.organization = organization self.prefix = prefix @@ -73,7 +73,7 @@ def _get_telemetry_data(self) -> Dict[str, Any]: """ Data that is sent to Posthog for usage analytics. """ - return {"model": self.model_name} + return {"model": self.model} def to_dict(self) -> Dict[str, Any]: """ @@ -82,7 +82,7 @@ def to_dict(self) -> Dict[str, Any]: """ return default_to_dict( self, - model_name=self.model_name, + model=self.model, organization=self.organization, api_base_url=self.api_base_url, prefix=self.prefix, @@ -124,7 +124,7 @@ def _embed_batch(self, texts_to_embed: List[str], batch_size: int) -> Tuple[List range(0, len(texts_to_embed), batch_size), disable=not self.progress_bar, desc="Calculating embeddings" ): batch = texts_to_embed[i : i + batch_size] - response = self.client.embeddings.create(model=self.model_name, input=batch) + response = self.client.embeddings.create(model=self.model, input=batch) embeddings = [el.embedding for el in response.data] all_embeddings.extend(embeddings) From b6b5efe504a1ace520c16cc1ecd1a51c5aac5c4b Mon Sep 17 00:00:00 2001 From: ZanSara Date: Fri, 12 Jan 2024 13:22:09 +0100 Subject: [PATCH 02/17] fix tests for openai doc embedder --- .../embedders/test_openai_document_embedder.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/test/components/embedders/test_openai_document_embedder.py b/test/components/embedders/test_openai_document_embedder.py index 390c559d3e..97eb31444f 100644 --- a/test/components/embedders/test_openai_document_embedder.py +++ b/test/components/embedders/test_openai_document_embedder.py @@ -26,7 +26,7 @@ class TestOpenAIDocumentEmbedder: def test_init_default(self, monkeypatch): monkeypatch.setenv("OPENAI_API_KEY", "fake-api-key") embedder = OpenAIDocumentEmbedder() - assert embedder.model_name == "text-embedding-ada-002" + assert embedder.model == "text-embedding-ada-002" assert embedder.organization is None assert embedder.prefix == "" assert embedder.suffix == "" @@ -38,7 +38,7 @@ def test_init_default(self, monkeypatch): def test_init_with_parameters(self): embedder = OpenAIDocumentEmbedder( api_key="fake-api-key", - model_name="model", + model="model", organization="my-org", prefix="prefix", suffix="suffix", @@ -48,7 +48,7 @@ def test_init_with_parameters(self): embedding_separator=" | ", ) assert embedder.organization == "my-org" - assert embedder.model_name == "model" + assert embedder.model == "model" assert embedder.prefix == "prefix" assert embedder.suffix == "suffix" assert embedder.batch_size == 64 @@ -68,7 +68,7 @@ def test_to_dict(self): "type": "haystack.components.embedders.openai_document_embedder.OpenAIDocumentEmbedder", "init_parameters": { "api_base_url": None, - "model_name": "text-embedding-ada-002", + "model": "text-embedding-ada-002", "organization": None, "prefix": "", "suffix": "", @@ -82,7 +82,7 @@ def test_to_dict(self): def test_to_dict_with_custom_init_parameters(self): component = OpenAIDocumentEmbedder( api_key="fake-api-key", - model_name="model", + model="model", organization="my-org", prefix="prefix", suffix="suffix", @@ -96,7 +96,7 @@ def test_to_dict_with_custom_init_parameters(self): "type": "haystack.components.embedders.openai_document_embedder.OpenAIDocumentEmbedder", "init_parameters": { "api_base_url": None, - "model_name": "model", + "model": "model", "organization": "my-org", "prefix": "prefix", "suffix": "suffix", @@ -174,7 +174,7 @@ def test_run(self): model = "text-embedding-ada-002" - embedder = OpenAIDocumentEmbedder(model_name=model, meta_fields_to_embed=["topic"], embedding_separator=" | ") + embedder = OpenAIDocumentEmbedder(model=model, meta_fields_to_embed=["topic"], embedding_separator=" | ") result = embedder.run(documents=docs) documents_with_embeddings = result["documents"] From cd1aeb498afd07353dddc8efd8398c033c5321c5 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Fri, 12 Jan 2024 13:24:22 +0100 Subject: [PATCH 03/17] rename model parameter in the openai text embedder --- .../components/embedders/openai_text_embedder.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/haystack/components/embedders/openai_text_embedder.py b/haystack/components/embedders/openai_text_embedder.py index 59a1d2e239..7b5b4f4aaf 100644 --- a/haystack/components/embedders/openai_text_embedder.py +++ b/haystack/components/embedders/openai_text_embedder.py @@ -29,7 +29,7 @@ class OpenAITextEmbedder: def __init__( self, api_key: Optional[str] = None, - model_name: str = "text-embedding-ada-002", + model: str = "text-embedding-ada-002", api_base_url: Optional[str] = None, organization: Optional[str] = None, prefix: str = "", @@ -40,7 +40,7 @@ def __init__( :param api_key: The OpenAI API key. It can be explicitly provided or automatically read from the environment variable OPENAI_API_KEY (recommended). - :param model_name: The name of the OpenAI model to use. For more details on the available models, + :param model: The name of the OpenAI model to use. For more details on the available models, see [OpenAI documentation](https://platform.openai.com/docs/guides/embeddings/embedding-models). :param organization: The Organization ID, defaults to `None`. See [production best practices](https://platform.openai.com/docs/guides/production-best-practices/setting-up-your-organization). @@ -48,7 +48,7 @@ def __init__( :param prefix: A string to add to the beginning of each text. :param suffix: A string to add to the end of each text. """ - self.model_name = model_name + self.model = model self.organization = organization self.prefix = prefix self.suffix = suffix @@ -59,7 +59,7 @@ def _get_telemetry_data(self) -> Dict[str, Any]: """ Data that is sent to Posthog for usage analytics. """ - return {"model": self.model_name} + return {"model": self.model} def to_dict(self) -> Dict[str, Any]: """ @@ -68,7 +68,7 @@ def to_dict(self) -> Dict[str, Any]: """ return default_to_dict( - self, model_name=self.model_name, organization=self.organization, prefix=self.prefix, suffix=self.suffix + self, model=self.model, organization=self.organization, prefix=self.prefix, suffix=self.suffix ) @component.output_types(embedding=List[float], meta=Dict[str, Any]) @@ -86,7 +86,7 @@ def run(self, text: str): # replace newlines, which can negatively affect performance. text_to_embed = text_to_embed.replace("\n", " ") - response = self.client.embeddings.create(model=self.model_name, input=text_to_embed) + response = self.client.embeddings.create(model=self.model, input=text_to_embed) meta = {"model": response.model, "usage": dict(response.usage)} return {"embedding": response.data[0].embedding, "meta": meta} From 5e04b2505cdc3318582694740d236fe042aa08ef Mon Sep 17 00:00:00 2001 From: ZanSara Date: Fri, 12 Jan 2024 13:24:36 +0100 Subject: [PATCH 04/17] fix tests for openai text embedder --- .../embedders/test_openai_text_embedder.py | 27 +++++-------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/test/components/embedders/test_openai_text_embedder.py b/test/components/embedders/test_openai_text_embedder.py index c9757d88ae..b6c16fc37b 100644 --- a/test/components/embedders/test_openai_text_embedder.py +++ b/test/components/embedders/test_openai_text_embedder.py @@ -12,21 +12,17 @@ def test_init_default(self, monkeypatch): embedder = OpenAITextEmbedder() assert embedder.client.api_key == "fake-api-key" - assert embedder.model_name == "text-embedding-ada-002" + assert embedder.model == "text-embedding-ada-002" assert embedder.organization is None assert embedder.prefix == "" assert embedder.suffix == "" def test_init_with_parameters(self): embedder = OpenAITextEmbedder( - api_key="fake-api-key", - model_name="model", - organization="fake-organization", - prefix="prefix", - suffix="suffix", + api_key="fake-api-key", model="model", organization="fake-organization", prefix="prefix", suffix="suffix" ) assert embedder.client.api_key == "fake-api-key" - assert embedder.model_name == "model" + assert embedder.model == "model" assert embedder.organization == "fake-organization" assert embedder.prefix == "prefix" assert embedder.suffix == "suffix" @@ -41,27 +37,18 @@ def test_to_dict(self): data = component.to_dict() assert data == { "type": "haystack.components.embedders.openai_text_embedder.OpenAITextEmbedder", - "init_parameters": { - "model_name": "text-embedding-ada-002", - "organization": None, - "prefix": "", - "suffix": "", - }, + "init_parameters": {"model": "text-embedding-ada-002", "organization": None, "prefix": "", "suffix": ""}, } def test_to_dict_with_custom_init_parameters(self): component = OpenAITextEmbedder( - api_key="fake-api-key", - model_name="model", - organization="fake-organization", - prefix="prefix", - suffix="suffix", + api_key="fake-api-key", model="model", organization="fake-organization", prefix="prefix", suffix="suffix" ) data = component.to_dict() assert data == { "type": "haystack.components.embedders.openai_text_embedder.OpenAITextEmbedder", "init_parameters": { - "model_name": "model", + "model": "model", "organization": "fake-organization", "prefix": "prefix", "suffix": "suffix", @@ -81,7 +68,7 @@ def test_run_wrong_input_format(self): def test_run(self): model = "text-embedding-ada-002" - embedder = OpenAITextEmbedder(model_name=model, prefix="prefix ", suffix=" suffix") + embedder = OpenAITextEmbedder(model=model, prefix="prefix ", suffix=" suffix") result = embedder.run(text="The food was delicious") assert len(result["embedding"]) == 1536 From 96fc525790a972eb65917fa6123119c9fc8b48fe Mon Sep 17 00:00:00 2001 From: ZanSara Date: Fri, 12 Jan 2024 13:26:30 +0100 Subject: [PATCH 05/17] rename model parameter in the st doc embedder --- .../sentence_transformers_document_embedder.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/haystack/components/embedders/sentence_transformers_document_embedder.py b/haystack/components/embedders/sentence_transformers_document_embedder.py index ee7d2fa526..ed29740c24 100644 --- a/haystack/components/embedders/sentence_transformers_document_embedder.py +++ b/haystack/components/embedders/sentence_transformers_document_embedder.py @@ -29,7 +29,7 @@ class SentenceTransformersDocumentEmbedder: def __init__( self, - model_name_or_path: str = "sentence-transformers/all-mpnet-base-v2", + model: str = "sentence-transformers/all-mpnet-base-v2", device: Optional[str] = None, token: Union[bool, str, None] = None, prefix: str = "", @@ -43,7 +43,7 @@ def __init__( """ Create a SentenceTransformersDocumentEmbedder component. - :param model_name_or_path: Local path or name of the model in Hugging Face's model hub, + :param model: Local path or name of the model in Hugging Face's model hub, such as ``'sentence-transformers/all-mpnet-base-v2'``. :param device: Device (like 'cuda' / 'cpu') that should be used for computation. Defaults to CPU. @@ -61,7 +61,7 @@ def __init__( :param embedding_separator: Separator used to concatenate the meta fields to the Document content. """ - self.model_name_or_path = model_name_or_path + self.model = model # TODO: remove device parameter and use Haystack's device management once migrated self.device = device or "cpu" self.token = token @@ -77,7 +77,7 @@ def _get_telemetry_data(self) -> Dict[str, Any]: """ Data that is sent to Posthog for usage analytics. """ - return {"model": self.model_name_or_path} + return {"model": self.model} def to_dict(self) -> Dict[str, Any]: """ @@ -85,7 +85,7 @@ def to_dict(self) -> Dict[str, Any]: """ return default_to_dict( self, - model_name_or_path=self.model_name_or_path, + model=self.model, device=self.device, token=self.token if not isinstance(self.token, str) else None, # don't serialize valid tokens prefix=self.prefix, @@ -103,7 +103,7 @@ def warm_up(self): """ if not hasattr(self, "embedding_backend"): self.embedding_backend = _SentenceTransformersEmbeddingBackendFactory.get_embedding_backend( - model_name_or_path=self.model_name_or_path, device=self.device, use_auth_token=self.token + model=self.model, device=self.device, use_auth_token=self.token ) @component.output_types(documents=List[Document]) From 93fa4391f7ffcccecae9938e5d297290584a5836 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Fri, 12 Jan 2024 13:26:45 +0100 Subject: [PATCH 06/17] fix tests for st doc embedder --- ...sentence_transformers_document_embedder.py | 32 +++++++++---------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/test/components/embedders/test_sentence_transformers_document_embedder.py b/test/components/embedders/test_sentence_transformers_document_embedder.py index 896ada124c..3495ee1c18 100644 --- a/test/components/embedders/test_sentence_transformers_document_embedder.py +++ b/test/components/embedders/test_sentence_transformers_document_embedder.py @@ -8,8 +8,8 @@ class TestSentenceTransformersDocumentEmbedder: def test_init_default(self): - embedder = SentenceTransformersDocumentEmbedder(model_name_or_path="model") - assert embedder.model_name_or_path == "model" + embedder = SentenceTransformersDocumentEmbedder(model="model") + assert embedder.model == "model" assert embedder.device == "cpu" assert embedder.token is None assert embedder.prefix == "" @@ -22,7 +22,7 @@ def test_init_default(self): def test_init_with_parameters(self): embedder = SentenceTransformersDocumentEmbedder( - model_name_or_path="model", + model="model", device="cuda", token=True, prefix="prefix", @@ -33,7 +33,7 @@ def test_init_with_parameters(self): meta_fields_to_embed=["test_field"], embedding_separator=" | ", ) - assert embedder.model_name_or_path == "model" + assert embedder.model == "model" assert embedder.device == "cuda" assert embedder.token is True assert embedder.prefix == "prefix" @@ -45,12 +45,12 @@ def test_init_with_parameters(self): assert embedder.embedding_separator == " | " def test_to_dict(self): - component = SentenceTransformersDocumentEmbedder(model_name_or_path="model") + component = SentenceTransformersDocumentEmbedder(model="model") data = component.to_dict() assert data == { "type": "haystack.components.embedders.sentence_transformers_document_embedder.SentenceTransformersDocumentEmbedder", "init_parameters": { - "model_name_or_path": "model", + "model": "model", "device": "cpu", "token": None, "prefix": "", @@ -65,7 +65,7 @@ def test_to_dict(self): def test_to_dict_with_custom_init_parameters(self): component = SentenceTransformersDocumentEmbedder( - model_name_or_path="model", + model="model", device="cuda", token="the-token", prefix="prefix", @@ -81,7 +81,7 @@ def test_to_dict_with_custom_init_parameters(self): assert data == { "type": "haystack.components.embedders.sentence_transformers_document_embedder.SentenceTransformersDocumentEmbedder", "init_parameters": { - "model_name_or_path": "model", + "model": "model", "device": "cuda", "token": None, # the token is not serialized "prefix": "prefix", @@ -98,25 +98,23 @@ def test_to_dict_with_custom_init_parameters(self): "haystack.components.embedders.sentence_transformers_document_embedder._SentenceTransformersEmbeddingBackendFactory" ) def test_warmup(self, mocked_factory): - embedder = SentenceTransformersDocumentEmbedder(model_name_or_path="model") + embedder = SentenceTransformersDocumentEmbedder(model="model") mocked_factory.get_embedding_backend.assert_not_called() embedder.warm_up() - mocked_factory.get_embedding_backend.assert_called_once_with( - model_name_or_path="model", device="cpu", use_auth_token=None - ) + mocked_factory.get_embedding_backend.assert_called_once_with(model="model", device="cpu", use_auth_token=None) @patch( "haystack.components.embedders.sentence_transformers_document_embedder._SentenceTransformersEmbeddingBackendFactory" ) def test_warmup_doesnt_reload(self, mocked_factory): - embedder = SentenceTransformersDocumentEmbedder(model_name_or_path="model") + embedder = SentenceTransformersDocumentEmbedder(model="model") mocked_factory.get_embedding_backend.assert_not_called() embedder.warm_up() embedder.warm_up() mocked_factory.get_embedding_backend.assert_called_once() def test_run(self): - embedder = SentenceTransformersDocumentEmbedder(model_name_or_path="model") + embedder = SentenceTransformersDocumentEmbedder(model="model") embedder.embedding_backend = MagicMock() embedder.embedding_backend.embed = lambda x, **kwargs: np.random.rand(len(x), 16).tolist() @@ -132,7 +130,7 @@ def test_run(self): assert isinstance(doc.embedding[0], float) def test_run_wrong_input_format(self): - embedder = SentenceTransformersDocumentEmbedder(model_name_or_path="model") + embedder = SentenceTransformersDocumentEmbedder(model="model") string_input = "text" list_integers_input = [1, 2, 3] @@ -149,7 +147,7 @@ def test_run_wrong_input_format(self): def test_embed_metadata(self): embedder = SentenceTransformersDocumentEmbedder( - model_name_or_path="model", meta_fields_to_embed=["meta_field"], embedding_separator="\n" + model="model", meta_fields_to_embed=["meta_field"], embedding_separator="\n" ) embedder.embedding_backend = MagicMock() @@ -172,7 +170,7 @@ def test_embed_metadata(self): def test_prefix_suffix(self): embedder = SentenceTransformersDocumentEmbedder( - model_name_or_path="model", + model="model", prefix="my_prefix ", suffix=" my_suffix", meta_fields_to_embed=["meta_field"], From 13a96b763d65ae3a133d87c62a2c238f6d5b423f Mon Sep 17 00:00:00 2001 From: ZanSara Date: Fri, 12 Jan 2024 14:04:49 +0100 Subject: [PATCH 07/17] rename model parameter in the st backend --- .../backends/sentence_transformers_backend.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/haystack/components/embedders/backends/sentence_transformers_backend.py b/haystack/components/embedders/backends/sentence_transformers_backend.py index b9d80a1570..08ebf9d6a0 100644 --- a/haystack/components/embedders/backends/sentence_transformers_backend.py +++ b/haystack/components/embedders/backends/sentence_transformers_backend.py @@ -14,15 +14,13 @@ class _SentenceTransformersEmbeddingBackendFactory: _instances: Dict[str, "_SentenceTransformersEmbeddingBackend"] = {} @staticmethod - def get_embedding_backend( - model_name_or_path: str, device: Optional[str] = None, use_auth_token: Union[bool, str, None] = None - ): - embedding_backend_id = f"{model_name_or_path}{device}{use_auth_token}" + def get_embedding_backend(model: str, device: Optional[str] = None, use_auth_token: Union[bool, str, None] = None): + embedding_backend_id = f"{model}{device}{use_auth_token}" if embedding_backend_id in _SentenceTransformersEmbeddingBackendFactory._instances: return _SentenceTransformersEmbeddingBackendFactory._instances[embedding_backend_id] embedding_backend = _SentenceTransformersEmbeddingBackend( - model_name_or_path=model_name_or_path, device=device, use_auth_token=use_auth_token + model=model, device=device, use_auth_token=use_auth_token ) _SentenceTransformersEmbeddingBackendFactory._instances[embedding_backend_id] = embedding_backend return embedding_backend @@ -33,13 +31,9 @@ class _SentenceTransformersEmbeddingBackend: Class to manage Sentence Transformers embeddings. """ - def __init__( - self, model_name_or_path: str, device: Optional[str] = None, use_auth_token: Union[bool, str, None] = None - ): + def __init__(self, model: str, device: Optional[str] = None, use_auth_token: Union[bool, str, None] = None): sentence_transformers_import.check() - self.model = SentenceTransformer( - model_name_or_path=model_name_or_path, device=device, use_auth_token=use_auth_token - ) + self.model = SentenceTransformer(model_name_or_path=model, device=device, use_auth_token=use_auth_token) def embed(self, data: List[str], **kwargs) -> List[List[float]]: embeddings = self.model.encode(data, **kwargs).tolist() From ff0bc039ccfb71ab863e6c350df0f679125c42fc Mon Sep 17 00:00:00 2001 From: ZanSara Date: Fri, 12 Jan 2024 14:05:07 +0100 Subject: [PATCH 08/17] fix tests for st backend --- .../test_sentence_transformers_embedding_backend.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/components/embedders/test_sentence_transformers_embedding_backend.py b/test/components/embedders/test_sentence_transformers_embedding_backend.py index 64f96cd734..36e396d314 100644 --- a/test/components/embedders/test_sentence_transformers_embedding_backend.py +++ b/test/components/embedders/test_sentence_transformers_embedding_backend.py @@ -8,11 +8,11 @@ @patch("haystack.components.embedders.backends.sentence_transformers_backend.SentenceTransformer") def test_factory_behavior(mock_sentence_transformer): embedding_backend = _SentenceTransformersEmbeddingBackendFactory.get_embedding_backend( - model_name_or_path="my_model", device="cpu" + model="my_model", device="cpu" ) same_embedding_backend = _SentenceTransformersEmbeddingBackendFactory.get_embedding_backend("my_model", "cpu") another_embedding_backend = _SentenceTransformersEmbeddingBackendFactory.get_embedding_backend( - model_name_or_path="another_model", device="cpu" + model="another_model", device="cpu" ) assert same_embedding_backend is embedding_backend @@ -22,7 +22,7 @@ def test_factory_behavior(mock_sentence_transformer): @patch("haystack.components.embedders.backends.sentence_transformers_backend.SentenceTransformer") def test_model_initialization(mock_sentence_transformer): _SentenceTransformersEmbeddingBackendFactory.get_embedding_backend( - model_name_or_path="model", device="cpu", use_auth_token="my_token" + model="model", device="cpu", use_auth_token="my_token" ) mock_sentence_transformer.assert_called_once_with( model_name_or_path="model", device="cpu", use_auth_token="my_token" @@ -31,7 +31,7 @@ def test_model_initialization(mock_sentence_transformer): @patch("haystack.components.embedders.backends.sentence_transformers_backend.SentenceTransformer") def test_embedding_function_with_kwargs(mock_sentence_transformer): - embedding_backend = _SentenceTransformersEmbeddingBackendFactory.get_embedding_backend(model_name_or_path="model") + embedding_backend = _SentenceTransformersEmbeddingBackendFactory.get_embedding_backend(model="model") data = ["sentence1", "sentence2"] embedding_backend.embed(data=data, normalize_embeddings=True) From 57ebc48f89eaf05d72f2692b91c226244babe651 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Fri, 12 Jan 2024 14:07:09 +0100 Subject: [PATCH 09/17] rename model parameter in the st text embedder --- .../embedders/sentence_transformers_text_embedder.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/haystack/components/embedders/sentence_transformers_text_embedder.py b/haystack/components/embedders/sentence_transformers_text_embedder.py index f54331449f..721afaa25c 100644 --- a/haystack/components/embedders/sentence_transformers_text_embedder.py +++ b/haystack/components/embedders/sentence_transformers_text_embedder.py @@ -28,7 +28,7 @@ class SentenceTransformersTextEmbedder: def __init__( self, - model_name_or_path: str = "sentence-transformers/all-mpnet-base-v2", + model: str = "sentence-transformers/all-mpnet-base-v2", device: Optional[str] = None, token: Union[bool, str, None] = None, prefix: str = "", @@ -40,7 +40,7 @@ def __init__( """ Create a SentenceTransformersTextEmbedder component. - :param model_name_or_path: Local path or name of the model in Hugging Face's model hub, + :param model: Local path or name of the model in Hugging Face's model hub, such as ``'sentence-transformers/all-mpnet-base-v2'``. :param device: Device (like 'cuda' / 'cpu') that should be used for computation. Defaults to CPU. @@ -56,7 +56,7 @@ def __init__( :param normalize_embeddings: If set to true, returned vectors will have length 1. """ - self.model_name_or_path = model_name_or_path + self.model = model # TODO: remove device parameter and use Haystack's device management once migrated self.device = device or "cpu" self.token = token @@ -70,7 +70,7 @@ def _get_telemetry_data(self) -> Dict[str, Any]: """ Data that is sent to Posthog for usage analytics. """ - return {"model": self.model_name_or_path} + return {"model": self.model} def to_dict(self) -> Dict[str, Any]: """ @@ -78,7 +78,7 @@ def to_dict(self) -> Dict[str, Any]: """ return default_to_dict( self, - model_name_or_path=self.model_name_or_path, + model=self.model, device=self.device, token=self.token if not isinstance(self.token, str) else None, # don't serialize valid tokens prefix=self.prefix, @@ -94,7 +94,7 @@ def warm_up(self): """ if not hasattr(self, "embedding_backend"): self.embedding_backend = _SentenceTransformersEmbeddingBackendFactory.get_embedding_backend( - model_name_or_path=self.model_name_or_path, device=self.device, use_auth_token=self.token + model=self.model, device=self.device, use_auth_token=self.token ) @component.output_types(embedding=List[float]) From da82f83299af7141eb1bff7ada977aec775d0333 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Fri, 12 Jan 2024 14:07:35 +0100 Subject: [PATCH 10/17] fix tests for st text embedder --- ...est_sentence_transformers_text_embedder.py | 32 +++++++++---------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/test/components/embedders/test_sentence_transformers_text_embedder.py b/test/components/embedders/test_sentence_transformers_text_embedder.py index ba763bfb91..53ac0fa6cf 100644 --- a/test/components/embedders/test_sentence_transformers_text_embedder.py +++ b/test/components/embedders/test_sentence_transformers_text_embedder.py @@ -8,8 +8,8 @@ class TestSentenceTransformersTextEmbedder: def test_init_default(self): - embedder = SentenceTransformersTextEmbedder(model_name_or_path="model") - assert embedder.model_name_or_path == "model" + embedder = SentenceTransformersTextEmbedder(model="model") + assert embedder.model == "model" assert embedder.device == "cpu" assert embedder.token is None assert embedder.prefix == "" @@ -20,7 +20,7 @@ def test_init_default(self): def test_init_with_parameters(self): embedder = SentenceTransformersTextEmbedder( - model_name_or_path="model", + model="model", device="cuda", token=True, prefix="prefix", @@ -29,7 +29,7 @@ def test_init_with_parameters(self): progress_bar=False, normalize_embeddings=True, ) - assert embedder.model_name_or_path == "model" + assert embedder.model == "model" assert embedder.device == "cuda" assert embedder.token is True assert embedder.prefix == "prefix" @@ -39,12 +39,12 @@ def test_init_with_parameters(self): assert embedder.normalize_embeddings is True def test_to_dict(self): - component = SentenceTransformersTextEmbedder(model_name_or_path="model") + component = SentenceTransformersTextEmbedder(model="model") data = component.to_dict() assert data == { "type": "haystack.components.embedders.sentence_transformers_text_embedder.SentenceTransformersTextEmbedder", "init_parameters": { - "model_name_or_path": "model", + "model": "model", "device": "cpu", "token": None, "prefix": "", @@ -57,7 +57,7 @@ def test_to_dict(self): def test_to_dict_with_custom_init_parameters(self): component = SentenceTransformersTextEmbedder( - model_name_or_path="model", + model="model", device="cuda", token=True, prefix="prefix", @@ -70,7 +70,7 @@ def test_to_dict_with_custom_init_parameters(self): assert data == { "type": "haystack.components.embedders.sentence_transformers_text_embedder.SentenceTransformersTextEmbedder", "init_parameters": { - "model_name_or_path": "model", + "model": "model", "device": "cuda", "token": True, "prefix": "prefix", @@ -82,12 +82,12 @@ def test_to_dict_with_custom_init_parameters(self): } def test_to_dict_not_serialize_token(self): - component = SentenceTransformersTextEmbedder(model_name_or_path="model", token="awesome-token") + component = SentenceTransformersTextEmbedder(model="model", token="awesome-token") data = component.to_dict() assert data == { "type": "haystack.components.embedders.sentence_transformers_text_embedder.SentenceTransformersTextEmbedder", "init_parameters": { - "model_name_or_path": "model", + "model": "model", "device": "cpu", "token": None, "prefix": "", @@ -102,25 +102,23 @@ def test_to_dict_not_serialize_token(self): "haystack.components.embedders.sentence_transformers_text_embedder._SentenceTransformersEmbeddingBackendFactory" ) def test_warmup(self, mocked_factory): - embedder = SentenceTransformersTextEmbedder(model_name_or_path="model") + embedder = SentenceTransformersTextEmbedder(model="model") mocked_factory.get_embedding_backend.assert_not_called() embedder.warm_up() - mocked_factory.get_embedding_backend.assert_called_once_with( - model_name_or_path="model", device="cpu", use_auth_token=None - ) + mocked_factory.get_embedding_backend.assert_called_once_with(model="model", device="cpu", use_auth_token=None) @patch( "haystack.components.embedders.sentence_transformers_text_embedder._SentenceTransformersEmbeddingBackendFactory" ) def test_warmup_doesnt_reload(self, mocked_factory): - embedder = SentenceTransformersTextEmbedder(model_name_or_path="model") + embedder = SentenceTransformersTextEmbedder(model="model") mocked_factory.get_embedding_backend.assert_not_called() embedder.warm_up() embedder.warm_up() mocked_factory.get_embedding_backend.assert_called_once() def test_run(self): - embedder = SentenceTransformersTextEmbedder(model_name_or_path="model") + embedder = SentenceTransformersTextEmbedder(model="model") embedder.embedding_backend = MagicMock() embedder.embedding_backend.embed = lambda x, **kwargs: np.random.rand(len(x), 16).tolist() @@ -133,7 +131,7 @@ def test_run(self): assert all(isinstance(el, float) for el in embedding) def test_run_wrong_input_format(self): - embedder = SentenceTransformersTextEmbedder(model_name_or_path="model") + embedder = SentenceTransformersTextEmbedder(model="model") embedder.embedding_backend = MagicMock() list_integers_input = [1, 2, 3] From af396ad4b9ed4375e311d912a345582a00bc0c8d Mon Sep 17 00:00:00 2001 From: ZanSara Date: Fri, 12 Jan 2024 14:12:15 +0100 Subject: [PATCH 11/17] fix docstring --- haystack/components/joiners/document_joiner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/haystack/components/joiners/document_joiner.py b/haystack/components/joiners/document_joiner.py index 4a73c0923a..0cf2535d5e 100644 --- a/haystack/components/joiners/document_joiner.py +++ b/haystack/components/joiners/document_joiner.py @@ -31,7 +31,7 @@ class DocumentJoiner: p = Pipeline() p.add_component(instance=InMemoryBM25Retriever(document_store=document_store), name="bm25_retriever") p.add_component( - instance=SentenceTransformersTextEmbedder(model_name_or_path="sentence-transformers/all-MiniLM-L6-v2"), + instance=SentenceTransformersTextEmbedder(model="sentence-transformers/all-MiniLM-L6-v2"), name="text_embedder", ) p.add_component(instance=InMemoryEmbeddingRetriever(document_store=document_store), name="embedding_retriever") From 4a842bb9ec1a93a351aa8f96665a34bf40359883 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Fri, 12 Jan 2024 14:12:32 +0100 Subject: [PATCH 12/17] fix pipeline utils --- haystack/pipeline_utils/rag.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/haystack/pipeline_utils/rag.py b/haystack/pipeline_utils/rag.py index 0f468a2888..fda4afcb6d 100644 --- a/haystack/pipeline_utils/rag.py +++ b/haystack/pipeline_utils/rag.py @@ -92,7 +92,7 @@ def resolve_embedder(embedding_model: str) -> SentenceTransformersTextEmbedder: :param embedding_model: The embedding model to use. """ try: - embedder = SentenceTransformersTextEmbedder(model_name_or_path=embedding_model) + embedder = SentenceTransformersTextEmbedder(model=embedding_model) except Exception: raise ValueError( f"Embedding model: {embedding_model} is not supported. Please provide a SentenceTransformers model." From 0c526b6f8200d9fb2bfa22e979870b786147100a Mon Sep 17 00:00:00 2001 From: ZanSara Date: Fri, 12 Jan 2024 14:13:06 +0100 Subject: [PATCH 13/17] fix e2e --- e2e/pipelines/test_dense_doc_search.py | 6 ++---- e2e/pipelines/test_eval_dense_doc_search.py | 6 ++---- e2e/pipelines/test_eval_hybrid_doc_search_pipeline.py | 3 +-- e2e/pipelines/test_eval_rag_pipelines.py | 7 +++---- e2e/pipelines/test_hybrid_doc_search_pipeline.py | 3 +-- e2e/pipelines/test_preprocessing_pipeline.py | 3 +-- e2e/pipelines/test_rag_pipelines.py | 5 ++--- examples/pipelines/indexing_pipeline.py | 3 +-- 8 files changed, 13 insertions(+), 23 deletions(-) diff --git a/e2e/pipelines/test_dense_doc_search.py b/e2e/pipelines/test_dense_doc_search.py index b83722b442..b116817aee 100644 --- a/e2e/pipelines/test_dense_doc_search.py +++ b/e2e/pipelines/test_dense_doc_search.py @@ -25,8 +25,7 @@ def test_dense_doc_search_pipeline(tmp_path, samples_path): instance=DocumentSplitter(split_by="sentence", split_length=250, split_overlap=30), name="splitter" ) indexing_pipeline.add_component( - instance=SentenceTransformersDocumentEmbedder(model_name_or_path="sentence-transformers/all-MiniLM-L6-v2"), - name="embedder", + instance=SentenceTransformersDocumentEmbedder(model="sentence-transformers/all-MiniLM-L6-v2"), name="embedder" ) indexing_pipeline.add_component(instance=DocumentWriter(document_store=InMemoryDocumentStore()), name="writer") @@ -60,8 +59,7 @@ def test_dense_doc_search_pipeline(tmp_path, samples_path): # Create the querying pipeline query_pipeline = Pipeline() query_pipeline.add_component( - instance=SentenceTransformersTextEmbedder(model_name_or_path="sentence-transformers/all-MiniLM-L6-v2"), - name="text_embedder", + instance=SentenceTransformersTextEmbedder(model="sentence-transformers/all-MiniLM-L6-v2"), name="text_embedder" ) query_pipeline.add_component( instance=InMemoryEmbeddingRetriever(document_store=filled_document_store, top_k=20), name="embedding_retriever" diff --git a/e2e/pipelines/test_eval_dense_doc_search.py b/e2e/pipelines/test_eval_dense_doc_search.py index d1631f4e2e..d70a4c5170 100644 --- a/e2e/pipelines/test_eval_dense_doc_search.py +++ b/e2e/pipelines/test_eval_dense_doc_search.py @@ -25,8 +25,7 @@ def test_dense_doc_search_pipeline(samples_path): instance=DocumentSplitter(split_by="sentence", split_length=250, split_overlap=30), name="splitter" ) indexing_pipeline.add_component( - instance=SentenceTransformersDocumentEmbedder(model_name_or_path="sentence-transformers/all-MiniLM-L6-v2"), - name="embedder", + instance=SentenceTransformersDocumentEmbedder(model="sentence-transformers/all-MiniLM-L6-v2"), name="embedder" ) indexing_pipeline.add_component(instance=DocumentWriter(document_store=InMemoryDocumentStore()), name="writer") @@ -45,8 +44,7 @@ def test_dense_doc_search_pipeline(samples_path): # Create the querying pipeline query_pipeline = Pipeline() query_pipeline.add_component( - instance=SentenceTransformersTextEmbedder(model_name_or_path="sentence-transformers/all-MiniLM-L6-v2"), - name="text_embedder", + instance=SentenceTransformersTextEmbedder(model="sentence-transformers/all-MiniLM-L6-v2"), name="text_embedder" ) query_pipeline.add_component( instance=InMemoryEmbeddingRetriever(document_store=filled_document_store, top_k=20), name="embedding_retriever" diff --git a/e2e/pipelines/test_eval_hybrid_doc_search_pipeline.py b/e2e/pipelines/test_eval_hybrid_doc_search_pipeline.py index 8d942583b1..0cd838bf67 100644 --- a/e2e/pipelines/test_eval_hybrid_doc_search_pipeline.py +++ b/e2e/pipelines/test_eval_hybrid_doc_search_pipeline.py @@ -13,8 +13,7 @@ def test_hybrid_doc_search_pipeline(): hybrid_pipeline = Pipeline() hybrid_pipeline.add_component(instance=InMemoryBM25Retriever(document_store=document_store), name="bm25_retriever") hybrid_pipeline.add_component( - instance=SentenceTransformersTextEmbedder(model_name_or_path="sentence-transformers/all-MiniLM-L6-v2"), - name="text_embedder", + instance=SentenceTransformersTextEmbedder(model="sentence-transformers/all-MiniLM-L6-v2"), name="text_embedder" ) hybrid_pipeline.add_component( instance=InMemoryEmbeddingRetriever(document_store=document_store), name="embedding_retriever" diff --git a/e2e/pipelines/test_eval_rag_pipelines.py b/e2e/pipelines/test_eval_rag_pipelines.py index 1a9f57f6be..85d44c8ac8 100644 --- a/e2e/pipelines/test_eval_rag_pipelines.py +++ b/e2e/pipelines/test_eval_rag_pipelines.py @@ -93,8 +93,7 @@ def test_embedding_retrieval_rag_pipeline(tmp_path): """ rag_pipeline = Pipeline() rag_pipeline.add_component( - instance=SentenceTransformersTextEmbedder(model_name_or_path="sentence-transformers/all-MiniLM-L6-v2"), - name="text_embedder", + instance=SentenceTransformersTextEmbedder(model="sentence-transformers/all-MiniLM-L6-v2"), name="text_embedder" ) rag_pipeline.add_component( instance=InMemoryEmbeddingRetriever(document_store=InMemoryDocumentStore()), name="retriever" @@ -102,7 +101,7 @@ def test_embedding_retrieval_rag_pipeline(tmp_path): rag_pipeline.add_component(instance=PromptBuilder(template=prompt_template), name="prompt_builder") rag_pipeline.add_component( instance=HuggingFaceLocalGenerator( - model_name_or_path="google/flan-t5-small", + model="google/flan-t5-small", task="text2text-generation", generation_kwargs={"max_new_tokens": 100, "temperature": 0.5, "do_sample": True}, ), @@ -124,7 +123,7 @@ def test_embedding_retrieval_rag_pipeline(tmp_path): document_store = rag_pipeline.get_component("retriever").document_store indexing_pipeline = Pipeline() indexing_pipeline.add_component( - instance=SentenceTransformersDocumentEmbedder(model_name_or_path="sentence-transformers/all-MiniLM-L6-v2"), + instance=SentenceTransformersDocumentEmbedder(model="sentence-transformers/all-MiniLM-L6-v2"), name="document_embedder", ) indexing_pipeline.add_component(instance=DocumentWriter(document_store=document_store), name="document_writer") diff --git a/e2e/pipelines/test_hybrid_doc_search_pipeline.py b/e2e/pipelines/test_hybrid_doc_search_pipeline.py index cca572b285..477e9f731f 100644 --- a/e2e/pipelines/test_hybrid_doc_search_pipeline.py +++ b/e2e/pipelines/test_hybrid_doc_search_pipeline.py @@ -14,8 +14,7 @@ def test_hybrid_doc_search_pipeline(tmp_path): hybrid_pipeline = Pipeline() hybrid_pipeline.add_component(instance=InMemoryBM25Retriever(document_store=document_store), name="bm25_retriever") hybrid_pipeline.add_component( - instance=SentenceTransformersTextEmbedder(model_name_or_path="sentence-transformers/all-MiniLM-L6-v2"), - name="text_embedder", + instance=SentenceTransformersTextEmbedder(model="sentence-transformers/all-MiniLM-L6-v2"), name="text_embedder" ) hybrid_pipeline.add_component( instance=InMemoryEmbeddingRetriever(document_store=document_store), name="embedding_retriever" diff --git a/e2e/pipelines/test_preprocessing_pipeline.py b/e2e/pipelines/test_preprocessing_pipeline.py index ebc246fb29..95bfc11245 100644 --- a/e2e/pipelines/test_preprocessing_pipeline.py +++ b/e2e/pipelines/test_preprocessing_pipeline.py @@ -25,8 +25,7 @@ def test_preprocessing_pipeline(tmp_path): instance=DocumentSplitter(split_by="sentence", split_length=1), name="splitter" ) preprocessing_pipeline.add_component( - instance=SentenceTransformersDocumentEmbedder(model_name_or_path="sentence-transformers/all-MiniLM-L6-v2"), - name="embedder", + instance=SentenceTransformersDocumentEmbedder(model="sentence-transformers/all-MiniLM-L6-v2"), name="embedder" ) preprocessing_pipeline.add_component(instance=DocumentWriter(document_store=document_store), name="writer") preprocessing_pipeline.connect("file_type_router.text/plain", "text_file_converter.sources") diff --git a/e2e/pipelines/test_rag_pipelines.py b/e2e/pipelines/test_rag_pipelines.py index 2d4cb30066..4fdc99ef7f 100644 --- a/e2e/pipelines/test_rag_pipelines.py +++ b/e2e/pipelines/test_rag_pipelines.py @@ -95,8 +95,7 @@ def test_embedding_retrieval_rag_pipeline(tmp_path): """ rag_pipeline = Pipeline() rag_pipeline.add_component( - instance=SentenceTransformersTextEmbedder(model_name_or_path="sentence-transformers/all-MiniLM-L6-v2"), - name="text_embedder", + instance=SentenceTransformersTextEmbedder(model="sentence-transformers/all-MiniLM-L6-v2"), name="text_embedder" ) rag_pipeline.add_component( instance=InMemoryEmbeddingRetriever(document_store=InMemoryDocumentStore()), name="retriever" @@ -131,7 +130,7 @@ def test_embedding_retrieval_rag_pipeline(tmp_path): document_store = rag_pipeline.get_component("retriever").document_store indexing_pipeline = Pipeline() indexing_pipeline.add_component( - instance=SentenceTransformersDocumentEmbedder(model_name_or_path="sentence-transformers/all-MiniLM-L6-v2"), + instance=SentenceTransformersDocumentEmbedder(model="sentence-transformers/all-MiniLM-L6-v2"), name="document_embedder", ) indexing_pipeline.add_component(instance=DocumentWriter(document_store=document_store), name="document_writer") diff --git a/examples/pipelines/indexing_pipeline.py b/examples/pipelines/indexing_pipeline.py index 38ed6e081d..782e0dc584 100644 --- a/examples/pipelines/indexing_pipeline.py +++ b/examples/pipelines/indexing_pipeline.py @@ -20,8 +20,7 @@ p.add_component(instance=DocumentCleaner(), name="cleaner") p.add_component(instance=DocumentSplitter(split_by="sentence", split_length=250, split_overlap=30), name="splitter") p.add_component( - instance=SentenceTransformersDocumentEmbedder(model_name_or_path="sentence-transformers/all-MiniLM-L6-v2"), - name="embedder", + instance=SentenceTransformersDocumentEmbedder(model="sentence-transformers/all-MiniLM-L6-v2"), name="embedder" ) p.add_component(instance=DocumentWriter(document_store=InMemoryDocumentStore()), name="writer") From ba531f83425ed91c5b08c8f269f68844ffff48bc Mon Sep 17 00:00:00 2001 From: ZanSara Date: Fri, 12 Jan 2024 14:19:56 +0100 Subject: [PATCH 14/17] reno --- .../notes/rename-model-param-embedders-7cc87a768554724d.yaml | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 releasenotes/notes/rename-model-param-embedders-7cc87a768554724d.yaml diff --git a/releasenotes/notes/rename-model-param-embedders-7cc87a768554724d.yaml b/releasenotes/notes/rename-model-param-embedders-7cc87a768554724d.yaml new file mode 100644 index 0000000000..e5a2587321 --- /dev/null +++ b/releasenotes/notes/rename-model-param-embedders-7cc87a768554724d.yaml @@ -0,0 +1,3 @@ +--- +upgrade: + - Rename the embedder parameters `model_name` and `model_name_or_path` to `model`. This change affects all Embedder classes. From c556146bd3370ec26e51a623de06ae94bc906378 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Fri, 12 Jan 2024 14:46:55 +0100 Subject: [PATCH 15/17] fix the indexing pipeline _create_embedder function --- haystack/pipeline_utils/indexing.py | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/haystack/pipeline_utils/indexing.py b/haystack/pipeline_utils/indexing.py index 21d5ad79bd..fb6c389ff4 100644 --- a/haystack/pipeline_utils/indexing.py +++ b/haystack/pipeline_utils/indexing.py @@ -197,22 +197,10 @@ def _find_embedder(self, embedding_model: str, init_kwargs: Optional[Dict[str, A ) return self._create_embedder(embedder_class, embedding_model, init_kwargs) - def _create_embedder( - self, embedder_class: Type, model_name: str, init_kwargs: Optional[Dict[str, Any]] = None - ) -> Any: - init_signature = inspect.signature(embedder_class.__init__) - - kwargs = {**(init_kwargs or {})} - - # Determine the correct parameter name and set it - if "model_name_or_path" in init_signature.parameters: - kwargs["model_name_or_path"] = model_name - elif "model_name" in init_signature.parameters: - kwargs["model_name"] = model_name - else: - raise ValueError(f"Could not find a parameter for the model name in the embedder class {embedder_class}") - - # Instantiate the class + def _create_embedder(self, embedder_class: Type, model: str, init_kwargs: Optional[Dict[str, Any]] = None) -> Any: + # Note: here we assume the embedder accepts a parameter called `model` that takes the model's name or path. + # See https://github.com/deepset-ai/haystack/issues/6534 + kwargs = {**(init_kwargs or {}), "model": model} return embedder_class(**kwargs) def _list_files_recursively(self, path: Union[str, Path]) -> List[str]: From d7250e52c8239ca5c65f37416f38cd933c7cd08b Mon Sep 17 00:00:00 2001 From: ZanSara Date: Fri, 12 Jan 2024 14:49:29 +0100 Subject: [PATCH 16/17] fix e2e eval rag pipeline --- e2e/pipelines/test_eval_rag_pipelines.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/e2e/pipelines/test_eval_rag_pipelines.py b/e2e/pipelines/test_eval_rag_pipelines.py index 85d44c8ac8..5fa50be4e5 100644 --- a/e2e/pipelines/test_eval_rag_pipelines.py +++ b/e2e/pipelines/test_eval_rag_pipelines.py @@ -28,7 +28,7 @@ def test_bm25_rag_pipeline(tmp_path): rag_pipeline.add_component(instance=PromptBuilder(template=prompt_template), name="prompt_builder") rag_pipeline.add_component( instance=HuggingFaceLocalGenerator( - model_name_or_path="google/flan-t5-small", + model="google/flan-t5-small", task="text2text-generation", generation_kwargs={"max_new_tokens": 100, "temperature": 0.5, "do_sample": True}, ), From 04591c24ed30b0f521aefb89b33c9a074245bc43 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Fri, 12 Jan 2024 15:04:21 +0100 Subject: [PATCH 17/17] pytest --- haystack/pipeline_utils/indexing.py | 1 - 1 file changed, 1 deletion(-) diff --git a/haystack/pipeline_utils/indexing.py b/haystack/pipeline_utils/indexing.py index fb6c389ff4..bccdf526ac 100644 --- a/haystack/pipeline_utils/indexing.py +++ b/haystack/pipeline_utils/indexing.py @@ -1,4 +1,3 @@ -import inspect import os import re from pathlib import Path