From 826667281f7c19d51187c8a7c05ee9a9c7a3eec0 Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Thu, 14 Nov 2024 16:15:24 +0100 Subject: [PATCH 01/11] Lots of fixes --- .../embedders/nvidia/document_embedder.py | 19 ++++++-- .../embedders/nvidia/text_embedder.py | 28 +++++++++-- .../components/generators/nvidia/generator.py | 11 ++++- .../components/rankers/nvidia/ranker.py | 48 +++++++++++++++++-- .../utils/nvidia/nim_backend.py | 23 +++++---- .../nvidia/tests/test_document_embedder.py | 6 ++- integrations/nvidia/tests/test_ranker.py | 4 ++ 7 files changed, 116 insertions(+), 23 deletions(-) diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py index 606ec78fd..39c24a77b 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py @@ -2,16 +2,19 @@ # # SPDX-License-Identifier: Apache-2.0 +import os import warnings from typing import Any, Dict, List, Optional, Tuple, Union -from haystack import Document, component, default_from_dict, default_to_dict +from haystack import Document, component, default_from_dict, default_to_dict, logging from haystack.utils import Secret, deserialize_secrets_inplace from tqdm import tqdm from haystack_integrations.components.embedders.nvidia.truncate import EmbeddingTruncateMode from haystack_integrations.utils.nvidia import NimBackend, is_hosted, url_validation +logger = logging.getLogger(__name__) + _DEFAULT_API_URL = "https://ai.api.nvidia.com/v1/retrieval/nvidia" @@ -47,6 +50,7 @@ def __init__( meta_fields_to_embed: Optional[List[str]] = None, embedding_separator: str = "\n", truncate: Optional[Union[EmbeddingTruncateMode, str]] = None, + timeout: Optional[float] = None, ): """ Create a NvidiaTextEmbedder component. @@ -76,6 +80,9 @@ def __init__( :param truncate: Specifies how inputs longer that the maximum token length should be truncated. If None the behavior is model-dependent, see the official documentation for more information. + :param timeout: + Timeout for request calls, if not set it is inferred from the `NVIDIA_TIMEOUT` environment variable + or set to 60 by default. """ self.api_key = api_key @@ -98,6 +105,10 @@ def __init__( if is_hosted(api_url) and not self.model: # manually set default model self.model = "nvidia/nv-embedqa-e5-v5" + if timeout is None: + timeout = float(os.environ.get("NVIDIA_TIMEOUT", 60.0)) + self.timeout = timeout + def default_model(self): """Set default model in local NIM mode.""" valid_models = [ @@ -128,10 +139,11 @@ def warm_up(self): if self.truncate is not None: model_kwargs["truncate"] = str(self.truncate) self.backend = NimBackend( - self.model, + model=self.model, api_url=self.api_url, api_key=self.api_key, model_kwargs=model_kwargs, + timeout=self.timeout, ) self._initialized = True @@ -238,8 +250,7 @@ def run(self, documents: List[Document]): for doc in documents: if not doc.content: - msg = f"Document '{doc.id}' has no content to embed." 
- raise ValueError(msg) + logger.warning(f"Document '{doc.id}' has no content to embed.") texts_to_embed = self._prepare_texts_to_embed(documents) embeddings, metadata = self._embed_batch(texts_to_embed, self.batch_size) diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py index 4b7072f33..d9cbc84f5 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py @@ -2,15 +2,18 @@ # # SPDX-License-Identifier: Apache-2.0 +import os import warnings from typing import Any, Dict, List, Optional, Union -from haystack import component, default_from_dict, default_to_dict +from haystack import component, default_from_dict, default_to_dict, logging from haystack.utils import Secret, deserialize_secrets_inplace from haystack_integrations.components.embedders.nvidia.truncate import EmbeddingTruncateMode from haystack_integrations.utils.nvidia import NimBackend, is_hosted, url_validation +logger = logging.getLogger(__name__) + _DEFAULT_API_URL = "https://ai.api.nvidia.com/v1/retrieval/nvidia" @@ -44,6 +47,7 @@ def __init__( prefix: str = "", suffix: str = "", truncate: Optional[Union[EmbeddingTruncateMode, str]] = None, + timeout: Optional[float] = None, ): """ Create a NvidiaTextEmbedder component. @@ -64,6 +68,9 @@ def __init__( :param truncate: Specifies how inputs longer that the maximum token length should be truncated. If None the behavior is model-dependent, see the official documentation for more information. + :param timeout: + Timeout for request calls, if not set it is inferred from the `NVIDIA_TIMEOUT` environment variable + or set to 60 by default. """ self.api_key = api_key @@ -82,6 +89,10 @@ def __init__( if is_hosted(api_url) and not self.model: # manually set default model self.model = "nvidia/nv-embedqa-e5-v5" + if timeout is None: + timeout = float(os.environ.get("NVIDIA_TIMEOUT", 60.0)) + self.timeout = timeout + def default_model(self): """Set default model in local NIM mode.""" valid_models = [ @@ -89,6 +100,12 @@ def default_model(self): ] name = next(iter(valid_models), None) if name: + logger.warning( + "Default model is set as: {model_name}. \n" + "Set model using model parameter. \n" + "To get available models use available_models property.", + model_name=name + ) warnings.warn( f"Default model is set as: {name}. \n" "Set model using model parameter. \n" @@ -112,10 +129,11 @@ def warm_up(self): if self.truncate is not None: model_kwargs["truncate"] = str(self.truncate) self.backend = NimBackend( - self.model, + model=self.model, api_url=self.api_url, api_key=self.api_key, model_kwargs=model_kwargs, + timeout=self.timeout, ) self._initialized = True @@ -150,7 +168,9 @@ def from_dict(cls, data: Dict[str, Any]) -> "NvidiaTextEmbedder": :returns: The deserialized component. """ - deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"]) + init_parameters = data.get("init_parameters", {}) + if init_parameters: + deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"]) return default_from_dict(cls, data) @component.output_types(embedding=List[float], meta=Dict[str, Any]) @@ -162,7 +182,7 @@ def run(self, text: str): The text to embed. :returns: A dictionary with the following keys and values: - - `embedding` - Embeddng of the text. + - `embedding` - Embedding of the text. 
- `meta` - Metadata on usage statistics, etc. :raises RuntimeError: If the component was not initialized. diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py index 5bf71a9e1..5047d0682 100644 --- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py +++ b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py @@ -2,6 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 +import os import warnings from typing import Any, Dict, List, Optional @@ -49,6 +50,7 @@ def __init__( api_url: str = _DEFAULT_API_URL, api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"), model_arguments: Optional[Dict[str, Any]] = None, + timeout: Optional[float] = None, ): """ Create a NvidiaGenerator component. @@ -70,6 +72,9 @@ def __init__( specific to a model. Search your model in the [NVIDIA NIM](https://ai.nvidia.com) to find the arguments it accepts. + :param timeout: + Timeout for request calls, if not set it is inferred from the `NVIDIA_TIMEOUT` environment variable + or set to 60 by default. """ self._model = model self._api_url = url_validation(api_url, _DEFAULT_API_URL, ["v1/chat/completions"]) @@ -79,6 +84,9 @@ def __init__( self._backend: Optional[Any] = None self.is_hosted = is_hosted(api_url) + if timeout is None: + timeout = float(os.environ.get("NVIDIA_TIMEOUT", 60.0)) + self.timeout = timeout def default_model(self): """Set default model in local NIM mode.""" @@ -110,10 +118,11 @@ def warm_up(self): msg = "API key is required for hosted NVIDIA NIMs." raise ValueError(msg) self._backend = NimBackend( - self._model, + model=self._model, api_url=self._api_url, api_key=self._api_key, model_kwargs=self._model_arguments, + timeout=self.timeout, ) if not self.is_hosted and not self._model: diff --git a/integrations/nvidia/src/haystack_integrations/components/rankers/nvidia/ranker.py b/integrations/nvidia/src/haystack_integrations/components/rankers/nvidia/ranker.py index 9938b37d1..ca4181dc5 100644 --- a/integrations/nvidia/src/haystack_integrations/components/rankers/nvidia/ranker.py +++ b/integrations/nvidia/src/haystack_integrations/components/rankers/nvidia/ranker.py @@ -2,6 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 +import os import warnings from typing import Any, Dict, List, Optional, Union @@ -58,6 +59,11 @@ def __init__( api_url: Optional[str] = None, api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"), top_k: int = 5, + query_prefix: str = "", + document_prefix: str = "", + meta_fields_to_embed: Optional[List[str]] = None, + embedding_separator: str = "\n", + timeout: Optional[float] = None, ): """ Create a NvidiaRanker component. @@ -72,6 +78,19 @@ def __init__( Custom API URL for the NVIDIA NIM. :param top_k: Number of documents to return. + :param query_prefix: + A string to add at the beginning of the query text before ranking. + Use it to prepend the text with an instruction, as required by reranking models like `bge`. + :param document_prefix: + A string to add at the beginning of each document before ranking. You can use it to prepend the document + with an instruction, as required by embedding models like `bge`. + :param meta_fields_to_embed: + List of metadata fields to embed with the document. + :param embedding_separator: + Separator to concatenate metadata fields to the document. 
+ :param timeout: + Timeout for request calls, if not set it is inferred from the `NVIDIA_TIMEOUT` environment variable + or set to 60 by default. """ if model is not None and not isinstance(model, str): msg = "Ranker expects the `model` parameter to be a string." @@ -107,6 +126,14 @@ def __init__( self._initialized = False self._backend: Optional[Any] = None + self.query_prefix = query_prefix + self.document_prefix = document_prefix + self.meta_fields_to_embed = meta_fields_to_embed or [] + self.embedding_separator = embedding_separator + if timeout is None: + timeout = float(os.environ.get("NVIDIA_TIMEOUT", 60.0)) + self.timeout = timeout + def to_dict(self) -> Dict[str, Any]: """ Serialize the ranker to a dictionary. @@ -120,6 +147,10 @@ def to_dict(self) -> Dict[str, Any]: truncate=self._truncate, api_url=self._api_url, api_key=self._api_key.to_dict() if self._api_key else None, + query_prefix=self.query_prefix, + document_prefix=self.document_prefix, + meta_fields_to_embed=self.meta_fields_to_embed, + embedding_separator=self.embedding_separator, ) @classmethod @@ -146,10 +177,11 @@ def warm_up(self): if self._truncate is not None: model_kwargs.update(truncate=str(self._truncate)) self._backend = NimBackend( - self._model, + model=self._model, api_url=self._api_url, api_key=self._api_key, model_kwargs=model_kwargs, + timeout=self.timeout, ) if not self._model: self._model = _DEFAULT_MODEL @@ -200,10 +232,20 @@ def run( return {"documents": []} assert self._backend is not None + + query_text = self.query_prefix + query + document_texts = [] + for doc in documents: + meta_values_to_embed = [ + str(doc.meta[key]) for key in self.meta_fields_to_embed if key in doc.meta and doc.meta[key] + ] + text_to_embed = self.embedding_separator.join(meta_values_to_embed + [doc.content or ""]) + document_texts.append(self.document_prefix + text_to_embed) + # rank result is list[{index: int, logit: float}] sorted by logit sorted_indexes_and_scores = self._backend.rank( - query, - documents, + query_text=query_text, + document_texts=document_texts, endpoint=self._endpoint, ) sorted_documents = [] diff --git a/integrations/nvidia/src/haystack_integrations/utils/nvidia/nim_backend.py b/integrations/nvidia/src/haystack_integrations/utils/nvidia/nim_backend.py index 0279cf608..290242354 100644 --- a/integrations/nvidia/src/haystack_integrations/utils/nvidia/nim_backend.py +++ b/integrations/nvidia/src/haystack_integrations/utils/nvidia/nim_backend.py @@ -2,6 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 +import os from dataclasses import dataclass, field from typing import Any, Dict, List, Optional, Tuple @@ -9,7 +10,7 @@ from haystack import Document from haystack.utils import Secret -REQUEST_TIMEOUT = 60 +REQUEST_TIMEOUT = 60.0 @dataclass @@ -35,6 +36,7 @@ def __init__( api_url: str, api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"), model_kwargs: Optional[Dict[str, Any]] = None, + timeout: Optional[float] = None, ): headers = { "Content-Type": "application/json", @@ -50,6 +52,9 @@ def __init__( self.model = model self.api_url = api_url self.model_kwargs = model_kwargs or {} + if timeout is None: + timeout = float(os.environ.get("NVIDIA_TIMEOUT", REQUEST_TIMEOUT)) + self.timeout = timeout def embed(self, texts: List[str]) -> Tuple[List[List[float]], Dict[str, Any]]: url = f"{self.api_url}/embeddings" @@ -62,7 +67,7 @@ def embed(self, texts: List[str]) -> Tuple[List[List[float]], Dict[str, Any]]: "input": texts, **self.model_kwargs, }, - timeout=REQUEST_TIMEOUT, + timeout=self.timeout, ) 
res.raise_for_status() except requests.HTTPError as e: @@ -94,7 +99,7 @@ def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]]]: ], **self.model_kwargs, }, - timeout=REQUEST_TIMEOUT, + timeout=self.timeout, ) res.raise_for_status() except requests.HTTPError as e: @@ -132,7 +137,7 @@ def models(self) -> List[Model]: res = self.session.get( url, - timeout=REQUEST_TIMEOUT, + timeout=self.timeout, ) res.raise_for_status() @@ -145,8 +150,8 @@ def models(self) -> List[Model]: def rank( self, - query: str, - documents: List[Document], + query_text: str, + document_texts: List[str], endpoint: Optional[str] = None, ) -> List[Dict[str, Any]]: url = endpoint or f"{self.api_url}/ranking" @@ -156,11 +161,11 @@ def rank( url, json={ "model": self.model, - "query": {"text": query}, - "passages": [{"text": doc.content} for doc in documents], + "query": {"text": query_text}, + "passages": [{"text": text} for text in document_texts], **self.model_kwargs, }, - timeout=REQUEST_TIMEOUT, + timeout=self.timeout, ) res.raise_for_status() except requests.HTTPError as e: diff --git a/integrations/nvidia/tests/test_document_embedder.py b/integrations/nvidia/tests/test_document_embedder.py index 7e0e02f3d..3b03d0115 100644 --- a/integrations/nvidia/tests/test_document_embedder.py +++ b/integrations/nvidia/tests/test_document_embedder.py @@ -347,7 +347,7 @@ def test_run_wrong_input_format(self): with pytest.raises(TypeError, match="NvidiaDocumentEmbedder expects a list of Documents as input"): embedder.run(documents=list_integers_input) - def test_run_empty_document(self): + def test_run_empty_document(self, caplog): model = "playground_nvolveqa_40k" api_key = Secret.from_token("fake-api-key") embedder = NvidiaDocumentEmbedder(model, api_key=api_key) @@ -355,8 +355,10 @@ def test_run_empty_document(self): embedder.warm_up() embedder.backend = MockBackend(model=model, api_key=api_key) - with pytest.raises(ValueError, match="no content to embed"): + # Write check using caplog that a logger.warning is raised + with caplog.at_level("WARNING"): embedder.run(documents=[Document(content="")]) + assert "has no content to embed." 
in caplog.text def test_run_on_empty_list(self): model = "playground_nvolveqa_40k" diff --git a/integrations/nvidia/tests/test_ranker.py b/integrations/nvidia/tests/test_ranker.py index d66bb0f65..50b9651e0 100644 --- a/integrations/nvidia/tests/test_ranker.py +++ b/integrations/nvidia/tests/test_ranker.py @@ -271,6 +271,10 @@ def test_to_dict(self) -> None: "truncate": None, "api_url": None, "api_key": {"type": "env_var", "env_vars": ["NVIDIA_API_KEY"], "strict": True}, + "query_prefix": "", + "document_prefix": "", + "meta_fields_to_embed": [], + "embedding_separator": "\n", }, } From 7794e8fe4f2bfcf961f4f8d925b4d3ffb31d01c9 Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Thu, 14 Nov 2024 16:20:14 +0100 Subject: [PATCH 02/11] Remove unused import --- .../nvidia/src/haystack_integrations/utils/nvidia/nim_backend.py | 1 - 1 file changed, 1 deletion(-) diff --git a/integrations/nvidia/src/haystack_integrations/utils/nvidia/nim_backend.py b/integrations/nvidia/src/haystack_integrations/utils/nvidia/nim_backend.py index 290242354..19f8302c3 100644 --- a/integrations/nvidia/src/haystack_integrations/utils/nvidia/nim_backend.py +++ b/integrations/nvidia/src/haystack_integrations/utils/nvidia/nim_backend.py @@ -7,7 +7,6 @@ from typing import Any, Dict, List, Optional, Tuple import requests -from haystack import Document from haystack.utils import Secret REQUEST_TIMEOUT = 60.0 From 0976f64b8f75c6e05c54d9640a87019e8942974e Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Thu, 14 Nov 2024 16:22:30 +0100 Subject: [PATCH 03/11] Fix readme --- integrations/nvidia/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/nvidia/README.md b/integrations/nvidia/README.md index e28f0ede9..558c34d28 100644 --- a/integrations/nvidia/README.md +++ b/integrations/nvidia/README.md @@ -38,7 +38,7 @@ hatch run test To only run unit tests: ``` -hatch run test -m"not integration" +hatch run test -m "not integration" ``` To run the linters `ruff` and `mypy`: From 5bd1e78872daecdc8a1fe5ed804622397eae0740 Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Thu, 14 Nov 2024 16:27:07 +0100 Subject: [PATCH 04/11] linting --- .../components/embedders/nvidia/text_embedder.py | 2 +- .../components/rankers/nvidia/ranker.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py index d9cbc84f5..a0bc0f203 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py @@ -104,7 +104,7 @@ def default_model(self): "Default model is set as: {model_name}. \n" "Set model using model parameter. \n" "To get available models use available_models property.", - model_name=name + model_name=name, ) warnings.warn( f"Default model is set as: {name}. 
\n" diff --git a/integrations/nvidia/src/haystack_integrations/components/rankers/nvidia/ranker.py b/integrations/nvidia/src/haystack_integrations/components/rankers/nvidia/ranker.py index ca4181dc5..ba360b5a7 100644 --- a/integrations/nvidia/src/haystack_integrations/components/rankers/nvidia/ranker.py +++ b/integrations/nvidia/src/haystack_integrations/components/rankers/nvidia/ranker.py @@ -237,9 +237,11 @@ def run( document_texts = [] for doc in documents: meta_values_to_embed = [ - str(doc.meta[key]) for key in self.meta_fields_to_embed if key in doc.meta and doc.meta[key] + str(doc.meta[key]) + for key in self.meta_fields_to_embed + if key in doc.meta and doc.meta[key] # noqa: RUF019 ] - text_to_embed = self.embedding_separator.join(meta_values_to_embed + [doc.content or ""]) + text_to_embed = self.embedding_separator.join([*meta_values_to_embed, doc.content or ""]) document_texts.append(self.document_prefix + text_to_embed) # rank result is list[{index: int, logit: float}] sorted by logit From 80e38d4b02a7dd0cba76c215c109ec41902682ba Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Fri, 15 Nov 2024 11:51:03 +0100 Subject: [PATCH 05/11] Add more logging --- .../utils/nvidia/nim_backend.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/integrations/nvidia/src/haystack_integrations/utils/nvidia/nim_backend.py b/integrations/nvidia/src/haystack_integrations/utils/nvidia/nim_backend.py index 19f8302c3..15b35e4b2 100644 --- a/integrations/nvidia/src/haystack_integrations/utils/nvidia/nim_backend.py +++ b/integrations/nvidia/src/haystack_integrations/utils/nvidia/nim_backend.py @@ -7,8 +7,11 @@ from typing import Any, Dict, List, Optional, Tuple import requests +from haystack import logging from haystack.utils import Secret +logger = logging.getLogger(__name__) + REQUEST_TIMEOUT = 60.0 @@ -70,6 +73,7 @@ def embed(self, texts: List[str]) -> Tuple[List[List[float]], Dict[str, Any]]: ) res.raise_for_status() except requests.HTTPError as e: + logger.error("Error when calling NIM embedding endpoint: Error - {error}", error=e.response.text) msg = f"Failed to query embedding endpoint: Error - {e.response.text}" raise ValueError(msg) from e @@ -102,6 +106,7 @@ def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]]]: ) res.raise_for_status() except requests.HTTPError as e: + logger.error("Error when calling NIM chat completion endpoint: Error - {error}", error=e.response.text) msg = f"Failed to query chat completion endpoint: Error - {e.response.text}" raise ValueError(msg) from e @@ -143,6 +148,7 @@ def models(self) -> List[Model]: data = res.json()["data"] models = [Model(element["id"]) for element in data if "id" in element] if not models: + logger.error("No hosted model were found at URL '{u}'.", u=url) msg = f"No hosted model were found at URL '{url}'." 
raise ValueError(msg) return models @@ -168,10 +174,14 @@ def rank( ) res.raise_for_status() except requests.HTTPError as e: + logger.error("Error when calling NIM ranking endpoint: Error - {error}", error=e.response.text) msg = f"Failed to rank endpoint: Error - {e.response.text}" raise ValueError(msg) from e data = res.json() - assert "rankings" in data, f"Expected 'rankings' in response, got {data}" + if "rankings" not in data: + logger.error("Expected 'rankings' in response, got {d}", d=data) + msg = f"Expected 'rankings' in response, got {data}" + raise ValueError(msg) return data["rankings"] From b7e0149aee7b7d6d70f834b0643480955907cb2d Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Fri, 15 Nov 2024 11:58:57 +0100 Subject: [PATCH 06/11] Follow same private/public attribute as other components --- .../components/rankers/nvidia/ranker.py | 46 +++++++++---------- integrations/nvidia/tests/test_ranker.py | 34 +++++++------- 2 files changed, 40 insertions(+), 40 deletions(-) diff --git a/integrations/nvidia/src/haystack_integrations/components/rankers/nvidia/ranker.py b/integrations/nvidia/src/haystack_integrations/components/rankers/nvidia/ranker.py index ba360b5a7..9bd2acf43 100644 --- a/integrations/nvidia/src/haystack_integrations/components/rankers/nvidia/ranker.py +++ b/integrations/nvidia/src/haystack_integrations/components/rankers/nvidia/ranker.py @@ -105,24 +105,24 @@ def __init__( raise TypeError(msg) # todo: detect default in non-hosted case (when api_url is provided) - self._model = model or _DEFAULT_MODEL - self._truncate = truncate - self._api_key = api_key + self.model = model or _DEFAULT_MODEL + self.truncate = truncate + self.api_key = api_key # if no api_url is provided, we're using a hosted model and can # - assume the default url will work, because there's only one model # - assume we won't call backend.models() if api_url is not None: - self._api_url = url_validation(api_url, None, ["v1/ranking"]) - self._endpoint = None # we let backend.rank() handle the endpoint + self.api_url = url_validation(api_url, None, ["v1/ranking"]) + self.endpoint = None # we let backend.rank() handle the endpoint else: - if self._model not in _MODEL_ENDPOINT_MAP: + if self.model not in _MODEL_ENDPOINT_MAP: msg = f"Model '{model}' is unknown. Please provide an api_url to access it." 
raise ValueError(msg) - self._api_url = None # we handle the endpoint - self._endpoint = _MODEL_ENDPOINT_MAP[self._model] + self.api_url = None # we handle the endpoint + self.endpoint = _MODEL_ENDPOINT_MAP[self.model] if api_key is None: self._api_key = Secret.from_env_var("NVIDIA_API_KEY") - self._top_k = top_k + self.top_k = top_k self._initialized = False self._backend: Optional[Any] = None @@ -142,11 +142,11 @@ def to_dict(self) -> Dict[str, Any]: """ return default_to_dict( self, - model=self._model, - top_k=self._top_k, - truncate=self._truncate, - api_url=self._api_url, - api_key=self._api_key.to_dict() if self._api_key else None, + model=self.model, + top_k=self.top_k, + truncate=self.truncate, + api_url=self.api_url, + api_key=self.api_key.to_dict() if self.api_key else None, query_prefix=self.query_prefix, document_prefix=self.document_prefix, meta_fields_to_embed=self.meta_fields_to_embed, @@ -174,17 +174,17 @@ def warm_up(self): """ if not self._initialized: model_kwargs = {} - if self._truncate is not None: - model_kwargs.update(truncate=str(self._truncate)) + if self.truncate is not None: + model_kwargs.update(truncate=str(self.truncate)) self._backend = NimBackend( - model=self._model, - api_url=self._api_url, - api_key=self._api_key, + model=self.model, + api_url=self.api_url, + api_key=self.api_key, model_kwargs=model_kwargs, timeout=self.timeout, ) - if not self._model: - self._model = _DEFAULT_MODEL + if not self.model: + self.model = _DEFAULT_MODEL self._initialized = True @component.output_types(documents=List[Document]) @@ -225,7 +225,7 @@ def run( if len(documents) == 0: return {"documents": []} - top_k = top_k if top_k is not None else self._top_k + top_k = top_k if top_k is not None else self.top_k if top_k < 1: logger.warning("top_k should be at least 1, returning nothing") warnings.warn("top_k should be at least 1, returning nothing", stacklevel=2) @@ -248,7 +248,7 @@ def run( sorted_indexes_and_scores = self._backend.rank( query_text=query_text, document_texts=document_texts, - endpoint=self._endpoint, + endpoint=self.endpoint, ) sorted_documents = [] for item in sorted_indexes_and_scores[:top_k]: diff --git a/integrations/nvidia/tests/test_ranker.py b/integrations/nvidia/tests/test_ranker.py index 50b9651e0..8b51bb59c 100644 --- a/integrations/nvidia/tests/test_ranker.py +++ b/integrations/nvidia/tests/test_ranker.py @@ -19,8 +19,8 @@ class TestNvidiaRanker: def test_init_default(self, monkeypatch): monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") client = NvidiaRanker() - assert client._model == _DEFAULT_MODEL - assert client._api_key == Secret.from_env_var("NVIDIA_API_KEY") + assert client.model == _DEFAULT_MODEL + assert client.api_key == Secret.from_env_var("NVIDIA_API_KEY") def test_init_with_parameters(self): client = NvidiaRanker( @@ -29,10 +29,10 @@ def test_init_with_parameters(self): top_k=3, truncate="END", ) - assert client._api_key == Secret.from_token("fake-api-key") - assert client._model == _DEFAULT_MODEL - assert client._top_k == 3 - assert client._truncate == RankerTruncateMode.END + assert client.api_key == Secret.from_token("fake-api-key") + assert client.model == _DEFAULT_MODEL + assert client.top_k == 3 + assert client.truncate == RankerTruncateMode.END def test_init_fail_wo_api_key(self, monkeypatch): monkeypatch.delenv("NVIDIA_API_KEY", raising=False) @@ -43,7 +43,7 @@ def test_init_fail_wo_api_key(self, monkeypatch): def test_init_pass_wo_api_key_w_api_url(self): url = "https://url.bogus/v1" client = NvidiaRanker(api_url=url) - 
assert client._api_url == url + assert client.api_url == url def test_warm_up_required(self): client = NvidiaRanker() @@ -291,11 +291,11 @@ def test_from_dict(self) -> None: }, } ) - assert client._model == "nvidia/nv-rerankqa-mistral-4b-v3" - assert client._top_k == 5 - assert client._truncate is None - assert client._api_url is None - assert client._api_key == Secret.from_env_var("NVIDIA_API_KEY") + assert client.model == "nvidia/nv-rerankqa-mistral-4b-v3" + assert client.top_k == 5 + assert client.truncate is None + assert client.api_url is None + assert client.api_key == Secret.from_env_var("NVIDIA_API_KEY") def test_from_dict_defaults(self) -> None: client = NvidiaRanker.from_dict( @@ -304,8 +304,8 @@ def test_from_dict_defaults(self) -> None: "init_parameters": {}, } ) - assert client._model == "nvidia/nv-rerankqa-mistral-4b-v3" - assert client._top_k == 5 - assert client._truncate is None - assert client._api_url is None - assert client._api_key == Secret.from_env_var("NVIDIA_API_KEY") + assert client.model == "nvidia/nv-rerankqa-mistral-4b-v3" + assert client.top_k == 5 + assert client.truncate is None + assert client.api_url is None + assert client.api_key == Secret.from_env_var("NVIDIA_API_KEY") From 8929f588d161e1ae1c699683678ffe401f75a79c Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Fri, 15 Nov 2024 14:50:55 +0100 Subject: [PATCH 07/11] Add tests --- integrations/nvidia/tests/test_document_embedder.py | 13 +++++++++++++ integrations/nvidia/tests/test_generator.py | 13 +++++++++++++ integrations/nvidia/tests/test_ranker.py | 13 +++++++++++++ integrations/nvidia/tests/test_text_embedder.py | 13 +++++++++++++ 4 files changed, 52 insertions(+) diff --git a/integrations/nvidia/tests/test_document_embedder.py b/integrations/nvidia/tests/test_document_embedder.py index 3b03d0115..c84f05494 100644 --- a/integrations/nvidia/tests/test_document_embedder.py +++ b/integrations/nvidia/tests/test_document_embedder.py @@ -374,6 +374,19 @@ def test_run_on_empty_list(self): assert result["documents"] is not None assert not result["documents"] # empty list + def test_setting_timeout(self, monkeypatch): + monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") + embedder = NvidiaDocumentEmbedder(timeout=10.) + embedder.warm_up() + assert embedder.backend.timeout == 10. + + def test_setting_timeout_env(self, monkeypatch): + monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") + monkeypatch.setenv("NVIDIA_TIMEOUT", "45") + embedder = NvidiaDocumentEmbedder() + embedder.warm_up() + assert embedder.backend.timeout == 45. + @pytest.mark.skipif( not os.environ.get("NVIDIA_API_KEY", None), reason="Export an env var called NVIDIA_API_KEY containing the Nvidia API key to run this test.", diff --git a/integrations/nvidia/tests/test_generator.py b/integrations/nvidia/tests/test_generator.py index 055830ae5..3acaf9e63 100644 --- a/integrations/nvidia/tests/test_generator.py +++ b/integrations/nvidia/tests/test_generator.py @@ -124,6 +124,19 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch): }, } + def test_setting_timeout(self, monkeypatch): + monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") + generator = NvidiaGenerator(timeout=10.) + generator.warm_up() + assert generator._backend.timeout == 10. + + def test_setting_timeout_env(self, monkeypatch): + monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") + monkeypatch.setenv("NVIDIA_TIMEOUT", "45") + generator = NvidiaGenerator() + generator.warm_up() + assert generator._backend.timeout == 45. 
+ @pytest.mark.skipif( not os.environ.get("NVIDIA_NIM_GENERATOR_MODEL", None) or not os.environ.get("NVIDIA_NIM_ENDPOINT_URL", None), reason="Export an env var called NVIDIA_NIM_GENERATOR_MODEL containing the hosted model name and " diff --git a/integrations/nvidia/tests/test_ranker.py b/integrations/nvidia/tests/test_ranker.py index 8b51bb59c..ac56d388e 100644 --- a/integrations/nvidia/tests/test_ranker.py +++ b/integrations/nvidia/tests/test_ranker.py @@ -309,3 +309,16 @@ def test_from_dict_defaults(self) -> None: assert client.truncate is None assert client.api_url is None assert client.api_key == Secret.from_env_var("NVIDIA_API_KEY") + + def test_setting_timeout(self, monkeypatch): + monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") + client = NvidiaRanker(timeout=10.) + client.warm_up() + assert client._backend.timeout == 10. + + def test_setting_timeout_env(self, monkeypatch): + monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") + monkeypatch.setenv("NVIDIA_TIMEOUT", "45") + client = NvidiaRanker() + client.warm_up() + assert client._backend.timeout == 45. diff --git a/integrations/nvidia/tests/test_text_embedder.py b/integrations/nvidia/tests/test_text_embedder.py index 278fa5191..c3bc0d9e1 100644 --- a/integrations/nvidia/tests/test_text_embedder.py +++ b/integrations/nvidia/tests/test_text_embedder.py @@ -175,6 +175,19 @@ def test_run_empty_string(self): with pytest.raises(ValueError, match="empty string"): embedder.run(text="") + def test_setting_timeout(self, monkeypatch): + monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") + embedder = NvidiaTextEmbedder(timeout=10.) + embedder.warm_up() + assert embedder.backend.timeout == 10. + + def test_setting_timeout_env(self, monkeypatch): + monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") + monkeypatch.setenv("NVIDIA_TIMEOUT", "45") + embedder = NvidiaTextEmbedder() + embedder.warm_up() + assert embedder.backend.timeout == 45. + @pytest.mark.skipif( not os.environ.get("NVIDIA_NIM_EMBEDDER_MODEL", None) or not os.environ.get("NVIDIA_NIM_ENDPOINT_URL", None), reason="Export an env var called NVIDIA_NIM_EMBEDDER_MODEL containing the hosted model name and " From e17944584d9cd0765a1791166e84c2c9ff2bcfc1 Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Fri, 15 Nov 2024 14:53:10 +0100 Subject: [PATCH 08/11] Linting --- integrations/nvidia/tests/test_document_embedder.py | 6 +++--- integrations/nvidia/tests/test_generator.py | 6 +++--- integrations/nvidia/tests/test_ranker.py | 6 +++--- integrations/nvidia/tests/test_text_embedder.py | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/integrations/nvidia/tests/test_document_embedder.py b/integrations/nvidia/tests/test_document_embedder.py index c84f05494..a8ecdf568 100644 --- a/integrations/nvidia/tests/test_document_embedder.py +++ b/integrations/nvidia/tests/test_document_embedder.py @@ -376,16 +376,16 @@ def test_run_on_empty_list(self): def test_setting_timeout(self, monkeypatch): monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") - embedder = NvidiaDocumentEmbedder(timeout=10.) + embedder = NvidiaDocumentEmbedder(timeout=10.0) embedder.warm_up() - assert embedder.backend.timeout == 10. + assert embedder.backend.timeout == 10.0 def test_setting_timeout_env(self, monkeypatch): monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") monkeypatch.setenv("NVIDIA_TIMEOUT", "45") embedder = NvidiaDocumentEmbedder() embedder.warm_up() - assert embedder.backend.timeout == 45. 
+ assert embedder.backend.timeout == 45.0 @pytest.mark.skipif( not os.environ.get("NVIDIA_API_KEY", None), diff --git a/integrations/nvidia/tests/test_generator.py b/integrations/nvidia/tests/test_generator.py index 3acaf9e63..414de4884 100644 --- a/integrations/nvidia/tests/test_generator.py +++ b/integrations/nvidia/tests/test_generator.py @@ -126,16 +126,16 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch): def test_setting_timeout(self, monkeypatch): monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") - generator = NvidiaGenerator(timeout=10.) + generator = NvidiaGenerator(timeout=10.0) generator.warm_up() - assert generator._backend.timeout == 10. + assert generator._backend.timeout == 10.0 def test_setting_timeout_env(self, monkeypatch): monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") monkeypatch.setenv("NVIDIA_TIMEOUT", "45") generator = NvidiaGenerator() generator.warm_up() - assert generator._backend.timeout == 45. + assert generator._backend.timeout == 45.0 @pytest.mark.skipif( not os.environ.get("NVIDIA_NIM_GENERATOR_MODEL", None) or not os.environ.get("NVIDIA_NIM_ENDPOINT_URL", None), diff --git a/integrations/nvidia/tests/test_ranker.py b/integrations/nvidia/tests/test_ranker.py index ac56d388e..799d12638 100644 --- a/integrations/nvidia/tests/test_ranker.py +++ b/integrations/nvidia/tests/test_ranker.py @@ -312,13 +312,13 @@ def test_from_dict_defaults(self) -> None: def test_setting_timeout(self, monkeypatch): monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") - client = NvidiaRanker(timeout=10.) + client = NvidiaRanker(timeout=10.0) client.warm_up() - assert client._backend.timeout == 10. + assert client._backend.timeout == 10.0 def test_setting_timeout_env(self, monkeypatch): monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") monkeypatch.setenv("NVIDIA_TIMEOUT", "45") client = NvidiaRanker() client.warm_up() - assert client._backend.timeout == 45. + assert client._backend.timeout == 45.0 diff --git a/integrations/nvidia/tests/test_text_embedder.py b/integrations/nvidia/tests/test_text_embedder.py index c3bc0d9e1..a4867e766 100644 --- a/integrations/nvidia/tests/test_text_embedder.py +++ b/integrations/nvidia/tests/test_text_embedder.py @@ -177,16 +177,16 @@ def test_run_empty_string(self): def test_setting_timeout(self, monkeypatch): monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") - embedder = NvidiaTextEmbedder(timeout=10.) + embedder = NvidiaTextEmbedder(timeout=10.0) embedder.warm_up() - assert embedder.backend.timeout == 10. + assert embedder.backend.timeout == 10.0 def test_setting_timeout_env(self, monkeypatch): monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") monkeypatch.setenv("NVIDIA_TIMEOUT", "45") embedder = NvidiaTextEmbedder() embedder.warm_up() - assert embedder.backend.timeout == 45. 
+ assert embedder.backend.timeout == 45.0 @pytest.mark.skipif( not os.environ.get("NVIDIA_NIM_EMBEDDER_MODEL", None) or not os.environ.get("NVIDIA_NIM_ENDPOINT_URL", None), From aa516068f3fd8c2c5cfa88d295ce0da240dd92db Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Fri, 15 Nov 2024 15:02:42 +0100 Subject: [PATCH 09/11] Add another test --- .../components/rankers/nvidia/ranker.py | 22 ++++++++++-------- integrations/nvidia/tests/test_ranker.py | 23 +++++++++++++++++++ 2 files changed, 36 insertions(+), 9 deletions(-) diff --git a/integrations/nvidia/src/haystack_integrations/components/rankers/nvidia/ranker.py b/integrations/nvidia/src/haystack_integrations/components/rankers/nvidia/ranker.py index 9bd2acf43..57a4faf40 100644 --- a/integrations/nvidia/src/haystack_integrations/components/rankers/nvidia/ranker.py +++ b/integrations/nvidia/src/haystack_integrations/components/rankers/nvidia/ranker.py @@ -187,6 +187,18 @@ def warm_up(self): self.model = _DEFAULT_MODEL self._initialized = True + def _prepare_documents_to_embed(self, documents: List[Document]) -> List[str]: + document_texts = [] + for doc in documents: + meta_values_to_embed = [ + str(doc.meta[key]) + for key in self.meta_fields_to_embed + if key in doc.meta and doc.meta[key] # noqa: RUF019 + ] + text_to_embed = self.embedding_separator.join([*meta_values_to_embed, doc.content or ""]) + document_texts.append(self.document_prefix + text_to_embed) + return document_texts + @component.output_types(documents=List[Document]) def run( self, @@ -234,15 +246,7 @@ def run( assert self._backend is not None query_text = self.query_prefix + query - document_texts = [] - for doc in documents: - meta_values_to_embed = [ - str(doc.meta[key]) - for key in self.meta_fields_to_embed - if key in doc.meta and doc.meta[key] # noqa: RUF019 - ] - text_to_embed = self.embedding_separator.join([*meta_values_to_embed, doc.content or ""]) - document_texts.append(self.document_prefix + text_to_embed) + document_texts = self._prepare_documents_to_embed(documents=documents) # rank result is list[{index: int, logit: float}] sorted by logit sorted_indexes_and_scores = self._backend.rank( diff --git a/integrations/nvidia/tests/test_ranker.py b/integrations/nvidia/tests/test_ranker.py index 799d12638..e1a2ad674 100644 --- a/integrations/nvidia/tests/test_ranker.py +++ b/integrations/nvidia/tests/test_ranker.py @@ -322,3 +322,26 @@ def test_setting_timeout_env(self, monkeypatch): client = NvidiaRanker() client.warm_up() assert client._backend.timeout == 45.0 + + def test_prepare_texts_to_embed_w_metadata(self): + documents = [ + Document(content=f"document number {i}:\ncontent", meta={"meta_field": f"meta_value {i}"}) for i in range(5) + ] + + ranker = NvidiaRanker( + model=None, + api_key=Secret.from_token("fake-api-key"), + meta_fields_to_embed=["meta_field"], + embedding_separator=" | ", + ) + + prepared_texts = ranker._prepare_documents_to_embed(documents) + + # note that newline is replaced by space + assert prepared_texts == [ + "meta_value 0 | document number 0:\ncontent", + "meta_value 1 | document number 1:\ncontent", + "meta_value 2 | document number 2:\ncontent", + "meta_value 3 | document number 3:\ncontent", + "meta_value 4 | document number 4:\ncontent", + ] From 9d2afcc6429cddfb256feda3ef8b8690176c96a3 Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Thu, 21 Nov 2024 08:26:15 +0100 Subject: [PATCH 10/11] Add timeout to to_dict --- .../embedders/nvidia/document_embedder.py | 1 + .../components/embedders/nvidia/text_embedder.py | 1 
+ .../components/rankers/nvidia/ranker.py | 1 + .../nvidia/tests/test_document_embedder.py | 6 ++++++ integrations/nvidia/tests/test_ranker.py | 16 ++++++++++++++++ integrations/nvidia/tests/test_text_embedder.py | 5 +++++ 6 files changed, 30 insertions(+) diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py index 39c24a77b..c8f0ed318 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py @@ -170,6 +170,7 @@ def to_dict(self) -> Dict[str, Any]: meta_fields_to_embed=self.meta_fields_to_embed, embedding_separator=self.embedding_separator, truncate=str(self.truncate) if self.truncate is not None else None, + timeout=self.timeout, ) @classmethod diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py index a0bc0f203..a93aa8caa 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py @@ -156,6 +156,7 @@ def to_dict(self) -> Dict[str, Any]: prefix=self.prefix, suffix=self.suffix, truncate=str(self.truncate) if self.truncate is not None else None, + timeout=self.timeout, ) @classmethod diff --git a/integrations/nvidia/src/haystack_integrations/components/rankers/nvidia/ranker.py b/integrations/nvidia/src/haystack_integrations/components/rankers/nvidia/ranker.py index 57a4faf40..66203a490 100644 --- a/integrations/nvidia/src/haystack_integrations/components/rankers/nvidia/ranker.py +++ b/integrations/nvidia/src/haystack_integrations/components/rankers/nvidia/ranker.py @@ -151,6 +151,7 @@ def to_dict(self) -> Dict[str, Any]: document_prefix=self.document_prefix, meta_fields_to_embed=self.meta_fields_to_embed, embedding_separator=self.embedding_separator, + timeout=self.timeout, ) @classmethod diff --git a/integrations/nvidia/tests/test_document_embedder.py b/integrations/nvidia/tests/test_document_embedder.py index a8ecdf568..8c01f0759 100644 --- a/integrations/nvidia/tests/test_document_embedder.py +++ b/integrations/nvidia/tests/test_document_embedder.py @@ -75,6 +75,7 @@ def test_to_dict(self, monkeypatch): "meta_fields_to_embed": [], "embedding_separator": "\n", "truncate": None, + "timeout": 60.0, }, } @@ -90,6 +91,7 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch): meta_fields_to_embed=["test_field"], embedding_separator=" | ", truncate=EmbeddingTruncateMode.END, + timeout=45.0, ) data = component.to_dict() assert data == { @@ -105,6 +107,7 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch): "meta_fields_to_embed": ["test_field"], "embedding_separator": " | ", "truncate": "END", + "timeout": 45.0, }, } @@ -123,6 +126,7 @@ def test_from_dict(self, monkeypatch): "meta_fields_to_embed": ["test_field"], "embedding_separator": " | ", "truncate": "START", + "timeout": 45.0, }, } component = NvidiaDocumentEmbedder.from_dict(data) @@ -135,6 +139,7 @@ def test_from_dict(self, monkeypatch): assert component.meta_fields_to_embed == ["test_field"] assert component.embedding_separator == " | " assert component.truncate == EmbeddingTruncateMode.START + assert component.timeout == 45.0 def 
test_from_dict_defaults(self, monkeypatch): monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") @@ -152,6 +157,7 @@ def test_from_dict_defaults(self, monkeypatch): assert component.meta_fields_to_embed == [] assert component.embedding_separator == "\n" assert component.truncate is None + assert component.timeout == 60.0 def test_prepare_texts_to_embed_w_metadata(self): documents = [ diff --git a/integrations/nvidia/tests/test_ranker.py b/integrations/nvidia/tests/test_ranker.py index e1a2ad674..3d93dc028 100644 --- a/integrations/nvidia/tests/test_ranker.py +++ b/integrations/nvidia/tests/test_ranker.py @@ -275,6 +275,7 @@ def test_to_dict(self) -> None: "document_prefix": "", "meta_fields_to_embed": [], "embedding_separator": "\n", + "timeout": 60.0, }, } @@ -288,6 +289,11 @@ def test_from_dict(self) -> None: "truncate": None, "api_url": None, "api_key": {"type": "env_var", "env_vars": ["NVIDIA_API_KEY"], "strict": True}, + "query_prefix": "", + "document_prefix": "", + "meta_fields_to_embed": [], + "embedding_separator": "\n", + "timeout": 45.0, }, } ) @@ -296,6 +302,11 @@ def test_from_dict(self) -> None: assert client.truncate is None assert client.api_url is None assert client.api_key == Secret.from_env_var("NVIDIA_API_KEY") + assert client.query_prefix == "" + assert client.document_prefix == "" + assert client.meta_fields_to_embed == [] + assert client.embedding_separator == "\n" + assert client.timeout == 45.0 def test_from_dict_defaults(self) -> None: client = NvidiaRanker.from_dict( @@ -309,6 +320,11 @@ def test_from_dict_defaults(self) -> None: assert client.truncate is None assert client.api_url is None assert client.api_key == Secret.from_env_var("NVIDIA_API_KEY") + assert client.query_prefix == "" + assert client.document_prefix == "" + assert client.meta_fields_to_embed == [] + assert client.embedding_separator == "\n" + assert client.timeout == 60.0 def test_setting_timeout(self, monkeypatch): monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") diff --git a/integrations/nvidia/tests/test_text_embedder.py b/integrations/nvidia/tests/test_text_embedder.py index a4867e766..b572cc046 100644 --- a/integrations/nvidia/tests/test_text_embedder.py +++ b/integrations/nvidia/tests/test_text_embedder.py @@ -56,6 +56,7 @@ def test_to_dict(self, monkeypatch): "prefix": "", "suffix": "", "truncate": None, + "timeout": 60.0, }, } @@ -67,6 +68,7 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch): prefix="prefix", suffix="suffix", truncate=EmbeddingTruncateMode.START, + timeout=10.0, ) data = component.to_dict() assert data == { @@ -78,6 +80,7 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch): "prefix": "prefix", "suffix": "suffix", "truncate": "START", + "timeout": 10.0, }, } @@ -92,6 +95,7 @@ def test_from_dict(self, monkeypatch): "prefix": "prefix", "suffix": "suffix", "truncate": "START", + "timeout": 10.0, }, } component = NvidiaTextEmbedder.from_dict(data) @@ -100,6 +104,7 @@ def test_from_dict(self, monkeypatch): assert component.prefix == "prefix" assert component.suffix == "suffix" assert component.truncate == EmbeddingTruncateMode.START + assert component.timeout == 10.0 def test_from_dict_defaults(self, monkeypatch): monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") From 6be4d1146d83bbb2e2482c5939f83a3eb3eb748c Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Thu, 21 Nov 2024 10:30:29 +0100 Subject: [PATCH 11/11] Update integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py Co-authored-by: David 
S. Batista --- .../components/embedders/nvidia/document_embedder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py index c8f0ed318..6519efbab 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py @@ -78,7 +78,7 @@ def __init__( :param embedding_separator: Separator used to concatenate the meta fields to the Document text. :param truncate: - Specifies how inputs longer that the maximum token length should be truncated. + Specifies how inputs longer than the maximum token length should be truncated. If None the behavior is model-dependent, see the official documentation for more information. :param timeout: Timeout for request calls, if not set it is inferred from the `NVIDIA_TIMEOUT` environment variable
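
A minimal usage sketch of the options introduced across these patches (not taken from the repository itself): it assumes the components are importable from the `haystack_integrations.components.*` packages implied by the diff paths and that `NVIDIA_API_KEY` is set in the environment. Model names are the defaults referenced in the patches; the `"query: "` / `"passage: "` prefixes are illustrative placeholders, since reranking models such as `bge` define their own required instructions.

```python
import os

from haystack import Document
from haystack_integrations.components.embedders.nvidia import NvidiaTextEmbedder
from haystack_integrations.components.rankers.nvidia import NvidiaRanker

# Explicit per-component timeout (in seconds) forwarded to the NIM backend.
embedder = NvidiaTextEmbedder(model="nvidia/nv-embedqa-e5-v5", timeout=30.0)

# Alternatively, the timeout is read from NVIDIA_TIMEOUT at construction time
# and falls back to 60.0 when neither is provided.
os.environ["NVIDIA_TIMEOUT"] = "45"
ranker = NvidiaRanker(
    top_k=3,
    query_prefix="query: ",          # prepended to the query before ranking
    document_prefix="passage: ",     # prepended to each document before ranking
    meta_fields_to_embed=["title"],  # metadata joined to the content via embedding_separator
)

embedder.warm_up()  # requires NVIDIA_API_KEY for the hosted endpoints
ranker.warm_up()

docs = [Document(content="NIM backends expose a ranking endpoint.", meta={"title": "NIM"})]
result = ranker.run(query="What does a NIM backend expose?", documents=docs)
print(result["documents"])
```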