From 70ba8f6bd12ab24ca41223a273769dc7a65a7cb7 Mon Sep 17 00:00:00 2001 From: Rashmi Pawar <168514198+raspawar@users.noreply.github.com> Date: Mon, 12 Aug 2024 14:30:16 +0530 Subject: [PATCH] Add default model for NVIDIA HayStack local NIM endpoints (#915) * initial embedder code * default model code * docs: update model docstring * tests: add userwarning * docs: literal lint fix * review changes * remove pydantic dependency * move backend, nim_backend under utils * move is_hosted to warm_up * test cases, docstring fix * error message updation Co-authored-by: Madeesh Kannan * move is_hosted code to util * remove backend code * update import for is_hosted * remove util and move code to utils * fix api key issue for failing test cases * Update integrations/nvidia/tests/conftest.py --------- Co-authored-by: Madeesh Kannan --- integrations/nvidia/pyproject.toml | 4 +- .../embedders/nvidia/_nim_backend.py | 52 ----------- .../components/embedders/nvidia/backend.py | 29 ------- .../embedders/nvidia/document_embedder.py | 36 ++++++-- .../embedders/nvidia/text_embedder.py | 36 ++++++-- .../components/generators/nvidia/backend.py | 29 ------- .../components/generators/nvidia/generator.py | 38 ++++++-- .../utils/nvidia/__init__.py | 5 +- .../nvidia/nim_backend.py} | 57 +++++++++++- .../utils/nvidia/utils.py | 8 ++ integrations/nvidia/tests/__init__.py | 3 + integrations/nvidia/tests/conftest.py | 44 ++++++++++ .../nvidia/tests/test_document_embedder.py | 86 ++++++++++++++----- integrations/nvidia/tests/test_generator.py | 55 ++++++++++++ .../nvidia/tests/test_text_embedder.py | 48 +++++++---- 15 files changed, 358 insertions(+), 172 deletions(-) delete mode 100644 integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py delete mode 100644 integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/backend.py delete mode 100644 integrations/nvidia/src/haystack_integrations/components/generators/nvidia/backend.py rename integrations/nvidia/src/haystack_integrations/{components/generators/nvidia/_nim_backend.py => utils/nvidia/nim_backend.py} (61%) create mode 100644 integrations/nvidia/tests/conftest.py diff --git a/integrations/nvidia/pyproject.toml b/integrations/nvidia/pyproject.toml index 504077b4e..f35485e9c 100644 --- a/integrations/nvidia/pyproject.toml +++ b/integrations/nvidia/pyproject.toml @@ -42,7 +42,7 @@ root = "../.." git_describe_command = 'git describe --tags --match="integrations/nvidia-v[0-9]*"' [tool.hatch.envs.default] -dependencies = ["coverage[toml]>=6.5", "pytest", "pytest-rerunfailures", "haystack-pydoc-tools"] +dependencies = ["coverage[toml]>=6.5", "pytest", "pytest-rerunfailures", "haystack-pydoc-tools", "requests_mock"] [tool.hatch.envs.default.scripts] test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}" test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}" @@ -147,6 +147,8 @@ module = [ "haystack_integrations.*", "pytest.*", "numpy.*", + "requests_mock.*", + "pydantic.*" ] ignore_missing_imports = true diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py deleted file mode 100644 index ee25df7fd..000000000 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py +++ /dev/null @@ -1,52 +0,0 @@ -from typing import Any, Dict, List, Optional, Tuple - -import requests -from haystack.utils import Secret - -from .backend import EmbedderBackend - -REQUEST_TIMEOUT = 60 - - -class NimBackend(EmbedderBackend): - def __init__( - self, - model: str, - api_url: str, - api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"), - model_kwargs: Optional[Dict[str, Any]] = None, - ): - headers = { - "Content-Type": "application/json", - "accept": "application/json", - } - - if api_key: - headers["authorization"] = f"Bearer {api_key.resolve_value()}" - - self.session = requests.Session() - self.session.headers.update(headers) - - self.model = model - self.api_url = api_url - self.model_kwargs = model_kwargs or {} - - def embed(self, texts: List[str]) -> Tuple[List[List[float]], Dict[str, Any]]: - url = f"{self.api_url}/embeddings" - - res = self.session.post( - url, - json={ - "model": self.model, - "input": texts, - **self.model_kwargs, - }, - timeout=REQUEST_TIMEOUT, - ) - res.raise_for_status() - - data = res.json() - # Sort the embeddings by index, we don't know whether they're out of order or not - embeddings = [e["embedding"] for e in sorted(data["data"], key=lambda e: e["index"])] - - return embeddings, {"usage": data["usage"]} diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/backend.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/backend.py deleted file mode 100644 index 09e9b7c80..000000000 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/backend.py +++ /dev/null @@ -1,29 +0,0 @@ -from abc import ABC, abstractmethod -from typing import Any, Dict, List, Optional, Tuple - - -class EmbedderBackend(ABC): - def __init__(self, model: str, model_kwargs: Optional[Dict[str, Any]] = None): - """ - Initialize the backend. - - :param model: - The name of the model to use. - :param model_kwargs: - Additional keyword arguments to pass to the model. - """ - self.model_name = model - self.model_kwargs = model_kwargs or {} - - @abstractmethod - def embed(self, texts: List[str]) -> Tuple[List[List[float]], Dict[str, Any]]: - """ - Invoke the backend and embed the given texts. - - :param texts: - Texts to embed. - :return: - Vector representation of the texts and - metadata returned by the service. - """ - pass diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py index 4cc805c01..f5d1747b8 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py @@ -1,12 +1,11 @@ +import warnings from typing import Any, Dict, List, Optional, Tuple, Union from haystack import Document, component, default_from_dict, default_to_dict from haystack.utils import Secret, deserialize_secrets_inplace -from haystack_integrations.utils.nvidia import url_validation +from haystack_integrations.utils.nvidia import NimBackend, is_hosted, url_validation from tqdm import tqdm -from ._nim_backend import NimBackend -from .backend import EmbedderBackend from .truncate import EmbeddingTruncateMode _DEFAULT_API_URL = "https://ai.api.nvidia.com/v1/retrieval/nvidia" @@ -34,7 +33,7 @@ class NvidiaDocumentEmbedder: def __init__( self, - model: str = "NV-Embed-QA", + model: Optional[str] = None, api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"), api_url: str = _DEFAULT_API_URL, prefix: str = "", @@ -50,6 +49,8 @@ def __init__( :param model: Embedding model to use. + If no specific model along with locally hosted API URL is provided, + the system defaults to the available model found using /models API. :param api_key: API key for the NVIDIA NIM. :param api_url: @@ -87,9 +88,31 @@ def __init__( truncate = EmbeddingTruncateMode.from_str(truncate) self.truncate = truncate - self.backend: Optional[EmbedderBackend] = None + self.backend: Optional[Any] = None self._initialized = False + if is_hosted(api_url) and not self.model: # manually set default model + self.model = "NV-Embed-QA" + + def default_model(self): + """Set default model in local NIM mode.""" + valid_models = [ + model.id for model in self.backend.models() if not model.base_model or model.base_model == model.id + ] + name = next(iter(valid_models), None) + if name: + warnings.warn( + f"Default model is set as: {name}. \n" + "Set model using model parameter. \n" + "To get available models use available_models property.", + UserWarning, + stacklevel=2, + ) + self.model = self.backend.model = name + else: + error_message = "No locally hosted model was found." + raise ValueError(error_message) + def warm_up(self): """ Initializes the component. @@ -109,6 +132,9 @@ def warm_up(self): self._initialized = True + if not self.model: + self.default_model() + def to_dict(self) -> Dict[str, Any]: """ Serializes the component to a dictionary. diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py index e1a8c36dd..1c4a7c5c9 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py @@ -1,11 +1,10 @@ +import warnings from typing import Any, Dict, List, Optional, Union from haystack import component, default_from_dict, default_to_dict from haystack.utils import Secret, deserialize_secrets_inplace -from haystack_integrations.utils.nvidia import url_validation +from haystack_integrations.utils.nvidia import NimBackend, is_hosted, url_validation -from ._nim_backend import NimBackend -from .backend import EmbedderBackend from .truncate import EmbeddingTruncateMode _DEFAULT_API_URL = "https://ai.api.nvidia.com/v1/retrieval/nvidia" @@ -35,7 +34,7 @@ class NvidiaTextEmbedder: def __init__( self, - model: str = "NV-Embed-QA", + model: Optional[str] = None, api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"), api_url: str = _DEFAULT_API_URL, prefix: str = "", @@ -47,6 +46,8 @@ def __init__( :param model: Embedding model to use. + If no specific model along with locally hosted API URL is provided, + the system defaults to the available model found using /models API. :param api_key: API key for the NVIDIA NIM. :param api_url: @@ -71,9 +72,31 @@ def __init__( truncate = EmbeddingTruncateMode.from_str(truncate) self.truncate = truncate - self.backend: Optional[EmbedderBackend] = None + self.backend: Optional[Any] = None self._initialized = False + if is_hosted(api_url) and not self.model: # manually set default model + self.model = "NV-Embed-QA" + + def default_model(self): + """Set default model in local NIM mode.""" + valid_models = [ + model.id for model in self.backend.models() if not model.base_model or model.base_model == model.id + ] + name = next(iter(valid_models), None) + if name: + warnings.warn( + f"Default model is set as: {name}. \n" + "Set model using model parameter. \n" + "To get available models use available_models property.", + UserWarning, + stacklevel=2, + ) + self.model = self.backend.model = name + else: + error_message = "No locally hosted model was found." + raise ValueError(error_message) + def warm_up(self): """ Initializes the component. @@ -93,6 +116,9 @@ def warm_up(self): self._initialized = True + if not self.model: + self.default_model() + def to_dict(self) -> Dict[str, Any]: """ Serializes the component to a dictionary. diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/backend.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/backend.py deleted file mode 100644 index d14199daf..000000000 --- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/backend.py +++ /dev/null @@ -1,29 +0,0 @@ -from abc import ABC, abstractmethod -from typing import Any, Dict, List, Optional, Tuple - - -class GeneratorBackend(ABC): - def __init__(self, model: str, model_kwargs: Optional[Dict[str, Any]] = None): - """ - Initialize the backend. - - :param model: - The name of the model to use. - :param model_kwargs: - Additional keyword arguments to pass to the model. - """ - self.model_name = model - self.model_kwargs = model_kwargs or {} - - @abstractmethod - def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]]]: - """ - Invoke the backend and prompt the model. - - :param prompt: - Prompt text. - :return: - Vector representation of the generated texts related - metadata returned by the service. - """ - pass diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py index f11ef8aaf..a286400ab 100644 --- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py +++ b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py @@ -1,14 +1,12 @@ # SPDX-FileCopyrightText: 2024-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 +import warnings from typing import Any, Dict, List, Optional from haystack import component, default_from_dict, default_to_dict from haystack.utils.auth import Secret, deserialize_secrets_inplace -from haystack_integrations.utils.nvidia import url_validation - -from ._nim_backend import NimBackend -from .backend import GeneratorBackend +from haystack_integrations.utils.nvidia import NimBackend, is_hosted, url_validation _DEFAULT_API_URL = "https://integrate.api.nvidia.com/v1" @@ -45,7 +43,7 @@ class NvidiaGenerator: def __init__( self, - model: str, + model: Optional[str] = None, api_url: str = _DEFAULT_API_URL, api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"), model_arguments: Optional[Dict[str, Any]] = None, @@ -55,6 +53,10 @@ def __init__( :param model: Name of the model to use for text generation. + See the [NVIDIA NIMs](https://ai.nvidia.com) + for more information on the supported models. + `Note`: If no specific model along with locally hosted API URL is provided, + the system defaults to the available model found using /models API. Check supported models at [NVIDIA NIM](https://ai.nvidia.com). :param api_key: API key for the NVIDIA NIM. Set it as the `NVIDIA_API_KEY` environment @@ -72,7 +74,28 @@ def __init__( self._api_key = api_key self._model_arguments = model_arguments or {} - self._backend: Optional[GeneratorBackend] = None + self._backend: Optional[Any] = None + + self.is_hosted = is_hosted(api_url) + + def default_model(self): + """Set default model in local NIM mode.""" + valid_models = [ + model.id for model in self._backend.models() if not model.base_model or model.base_model == model.id + ] + name = next(iter(valid_models), None) + if name: + warnings.warn( + f"Default model is set as: {name}. \n" + "Set model using model parameter. \n" + "To get available models use available_models property.", + UserWarning, + stacklevel=2, + ) + self._model = self._backend.model_name = name + else: + error_message = "No locally hosted model was found." + raise ValueError(error_message) def warm_up(self): """ @@ -91,6 +114,9 @@ def warm_up(self): model_kwargs=self._model_arguments, ) + if not self.is_hosted and not self._model: + self.default_model() + def to_dict(self) -> Dict[str, Any]: """ Serializes the component to a dictionary. diff --git a/integrations/nvidia/src/haystack_integrations/utils/nvidia/__init__.py b/integrations/nvidia/src/haystack_integrations/utils/nvidia/__init__.py index 9863e4a38..da301d29d 100644 --- a/integrations/nvidia/src/haystack_integrations/utils/nvidia/__init__.py +++ b/integrations/nvidia/src/haystack_integrations/utils/nvidia/__init__.py @@ -1,3 +1,4 @@ -from .utils import url_validation +from .nim_backend import Model, NimBackend +from .utils import is_hosted, url_validation -__all__ = ["url_validation"] +__all__ = ["NimBackend", "Model", "is_hosted", "url_validation"] diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nim_backend.py b/integrations/nvidia/src/haystack_integrations/utils/nvidia/nim_backend.py similarity index 61% rename from integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nim_backend.py rename to integrations/nvidia/src/haystack_integrations/utils/nvidia/nim_backend.py index 5253b3254..f69862f0e 100644 --- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nim_backend.py +++ b/integrations/nvidia/src/haystack_integrations/utils/nvidia/nim_backend.py @@ -1,14 +1,29 @@ +from dataclasses import dataclass, field from typing import Any, Dict, List, Optional, Tuple import requests from haystack.utils import Secret -from .backend import GeneratorBackend - REQUEST_TIMEOUT = 60 -class NimBackend(GeneratorBackend): +@dataclass +class Model: + """ + Model information. + + id: unique identifier for the model, passed as model parameter for requests + aliases: list of aliases for the model + base_model: root model for the model + All aliases are deprecated and will trigger a warning when used. + """ + + id: str + aliases: Optional[List[str]] = field(default_factory=list) + base_model: Optional[str] = None + + +class NimBackend: def __init__( self, model: str, @@ -31,6 +46,26 @@ def __init__( self.api_url = api_url self.model_kwargs = model_kwargs or {} + def embed(self, texts: List[str]) -> Tuple[List[List[float]], Dict[str, Any]]: + url = f"{self.api_url}/embeddings" + + res = self.session.post( + url, + json={ + "model": self.model, + "input": texts, + **self.model_kwargs, + }, + timeout=REQUEST_TIMEOUT, + ) + res.raise_for_status() + + data = res.json() + # Sort the embeddings by index, we don't know whether they're out of order or not + embeddings = [e["embedding"] for e in sorted(data["data"], key=lambda e: e["index"])] + + return embeddings, {"usage": data["usage"]} + def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]]]: # We're using the chat completion endpoint as the NIM API doesn't support # the /completions endpoint. So both the non-chat and chat generator will use this. @@ -78,3 +113,19 @@ def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]]]: meta.append(choice_meta) return replies, meta + + def models(self) -> List[Model]: + url = f"{self.api_url}/models" + + res = self.session.get( + url, + timeout=REQUEST_TIMEOUT, + ) + res.raise_for_status() + + data = res.json()["data"] + models = [Model(element["id"]) for element in data if "id" in element] + if not models: + msg = f"No hosted model were found at URL '{url}'." + raise ValueError(msg) + return models diff --git a/integrations/nvidia/src/haystack_integrations/utils/nvidia/utils.py b/integrations/nvidia/src/haystack_integrations/utils/nvidia/utils.py index 4f8e14b09..7d4dfc3b4 100644 --- a/integrations/nvidia/src/haystack_integrations/utils/nvidia/utils.py +++ b/integrations/nvidia/src/haystack_integrations/utils/nvidia/utils.py @@ -37,3 +37,11 @@ def url_validation(api_url: str, default_api_url: str, allowed_paths: List[str]) base_url = urlunparse((result.scheme, result.netloc, "v1", "", "", "")) return base_url + + +def is_hosted(api_url: str): + """""" + return urlparse(api_url).netloc in [ + "integrate.api.nvidia.com", + "ai.api.nvidia.com", + ] diff --git a/integrations/nvidia/tests/__init__.py b/integrations/nvidia/tests/__init__.py index e873bc332..47611e0b9 100644 --- a/integrations/nvidia/tests/__init__.py +++ b/integrations/nvidia/tests/__init__.py @@ -1,3 +1,6 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 +from .conftest import MockBackend + +__all__ = ["MockBackend"] diff --git a/integrations/nvidia/tests/conftest.py b/integrations/nvidia/tests/conftest.py new file mode 100644 index 000000000..794c994ff --- /dev/null +++ b/integrations/nvidia/tests/conftest.py @@ -0,0 +1,44 @@ +from typing import Any, Dict, List, Optional, Tuple + +import pytest +from haystack.utils import Secret +from haystack_integrations.utils.nvidia import Model, NimBackend +from requests_mock import Mocker + + +class MockBackend(NimBackend): + def __init__(self, model: str, api_key: Optional[Secret] = None, model_kwargs: Optional[Dict[str, Any]] = None): + api_key = api_key or Secret.from_env_var("NVIDIA_API_KEY") + super().__init__(model, api_url="", api_key=api_key, model_kwargs=model_kwargs or {}) + + def embed(self, texts): + inputs = texts + data = [[0.1, 0.2, 0.3] for i in range(len(inputs))] + return data, {"usage": {"total_tokens": 4, "prompt_tokens": 4}} + + def models(self): + return [Model(id="aa")] + + def generate(self) -> Tuple[List[str], List[Dict[str, Any]]]: + return ( + ["This is a mocked response."], + [{"role": "assistant", "usage": {"prompt_tokens": 5, "total_tokens": 10, "completion_tokens": 5}}], + ) + + +@pytest.fixture +def mock_local_models(requests_mock: Mocker) -> None: + requests_mock.get( + "http://localhost:8080/v1/models", + json={ + "data": [ + { + "id": "model1", + "object": "model", + "created": 1234567890, + "owned_by": "OWNER", + "root": "model1", + }, + ] + }, + ) diff --git a/integrations/nvidia/tests/test_document_embedder.py b/integrations/nvidia/tests/test_document_embedder.py index 856ae4652..6562a0ea9 100644 --- a/integrations/nvidia/tests/test_document_embedder.py +++ b/integrations/nvidia/tests/test_document_embedder.py @@ -4,17 +4,8 @@ from haystack import Document from haystack.utils import Secret from haystack_integrations.components.embedders.nvidia import EmbeddingTruncateMode, NvidiaDocumentEmbedder -from haystack_integrations.components.embedders.nvidia.backend import EmbedderBackend - -class MockBackend(EmbedderBackend): - def __init__(self, model, model_kwargs): - super().__init__(model, model_kwargs) - - def embed(self, texts): - inputs = texts - data = [[0.1, 0.2, 0.3] for i in range(len(inputs))] - return data, {"usage": {"total_tokens": 4, "prompt_tokens": 4}} +from . import MockBackend class TestNvidiaDocumentEmbedder: @@ -185,14 +176,18 @@ def test_prepare_texts_to_embed_w_suffix(self): def test_embed_batch(self): texts = ["text 1", "text 2", "text 3", "text 4", "text 5"] - + model = "playground_nvolveqa_40k" + api_key = Secret.from_token("fake-api-key") embedder = NvidiaDocumentEmbedder( - "playground_nvolveqa_40k", - api_key=Secret.from_token("fake-api-key"), + model, + api_key=api_key, ) embedder.warm_up() - embedder.backend = MockBackend("aa", None) + embedder.backend = MockBackend( + model=model, + api_key=api_key, + ) embeddings, metadata = embedder._embed_batch(texts_to_embed=texts, batch_size=2) @@ -205,15 +200,55 @@ def test_embed_batch(self): assert metadata == {"usage": {"prompt_tokens": 3 * 4, "total_tokens": 3 * 4}} - def test_run(self): + @pytest.mark.usefixtures("mock_local_models") + def test_run_default_model(self): docs = [ Document(content="I love cheese", meta={"topic": "Cuisine"}), Document(content="A transformer is a deep learning architecture", meta={"topic": "ML"}), ] + api_key = Secret.from_token("fake-api-key") + + embedder = NvidiaDocumentEmbedder( + api_key=api_key, + model=None, + api_url="http://localhost:8080/v1", + prefix="prefix ", + suffix=" suffix", + meta_fields_to_embed=["topic"], + embedding_separator=" | ", + ) + + with pytest.warns(UserWarning) as record: + embedder.warm_up() + assert len(record) == 1 + assert "Default model is set as:" in str(record[0].message) + assert embedder.model == "model1" + + embedder.backend = MockBackend(model=embedder.model, api_key=api_key) + + result = embedder.run(documents=docs) + documents_with_embeddings = result["documents"] + metadata = result["meta"] + + assert isinstance(documents_with_embeddings, list) + assert len(documents_with_embeddings) == len(docs) + for doc in documents_with_embeddings: + assert isinstance(doc, Document) + assert isinstance(doc.embedding, list) + assert len(doc.embedding) == 3 + assert all(isinstance(x, float) for x in doc.embedding) + assert metadata == {"usage": {"prompt_tokens": 4, "total_tokens": 4}} + + def test_run(self): + docs = [ + Document(content="I love cheese", meta={"topic": "Cuisine"}), + Document(content="A transformer is a deep learning architecture", meta={"topic": "ML"}), + ] + api_key = Secret.from_token("fake-api-key") model = "playground_nvolveqa_40k" embedder = NvidiaDocumentEmbedder( - api_key=Secret.from_token("fake-api-key"), + api_key=api_key, model=model, prefix="prefix ", suffix=" suffix", @@ -222,7 +257,7 @@ def test_run(self): ) embedder.warm_up() - embedder.backend = MockBackend("aa", None) + embedder.backend = MockBackend(model=model, api_key=api_key) result = embedder.run(documents=docs) @@ -243,9 +278,10 @@ def test_run_custom_batch_size(self): Document(content="I love cheese", meta={"topic": "Cuisine"}), Document(content="A transformer is a deep learning architecture", meta={"topic": "ML"}), ] + api_key = Secret.from_token("fake-api-key") model = "playground_nvolveqa_40k" embedder = NvidiaDocumentEmbedder( - api_key=Secret.from_token("fake-api-key"), + api_key=api_key, model=model, prefix="prefix ", suffix=" suffix", @@ -255,7 +291,7 @@ def test_run_custom_batch_size(self): ) embedder.warm_up() - embedder.backend = MockBackend("aa", None) + embedder.backend = MockBackend(model=model, api_key=api_key) result = embedder.run(documents=docs) @@ -273,10 +309,12 @@ def test_run_custom_batch_size(self): assert metadata == {"usage": {"prompt_tokens": 2 * 4, "total_tokens": 2 * 4}} def test_run_wrong_input_format(self): - embedder = NvidiaDocumentEmbedder("playground_nvolveqa_40k", api_key=Secret.from_token("fake-api-key")) + model = "playground_nvolveqa_40k" + api_key = Secret.from_token("fake-api-key") + embedder = NvidiaDocumentEmbedder(model, api_key=api_key) embedder.warm_up() - embedder.backend = MockBackend("aa", None) + embedder.backend = MockBackend(model=model, api_key=api_key) string_input = "text" list_integers_input = [1, 2, 3] @@ -288,10 +326,12 @@ def test_run_wrong_input_format(self): embedder.run(documents=list_integers_input) def test_run_on_empty_list(self): - embedder = NvidiaDocumentEmbedder("playground_nvolveqa_40k", api_key=Secret.from_token("fake-api-key")) + model = "playground_nvolveqa_40k" + api_key = Secret.from_token("fake-api-key") + embedder = NvidiaDocumentEmbedder(model, api_key=api_key) embedder.warm_up() - embedder.backend = MockBackend("aa", None) + embedder.backend = MockBackend(model=model, api_key=api_key) empty_list_input = [] result = embedder.run(documents=empty_list_input) diff --git a/integrations/nvidia/tests/test_generator.py b/integrations/nvidia/tests/test_generator.py index 3ddeebe88..9fff9c2e8 100644 --- a/integrations/nvidia/tests/test_generator.py +++ b/integrations/nvidia/tests/test_generator.py @@ -6,6 +6,35 @@ import pytest from haystack.utils import Secret from haystack_integrations.components.generators.nvidia import NvidiaGenerator +from requests_mock import Mocker + + +@pytest.fixture +def mock_local_chat_completion(requests_mock: Mocker) -> None: + requests_mock.post( + "http://localhost:8080/v1/chat/completions", + json={ + "choices": [ + { + "message": {"content": "Hello!", "role": "system"}, + "usage": {"prompt_tokens": 3, "total_tokens": 5, "completion_tokens": 9}, + "finish_reason": "stop", + "index": 0, + }, + { + "message": {"content": "How are you?", "role": "system"}, + "usage": {"prompt_tokens": 3, "total_tokens": 5, "completion_tokens": 9}, + "finish_reason": "stop", + "index": 1, + }, + ], + "usage": { + "prompt_tokens": 3, + "total_tokens": 5, + "completion_tokens": 9, + }, + }, + ) class TestNvidiaGenerator: @@ -116,6 +145,32 @@ def test_run_integration_with_nim_backend(self): assert result["replies"] assert result["meta"] + @pytest.mark.integration + @pytest.mark.usefixtures("mock_local_models") + @pytest.mark.usefixtures("mock_local_chat_completion") + def test_run_integration_with_default_model_nim_backend(self): + model = None + url = "http://localhost:8080/v1" + generator = NvidiaGenerator( + model=model, + api_url=url, + api_key=None, + model_arguments={ + "temperature": 0.2, + }, + ) + with pytest.warns(UserWarning) as record: + generator.warm_up() + assert len(record) == 1 + assert "Default model is set as:" in str(record[0].message) + assert generator._model == "model1" + assert not generator.is_hosted + + result = generator.run(prompt="What is the answer?") + + assert result["replies"] + assert result["meta"] + @pytest.mark.skipif( not os.environ.get("NVIDIA_API_KEY", None), reason="Export an env var called NVIDIA_API_KEY containing the NVIDIA API key to run this test.", diff --git a/integrations/nvidia/tests/test_text_embedder.py b/integrations/nvidia/tests/test_text_embedder.py index 42d60dee2..7c0a7000d 100644 --- a/integrations/nvidia/tests/test_text_embedder.py +++ b/integrations/nvidia/tests/test_text_embedder.py @@ -3,17 +3,8 @@ import pytest from haystack.utils import Secret from haystack_integrations.components.embedders.nvidia import EmbeddingTruncateMode, NvidiaTextEmbedder -from haystack_integrations.components.embedders.nvidia.backend import EmbedderBackend - -class MockBackend(EmbedderBackend): - def __init__(self, model, model_kwargs): - super().__init__(model, model_kwargs) - - def embed(self, texts): - inputs = texts - data = [[0.1, 0.2, 0.3] for i in range(len(inputs))] - return data, {"usage": {"total_tokens": 4, "prompt_tokens": 4}} +from . import MockBackend class TestNvidiaTextEmbedder: @@ -22,7 +13,6 @@ def test_init_default(self, monkeypatch): embedder = NvidiaTextEmbedder() assert embedder.api_key == Secret.from_env_var("NVIDIA_API_KEY") - assert embedder.model == "NV-Embed-QA" assert embedder.api_url == "https://ai.api.nvidia.com/v1/retrieval/nvidia" assert embedder.prefix == "" assert embedder.suffix == "" @@ -106,13 +96,36 @@ def from_dict(self, monkeypatch): assert component.suffix == "suffix" assert component.truncate == "START" + @pytest.mark.usefixtures("mock_local_models") + def test_run_default_model(self): + api_key = Secret.from_token("fake-api-key") + embedder = NvidiaTextEmbedder(api_url="http://localhost:8080/v1", api_key=api_key) + + assert embedder.model is None + + with pytest.warns(UserWarning) as record: + embedder.warm_up() + + assert len(record) == 1 + assert "Default model is set as:" in str(record[0].message) + assert embedder.model == "model1" + + embedder.backend = MockBackend(model=embedder.model, api_key=api_key) + + result = embedder.run(text="The food was delicious") + + assert len(result["embedding"]) == 3 + assert all(isinstance(x, float) for x in result["embedding"]) + assert result["meta"] == { + "usage": {"prompt_tokens": 4, "total_tokens": 4}, + } + def test_run(self): - embedder = NvidiaTextEmbedder( - "playground_nvolveqa_40k", api_key=Secret.from_token("fake-api-key"), prefix="prefix ", suffix=" suffix" - ) + api_key = Secret.from_token("fake-api-key") + embedder = NvidiaTextEmbedder("playground_nvolveqa_40k", api_key=api_key, prefix="prefix ", suffix=" suffix") embedder.warm_up() - embedder.backend = MockBackend("aa", None) + embedder.backend = MockBackend(model="playground_nvolveqa_40k", api_key=api_key) result = embedder.run(text="The food was delicious") @@ -123,9 +136,10 @@ def test_run(self): } def test_run_wrong_input_format(self): - embedder = NvidiaTextEmbedder("playground_nvolveqa_40k", api_key=Secret.from_token("fake-api-key")) + api_key = Secret.from_token("fake-api-key") + embedder = NvidiaTextEmbedder("playground_nvolveqa_40k", api_key=api_key) embedder.warm_up() - embedder.backend = MockBackend("aa", None) + embedder.backend = MockBackend(model="playground_nvolveqa_40k", api_key=api_key) list_integers_input = [1, 2, 3]