From 5a8796b769fcf9a4d560b8fa157819c6f28946d4 Mon Sep 17 00:00:00 2001 From: Silvano Cerza <3314350+silvanocerza@users.noreply.github.com> Date: Mon, 18 Mar 2024 18:25:27 +0100 Subject: [PATCH] Add NIM backend support (#597) * Abstract service endpoint backend * Abstract generators backend * Implement NimBackend * Implement NimBackend for embedders * Fix embedders backends arguments * Fix text embedder backend arguments * Make embedders nim backend consistent with nvcf one * Fix tests * Update headers, the generator endpoint, and the embedders input_type param * Update docstrings * Make api_key optional in generator * Remove api_key from NIM backend * Move usage in metadata in generator * Update tests * Remove OPENAI_API_KEY env var from workflow * Fix integration tests * Fix linting * Fix linting again * Address PR comments * Fix NVCF backend --------- Co-authored-by: shadeMe --- integrations/nvidia/pyproject.toml | 2 +- .../components/embedders/nvidia/__init__.py | 2 - .../embedders/nvidia/_nim_backend.py | 46 ++++++ .../nvidia/{_schema.py => _nvcf_backend.py} | 41 ++++- .../components/embedders/nvidia/backend.py | 29 ++++ .../embedders/nvidia/document_embedder.py | 74 +++++---- .../components/embedders/nvidia/models.py | 31 ---- .../embedders/nvidia/text_embedder.py | 58 +++---- .../components/generators/nvidia/__init__.py | 3 +- .../generators/nvidia/_nim_backend.py | 69 +++++++++ .../generators/nvidia/_nvcf_backend.py | 117 ++++++++++++++ .../components/generators/nvidia/backend.py | 29 ++++ .../components/generators/nvidia/generator.py | 92 +++++------ .../components/generators/nvidia/models.py | 35 ----- .../nvidia/tests/test_document_embedder.py | 143 +++++++++++++----- integrations/nvidia/tests/test_generator.py | 94 ++++++++---- .../nvidia/tests/test_text_embedder.py | 94 ++++++++---- 17 files changed, 662 insertions(+), 297 deletions(-) create mode 100644 integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py rename 
integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/{_schema.py => _nvcf_backend.py} (56%) create mode 100644 integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/backend.py delete mode 100644 integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/models.py create mode 100644 integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nim_backend.py create mode 100644 integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nvcf_backend.py create mode 100644 integrations/nvidia/src/haystack_integrations/components/generators/nvidia/backend.py delete mode 100644 integrations/nvidia/src/haystack_integrations/components/generators/nvidia/models.py diff --git a/integrations/nvidia/pyproject.toml b/integrations/nvidia/pyproject.toml index f443e91f9..05830e350 100644 --- a/integrations/nvidia/pyproject.toml +++ b/integrations/nvidia/pyproject.toml @@ -22,7 +22,7 @@ classifiers = [ "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", ] -dependencies = ["haystack-ai>=2.0.0b6"] +dependencies = ["haystack-ai", "requests"] [project.urls] Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/nvidia#readme" diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/__init__.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/__init__.py index 6ad2f9f6b..588aca2e6 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/__init__.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/__init__.py @@ -1,9 +1,7 @@ from .document_embedder import NvidiaDocumentEmbedder -from .models import NvidiaEmbeddingModel from .text_embedder import NvidiaTextEmbedder __all__ = [ "NvidiaDocumentEmbedder", - "NvidiaEmbeddingModel", "NvidiaTextEmbedder", ] diff --git 
a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py new file mode 100644 index 000000000..27e0dbeac --- /dev/null +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py @@ -0,0 +1,46 @@ +from typing import Any, Dict, List, Optional, Tuple + +import requests + +from .backend import EmbedderBackend + +REQUEST_TIMEOUT = 60 + + +class NimBackend(EmbedderBackend): + def __init__( + self, + model: str, + api_url: str, + model_kwargs: Optional[Dict[str, Any]] = None, + ): + headers = { + "Content-Type": "application/json", + "accept": "application/json", + } + self.session = requests.Session() + self.session.headers.update(headers) + + self.model = model + self.api_url = api_url + self.model_kwargs = model_kwargs or {} + + def embed(self, texts: List[str]) -> Tuple[List[List[float]], Dict[str, Any]]: + url = f"{self.api_url}/embeddings" + + res = self.session.post( + url, + json={ + "model": self.model, + "input": texts, + **self.model_kwargs, + }, + timeout=REQUEST_TIMEOUT, + ) + res.raise_for_status() + + data = res.json() + # Sort the embeddings by index, we don't know whether they're out of order or not + embeddings = [e["embedding"] for e in sorted(data["data"], key=lambda e: e["index"])] + + return embeddings, {"usage": data["usage"]} diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_schema.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nvcf_backend.py similarity index 56% rename from integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_schema.py rename to integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nvcf_backend.py index fc4e0e5bf..7d4b07dca 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_schema.py +++ 
b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nvcf_backend.py @@ -1,10 +1,49 @@ from dataclasses import asdict, dataclass -from typing import Any, Dict, List, Literal, Union +from typing import Any, Dict, List, Literal, Optional, Tuple, Union + +from haystack.utils.auth import Secret +from haystack_integrations.utils.nvidia import NvidiaCloudFunctionsClient + +from .backend import EmbedderBackend MAX_INPUT_STRING_LENGTH = 2048 MAX_INPUTS = 50 +class NvcfBackend(EmbedderBackend): + def __init__( + self, + model: str, + api_key: Secret, + model_kwargs: Optional[Dict[str, Any]] = None, + ): + if not model.startswith("playground_"): + model = f"playground_{model}" + + super().__init__(model=model, model_kwargs=model_kwargs) + + self.api_key = api_key + self.client = NvidiaCloudFunctionsClient( + api_key=api_key, + headers={ + "Content-Type": "application/json", + "Accept": "application/json", + }, + ) + self.nvcf_id = self.client.get_model_nvcf_id(self.model_name) + + def embed(self, texts: List[str]) -> Tuple[List[List[float]], Dict[str, Any]]: + request = EmbeddingsRequest(input=texts, **self.model_kwargs).to_dict() + json_response = self.client.query_function(self.nvcf_id, request) + response = EmbeddingsResponse.from_dict(json_response) + + # Sort resulting embeddings by index + assert all(isinstance(r.embedding, list) for r in response.data) + sorted_embeddings: List[List[float]] = [r.embedding for r in sorted(response.data, key=lambda e: e.index)] # type: ignore + metadata = {"usage": response.usage.to_dict()} + return sorted_embeddings, metadata + + @dataclass class EmbeddingsRequest: input: Union[str, List[str]] diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/backend.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/backend.py new file mode 100644 index 000000000..09e9b7c80 --- /dev/null +++ 
b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/backend.py @@ -0,0 +1,29 @@ +from abc import ABC, abstractmethod +from typing import Any, Dict, List, Optional, Tuple + + +class EmbedderBackend(ABC): + def __init__(self, model: str, model_kwargs: Optional[Dict[str, Any]] = None): + """ + Initialize the backend. + + :param model: + The name of the model to use. + :param model_kwargs: + Additional keyword arguments to pass to the model. + """ + self.model_name = model + self.model_kwargs = model_kwargs or {} + + @abstractmethod + def embed(self, texts: List[str]) -> Tuple[List[List[float]], Dict[str, Any]]: + """ + Invoke the backend and embed the given texts. + + :param texts: + Texts to embed. + :return: + Vector representation of the texts and + metadata returned by the service. + """ + pass diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py index 25c104b97..5b62da87b 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py @@ -1,19 +1,20 @@ -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Any, Dict, List, Optional, Tuple from haystack import Document, component, default_from_dict, default_to_dict from haystack.utils import Secret, deserialize_secrets_inplace -from haystack_integrations.utils.nvidia import NvidiaCloudFunctionsClient from tqdm import tqdm -from ._schema import MAX_INPUTS, EmbeddingsRequest, EmbeddingsResponse, Usage -from .models import NvidiaEmbeddingModel +from ._nim_backend import NimBackend +from ._nvcf_backend import NvcfBackend +from .backend import EmbedderBackend @component class NvidiaDocumentEmbedder: """ A component for embedding documents using embedding models provided by - 
[NVIDIA AI Foundation Endpoints](https://www.nvidia.com/en-us/ai-data-science/foundation-models/). + [NVIDIA AI Foundation Endpoints](https://www.nvidia.com/en-us/ai-data-science/foundation-models/) + and NVIDIA NeMo Inference Microservices. Usage example: ```python @@ -21,7 +22,7 @@ class NvidiaDocumentEmbedder: doc = Document(content="I love pizza!") - text_embedder = NvidiaDocumentEmbedder(model=NvidiaEmbeddingModel.NVOLVE_40K) + text_embedder = NvidiaDocumentEmbedder(model="nvolveqa_40k") text_embedder.warm_up() result = document_embedder.run([doc]) @@ -31,8 +32,9 @@ class NvidiaDocumentEmbedder: def __init__( self, - model: Union[str, NvidiaEmbeddingModel], - api_key: Secret = Secret.from_env_var("NVIDIA_API_KEY"), + model: str, + api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"), + api_url: Optional[str] = None, prefix: str = "", suffix: str = "", batch_size: int = 32, @@ -47,6 +49,8 @@ def __init__( Embedding model to use. :param api_key: API key for the NVIDIA AI Foundation Endpoints. + :param api_url: + Custom API URL for the NVIDIA NeMo Inference Microservices. :param prefix: A string to add to the beginning of each text. :param suffix: @@ -62,16 +66,9 @@ def __init__( Separator used to concatenate the meta fields to the Document text. """ - if isinstance(model, str): - model = NvidiaEmbeddingModel.from_str(model) - - # Upper-limit for the endpoint. - if batch_size > MAX_INPUTS: - msg = f"NVIDIA Cloud Functions currently support a maximum batch size of {MAX_INPUTS}." 
- raise ValueError(msg) - self.api_key = api_key self.model = model + self.api_url = api_url self.prefix = prefix self.suffix = suffix self.batch_size = batch_size @@ -79,14 +76,7 @@ def __init__( self.meta_fields_to_embed = meta_fields_to_embed or [] self.embedding_separator = embedding_separator - self.client = NvidiaCloudFunctionsClient( - api_key=api_key, - headers={ - "Content-Type": "application/json", - "Accept": "application/json", - }, - ) - self.nvcf_id = None + self.backend: Optional[EmbedderBackend] = None self._initialized = False def warm_up(self): @@ -96,7 +86,15 @@ def warm_up(self): if self._initialized: return - self.nvcf_id = self.client.get_model_nvcf_id(str(self.model)) + if self.api_url is None: + if self.api_key is None: + msg = "API key is required for NVIDIA AI Foundation Endpoints." + raise ValueError(msg) + + self.backend = NvcfBackend(self.model, api_key=self.api_key, model_kwargs={"model": "passage"}) + else: + self.backend = NimBackend(self.model, api_url=self.api_url, model_kwargs={"input_type": "passage"}) + self._initialized = True def to_dict(self) -> Dict[str, Any]: @@ -108,8 +106,9 @@ def to_dict(self) -> Dict[str, Any]: """ return default_to_dict( self, - api_key=self.api_key.to_dict(), - model=str(self.model), + api_key=self.api_key.to_dict() if self.api_key else None, + model=self.model, + api_url=self.api_url, prefix=self.prefix, suffix=self.suffix, batch_size=self.batch_size, @@ -128,7 +127,6 @@ def from_dict(cls, data: Dict[str, Any]) -> "NvidiaDocumentEmbedder": :returns: The deserialized component. 
""" - data["init_parameters"]["model"] = NvidiaEmbeddingModel.from_str(data["init_parameters"]["model"]) deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"]) return default_from_dict(cls, data) @@ -147,27 +145,23 @@ def _prepare_texts_to_embed(self, documents: List[Document]) -> List[str]: def _embed_batch(self, texts_to_embed: List[str], batch_size: int) -> Tuple[List[List[float]], Dict[str, Any]]: all_embeddings: List[List[float]] = [] - usage = Usage(prompt_tokens=0, total_tokens=0) - assert self.nvcf_id is not None + usage_prompt_tokens = 0 + usage_total_tokens = 0 + + assert self.backend is not None for i in tqdm( range(0, len(texts_to_embed), batch_size), disable=not self.progress_bar, desc="Calculating embeddings" ): batch = texts_to_embed[i : i + batch_size] - request = EmbeddingsRequest(input=batch, model="passage").to_dict() - json_response = self.client.query_function(self.nvcf_id, request) - response = EmbeddingsResponse.from_dict(json_response) - - # Sort resulting embeddings by index - assert all(isinstance(r.embedding, list) for r in response.data) - sorted_embeddings: List[List[float]] = [r.embedding for r in sorted(response.data, key=lambda e: e.index)] # type: ignore + sorted_embeddings, meta = self.backend.embed(batch) all_embeddings.extend(sorted_embeddings) - usage.prompt_tokens += response.usage.prompt_tokens - usage.total_tokens += response.usage.total_tokens + usage_prompt_tokens += meta.get("usage", {}).get("prompt_tokens", 0) + usage_total_tokens += meta.get("usage", {}).get("total_tokens", 0) - return all_embeddings, {"usage": usage.to_dict()} + return all_embeddings, {"usage": {"prompt_tokens": usage_prompt_tokens, "total_tokens": usage_total_tokens}} @component.output_types(documents=List[Document], meta=Dict[str, Any]) def run(self, documents: List[Document]): diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/models.py 
b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/models.py deleted file mode 100644 index dd11ac727..000000000 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/models.py +++ /dev/null @@ -1,31 +0,0 @@ -from enum import Enum - - -class NvidiaEmbeddingModel(Enum): - """ - [NVIDIA AI Foundation models](https://catalog.ngc.nvidia.com/ai-foundation-models) - used for generating embeddings. - """ - - #: [Retrieval QA Embedding Model](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/ai-foundation/models/nvolve-40k). - NVOLVE_40K = "playground_nvolveqa_40k" - - def __str__(self): - return self.value - - @classmethod - def from_str(cls, string: str) -> "NvidiaEmbeddingModel": - """ - Create an embedding model from a string. - - :param string: - String to convert. - :returns: - Embedding model. - """ - enum_map = {e.value: e for e in NvidiaEmbeddingModel} - emb_model = enum_map.get(string) - if emb_model is None: - msg = f"Unknown embedding model '{string}'. 
Supported modes are: {list(enum_map.keys())}" - raise ValueError(msg) - return emb_model diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py index a377934e3..79dda0d81 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py @@ -1,18 +1,19 @@ -from typing import Any, Dict, List, Union +from typing import Any, Dict, List, Optional from haystack import component, default_from_dict, default_to_dict from haystack.utils import Secret, deserialize_secrets_inplace -from haystack_integrations.utils.nvidia import NvidiaCloudFunctionsClient -from ._schema import EmbeddingsRequest, EmbeddingsResponse -from .models import NvidiaEmbeddingModel +from ._nim_backend import NimBackend +from ._nvcf_backend import NvcfBackend +from .backend import EmbedderBackend @component class NvidiaTextEmbedder: """ A component for embedding strings using embedding models provided by - [NVIDIA AI Foundation Endpoints](https://www.nvidia.com/en-us/ai-data-science/foundation-models/). + [NVIDIA AI Foundation Endpoints](https://www.nvidia.com/en-us/ai-data-science/foundation-models/) + and NVIDIA NeMo Inference Microservices. For models that differentiate between query and document inputs, this component embeds the input string as a query. @@ -23,7 +24,7 @@ class NvidiaTextEmbedder: text_to_embed = "I love pizza!" 
- text_embedder = NvidiaTextEmbedder(model=NvidiaEmbeddingModel.NVOLVE_40K) + text_embedder = NvidiaTextEmbedder(model="nvolveqa_40k") text_embedder.warm_up() print(text_embedder.run(text_to_embed)) @@ -32,8 +33,9 @@ class NvidiaTextEmbedder: def __init__( self, - model: Union[str, NvidiaEmbeddingModel], - api_key: Secret = Secret.from_env_var("NVIDIA_API_KEY"), + model: str, + api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"), + api_url: Optional[str] = None, prefix: str = "", suffix: str = "", ): @@ -44,27 +46,21 @@ def __init__( Embedding model to use. :param api_key: API key for the NVIDIA AI Foundation Endpoints. + :param api_url: + Custom API URL for the NVIDIA NeMo Inference Microservices. :param prefix: A string to add to the beginning of each text. :param suffix: A string to add to the end of each text. """ - if isinstance(model, str): - model = NvidiaEmbeddingModel.from_str(model) - self.api_key = api_key self.model = model + self.api_url = api_url self.prefix = prefix self.suffix = suffix - self.client = NvidiaCloudFunctionsClient( - api_key=api_key, - headers={ - "Content-Type": "application/json", - "Accept": "application/json", - }, - ) - self.nvcf_id = None + + self.backend: Optional[EmbedderBackend] = None self._initialized = False def warm_up(self): @@ -74,7 +70,15 @@ def warm_up(self): if self._initialized: return - self.nvcf_id = self.client.get_model_nvcf_id(str(self.model)) + if self.api_url is None: + if self.api_key is None: + msg = "API key is required for NVIDIA AI Foundation Endpoints." 
+ raise ValueError(msg) + + self.backend = NvcfBackend(self.model, api_key=self.api_key, model_kwargs={"model": "query"}) + else: + self.backend = NimBackend(self.model, api_url=self.api_url, model_kwargs={"input_type": "query"}) + self._initialized = True def to_dict(self) -> Dict[str, Any]: @@ -86,8 +90,9 @@ def to_dict(self) -> Dict[str, Any]: """ return default_to_dict( self, - api_key=self.api_key.to_dict(), - model=str(self.model), + api_key=self.api_key.to_dict() if self.api_key else None, + model=self.model, + api_url=self.api_url, prefix=self.prefix, suffix=self.suffix, ) @@ -102,7 +107,6 @@ def from_dict(cls, data: Dict[str, Any]) -> "NvidiaTextEmbedder": :returns: The deserialized component. """ - data["init_parameters"]["model"] = NvidiaEmbeddingModel.from_str(data["init_parameters"]["model"]) deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"]) return default_from_dict(cls, data) @@ -132,10 +136,8 @@ def run(self, text: str): ) raise TypeError(msg) - assert self.nvcf_id is not None + assert self.backend is not None text_to_embed = self.prefix + text + self.suffix - request = EmbeddingsRequest(input=text_to_embed, model="query").to_dict() - json_response = self.client.query_function(self.nvcf_id, request) - response = EmbeddingsResponse.from_dict(json_response) + sorted_embeddings, meta = self.backend.embed([text_to_embed]) - return {"embedding": response.data[0].embedding, "meta": {"usage": response.usage.to_dict()}} + return {"embedding": sorted_embeddings[0], "meta": meta} diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/__init__.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/__init__.py index 3a315843d..18354ea17 100644 --- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/__init__.py +++ b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/__init__.py @@ -2,6 +2,5 @@ # # SPDX-License-Identifier: Apache-2.0 
from .generator import NvidiaGenerator -from .models import NvidiaGeneratorModel -__all__ = ["NvidiaGenerator", "NvidiaGeneratorModel"] +__all__ = ["NvidiaGenerator"] diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nim_backend.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nim_backend.py new file mode 100644 index 000000000..499a60b78 --- /dev/null +++ b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nim_backend.py @@ -0,0 +1,69 @@ +from typing import Any, Dict, List, Optional, Tuple + +import requests + +from .backend import GeneratorBackend + +REQUEST_TIMEOUT = 60 + + +class NimBackend(GeneratorBackend): + def __init__( + self, + model: str, + api_url: str, + model_kwargs: Optional[Dict[str, Any]] = None, + ): + headers = { + "Content-Type": "application/json", + "accept": "application/json", + } + self.session = requests.Session() + self.session.headers.update(headers) + + self.model = model + self.api_url = api_url + self.model_kwargs = model_kwargs or {} + + def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]]]: + # We're using the chat completion endpoint as the local containers don't support + # the /completions endpoint. So both the non-chat and chat generator will use this. 
+ url = f"{self.api_url}/chat/completions" + + res = self.session.post( + url, + json={ + "model": self.model, + "messages": [ + { + "role": "user", + "content": prompt, + }, + ], + **self.model_kwargs, + }, + timeout=REQUEST_TIMEOUT, + ) + res.raise_for_status() + + completions = res.json() + choices = completions["choices"] + # Sort the choices by index, we don't know whether they're out of order or not + choices.sort(key=lambda c: c["index"]) + replies = [] + meta = [] + for choice in choices: + message = choice["message"] + replies.append(message["content"]) + choice_meta = { + "role": message["role"], + "finish_reason": choice["finish_reason"], + "usage": { + "prompt_tokens": completions["usage"]["prompt_tokens"], + "completion_tokens": completions["usage"]["completion_tokens"], + "total_tokens": completions["usage"]["total_tokens"], + }, + } + meta.append(choice_meta) + + return replies, meta diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nvcf_backend.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nvcf_backend.py new file mode 100644 index 000000000..c0686c132 --- /dev/null +++ b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nvcf_backend.py @@ -0,0 +1,117 @@ +from dataclasses import asdict, dataclass +from typing import Any, Dict, List, Optional, Tuple + +from haystack.utils.auth import Secret +from haystack_integrations.utils.nvidia import NvidiaCloudFunctionsClient + +from .backend import GeneratorBackend + + +class NvcfBackend(GeneratorBackend): + def __init__( + self, + model: str, + api_key: Secret, + model_kwargs: Optional[Dict[str, Any]] = None, + ): + if not model.startswith("playground_"): + model = f"playground_{model}" + + super().__init__(model=model, model_kwargs=model_kwargs) + + self.api_key = api_key + self.client = NvidiaCloudFunctionsClient( + api_key=api_key, + headers={ + "Content-Type": "application/json", + "Accept": 
"application/json", + }, + ) + self.nvcf_id = self.client.get_model_nvcf_id(self.model_name) + + def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]]]: + messages = [Message(role="user", content=prompt)] + request = GenerationRequest(messages=messages, **self.model_kwargs).to_dict() + json_response = self.client.query_function(self.nvcf_id, request) + response = GenerationResponse.from_dict(json_response) + + replies = [] + meta = [] + for choice in response.choices: + replies.append(choice.message.content) + meta.append( + { + "role": choice.message.role, + "finish_reason": choice.finish_reason, + "usage": { + "completion_tokens": response.usage.completion_tokens, + "prompt_tokens": response.usage.prompt_tokens, + "total_tokens": response.usage.total_tokens, + }, + } + ) + return replies, meta + + +@dataclass +class Message: + content: str + role: str + + +@dataclass +class GenerationRequest: + messages: List[Message] + temperature: float = 0.2 + top_p: float = 0.7 + max_tokens: int = 1024 + seed: Optional[int] = None + bad: Optional[List[str]] = None + stop: Optional[List[str]] = None + + def to_dict(self) -> Dict[str, Any]: + return asdict(self) + + +@dataclass +class Choice: + index: int + message: Message + finish_reason: str + + +@dataclass +class Usage: + completion_tokens: int + prompt_tokens: int + total_tokens: int + + +@dataclass +class GenerationResponse: + id: str + choices: List[Choice] + usage: Usage + + @classmethod + def from_dict(cls, data: dict) -> "GenerationResponse": + try: + return cls( + id=data["id"], + choices=[ + Choice( + index=choice["index"], + message=Message(content=choice["message"]["content"], role=choice["message"]["role"]), + finish_reason=choice["finish_reason"], + ) + for choice in data["choices"] + ], + usage=Usage( + completion_tokens=data["usage"]["completion_tokens"], + prompt_tokens=data["usage"]["prompt_tokens"], + total_tokens=data["usage"]["total_tokens"], + ), + ) + except (KeyError, TypeError) as 
e: + msg = f"Failed to parse {cls.__name__} from data: {data}" + raise ValueError(msg) from e diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/backend.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/backend.py new file mode 100644 index 000000000..d14199daf --- /dev/null +++ b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/backend.py @@ -0,0 +1,29 @@ +from abc import ABC, abstractmethod +from typing import Any, Dict, List, Optional, Tuple + + +class GeneratorBackend(ABC): + def __init__(self, model: str, model_kwargs: Optional[Dict[str, Any]] = None): + """ + Initialize the backend. + + :param model: + The name of the model to use. + :param model_kwargs: + Additional keyword arguments to pass to the model. + """ + self.model_name = model + self.model_kwargs = model_kwargs or {} + + @abstractmethod + def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]]]: + """ + Invoke the backend and prompt the model. + + :param prompt: + Prompt text. + :return: + The generated texts and the related + metadata returned by the service. 
+ """ + pass diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py index 46550baab..f2f94c3a2 100644 --- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py +++ b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py @@ -1,14 +1,14 @@ # SPDX-FileCopyrightText: 2024-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, Optional from haystack import component, default_from_dict, default_to_dict from haystack.utils.auth import Secret, deserialize_secrets_inplace -from haystack_integrations.utils.nvidia import NvidiaCloudFunctionsClient -from ._schema import GenerationRequest, GenerationResponse, Message -from .models import NvidiaGeneratorModel +from ._nim_backend import NimBackend +from ._nvcf_backend import NvcfBackend +from .backend import GeneratorBackend @component @@ -22,14 +22,11 @@ class NvidiaGenerator: from haystack_integrations.components.generators.nvidia import NvidiaGenerator, NvidiaGeneratorModel generator = NvidiaGenerator( - model=NvidiaGeneratorModel.NV_LLAMA2_RLHF_70B, + model="nv_llama2_rlhf_70b", model_arguments={ "temperature": 0.2, "top_p": 0.7, "max_tokens": 1024, - "seed": None, - "bad": None, - "stop": None, }, ) generator.warm_up() @@ -37,13 +34,15 @@ class NvidiaGenerator: result = generator.run(prompt="What is the answer?") print(result["replies"]) print(result["meta"]) + print(result["usage"]) ``` """ def __init__( self, - model: Union[str, NvidiaGeneratorModel], - api_key: Secret = Secret.from_env_var("NVIDIA_API_KEY"), + model: str, + api_url: Optional[str] = None, + api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"), model_arguments: Optional[Dict[str, Any]] = None, ): """ @@ -54,38 +53,39 @@ def __init__( See the 
[Nvidia catalog](https://catalog.ngc.nvidia.com/ai-foundation-models) for more information on the supported models. :param api_key: - Nvidia API key to use for authentication. + API key for the NVIDIA AI Foundation Endpoints. + :param api_url: + Custom API URL for the NVIDIA NeMo Inference Microservices. :param model_arguments: Additional arguments to pass to the model provider. Different models accept different arguments. Search your model in the [Nvidia catalog](https://catalog.ngc.nvidia.com/ai-foundation-models) to know the supported arguments. - - :raises ValueError: If `model` is not supported. """ - if isinstance(model, str): - model = NvidiaGeneratorModel.from_str(model) - self._model = model + self._api_url = api_url self._api_key = api_key self._model_arguments = model_arguments or {} - # This is initialized in warm_up - self._model_id = None - - self._client = NvidiaCloudFunctionsClient( - api_key=api_key, - headers={ - "Content-Type": "application/json", - "Accept": "application/json", - }, - ) + + self._backend: Optional[GeneratorBackend] = None def warm_up(self): """ Initializes the component. """ - if self._model_id is not None: + if self._backend is not None: return - self._model_id = self._client.get_model_nvcf_id(str(self._model)) + + if self._api_url is None: + if self._api_key is None: + msg = "API key is required for NVIDIA AI Foundation Endpoints." + raise ValueError(msg) + self._backend = NvcfBackend(self._model, api_key=self._api_key, model_kwargs=self._model_arguments) + else: + self._backend = NimBackend( + self._model, + api_url=self._api_url, + model_kwargs=self._model_arguments, + ) def to_dict(self) -> Dict[str, Any]: """ @@ -95,7 +95,11 @@ def to_dict(self) -> Dict[str, Any]: Dictionary with serialized data. 
""" return default_to_dict( - self, model=str(self._model), api_key=self._api_key.to_dict(), model_arguments=self._model_arguments + self, + model=self._model, + api_url=self._api_url, + api_key=self._api_key.to_dict() if self._api_key else None, + model_arguments=self._model_arguments, ) @classmethod @@ -112,7 +116,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "NvidiaGenerator": deserialize_secrets_inplace(init_params, ["api_key"]) return default_from_dict(cls, data) - @component.output_types(replies=List[str], meta=List[Dict[str, Any]], usage=Dict[str, int]) + @component.output_types(replies=List[str], meta=List[Dict[str, Any]]) def run(self, prompt: str): """ Queries the model with the provided prompt. @@ -123,32 +127,12 @@ def run(self, prompt: str): A dictionary with the following keys: - `replies` - Replies generated by the model. - `meta` - Metadata for each reply. - - `usage` - Usage statistics for the model. """ - if self._model_id is None: + if self._backend is None: msg = "The generation model has not been loaded. Call warm_up() before running." 
raise RuntimeError(msg) - messages = [Message(role="user", content=prompt)] - request = GenerationRequest(messages=messages, **self._model_arguments).to_dict() - json_response = self._client.query_function(self._model_id, request) - - replies = [] - meta = [] - data = GenerationResponse.from_dict(json_response) - for choice in data.choices: - replies.append(choice.message.content) - meta.append( - { - "role": choice.message.role, - "finish_reason": choice.finish_reason, - } - ) - - usage = { - "completion_tokens": data.usage.completion_tokens, - "prompt_tokens": data.usage.prompt_tokens, - "total_tokens": data.usage.total_tokens, - } + assert self._backend is not None + replies, meta = self._backend.generate(prompt=prompt) - return {"replies": replies, "meta": meta, "usage": usage} + return {"replies": replies, "meta": meta} diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/models.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/models.py deleted file mode 100644 index 448fb7aec..000000000 --- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/models.py +++ /dev/null @@ -1,35 +0,0 @@ -# SPDX-FileCopyrightText: 2024-present deepset GmbH -# -# SPDX-License-Identifier: Apache-2.0 -from enum import Enum - - -class NvidiaGeneratorModel(Enum): - """ - Generator models supported by NvidiaGenerator and NvidiaChatGenerator. - """ - - NV_LLAMA2_RLHF_70B = "playground_nv_llama2_rlhf_70b" - STEERLM_LLAMA_70B = "playground_steerlm_llama_70b" - NEMOTRON_STEERLM_8B = "playground_nemotron_steerlm_8b" - NEMOTRON_QA_8B = "playground_nemotron_qa_8b" - - def __str__(self): - return self.value - - @classmethod - def from_str(cls, string: str) -> "NvidiaGeneratorModel": - """ - Create a generator model from a string. - - :param string: - String to convert. - :returns: - A generator model. 
- """ - enum_map = {e.value: e for e in NvidiaGeneratorModel} - models = enum_map.get(string) - if models is None: - msg = f"Unknown model '{string}'. Supported models are: {list(enum_map.keys())}" - raise ValueError(msg) - return models diff --git a/integrations/nvidia/tests/test_document_embedder.py b/integrations/nvidia/tests/test_document_embedder.py index ed8af93c9..7ac89d5e2 100644 --- a/integrations/nvidia/tests/test_document_embedder.py +++ b/integrations/nvidia/tests/test_document_embedder.py @@ -1,36 +1,19 @@ import os +from unittest.mock import Mock, patch import pytest from haystack import Document from haystack.utils import Secret -from haystack_integrations.components.embedders.nvidia import NvidiaDocumentEmbedder, NvidiaEmbeddingModel -from haystack_integrations.utils.nvidia.client import AvailableNvidiaCloudFunctions - - -class MockClient: - def query_function(self, func_id, payload): - inputs = payload["input"] - data = [{"index": i, "embedding": [0.1, 0.2, 0.3]} for i in range(len(inputs))] - return {"data": data, "usage": {"total_tokens": 4, "prompt_tokens": 4}} - - def available_functions(self): - return { - NvidiaEmbeddingModel.NVOLVE_40K.value: AvailableNvidiaCloudFunctions( - name=NvidiaEmbeddingModel.NVOLVE_40K.value, id="fake-id", status="ACTIVE" - ) - } - - def get_model_nvcf_id(self, model): - return "fake-id" +from haystack_integrations.components.embedders.nvidia import NvidiaDocumentEmbedder class TestNvidiaDocumentEmbedder: def test_init_default(self, monkeypatch): monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") - embedder = NvidiaDocumentEmbedder(NvidiaEmbeddingModel.NVOLVE_40K) + embedder = NvidiaDocumentEmbedder("nvolveqa_40k") assert embedder.api_key == Secret.from_env_var("NVIDIA_API_KEY") - assert embedder.model == NvidiaEmbeddingModel.NVOLVE_40K + assert embedder.model == "nvolveqa_40k" assert embedder.prefix == "" assert embedder.suffix == "" assert embedder.batch_size == 32 @@ -41,7 +24,7 @@ def test_init_default(self, 
monkeypatch): def test_init_with_parameters(self): embedder = NvidiaDocumentEmbedder( api_key=Secret.from_token("fake-api-key"), - model="playground_nvolveqa_40k", + model="nvolveqa_40k", prefix="prefix", suffix="suffix", batch_size=30, @@ -51,7 +34,7 @@ def test_init_with_parameters(self): ) assert embedder.api_key == Secret.from_token("fake-api-key") - assert embedder.model == NvidiaEmbeddingModel.NVOLVE_40K + assert embedder.model == "nvolveqa_40k" assert embedder.prefix == "prefix" assert embedder.suffix == "suffix" assert embedder.batch_size == 30 @@ -61,12 +44,9 @@ def test_init_with_parameters(self): def test_init_fail_wo_api_key(self, monkeypatch): monkeypatch.delenv("NVIDIA_API_KEY", raising=False) + embedder = NvidiaDocumentEmbedder("nvolveqa_40k") with pytest.raises(ValueError): - NvidiaDocumentEmbedder(NvidiaEmbeddingModel.NVOLVE_40K) - - def test_init_fail_batch_size(self, monkeypatch): - with pytest.raises(ValueError): - NvidiaDocumentEmbedder(model="playground_nvolveqa_40k", batch_size=55) + embedder.warm_up() def test_to_dict(self, monkeypatch): monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") @@ -76,6 +56,7 @@ def test_to_dict(self, monkeypatch): "type": "haystack_integrations.components.embedders.nvidia.document_embedder.NvidiaDocumentEmbedder", "init_parameters": { "api_key": {"env_vars": ["NVIDIA_API_KEY"], "strict": True, "type": "env_var"}, + "api_url": None, "model": "playground_nvolveqa_40k", "prefix": "", "suffix": "", @@ -90,6 +71,7 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch): monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") component = NvidiaDocumentEmbedder( model="playground_nvolveqa_40k", + api_url="https://example.com", prefix="prefix", suffix="suffix", batch_size=10, @@ -102,6 +84,7 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch): "type": "haystack_integrations.components.embedders.nvidia.document_embedder.NvidiaDocumentEmbedder", "init_parameters": { "api_key": {"env_vars": 
["NVIDIA_API_KEY"], "strict": True, "type": "env_var"}, + "api_url": "https://example.com", "model": "playground_nvolveqa_40k", "prefix": "prefix", "suffix": "suffix", @@ -155,14 +138,25 @@ def test_prepare_texts_to_embed_w_suffix(self): "my_prefix document number 4 my_suffix", ] - def test_embed_batch(self): + @patch("haystack_integrations.components.embedders.nvidia._nvcf_backend.NvidiaCloudFunctionsClient") + def test_embed_batch(self, mock_client_class): texts = ["text 1", "text 2", "text 3", "text 4", "text 5"] embedder = NvidiaDocumentEmbedder( "playground_nvolveqa_40k", api_key=Secret.from_token("fake-api-key"), ) - embedder.client = MockClient() + + def mock_query_function(_, payload): + inputs = payload["input"] + data = [{"index": i, "embedding": [0.1, 0.2, 0.3]} for i in range(len(inputs))] + return {"data": data, "usage": {"total_tokens": 4, "prompt_tokens": 4}} + + mock_client = Mock( + get_model_nvcf_id=Mock(return_value="some_id"), + query_function=mock_query_function, + ) + mock_client_class.return_value = mock_client embedder.warm_up() embeddings, metadata = embedder._embed_batch(texts_to_embed=texts, batch_size=2) @@ -176,7 +170,8 @@ def test_embed_batch(self): assert metadata == {"usage": {"prompt_tokens": 3 * 4, "total_tokens": 3 * 4}} - def test_run(self): + @patch("haystack_integrations.components.embedders.nvidia._nvcf_backend.NvidiaCloudFunctionsClient") + def test_run(self, mock_client_class): docs = [ Document(content="I love cheese", meta={"topic": "Cuisine"}), Document(content="A transformer is a deep learning architecture", meta={"topic": "ML"}), @@ -191,7 +186,17 @@ def test_run(self): meta_fields_to_embed=["topic"], embedding_separator=" | ", ) - embedder.client = MockClient() + + def mock_query_function(_, payload): + inputs = payload["input"] + data = [{"index": i, "embedding": [0.1, 0.2, 0.3]} for i in range(len(inputs))] + return {"data": data, "usage": {"total_tokens": 4, "prompt_tokens": 4}} + + mock_client = Mock( + 
get_model_nvcf_id=Mock(return_value="some_id"), + query_function=mock_query_function, + ) + mock_client_class.return_value = mock_client embedder.warm_up() result = embedder.run(documents=docs) @@ -208,7 +213,8 @@ def test_run(self): assert all(isinstance(x, float) for x in doc.embedding) assert metadata == {"usage": {"prompt_tokens": 4, "total_tokens": 4}} - def test_run_custom_batch_size(self): + @patch("haystack_integrations.components.embedders.nvidia._nvcf_backend.NvidiaCloudFunctionsClient") + def test_run_custom_batch_size(self, mock_client_class): docs = [ Document(content="I love cheese", meta={"topic": "Cuisine"}), Document(content="A transformer is a deep learning architecture", meta={"topic": "ML"}), @@ -223,7 +229,17 @@ def test_run_custom_batch_size(self): embedding_separator=" | ", batch_size=1, ) - embedder.client = MockClient() + + def mock_query_function(_, payload): + inputs = payload["input"] + data = [{"index": i, "embedding": [0.1, 0.2, 0.3]} for i in range(len(inputs))] + return {"data": data, "usage": {"total_tokens": 4, "prompt_tokens": 4}} + + mock_client = Mock( + get_model_nvcf_id=Mock(return_value="some_id"), + query_function=mock_query_function, + ) + mock_client_class.return_value = mock_client embedder.warm_up() result = embedder.run(documents=docs) @@ -241,9 +257,20 @@ def test_run_custom_batch_size(self): assert metadata == {"usage": {"prompt_tokens": 2 * 4, "total_tokens": 2 * 4}} - def test_run_wrong_input_format(self): + @patch("haystack_integrations.components.embedders.nvidia._nvcf_backend.NvidiaCloudFunctionsClient") + def test_run_wrong_input_format(self, mock_client_class): embedder = NvidiaDocumentEmbedder("playground_nvolveqa_40k", api_key=Secret.from_token("fake-api-key")) - embedder.client = MockClient() + + def mock_query_function(_, payload): + inputs = payload["input"] + data = [{"index": i, "embedding": [0.1, 0.2, 0.3]} for i in range(len(inputs))] + return {"data": data, "usage": {"total_tokens": 4, 
"prompt_tokens": 4}} + + mock_client = Mock( + get_model_nvcf_id=Mock(return_value="some_id"), + query_function=mock_query_function, + ) + mock_client_class.return_value = mock_client embedder.warm_up() string_input = "text" @@ -255,9 +282,20 @@ def test_run_wrong_input_format(self): with pytest.raises(TypeError, match="NvidiaDocumentEmbedder expects a list of Documents as input"): embedder.run(documents=list_integers_input) - def test_run_on_empty_list(self): + @patch("haystack_integrations.components.embedders.nvidia._nvcf_backend.NvidiaCloudFunctionsClient") + def test_run_on_empty_list(self, mock_client_class): embedder = NvidiaDocumentEmbedder("playground_nvolveqa_40k", api_key=Secret.from_token("fake-api-key")) - embedder.client = MockClient() + + def mock_query_function(_, payload): + inputs = payload["input"] + data = [{"index": i, "embedding": [0.1, 0.2, 0.3]} for i in range(len(inputs))] + return {"data": data, "usage": {"total_tokens": 4, "prompt_tokens": 4}} + + mock_client = Mock( + get_model_nvcf_id=Mock(return_value="some_id"), + query_function=mock_query_function, + ) + mock_client_class.return_value = mock_client embedder.warm_up() empty_list_input = [] @@ -288,3 +326,32 @@ def test_run_integration(self): for doc in docs_with_embeddings: assert isinstance(doc.embedding, list) assert isinstance(doc.embedding[0], float) + + @pytest.mark.skipif( + not os.environ.get("NVIDIA_NIM_EMBEDDER_MODEL", None) or not os.environ.get("NVIDIA_NIM_ENDPOINT_URL", None), + reason="Export an env var called NVIDIA_NIM_EMBEDDER_MODEL containing the hosted model name and " + "NVIDIA_NIM_ENDPOINT_URL containing the local URL to call.", + ) + @pytest.mark.integration + def test_run_integration_with_nim_backend(self): + model = os.environ["NVIDIA_NIM_EMBEDDER_MODEL"] + url = os.environ["NVIDIA_NIM_ENDPOINT_URL"] + embedder = NvidiaDocumentEmbedder( + model=model, + api_url=url, + api_key=None, + ) + embedder.warm_up() + docs = [ + Document(content="I love cheese", 
meta={"topic": "Cuisine"}), + Document(content="A transformer is a deep learning architecture", meta={"topic": "ML"}), + ] + + result = embedder.run(docs) + docs_with_embeddings = result["documents"] + + assert isinstance(docs_with_embeddings, list) + assert len(docs_with_embeddings) == len(docs) + for doc in docs_with_embeddings: + assert isinstance(doc.embedding, list) + assert isinstance(doc.embedding[0], float) diff --git a/integrations/nvidia/tests/test_generator.py b/integrations/nvidia/tests/test_generator.py index b10b60951..9a157a9d1 100644 --- a/integrations/nvidia/tests/test_generator.py +++ b/integrations/nvidia/tests/test_generator.py @@ -2,21 +2,20 @@ # # SPDX-License-Identifier: Apache-2.0 import os -from unittest.mock import patch +from unittest.mock import Mock, patch import pytest from haystack.utils import Secret from haystack_integrations.components.generators.nvidia import NvidiaGenerator -from haystack_integrations.components.generators.nvidia.models import NvidiaGeneratorModel class TestNvidiaGenerator: def test_init_default(self, monkeypatch): monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") - generator = NvidiaGenerator(NvidiaGeneratorModel.NV_LLAMA2_RLHF_70B) + generator = NvidiaGenerator("playground_nv_llama2_rlhf_70b") assert generator._api_key == Secret.from_env_var("NVIDIA_API_KEY") - assert generator._model == NvidiaGeneratorModel.NV_LLAMA2_RLHF_70B + assert generator._model == "playground_nv_llama2_rlhf_70b" assert generator._model_arguments == {} def test_init_with_parameters(self): @@ -33,7 +32,7 @@ def test_init_with_parameters(self): }, ) assert generator._api_key == Secret.from_token("fake-api-key") - assert generator._model == NvidiaGeneratorModel.NEMOTRON_STEERLM_8B + assert generator._model == "playground_nemotron_steerlm_8b" assert generator._model_arguments == { "temperature": 0.2, "top_p": 0.7, @@ -45,16 +44,18 @@ def test_init_with_parameters(self): def test_init_fail_wo_api_key(self, monkeypatch): 
monkeypatch.delenv("NVIDIA_API_KEY", raising=False) + generator = NvidiaGenerator("playground_nemotron_steerlm_8b") with pytest.raises(ValueError): - NvidiaGenerator("playground_nemotron_steerlm_8b") + generator.warm_up() def test_to_dict(self, monkeypatch): monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") - generator = NvidiaGenerator(NvidiaGeneratorModel.NEMOTRON_STEERLM_8B) + generator = NvidiaGenerator("playground_nemotron_steerlm_8b") data = generator.to_dict() assert data == { "type": "haystack_integrations.components.generators.nvidia.generator.NvidiaGenerator", "init_parameters": { + "api_url": None, "api_key": {"env_vars": ["NVIDIA_API_KEY"], "strict": True, "type": "env_var"}, "model": "playground_nemotron_steerlm_8b", "model_arguments": {}, @@ -64,7 +65,8 @@ def test_to_dict(self, monkeypatch): def test_to_dict_with_custom_init_parameters(self, monkeypatch): monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") generator = NvidiaGenerator( - model=NvidiaGeneratorModel.NEMOTRON_STEERLM_8B, + model="playground_nemotron_steerlm_8b", + api_url="https://my.url.com", model_arguments={ "temperature": 0.2, "top_p": 0.7, @@ -79,6 +81,7 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch): "type": "haystack_integrations.components.generators.nvidia.generator.NvidiaGenerator", "init_parameters": { "api_key": {"env_vars": ["NVIDIA_API_KEY"], "strict": True, "type": "env_var"}, + "api_url": "https://my.url.com", "model": "playground_nemotron_steerlm_8b", "model_arguments": { "temperature": 0.2, @@ -91,10 +94,10 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch): }, } - @patch("haystack_integrations.components.generators.nvidia.generator.NvidiaCloudFunctionsClient") - def test_run(self, mock_client): + @patch("haystack_integrations.components.generators.nvidia._nvcf_backend.NvidiaCloudFunctionsClient") + def test_run(self, mock_client_class): generator = NvidiaGenerator( - model=NvidiaGeneratorModel.NEMOTRON_STEERLM_8B, + 
model="playground_nemotron_steerlm_8b", api_key=Secret.from_token("fake-api-key"), model_arguments={ "temperature": 0.2, @@ -105,22 +108,25 @@ def test_run(self, mock_client): "stop": None, }, ) - mock_client.get_model_nvcf_id.return_value = "some_id" - generator._client = mock_client + mock_client = Mock( + get_model_nvcf_id=Mock(return_value="some_id"), + query_function=Mock( + return_value={ + "id": "some_id", + "choices": [ + { + "index": 0, + "message": {"content": "42", "role": "assistant"}, + "finish_reason": "stop", + } + ], + "usage": {"total_tokens": 21, "prompt_tokens": 19, "completion_tokens": 2}, + } + ), + ) + mock_client_class.return_value = mock_client generator.warm_up() - mock_client.get_model_nvcf_id.assert_called_once_with("playground_nemotron_steerlm_8b") - mock_client.query_function.return_value = { - "id": "some_id", - "choices": [ - { - "index": 0, - "message": {"content": "42", "role": "assistant"}, - "finish_reason": "stop", - } - ], - "usage": {"total_tokens": 21, "prompt_tokens": 19, "completion_tokens": 2}, - } result = generator.run(prompt="What is the answer?") mock_client.query_function.assert_called_once_with( "some_id", @@ -142,13 +148,13 @@ def test_run(self, mock_client): { "finish_reason": "stop", "role": "assistant", + "usage": { + "total_tokens": 21, + "prompt_tokens": 19, + "completion_tokens": 2, + }, }, ], - "usage": { - "total_tokens": 21, - "prompt_tokens": 19, - "completion_tokens": 2, - }, } @pytest.mark.skipif( @@ -156,9 +162,9 @@ def test_run(self, mock_client): reason="Export an env var called NVIDIA_API_KEY containing the Nvidia API key to run this test.", ) @pytest.mark.integration - def test_run_integration(self): + def test_run_integration_with_nvcf_backend(self): generator = NvidiaGenerator( - model=NvidiaGeneratorModel.NV_LLAMA2_RLHF_70B, + model="playground_nv_llama2_rlhf_70b", model_arguments={ "temperature": 0.2, "top_p": 0.7, @@ -173,4 +179,26 @@ def test_run_integration(self): assert result["replies"] 
assert result["meta"] - assert result["usage"] + + @pytest.mark.skipif( + not os.environ.get("NVIDIA_NIM_GENERATOR_MODEL", None) or not os.environ.get("NVIDIA_NIM_ENDPOINT_URL", None), + reason="Export an env var called NVIDIA_NIM_GENERATOR_MODEL containing the hosted model name and " + "NVIDIA_NIM_ENDPOINT_URL containing the local URL to call.", + ) + @pytest.mark.integration + def test_run_integration_with_nim_backend(self): + model = os.environ["NVIDIA_NIM_GENERATOR_MODEL"] + url = os.environ["NVIDIA_NIM_ENDPOINT_URL"] + generator = NvidiaGenerator( + model=model, + api_url=url, + api_key=None, + model_arguments={ + "temperature": 0.2, + }, + ) + generator.warm_up() + result = generator.run(prompt="What is the answer?") + + assert result["replies"] + assert result["meta"] diff --git a/integrations/nvidia/tests/test_text_embedder.py b/integrations/nvidia/tests/test_text_embedder.py index 8ba2f6783..39ee02206 100644 --- a/integrations/nvidia/tests/test_text_embedder.py +++ b/integrations/nvidia/tests/test_text_embedder.py @@ -1,63 +1,49 @@ import os +from unittest.mock import Mock, patch import pytest from haystack.utils import Secret -from haystack_integrations.components.embedders.nvidia import NvidiaEmbeddingModel, NvidiaTextEmbedder -from haystack_integrations.utils.nvidia.client import AvailableNvidiaCloudFunctions - - -class MockClient: - def query_function(self, func_id, payload): - data = [{"index": 0, "embedding": [0.1, 0.2, 0.3]}] - return {"data": data, "usage": {"total_tokens": 4, "prompt_tokens": 4}} - - def available_functions(self): - return { - NvidiaEmbeddingModel.NVOLVE_40K.value: AvailableNvidiaCloudFunctions( - name=NvidiaEmbeddingModel.NVOLVE_40K.value, id="fake-id", status="ACTIVE" - ) - } - - def get_model_nvcf_id(self, model): - return "fake-id" +from haystack_integrations.components.embedders.nvidia import NvidiaTextEmbedder class TestNvidiaTextEmbedder: def test_init_default(self, monkeypatch): monkeypatch.setenv("NVIDIA_API_KEY", 
"fake-api-key") - embedder = NvidiaTextEmbedder(NvidiaEmbeddingModel.NVOLVE_40K) + embedder = NvidiaTextEmbedder("nvolveqa_40k") assert embedder.api_key == Secret.from_env_var("NVIDIA_API_KEY") - assert embedder.model == NvidiaEmbeddingModel.NVOLVE_40K + assert embedder.model == "nvolveqa_40k" assert embedder.prefix == "" assert embedder.suffix == "" def test_init_with_parameters(self): embedder = NvidiaTextEmbedder( api_key=Secret.from_token("fake-api-key"), - model="playground_nvolveqa_40k", + model="nvolveqa_40k", prefix="prefix", suffix="suffix", ) assert embedder.api_key == Secret.from_token("fake-api-key") - assert embedder.model == NvidiaEmbeddingModel.NVOLVE_40K + assert embedder.model == "nvolveqa_40k" assert embedder.prefix == "prefix" assert embedder.suffix == "suffix" def test_init_fail_wo_api_key(self, monkeypatch): monkeypatch.delenv("NVIDIA_API_KEY", raising=False) + embedder = NvidiaTextEmbedder("nvolveqa_40k") with pytest.raises(ValueError): - NvidiaTextEmbedder(NvidiaEmbeddingModel.NVOLVE_40K) + embedder.warm_up() def test_to_dict(self, monkeypatch): monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") - component = NvidiaTextEmbedder(NvidiaEmbeddingModel.NVOLVE_40K) + component = NvidiaTextEmbedder("nvolveqa_40k") data = component.to_dict() assert data == { "type": "haystack_integrations.components.embedders.nvidia.text_embedder.NvidiaTextEmbedder", "init_parameters": { "api_key": {"env_vars": ["NVIDIA_API_KEY"], "strict": True, "type": "env_var"}, - "model": "playground_nvolveqa_40k", + "api_url": None, + "model": "nvolveqa_40k", "prefix": "", "suffix": "", }, @@ -66,7 +52,7 @@ def test_to_dict(self, monkeypatch): def test_to_dict_with_custom_init_parameters(self, monkeypatch): monkeypatch.setenv("NVIDIA_API_KEY", "fake-api-key") component = NvidiaTextEmbedder( - model=NvidiaEmbeddingModel.NVOLVE_40K, + model="nvolveqa_40k", prefix="prefix", suffix="suffix", ) @@ -75,17 +61,28 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch): 
"type": "haystack_integrations.components.embedders.nvidia.text_embedder.NvidiaTextEmbedder", "init_parameters": { "api_key": {"env_vars": ["NVIDIA_API_KEY"], "strict": True, "type": "env_var"}, - "model": "playground_nvolveqa_40k", + "api_url": None, + "model": "nvolveqa_40k", "prefix": "prefix", "suffix": "suffix", }, } - def test_run(self): + @patch("haystack_integrations.components.embedders.nvidia._nvcf_backend.NvidiaCloudFunctionsClient") + def test_run(self, mock_client_class): embedder = NvidiaTextEmbedder( "playground_nvolveqa_40k", api_key=Secret.from_token("fake-api-key"), prefix="prefix ", suffix=" suffix" ) - embedder.client = MockClient() + mock_client = Mock( + get_model_nvcf_id=Mock(return_value="some_id"), + query_function=Mock( + return_value={ + "data": [{"index": 0, "embedding": [0.1, 0.2, 0.3]}], + "usage": {"total_tokens": 4, "prompt_tokens": 4}, + } + ), + ) + mock_client_class.return_value = mock_client embedder.warm_up() result = embedder.run(text="The food was delicious") @@ -95,9 +92,19 @@ def test_run(self): "usage": {"prompt_tokens": 4, "total_tokens": 4}, } - def test_run_wrong_input_format(self): + @patch("haystack_integrations.components.embedders.nvidia._nvcf_backend.NvidiaCloudFunctionsClient") + def test_run_wrong_input_format(self, mock_client_class): embedder = NvidiaTextEmbedder("playground_nvolveqa_40k", api_key=Secret.from_token("fake-api-key")) - embedder.client = MockClient() + mock_client = Mock( + get_model_nvcf_id=Mock(return_value="some_id"), + query_function=Mock( + return_value={ + "data": [{"index": 0, "embedding": [0.1, 0.2, 0.3]}], + "usage": {"total_tokens": 4, "prompt_tokens": 4}, + } + ), + ) + mock_client_class.return_value = mock_client embedder.warm_up() list_integers_input = [1, 2, 3] @@ -110,7 +117,7 @@ def test_run_wrong_input_format(self): reason="Export an env var called NVIDIA_API_KEY containing the Nvidia API key to run this test.", ) @pytest.mark.integration - def test_run_integration(self): + def 
test_run_integration_with_nvcf_backend(self): embedder = NvidiaTextEmbedder("playground_nvolveqa_40k") embedder.warm_up() @@ -120,3 +127,26 @@ def test_run_integration(self): assert all(isinstance(x, float) for x in embedding) assert "usage" in meta + + @pytest.mark.skipif( + not os.environ.get("NVIDIA_NIM_EMBEDDER_MODEL", None) or not os.environ.get("NVIDIA_NIM_ENDPOINT_URL", None), + reason="Export an env var called NVIDIA_NIM_EMBEDDER_MODEL containing the hosted model name and " + "NVIDIA_NIM_ENDPOINT_URL containing the local URL to call.", + ) + @pytest.mark.integration + def test_run_integration_with_nim_backend(self): + model = os.environ["NVIDIA_NIM_EMBEDDER_MODEL"] + url = os.environ["NVIDIA_NIM_ENDPOINT_URL"] + embedder = NvidiaTextEmbedder( + model=model, + api_url=url, + api_key=None, + ) + embedder.warm_up() + + result = embedder.run("A transformer is a deep learning architecture") + embedding = result["embedding"] + meta = result["meta"] + + assert all(isinstance(x, float) for x in embedding) + assert "usage" in meta