feat: Improvements to NvidiaRanker and adding user input timeout #1193

Merged · 16 commits · Nov 21, 2024
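The common thread across the changed files is a new `timeout` init parameter on the embedders and the generator. It is resolved the same way everywhere: an explicit argument wins, otherwise the `NVIDIA_TIMEOUT` environment variable is used, and 60 seconds is the final default. A minimal sketch of that resolution logic (the helper name is illustrative; the logic mirrors the diff below):

```python
import os
from typing import Optional


def resolve_timeout(timeout: Optional[float] = None) -> float:
    """Illustrative helper mirroring the fallback added in this PR."""
    if timeout is None:
        # No explicit value: fall back to NVIDIA_TIMEOUT, then to 60 seconds.
        timeout = float(os.environ.get("NVIDIA_TIMEOUT", 60.0))
    return timeout
```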
2 changes: 1 addition & 1 deletion integrations/nvidia/README.md
@@ -38,7 +38,7 @@ hatch run test
To only run unit tests:

```
hatch run test -m"not integration"
hatch run test -m "not integration"
```

To run the linters `ruff` and `mypy`:
@@ -2,16 +2,19 @@
#
# SPDX-License-Identifier: Apache-2.0

import os
import warnings
from typing import Any, Dict, List, Optional, Tuple, Union

from haystack import Document, component, default_from_dict, default_to_dict
from haystack import Document, component, default_from_dict, default_to_dict, logging
from haystack.utils import Secret, deserialize_secrets_inplace
from tqdm import tqdm

from haystack_integrations.components.embedders.nvidia.truncate import EmbeddingTruncateMode
from haystack_integrations.utils.nvidia import NimBackend, is_hosted, url_validation

logger = logging.getLogger(__name__)

_DEFAULT_API_URL = "https://ai.api.nvidia.com/v1/retrieval/nvidia"


@@ -47,6 +50,7 @@ def __init__(
meta_fields_to_embed: Optional[List[str]] = None,
embedding_separator: str = "\n",
truncate: Optional[Union[EmbeddingTruncateMode, str]] = None,
timeout: Optional[float] = None,
):
"""
Create a NvidiaTextEmbedder component.
@@ -76,6 +80,9 @@ def __init__(
:param truncate:
Specifies how inputs longer than the maximum token length should be truncated.
If None the behavior is model-dependent, see the official documentation for more information.
:param timeout:
Timeout for request calls; if not set, it is inferred from the `NVIDIA_TIMEOUT` environment variable
or defaults to 60.
"""

self.api_key = api_key
@@ -98,6 +105,10 @@ def __init__(
if is_hosted(api_url) and not self.model: # manually set default model
self.model = "nvidia/nv-embedqa-e5-v5"

if timeout is None:
timeout = float(os.environ.get("NVIDIA_TIMEOUT", 60.0))
self.timeout = timeout

def default_model(self):
"""Set default model in local NIM mode."""
valid_models = [
@@ -128,10 +139,11 @@ def warm_up(self):
if self.truncate is not None:
model_kwargs["truncate"] = str(self.truncate)
self.backend = NimBackend(
self.model,
model=self.model,
api_url=self.api_url,
api_key=self.api_key,
model_kwargs=model_kwargs,
timeout=self.timeout,
)

self._initialized = True
@@ -158,6 +170,7 @@ def to_dict(self) -> Dict[str, Any]:
meta_fields_to_embed=self.meta_fields_to_embed,
embedding_separator=self.embedding_separator,
truncate=str(self.truncate) if self.truncate is not None else None,
timeout=self.timeout,
)

@classmethod
@@ -238,8 +251,7 @@ def run(self, documents: List[Document]):

for doc in documents:
if not doc.content:
msg = f"Document '{doc.id}' has no content to embed."
raise ValueError(msg)
logger.warning(f"Document '{doc.id}' has no content to embed.")

texts_to_embed = self._prepare_texts_to_embed(documents)
embeddings, metadata = self._embed_batch(texts_to_embed, self.batch_size)
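Judging by the `documents` run signature and the batching parameters, this first Python file is the document embedder; the class name below is assumed from the package layout. Note the behavior change in `run`: a document without content now only logs a warning instead of raising a `ValueError`. A usage sketch with an explicit timeout (requires `NVIDIA_API_KEY` for the hosted endpoint):

```python
from haystack import Document
from haystack_integrations.components.embedders.nvidia import NvidiaDocumentEmbedder  # assumed export

# An explicit timeout overrides NVIDIA_TIMEOUT and the 60-second default.
embedder = NvidiaDocumentEmbedder(model="nvidia/nv-embedqa-e5-v5", timeout=30.0)
embedder.warm_up()  # NimBackend is now built with timeout=30.0

result = embedder.run(documents=[Document(content="NIM request timeouts are configurable.")])
print(result["meta"])  # usage metadata returned alongside the embedded documents
```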
@@ -2,15 +2,18 @@
#
# SPDX-License-Identifier: Apache-2.0

import os
import warnings
from typing import Any, Dict, List, Optional, Union

from haystack import component, default_from_dict, default_to_dict
from haystack import component, default_from_dict, default_to_dict, logging
from haystack.utils import Secret, deserialize_secrets_inplace

from haystack_integrations.components.embedders.nvidia.truncate import EmbeddingTruncateMode
from haystack_integrations.utils.nvidia import NimBackend, is_hosted, url_validation

logger = logging.getLogger(__name__)

_DEFAULT_API_URL = "https://ai.api.nvidia.com/v1/retrieval/nvidia"


@@ -44,6 +47,7 @@ def __init__(
prefix: str = "",
suffix: str = "",
truncate: Optional[Union[EmbeddingTruncateMode, str]] = None,
timeout: Optional[float] = None,
):
"""
Create a NvidiaTextEmbedder component.
@@ -64,6 +68,9 @@
:param truncate:
Specifies how inputs longer than the maximum token length should be truncated.
If None the behavior is model-dependent, see the official documentation for more information.
:param timeout:
Timeout for request calls; if not set, it is inferred from the `NVIDIA_TIMEOUT` environment variable
or defaults to 60.
"""

self.api_key = api_key
@@ -82,13 +89,23 @@ def __init__(
if is_hosted(api_url) and not self.model: # manually set default model
self.model = "nvidia/nv-embedqa-e5-v5"

if timeout is None:
timeout = float(os.environ.get("NVIDIA_TIMEOUT", 60.0))
self.timeout = timeout

def default_model(self):
"""Set default model in local NIM mode."""
valid_models = [
model.id for model in self.backend.models() if not model.base_model or model.base_model == model.id
]
name = next(iter(valid_models), None)
if name:
logger.warning(
"Default model is set as: {model_name}. \n"
"Set model using model parameter. \n"
"To get available models use available_models property.",
model_name=name,
)
warnings.warn(
f"Default model is set as: {name}. \n"
"Set model using model parameter. \n"
@@ -112,10 +129,11 @@ def warm_up(self):
if self.truncate is not None:
model_kwargs["truncate"] = str(self.truncate)
self.backend = NimBackend(
self.model,
model=self.model,
api_url=self.api_url,
api_key=self.api_key,
model_kwargs=model_kwargs,
timeout=self.timeout,
)

self._initialized = True
@@ -138,6 +156,7 @@ def to_dict(self) -> Dict[str, Any]:
prefix=self.prefix,
suffix=self.suffix,
truncate=str(self.truncate) if self.truncate is not None else None,
timeout=self.timeout,
)

@classmethod
@@ -150,7 +169,9 @@ def from_dict(cls, data: Dict[str, Any]) -> "NvidiaTextEmbedder":
:returns:
The deserialized component.
"""
deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"])
init_parameters = data.get("init_parameters", {})
if init_parameters:
deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"])
return default_from_dict(cls, data)

@component.output_types(embedding=List[float], meta=Dict[str, Any])
@@ -162,7 +183,7 @@ def run(self, text: str):
The text to embed.
:returns:
A dictionary with the following keys and values:
- `embedding` - Embeddng of the text.
- `embedding` - Embedding of the text.
- `meta` - Metadata on usage statistics, etc.
:raises RuntimeError:
If the component was not initialized.
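The text embedder gains the same `timeout` parameter, serializes it in `to_dict`, and `from_dict` now tolerates a payload without `init_parameters`, only deserializing the `api_key` secret when they are present. A sketch of the round trip (import path assumed from the package layout):

```python
from haystack_integrations.components.embedders.nvidia import NvidiaTextEmbedder  # assumed export

embedder = NvidiaTextEmbedder(model="nvidia/nv-embedqa-e5-v5", timeout=45.0)

data = embedder.to_dict()                      # init_parameters now include "timeout": 45.0
restored = NvidiaTextEmbedder.from_dict(data)  # secrets deserialized only if init_parameters exist
assert restored.timeout == 45.0
```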
@@ -2,6 +2,7 @@
#
# SPDX-License-Identifier: Apache-2.0

import os
import warnings
from typing import Any, Dict, List, Optional

@@ -49,6 +50,7 @@ def __init__(
api_url: str = _DEFAULT_API_URL,
api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"),
model_arguments: Optional[Dict[str, Any]] = None,
timeout: Optional[float] = None,
):
"""
Create a NvidiaGenerator component.
@@ -70,6 +72,9 @@
specific to a model.
Search your model in the [NVIDIA NIM](https://ai.nvidia.com)
to find the arguments it accepts.
:param timeout:
Timeout for request calls; if not set, it is inferred from the `NVIDIA_TIMEOUT` environment variable
or defaults to 60.
"""
self._model = model
self._api_url = url_validation(api_url, _DEFAULT_API_URL, ["v1/chat/completions"])
@@ -79,6 +84,9 @@
self._backend: Optional[Any] = None

self.is_hosted = is_hosted(api_url)
if timeout is None:
timeout = float(os.environ.get("NVIDIA_TIMEOUT", 60.0))
self.timeout = timeout

def default_model(self):
"""Set default model in local NIM mode."""
@@ -110,10 +118,11 @@ def warm_up(self):
msg = "API key is required for hosted NVIDIA NIMs."
raise ValueError(msg)
self._backend = NimBackend(
self._model,
model=self._model,
api_url=self._api_url,
api_key=self._api_key,
model_kwargs=self._model_arguments,
timeout=self.timeout,
)

if not self.is_hosted and not self._model:
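Finally, the generator resolves its timeout the same way: when no argument is given, `__init__` reads `NVIDIA_TIMEOUT` and stores the result on `self.timeout`, which `warm_up` then forwards to `NimBackend`. A sketch relying on the environment variable (import path and model id are illustrative):

```python
import os

from haystack_integrations.components.generators.nvidia import NvidiaGenerator  # assumed export

os.environ["NVIDIA_TIMEOUT"] = "120"  # picked up because no timeout argument is passed

generator = NvidiaGenerator(
    model="meta/llama3-8b-instruct",  # illustrative model id
    model_arguments={"temperature": 0.2},
)
print(generator.timeout)  # 120.0
# generator.warm_up() would build NimBackend(..., timeout=120.0); hosted NIMs also need NVIDIA_API_KEY.
```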