Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add default model for NVIDIA Haystack local NIM endpoints #915

Merged
merged 21 commits into from
Aug 12, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion integrations/nvidia/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ root = "../.."
git_describe_command = 'git describe --tags --match="integrations/nvidia-v[0-9]*"'

[tool.hatch.envs.default]
dependencies = ["coverage[toml]>=6.5", "pytest", "pytest-rerunfailures", "haystack-pydoc-tools"]
dependencies = ["coverage[toml]>=6.5", "pytest", "pytest-rerunfailures", "haystack-pydoc-tools", "requests_mock", "pydantic"]
[tool.hatch.envs.default.scripts]
test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
Expand Down Expand Up @@ -147,6 +147,8 @@ module = [
"haystack_integrations.*",
"pytest.*",
"numpy.*",
"requests_mock.*",
"pydantic.*"
]
ignore_missing_imports = true

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import requests
from haystack.utils import Secret

from .backend import EmbedderBackend
from .backend import EmbedderBackend, Model

REQUEST_TIMEOUT = 60

Expand Down Expand Up @@ -50,3 +50,20 @@ def embed(self, texts: List[str]) -> Tuple[List[List[float]], Dict[str, Any]]:
embeddings = [e["embedding"] for e in sorted(data["data"], key=lambda e: e["index"])]

return embeddings, {"usage": data["usage"]}

def models(self) -> List[Model]:
    """
    Query the NIM endpoint for the list of available models.

    :return:
        Models exposed by the service at ``{api_url}/models``.
    :raises requests.HTTPError:
        If the endpoint responds with an error status.
    :raises ValueError:
        If a model entry is missing the mandatory ``id`` field.
    """
    url = f"{self.api_url}/models"

    res = self.session.get(
        url,
        timeout=REQUEST_TIMEOUT,
    )
    res.raise_for_status()

    data = res.json()["data"]
    models = []
    for element in data:
        # `assert` is stripped under `python -O`; validate explicitly instead.
        if "id" not in element:
            msg = f"No id found in {element}"
            raise ValueError(msg)
        models.append(Model(id=element["id"]))

    return models
Original file line number Diff line number Diff line change
@@ -1,6 +1,24 @@
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional, Tuple

from pydantic import BaseModel, ConfigDict


class Model(BaseModel):
    """
    Information about a model exposed by the service.

    id: unique identifier for the model, passed as model parameter for requests
    aliases: list of aliases for the model
    base_model: root model for the model
    All aliases are deprecated and will trigger a warning when used.
    """

    # `from_attributes` lets instances be built from arbitrary objects;
    # `protected_namespaces=()` silences pydantic's "model_" prefix warning.
    model_config = ConfigDict(from_attributes=True, protected_namespaces=())

    id: str
    aliases: Optional[list] = None
    base_model: Optional[str] = None


class EmbedderBackend(ABC):
def __init__(self, model: str, model_kwargs: Optional[Dict[str, Any]] = None):
Expand All @@ -27,3 +45,13 @@ def embed(self, texts: List[str]) -> Tuple[List[List[float]], Dict[str, Any]]:
metadata returned by the service.
"""
pass

@abstractmethod
def models(self) -> List[Model]:
    """
    Query the backend for the models it can serve.

    :return:
        The models available on this backend.
    """
    ...
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import warnings
from typing import Any, Dict, List, Optional, Tuple, Union
from urllib.parse import urlparse

from haystack import Document, component, default_from_dict, default_to_dict
from haystack.utils import Secret, deserialize_secrets_inplace
Expand Down Expand Up @@ -31,7 +33,7 @@ class NvidiaDocumentEmbedder:

def __init__(
self,
model: str = "NV-Embed-QA",
model: Optional[str] = None,
shadeMe marked this conversation as resolved.
Show resolved Hide resolved
api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"),
api_url: str = "https://ai.api.nvidia.com/v1/retrieval/nvidia",
prefix: str = "",
Expand Down Expand Up @@ -85,6 +87,32 @@ def __init__(

self.backend: Optional[EmbedderBackend] = None
self._initialized = False
self.is_hosted = urlparse(self.api_url).netloc in [
"integrate.api.nvidia.com",
"ai.api.nvidia.com",
]
if self.is_hosted and not self.model:
# manually set default model
self.model = "NV-Embed-QA"

def default_model(self):
    """
    Pick the first base model advertised by a locally hosted NIM and use it
    as this component's default.

    Aliases (entries whose ``base_model`` differs from their ``id``) are
    skipped. Emits a ``UserWarning`` naming the chosen model.

    :raises ValueError:
        If the endpoint advertises no usable model.
    """
    candidates = (m.id for m in self.backend.models() if not m.base_model or m.base_model == m.id)
    name = next(candidates, None)
    if not name:
        error_message = "No locally hosted model was found."
        raise ValueError(error_message)
    warnings.warn(
        f"Default model is set as: {name}. \n"
        "Set model using model parameter. \n"
        "To get available models use available_models property.",
        UserWarning,
        stacklevel=2,
    )
    self.model = name
    self.backend.model = name
shadeMe marked this conversation as resolved.
Show resolved Hide resolved

def warm_up(self):
"""
Expand All @@ -104,6 +132,8 @@ def warm_up(self):
)

self._initialized = True
if not self.is_hosted and not self.model:
self.default_model()

def to_dict(self) -> Dict[str, Any]:
"""
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import warnings
from typing import Any, Dict, List, Optional, Union
from urllib.parse import urlparse

from haystack import component, default_from_dict, default_to_dict
from haystack.utils import Secret, deserialize_secrets_inplace
Expand Down Expand Up @@ -32,7 +34,7 @@ class NvidiaTextEmbedder:

def __init__(
self,
model: str = "NV-Embed-QA",
model: Optional[str] = None,
shadeMe marked this conversation as resolved.
Show resolved Hide resolved
api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"),
api_url: str = "https://ai.api.nvidia.com/v1/retrieval/nvidia",
prefix: str = "",
Expand Down Expand Up @@ -70,6 +72,33 @@ def __init__(
self.backend: Optional[EmbedderBackend] = None
self._initialized = False

self.is_hosted = urlparse(self.api_url).netloc in [
"integrate.api.nvidia.com",
"ai.api.nvidia.com",
]
if self.is_hosted and not self.model:
# manually set default model
self.model = "NV-Embed-QA"
shadeMe marked this conversation as resolved.
Show resolved Hide resolved

def default_model(self):
    """
    Pick the first base model advertised by a locally hosted NIM and use it
    as this component's default.

    Aliases (entries whose ``base_model`` differs from their ``id``) are
    skipped. Emits a ``UserWarning`` naming the chosen model.

    :raises ValueError:
        If the endpoint advertises no usable model.
    """
    candidates = (m.id for m in self.backend.models() if not m.base_model or m.base_model == m.id)
    name = next(candidates, None)
    if not name:
        error_message = "No locally hosted model was found."
        raise ValueError(error_message)
    warnings.warn(
        f"Default model is set as: {name}. \n"
        "Set model using model parameter. \n"
        "To get available models use available_models property.",
        UserWarning,
        stacklevel=2,
    )
    self.model = name
    self.backend.model = name

def warm_up(self):
"""
Initializes the component.
Expand All @@ -89,6 +118,9 @@ def warm_up(self):

self._initialized = True

if not self.is_hosted and not self.model:
self.default_model()

def to_dict(self) -> Dict[str, Any]:
"""
Serializes the component to a dictionary.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import requests
from haystack.utils import Secret

from .backend import GeneratorBackend
from .backend import GeneratorBackend, Model

REQUEST_TIMEOUT = 60

Expand Down Expand Up @@ -78,3 +78,20 @@ def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]]]:
meta.append(choice_meta)

return replies, meta

def models(self) -> List[Model]:
    """
    Query the NIM endpoint for the list of available models.

    :return:
        Models exposed by the service at ``{api_url}/models``.
    :raises requests.HTTPError:
        If the endpoint responds with an error status.
    :raises ValueError:
        If a model entry is missing the mandatory ``id`` field.
    """
    url = f"{self.api_url}/models"

    res = self.session.get(
        url,
        timeout=REQUEST_TIMEOUT,
    )
    res.raise_for_status()

    data = res.json()["data"]
    models = []
    for element in data:
        # `assert` is stripped under `python -O`; validate explicitly instead.
        if "id" not in element:
            msg = f"No id found in {element}"
            raise ValueError(msg)
        models.append(Model(id=element["id"]))

    return models
Original file line number Diff line number Diff line change
@@ -1,6 +1,24 @@
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional, Tuple

from pydantic import BaseModel, ConfigDict


class Model(BaseModel):
    """
    Information about a model exposed by the service.

    id: unique identifier for the model, passed as model parameter for requests
    aliases: list of aliases for the model
    base_model: root model for the model
    All aliases are deprecated and will trigger a warning when used.
    """

    # `from_attributes` lets instances be built from arbitrary objects;
    # `protected_namespaces=()` silences pydantic's "model_" prefix warning.
    model_config = ConfigDict(from_attributes=True, protected_namespaces=())

    id: str
    aliases: Optional[list] = None
    base_model: Optional[str] = None


class GeneratorBackend(ABC):
def __init__(self, model: str, model_kwargs: Optional[Dict[str, Any]] = None):
Expand All @@ -27,3 +45,13 @@ def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]]]:
metadata returned by the service.
"""
pass

@abstractmethod
def models(self) -> List[Model]:
    """
    Query the backend for the models it can serve.

    :return:
        The models available on this backend.
    """
    ...
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
# SPDX-FileCopyrightText: 2024-present deepset GmbH <[email protected]>
#
# SPDX-License-Identifier: Apache-2.0
import warnings
from typing import Any, Dict, List, Optional
from urllib.parse import urlparse

from haystack import component, default_from_dict, default_to_dict
from haystack.utils.auth import Secret, deserialize_secrets_inplace
Expand Down Expand Up @@ -41,7 +43,7 @@ class NvidiaGenerator:

def __init__(
self,
model: str,
model: Optional[str] = None,
shadeMe marked this conversation as resolved.
Show resolved Hide resolved
api_url: str = _DEFAULT_API_URL,
api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"),
model_arguments: Optional[Dict[str, Any]] = None,
Expand Down Expand Up @@ -69,6 +71,30 @@ def __init__(

self._backend: Optional[GeneratorBackend] = None

self.is_hosted = urlparse(api_url).netloc in [
"integrate.api.nvidia.com",
"ai.api.nvidia.com",
]

def default_model(self):
    """
    Pick the first base model advertised by a locally hosted NIM and use it
    as this component's default.

    Aliases (entries whose ``base_model`` differs from their ``id``) are
    skipped. Emits a ``UserWarning`` naming the chosen model.

    :raises ValueError:
        If the endpoint advertises no usable model.
    """
    candidates = (m.id for m in self._backend.models() if not m.base_model or m.base_model == m.id)
    name = next(candidates, None)
    if not name:
        error_message = "No locally hosted model was found."
        raise ValueError(error_message)
    warnings.warn(
        f"Default model is set as: {name}. \n"
        "Set model using model parameter. \n"
        "To get available models use available_models property.",
        UserWarning,
        stacklevel=2,
    )
    self._model = name
    self._backend.model_name = name

def warm_up(self):
"""
Initializes the component.
Expand All @@ -86,6 +112,9 @@ def warm_up(self):
model_kwargs=self._model_arguments,
)

if not self.is_hosted and not self._model:
self.default_model()

def to_dict(self) -> Dict[str, Any]:
"""
Serializes the component to a dictionary.
Expand Down
20 changes: 20 additions & 0 deletions integrations/nvidia/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import pytest
from requests_mock import Mocker


@pytest.fixture
def mock_local_models(requests_mock: Mocker) -> None:
    """Register a mock response for the local NIM ``/v1/models`` endpoint,
    advertising a single model named ``model1``."""
    payload = {
        "data": [
            {
                "id": "model1",
                "object": "model",
                "created": 1234567890,
                "owned_by": "OWNER",
                "root": "model1",
            },
        ]
    }
    requests_mock.get("http://localhost:8080/v1/models", json=payload)
Loading