Merge pull request #51 from langchain-ai/raspawar/default-llm-model
Provide default model in local NIM mode
mattf authored Jun 25, 2024
2 parents 3727cd4 + af78e07 commit c16e959
Showing 10 changed files with 175 additions and 19 deletions.
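With this change, the `model` parameter becomes optional when pointing at a locally running NIM: if no model is given, the client queries the NIM's /v1/models endpoint and falls back to the first base model it serves, emitting a UserWarning that names the selection. A minimal usage sketch (the localhost URL and model name are illustrative):

from langchain_nvidia_ai_endpoints import ChatNVIDIA

# Explicit model, as before (hypothetical local NIM endpoint).
llm = ChatNVIDIA(base_url="http://localhost:8000/v1", model="meta/llama3-8b-instruct")

# New: omit the model and the client picks the NIM's first available
# base model, warning about the choice it made.
llm = ChatNVIDIA(base_url="http://localhost:8000/v1")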
22 changes: 22 additions & 0 deletions libs/ai-endpoints/docs/chat/nvidia_ai_endpoints.ipynb
@@ -137,6 +137,28 @@
"llm = ChatNVIDIA(base_url=\"http://localhost:8000/v1\", model=\"meta/llama3-8b-instruct\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "7d4a4e2e",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/ubuntu/raspawar/langchain-nvidia/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_common.py:583: UserWarning: Default model is set as: meta/llama3-8b-instruct. \n",
"Set model using model parameter. \n",
"To get available models use available_models property.\n",
" UserWarning,\n"
]
}
],
"source": [
"# OR connect to an embedding NIM running at localhost:8000, with default model(first available model)\n",
"llm = ChatNVIDIA(base_url=\"http://localhost:8000/v1\")"
]
},
{
"cell_type": "markdown",
"id": "71d37987-d568-4a73-9d2a-8bd86323f8bf",
27 changes: 24 additions & 3 deletions libs/ai-endpoints/docs/text_embedding/nvidia_ai_endpoints.ipynb
@@ -143,14 +143,35 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings\n",
"\n",
"# connect to an embedding NIM running at localhost:8080\n",
"embedder = NVIDIAEmbeddings(base_url=\"http://localhost:8080/v1\")"
"embedder = NVIDIAEmbeddings(base_url=\"http://localhost:9080/v1\", model=\"NV-Embed-QA\")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/ubuntu/raspawar/langchain-nvidia/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_common.py:579: UserWarning: Default model is set as: NV-Embed-QA. \n",
"Set model using model parameter. \n",
"To get available models use available_models property.\n",
" warnings.warn(\n"
]
}
],
"source": [
"# connect to an default embedding NIM running at localhost:8080\n",
"embedder = NVIDIAEmbeddings(base_url=\"http://localhost:9080/v1\")"
]
},
{
@@ -521,7 +542,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
"version": "3.10.12"
}
},
"nbformat": 4,
51 changes: 40 additions & 11 deletions libs/ai-endpoints/langchain_nvidia_ai_endpoints/_common.py
@@ -185,6 +185,10 @@ def available_models(self) -> list[Model]:
# so we'll let it through. use of this model will be
# accompanied by a warning.
model = Model(id=element["id"])

# add base model for local-nim mode
model.base_model = element.get("root")

self._available_models.append(model)

return self._available_models
@@ -510,7 +514,7 @@ class _NVIDIAClient(BaseModel):

client: NVEModel = Field(NVEModel)

model: str = Field(..., description="Name of the model to invoke")
model: Optional[str] = Field(..., description="Name of the model to invoke")
is_hosted: bool = Field(True)

####################################################################################
@@ -525,19 +529,22 @@ def _preprocess_args(cls, values: Any) -> Any:
"ai.api.nvidia.com",
]

# set default model for hosted endpoint
if values["is_hosted"] and not values["model"]:
values["model"] = values["default_model"]

return values

@root_validator
def _postprocess_args(cls, values: Any) -> Any:
name = values.get("model")
if values["is_hosted"]:
if not values["client"].api_key:
warnings.warn(
"An API key is required for the hosted NIM. "
"This will become an error in the future.",
UserWarning,
)

name = values.get("model")
if model := determine_model(name):
values["model"] = model.id
# not all models are on https://integrate.api.nvidia.com/v1,
@@ -558,7 +565,30 @@ def _postprocess_args(cls, values: Any) -> Any:
raise ValueError(
f"Model {name} is unknown, check `available_models`"
)

else:
# set default model
if not name:
if not (client := values.get("client")):
warnings.warn(f"Unable to determine validity of {name}")
else:
valid_models = [
model.id
for model in client.available_models
if model.base_model and model.id == model.base_model
]
name = next(iter(valid_models), None)
if name:
warnings.warn(
f"Default model is set as: {name}. \n"
"Set model using model parameter. \n"
"To get available models use available_models property.",
UserWarning,
)
values["model"] = name
else:
raise ValueError(
f"Model {name} is unknown, check `available_models`"
)
return values

@classmethod
@@ -586,18 +616,17 @@ def get_available_models(
**kwargs: Any,
) -> List[Model]:
"""Retrieve a list of available models."""
available = [
model for model in self.client.available_models if model.client == filter
]

available = self.client.available_models

# if we're talking to a hosted endpoint, we mix in the known models
# because they are not all discoverable by listing. for instance,
# the NV-Embed-QA and VLM models are hosted on ai.api.nvidia.com
# instead of integrate.api.nvidia.com.
if self.is_hosted:
known = set(
model for model in MODEL_TABLE.values() if model.client == filter
)
available = list(set(available) | known)
known = set(MODEL_TABLE.values())
available = [
model for model in set(available) | known if model.client == filter
]

return available
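In local NIM mode the default is chosen from the /v1/models listing: an entry is eligible only when its id equals its root (stored as base_model), which filters out LoRA adapters, whose root names the base model they were tuned from. A standalone sketch of that selection rule, assuming the Model shape defined below:

from typing import List, Optional

def pick_default_model(models: List["Model"]) -> Optional[str]:
    # Eligible defaults are base models, i.e. id == base_model (the
    # "root" reported by /v1/models); LoRA adapters fail this check.
    valid = [m.id for m in models if m.base_model and m.id == m.base_model]
    return next(iter(valid), None)  # first listed base model, or None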
@@ -20,6 +20,7 @@ class Model(BaseModel):
client: Optional[str] = None
endpoint: Optional[str] = None
aliases: Optional[list] = None
base_model: Optional[str] = None

def __hash__(self) -> int:
return hash(self.id)
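
The new base_model field carries the root value reported by /v1/models, which is what the default-model selection keys on. An illustrative pair of entries (the ids are made up):

base = Model(id="meta/llama3-8b-instruct", base_model="meta/llama3-8b-instruct")
lora = Model(id="lora1", base_model="meta/llama3-8b-instruct")

assert base.id == base.base_model  # a base model, eligible as the default
assert lora.id != lora.base_model  # a LoRA adapter, skipped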
@@ -136,7 +136,7 @@ class ChatNVIDIA(BaseChatModel):
"https://integrate.api.nvidia.com/v1",
description="Base url for model listing an invocation",
)
model: str = Field(_default_model, description="Name of the model to invoke")
model: Optional[str] = Field(description="Name of the model to invoke")
temperature: Optional[float] = Field(description="Sampling temperature in [0, 1]")
max_tokens: Optional[int] = Field(
1024, description="Maximum # of tokens to generate"
@@ -173,6 +173,7 @@ def __init__(self, **kwargs: Any):
self._client = _NVIDIAClient(
base_url=self.base_url,
model=self.model,
default_model=self._default_model,
api_key=kwargs.get("nvidia_api_key", kwargs.get("api_key", None)),
infer_path="{base_url}/chat/completions",
)
@@ -33,7 +33,7 @@ class Config:
"https://integrate.api.nvidia.com/v1",
description="Base url for model listing an invocation",
)
model: str = Field(_default_model, description="Name of the model to invoke")
model: Optional[str] = Field(description="Name of the model to invoke")
truncate: Literal["NONE", "START", "END"] = Field(
default="NONE",
description=(
@@ -72,6 +72,7 @@ def __init__(self, **kwargs: Any):
self._client = _NVIDIAClient(
base_url=self.base_url,
model=self.model,
default_model=self._default_model,
api_key=kwargs.get("nvidia_api_key", kwargs.get("api_key", None)),
infer_path="{base_url}/embeddings",
)
5 changes: 2 additions & 3 deletions libs/ai-endpoints/langchain_nvidia_ai_endpoints/reranking.py
@@ -35,9 +35,7 @@ class Config:
description="Base url for model listing an invocation",
)
top_n: int = Field(5, ge=0, description="The number of documents to return.")
model: str = Field(
_default_model_name, description="The model to use for reranking."
)
model: Optional[str] = Field(description="The model to use for reranking.")
max_batch_size: int = Field(
_default_batch_size, ge=1, description="The maximum batch size."
)
@@ -65,6 +63,7 @@ def __init__(self, **kwargs: Any):
self._client = _NVIDIAClient(
base_url=self.base_url,
model=self.model,
default_model=self._default_model_name,
api_key=kwargs.get("nvidia_api_key", kwargs.get("api_key", None)),
infer_path="{base_url}/ranking",
)
19 changes: 19 additions & 0 deletions libs/ai-endpoints/tests/unit_tests/test_api_key.py
@@ -4,6 +4,7 @@

import pytest
from langchain_core.pydantic_v1 import SecretStr
from requests_mock import Mocker


@contextmanager
@@ -17,6 +18,24 @@ def no_env_var(var: str) -> Generator[None, None, None]:
os.environ[var] = val


@pytest.fixture(autouse=True)
def mock_v1_local_models(requests_mock: Mocker) -> None:
requests_mock.get(
"https://test_url/v1/models",
json={
"data": [
{
"id": "model1",
"object": "model",
"created": 1234567890,
"owned_by": "OWNER",
"root": "model1",
},
]
},
)


def test_create_without_api_key(public_class: type) -> None:
with no_env_var("NVIDIA_API_KEY"):
with pytest.warns(UserWarning):
19 changes: 19 additions & 0 deletions libs/ai-endpoints/tests/unit_tests/test_base_url.py
@@ -1,4 +1,5 @@
import pytest
from requests_mock import Mocker


@pytest.mark.parametrize(
@@ -24,6 +25,24 @@ def test_param_base_url_hosted(public_class: type, base_url: str) -> None:
assert client._client.is_hosted


@pytest.fixture(autouse=True)
def mock_v1_local_models(requests_mock: Mocker, base_url: str) -> None:
requests_mock.get(
f"{base_url}/models",
json={
"data": [
{
"id": "model1",
"object": "model",
"created": 1234567890,
"owned_by": "OWNER",
"root": "model1",
},
]
},
)


@pytest.mark.parametrize(
"base_url",
[
44 changes: 44 additions & 0 deletions libs/ai-endpoints/tests/unit_tests/test_model.py
@@ -26,6 +26,31 @@ def mock_v1_models(requests_mock: Mocker, known_unknown: str) -> None:
)


@pytest.fixture(autouse=True)
def mock_v1_local_models(requests_mock: Mocker, known_unknown: str) -> None:
requests_mock.get(
"http://localhost:8000/v1/models",
json={
"data": [
{
"id": known_unknown,
"object": "model",
"created": 1234567890,
"owned_by": "OWNER",
"root": known_unknown,
},
{
"id": "lora1",
"object": "model",
"created": 1234567890,
"owned_by": "OWNER",
"root": known_unknown,
},
]
},
)


@pytest.mark.parametrize(
"alias",
[
@@ -84,3 +109,22 @@ def test_unknown_unknown(public_class: type) -> None:
with pytest.raises(ValueError) as e:
public_class(model="test/unknown-unknown", nvidia_api_key="a-bogus-key")
assert "unknown" in str(e.value)


def test_default_known(public_class: type, known_unknown: str) -> None:
"""
Test that the default model is set from the endpoint's available models when none is specified.
"""
# check if default model is getting set
with pytest.warns(UserWarning):
x = public_class(base_url="http://localhost:8000/v1")
assert x.model == known_unknown


def test_default_lora(public_class: type) -> None:
"""
Test that a LoRA model served by the endpoint is accepted when named explicitly.
"""
# explicitly select a LoRA model served by the endpoint
x = public_class(base_url="http://localhost:8000/v1", model="lora1")
assert x.model == "lora1"
