Merge pull request #43 from langchain-ai/mattf/deprecate-mode
Deprecate mode() in favor of __init__(base_url=...)
mattf authored May 24, 2024
2 parents 178088f + 331ce23 commit 25be7fa
Showing 9 changed files with 208 additions and 97 deletions.
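
For reference, a minimal sketch of the migration this commit encourages; the model name and local endpoint below are illustrative assumptions, not part of the diff:

    from langchain_nvidia_ai_endpoints import ChatNVIDIA

    # Before (deprecated in 0.0.17, scheduled for removal in 0.1.0):
    #   llm = ChatNVIDIA(model="meta/llama3-8b-instruct").mode(
    #       "nim", base_url="http://localhost:8000/v1"
    #   )

    # After: pass base_url to the constructor and the client switches to the
    # self-hosted (NIM) endpoints on its own.
    llm = ChatNVIDIA(model="meta/llama3-8b-instruct", base_url="http://localhost:8000/v1")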
46 changes: 39 additions & 7 deletions libs/ai-endpoints/langchain_nvidia_ai_endpoints/_common.py
@@ -19,16 +19,18 @@
Tuple,
Union,
)
from urllib.parse import urlparse

import aiohttp
import requests
from langchain_core._api import deprecated
from langchain_core._api import deprecated, warn_deprecated
from langchain_core.pydantic_v1 import (
BaseModel,
Field,
PrivateAttr,
SecretStr,
root_validator,
validator,
)
from requests.models import Response

@@ -113,6 +115,17 @@ def headers(self) -> dict:
)
return headers_

@validator("base_url")
def validate_base_url(cls, v: str) -> str:
if v is not None:
result = urlparse(v)
# Ensure scheme and netloc (domain name) are present
if not (result.scheme and result.netloc):
raise ValueError(
f"Invalid base_url, minimally needs scheme and netloc: {v}"
)
return v

@root_validator(pre=True)
def validate_model(cls, values: Dict[str, Any]) -> Dict[str, Any]:
"""Validate and update model arguments, including API key and formatting"""
@@ -534,7 +547,16 @@ class _NVIDIAClient(BaseModel):
_default_model: str = ""
model: str = Field(description="Name of the model to invoke")
infer_endpoint: str = Field("{base_url}/chat/completions")
curr_mode: _MODE_TYPE = Field("nvidia")
curr_mode: _MODE_TYPE = Field("nvidia") # todo: remove this in 0.1
is_hosted: bool = Field(True)

def __init__(self, **kwargs: Any):
super().__init__(**kwargs)
if "base_url" in kwargs:
self.is_hosted = False
self.curr_mode = "nim"
self.client.endpoints["infer"] = self.infer_endpoint
self.client.endpoints["models"] = "{base_url}/models"

####################################################################################
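
A rough sketch of the state the new constructor leaves behind when base_url is supplied; the model name and URL are placeholders, and API-key handling (done in validate_model, not shown in this hunk) is assumed not to get in the way for a self-hosted NIM:

    client = _NVIDIAClient(model="my-local-model", base_url="http://localhost:8000/v1")
    assert client.is_hosted is False      # inferred from the presence of base_url
    assert client.curr_mode == "nim"      # kept only for backward compatibility until 0.1
    assert client.client.endpoints["models"] == "{base_url}/models"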

@@ -595,9 +617,9 @@ def available_functions(self) -> List[dict]:
@property
def available_models(self) -> List[Model]:
"""Map the available models that can be invoked."""
if self.curr_mode == "nim":
if self.curr_mode == "nim" or not self.is_hosted:
return self.__class__.get_available_models(
client=self, mode="nim", base_url=self.client.base_url
client=self, base_url=self.client.base_url
)
else:
return self.__class__.get_available_models(client=self)
@@ -625,7 +647,12 @@ def get_available_models(
**kwargs: Any,
) -> List[Model]:
"""Map the available models that can be invoked. Callable from class"""
nveclient = (client or cls(**kwargs)).mode(mode, **kwargs).client
if mode is not None:
warn_deprecated(
name="mode", since="0.0.17", removal="0.1.0", alternative="`base_url`"
)
self = client or cls(**kwargs)
nveclient = self.client
nveclient.reset_method_cache()
out = sorted(
[
@@ -637,7 +664,7 @@ def get_available_models(
# nim model listing does not provide the type and we cannot know
# the model name ahead of time to guess the type.
# so we need to list all models.
if mode == "nim":
if mode == "nim" or not self.is_hosted:
list_all = True
if not filter:
filter = cls.__name__
@@ -668,6 +695,11 @@ def get_binding_model(self) -> Optional[str]:
return ""
return self.model

@deprecated(
since="0.0.17",
removal="0.1.0",
alternative="`base_url` in constructor",
)
def mode(
self,
mode: Optional[_MODE_TYPE] = "nvidia",
@@ -680,7 +712,7 @@ def mode(
force_clone: bool = True,
**kwargs: Any,
) -> Any: # todo: in python 3.11+ this should be typing.Self
"""Return a client swapped to a different mode"""
"""Deprecated: pass `base_url=...` to constructor instead."""
if isinstance(self, str):
raise ValueError("Please construct the model before calling mode()")
out = self if not force_clone else deepcopy(self)
59 changes: 37 additions & 22 deletions libs/ai-endpoints/langchain_nvidia_ai_endpoints/reranking.py
@@ -2,6 +2,7 @@

from typing import Any, Generator, List, Optional, Sequence

from langchain_core._api import deprecated, warn_deprecated
from langchain_core.callbacks.manager import Callbacks
from langchain_core.documents import Document
from langchain_core.documents.compressor import BaseDocumentCompressor
@@ -37,30 +38,50 @@ class Config:
max_batch_size: int = Field(
_default_batch_size, ge=1, description="The maximum batch size."
)
_is_hosted: bool = PrivateAttr(True)

def __init__(self, **kwargs: Any):
"""
Create a new NVIDIARerank document compressor.
Unless you plan to use the "nim" mode, you need to provide an API key. Your
options are -
0. Pass the key as the nvidia_api_key parameter.
1. Pass the key as the api_key parameter.
2. Set the NVIDIA_API_KEY environment variable, recommended.
Precedence is in the order listed above.
This class provides access to a NVIDIA NIM for reranking. By default, it
connects to a hosted NIM, but can be configured to connect to a local NIM
using the `base_url` parameter. An API key is required to connect to the
hosted NIM.
Args:
model (str): The model to use for reranking.
nvidia_api_key (str): The API key to use for connecting to the hosted NIM.
api_key (str): Alternative to nvidia_api_key.
base_url (str): The base URL of the NIM to connect to.
API Key:
- The recommended way to provide the API key is through the `NVIDIA_API_KEY`
environment variable.
"""
super().__init__(**kwargs)
self._client = _NVIDIAClient(
model=self.model,
api_key=kwargs.get("nvidia_api_key", kwargs.get("api_key", None)),
)
if base_url := kwargs.get("base_url", None):
# todo: detect if the base_url points to hosted NIM, this depends on
# moving from NVCF inference to API Catalog inference
self._is_hosted = False
self._client.client.base_url = base_url
self._client.client.endpoints["infer"] = "{base_url}/ranking"
self._client.client.endpoints = {
"infer": "{base_url}/ranking",
"status": None,
"models": None,
}
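
Putting the docstring and the endpoint wiring above together, a minimal usage sketch; the local URL is illustrative, and the hosted call assumes NVIDIA_API_KEY is set in the environment:

    from langchain_nvidia_ai_endpoints import NVIDIARerank

    # Hosted NIM (default): needs an API key, here taken from NVIDIA_API_KEY.
    hosted = NVIDIARerank()

    # Self-hosted NIM: point base_url at the service root; requests then go to
    # {base_url}/ranking and the hosted /models listing is skipped.
    local = NVIDIARerank(base_url="http://localhost:9999/v1")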

@property
def available_models(self) -> List[Model]:
"""
Get a list of available models that work with NVIDIARerank.
"""
if self._client.curr_mode == "nim":
if self._client.curr_mode == "nim" or not self._is_hosted:
# local NIM supports a single model and no /models endpoint
models = [
Model(
@@ -102,8 +123,10 @@ def get_available_models(
It is possible to get a list of all models, including those that are not
chat models, by setting the list_all parameter to True.
"""
if mode is not None:
warn_deprecated(since="0.0.17", removal="0.1.0", alternative="`base_url`")
self = cls(**kwargs).mode(mode=mode, **kwargs)
if mode == "nim":
if mode == "nim" or not self._is_hosted:
# ignoring list_all because there is one
models = self.available_models
else:
Expand All @@ -116,6 +139,11 @@ def get_available_models(
)
return models

@deprecated(
since="0.0.17",
removal="0.1.0",
alternative="`base_url` to constructor",
)
def mode(
self,
mode: Optional[_MODE_TYPE] = "nvidia",
@@ -125,20 +153,7 @@ def mode(
**kwargs: Any,
) -> NVIDIARerank:
"""
Change the mode.
There are two modes, "nvidia" and "nim". The "nvidia" mode is the default mode
and is used to interact with hosted NVIDIA AI endpoints. The "nim" mode is
used to interact with NVIDIA NIM endpoints, which are typically hosted
on-premises.
For the "nvidia" mode, the "api_key" parameter is available to specify your
API key. If not specified, the NVIDIA_API_KEY environment variable will be used.
For the "nim" mode, the "base_url" and "model" parameters are required. Set
base_url to the url of your NVIDIA NIM endpoint. For instance,
"https://localhost:9999/v1". Additionally, the "model" parameter must be set
to the name of the model inside the NIM.
Deprecated: use NVIDIARerank(base_url=...) instead.
"""
# set a default base_url for nim mode
if not base_url and mode == "nim":
2 changes: 1 addition & 1 deletion libs/ai-endpoints/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "langchain-nvidia-ai-endpoints"
version = "0.0.17"
version = "0.0.18"
description = "An integration package connecting NVIDIA AI Endpoints and LangChain"
authors = []
readme = "README.md"
10 changes: 4 additions & 6 deletions libs/ai-endpoints/tests/integration_tests/conftest.py
@@ -9,7 +9,7 @@
def get_mode(config: pytest.Config) -> dict:
nim_endpoint = config.getoption("--nim-endpoint")
if nim_endpoint:
return dict(mode="nim", base_url=nim_endpoint)
return dict(base_url=nim_endpoint)
return {}
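
For completeness, a sketch of how this simplified dict is consumed by the fixtures further down in this file (the endpoint value is illustrative):

    mode = get_mode(metafunc.config)
    # {} when running against the hosted endpoints, or
    # {"base_url": "http://localhost:8000/v1"} when --nim-endpoint is given.
    models = ChatNVIDIA.get_available_models(list_all=True, **mode)
    llm = ChatNVIDIA(**mode)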


@@ -50,14 +50,14 @@ def pytest_generate_tests(metafunc: pytest.Metafunc) -> None:
mode = get_mode(metafunc.config)

def get_all_models() -> List[Model]:
return ChatNVIDIA().mode(**mode).get_available_models(list_all=True, **mode)
return ChatNVIDIA.get_available_models(list_all=True, **mode)

if "chat_model" in metafunc.fixturenames:
models = [ChatNVIDIA._default_model]
if model := metafunc.config.getoption("chat_model_id"):
models = [model]
if metafunc.config.getoption("all_models"):
models = [model.id for model in ChatNVIDIA().mode(**mode).available_models]
models = [model.id for model in ChatNVIDIA(**mode).available_models]
metafunc.parametrize("chat_model", models, ids=models)

if "rerank_model" in metafunc.fixturenames:
@@ -67,9 +67,7 @@ def get_all_models() -> List[Model]:
# nim-mode reranking does not support model listing via /v1/models endpoint
if metafunc.config.getoption("all_models"):
if mode.get("mode", None) == "nim":
models = [
model.id for model in NVIDIARerank().mode(**mode).available_models
]
models = [model.id for model in NVIDIARerank(**mode).available_models]
else:
models = [
model.id
(The remaining 5 changed files are not shown here.)
