Merge pull request #43 from langchain-ai/mattf/deprecate-mode
Deprecate mode() in favor of __init__(base_url=...)
mattf authored May 24, 2024
2 parents 178088f + 331ce23 commit 25be7fa
Showing 9 changed files with 208 additions and 97 deletions.
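
For reference, a minimal sketch of the migration this commit encourages; the model name and local endpoint below are illustrative assumptions, not part of the diff:

    from langchain_nvidia_ai_endpoints import ChatNVIDIA

    # Before (deprecated in 0.0.17, scheduled for removal in 0.1.0):
    #   llm = ChatNVIDIA(model="meta/llama3-8b-instruct").mode(
    #       "nim", base_url="http://localhost:8000/v1"
    #   )

    # After: pass base_url to the constructor and the client switches to the
    # self-hosted (NIM) endpoints on its own.
    llm = ChatNVIDIA(model="meta/llama3-8b-instruct", base_url="http://localhost:8000/v1")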
46 changes: 39 additions & 7 deletions libs/ai-endpoints/langchain_nvidia_ai_endpoints/_common.py
@@ -19,16 +19,18 @@
Tuple,
Union,
)
from urllib.parse import urlparse

import aiohttp
import requests
from langchain_core._api import deprecated
from langchain_core._api import deprecated, warn_deprecated
from langchain_core.pydantic_v1 import (
BaseModel,
Field,
PrivateAttr,
SecretStr,
root_validator,
validator,
)
from requests.models import Response

@@ -113,6 +115,17 @@ def headers(self) -> dict:
)
return headers_

@validator("base_url")
def validate_base_url(cls, v: str) -> str:
if v is not None:
result = urlparse(v)
# Ensure scheme and netloc (domain name) are present
if not (result.scheme and result.netloc):
raise ValueError(
f"Invalid base_url, minimally needs scheme and netloc: {v}"
)
return v

@root_validator(pre=True)
def validate_model(cls, values: Dict[str, Any]) -> Dict[str, Any]:
"""Validate and update model arguments, including API key and formatting"""
@@ -534,7 +547,16 @@ class _NVIDIAClient(BaseModel):
_default_model: str = ""
model: str = Field(description="Name of the model to invoke")
infer_endpoint: str = Field("{base_url}/chat/completions")
curr_mode: _MODE_TYPE = Field("nvidia")
curr_mode: _MODE_TYPE = Field("nvidia") # todo: remove this in 0.1
is_hosted: bool = Field(True)

def __init__(self, **kwargs: Any):
super().__init__(**kwargs)
if "base_url" in kwargs:
self.is_hosted = False
self.curr_mode = "nim"
self.client.endpoints["infer"] = self.infer_endpoint
self.client.endpoints["models"] = "{base_url}/models"

####################################################################################
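
A rough sketch of the state the new constructor leaves behind when base_url is supplied; the model name and URL are placeholders, and API-key handling (done in validate_model, not shown in this hunk) is assumed not to get in the way for a self-hosted NIM:

    client = _NVIDIAClient(model="my-local-model", base_url="http://localhost:8000/v1")
    assert client.is_hosted is False      # inferred from the presence of base_url
    assert client.curr_mode == "nim"      # kept only for backward compatibility until 0.1
    assert client.client.endpoints["models"] == "{base_url}/models"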

@@ -595,9 +617,9 @@ def available_functions(self) -> List[dict]:
@property
def available_models(self) -> List[Model]:
"""Map the available models that can be invoked."""
if self.curr_mode == "nim":
if self.curr_mode == "nim" or not self.is_hosted:
return self.__class__.get_available_models(
client=self, mode="nim", base_url=self.client.base_url
client=self, base_url=self.client.base_url
)
else:
return self.__class__.get_available_models(client=self)
@@ -625,7 +647,12 @@ def get_available_models(
**kwargs: Any,
) -> List[Model]:
"""Map the available models that can be invoked. Callable from class"""
nveclient = (client or cls(**kwargs)).mode(mode, **kwargs).client
if mode is not None:
warn_deprecated(
name="mode", since="0.0.17", removal="0.1.0", alternative="`base_url`"
)
self = client or cls(**kwargs)
nveclient = self.client
nveclient.reset_method_cache()
out = sorted(
[
@@ -637,7 +664,7 @@ def get_available_models(
# nim model listing does not provide the type and we cannot know
# the model name ahead of time to guess the type.
# so we need to list all models.
if mode == "nim":
if mode == "nim" or not self.is_hosted:
list_all = True
if not filter:
filter = cls.__name__
@@ -668,6 +695,11 @@ def get_binding_model(self) -> Optional[str]:
return ""
return self.model

@deprecated(
since="0.0.17",
removal="0.1.0",
alternative="`base_url` in constructor",
)
def mode(
self,
mode: Optional[_MODE_TYPE] = "nvidia",
@@ -680,7 +712,7 @@ def mode(
force_clone: bool = True,
**kwargs: Any,
) -> Any: # todo: in python 3.11+ this should be typing.Self
"""Return a client swapped to a different mode"""
"""Deprecated: pass `base_url=...` to constructor instead."""
if isinstance(self, str):
raise ValueError("Please construct the model before calling mode()")
out = self if not force_clone else deepcopy(self)
59 changes: 37 additions & 22 deletions libs/ai-endpoints/langchain_nvidia_ai_endpoints/reranking.py
@@ -2,6 +2,7 @@

from typing import Any, Generator, List, Optional, Sequence

from langchain_core._api import deprecated, warn_deprecated
from langchain_core.callbacks.manager import Callbacks
from langchain_core.documents import Document
from langchain_core.documents.compressor import BaseDocumentCompressor
@@ -37,30 +38,50 @@ class Config:
max_batch_size: int = Field(
_default_batch_size, ge=1, description="The maximum batch size."
)
_is_hosted: bool = PrivateAttr(True)

def __init__(self, **kwargs: Any):
"""
Create a new NVIDIARerank document compressor.
Unless you plan to use the "nim" mode, you need to provide an API key. Your
options are -
0. Pass the key as the nvidia_api_key parameter.
1. Pass the key as the api_key parameter.
2. Set the NVIDIA_API_KEY environment variable, recommended.
Precedence is in the order listed above.
This class provides access to a NVIDIA NIM for reranking. By default, it
connects to a hosted NIM, but can be configured to connect to a local NIM
using the `base_url` parameter. An API key is required to connect to the
hosted NIM.
Args:
model (str): The model to use for reranking.
nvidia_api_key (str): The API key to use for connecting to the hosted NIM.
api_key (str): Alternative to nvidia_api_key.
base_url (str): The base URL of the NIM to connect to.
API Key:
- The recommended way to provide the API key is through the `NVIDIA_API_KEY`
environment variable.
"""
super().__init__(**kwargs)
self._client = _NVIDIAClient(
model=self.model,
api_key=kwargs.get("nvidia_api_key", kwargs.get("api_key", None)),
)
if base_url := kwargs.get("base_url", None):
# todo: detect if the base_url points to hosted NIM, this depends on
# moving from NVCF inference to API Catalog inference
self._is_hosted = False
self._client.client.base_url = base_url
self._client.client.endpoints["infer"] = "{base_url}/ranking"
self._client.client.endpoints = {
"infer": "{base_url}/ranking",
"status": None,
"models": None,
}
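
Putting the docstring and the endpoint wiring above together, a minimal usage sketch; the local URL is illustrative, and the hosted call assumes NVIDIA_API_KEY is set in the environment:

    from langchain_nvidia_ai_endpoints import NVIDIARerank

    # Hosted NIM (default): needs an API key, here taken from NVIDIA_API_KEY.
    hosted = NVIDIARerank()

    # Self-hosted NIM: point base_url at the service root; requests then go to
    # {base_url}/ranking and the hosted /models listing is skipped.
    local = NVIDIARerank(base_url="http://localhost:9999/v1")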

@property
def available_models(self) -> List[Model]:
"""
Get a list of available models that work with NVIDIARerank.
"""
if self._client.curr_mode == "nim":
if self._client.curr_mode == "nim" or not self._is_hosted:
# local NIM supports a single model and no /models endpoint
models = [
Model(
@@ -102,8 +123,10 @@ def get_available_models(
It is possible to get a list of all models, including those that are not
chat models, by setting the list_all parameter to True.
"""
if mode is not None:
warn_deprecated(since="0.0.17", removal="0.1.0", alternative="`base_url`")
self = cls(**kwargs).mode(mode=mode, **kwargs)
if mode == "nim":
if mode == "nim" or not self._is_hosted:
# ignoring list_all because there is one
models = self.available_models
else:
Expand All @@ -116,6 +139,11 @@ def get_available_models(
)
return models

@deprecated(
since="0.0.17",
removal="0.1.0",
alternative="`base_url` to constructor",
)
def mode(
self,
mode: Optional[_MODE_TYPE] = "nvidia",
@@ -125,20 +153,7 @@ def mode(
**kwargs: Any,
) -> NVIDIARerank:
"""
Change the mode.
There are two modes, "nvidia" and "nim". The "nvidia" mode is the default mode
and is used to interact with hosted NVIDIA AI endpoints. The "nim" mode is
used to interact with NVIDIA NIM endpoints, which are typically hosted
on-premises.
For the "nvidia" mode, the "api_key" parameter is available to specify your
API key. If not specified, the NVIDIA_API_KEY environment variable will be used.
For the "nim" mode, the "base_url" and "model" parameters are required. Set
base_url to the url of your NVIDIA NIM endpoint. For instance,
"https://localhost:9999/v1". Additionally, the "model" parameter must be set
to the name of the model inside the NIM.
Deprecated: use NVIDIARerank(base_url=...) instead.
"""
# set a default base_url for nim mode
if not base_url and mode == "nim":
2 changes: 1 addition & 1 deletion libs/ai-endpoints/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "langchain-nvidia-ai-endpoints"
version = "0.0.17"
version = "0.0.18"
description = "An integration package connecting NVIDIA AI Endpoints and LangChain"
authors = []
readme = "README.md"
10 changes: 4 additions & 6 deletions libs/ai-endpoints/tests/integration_tests/conftest.py
@@ -9,7 +9,7 @@
def get_mode(config: pytest.Config) -> dict:
nim_endpoint = config.getoption("--nim-endpoint")
if nim_endpoint:
return dict(mode="nim", base_url=nim_endpoint)
return dict(base_url=nim_endpoint)
return {}
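
For completeness, a sketch of how this simplified dict is consumed by the fixtures further down in this file (the endpoint value is illustrative):

    mode = get_mode(metafunc.config)
    # {} when running against the hosted endpoints, or
    # {"base_url": "http://localhost:8000/v1"} when --nim-endpoint is given.
    models = ChatNVIDIA.get_available_models(list_all=True, **mode)
    llm = ChatNVIDIA(**mode)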


@@ -50,14 +50,14 @@ def pytest_generate_tests(metafunc: pytest.Metafunc) -> None:
mode = get_mode(metafunc.config)

def get_all_models() -> List[Model]:
return ChatNVIDIA().mode(**mode).get_available_models(list_all=True, **mode)
return ChatNVIDIA.get_available_models(list_all=True, **mode)

if "chat_model" in metafunc.fixturenames:
models = [ChatNVIDIA._default_model]
if model := metafunc.config.getoption("chat_model_id"):
models = [model]
if metafunc.config.getoption("all_models"):
models = [model.id for model in ChatNVIDIA().mode(**mode).available_models]
models = [model.id for model in ChatNVIDIA(**mode).available_models]
metafunc.parametrize("chat_model", models, ids=models)

if "rerank_model" in metafunc.fixturenames:
@@ -67,9 +67,7 @@ def get_all_models() -> List[Model]:
# nim-mode reranking does not support model listing via /v1/models endpoint
if metafunc.config.getoption("all_models"):
if mode.get("mode", None) == "nim":
models = [
model.id for model in NVIDIARerank().mode(**mode).available_models
]
models = [model.id for model in NVIDIARerank(**mode).available_models]
else:
models = [
model.id
(The remaining 5 changed files are not shown here.)
