Merge pull request #51 from langchain-ai/raspawar/default-llm-model
Provide default model in local NIM mode
mattf authored Jun 25, 2024
2 parents 3727cd4 + af78e07 commit c16e959
Showing 10 changed files with 175 additions and 19 deletions.
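With this change, the `model` parameter becomes optional when pointing at a locally running NIM: if no model is given, the client queries the NIM's /v1/models endpoint and falls back to the first base model it serves, emitting a UserWarning that names the selection. A minimal usage sketch (the localhost URL and model name are illustrative):

from langchain_nvidia_ai_endpoints import ChatNVIDIA

# Explicit model, as before (hypothetical local NIM endpoint).
llm = ChatNVIDIA(base_url="http://localhost:8000/v1", model="meta/llama3-8b-instruct")

# New: omit the model and the client picks the NIM's first available
# base model, warning about the choice it made.
llm = ChatNVIDIA(base_url="http://localhost:8000/v1")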
22 changes: 22 additions & 0 deletions libs/ai-endpoints/docs/chat/nvidia_ai_endpoints.ipynb
@@ -137,6 +137,28 @@
"llm = ChatNVIDIA(base_url=\"http://localhost:8000/v1\", model=\"meta/llama3-8b-instruct\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "7d4a4e2e",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/ubuntu/raspawar/langchain-nvidia/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_common.py:583: UserWarning: Default model is set as: meta/llama3-8b-instruct. \n",
"Set model using model parameter. \n",
"To get available models use available_models property.\n",
" UserWarning,\n"
]
}
],
"source": [
"# OR connect to an embedding NIM running at localhost:8000, with default model(first available model)\n",
"llm = ChatNVIDIA(base_url=\"http://localhost:8000/v1\")"
]
},
{
"cell_type": "markdown",
"id": "71d37987-d568-4a73-9d2a-8bd86323f8bf",
27 changes: 24 additions & 3 deletions libs/ai-endpoints/docs/text_embedding/nvidia_ai_endpoints.ipynb
@@ -143,14 +143,35 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings\n",
"\n",
"# connect to an embedding NIM running at localhost:8080\n",
"embedder = NVIDIAEmbeddings(base_url=\"http://localhost:8080/v1\")"
"embedder = NVIDIAEmbeddings(base_url=\"http://localhost:9080/v1\", model=\"NV-Embed-QA\")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/ubuntu/raspawar/langchain-nvidia/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_common.py:579: UserWarning: Default model is set as: NV-Embed-QA. \n",
"Set model using model parameter. \n",
"To get available models use available_models property.\n",
" warnings.warn(\n"
]
}
],
"source": [
"# connect to an default embedding NIM running at localhost:8080\n",
"embedder = NVIDIAEmbeddings(base_url=\"http://localhost:9080/v1\")"
]
},
{
@@ -521,7 +542,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
"version": "3.10.12"
}
},
"nbformat": 4,
51 changes: 40 additions & 11 deletions libs/ai-endpoints/langchain_nvidia_ai_endpoints/_common.py
@@ -185,6 +185,10 @@ def available_models(self) -> list[Model]:
# so we'll let it through. use of this model will be
# accompanied by a warning.
model = Model(id=element["id"])

# add base model for local-nim mode
model.base_model = element.get("root")

self._available_models.append(model)

return self._available_models
@@ -510,7 +514,7 @@ class _NVIDIAClient(BaseModel):

client: NVEModel = Field(NVEModel)

model: str = Field(..., description="Name of the model to invoke")
model: Optional[str] = Field(..., description="Name of the model to invoke")
is_hosted: bool = Field(True)

####################################################################################
@@ -525,19 +529,22 @@ def _preprocess_args(cls, values: Any) -> Any:
"ai.api.nvidia.com",
]

# set default model for hosted endpoint
if values["is_hosted"] and not values["model"]:
values["model"] = values["default_model"]

return values

@root_validator
def _postprocess_args(cls, values: Any) -> Any:
name = values.get("model")
if values["is_hosted"]:
if not values["client"].api_key:
warnings.warn(
"An API key is required for the hosted NIM. "
"This will become an error in the future.",
UserWarning,
)

name = values.get("model")
if model := determine_model(name):
values["model"] = model.id
# not all models are on https://integrate.api.nvidia.com/v1,
@@ -558,7 +565,30 @@ def _postprocess_args(cls, values: Any) -> Any:
raise ValueError(
f"Model {name} is unknown, check `available_models`"
)

else:
# set default model
if not name:
if not (client := values.get("client")):
warnings.warn(f"Unable to determine validity of {name}")
else:
valid_models = [
model.id
for model in client.available_models
if model.base_model and model.id == model.base_model
]
name = next(iter(valid_models), None)
if name:
warnings.warn(
f"Default model is set as: {name}. \n"
"Set model using model parameter. \n"
"To get available models use available_models property.",
UserWarning,
)
values["model"] = name
else:
raise ValueError(
f"Model {name} is unknown, check `available_models`"
)
return values

@classmethod
@@ -586,18 +616,17 @@ def get_available_models(
**kwargs: Any,
) -> List[Model]:
"""Retrieve a list of available models."""
available = [
model for model in self.client.available_models if model.client == filter
]

available = self.client.available_models

# if we're talking to a hosted endpoint, we mix in the known models
# because they are not all discoverable by listing. for instance,
# the NV-Embed-QA and VLM models are hosted on ai.api.nvidia.com
# instead of integrate.api.nvidia.com.
if self.is_hosted:
known = set(
model for model in MODEL_TABLE.values() if model.client == filter
)
available = list(set(available) | known)
known = set(MODEL_TABLE.values())
available = [
model for model in set(available) | known if model.client == filter
]

return available
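In local NIM mode the default is chosen from the /v1/models listing: an entry is eligible only when its id equals its root (stored as base_model), which filters out LoRA adapters, whose root names the base model they were tuned from. A standalone sketch of that selection rule, assuming the Model shape defined below:

from typing import List, Optional

def pick_default_model(models: List["Model"]) -> Optional[str]:
    # Eligible defaults are base models, i.e. id == base_model (the
    # "root" reported by /v1/models); LoRA adapters fail this check.
    valid = [m.id for m in models if m.base_model and m.id == m.base_model]
    return next(iter(valid), None)  # first listed base model, or None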
@@ -20,6 +20,7 @@ class Model(BaseModel):
client: Optional[str] = None
endpoint: Optional[str] = None
aliases: Optional[list] = None
base_model: Optional[str] = None

def __hash__(self) -> int:
return hash(self.id)
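
The new base_model field carries the root value reported by /v1/models, which is what the default-model selection keys on. An illustrative pair of entries (the ids are made up):

base = Model(id="meta/llama3-8b-instruct", base_model="meta/llama3-8b-instruct")
lora = Model(id="lora1", base_model="meta/llama3-8b-instruct")

assert base.id == base.base_model  # a base model, eligible as the default
assert lora.id != lora.base_model  # a LoRA adapter, skipped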
@@ -136,7 +136,7 @@ class ChatNVIDIA(BaseChatModel):
"https://integrate.api.nvidia.com/v1",
description="Base url for model listing an invocation",
)
model: str = Field(_default_model, description="Name of the model to invoke")
model: Optional[str] = Field(description="Name of the model to invoke")
temperature: Optional[float] = Field(description="Sampling temperature in [0, 1]")
max_tokens: Optional[int] = Field(
1024, description="Maximum # of tokens to generate"
@@ -173,6 +173,7 @@ def __init__(self, **kwargs: Any):
self._client = _NVIDIAClient(
base_url=self.base_url,
model=self.model,
default_model=self._default_model,
api_key=kwargs.get("nvidia_api_key", kwargs.get("api_key", None)),
infer_path="{base_url}/chat/completions",
)
@@ -33,7 +33,7 @@ class Config:
"https://integrate.api.nvidia.com/v1",
description="Base url for model listing an invocation",
)
model: str = Field(_default_model, description="Name of the model to invoke")
model: Optional[str] = Field(description="Name of the model to invoke")
truncate: Literal["NONE", "START", "END"] = Field(
default="NONE",
description=(
@@ -72,6 +72,7 @@ def __init__(self, **kwargs: Any):
self._client = _NVIDIAClient(
base_url=self.base_url,
model=self.model,
default_model=self._default_model,
api_key=kwargs.get("nvidia_api_key", kwargs.get("api_key", None)),
infer_path="{base_url}/embeddings",
)
5 changes: 2 additions & 3 deletions libs/ai-endpoints/langchain_nvidia_ai_endpoints/reranking.py
@@ -35,9 +35,7 @@ class Config:
description="Base url for model listing an invocation",
)
top_n: int = Field(5, ge=0, description="The number of documents to return.")
model: str = Field(
_default_model_name, description="The model to use for reranking."
)
model: Optional[str] = Field(description="The model to use for reranking.")
max_batch_size: int = Field(
_default_batch_size, ge=1, description="The maximum batch size."
)
@@ -65,6 +63,7 @@ def __init__(self, **kwargs: Any):
self._client = _NVIDIAClient(
base_url=self.base_url,
model=self.model,
default_model=self._default_model_name,
api_key=kwargs.get("nvidia_api_key", kwargs.get("api_key", None)),
infer_path="{base_url}/ranking",
)
19 changes: 19 additions & 0 deletions libs/ai-endpoints/tests/unit_tests/test_api_key.py
@@ -4,6 +4,7 @@

import pytest
from langchain_core.pydantic_v1 import SecretStr
from requests_mock import Mocker


@contextmanager
@@ -17,6 +18,24 @@ def no_env_var(var: str) -> Generator[None, None, None]:
os.environ[var] = val


@pytest.fixture(autouse=True)
def mock_v1_local_models(requests_mock: Mocker) -> None:
requests_mock.get(
"https://test_url/v1/models",
json={
"data": [
{
"id": "model1",
"object": "model",
"created": 1234567890,
"owned_by": "OWNER",
"root": "model1",
},
]
},
)


def test_create_without_api_key(public_class: type) -> None:
with no_env_var("NVIDIA_API_KEY"):
with pytest.warns(UserWarning):
19 changes: 19 additions & 0 deletions libs/ai-endpoints/tests/unit_tests/test_base_url.py
@@ -1,4 +1,5 @@
import pytest
from requests_mock import Mocker


@pytest.mark.parametrize(
@@ -24,6 +25,24 @@ def test_param_base_url_hosted(public_class: type, base_url: str) -> None:
assert client._client.is_hosted


@pytest.fixture(autouse=True)
def mock_v1_local_models(requests_mock: Mocker, base_url: str) -> None:
requests_mock.get(
f"{base_url}/models",
json={
"data": [
{
"id": "model1",
"object": "model",
"created": 1234567890,
"owned_by": "OWNER",
"root": "model1",
},
]
},
)


@pytest.mark.parametrize(
"base_url",
[
44 changes: 44 additions & 0 deletions libs/ai-endpoints/tests/unit_tests/test_model.py
@@ -26,6 +26,31 @@ def mock_v1_models(requests_mock: Mocker, known_unknown: str) -> None:
)


@pytest.fixture(autouse=True)
def mock_v1_local_models(requests_mock: Mocker, known_unknown: str) -> None:
requests_mock.get(
"http://localhost:8000/v1/models",
json={
"data": [
{
"id": known_unknown,
"object": "model",
"created": 1234567890,
"owned_by": "OWNER",
"root": known_unknown,
},
{
"id": "lora1",
"object": "model",
"created": 1234567890,
"owned_by": "OWNER",
"root": known_unknown,
},
]
},
)


@pytest.mark.parametrize(
"alias",
[
@@ -84,3 +109,22 @@ def test_unknown_unknown(public_class: type) -> None:
with pytest.raises(ValueError) as e:
public_class(model="test/unknown-unknown", nvidia_api_key="a-bogus-key")
assert "unknown" in str(e.value)


def test_default_known(public_class: type, known_unknown: str) -> None:
"""
Test that the default model is set from the endpoint's available models when none is specified.
"""
# check if default model is getting set
with pytest.warns(UserWarning):
x = public_class(base_url="http://localhost:8000/v1")
assert x.model == known_unknown


def test_default_lora(public_class: type) -> None:
"""
Test that a LoRA model served by the endpoint is accepted when named explicitly.
"""
# explicitly select a LoRA model served by the endpoint
x = public_class(base_url="http://localhost:8000/v1", model="lora1")
assert x.model == "lora1"
