From e07839b1534b056049c94bcc26a464d7d586839e Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Mon, 5 Aug 2024 06:37:56 -0400 Subject: [PATCH 01/31] mark mistralai/codestral-22b-instruct-v0.1 as supporting structured output --- libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py | 1 + 1 file changed, 1 insertion(+) diff --git a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py index 075a470f..5aabea07 100644 --- a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py +++ b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py @@ -257,6 +257,7 @@ def validate_client(cls, client: str, values: dict) -> str: model_type="chat", client="ChatNVIDIA", aliases=["ai-codestral-22b-instruct-v01"], + supports_structured_output=True, ), "google/gemma-2-9b-it": Model( id="google/gemma-2-9b-it", From dbad718d7d8c15e6632ab2adab6d97ae3f03dc29 Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Tue, 6 Aug 2024 11:44:57 -0400 Subject: [PATCH 02/31] add tests to align pydantic support, langchain_core.pydantic_v1 + pydantic.v1 + pydantic --- libs/ai-endpoints/scripts/check_pydantic.sh | 2 +- .../unit_tests/test_structured_output.py | 59 ++++++++++++++++++- 2 files changed, 57 insertions(+), 4 deletions(-) diff --git a/libs/ai-endpoints/scripts/check_pydantic.sh b/libs/ai-endpoints/scripts/check_pydantic.sh index 06b5bb81..d0fa31d6 100755 --- a/libs/ai-endpoints/scripts/check_pydantic.sh +++ b/libs/ai-endpoints/scripts/check_pydantic.sh @@ -14,7 +14,7 @@ fi repository_path="$1" # Search for lines matching the pattern within the specified repository -result=$(git -C "$repository_path" grep -E '^import pydantic|^from pydantic') +result=$(git -C "$repository_path" grep -E '^import pydantic|^from pydantic' | grep -v "# ignore: check_pydantic") # Check if any matching lines were found if [ -n "$result" ]; then diff --git a/libs/ai-endpoints/tests/unit_tests/test_structured_output.py b/libs/ai-endpoints/tests/unit_tests/test_structured_output.py index 0c8aa626..053b10b3 100644 --- a/libs/ai-endpoints/tests/unit_tests/test_structured_output.py +++ b/libs/ai-endpoints/tests/unit_tests/test_structured_output.py @@ -1,14 +1,18 @@ import enum import warnings -from typing import Callable, List, Optional +from typing import Callable, List, Optional, Type import pytest -from langchain_core.pydantic_v1 import BaseModel, Field +import requests_mock +from langchain_core.pydantic_v1 import BaseModel as lc_pydanticV1BaseModel +from langchain_core.pydantic_v1 import Field +from pydantic import BaseModel as pydanticV2BaseModel # ignore: check_pydantic +from pydantic.v1 import BaseModel as pydanticV1BaseModel # ignore: check_pydantic from langchain_nvidia_ai_endpoints import ChatNVIDIA -class Joke(BaseModel): +class Joke(lc_pydanticV1BaseModel): """Joke to tell user.""" setup: str = Field(description="The setup of the joke") @@ -136,3 +140,52 @@ def test_stream_enum_incomplete( for chunk in structured_llm.stream("This is ignored."): response = chunk assert response is None + + +@pytest.mark.parametrize( + "pydanticBaseModel", + [ + lc_pydanticV1BaseModel, + pydanticV1BaseModel, + pydanticV2BaseModel, + ], + ids=["lc-pydantic-v1", "pydantic-v1", "pydantic-v2"], +) +def test_pydantic_version( + requests_mock: requests_mock.Mocker, + pydanticBaseModel: Type, +) -> None: + requests_mock.post( + "https://integrate.api.nvidia.com/v1/chat/completions", + json={ + "id": "chatcmpl-ID", + "object": "chat.completion", + "created": 1234567890, + 
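+            # a sketch-level annotation: these payload fields are placeholders;
+            # the mocked response only needs to carry '{"name": "Sam Doe"}' so
+            # it can parse into the Person model defined below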
"model": "BOGUS", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": '{"name": "Sam Doe"}', + }, + "logprobs": None, + "finish_reason": "stop", + } + ], + "usage": { + "prompt_tokens": 22, + "completion_tokens": 20, + "total_tokens": 42, + }, + "system_fingerprint": None, + }, + ) + + class Person(pydanticBaseModel): # type: ignore + name: str + + llm = ChatNVIDIA(api_key="BOGUS").with_structured_output(Person) + response = llm.invoke("This is ignored.") + assert isinstance(response, Person) + assert response.name == "Sam Doe" From be3cff28d740211d4fcff832463771e708d5d081 Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Tue, 6 Aug 2024 11:45:26 -0400 Subject: [PATCH 03/31] use is_basemodel_subclass to detect BaseModel for with_structured_output --- .../chat_models.py | 38 ++++++++++--------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/chat_models.py b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/chat_models.py index 0b2e1408..e64e9d2d 100644 --- a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/chat_models.py +++ b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/chat_models.py @@ -50,6 +50,7 @@ from langchain_core.runnables import Runnable from langchain_core.tools import BaseTool from langchain_core.utils.function_calling import convert_to_openai_tool +from langchain_core.utils.pydantic import is_basemodel_subclass from langchain_nvidia_ai_endpoints._common import _NVIDIAClient from langchain_nvidia_ai_endpoints._statics import Model @@ -679,24 +680,6 @@ class Choices(enum.Enum): output_parser: BaseOutputParser = JsonOutputParser() nvext_param: Dict[str, Any] = {"guided_json": schema} - elif issubclass(schema, BaseModel): - # PydanticOutputParser does not support streaming. what we do - # instead is ignore all inputs that are incomplete wrt the - # underlying Pydantic schema. if the entire input is invalid, - # we return None. - class ForgivingPydanticOutputParser(PydanticOutputParser): - def parse_result( - self, result: List[Generation], *, partial: bool = False - ) -> Any: - try: - return super().parse_result(result, partial=partial) - except OutputParserException: - pass - return None - - output_parser = ForgivingPydanticOutputParser(pydantic_object=schema) - nvext_param = {"guided_json": schema.schema()} - elif issubclass(schema, enum.Enum): # langchain's EnumOutputParser is not in langchain_core # and doesn't support streaming. this is a simple implementation @@ -724,6 +707,25 @@ def parse(self, response: str) -> Any: ) output_parser = EnumOutputParser(enum=schema) nvext_param = {"guided_choice": choices} + + elif is_basemodel_subclass(schema): + # PydanticOutputParser does not support streaming. what we do + # instead is ignore all inputs that are incomplete wrt the + # underlying Pydantic schema. if the entire input is invalid, + # we return None. 
+ class ForgivingPydanticOutputParser(PydanticOutputParser): + def parse_result( + self, result: List[Generation], *, partial: bool = False + ) -> Any: + try: + return super().parse_result(result, partial=partial) + except OutputParserException: + pass + return None + + output_parser = ForgivingPydanticOutputParser(pydantic_object=schema) + nvext_param = {"guided_json": schema.schema()} + else: raise ValueError( "Schema must be a Pydantic object, a dictionary " From 4670520765161e04df1e6c49a1c3c8f6dc8d3584 Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Wed, 7 Aug 2024 06:49:50 -0400 Subject: [PATCH 04/31] mark nv-mistralai/mistral-nemo-12b-instruct as supporting tool calls --- libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py | 1 + 1 file changed, 1 insertion(+) diff --git a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py index 5aabea07..2aa64387 100644 --- a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py +++ b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py @@ -287,6 +287,7 @@ def validate_client(cls, client: str, values: dict) -> str: id="nv-mistralai/mistral-nemo-12b-instruct", model_type="chat", client="ChatNVIDIA", + supports_tools=True, supports_structured_output=True, ), "meta/llama-3.1-8b-instruct": Model( From 466bf470d78463aedaa7b368dab7650f97e8f05b Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Wed, 7 Aug 2024 06:51:11 -0400 Subject: [PATCH 05/31] add response parsing unit tests to confirm no-arg tool call and single-arg tool call parsing works --- .../tests/unit_tests/test_bind_tools.py | 129 +++++++++++++++++- 1 file changed, 128 insertions(+), 1 deletion(-) diff --git a/libs/ai-endpoints/tests/unit_tests/test_bind_tools.py b/libs/ai-endpoints/tests/unit_tests/test_bind_tools.py index 4876f5f1..2c7ae73f 100644 --- a/libs/ai-endpoints/tests/unit_tests/test_bind_tools.py +++ b/libs/ai-endpoints/tests/unit_tests/test_bind_tools.py @@ -1,7 +1,12 @@ +import json import warnings -from typing import Any +from functools import reduce +from operator import add +from typing import Any, List import pytest +import requests_mock +from langchain_core.messages import AIMessage, AIMessageChunk from langchain_core.pydantic_v1 import BaseModel, Field from langchain_core.tools import tool @@ -62,3 +67,125 @@ def test_bind_tool_and_select_negative(tools: Any, choice: str) -> None: with pytest.raises(ValueError) as e: ChatNVIDIA(api_key="BOGUS").bind_tools(tools=tools, tool_choice=choice) assert "not found in the tools list" in str(e.value) + + +@pytest.fixture +def mock_v1_models(requests_mock: requests_mock.Mocker) -> None: + requests_mock.get("https://integrate.api.nvidia.com/v1/models", json={"data": []}) + + +### +# the invoke/stream response_parsing tests are here because of a bug in the +# server response where "" was returned as the arguments for the tool call. +# we're verifying expected results parse correctly. 
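+# an example of the malformed fragment (a no-argument call should carry "{}"):
+#   "function": {"name": "magic", "arguments": ""}
+# the invoke-path case for it ("no-args-nim") remains commented out below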
+### + + +@pytest.mark.parametrize( + "arguments", + [ + r"{}", + # r"", + r'{"input": 3}', + ], + ids=[ + "no-args-oai", + # "no-args-nim", + "one-arg-int", + ], +) +def test_invoke_response_parsing( + requests_mock: requests_mock.Mocker, + mock_v1_models: None, + arguments: str, +) -> None: + requests_mock.post( + "https://integrate.api.nvidia.com/v1/chat/completions", + json={ + "id": "chatcmpl-ID", + "object": "chat.completion", + "created": 1234567890, + "model": "BOGUS", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": None, + "tool_calls": [ + { + "id": "tool-ID", + "type": "function", + "function": { + "name": "magic", + "arguments": arguments, + }, + } + ], + }, + "logprobs": None, + "finish_reason": "tool_calls", + } + ], + "usage": { + "prompt_tokens": 22, + "completion_tokens": 20, + "total_tokens": 42, + }, + "system_fingerprint": None, + }, + ) + + llm = ChatNVIDIA(api_key="BOGUS") + response = llm.invoke("What's the magic?") + assert isinstance(response, AIMessage) + assert response.tool_calls + assert response.tool_calls[0]["name"] == "magic" + assert response.tool_calls[0]["args"] == json.loads(arguments) + + +@pytest.mark.parametrize( + "argument_chunks", + [ + [ + r'"{}"', + ], + [ + r'""', + ], + [ + r'"{\""', + r'"input\""', + r'"\":"', + r"3", + r'"}"', + ], + [r'"{\"intput\": 3}"'], + ], + ids=["no-args-oai", "no-args-nim", "one-arg-int-oai", "one-arg-int-nim"], +) +def test_stream_response_parsing( + requests_mock: requests_mock.Mocker, + mock_v1_models: None, + argument_chunks: List[str], +) -> None: + requests_mock.post( + "https://integrate.api.nvidia.com/v1/chat/completions", + text="\n\n".join( + [ + 'data: {"id":"ID0","object":"chat.completion.chunk","created":1234567890,"model":"BOGUS","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"ID1","type":"function","function":{"name":"magic","arguments":""}}]},"logprobs":null,"finish_reason":null}]}', # noqa: E501 + *[ + f'data: {{"id":"ID0","object":"chat.completion.chunk","created":1234567890,"model":"BOGUS","system_fingerprint":null,"choices":[{{"index":0,"delta":{{"tool_calls":[{{"index":0,"function":{{"arguments":{argument}}}}}]}},"logprobs":null,"finish_reason":null}}]}}' # noqa: E501 + for argument in argument_chunks + ], + 'data: {"id":"ID0","object":"chat.completion.chunk","created":1234567890,"model":"BOGUS","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}]}', # noqa: E501 + "data: [DONE]", + ] + ), + ) + + llm = ChatNVIDIA(api_key="BOGUS") + response = reduce(add, llm.stream("What's the magic?")) + assert isinstance(response, AIMessageChunk) + assert response.tool_calls + assert response.tool_calls[0]["name"] == "magic" From af781f91d4570cf4716564317b3f7023a2319080 Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Wed, 7 Aug 2024 05:55:35 -0500 Subject: [PATCH 06/31] Update _scheduled_test.yml change scheduled time from 13:00GMT to 8:00GMT so results are available by 6AM ET --- .github/workflows/_scheduled_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_scheduled_test.yml b/.github/workflows/_scheduled_test.yml index 849b8042..2572e940 100644 --- a/.github/workflows/_scheduled_test.yml +++ b/.github/workflows/_scheduled_test.yml @@ -4,7 +4,7 @@ run-name: langchain-nvidia Scheduled tests on: workflow_dispatch: schedule: - - cron: '0 13 * * *' + - cron: '0 8 * * *' env: POETRY_VERSION: "1.7.1" From 
91a336da282683f87b306d5953f6b13de3a23712 Mon Sep 17 00:00:00 2001 From: raspawar Date: Wed, 7 Aug 2024 21:17:31 +0530 Subject: [PATCH 07/31] change min version for langchain core --- libs/ai-endpoints/poetry.lock | 2 +- libs/ai-endpoints/pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/ai-endpoints/poetry.lock b/libs/ai-endpoints/poetry.lock index 1a4176d7..457bc907 100644 --- a/libs/ai-endpoints/poetry.lock +++ b/libs/ai-endpoints/poetry.lock @@ -1444,4 +1444,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "eaa16f38cabc695e4ef785ee4d7e87635669ef3323931d2514747a13a3db8ac8" +content-hash = "6acf7fa18caa83c76267748e07a1f53c0f5eef8a3b1d8cff07169c224b200a2d" diff --git a/libs/ai-endpoints/pyproject.toml b/libs/ai-endpoints/pyproject.toml index 85308c40..ee94d7f9 100644 --- a/libs/ai-endpoints/pyproject.toml +++ b/libs/ai-endpoints/pyproject.toml @@ -12,7 +12,7 @@ license = "MIT" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -langchain-core = ">=0.1.47,<0.3" +langchain-core = ">=0.2.22,<0.3" aiohttp = "^3.9.1" pillow = ">=10.0.0,<11.0.0" From 432baa329f4edf741e4e15e136e879ffb5bcd1eb Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Wed, 7 Aug 2024 11:04:18 -0400 Subject: [PATCH 08/31] add regression tests for ValueError when assistant content is None --- .../tests/unit_tests/test_bind_tools.py | 73 ++++++++++++++++++- 1 file changed, 72 insertions(+), 1 deletion(-) diff --git a/libs/ai-endpoints/tests/unit_tests/test_bind_tools.py b/libs/ai-endpoints/tests/unit_tests/test_bind_tools.py index 2c7ae73f..a8f044f9 100644 --- a/libs/ai-endpoints/tests/unit_tests/test_bind_tools.py +++ b/libs/ai-endpoints/tests/unit_tests/test_bind_tools.py @@ -6,7 +6,13 @@ import pytest import requests_mock -from langchain_core.messages import AIMessage, AIMessageChunk +from langchain_core.messages import ( + AIMessage, + AIMessageChunk, + BaseMessage, + HumanMessage, + ToolMessage, +) from langchain_core.pydantic_v1 import BaseModel, Field from langchain_core.tools import tool @@ -189,3 +195,68 @@ def test_stream_response_parsing( assert isinstance(response, AIMessageChunk) assert response.tool_calls assert response.tool_calls[0]["name"] == "magic" + + +def test_regression_parsing_human_ai_tool_invoke( + requests_mock: requests_mock.Mocker, +) -> None: + """ + a bug existed in the inference for sequence - + 0. messages = [human message] + 1. messages.append(llm.invoke(messages)) + 2. llm.invoke(messages) <- raised ValueError: Message ... 
has no content + """ + requests_mock.post( + "https://integrate.api.nvidia.com/v1/chat/completions", + json={ + "id": "chatcmpl-ID", + "object": "chat.completion", + "created": 1234567890, + "model": "BOGUS", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": None, + "tool_calls": [ + { + "id": "tool-ID", + "type": "function", + "function": { + "name": "magic", + "arguments": "{}", + }, + } + ], + }, + "logprobs": None, + "finish_reason": "tool_calls", + } + ], + "usage": { + "prompt_tokens": 22, + "completion_tokens": 20, + "total_tokens": 42, + }, + "system_fingerprint": None, + }, + ) + + llm = ChatNVIDIA(api_key="BOGUS") + messages: List[BaseMessage] = [HumanMessage("THIS IS IGNORED")] + response0 = llm.invoke(messages) + messages.append(response0) + messages.append(ToolMessage(content="SO IS THIS", tool_call_id="BOGUS")) + llm.invoke(messages) + + +def test_regression_ai_null_content( + requests_mock: requests_mock.Mocker, +) -> None: + requests_mock.post("https://integrate.api.nvidia.com/v1/chat/completions", json={}) + llm = ChatNVIDIA(api_key="BOGUS") + assistant = AIMessage(content="SKIPPED") + assistant.content = None # type: ignore + llm.invoke([assistant]) + llm.stream([assistant]) From 9fbdd1b6ff1570255a912e504049b92315c26f20 Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Thu, 8 Aug 2024 10:49:47 -0400 Subject: [PATCH 09/31] add PYTEST_ARGS option to Makefile, e.g. make test PYTEST_ARGS=-v --- libs/ai-endpoints/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libs/ai-endpoints/Makefile b/libs/ai-endpoints/Makefile index dd4ffcbb..52863145 100644 --- a/libs/ai-endpoints/Makefile +++ b/libs/ai-endpoints/Makefile @@ -7,16 +7,16 @@ all: help TEST_FILE ?= tests/unit_tests/ test: - poetry run pytest $(TEST_FILE) + poetry run pytest $(PYTEST_ARGS) $(TEST_FILE) tests: - poetry run pytest $(TEST_FILE) + poetry run pytest $(PYTEST_ARGS) $(TEST_FILE) check_imports: $(shell find langchain_nvidia_ai_endpoints -name '*.py') poetry run python ./scripts/check_imports.py $^ integration_tests: - poetry run pytest tests/integration_tests + poetry run pytest tests/integration_tests $(PYTEST_ARGS) ###################### From 9083688f152f6c0f2734432098a1963790c08173 Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Fri, 9 Aug 2024 12:55:56 -0400 Subject: [PATCH 10/31] add mistralai/mistral-large-2-instruct to set of supported chat + tool + structured output models --- .../ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py index 2aa64387..e9b280e4 100644 --- a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py +++ b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py @@ -332,6 +332,13 @@ def validate_client(cls, client: str, values: dict) -> str: model_type="chat", client="ChatNVIDIA", ), + "mistralai/mistral-large-2-instruct": Model( + id="mistralai/mistral-large-2-instruct", + model_type="chat", + client="ChatNVIDIA", + supports_tools=True, + supports_structured_output=True, + ), } QA_MODEL_TABLE = { From ed364bce25c28d20f980f9b52a6ec7dc118a70c0 Mon Sep 17 00:00:00 2001 From: haydeniw Date: Mon, 12 Aug 2024 11:49:40 -0700 Subject: [PATCH 11/31] removed langgraph react agent --- cookbook/nvidia_nim_agents_llama3.1.ipynb | 157 +++++++++++++++------- 1 file changed, 105 insertions(+), 52 deletions(-) diff --git 
a/cookbook/nvidia_nim_agents_llama3.1.ipynb b/cookbook/nvidia_nim_agents_llama3.1.ipynb index e3e13dce..5e354d16 100644 --- a/cookbook/nvidia_nim_agents_llama3.1.ipynb +++ b/cookbook/nvidia_nim_agents_llama3.1.ipynb @@ -81,7 +81,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "id": "aaeb35a9", "metadata": {}, "outputs": [], @@ -89,7 +89,7 @@ "import getpass\n", "import os\n", "\n", - "os.environ[\"NVIDIA_API_KEY\"] = getpass.getpass(\"Enter your NVIDIA API key: \")" + "os.environ[\"NVIDIA_API_KEY\"] = \"nvapi-xxx\"" ] }, { @@ -102,7 +102,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "id": "579881ca", "metadata": {}, "outputs": [], @@ -126,7 +126,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "id": "c8832545-d3c1-404f-afdb-6a00891f84c9", "metadata": {}, "outputs": [], @@ -134,12 +134,12 @@ "import getpass\n", "import os\n", "\n", - "os.environ[\"TAVILY_API_KEY\"] = getpass.getpass(\"Enter your Tavily API key: \")" + "os.environ[\"TAVILY_API_KEY\"] = \"tvly-xxx\"" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "id": "e1d1511d", "metadata": {}, "outputs": [], @@ -160,15 +160,29 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "id": "da73ae35", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/nvidia/test/lib/python3.10/site-packages/langchain/hub.py:86: DeprecationWarning: The `langchainhub sdk` is deprecated.\n", + "Please use the `langsmith sdk` instead:\n", + " pip install langsmith\n", + "Use the `pull_prompt` method.\n", + " res_dict = client.pull_repo(owner_repo_commit)\n" + ] + } + ], "source": [ - "from langgraph.prebuilt import create_react_agent\n", + "from langchain import hub\n", + "from langchain.agents import AgentExecutor, create_openai_tools_agent\n", "from langchain.callbacks.tracers import ConsoleCallbackHandler\n", "\n", - "app = create_react_agent(llm, tools)" + "prompt = hub.pull(\"hwchase17/openai-tools-agent\")\n", + "agent = create_openai_tools_agent(llm, tools, prompt)" ] }, { @@ -181,19 +195,47 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, "id": "02a109cc", "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3m\n", + "Invoking: `tavily_search_results_json` with `{'query': 'langchain definition'}`\n", + "\n", + "\n", + "\u001b[0m\u001b[36;1m\u001b[1;3m[{'url': 'https://python.langchain.com/v0.2/docs/introduction/', 'content': \"Introduction. LangChain is a framework for developing applications powered by large language models (LLMs).. LangChain simplifies every stage of the LLM application lifecycle: Development: Build your applications using LangChain's open-source building blocks, components, and third-party integrations.Use LangGraph to build stateful agents with first-class streaming and human-in-the-loop support.\"}]\u001b[0m\u001b[32;1m\u001b[1;3m\n", + "Invoking: `tavily_search_results_json` with `{'query': 'langchain'}`\n", + "\n", + "\n", + "\u001b[0m\u001b[36;1m\u001b[1;3m[{'url': 'https://python.langchain.com/v0.2/docs/introduction/', 'content': \"LangChain is a framework for developing applications powered by large language models (LLMs). 
Learn how to use LangChain's open-source libraries, components, and integrations to build, deploy, and evaluate LLM applications and agents.\"}]\u001b[0m\u001b[32;1m\u001b[1;3mBased on the information provided, LangChain is a framework for developing applications powered by large language models (LLMs). It simplifies every stage of the LLM application lifecycle, including development, deployment, and evaluation.\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "{'input': 'What is langchain?',\n", + " 'output': 'Based on the information provided, LangChain is a framework for developing applications powered by large language models (LLMs). It simplifies every stage of the LLM application lifecycle, including development, deployment, and evaluation.'}" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "query = \"What is LangChain?\"\n", - "messages = app.invoke({\"messages\": [(\"human\", query)]}, config={'callbacks': [ConsoleCallbackHandler()]})\n", - "{\n", - " \"input\": query,\n", - " \"output\": messages[\"messages\"][-1].content,\n", - "}" + "agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)\n", + "agent_executor.invoke({\"input\": \"What is langchain?\"})" ] }, { @@ -218,7 +260,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "id": "e9d8ed5f-b6e9-495f-85ff-e431d39475c4", "metadata": {}, "outputs": [], @@ -244,7 +286,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "id": "b71d7d05-d3ec-4005-911c-3e44df8102b4", "metadata": {}, "outputs": [], @@ -263,28 +305,59 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "id": "64a0eead-ee86-4b0b-8ae3-fb194ea69186", "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3m\n", + "Invoking: `get_current_location` with `{}`\n", + "\n", + "\n", + "\u001b[0m\u001b[33;1m\u001b[1;3m[35.7721, -78.6386]\u001b[0m\u001b[32;1m\u001b[1;3m\n", + "Invoking: `get_current_location` with `{}`\n", + "\n", + "\n", + "\u001b[0m\u001b[33;1m\u001b[1;3m[35.7721, -78.6386]\u001b[0m\u001b[32;1m\u001b[1;3m\n", + "Invoking: `tavily_search_results_json` with `{'query': 'weather in the location 35.7721, -78.6386'}`\n", + "\n", + "\n", + "\u001b[0m\u001b[36;1m\u001b[1;3m[{'url': 'https://www.weatherapi.com/', 'content': \"{'location': {'name': 'Raleigh', 'region': 'North Carolina', 'country': 'United States of America', 'lat': 35.77, 'lon': -78.64, 'tz_id': 'America/New_York', 'localtime_epoch': 1723484374, 'localtime': '2024-08-12 13:39'}, 'current': {'last_updated_epoch': 1723483800, 'last_updated': '2024-08-12 13:30', 'temp_c': 28.8, 'temp_f': 83.8, 'is_day': 1, 'condition': {'text': 'Patchy rain nearby', 'icon': '//cdn.weatherapi.com/weather/64x64/day/176.png', 'code': 1063}, 'wind_mph': 4.0, 'wind_kph': 6.5, 'wind_degree': 67, 'wind_dir': 'ENE', 'pressure_mb': 1017.0, 'pressure_in': 30.02, 'precip_mm': 0.04, 'precip_in': 0.0, 'humidity': 48, 'cloud': 73, 'feelslike_c': 29.7, 'feelslike_f': 85.5, 'windchill_c': 28.8, 'windchill_f': 83.8, 'heatindex_c': 29.7, 'heatindex_f': 85.5, 'dewpoint_c': 16.9, 'dewpoint_f': 62.4, 'vis_km': 10.0, 'vis_miles': 6.0, 'uv': 6.0, 'gust_mph': 5.1, 'gust_kph': 8.2}}\"}]\u001b[0m\u001b[32;1m\u001b[1;3mThe current weather information of 
your location is as follows: the location is Raleigh, North Carolina, United States of America, with a temperature of 28.8 degrees Celsius and a humidity of 48%. The weather condition is patchy rain nearby, with a wind speed of 6.5 km/h and a wind direction of ENE. The atmospheric pressure is 1017.0 mb, and the precipitation is 0.04 mm. The feels-like temperature is 29.7 degrees Celsius, and the heat index is also 29.7 degrees Celsius. The dew point is 16.9 degrees Celsius, and the visibility is 10.0 km. The UV index is 6.0, and the gust speed is 8.2 km/h.\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "{'input': 'Search for the current weather information of my location?',\n", + " 'output': 'The current weather information of your location is as follows: the location is Raleigh, North Carolina, United States of America, with a temperature of 28.8 degrees Celsius and a humidity of 48%. The weather condition is patchy rain nearby, with a wind speed of 6.5 km/h and a wind direction of ENE. The atmospheric pressure is 1017.0 mb, and the precipitation is 0.04 mm. The feels-like temperature is 29.7 degrees Celsius, and the heat index is also 29.7 degrees Celsius. The dew point is 16.9 degrees Celsius, and the visibility is 10.0 km. The UV index is 6.0, and the gust speed is 8.2 km/h.'}" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "from langgraph.prebuilt import create_react_agent\n", "from langchain.globals import set_verbose\n", "from langchain.callbacks.tracers import ConsoleCallbackHandler\n", "\n", "set_verbose(True) # verbose output to follow function calling\n", "\n", - "query = \"What is the current weather where I am?\"\n", - "app = create_react_agent(llm, tools)\n", - "\n", + "query = \"Search for the current weather information of my location?\"\n", + "agent = create_openai_tools_agent(llm, tools, prompt)\n", "\n", - "messages = app.invoke({\"messages\": [(\"human\", query)]}, config={'callbacks': [ConsoleCallbackHandler()]})\n", - "{\n", - " \"input\": query,\n", - " \"output\": messages[\"messages\"][-1].content,\n", - "}" + "agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)\n", + "agent_executor.invoke({\"input\": query})" ] }, { @@ -296,26 +369,6 @@ "Finally, the result is returned to the user." ] }, - { - "cell_type": "markdown", - "id": "8ace95bd-f2f7-469e-9d9e-ea7b4c57e8f4", - "metadata": {}, - "source": [ - "Below, you can see a diagram of the application's graph. The agent continues to use tools until the query is resolved." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "128b55cf-5ee3-42d2-897b-173a6d696921", - "metadata": {}, - "outputs": [], - "source": [ - "from IPython.display import Image, display\n", - "\n", - "display(Image(app.get_graph(xray=True).draw_mermaid_png()))" - ] - }, { "cell_type": "markdown", "id": "42ce0ec8-d5bb-4ba8-b2d6-6fe3a0c0aeec", @@ -344,7 +397,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + "version": "3.10.12" } }, "nbformat": 4, From 45e04a08e6dffb3d8452c76a7afeb6fff28c0560 Mon Sep 17 00:00:00 2001 From: haydeniw Date: Mon, 12 Aug 2024 13:52:47 -0700 Subject: [PATCH 12/31] improved to work with 8b and custom langgraph --- cookbook/nvidia_nim_agents_llama3.1.ipynb | 391 ++++++++++++++++------ 1 file changed, 287 insertions(+), 104 deletions(-) diff --git a/cookbook/nvidia_nim_agents_llama3.1.ipynb b/cookbook/nvidia_nim_agents_llama3.1.ipynb index 5e354d16..011b2cf7 100644 --- a/cookbook/nvidia_nim_agents_llama3.1.ipynb +++ b/cookbook/nvidia_nim_agents_llama3.1.ipynb @@ -38,7 +38,9 @@ "\n", "Next, the model is packaged as a NIM, meaning it's optimized to deliver best performance on NVIDIA accelerated infrastructure and easy to deploy as well as use. This microservice packaging also uses OpenAI compatible APIs, so developers can build world-class generative AI agents with ease.\n", "\n", - "Let's see how to use tools in a couple of examples." + "Let's see how to use tools for agentic applications with LangGraph. \n", + "\n", + "*Note: lots of the educational content is adapted from https://langchain-ai.github.io/langgraph/concepts/high_level/.*" ] }, { @@ -68,7 +70,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install -U langchain langgraph langchain-nvidia-ai-endpoints langchain-community langchain-openai tavily-python geocoder" + "!poetry add langchain langgraph langchain-nvidia-ai-endpoints langchain-community langchain-openai tavily-python geocoder" ] }, { @@ -81,7 +83,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 1, "id": "aaeb35a9", "metadata": {}, "outputs": [], @@ -102,7 +104,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 2, "id": "579881ca", "metadata": {}, "outputs": [], @@ -126,7 +128,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 3, "id": "c8832545-d3c1-404f-afdb-6a00891f84c9", "metadata": {}, "outputs": [], @@ -139,7 +141,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 4, "id": "e1d1511d", "metadata": {}, "outputs": [], @@ -152,90 +154,146 @@ }, { "cell_type": "markdown", - "id": "cd230847", + "id": "8f63dd76-d8c7-429e-bf7c-d2f575ef8340", "metadata": {}, "source": [ - "Create [ReAct agent](https://python.langchain.com/v0.2/docs/concepts/#react-agents), prebuilt in [LangGraph](https://langchain-ai.github.io/langgraph/#overview). " + "We will wrap the tools as a `ToolNode` which will be beneficial to use in LangGraph later." ] }, { "cell_type": "code", - "execution_count": 23, - "id": "da73ae35", + "execution_count": 5, + "id": "75437d15-2e38-4673-850c-3272274aa917", + "metadata": {}, + "outputs": [], + "source": [ + "from langgraph.prebuilt import ToolNode\n", + "\n", + "tool_node = ToolNode(tools)" + ] + }, + { + "cell_type": "markdown", + "id": "d69d35aa-09d6-4484-a230-c0fe4c2b6bcb", "metadata": {}, + "source": [ + "Let's invoke the tool manually to see the result." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "a433c5c5-c69e-410c-bfbd-df9b3f9bcf3b", + "metadata": { + "scrolled": true + }, "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/nvidia/test/lib/python3.10/site-packages/langchain/hub.py:86: DeprecationWarning: The `langchainhub sdk` is deprecated.\n", - "Please use the `langsmith sdk` instead:\n", - " pip install langsmith\n", - "Use the `pull_prompt` method.\n", - " res_dict = client.pull_repo(owner_repo_commit)\n" - ] + "data": { + "text/plain": [ + "{'messages': [ToolMessage(content='[{\"url\": \"https://www.weatherapi.com/\", \"content\": \"{\\'location\\': {\\'name\\': \\'San Francisco\\', \\'region\\': \\'California\\', \\'country\\': \\'United States of America\\', \\'lat\\': 37.78, \\'lon\\': -122.42, \\'tz_id\\': \\'America/Los_Angeles\\', \\'localtime_epoch\\': 1723495766, \\'localtime\\': \\'2024-08-12 13:49\\'}, \\'current\\': {\\'last_updated_epoch\\': 1723495500, \\'last_updated\\': \\'2024-08-12 13:45\\', \\'temp_c\\': 16.0, \\'temp_f\\': 60.8, \\'is_day\\': 1, \\'condition\\': {\\'text\\': \\'Sunny\\', \\'icon\\': \\'//cdn.weatherapi.com/weather/64x64/day/113.png\\', \\'code\\': 1000}, \\'wind_mph\\': 12.3, \\'wind_kph\\': 19.8, \\'wind_degree\\': 250, \\'wind_dir\\': \\'WSW\\', \\'pressure_mb\\': 1015.0, \\'pressure_in\\': 29.98, \\'precip_mm\\': 0.0, \\'precip_in\\': 0.0, \\'humidity\\': 75, \\'cloud\\': 0, \\'feelslike_c\\': 15.9, \\'feelslike_f\\': 60.7, \\'windchill_c\\': 15.9, \\'windchill_f\\': 60.7, \\'heatindex_c\\': 16.0, \\'heatindex_f\\': 60.8, \\'dewpoint_c\\': 11.5, \\'dewpoint_f\\': 52.8, \\'vis_km\\': 10.0, \\'vis_miles\\': 6.0, \\'uv\\': 5.0, \\'gust_mph\\': 14.5, \\'gust_kph\\': 23.4}}\"}]', name='tavily_search_results_json', tool_call_id='tool_call_id', artifact={'query': \"What's the weather in San Francisco?\", 'follow_up_questions': None, 'answer': None, 'images': [], 'results': [{'title': 'Weather in San Francisco', 'url': 'https://www.weatherapi.com/', 'content': \"{'location': {'name': 'San Francisco', 'region': 'California', 'country': 'United States of America', 'lat': 37.78, 'lon': -122.42, 'tz_id': 'America/Los_Angeles', 'localtime_epoch': 1723495766, 'localtime': '2024-08-12 13:49'}, 'current': {'last_updated_epoch': 1723495500, 'last_updated': '2024-08-12 13:45', 'temp_c': 16.0, 'temp_f': 60.8, 'is_day': 1, 'condition': {'text': 'Sunny', 'icon': '//cdn.weatherapi.com/weather/64x64/day/113.png', 'code': 1000}, 'wind_mph': 12.3, 'wind_kph': 19.8, 'wind_degree': 250, 'wind_dir': 'WSW', 'pressure_mb': 1015.0, 'pressure_in': 29.98, 'precip_mm': 0.0, 'precip_in': 0.0, 'humidity': 75, 'cloud': 0, 'feelslike_c': 15.9, 'feelslike_f': 60.7, 'windchill_c': 15.9, 'windchill_f': 60.7, 'heatindex_c': 16.0, 'heatindex_f': 60.8, 'dewpoint_c': 11.5, 'dewpoint_f': 52.8, 'vis_km': 10.0, 'vis_miles': 6.0, 'uv': 5.0, 'gust_mph': 14.5, 'gust_kph': 23.4}}\", 'score': 0.9999223, 'raw_content': None}], 'response_time': 2.19})]}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "from langchain import hub\n", - "from langchain.agents import AgentExecutor, create_openai_tools_agent\n", - "from langchain.callbacks.tracers import ConsoleCallbackHandler\n", + "from langchain_core.messages import AIMessage\n", + "\n", + "message_with_single_tool_call = AIMessage(\n", + " content=\"\",\n", + " tool_calls=[\n", + " {\n", + " \"name\": \"tavily_search_results_json\",\n", + " \"args\": {\"query\": \"What's the weather in 
San Francisco?\"},\n", + " \"id\": \"tool_call_id\",\n", + " \"type\": \"tool_call\",\n", + " }\n", + " ],\n", + ")\n", "\n", - "prompt = hub.pull(\"hwchase17/openai-tools-agent\")\n", - "agent = create_openai_tools_agent(llm, tools, prompt)" + "tool_node.invoke({\"messages\": [message_with_single_tool_call]})" ] }, { "cell_type": "markdown", - "id": "be70d7ee", + "id": "6ae74552-a8d9-4a05-ab55-47d6b3cfea5d", "metadata": {}, "source": [ - "Run agent; a callback is passed to provide more verbose output." + "Now, let's see how to use the tool with a chat model. This requires binding the tool to the LLM. " ] }, { "cell_type": "code", - "execution_count": 30, - "id": "02a109cc", - "metadata": { - "scrolled": true - }, + "execution_count": 7, + "id": "771400bb-3a7d-4c87-b7fe-30e2f9c92f5a", + "metadata": {}, + "outputs": [], + "source": [ + "llm_with_tools = llm.bind_tools(tools)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "5a15de17-dc4b-467a-bbf6-7526b2adb069", + "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", - "\u001b[32;1m\u001b[1;3m\n", - "Invoking: `tavily_search_results_json` with `{'query': 'langchain definition'}`\n", - "\n", - "\n", - "\u001b[0m\u001b[36;1m\u001b[1;3m[{'url': 'https://python.langchain.com/v0.2/docs/introduction/', 'content': \"Introduction. LangChain is a framework for developing applications powered by large language models (LLMs).. LangChain simplifies every stage of the LLM application lifecycle: Development: Build your applications using LangChain's open-source building blocks, components, and third-party integrations.Use LangGraph to build stateful agents with first-class streaming and human-in-the-loop support.\"}]\u001b[0m\u001b[32;1m\u001b[1;3m\n", - "Invoking: `tavily_search_results_json` with `{'query': 'langchain'}`\n", - "\n", - "\n", - "\u001b[0m\u001b[36;1m\u001b[1;3m[{'url': 'https://python.langchain.com/v0.2/docs/introduction/', 'content': \"LangChain is a framework for developing applications powered by large language models (LLMs). Learn how to use LangChain's open-source libraries, components, and integrations to build, deploy, and evaluate LLM applications and agents.\"}]\u001b[0m\u001b[32;1m\u001b[1;3mBased on the information provided, LangChain is a framework for developing applications powered by large language models (LLMs). It simplifies every stage of the LLM application lifecycle, including development, deployment, and evaluation.\u001b[0m\n", - "\n", - "\u001b[1m> Finished chain.\u001b[0m\n" - ] - }, + "data": { + "text/plain": [ + "[{'name': 'tavily_search_results_json',\n", + " 'args': {'query': 'San Francisco weather today'},\n", + " 'id': 'chatcmpl-tool-99f68eb818504801aec450e1cee73b6f',\n", + " 'type': 'tool_call'}]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "llm_with_tools.invoke(\"What's the weather in San Francisco?\").tool_calls" + ] + }, + { + "cell_type": "markdown", + "id": "ef07259b-2baf-4407-a3b3-3e48d060adfa", + "metadata": {}, + "source": [ + "As you can see, the LLM decides that it is best to use the `tavily_search_results_json` tool and that the query is \"San Francisco Weather today\". Output is structured accordingly." 
+ ] + }, + { + "cell_type": "markdown", + "id": "6909a295-e9d4-416f-a57d-c08a2bc5f1c5", + "metadata": {}, + "source": [ + "Let's send this as a message to the ToolNode -- more on this in the next section :) " + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "335a3176-2f52-4001-afe7-d6536498493c", + "metadata": {}, + "outputs": [ { "data": { "text/plain": [ - "{'input': 'What is langchain?',\n", - " 'output': 'Based on the information provided, LangChain is a framework for developing applications powered by large language models (LLMs). It simplifies every stage of the LLM application lifecycle, including development, deployment, and evaluation.'}" + "{'messages': [ToolMessage(content='[{\"url\": \"https://www.weatherapi.com/\", \"content\": \"{\\'location\\': {\\'name\\': \\'San Francisco\\', \\'region\\': \\'California\\', \\'country\\': \\'United States of America\\', \\'lat\\': 37.78, \\'lon\\': -122.42, \\'tz_id\\': \\'America/Los_Angeles\\', \\'localtime_epoch\\': 1723495766, \\'localtime\\': \\'2024-08-12 13:49\\'}, \\'current\\': {\\'last_updated_epoch\\': 1723495500, \\'last_updated\\': \\'2024-08-12 13:45\\', \\'temp_c\\': 16.0, \\'temp_f\\': 60.8, \\'is_day\\': 1, \\'condition\\': {\\'text\\': \\'Sunny\\', \\'icon\\': \\'//cdn.weatherapi.com/weather/64x64/day/113.png\\', \\'code\\': 1000}, \\'wind_mph\\': 12.3, \\'wind_kph\\': 19.8, \\'wind_degree\\': 250, \\'wind_dir\\': \\'WSW\\', \\'pressure_mb\\': 1015.0, \\'pressure_in\\': 29.98, \\'precip_mm\\': 0.0, \\'precip_in\\': 0.0, \\'humidity\\': 75, \\'cloud\\': 0, \\'feelslike_c\\': 15.9, \\'feelslike_f\\': 60.7, \\'windchill_c\\': 15.9, \\'windchill_f\\': 60.7, \\'heatindex_c\\': 16.0, \\'heatindex_f\\': 60.8, \\'dewpoint_c\\': 11.5, \\'dewpoint_f\\': 52.8, \\'vis_km\\': 10.0, \\'vis_miles\\': 6.0, \\'uv\\': 5.0, \\'gust_mph\\': 14.5, \\'gust_kph\\': 23.4}}\"}]', name='tavily_search_results_json', tool_call_id='chatcmpl-tool-d4fbc9ca41ec4728b6df56ab2c41fa97', artifact={'query': 'San Francisco weather today', 'follow_up_questions': None, 'answer': None, 'images': [], 'results': [{'title': 'Weather in San Francisco', 'url': 'https://www.weatherapi.com/', 'content': \"{'location': {'name': 'San Francisco', 'region': 'California', 'country': 'United States of America', 'lat': 37.78, 'lon': -122.42, 'tz_id': 'America/Los_Angeles', 'localtime_epoch': 1723495766, 'localtime': '2024-08-12 13:49'}, 'current': {'last_updated_epoch': 1723495500, 'last_updated': '2024-08-12 13:45', 'temp_c': 16.0, 'temp_f': 60.8, 'is_day': 1, 'condition': {'text': 'Sunny', 'icon': '//cdn.weatherapi.com/weather/64x64/day/113.png', 'code': 1000}, 'wind_mph': 12.3, 'wind_kph': 19.8, 'wind_degree': 250, 'wind_dir': 'WSW', 'pressure_mb': 1015.0, 'pressure_in': 29.98, 'precip_mm': 0.0, 'precip_in': 0.0, 'humidity': 75, 'cloud': 0, 'feelslike_c': 15.9, 'feelslike_f': 60.7, 'windchill_c': 15.9, 'windchill_f': 60.7, 'heatindex_c': 16.0, 'heatindex_f': 60.8, 'dewpoint_c': 11.5, 'dewpoint_f': 52.8, 'vis_km': 10.0, 'vis_miles': 6.0, 'uv': 5.0, 'gust_mph': 14.5, 'gust_kph': 23.4}}\", 'score': 0.9988695, 'raw_content': None}], 'response_time': 2.36})]}" ] }, - "execution_count": 30, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)\n", - "agent_executor.invoke({\"input\": \"What is langchain?\"})" + "tool_node.invoke({\"messages\": [llm_with_tools.invoke(\"What's the weather in San Francisco?\")]})" ] }, { @@ -243,11 +301,13 @@ "id": 
"b5e9bbb9", "metadata": {}, "source": [ - "## 🔨 Tool Usage -- Adding on a Custom Tool\n", + "## 🔨 Tool Usage -- Adding on a Custom Tool and Using LangGraph\n", "\n", "Let's see how to [define a custom tool](https://python.langchain.com/v0.2/docs/how_to/custom_tools/) for your NIM agent and how it handles multiple tools. \n", "\n", - "We'll enhance the NIM with Tavily search with some custom tools to determine a user's current location (based on IP address) and return a latitude and longitude. We will use these tools to have Tavily look up the weather in the user's current location." + "We'll enhance the NIM with Tavily search with some custom tools to determine a user's current location (based on IP address) and return a latitude and longitude. We will use these tools to have Tavily look up the weather in the user's current location.\n", + "\n", + "In addition, we'll see how to use the `ToolNode` we declared earlier in a graph declared with LangGraph. We'll use an agent that repeatedly calls an LLM deciding which tools to call, the input to those tools, executes/produces output, and then feeds the outputs back to the LLM as observation. When no more tools are needed, the loop ends. " ] }, { @@ -260,7 +320,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 10, "id": "e9d8ed5f-b6e9-495f-85ff-e431d39475c4", "metadata": {}, "outputs": [], @@ -281,92 +341,215 @@ "id": "089e3223-50f3-4e8e-9043-24c792ca7daf", "metadata": {}, "source": [ - "Let's update the tools to use the Tavily tool delcared earlier and also add the `get_current_location` tool." + "Let's update the tools and the `ToolNode` to use the Tavily tool delcared earlier and also add the `get_current_location` tool." ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 11, "id": "b71d7d05-d3ec-4005-911c-3e44df8102b4", "metadata": {}, "outputs": [], "source": [ "# Declare two tools: Tavily and custom get_current_location tool.\n", - "tools = [TavilySearchResults(max_results=1), get_current_location]" + "tools = [TavilySearchResults(max_results=1), get_current_location]\n", + "tool_node = ToolNode(tools)\n", + "\n", + "# be sure to bind the updated tools to the LLM!\n", + "llm_with_tools = llm.bind_tools(tools)" ] }, { "cell_type": "markdown", - "id": "cd04f130-3f9b-4a0d-a018-d954dc41ad4b", + "id": "5e0a6ed4-d6a8-4e0b-a094-40adf98f77d4", "metadata": {}, "source": [ - "We already declared our LLM, so we don't need to redeclare it. However, we do want to update the agent to have the updated tools." + "Let's create a graph! LangGraph models agent workflows as graphs and the behavior of the agent is defined by 3 key pieces:\n", + "1) `State`: shared data structure that represents the snapshot of the application. In this example, the state consists of messages.\n", + "2) `Nodes`: Python functions that encode the logic of the agents. They receive the state as input and then perform some actions and return an updated State. In this example, the nodes are an agent and tools. \n", + "3) `Edges`: Python functions that determine which Node to execute next based on the State.\n", + "\n", + "A `StateGraph` is the main graph class used and is parameterized to use `MessagesState` as the graph state." 
] }, { "cell_type": "code", - "execution_count": 28, - "id": "64a0eead-ee86-4b0b-8ae3-fb194ea69186", - "metadata": { - "scrolled": true - }, + "execution_count": 12, + "id": "9dc8754b-0734-4eec-98e3-0234d3c111f3", + "metadata": {}, + "outputs": [], + "source": [ + "from typing import Literal\n", + "\n", + "from langgraph.graph import StateGraph, MessagesState\n", + "\n", + "# in this graph continue until no more tools\n", + "def should_continue(state: MessagesState) -> Literal[\"tools\", \"__end__\"]:\n", + " messages = state[\"messages\"]\n", + " last_message = messages[-1]\n", + " if last_message.tool_calls:\n", + " return \"tools\"\n", + " return \"__end__\"\n", + "\n", + "# call the model on the current messages\n", + "def call_model(state: MessagesState):\n", + " messages = state[\"messages\"]\n", + " response = llm_with_tools.invoke(messages)\n", + " return {\"messages\": [response]}\n", + "\n", + "\n", + "workflow = StateGraph(MessagesState)\n", + "\n", + "# Define the two nodes we will cycle between\n", + "workflow.add_node(\"agent\", call_model)\n", + "workflow.add_node(\"tools\", tool_node)\n", + "\n", + "# Define edges of the graph\n", + "workflow.add_edge(\"__start__\", \"agent\")\n", + "workflow.add_conditional_edges(\n", + " \"agent\",\n", + " should_continue,\n", + ")\n", + "\n", + "\n", + "workflow.add_edge(\"tools\", \"agent\")\n", + "\n", + "# check structure of graph by compiling it\n", + "app = workflow.compile()" + ] + }, + { + "cell_type": "markdown", + "id": "f84c68ce-bb8d-4e95-94fa-fbb9c40c01e5", + "metadata": {}, + "source": [ + "Let's see a visual representation of the graph. As you can see, the agent will keep calling tools until it's finished." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "0deb52d9-51a9-4d90-88e0-402d7b77e6e7", + "metadata": {}, + "outputs": [ + { + "data": { + "image/jpeg": 
"/9j/4AAQSkZJRgABAQAAAQABAAD/4gHYSUNDX1BST0ZJTEUAAQEAAAHIAAAAAAQwAABtbnRyUkdCIFhZWiAH4AABAAEAAAAAAABhY3NwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAA9tYAAQAAAADTLQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAlkZXNjAAAA8AAAACRyWFlaAAABFAAAABRnWFlaAAABKAAAABRiWFlaAAABPAAAABR3dHB0AAABUAAAABRyVFJDAAABZAAAAChnVFJDAAABZAAAAChiVFJDAAABZAAAAChjcHJ0AAABjAAAADxtbHVjAAAAAAAAAAEAAAAMZW5VUwAAAAgAAAAcAHMAUgBHAEJYWVogAAAAAAAAb6IAADj1AAADkFhZWiAAAAAAAABimQAAt4UAABjaWFlaIAAAAAAAACSgAAAPhAAAts9YWVogAAAAAAAA9tYAAQAAAADTLXBhcmEAAAAAAAQAAAACZmYAAPKnAAANWQAAE9AAAApbAAAAAAAAAABtbHVjAAAAAAAAAAEAAAAMZW5VUwAAACAAAAAcAEcAbwBvAGcAbABlACAASQBuAGMALgAgADIAMAAxADb/2wBDAAMCAgMCAgMDAwMEAwMEBQgFBQQEBQoHBwYIDAoMDAsKCwsNDhIQDQ4RDgsLEBYQERMUFRUVDA8XGBYUGBIUFRT/2wBDAQMEBAUEBQkFBQkUDQsNFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBT/wAARCADbAMcDASIAAhEBAxEB/8QAHQABAAEFAQEBAAAAAAAAAAAAAAYDBAUHCAkBAv/EAFkQAAEDBAADAgcICwoKCwAAAAECAwQABQYRBxIhEzEIFBYiQVGUFRcyVVZh0dMJI0JxdIGRk5W00jU2OFJTdZKyw9QYJDdUYmNyobHBMzRDRVdkgoOE4fH/xAAaAQEBAAMBAQAAAAAAAAAAAAAAAQIDBQQH/8QAMxEBAAEDAAYIBAYDAAAAAAAAAAECAxEEEiExUaEUQVJhcZGxwRMVI9EiM1OB4fAFMkL/2gAMAwEAAhEDEQA/APVOlKUClKUClWl0ucezW9+bKUUsMp5jypKlKPcEpSOqlE6ASOpJAHU1g/J6Xk32+/OOsxVbLdnjulCEJ9HbKSduL9YB5BvQCtc6ttNETGtVOI/u5cMzJvtthOFEi4RWFjoUuvpSR+ImqPlVZfjiB7Sj6apR8Lx+I2EMWK2tIAA0iI2O7oPRVXyVsvxPA9mR9FZ/R7+RsPKqy/HED2lH008qrL8cQPaUfTTyVsvxPA9mR9FPJWy/E8D2ZH0U+j38l2HlVZfjiB7Sj6aeVVl+OIHtKPpp5K2X4ngezI+inkrZfieB7Mj6KfR7+RsPKqy/HED2lH019Rk1ncUEou0FSj6EyUE/8a+eStl+J4HsyPor4vE7G4gpVZrepJ6EGKgg/wC6n0e/kbGUSoLSFJIUkjYIOwRX2owvAoMFan7ApWOyyeb/ABIajrP+sY+AoH0kAK79KBO6yNjvLk9b8OYx4pc4ug8yDtCwe5xs+lCtHR7wQQeorGqiMa1E5jylMcGWpSlaUKUpQKUpQKUpQKUpQKUpQKUpQRe7au2cWm3L0qNBYXcnEH7p3mDbP3wNuq6+kIPeNiUVGHR4nxJYcXsIn2tTSFa6czLvNrfrIeJH+yfVUnr0Xd1ERux9881kpSledEAhceMHuWUXLHYd4cmXa3KfRIajQJLiA4ykqdbS6lsoW4kA7QlRVsa1vpUZ4U+E9jfEPhnMzC4NS7AxAK1TUPwJXZtI7dxprkcUykPKIQNhvmKSrRAPSojhwvGOeEAYOF2TLbZityudwkZNBvluKLU25yqUmZCkK9LroSezQpQIWSUoIqOYvc86w7wd7hhFnx3J7VllinuplzI1rUrtITlzUp12A4oFt93xdwqSkbOwemwKDeVq8ILAbziGQZPFv27Rj6Su6qdhyGn4aeXm2thbYdGx1HmddHW9VFM78LHFMYtNjuNrbn3yHcb3GtSpLNrm9kG3DtbzSgwQ/pPVIbJ5yfNJ1qtG3bDbxLsvH1NmxvO5MPIcQiItb2RsSpEue8yZCXEjtOZxKtup5WlBKtbKU8vWt7cfrDcU8PcHm2myzLonGshtN1k262sFyT4swsBwNNDqtSQd8o69DQbfs92j320w7lE7bxWWyl9rxhhbDnKobHM24ErQdHqlQBHcQKvKxuOXxvJbJEubUSbAbkp50x7lGXGkIGyNLbWApJ6b0R6RWSoFRjLtWu52G8o0lbcxEB49fPZkKDYT+dLKvxH11J6jGeJ8bi2e3pBLsu6xCkAb6MuiQon1DlZV1+cV6LH5kRO7r8Ovksb0npSledClKUClKUClKUClKUClKUClKUGKyKzKvERosOJYuER0SYb6wSG3QCOoBBKVJUpCgD1StQBHfVO13yNfA/b5TQjXFCSmTbnjs8vcVJ2BztnfRYGj3HRBSMzWOvOPW7IWm27hEbk9kSppw7S40ojRUhY0pB102kg1upqpmNWvd6f3+998UIHg2cJ0kEcN8WBHcRaGP2a+f4NfCf8A8NsV/RDH7NSE4MW+kfIr7HR0AR44HdD77iVKP4zunkTI+VV+/PM/VVlqW+3ykxHFJI8dqJHaYZbS0y0kIQ2gaSlIGgAPQAKqVF/ImR8qr9+eZ+qp5EyPlVfvzzP1VPh2+3ykxHFKKVz74LV6yHjHwXtOVX7KLqi5ypMtpwQ1NNt8rUlxtOgWyfgoG+vfW2vImR8qr9+eZ+qp8O32+UmI4rDIuB3DzLrzIu17wiwXe6SeXtpk23NOuucqQlPMpSSTpKQPvAVj1eDfwpWlAVw4xdQQOVINpYPKNk6Hm+sk/jrP+RMj5VX788z9VQYS8QQrJ78tJ6a7dof7w2DT4dvt8pMRxVrZacX4W46ItuhW7GrM2sqTHiNJYa7RR7koSBtSj6ANk92zX2zwpF1uwvs9gxilpTMGKv4bTaiCpax6Fq5U9PuQAO8qqpa8LtVqmiaGnZlwAIEyc+uQ6nfeEqWTyA+pOh81Z2pNVNETFvr6/sbI3FKUrQhSlKBSlKBSlKBSlKBSlKBSlKBSlKBSlKBSlKDnfwA/4MOPfhtx/XXq6IrnfwA/4MOPfhtx/XXq6IoFKUoFKUoFKUoFKUoFKUoFKUoFKUoFKUoFKUoFKUoFKUoFKUoOd/AD/gw49+G3H9deroiud/AD/gw49+G3H9deroigUpSgUpSgUpSgUpSgUpSgUpSgUpSgUpSgUpSgUpSgUrHX6+M2C3mS6hby1LS0zHaAK3nFdEoTvps+s6AAJJABNRtd+y1StottlbSe5K5rqiPxhob/ACVvosV3IzG7vnC4TWuG/sn/AALVlOE2ziTbI5cuNgAh3HkGyqEtZKFf+24o93odUT0TXVvu7mH+YWP2t76usfkKciyqw3Gy3Sz2GXbbhHciSY65b2
nGlpKVJP2v0gmtvRa+MecGHmP9jy4KOcU+O8K9yW1CyYkpu6vuDYCpIVuM3sdx508/qIaUPTXr/XOvg6cGLp4OGCu45ZmLTcFSJbkyTPkSHEuPKVoJBAb0AlASnQ6b2enMa2n7u5h/mFj9re+rp0WvjHnBhN6VCRfcw2NwLJr8Le+rrLY/kr0+Yu33KIiBc0t9slDLpdaebBAKkLKUnoVAEEAjY7wQawq0euiNbZPhMGEgpSleZClKUClKUClKUClKUClKUClKUClKUClKUEO4gHVxw4dNKvCgQR/5KUf+IFX1WPEH908M/nhX6jLqOcXsnbw/h7dbkq9rx9xIbaZnMwvHXUurcShCG2P+0WoqCUp9agT0FdONlqjwn1lZ6kxpXKELjvxAxPEuLTN0RPn3XGrfAuFuk3+2x40lDclTiFreairLakN9mXOmiQFBQGqp3XjdlXDeRxBltZmjidbLPj8CTDltxorbDM6VKLKW3CwEhWhyOAc6fMJB2dKGvXhHWdK5ot+X8YMeavT9yj36TaEWKfJcuN/t9rjKgS22StlTKYr7nOhRCgUOJJGknmPWr/DcxzqLfOEy7xlhvEXPbY+uRGFuYYTAfTCElC2ClPMR0UkhwrB3sa7qusOhkOIdBKFJWASklJ3og6I/LWKUdcQ8e+eHNG/m2z/9fkrTfgc2K6W/hm7LmZJNu0V653RtuDIYjobYWm4SAtxKm20rJWQVEKUQCfNAGhW41f5Q8d/BJv8AY1ttzmJnun0lYTulKVykKUpQKUpQKUpQKUpQKUpQKUpQKUpQKUpQQ7iD+6eGfzwr9Rl1iuIOB23iTjD9kui5LDK3WpDUmE72b8d5pxLjTratHSkrSkjYI6dQR0rLcUnmLTjByGTJYisY+tV0cXJWEIKEtLQ4Co/BPI4vR9eh0B3UaxnivZMwx+Be7Oxd7hbJzfasSY1pkuoWnZHRSUEd4I7/AEV1LcTct0xTtxs5zPuyxnchN44At2ez5ncbbdslyLIr9aBbpXjd2bYcmFBUWyHOx5WVALUkcqQjSjtBJJqI8IeD2TyGr/jOW2ibD4a3C1qjO2S+Sbc8+uUpaftjSoDTaUICAr4R5ubkIA1W9/LON8WX79CS/qqeWcYf92X79CS/qqy6PX2ZNWeCK2HggxZ7Rd7bLzDLL/EuFuctYbu9wQ8I7K08pLYDaQVgdy1hSvnPWslH4SWeM9w/dTJnFWFMqYtwLiNOpVFMY9t5nnHkO/N5fO+bpVfH+K+P5Za27nYzcbzbXSpKJlvtkl9lZSopUAtDZBIUCD16EEVkfLON8WX79CS/qqvwK+zJqzwYXBOEkDh1e7tMtN4vBt1wefk+4ciQhcGM6852ji2U8gWklXMdFZA51aA3UhV/lDx38Em/2NURmUZRA9zL719dklj+zrAZDxKsGDZDjt7y6Q9jcCc8u0Wzx1hXO/Ie5VFSwkEtIAaCQV62VnYACSWrNqJmuMRiecYMTG9uClKVyGJSlKBSlKBSlKBSlKBSlKBSlKBSlfla0toUpSglKRsqJ0AKD9VCuLHFKHwkxyPdZVovF9clTGoEaBZIapMh15zfKNDoB0PUkegDZIBt7pxCvRz3ErTYMVev+M3eM5MmZSxLbESI0E/awnqS4paigjWvNO082lctfhVwrhcJ7RcoUW73i+O3Ge7cZM29TFSXluL0NAnoAEpSOg662dmgoQMBvb/EXJL5ecqfvGK3OCiDExJ6I2IsdOh2qnNjbilHmHXXmrIPNpOpyww3GZbZZbS002kIQ2hISlKQNAADuAqpSgVzj4eHHP3luBs9qBI7HI8i5rZb+U6W2lQ+3PD0jlQdAjuUtBro6tDeEH4HGIeEnkltvOT3zJIblvieKMRLXKYbYSOdS1L5XGVnnVzAEgjYQnp0oOTfsXfHX3HyO6cL7pICYl05rjai4r4MlKR2rQ/220hQHcOyV6VV6V159eBP4GGGZPiGG8VJF4yKPkUG7OyW48aUwmKoxpa0oSpJZKylQbAUOcb2rWt16C0CqMqIxNbS3IZbfbStLgS6gKAUlQUlWj6QQCD6CAarUoNaP4ZfcGyTOs0tN3vWWKuUIOxcNkyW0xky20aT2LiwOyCwlCSO7qpR5joJkOE50Mlx2wS7xbXsSvd2ZW6mwXV1AloKPhgJB87Q0dgbAUnYSToSqovlvDHFs6u9gut9skW43OwyhNtktxJDsZ0EHaVAg62EkpOwSlJI2BoJRStVP5Zk/COBnuT8RrvAueHQ5CZVqVZ7c745GjKUQpt5A2Fcm0AKG9gKUogdE7Fx6/wMqsVuvNrf8atlwjolRnwhSe0aWkKSrSgCNgg9QKDIUpSgUpSgUpSgUpSgUpSgxGVZdZcHsjt4yC5xrPa2lttuTJbgQ2hS1pQjaj0G1KSN/PUMuGP5JxJuec4vmdktsfh1LjIhQFw5zvj0zmTt1aynlDaeoSE9CCg/CSQakfE6x2jIsAvsK/WVOR2rxZT71qUN+Ndl9tSgdR1KkJ18+q+cMMyb4g8PbBkbVtkWdu4xEPiBLSQ7H2NFCtgdxGt6699BlcZxm14bj9vsdkhNW60wGUsRorI0ltA7gPSfvnqT1NZOlKBSlKBVGXLYgRXpUp5uNGYQpx155YShtAGypRPQAAEkmsflWV2fB8enX2/XFi1WiC2XZEuSrlQhP/Mk6AA6kkAAk1zExByfw3Z7cq5Nz8R4EsuBbEAkszsnIOwtzXVuNsAgDqrvGzooCU+AAoL8F7HFpIUlUy4lKgdgjx17qK6Kqystkt+N2mJa7VCYt1tiNpZjxYzYQ20gDQSlI6AVe0ClKUClKUHwgKBBGwehBqGXfhgxcuIuP5exfLzbHrTGchqtcOVywJjKgdJdZIIJSohQUNHzQOuhqaUoIHw7z+935me1mOMjCLk1cnYUKPIntPpuDYAUh1kpI3tJGxroQfUdTytU8X/Iny/4W+VHjvu17sOe4Hiu+z8Z7I83a6+55fX6a2tQKUpQKUpQKUpQKUr8rcQ2NrUEj/SOqDUnhDeEvj3g1Wyz3HJbLf7lAubrjCJNmitutsuJCVBDqnHEBKlgqKQNkhtf8WuK4X2TnPLlMXYcexy23a6Tr6tu2XC8pKdw3FlLDC47Kk6dG0bWHVDvGj0VXoPxIwTHOK+F3TFsjZam2q4NFtxJUOZtX3LiCfgrSdEH0EV5ocI/BQvPDHw4sUxi9N+N2WDKXeod2Sn7VJjsJU40vv6K7RLaVJJ2kn0ggm4kerNKpeNM/wAs3/SFPGmf5Zv+kKYkVah/FXixjPBjDpeS5VcUwLex5qEDznZDhHmtNI71rOu775JABIwXHHj7j3AzGmJ1wS7drxcHPFrRYreOeVcZHQBttI3obUnatdNjoSUpOueFXATIs9zCJxS41qZn5M159kxZs80CwIJ2PN6hb/dtR3ogHZISUwYnFeF2U+FLkMHOOLcByy4PEcEjH+Hzij9s/iyZ4+6UQejZ7t6IA5gvqdttDLaG20JQ2gBKUpGgAO4AV+qUClKUClKUClK/C3UN651pTvu5jqg/dWl2f
lxbVNet8VE6e2ytceK692KXnAklKCvlVyAnQ5tHW96PdVbxpn+Wb/pCnjTP8s3/AEhVxI86Mg+yoSBeIrcrg/FZk26QsOtzruVvNLG0kIJjAtLB6E6Pq1XXvgu8e5PhHcNnsufxheKte6DsNiOuZ40H0IQgl1K+zb6cylo1o9Wz19A4b8OjwW573hG2KbicdLkXP5QbIQPtcefsB5SyB5qVJIdJP+tPcmvRnhrhVm4W4FYsTs6m0W+0xURmzsAuEdVOK190tRUo/Oo0xIlVKpeNM/yzf9IV9EhpRADqCT3AKFMSKlKUqBSlKC1uk33NtkuXy83YMrd5fXypJ/5Vry14lar9bolyvNviXi5SmUPPSZzCXlbUASlPMPNQO4JGhoevZqc5V+9i8fgb39Q1Hsa/e5avwRr+oK6WjzNFuaqZxOWW6Fl732LfJqz+wNfs0977Fvk1Z/YGv2agvCvwirFxJGUlxqTZkWOZMQt6bDksseKsLCe2W860hCFHfMWiedA3sdCakGEcbcK4iz34VhvYlS2o/jZZfjPRlLY3rtm+1QntG9kDnRtPUdeorbF+5P8A3PmmZ4s1732LfJqz+wNfs0977Fvk1Z/YGv2awGJceMEzq/os1kyBubPdS4uOkx3mm5SW/hlh1aAh4J9JbUrp17qjWD+EPa18HsTy7Npce1zr4XG241uivvF1xK3BpplAccOko2e/XedU6Rc7c+ZmeLYZ4fYz0Ldgt0dwdUvRoyGXEH1pWgBST84IIqRYJdJF0sBMp0yJEaTIhqeOtuBp1SEqOgBzFKQToAb3rpVhZLzDyOzwrrbnvGIE1lEhh7lKedtQ2lWlAEbBHeK/XDP9xLh/O079YXWF6qblmZqnOJj3XOY2pdSlK5bEpSlAq1ul0i2W3yJ015MeIwgrccV3AD5h1J9QHUnoKuq1Bx1vLjs6zWNCtMFK50hO/hFJCWh842Vq++hNezQ9HnSr9Nrj6LCOZVxFvOWPuJZkSLPatkNxY6+zecT6C44nzgT/ABUkAb0ebW6hqrDbXFqW5AjuuK1zLdaC1K++T1NX1K+j2bVGj06lqMQx1pY/yetXxZD9nR9FPJ61fFkP2dH0VkKiF54uYlj95ctc+8IYlNKSh49i4pphStcqXXUpKGydjopQ7xWyq7FEZqqx+5meLP8Ak9aviyH7Oj6KeT1q+LIfs6PoqO3zjDiOOXOdb7hdizLgKQJaERXnBHCkJWlTikoISgpWnzyQnvG9ggXeUcTMaw5+Gzdboll+WgustMtOPrU2O9zlbSohH+kdD56x+PRGfx7t+0zPFl/J61fFkP2dH0UOO2ogj3Mh6PT/AKuj6KwXCfLpeecO7Jf5zbDUqcyXHERklLYPMoeaCSe4DvJqW1lRc16YqidkmZ4q9kuNwxdxK7NPft4SR9oSoqYUPUWj5v4wAfURW8eH2fM5nDW28hMW7RwPGIyTtJB6BxBPek6++D0PoJ0PV3Y7w5jeS2m6tq5Q1IQy91+Ew4oIcB9ethWvWgVytP0GjSrc1RH443T7SsTnZLpulKV89GLyr97F4/A3v6hqPY1+9y1fgjX9QVJMjZXIx66NNpKnFxXUpSPSSggVGsXWlzGrSpJ2lURkg+scgroWfyZ8fZepzNdMTyK8cPuNXDVrH7uxe7vd7pdrdMXEWm3zGXXUvNoEn4AUsbbKSQQd70KyGXW+9+EDlNp9xMYvmHxrVjd5hSJl9gqg8r8yMllqO0D1cCFDnKkgoHInRJNdOUpqo5hx5F7zd3gtjkfCr5jMjDJDMq8TblBMeNHSxDcjqYYdPmvBxSxotkjlGzqsNj9gVaeB+H2u/Y1nVnyvFbjMjQ7rjlrVIkQ39rPbISOYPR3UOhJPKpKuoOtbHW9KaoiPCS45NduGuOzMyiJg5O9EQqewlITyufOkEhKiNEpHcSR6KkfDP9xLh/O079YXV3Vtw1QU2GYv7ly6TlJOu8eMuDf+4/8A5WVeyxV4x7r1JZSlK5qFKUoFaQ43RVR81tUpX/RyoC2UnX3TbnMR+R0fkPqrd9RniBhyc0sJioWlmcwsPxHl70hwAjStfcqBKT8x33gV0v8AH6RTo2k0117t0/usOf6UlxnI8iRb58ZUeU1tD8V4dR6P/Uk+gjoRUNHBjAgdjDbGD/N7X7NfQpqqmImjEx4/xLBMq5yiYWzbrplFhyex5ncvdS7yX2nbPLl+58uNIXsFwNuJbQQFELCwOifTW2veXwH5GWL9Htfs1MWWUR2kNNIS22hISlCRoJA6ACtFdmb2NeIjH7+sDTj2LzWPfrjtW2UWJkFlmCCytXjITbUt6bJH2w8w5em+vTvqwxNVz4eZYzc7njt5uke7Y7bIrL8CEp9yI6whQcYcSOrfMVhWzobB2enTelKnRozFUTiYzPnMz7iAcBLbMtHCDGYc+I/AmNR1ByNJbLbjZ7RR0pJ6g9an9R2/cOsWyid47eMdtl0l8gb7eXFQ4vlHcNkb11NY73lsB+Rli/R7X7NbKKa7dMUUxExGzf8AwJnVJ+Kq4uRILfV2XKZjoGt9VOJG/wAQ2fxVjrFjNkw2E8zaLbCs0Ra+1cRFaSygq0BzHQA3oAb+atu8JcEffnsZJcWVMstJV4hHcSQslQ5S8oHu83YSPUpR9IrXpOkxotmble/q8Vp35bfpSlfM1Kicrh8nt3F2y93KxsrUVmLDDC2Qo9SUpdaXy7PXSSBsk661LKVsouVW/wDWVzhDfIC4fLO9/mIX93p5AXD5Z3v8xC/u9TKlbuk3O7yj7GUN8gLh8s73+Yhf3enkBcPlne/zEL+71MqU6Tc7vKPsZRBHD+QvzZWVXqUyfhNf4szzD0jnaZSsffSoH1EVKYcNi3RGYsVlEeMygNttNJCUoSBoAAdwqtStdd2u5sqn29DOSlKVpQpSlApSlBhckw2zZc0hF1gokLbBDbwJQ63vv5XEkKT+I9ahT3AO1qWSzfb1HQe5AWwsD7xU0T+Umtn0r2WtM0ixGrbrmIXLVnvAwflLe/yRfqKe8DB+Ut7/ACRfqK2nSt/zPS/1PT7GWrPeBg/KW9/ki/UU94GD8pb3+SL9RW06U+Z6X+p6fYy1Z7wMH5S3v8kX6ivo4AwN9ckvZH/xR/YVtKlPmel/qehlCrBwgxywyG5KmHrpLbIUh64udrykdxCNBAPzhINTWlK8V29cvVa1yqZnvMlKUrSj/9k=", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from IPython.display import Image, display\n", + "\n", + "try:\n", + " display(Image(app.get_graph().draw_mermaid_png()))\n", + "except Exception:\n", + " # This requires some extra dependencies and is optional\n", + " pass" + ] + 
}, + { + "cell_type": "markdown", + "id": "3365e44f-cbb2-4822-a910-05d8aeaf4982", + "metadata": {}, + "source": [ + "And now let's run the graph in 2 examples! First, we'll try a query that only requires one tool call. Then we'll try a query that requires multiple tool calls." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "1d4ca572-4016-4ab6-8791-ffaa791d86ac", + "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ + "================================\u001b[1m Human Message \u001b[0m=================================\n", "\n", + "What's the weather in San Francisco?\n", + "==================================\u001b[1m Ai Message \u001b[0m==================================\n", + "Tool Calls:\n", + " tavily_search_results_json (chatcmpl-tool-a266d125e555420f976689051e8d3f5c)\n", + " Call ID: chatcmpl-tool-a266d125e555420f976689051e8d3f5c\n", + " Args:\n", + " query: San Francisco weather\n", + "=================================\u001b[1m Tool Message \u001b[0m=================================\n", + "Name: tavily_search_results_json\n", "\n", - "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", - "\u001b[32;1m\u001b[1;3m\n", - "Invoking: `get_current_location` with `{}`\n", - "\n", - "\n", - "\u001b[0m\u001b[33;1m\u001b[1;3m[35.7721, -78.6386]\u001b[0m\u001b[32;1m\u001b[1;3m\n", - "Invoking: `get_current_location` with `{}`\n", + "[{\"url\": \"https://www.weatherapi.com/\", \"content\": \"{'location': {'name': 'San Francisco', 'region': 'California', 'country': 'United States of America', 'lat': 37.78, 'lon': -122.42, 'tz_id': 'America/Los_Angeles', 'localtime_epoch': 1723495766, 'localtime': '2024-08-12 13:49'}, 'current': {'last_updated_epoch': 1723495500, 'last_updated': '2024-08-12 13:45', 'temp_c': 16.0, 'temp_f': 60.8, 'is_day': 1, 'condition': {'text': 'Sunny', 'icon': '//cdn.weatherapi.com/weather/64x64/day/113.png', 'code': 1000}, 'wind_mph': 12.3, 'wind_kph': 19.8, 'wind_degree': 250, 'wind_dir': 'WSW', 'pressure_mb': 1015.0, 'pressure_in': 29.98, 'precip_mm': 0.0, 'precip_in': 0.0, 'humidity': 75, 'cloud': 0, 'feelslike_c': 15.9, 'feelslike_f': 60.7, 'windchill_c': 15.9, 'windchill_f': 60.7, 'heatindex_c': 16.0, 'heatindex_f': 60.8, 'dewpoint_c': 11.5, 'dewpoint_f': 52.8, 'vis_km': 10.0, 'vis_miles': 6.0, 'uv': 5.0, 'gust_mph': 14.5, 'gust_kph': 23.4}}\"}]\n", + "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "\n", + "The current weather in San Francisco is sunny with a temperature of 16.0 degrees Celsius (60.8 degrees Fahrenheit) and a wind speed of 19.8 kilometers per hour (12.3 miles per hour) from the west-southwest. 
The humidity is 75% and the pressure is 1015 millibars.\n" + ] + } + ], + "source": [ + "# example with a single tool call\n", + "for chunk in app.stream(\n", + " {\"messages\": [(\"human\", \"What's the weather in San Francisco?\")]}, stream_mode=\"values\"\n", + "):\n", + " chunk[\"messages\"][-1].pretty_print()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "6c297224-9c76-4a3c-a085-61546239dab8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================================\u001b[1m Human Message \u001b[0m=================================\n", "\n", - "\u001b[0m\u001b[33;1m\u001b[1;3m[35.7721, -78.6386]\u001b[0m\u001b[32;1m\u001b[1;3m\n", - "Invoking: `tavily_search_results_json` with `{'query': 'weather in the location 35.7721, -78.6386'}`\n", + "What's the weather where I currently am?\n", + "==================================\u001b[1m Ai Message \u001b[0m==================================\n", + "Tool Calls:\n", + " get_current_location (chatcmpl-tool-3de300004d9d42179f8cac2f00a85752)\n", + " Call ID: chatcmpl-tool-3de300004d9d42179f8cac2f00a85752\n", + " Args:\n", + "=================================\u001b[1m Tool Message \u001b[0m=================================\n", + "Name: get_current_location\n", "\n", + "[43.7064, -79.3986]\n", + "==================================\u001b[1m Ai Message \u001b[0m==================================\n", + "Tool Calls:\n", + " tavily_search_results_json (chatcmpl-tool-9b6c5f77fe174bbf8308e0dffa57545c)\n", + " Call ID: chatcmpl-tool-9b6c5f77fe174bbf8308e0dffa57545c\n", + " Args:\n", + " query: weather [43.7064, -79.3986]\n", + "=================================\u001b[1m Tool Message \u001b[0m=================================\n", + "Name: tavily_search_results_json\n", "\n", - "\u001b[0m\u001b[36;1m\u001b[1;3m[{'url': 'https://www.weatherapi.com/', 'content': \"{'location': {'name': 'Raleigh', 'region': 'North Carolina', 'country': 'United States of America', 'lat': 35.77, 'lon': -78.64, 'tz_id': 'America/New_York', 'localtime_epoch': 1723484374, 'localtime': '2024-08-12 13:39'}, 'current': {'last_updated_epoch': 1723483800, 'last_updated': '2024-08-12 13:30', 'temp_c': 28.8, 'temp_f': 83.8, 'is_day': 1, 'condition': {'text': 'Patchy rain nearby', 'icon': '//cdn.weatherapi.com/weather/64x64/day/176.png', 'code': 1063}, 'wind_mph': 4.0, 'wind_kph': 6.5, 'wind_degree': 67, 'wind_dir': 'ENE', 'pressure_mb': 1017.0, 'pressure_in': 30.02, 'precip_mm': 0.04, 'precip_in': 0.0, 'humidity': 48, 'cloud': 73, 'feelslike_c': 29.7, 'feelslike_f': 85.5, 'windchill_c': 28.8, 'windchill_f': 83.8, 'heatindex_c': 29.7, 'heatindex_f': 85.5, 'dewpoint_c': 16.9, 'dewpoint_f': 62.4, 'vis_km': 10.0, 'vis_miles': 6.0, 'uv': 6.0, 'gust_mph': 5.1, 'gust_kph': 8.2}}\"}]\u001b[0m\u001b[32;1m\u001b[1;3mThe current weather information of your location is as follows: the location is Raleigh, North Carolina, United States of America, with a temperature of 28.8 degrees Celsius and a humidity of 48%. The weather condition is patchy rain nearby, with a wind speed of 6.5 km/h and a wind direction of ENE. The atmospheric pressure is 1017.0 mb, and the precipitation is 0.04 mm. The feels-like temperature is 29.7 degrees Celsius, and the heat index is also 29.7 degrees Celsius. The dew point is 16.9 degrees Celsius, and the visibility is 10.0 km. 
The UV index is 6.0, and the gust speed is 8.2 km/h.\u001b[0m\n", + "[{\"url\": \"https://www.weatherapi.com/\", \"content\": \"{'location': {'name': 'Toronto', 'region': 'Ontario', 'country': 'Canada', 'lat': 43.67, 'lon': -79.42, 'tz_id': 'America/Toronto', 'localtime_epoch': 1723495822, 'localtime': '2024-08-12 16:50'}, 'current': {'last_updated_epoch': 1723495500, 'last_updated': '2024-08-12 16:45', 'temp_c': 24.5, 'temp_f': 76.1, 'is_day': 1, 'condition': {'text': 'Partly Cloudy', 'icon': '//cdn.weatherapi.com/weather/64x64/day/116.png', 'code': 1003}, 'wind_mph': 11.2, 'wind_kph': 18.0, 'wind_degree': 308, 'wind_dir': 'NW', 'pressure_mb': 1015.0, 'pressure_in': 29.97, 'precip_mm': 0.0, 'precip_in': 0.0, 'humidity': 45, 'cloud': 37, 'feelslike_c': 25.3, 'feelslike_f': 77.6, 'windchill_c': 24.5, 'windchill_f': 76.1, 'heatindex_c': 25.3, 'heatindex_f': 77.6, 'dewpoint_c': 11.9, 'dewpoint_f': 53.5, 'vis_km': 10.0, 'vis_miles': 6.0, 'uv': 6.0, 'gust_mph': 12.9, 'gust_kph': 20.7}}\"}]\n", + "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "\n", - "\u001b[1m> Finished chain.\u001b[0m\n" + "The current weather in Toronto, Ontario, Canada is Partly Cloudy with a temperature of 24.5 degrees Celsius and a wind speed of 18.0 km/h. The humidity is 45% and the UV index is 6.0.\n" ] - }, - { - "data": { - "text/plain": [ - "{'input': 'Search for the current weather information of my location?',\n", - " 'output': 'The current weather information of your location is as follows: the location is Raleigh, North Carolina, United States of America, with a temperature of 28.8 degrees Celsius and a humidity of 48%. The weather condition is patchy rain nearby, with a wind speed of 6.5 km/h and a wind direction of ENE. The atmospheric pressure is 1017.0 mb, and the precipitation is 0.04 mm. The feels-like temperature is 29.7 degrees Celsius, and the heat index is also 29.7 degrees Celsius. The dew point is 16.9 degrees Celsius, and the visibility is 10.0 km. The UV index is 6.0, and the gust speed is 8.2 km/h.'}" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ - "from langchain.globals import set_verbose\n", - "from langchain.callbacks.tracers import ConsoleCallbackHandler\n", - "\n", - "set_verbose(True) # verbose output to follow function calling\n", - "\n", - "query = \"Search for the current weather information of my location?\"\n", - "agent = create_openai_tools_agent(llm, tools, prompt)\n", - "\n", - "agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)\n", - "agent_executor.invoke({\"input\": query})" + "# example with multiple tool calls\n", + "for chunk in app.stream(\n", + " {\"messages\": [(\"human\", \"What's the weather where I currently am?\")]}, stream_mode=\"values\"\n", + "):\n", + " chunk[\"messages\"][-1].pretty_print()" ] }, { "cell_type": "markdown", "id": "cd04f130-3f9b-4a0d-a018-d954dc41ad4b", "metadata": {}, "source": [ "We already declared our LLM, so we don't need to redeclare it. However, we do want to update the agent to use the new tools."
] }, { @@ -397,7 +580,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.10.13" } }, "nbformat": 4, From 43f695b3f2d48b8cd9ae9f811a526294e9020881 Mon Sep 17 00:00:00 2001 From: haydeniw Date: Mon, 12 Aug 2024 13:54:13 -0700 Subject: [PATCH 13/31] changed to pip install --- cookbook/nvidia_nim_agents_llama3.1.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cookbook/nvidia_nim_agents_llama3.1.ipynb b/cookbook/nvidia_nim_agents_llama3.1.ipynb index 011b2cf7..dcfb24dd 100644 --- a/cookbook/nvidia_nim_agents_llama3.1.ipynb +++ b/cookbook/nvidia_nim_agents_llama3.1.ipynb @@ -70,7 +70,7 @@ "metadata": {}, "outputs": [], "source": [ - "!poetry add langchain langgraph langchain-nvidia-ai-endpoints langchain-community langchain-openai tavily-python geocoder" + "%pip install -U langchain langgraph langchain-nvidia-ai-endpoints langchain-community langchain-openai tavily-python geocoder" ] }, { From 8f4deaa380373cf23132f13971eb83094aeef7c8 Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Tue, 13 Aug 2024 14:16:47 -0400 Subject: [PATCH 14/31] add mistralai/mathstral-7b-v0.1, rakuten/rakutenai-7b-instruct, rakuten/rakutenai-7b-chat to set of supported chat models --- .../langchain_nvidia_ai_endpoints/_statics.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py index e9b280e4..52e29679 100644 --- a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py +++ b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py @@ -339,6 +339,21 @@ def validate_client(cls, client: str, values: dict) -> str: supports_tools=True, supports_structured_output=True, ), + "mistralai/mathstral-7b-v0.1": Model( + id="mistralai/mathstral-7b-v0.1", + model_type="chat", + client="ChatNVIDIA", + ), + "rakuten/rakutenai-7b-instruct": Model( + id="rakuten/rakutenai-7b-instruct", + model_type="chat", + client="ChatNVIDIA", + ), + "rakuten/rakutenai-7b-chat": Model( + id="rakuten/rakutenai-7b-chat", + model_type="chat", + client="ChatNVIDIA", + ), } QA_MODEL_TABLE = { From fdae5abd150a46d597a832adac45f16581b4fca8 Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Thu, 8 Aug 2024 15:48:51 -0400 Subject: [PATCH 15/31] add Completions API support --- .../docs/llms/nvidia_ai_endpoints.ipynb | 222 +++++++++++++++++ .../langchain_nvidia_ai_endpoints/__init__.py | 10 +- .../langchain_nvidia_ai_endpoints/_statics.py | 32 ++- .../langchain_nvidia_ai_endpoints/llm.py | 230 ++++++++++++++++++ .../tests/integration_tests/conftest.py | 28 ++- .../tests/integration_tests/test_base_url.py | 1 + .../test_completions_models.py | 150 ++++++++++++ .../integration_tests/test_register_model.py | 6 + .../ai-endpoints/tests/unit_tests/conftest.py | 8 +- .../unit_tests/test_completions_models.py | 149 ++++++++++++ .../tests/unit_tests/test_imports.py | 1 + .../tests/unit_tests/test_register_model.py | 26 ++ 12 files changed, 848 insertions(+), 15 deletions(-) create mode 100644 libs/ai-endpoints/docs/llms/nvidia_ai_endpoints.ipynb create mode 100644 libs/ai-endpoints/langchain_nvidia_ai_endpoints/llm.py create mode 100644 libs/ai-endpoints/tests/integration_tests/test_completions_models.py create mode 100644 libs/ai-endpoints/tests/unit_tests/test_completions_models.py diff --git a/libs/ai-endpoints/docs/llms/nvidia_ai_endpoints.ipynb b/libs/ai-endpoints/docs/llms/nvidia_ai_endpoints.ipynb new file mode 
100644 index 00000000..c301fb8e --- /dev/null +++ b/libs/ai-endpoints/docs/llms/nvidia_ai_endpoints.ipynb @@ -0,0 +1,222 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# NVIDIA NIMs\n", + "\n", + ":::caution\n", + "You are currently on a page documenting the use of models as [text completion models](/docs/concepts/#llms).\n", + "Many popular models are [chat completion models](/docs/concepts/#chat-models).\n", + "\n", + "To use chat completion models, use [ChatNVIDIA](/docs/integrations/chat/nvidia_ai_endpoints/) instead.\n", + ":::\n", + "\n", + "The `langchain-nvidia-ai-endpoints` package contains LangChain integrations for building applications with models on \n", + "NVIDIA NIM inference microservice. NIM supports models across domains like chat, completion, embedding, and re-ranking \n", + "from the community as well as NVIDIA. These models are optimized by NVIDIA to deliver the best performance on NVIDIA \n", + "accelerated infrastructure and deployed as a NIM, an easy-to-use, prebuilt container that deploys anywhere using a single \n", + "command on NVIDIA accelerated infrastructure.\n", + "\n", + "NVIDIA hosted deployments of NIMs are available to test on the [NVIDIA API catalog](https://build.nvidia.com/). After testing, \n", + "NIMs can be exported from NVIDIA’s API catalog using the NVIDIA AI Enterprise license and run on-premises or in the cloud, \n", + "giving enterprises ownership and full control of their IP and AI application.\n", + "\n", + "NIMs are packaged as container images on a per-model basis and are distributed as NGC container images through the NVIDIA NGC Catalog. \n", + "At their core, NIMs provide easy, consistent, and familiar APIs for running inference on an AI model.\n", + "\n", + "This example goes over how to use LangChain to interact with NVIDIA-supported models via the `NVIDIA` class.\n", + "\n", + "For more information on accessing the completion models through this API, check out the [NVIDIA](https://python.langchain.com/docs/integrations/llms/nvidia_ai_endpoints/) documentation.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Installation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#%pip install -qU langchain-nvidia-ai-endpoints" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup\n", + "\n", + "**To get started:**\n", + "\n", + "1. Create a free account with [NVIDIA](https://build.nvidia.com/), which hosts NVIDIA AI Foundation models.\n", + "\n", + "2. Click on your model of choice.\n", + "\n", + "3. Under `Input` select the `Python` tab, and click `Get API Key`. Then click `Generate Key`.\n", + "\n", + "4. Copy and save the generated key as `NVIDIA_API_KEY`. From there, you should have access to the endpoints." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from getpass import getpass\n", + "\n", + "# del os.environ['NVIDIA_API_KEY'] ## delete key and reset\n", + "if os.environ.get(\"NVIDIA_API_KEY\", \"\").startswith(\"nvapi-\"):\n", + " print(\"Valid NVIDIA_API_KEY already in environment. Delete to reset\")\n", + "else:\n", + " candidate_api_key = getpass(\"NVAPI Key (starts with nvapi-): \")\n", + " assert candidate_api_key.startswith(\"nvapi-\"), f\"{candidate_api_key[:5]}...
is not a valid key\"\n", + " os.environ[\"NVIDIA_API_KEY\"] = candidate_api_key" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Usage\n", + "\n", + "See [LLM](/docs/how_to#llms) for full functionality." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_nvidia_ai_endpoints import NVIDIA" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "llm = NVIDIA().bind(max_tokens=256)\n", + "llm" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "prompt = \"# Function that does quicksort written in Rust without comments:\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(llm.invoke(prompt))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for chunk in llm.stream(prompt):\n", + " print(chunk, end=\"\", flush=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "llm.batch([prompt])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "await llm.ainvoke(prompt)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "async for chunk in llm.astream(prompt):\n", + " print(chunk, end=\"\", flush=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "await llm.abatch([prompt])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "async for chunk in llm.astream_log(prompt):\n", + " print(chunk)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "response = llm.invoke(\n", + " \"X_train, y_train, X_test, y_test = train_test_split(X, y, test_size=0.1) #Train a logistic regression model, predict the labels on the test set and compute the accuracy score\"\n", + ")\n", + "print(response)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "langchain-nvidia-ai-endpoints-m0-Y4aGr-py3.10", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/__init__.py b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/__init__.py index d5796e3c..bfda8d36 100644 --- a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/__init__.py +++ b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/__init__.py @@ -42,6 +42,14 @@ from langchain_nvidia_ai_endpoints._statics import Model, register_model from langchain_nvidia_ai_endpoints.chat_models import ChatNVIDIA from langchain_nvidia_ai_endpoints.embeddings import NVIDIAEmbeddings +from langchain_nvidia_ai_endpoints.llm import NVIDIA from langchain_nvidia_ai_endpoints.reranking import NVIDIARerank -__all__ = ["ChatNVIDIA", "NVIDIAEmbeddings", "NVIDIARerank", "register_model", "Model"] +__all__ = [ + "ChatNVIDIA", + "NVIDIA", + "NVIDIAEmbeddings", + "NVIDIARerank", + "register_model", + 
"Model", +] diff --git a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py index 52e29679..a2992523 100644 --- a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py +++ b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py @@ -10,8 +10,8 @@ class Model(BaseModel): Model information. id: unique identifier for the model, passed as model parameter for requests - model_type: API type (chat, vlm, embedding, ranking, completion) - client: client name, e.g. ChatNVIDIA, NVIDIAEmbeddings, NVIDIARerank + model_type: API type (chat, vlm, embedding, ranking, completions) + client: client name, e.g. ChatNVIDIA, NVIDIAEmbeddings, NVIDIARerank, NVIDIA endpoint: custom endpoint for the model aliases: list of aliases for the model supports_tools: whether the model supports tool calling @@ -23,9 +23,11 @@ class Model(BaseModel): id: str # why do we have a model_type? because ChatNVIDIA can speak both chat and vlm. model_type: Optional[ - Literal["chat", "vlm", "embedding", "ranking", "completion", "qa"] + Literal["chat", "vlm", "embedding", "ranking", "completions", "qa"] + ] = None + client: Optional[ + Literal["ChatNVIDIA", "NVIDIAEmbeddings", "NVIDIARerank", "NVIDIA"] ] = None - client: Optional[Literal["ChatNVIDIA", "NVIDIAEmbeddings", "NVIDIARerank"]] = None endpoint: Optional[str] = None aliases: Optional[list] = None supports_tools: Optional[bool] = False @@ -42,6 +44,7 @@ def validate_client(cls, client: str, values: dict) -> str: "ChatNVIDIA": ("chat", "vlm", "qa"), "NVIDIAEmbeddings": ("embedding",), "NVIDIARerank": ("ranking",), + "NVIDIA": ("completions",), } model_type = values.get("model_type") if model_type not in supported[client]: @@ -491,14 +494,18 @@ def validate_client(cls, client: str, values: dict) -> str: ), } -# COMPLETION_MODEL_TABLE = { -# "mistralai/mixtral-8x22b-v0.1": Model( -# id="mistralai/mixtral-8x22b-v0.1", -# model_type="completion", -# client="NVIDIA", -# aliases=["ai-mixtral-8x22b"], -# ), -# } +COMPLETION_MODEL_TABLE = { + "bigcode/starcoder2-7b": Model( + id="bigcode/starcoder2-7b", + model_type="completions", + client="NVIDIA", + ), + "bigcode/starcoder2-15b": Model( + id="bigcode/starcoder2-15b", + model_type="completions", + client="NVIDIA", + ), +} OPENAI_MODEL_TABLE = { @@ -518,6 +525,7 @@ def validate_client(cls, client: str, values: dict) -> str: **VLM_MODEL_TABLE, **EMBEDDING_MODEL_TABLE, **RANKING_MODEL_TABLE, + **COMPLETION_MODEL_TABLE, } if "_INCLUDE_OPENAI" in os.environ: diff --git a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/llm.py b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/llm.py new file mode 100644 index 00000000..94d23cd4 --- /dev/null +++ b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/llm.py @@ -0,0 +1,230 @@ +from __future__ import annotations + +import os +import warnings +from typing import Any, Dict, Iterator, List, Optional + +from langchain_core.callbacks.manager import CallbackManagerForLLMRun +from langchain_core.language_models.llms import LLM +from langchain_core.outputs import GenerationChunk +from langchain_core.pydantic_v1 import Field, PrivateAttr, root_validator + +from langchain_nvidia_ai_endpoints._common import _NVIDIAClient +from langchain_nvidia_ai_endpoints._statics import Model + + +class NVIDIA(LLM): + """ + LangChain LLM that uses the Completions API with NVIDIA NIMs. 
+ """ + + class Config: + validate_assignment = True + + _client: _NVIDIAClient = PrivateAttr(_NVIDIAClient) + _default_model_name: str = "bigcode/starcoder2-7b" + _default_base_url: str = "https://integrate.api.nvidia.com/v1" + base_url: str = Field( + description="Base url for model listing and invocation", + ) + model: Optional[str] = Field(description="The model to use for completions.") + + _base_url_var = "NVIDIA_BASE_URL" + + _init_args: Dict[str, Any] = PrivateAttr() + """Stashed arguments given to the constructor that can be passed to + the Completions API endpoint.""" + + @root_validator(pre=True) + def _validate_base_url(cls, values: Dict[str, Any]) -> Dict[str, Any]: + values["base_url"] = ( + values.get(cls._base_url_var.lower()) + or values.get("base_url") + or os.getenv(cls._base_url_var) + or cls._default_base_url + ) + return values + + def __check_kwargs(self, kwargs: Dict[str, Any]) -> Dict[str, Any]: + """ + Check kwargs, warn for unknown keys, and return a copy recognized keys. + """ + completions_arguments = { + "frequency_penalty", + "max_tokens", + "presence_penalty", + "seed", + "stop", + "temperature", + "top_p", + "best_of", + "echo", + "logit_bias", + "logprobs", + "n", + "suffix", + "user", + "stream", + } + + recognized_kwargs = { + k: v for k, v in kwargs.items() if k in completions_arguments + } + unrecognized_kwargs = set(kwargs) - completions_arguments + if len(unrecognized_kwargs) > 0: + warnings.warn(f"Unrecognized, ignored arguments: {unrecognized_kwargs}") + + return recognized_kwargs + + def __init__(self, **kwargs: Any): + """ + Create a new NVIDIA LLM for Completions APIs. + + This class provides access to a NVIDIA NIM for completions. By default, it + connects to a hosted NIM, but can be configured to connect to a local NIM + using the `base_url` parameter. An API key is required to connect to the + hosted NIM. + + Args: + model (str): The model to use for reranking. + nvidia_api_key (str): The API key to use for connecting to the hosted NIM. + api_key (str): Alternative to nvidia_api_key. + base_url (str): The base URL of the NIM to connect to. + + API Key: + - The recommended way to provide the API key is through the `NVIDIA_API_KEY` + environment variable. + + Additional arguments that can be passed to the Completions API: + - max_tokens (int): The maximum number of tokens to generate. + - stop (str or List[str]): The stop sequence to use for generating completions. + - temperature (float): The temperature to use for generating completions. + - top_p (float): The top-p value to use for generating completions. + - frequency_penalty (float): The frequency penalty to apply to the completion. + - presence_penalty (float): The presence penalty to apply to the completion. + - seed (int): The seed to use for generating completions. + - best_of (int): The number of completions to generate and return the best of. + - echo (bool): Whether to echo the prompt in the completion. + - logit_bias (Dict[str, float]): The logit bias to apply to the completion. + - logprobs (int): The number of logprobs to return. + - n (int): The number of completions to generate. + - suffix (str): The suffix to use for generating completions. + - user (str): The user ID to use for generating completions. + + These additional arguments can also be passed with `bind()`, e.g. + `NVIDIA().bind(max_tokens=512)`, or pass directly to `invoke()` or `stream()`, + e.g. `NVIDIA().invoke("prompt", max_tokens=512)`. 
+ """ + super().__init__(**kwargs) + self._client = _NVIDIAClient( + base_url=self.base_url, + model_name=self.model, + default_hosted_model_name=self._default_model_name, + api_key=kwargs.pop("nvidia_api_key", kwargs.pop("api_key", None)), + infer_path="{base_url}/completions", + cls=self.__class__.__name__, + ) + # todo: only store the model in one place + # the model may be updated to a newer name during initialization + self.model = self._client.model_name + + # stash all additional args that can be passed to the Completions API, + # but first make sure we pull out any args that are processed elsewhere. + for key in [ + "model", + "nvidia_base_url", + "base_url", + ]: + if key in kwargs: + del kwargs[key] + self._init_args = self.__check_kwargs(kwargs) + + @property + def available_models(self) -> List[Model]: + """ + Get a list of available models that work with NVIDIARerank. + """ + return self._client.get_available_models(self.__class__.__name__) + + @classmethod + def get_available_models( + cls, + **kwargs: Any, + ) -> List[Model]: + """ + Get a list of available models that work with the Completions API. + """ + return cls(**kwargs).available_models + + @property + def _llm_type(self) -> str: + """ + Get the type of language model used by this chat model. + Used for logging purposes only. + """ + return "NVIDIA" + + def _call( + self, + prompt: str, + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> str: + payload: Dict[str, Any] = { + "model": self.model, + "prompt": prompt, + **self._init_args, + **self.__check_kwargs(kwargs), + } + if stop: + payload["stop"] = stop + + if payload.get("stream", False): + warnings.warn("stream set to true for non-streaming call, ignoring") + del payload["stream"] + + response = self._client.get_req(payload=payload) + response.raise_for_status() + + # todo: handle response's usage and system_fingerprint + + choices = response.json()["choices"] + # todo: write a test for this by setting n > 1 on the request + # aug 2024: n > 1 is not supported by endpoints + if len(choices) > 1: + warnings.warn( + f"Multiple choices in response, returning only the first: {choices}" + ) + + return choices[0]["text"] + + def _stream( + self, + prompt: str, + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> Iterator[GenerationChunk]: + payload: Dict[str, Any] = { + "model": self.model, + "prompt": prompt, + "stream": True, + **self._init_args, + **self.__check_kwargs(kwargs), + } + if stop: + payload["stop"] = stop + + # we construct payload w/ **kwargs positioned to override stream=True, + # this lets us know if a user passed stream=False + if not payload.get("stream", True): + warnings.warn("stream set to false for streaming call, ignoring") + payload["stream"] = True + + for chunk in self._client.get_req_stream(payload=payload): + content = chunk["content"] + generation = GenerationChunk(text=content) + if run_manager: # todo: add tests for run_manager + run_manager.on_llm_new_token(content, chunk=generation) + yield generation diff --git a/libs/ai-endpoints/tests/integration_tests/conftest.py b/libs/ai-endpoints/tests/integration_tests/conftest.py index f1dd58f6..e89d2a7d 100644 --- a/libs/ai-endpoints/tests/integration_tests/conftest.py +++ b/libs/ai-endpoints/tests/integration_tests/conftest.py @@ -3,7 +3,12 @@ import pytest from langchain_core.documents import Document -from langchain_nvidia_ai_endpoints import ChatNVIDIA, 
NVIDIAEmbeddings, NVIDIARerank +from langchain_nvidia_ai_endpoints import ( + NVIDIA, + ChatNVIDIA, + NVIDIAEmbeddings, + NVIDIARerank, +) from langchain_nvidia_ai_endpoints._statics import MODEL_TABLE, Model @@ -39,6 +44,12 @@ def pytest_addoption(parser: pytest.Parser) -> None: nargs="+", help="Run tests for a specific qa model or list of models", ) + parser.addoption( + "--completions-model-id", + action="store", + nargs="+", + help="Run tests for a specific completions model or list of models", + ) parser.addoption( "--embedding-model-id", action="store", @@ -98,6 +109,18 @@ def get_all_known_models() -> List[Model]: ] metafunc.parametrize("tool_model", models, ids=models) + if "completions_model" in metafunc.fixturenames: + models = [NVIDIA._default_model_name] + if model_list := metafunc.config.getoption("completions_model_id"): + models = model_list + if metafunc.config.getoption("all_models"): + models = [ + model.id + for model in NVIDIA(**mode).available_models + if model.model_type == "completions" + ] + metafunc.parametrize("completions_model", models, ids=models) + if "structured_model" in metafunc.fixturenames: models = ["meta/llama-3.1-8b-instruct"] if model_list := metafunc.config.getoption("structured_model_id"): @@ -163,6 +186,7 @@ def mode(request: pytest.FixtureRequest) -> dict: ChatNVIDIA, NVIDIAEmbeddings, NVIDIARerank, + NVIDIA, ] ) def public_class(request: pytest.FixtureRequest) -> type: @@ -180,5 +204,7 @@ def _contact_service(instance: Any) -> None: instance.compress_documents( documents=[Document(page_content="World")], query="Hello" ) + elif isinstance(instance, NVIDIA): + instance.invoke("Hello") return _contact_service diff --git a/libs/ai-endpoints/tests/integration_tests/test_base_url.py b/libs/ai-endpoints/tests/integration_tests/test_base_url.py index 3c821623..7c522c92 100644 --- a/libs/ai-endpoints/tests/integration_tests/test_base_url.py +++ b/libs/ai-endpoints/tests/integration_tests/test_base_url.py @@ -13,6 +13,7 @@ def mock_endpoints(requests_mock: Mocker) -> None: "/v1/embeddings", "/v1/chat/completions", "/v1/ranking", + "/v1/completions", ]: requests_mock.post( re.compile(f".*{endpoint}"), diff --git a/libs/ai-endpoints/tests/integration_tests/test_completions_models.py b/libs/ai-endpoints/tests/integration_tests/test_completions_models.py new file mode 100644 index 00000000..f7d1308a --- /dev/null +++ b/libs/ai-endpoints/tests/integration_tests/test_completions_models.py @@ -0,0 +1,150 @@ +# https://platform.openai.com/docs/api-reference/completions/create +# POST https://.../v1/completions +# model: str -- The ID of the model to use for completion. +# prompt: str | Array[str] -- The prompt(s) to generate completions for. +# best_of: Optional[int] (default: 1) -- An integer representing the number +# of completions to generate and score. +# The API will return the best completion +# of the group. +# echo: Optional[bool] (default: False) -- Whether to echo the prompt in addition +# to the completion. +# frequency_penalty: Optional[float] (default: 0.0) -- Float that penalizes new +# tokens. Range -2.0 to 2.0. +# logit_bias: Optional[Dict[str, float]] -- Dict containing token to logit bias. +# logprobs: Optional[int] (default: None) -- Integer representing the number of +# logprobs to return. 0 means no logprobs. +# Max value is 5. +# max_tokens: Optional[int] (default: 16) -- Integer representing the maximum number +# of tokens to generate. +# n: Optional[int] (default: 1) -- Integer representing the number of completions +# to generate. 
+# presence_penalty: Optional[float] (default: 0.0) -- Float that penalizes new tokens +# based on whether they appear in +# the text so far. Range -2.0 to +# 2.0. +# seed: Optional[int] (default: None) -- Integer seed that attempts to make the +# completions deterministic. +# stop: Optional[str|Array[str]] -- Token at which to stop generating completions. +# Up to 4 sequences. +# stream: Optional[bool] (default: False) -- Whether to stream back partial progress. +# stream_options: Optional[Dict["include_usage": bool]] -- Dict containing stream +# options. +# suffix: Optional[str] -- Suffix to add to the completion. +# temperature: Optional[float] (default: 1.0) -- Sampling temperature, between 0 and 2. +# top_p: Optional[float] (default: 1.0) -- Alternative to temperature sampling. +# user: Optional[str] -- User ID to associate with the request. +# +# Returns: +# id: str -- The ID of the completion. +# object: str -- Always "text_completion". +# created: int -- Unix timestamp of when the completion was created. +# model: str -- The ID of the model used to generate the completion. +# choices: List[{"finish_reason": "stop"|"length"|"content_filter", +# "index": int, +# "text": str, +# "logprobs": Optional[{"text_offset": array, +# "token_logprobs": array, +# "tokens": array, +# "top_logprobs": array}]}] -- +# List of completions generated by the model. +# usage: {"completion_tokens": int, +# "prompt_tokens": int, +# "total_tokens": int} -- Usage statistics for the model. +# system_fingerprint: str -- System fingerprint of the model used to generate +# the completion. + + +from typing import Any, Callable, Tuple + +import pytest + +from langchain_nvidia_ai_endpoints import NVIDIA + + +def invoke(llm: NVIDIA, prompt: str, **kwargs: Any) -> Tuple[str, int]: + return llm.invoke(prompt, **kwargs), 1 + + +def stream(llm: NVIDIA, prompt: str, **kwargs: Any) -> Tuple[str, int]: + response = "" + count = 0 + for chunk in llm.stream(prompt, **kwargs): + response += chunk + count += 1 + return response, count + + +@pytest.mark.parametrize( + "func, count", [(invoke, 0), (stream, 1)], ids=["invoke", "stream"] +) +def test_basic(completions_model: str, mode: dict, func: Callable, count: int) -> None: + llm = NVIDIA(model=completions_model, **mode) + response, cnt = func(llm, "Hello, my name is") + assert isinstance(response, str) + assert cnt > count, "Should have received more chunks" + + +@pytest.mark.parametrize( + "param, value", + [ + ("frequency_penalty", 0.5), + ("max_tokens", 32), + ("presence_penalty", 0.5), + ("seed", 1234), + ("stop", "Hello"), + ("temperature", 0.5), + ("top_p", 0.5), + ], +) +@pytest.mark.parametrize("func", [invoke, stream], ids=["invoke", "stream"]) +def test_params( + completions_model: str, mode: dict, param: str, value: Any, func: Callable +) -> None: + llm = NVIDIA(model=completions_model, **mode) + response, _ = func(llm, "Hello, my name is", **{param: value}) + assert isinstance(response, str) + + +@pytest.mark.parametrize( + "param, value", + [ + ("best_of", 5), + ("echo", True), + ("logit_bias", {"hello": 1.0}), + ("logprobs", 2), + ("n", 2), + ("suffix", "Hello"), + ("user", "1234"), + ], +) +@pytest.mark.parametrize("func", [invoke, stream], ids=["invoke", "stream"]) +@pytest.mark.xfail(reason="Not consistently implemented") +def test_params_incomplete( + completions_model: str, mode: dict, param: str, value: Any, func: Callable +) -> None: + llm = NVIDIA(model=completions_model, **mode) + response, _ = func(llm, "Hello, my name is", **{param: value}) + assert 
isinstance(response, str) + + +def test_invoke_with_stream_true(completions_model: str, mode: dict) -> None: + llm = NVIDIA(model=completions_model, **mode) + with pytest.warns(UserWarning) as record: + response = llm.invoke("Hello, my name is", stream=True) + assert isinstance(response, str) + assert len(record) == 1 + assert "stream set to true" in str(record[0].message) + assert "ignoring" in str(record[0].message) + + +def test_stream_with_stream_false(completions_model: str, mode: dict) -> None: + llm = NVIDIA(model=completions_model, **mode) + with pytest.warns(UserWarning) as record: + response = next(llm.stream("Hello, my name is", stream=False)) + assert isinstance(response, str) + assert len(record) == 1 + assert "stream set to false" in str(record[0].message) + assert "ignoring" in str(record[0].message) + + +# todo: check stream_options diff --git a/libs/ai-endpoints/tests/integration_tests/test_register_model.py b/libs/ai-endpoints/tests/integration_tests/test_register_model.py index 6488aee3..238f2cb5 100644 --- a/libs/ai-endpoints/tests/integration_tests/test_register_model.py +++ b/libs/ai-endpoints/tests/integration_tests/test_register_model.py @@ -4,6 +4,7 @@ import pytest from langchain_nvidia_ai_endpoints import ( + NVIDIA, ChatNVIDIA, Model, NVIDIAEmbeddings, @@ -34,6 +35,11 @@ "nv-rerank-qa-mistral-4b:1", "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/0bf77f50-5c35-4488-8e7a-f49bb1974af6", ), + ( + NVIDIA, + "bigcode/starcoder2-7b", + "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/dd7b01e7-732d-4da5-8e8d-315f79165a23", + ), ], ) def test_registered_model_functional( diff --git a/libs/ai-endpoints/tests/unit_tests/conftest.py b/libs/ai-endpoints/tests/unit_tests/conftest.py index f0790214..4288819e 100644 --- a/libs/ai-endpoints/tests/unit_tests/conftest.py +++ b/libs/ai-endpoints/tests/unit_tests/conftest.py @@ -3,7 +3,12 @@ import pytest import requests_mock -from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings, NVIDIARerank +from langchain_nvidia_ai_endpoints import ( + NVIDIA, + ChatNVIDIA, + NVIDIAEmbeddings, + NVIDIARerank, +) @pytest.fixture( @@ -11,6 +16,7 @@ ChatNVIDIA, NVIDIAEmbeddings, NVIDIARerank, + NVIDIA, ] ) def public_class(request: pytest.FixtureRequest) -> type: diff --git a/libs/ai-endpoints/tests/unit_tests/test_completions_models.py b/libs/ai-endpoints/tests/unit_tests/test_completions_models.py new file mode 100644 index 00000000..34de4c7c --- /dev/null +++ b/libs/ai-endpoints/tests/unit_tests/test_completions_models.py @@ -0,0 +1,149 @@ +import json +from functools import reduce +from operator import add +from typing import Any, Callable, List + +import pytest +import requests_mock + +from langchain_nvidia_ai_endpoints import NVIDIA + + +def invoke(llm: NVIDIA, prompt: str, **kwargs: Any) -> str: + return llm.invoke(prompt, **kwargs) + + +def stream(llm: NVIDIA, prompt: str, **kwargs: Any) -> str: + return reduce(add, llm.stream(prompt, **kwargs)) + + +mock_response = { + "id": "ID", + "object": "text_completion", + "created": 1234567890, + "model": "BOGUS", + "choices": [ + { + "index": 0, + "text": "COMPLETION", + } + ], + "usage": {"prompt_tokens": 7, "total_tokens": 207, "completion_tokens": 200}, +} + + +@pytest.fixture(scope="function") +def mock_v1_completions_invoke( + requests_mock: requests_mock.Mocker, +) -> requests_mock.Mocker: + requests_mock.post( + "https://integrate.api.nvidia.com/v1/completions", + json=mock_response, + ) + return requests_mock + + +@pytest.fixture(scope="function") +def 
mock_v1_completions_stream( + requests_mock: requests_mock.Mocker, +) -> requests_mock.Mocker: + requests_mock.post( + "https://integrate.api.nvidia.com/v1/completions", + text="\n\n".join( + [ + f"data: {json.dumps(mock_response)}", + "data: [DONE]", + ] + ), + ) + return requests_mock + + +@pytest.mark.parametrize( + "param, value", + [ + ("frequency_penalty", [0.25, 0.5, 0.75]), + ("max_tokens", [2, 32, 512]), + ("presence_penalty", [0.25, 0.5, 0.75]), + ("seed", [1, 1234, 4321]), + ("stop", ["Hello", "There", "World"]), + ("temperature", [0, 0.5, 1]), + ("top_p", [0, 0.5, 1]), + ("best_of", [1, 5, 10]), + ("echo", [True, False, True]), + ("logit_bias", [{"hello": 1.0}, {"there": 1.0}, {"world": 1.0}]), + ("logprobs", [1, 2, 3]), + ("n", [1, 2, 3]), + ("suffix", ["Hello", "There", "World"]), + ("user", ["Bob", "Alice", "Eve"]), + ], +) +@pytest.mark.parametrize( + "func, mock_name", + [(invoke, "mock_v1_completions_invoke"), (stream, "mock_v1_completions_stream")], + ids=["invoke", "stream"], +) +def test_params( + param: str, + value: List[Any], + func: Callable, + mock_name: str, + request: pytest.FixtureRequest, +) -> None: + """ + This tests the following... + - priority order (init -> bind -> infer) + - param passed to init, bind, invoke / stream + ...for each known Completion API param. + """ + + mock = request.getfixturevalue(mock_name) + + init, bind, infer = value + + llm = NVIDIA(api_key="BOGUS", **{param: init}) + func(llm, "IGNORED") + request_payload = mock.last_request.json() + assert param in request_payload + assert request_payload[param] == init + + bound_llm = llm.bind(**{param: bind}) + func(bound_llm, "IGNORED") + request_payload = mock.last_request.json() + assert param in request_payload + assert request_payload[param] == bind + + func(bound_llm, "IGNORED", **{param: infer}) + request_payload = mock.last_request.json() + assert param in request_payload + assert request_payload[param] == infer + + +@pytest.mark.parametrize( + "func, mock_name", + [(invoke, "mock_v1_completions_invoke"), (stream, "mock_v1_completions_stream")], + ids=["invoke", "stream"], +) +def test_params_unknown( + func: Callable, + mock_name: str, + request: pytest.FixtureRequest, +) -> None: + request.getfixturevalue(mock_name) + + with pytest.warns(UserWarning) as record: + llm = NVIDIA(api_key="BOGUS", init_unknown="INIT") + assert len(record) == 1 + assert "Unrecognized, ignored arguments: {'init_unknown'}" in str(record[0].message) + + with pytest.warns(UserWarning) as record: + func(llm, "IGNORED", arg_unknown="ARG") + assert len(record) == 1 + assert "Unrecognized, ignored arguments: {'arg_unknown'}" in str(record[0].message) + + bound_llm = llm.bind(bind_unknown="BIND") + + with pytest.warns(UserWarning) as record: + func(bound_llm, "IGNORED") + assert len(record) == 1 + assert "Unrecognized, ignored arguments: {'bind_unknown'}" in str(record[0].message) diff --git a/libs/ai-endpoints/tests/unit_tests/test_imports.py b/libs/ai-endpoints/tests/unit_tests/test_imports.py index e72c2c6c..200bbea4 100644 --- a/libs/ai-endpoints/tests/unit_tests/test_imports.py +++ b/libs/ai-endpoints/tests/unit_tests/test_imports.py @@ -4,6 +4,7 @@ "ChatNVIDIA", "NVIDIAEmbeddings", "NVIDIARerank", + "NVIDIA", "register_model", "Model", ] diff --git a/libs/ai-endpoints/tests/unit_tests/test_register_model.py b/libs/ai-endpoints/tests/unit_tests/test_register_model.py index d42bdee5..482d40dc 100644 --- a/libs/ai-endpoints/tests/unit_tests/test_register_model.py +++ 
b/libs/ai-endpoints/tests/unit_tests/test_register_model.py @@ -3,6 +3,7 @@ import pytest from langchain_nvidia_ai_endpoints import ( + NVIDIA, ChatNVIDIA, Model, NVIDIAEmbeddings, @@ -16,12 +17,19 @@ [ ("chat", "NVIDIAEmbeddings"), ("chat", "NVIDIARerank"), + ("chat", "NVIDIA"), ("vlm", "NVIDIAEmbeddings"), ("vlm", "NVIDIARerank"), + ("vlm", "NVIDIA"), ("embeddings", "ChatNVIDIA"), ("embeddings", "NVIDIARerank"), + ("embeddings", "NVIDIA"), ("ranking", "ChatNVIDIA"), ("ranking", "NVIDIAEmbeddings"), + ("ranking", "NVIDIA"), + ("completions", "ChatNVIDIA"), + ("completions", "NVIDIAEmbeddings"), + ("completions", "NVIDIARerank"), ], ) def test_mismatched_type_client(model_type: str, client: str) -> None: @@ -53,6 +61,7 @@ def test_registered_model_usable(public_class: type) -> None: "ChatNVIDIA": "chat", "NVIDIAEmbeddings": "embedding", "NVIDIARerank": "ranking", + "NVIDIA": "completions", }[public_class.__name__] with warnings.catch_warnings(): warnings.simplefilter("error") @@ -112,21 +121,38 @@ def test_registered_model_is_available() -> None: endpoint="BOGUS", ) ) + register_model( + Model( + id="test/completions", + model_type="completions", + client="NVIDIA", + endpoint="BOGUS", + ) + ) chat_models = ChatNVIDIA.get_available_models(api_key="BOGUS") embedding_models = NVIDIAEmbeddings.get_available_models(api_key="BOGUS") ranking_models = NVIDIARerank.get_available_models(api_key="BOGUS") + completions_models = NVIDIA.get_available_models(api_key="BOGUS") assert "test/chat" in [model.id for model in chat_models] assert "test/chat" not in [model.id for model in embedding_models] assert "test/chat" not in [model.id for model in ranking_models] + assert "test/chat" not in [model.id for model in completions_models] assert "test/embedding" not in [model.id for model in chat_models] assert "test/embedding" in [model.id for model in embedding_models] assert "test/embedding" not in [model.id for model in ranking_models] + assert "test/embedding" not in [model.id for model in completions_models] assert "test/rerank" not in [model.id for model in chat_models] assert "test/rerank" not in [model.id for model in embedding_models] assert "test/rerank" in [model.id for model in ranking_models] + assert "test/rerank" not in [model.id for model in completions_models] + + assert "test/completions" not in [model.id for model in chat_models] + assert "test/completions" not in [model.id for model in embedding_models] + assert "test/completions" not in [model.id for model in ranking_models] + assert "test/completions" in [model.id for model in completions_models] def test_registered_model_without_client_is_not_listed(public_class: type) -> None: From e8777338219b63b08010de885b6a130d2afe3f79 Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Mon, 19 Aug 2024 08:33:02 -0400 Subject: [PATCH 16/31] fix spelling of completions and NVIDIA --- libs/ai-endpoints/langchain_nvidia_ai_endpoints/llm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/llm.py b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/llm.py index 94d23cd4..dabe62b5 100644 --- a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/llm.py +++ b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/llm.py @@ -86,7 +86,7 @@ def __init__(self, **kwargs: Any): hosted NIM. Args: - model (str): The model to use for reranking. + model (str): The model to use for completions. nvidia_api_key (str): The API key to use for connecting to the hosted NIM. api_key (str): Alternative to nvidia_api_key. 
base_url (str): The base URL of the NIM to connect to. @@ -142,7 +142,7 @@ def __init__(self, **kwargs: Any): @property def available_models(self) -> List[Model]: """ - Get a list of available models that work with NVIDIARerank. + Get a list of available models that work with NVIDIA. """ return self._client.get_available_models(self.__class__.__name__) From baf73fae0e561449c5977295646fedc427740554 Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Mon, 19 Aug 2024 12:30:37 -0400 Subject: [PATCH 17/31] add baichuan-inc/baichuan2-13b-chat and thudm/chatglm3-6b to set of supported chat models --- .../langchain_nvidia_ai_endpoints/_statics.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py index 52e29679..173ddae1 100644 --- a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py +++ b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py @@ -354,6 +354,16 @@ def validate_client(cls, client: str, values: dict) -> str: model_type="chat", client="ChatNVIDIA", ), + "baichuan-inc/baichuan2-13b-chat": Model( + id="baichuan-inc/baichuan2-13b-chat", + model_type="chat", + client="ChatNVIDIA", + ), + "thudm/chatglm3-6b": Model( + id="thudm/chatglm3-6b", + model_type="chat", + client="ChatNVIDIA", + ), } QA_MODEL_TABLE = { From 386a42b805048703d194ce27b50bd8858bb7113f Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Mon, 19 Aug 2024 14:05:29 -0400 Subject: [PATCH 18/31] trim param docs to include only known functional params --- libs/ai-endpoints/langchain_nvidia_ai_endpoints/llm.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/llm.py b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/llm.py index dabe62b5..de58338b 100644 --- a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/llm.py +++ b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/llm.py @@ -103,13 +103,6 @@ def __init__(self, **kwargs: Any): - frequency_penalty (float): The frequency penalty to apply to the completion. - presence_penalty (float): The presence penalty to apply to the completion. - seed (int): The seed to use for generating completions. - - best_of (int): The number of completions to generate and return the best of. - - echo (bool): Whether to echo the prompt in the completion. - - logit_bias (Dict[str, float]): The logit bias to apply to the completion. - - logprobs (int): The number of logprobs to return. - - n (int): The number of completions to generate. - - suffix (str): The suffix to use for generating completions. - - user (str): The user ID to use for generating completions. These additional arguments can also be passed with `bind()`, e.g. 
`NVIDIA().bind(max_tokens=512)`, or pass directly to `invoke()` or `stream()`, From 214c08e1b7fe7b02bcc561a37411d44157c9dc03 Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Tue, 20 Aug 2024 16:19:22 -0400 Subject: [PATCH 19/31] update default models, embedding: nvidia/nv-embedqa-e5-v5, reranking: nvidia/nv-rerankqa-mistral-4b-v3 --- .../langchain_nvidia_ai_endpoints/embeddings.py | 2 +- .../langchain_nvidia_ai_endpoints/reranking.py | 2 +- .../ai-endpoints/tests/integration_tests/test_ranking.py | 9 ++------- 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/embeddings.py b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/embeddings.py index cd32b4ed..f8940f84 100644 --- a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/embeddings.py +++ b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/embeddings.py @@ -34,7 +34,7 @@ class Config: validate_assignment = True _client: _NVIDIAClient = PrivateAttr(_NVIDIAClient) - _default_model_name: str = "NV-Embed-QA" + _default_model_name: str = "nvidia/nv-embedqa-e5-v5" _default_max_batch_size: int = 50 _default_base_url: str = "https://integrate.api.nvidia.com/v1" base_url: str = Field( diff --git a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/reranking.py b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/reranking.py index 62a425b0..2c2047fa 100644 --- a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/reranking.py +++ b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/reranking.py @@ -28,7 +28,7 @@ class Config: _client: _NVIDIAClient = PrivateAttr(_NVIDIAClient) _default_batch_size: int = 32 - _default_model_name: str = "nv-rerank-qa-mistral-4b:1" + _default_model_name: str = "nvidia/nv-rerankqa-mistral-4b-v3" _default_base_url: str = "https://integrate.api.nvidia.com/v1" base_url: str = Field( description="Base url for model listing an invocation", diff --git a/libs/ai-endpoints/tests/integration_tests/test_ranking.py b/libs/ai-endpoints/tests/integration_tests/test_ranking.py index 867aab64..06f9444b 100644 --- a/libs/ai-endpoints/tests/integration_tests/test_ranking.py +++ b/libs/ai-endpoints/tests/integration_tests/test_ranking.py @@ -196,14 +196,9 @@ def test_truncate_positive(rerank_model: str, mode: dict, truncate: str) -> None @pytest.mark.parametrize("truncate", [None, "NONE"]) -@pytest.mark.xfail( - reason=( - "truncation is inconsistent across models, " - "nv-rerank-qa-mistral-4b:1 truncates by default " - "while others do not" - ) -) def test_truncate_negative(rerank_model: str, mode: dict, truncate: str) -> None: + if rerank_model == "nv-rerank-qa-mistral-4b:1": + pytest.skip("nv-rerank-qa-mistral-4b:1 truncates by default") query = "What is acceleration?" 
documents = [ Document(page_content="NVIDIA " * length) From bb2fa1dd2bc7059228a3b3be107711ebdeb4ebf5 Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Thu, 22 Aug 2024 12:05:48 -0400 Subject: [PATCH 20/31] allow multiple models to pytest --vlm-model-id --- libs/ai-endpoints/tests/integration_tests/conftest.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/libs/ai-endpoints/tests/integration_tests/conftest.py b/libs/ai-endpoints/tests/integration_tests/conftest.py index f1dd58f6..372d27e1 100644 --- a/libs/ai-endpoints/tests/integration_tests/conftest.py +++ b/libs/ai-endpoints/tests/integration_tests/conftest.py @@ -54,7 +54,8 @@ def pytest_addoption(parser: pytest.Parser) -> None: parser.addoption( "--vlm-model-id", action="store", - help="Run tests for a specific vlm model", + nargs="+", + help="Run tests for a specific vlm model or list of models", ) parser.addoption( "--all-models", @@ -120,8 +121,8 @@ def get_all_known_models() -> List[Model]: if "vlm_model" in metafunc.fixturenames: models = ["nvidia/neva-22b"] - if model := metafunc.config.getoption("vlm_model_id"): - models = [model] + if model_list := metafunc.config.getoption("vlm_model_id"): + models = model_list if metafunc.config.getoption("all_models"): models = [ model.id From 8fab970891c9ee659d839effdd542d082d22c524 Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Thu, 22 Aug 2024 12:06:59 -0400 Subject: [PATCH 21/31] add microsoft/phi-3.5-mini-instruct, microsoft/phi-3.5-moe-instruct, nvidia/nemotron-mini-4b-instruct, ai21labs/jamba-1.5-large-instruct, ai21labs/jamba-1.5-mini-instruct to set of supported chat models --- .../langchain_nvidia_ai_endpoints/_statics.py | 25 +++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py index 173ddae1..fa125c67 100644 --- a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py +++ b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py @@
-389,6 +389,16 @@ def validate_client(cls, client: str, values: dict) -> str: model_type="chat", client="ChatNVIDIA", ), + "yentinglin/llama-3-taiwan-70b-instruct": Model( + id="yentinglin/llama-3-taiwan-70b-instruct", + model_type="chat", + client="ChatNVIDIA", + ), + "tokyotech-llm/llama-3-swallow-70b-instruct-v0.1": Model( + id="tokyotech-llm/llama-3-swallow-70b-instruct-v0.1", + model_type="chat", + client="ChatNVIDIA", + ), } QA_MODEL_TABLE = { From 8ea0de755df3e2ab8946b5e1cb4638c96b0e6eda Mon Sep 17 00:00:00 2001 From: raspawar Date: Mon, 26 Aug 2024 19:21:54 +0530 Subject: [PATCH 23/31] raise a warning for known endpoints --- .../langchain_nvidia_ai_endpoints/_common.py | 23 ++++--------------- .../tests/unit_tests/test_base_url.py | 14 +++-------- 2 files changed, 7 insertions(+), 30 deletions(-) diff --git a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_common.py b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_common.py index 5393e160..687f2e13 100644 --- a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_common.py +++ b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_common.py @@ -17,7 +17,7 @@ Tuple, Union, ) -from urllib.parse import urlparse, urlunparse +from urllib.parse import urlparse import requests from langchain_core.pydantic_v1 import ( @@ -124,7 +124,7 @@ def _preprocess_args(cls, values: Dict[str, Any]) -> Dict[str, Any]: ## Making sure /v1 in added to the url, followed by infer_path if "base_url" in values: - base_url = values["base_url"] + base_url = values["base_url"].strip("/") parsed = urlparse(base_url) expected_format = "Expected format is: http://host:port" @@ -133,24 +133,9 @@ def _preprocess_args(cls, values: Dict[str, Any]) -> Dict[str, Any]: f"Invalid base_url format. {expected_format} Got: {base_url}" ) - if parsed.path: - normalized_path = parsed.path.strip("/") - if normalized_path == "v1": - pass - elif normalized_path in [ - "v1/embeddings", - "v1/completions", - "v1/rankings", - ]: - warnings.warn(f"Using {base_url}, ignoring the rest") - else: - raise ValueError( - f"Base URL path is not recognized. 
{expected_format}" - ) + if base_url.endswith(("/embeddings", "/completions", "/rankings")): + warnings.warn(f"Using {base_url}, ignoring the rest") - base_url = urlunparse( - (parsed.scheme, parsed.netloc, "v1", None, None, None) - ) values["base_url"] = base_url values["infer_path"] = values["infer_path"].format(base_url=base_url) diff --git a/libs/ai-endpoints/tests/unit_tests/test_base_url.py b/libs/ai-endpoints/tests/unit_tests/test_base_url.py index b5c3a8ef..e8e393b2 100644 --- a/libs/ai-endpoints/tests/unit_tests/test_base_url.py +++ b/libs/ai-endpoints/tests/unit_tests/test_base_url.py @@ -94,6 +94,7 @@ def test_param_base_url_hosted(public_class: type, base_url: str) -> None: "https://localhost", "http://localhost:8888", "http://0.0.0.0:8888/v1", + "http://0.0.0.0:8888/v1/", ], ) def test_param_base_url_not_hosted(public_class: type, base_url: str) -> None: @@ -107,18 +108,9 @@ def test_param_base_url_not_hosted(public_class: type, base_url: str) -> None: [ "http://localhost:8888/embeddings", "http://0.0.0.0:8888/rankings", + "http://localhost:8888/embeddings/", + "http://0.0.0.0:8888/rankings/", "http://localhost:8888/chat/completions", - ], -) -def test_expect_error(public_class: type, base_url: str) -> None: - with pytest.raises(ValueError) as e: - public_class(model="model1", base_url=base_url) - assert "Expected format is" in str(e.value) - - -@pytest.mark.parametrize( - "base_url", - [ "http://localhost:8080/v1/embeddings", "http://0.0.0.0:8888/v1/rankings", ], From d315b112065909550a96f4b065752759f8652bfa Mon Sep 17 00:00:00 2001 From: raspawar Date: Tue, 27 Aug 2024 11:35:55 +0530 Subject: [PATCH 24/31] add reranker test case and url --- libs/ai-endpoints/langchain_nvidia_ai_endpoints/_common.py | 4 +++- libs/ai-endpoints/tests/unit_tests/test_base_url.py | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_common.py b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_common.py index 687f2e13..2bde648a 100644 --- a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_common.py +++ b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_common.py @@ -133,7 +133,9 @@ def _preprocess_args(cls, values: Dict[str, Any]) -> Dict[str, Any]: f"Invalid base_url format. 
{expected_format} Got: {base_url}" ) - if base_url.endswith(("/embeddings", "/completions", "/rankings")): + if base_url.endswith( + ("/embeddings", "/completions", "/rankings", "/reranking") + ): warnings.warn(f"Using {base_url}, ignoring the rest") values["base_url"] = base_url diff --git a/libs/ai-endpoints/tests/unit_tests/test_base_url.py b/libs/ai-endpoints/tests/unit_tests/test_base_url.py index e8e393b2..0baaca6c 100644 --- a/libs/ai-endpoints/tests/unit_tests/test_base_url.py +++ b/libs/ai-endpoints/tests/unit_tests/test_base_url.py @@ -95,6 +95,7 @@ def test_param_base_url_hosted(public_class: type, base_url: str) -> None: "http://localhost:8888", "http://0.0.0.0:8888/v1", "http://0.0.0.0:8888/v1/", + "http://blah/some/other/path/v1", ], ) def test_param_base_url_not_hosted(public_class: type, base_url: str) -> None: From 6ff6bb7dc4a97d2e45cdd7b3ede316c85b474fed Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Tue, 27 Aug 2024 07:21:19 -0400 Subject: [PATCH 25/31] fix ChatNVIDA -> ChatNVIDIA --- libs/ai-endpoints/docs/llms/nvidia_ai_endpoints.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/ai-endpoints/docs/llms/nvidia_ai_endpoints.ipynb b/libs/ai-endpoints/docs/llms/nvidia_ai_endpoints.ipynb index c301fb8e..681df1b3 100644 --- a/libs/ai-endpoints/docs/llms/nvidia_ai_endpoints.ipynb +++ b/libs/ai-endpoints/docs/llms/nvidia_ai_endpoints.ipynb @@ -10,7 +10,7 @@ "You are currently on a page documenting the use of models as [text completion models](/docs/concepts/#llms).\n", "Many popular models are [chat completion models](/docs/concepts/#chat-models).\n", "\n", - "To use chat completion models, use [ChatNVIDA](/docs/integrations/chat/nvidia_ai_endpoints/) instead.\n", + "To use chat completion models, use [ChatNVIDIA](/docs/integrations/chat/nvidia_ai_endpoints/) instead.\n", ":::\n", "\n", "The `langchain-nvidia-ai-endpoints` package contains LangChain integrations building applications with models on \n", From e3b290e73591929cd466fc94c3e64928274890d0 Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Tue, 27 Aug 2024 07:27:53 -0400 Subject: [PATCH 26/31] add Completions example to README --- libs/ai-endpoints/README.md | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/libs/ai-endpoints/README.md b/libs/ai-endpoints/README.md index 7171c9a0..68f06985 100644 --- a/libs/ai-endpoints/README.md +++ b/libs/ai-endpoints/README.md @@ -225,6 +225,29 @@ llm.invoke( ) ``` +## Completions + +You can also work with models that support the Completions API. These models accept a `prompt` instead of `messages`. + +```python +completions_llm = NVIDIA().bind(max_tokens=512) +[model.id for model in completions_llm.get_available_models()] + +# [ +# ... +# 'bigcode/starcoder2-7b', +# 'bigcode/starcoder2-15b', +# ... +# ] +``` + +```python +prompt = "# Function that does quicksort written in Rust without comments:" +for chunk in completions_llm.stream(prompt): + print(chunk, end="", flush=True) +``` + + ## Embeddings You can also connect to embeddings models through this package. 
Below is an example: From ac5d18a7cbb9f50796c2986dd2862f282aff3355 Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Tue, 27 Aug 2024 17:00:43 -0400 Subject: [PATCH 27/31] set default model to nvidia/mistral-nemo-minitron-8b-base --- libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py | 5 +++++ libs/ai-endpoints/langchain_nvidia_ai_endpoints/llm.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py index a2992523..21affcd7 100644 --- a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py +++ b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py @@ -505,6 +505,11 @@ def validate_client(cls, client: str, values: dict) -> str: model_type="completions", client="NVIDIA", ), + "nvidia/mistral-nemo-minitron-8b-base": Model( + id="nvidia/mistral-nemo-minitron-8b-base", + model_type="completions", + client="NVIDIA", + ), } diff --git a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/llm.py b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/llm.py index de58338b..0dac0956 100644 --- a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/llm.py +++ b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/llm.py @@ -22,7 +22,7 @@ class Config: validate_assignment = True _client: _NVIDIAClient = PrivateAttr(_NVIDIAClient) - _default_model_name: str = "bigcode/starcoder2-7b" + _default_model_name: str = "nvidia/mistral-nemo-minitron-8b-base" _default_base_url: str = "https://integrate.api.nvidia.com/v1" base_url: str = Field( description="Base url for model listing and invocation", From 4c351cd1519fae2b07a9775c72857b2cba3c7c6b Mon Sep 17 00:00:00 2001 From: Daniel Glogowski Date: Tue, 27 Aug 2024 19:45:40 -0700 Subject: [PATCH 28/31] updated llm nb --- .../docs/llms/nvidia_ai_endpoints.ipynb | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/libs/ai-endpoints/docs/llms/nvidia_ai_endpoints.ipynb b/libs/ai-endpoints/docs/llms/nvidia_ai_endpoints.ipynb index 681df1b3..a4a41f76 100644 --- a/libs/ai-endpoints/docs/llms/nvidia_ai_endpoints.ipynb +++ b/libs/ai-endpoints/docs/llms/nvidia_ai_endpoints.ipynb @@ -128,6 +128,15 @@ "print(llm.invoke(prompt))" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Stream, Batch, and Async\n", + "\n", + "These models natively support streaming, and as is the case with all LangChain LLMs they expose a batch method to handle concurrent requests, as well as async methods for invoke, stream, and batch. Below are a few examples." + ] + }, { "cell_type": "code", "execution_count": null, @@ -196,6 +205,25 @@ ")\n", "print(response)" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Supported models\n", + "\n", + "Querying `available_models` will still give you all of the other models offered by your API credentials." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "NVIDIA.get_available_models()\n", + "# llm.get_available_models()" + ] } ], "metadata": { From a785670a0ce1681b5d122854eeb59356bece3969 Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Wed, 28 Aug 2024 06:40:59 -0400 Subject: [PATCH 29/31] add _identifying_params --- libs/ai-endpoints/langchain_nvidia_ai_endpoints/llm.py | 10 ++++++++++ .../tests/unit_tests/test_completions_models.py | 5 +++++ 2 files changed, 15 insertions(+) diff --git a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/llm.py b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/llm.py index 0dac0956..12f364a5 100644 --- a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/llm.py +++ b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/llm.py @@ -157,6 +157,16 @@ def _llm_type(self) -> str: """ return "NVIDIA" + @property + def _identifying_params(self) -> Dict[str, Any]: + """ + Get parameters used to help identify the LLM. + """ + return { + "model": self.model, + "base_url": self.base_url, + } + def _call( self, prompt: str, diff --git a/libs/ai-endpoints/tests/unit_tests/test_completions_models.py b/libs/ai-endpoints/tests/unit_tests/test_completions_models.py index 34de4c7c..24239bb7 100644 --- a/libs/ai-endpoints/tests/unit_tests/test_completions_models.py +++ b/libs/ai-endpoints/tests/unit_tests/test_completions_models.py @@ -147,3 +147,8 @@ def test_params_unknown( func(bound_llm, "IGNORED") assert len(record) == 1 assert "Unrecognized, ignored arguments: {'bind_unknown'}" in str(record[0].message) + + +def test_identifying_params() -> None: + llm = NVIDIA(api_key="BOGUS") + assert set(llm._identifying_params.keys()) == {"model", "base_url"} From a42e389d6c679faff352b0241d1232e8bca7e4a0 Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Wed, 28 Aug 2024 06:53:51 -0400 Subject: [PATCH 30/31] add ainvoke / astream basic tests --- .../test_completions_models.py | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/libs/ai-endpoints/tests/integration_tests/test_completions_models.py b/libs/ai-endpoints/tests/integration_tests/test_completions_models.py index f7d1308a..3fb4e724 100644 --- a/libs/ai-endpoints/tests/integration_tests/test_completions_models.py +++ b/libs/ai-endpoints/tests/integration_tests/test_completions_models.py @@ -74,6 +74,19 @@ def stream(llm: NVIDIA, prompt: str, **kwargs: Any) -> Tuple[str, int]: return response, count +async def ainvoke(llm: NVIDIA, prompt: str, **kwargs: Any) -> Tuple[str, int]: + return await llm.ainvoke(prompt, **kwargs), 1 + + +async def astream(llm: NVIDIA, prompt: str, **kwargs: Any) -> Tuple[str, int]: + response = "" + count = 0 + async for chunk in llm.astream(prompt, **kwargs): + response += chunk + count += 1 + return response, count + + @pytest.mark.parametrize( "func, count", [(invoke, 0), (stream, 1)], ids=["invoke", "stream"] ) @@ -84,6 +97,16 @@ def test_basic(completions_model: str, mode: dict, func: Callable, count: int) - assert cnt > count, "Should have received more chunks" +@pytest.mark.parametrize( + "func, count", [(ainvoke, 0), (astream, 1)], ids=["ainvoke", "astream"] +) +async def test_abasic(completions_model: str, mode: dict, func: Callable, count: int) -> None: + llm = NVIDIA(model=completions_model, **mode) + response, cnt = await func(llm, "Hello, my name is") + assert isinstance(response, str) + assert cnt > count, "Should have received more chunks" + + @pytest.mark.parametrize( "param, value", [ From 
f21f394308f5615479fc9fa383e1918dd9a41811 Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Wed, 28 Aug 2024 06:57:17 -0400 Subject: [PATCH 31/31] fix lint --- .../tests/integration_tests/test_completions_models.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libs/ai-endpoints/tests/integration_tests/test_completions_models.py b/libs/ai-endpoints/tests/integration_tests/test_completions_models.py index 3fb4e724..74a7927e 100644 --- a/libs/ai-endpoints/tests/integration_tests/test_completions_models.py +++ b/libs/ai-endpoints/tests/integration_tests/test_completions_models.py @@ -100,7 +100,9 @@ def test_basic(completions_model: str, mode: dict, func: Callable, count: int) - @pytest.mark.parametrize( "func, count", [(ainvoke, 0), (astream, 1)], ids=["ainvoke", "astream"] ) -async def test_abasic(completions_model: str, mode: dict, func: Callable, count: int) -> None: +async def test_abasic( + completions_model: str, mode: dict, func: Callable, count: int +) -> None: llm = NVIDIA(model=completions_model, **mode) response, cnt = await func(llm, "Hello, my name is") assert isinstance(response, str)
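With the async paths now tested, a minimal end-to-end sketch of the surface added in this series. The model id comes from the new default set earlier in the series, the prompt echoes the README example, and the `max_tokens` value is illustrative; `NVIDIA_API_KEY` is assumed to be set in the environment.

```python
import asyncio

from langchain_nvidia_ai_endpoints import NVIDIA


async def main() -> None:
    # bind() mirrors the README usage; the default model would also work
    llm = NVIDIA(model="nvidia/mistral-nemo-minitron-8b-base").bind(max_tokens=128)

    # ainvoke returns the full completion as a string
    completion = await llm.ainvoke("# Function that does quicksort in Rust:")
    print(completion)

    # astream yields string chunks as they arrive
    async for chunk in llm.astream("# Function that does quicksort in Rust:"):
        print(chunk, end="", flush=True)


asyncio.run(main())
```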