Merge branch 'main' into erick/ai-endpoints-patch-standard-tests

langchain-ai · Aug 28, 2024 · b3982b3 · b3982b3
2 parents 952acc5 + 9f9b762
commit b3982b3
Show file tree

Hide file tree

Showing 27 changed files with 1,877 additions and 393 deletions.
diff --git a/.github/workflows/_scheduled_test.yml b/.github/workflows/_scheduled_test.yml
@@ -4,7 +4,7 @@ run-name: langchain-nvidia Scheduled tests
 on:
   workflow_dispatch:
   schedule:
-    - cron:  '0 13 * * *'
+    - cron:  '0 8 * * *'
 
 env:
   POETRY_VERSION: "1.7.1"

diff --git a/cookbook/nvidia_nim_agents_llama3.1.ipynb b/cookbook/nvidia_nim_agents_llama3.1.ipynb
diff --git a/libs/ai-endpoints/Makefile b/libs/ai-endpoints/Makefile
@@ -7,16 +7,16 @@ all: help
 TEST_FILE ?= tests/unit_tests/
 
 test:
-	poetry run pytest $(TEST_FILE)
+	poetry run pytest $(PYTEST_ARGS) $(TEST_FILE)
 
 tests:
-	poetry run pytest $(TEST_FILE)
+	poetry run pytest $(PYTEST_ARGS) $(TEST_FILE)
 
 check_imports: $(shell find langchain_nvidia_ai_endpoints -name '*.py')
 	poetry run python ./scripts/check_imports.py $^
 
 integration_tests:
-	poetry run pytest tests/integration_tests
+	poetry run pytest tests/integration_tests $(PYTEST_ARGS)
 
 
 ######################

diff --git a/libs/ai-endpoints/README.md b/libs/ai-endpoints/README.md
@@ -225,6 +225,29 @@ llm.invoke(
 )
 ```
 
+## Completions
+
+You can also work with models that support the Completions API. These models accept a `prompt` instead of `messages`.
+
+```python
+completions_llm = NVIDIA().bind(max_tokens=512)
+[model.id for model in completions_llm.get_available_models()]
+
+# [
+#   ...
+#   'bigcode/starcoder2-7b',
+#   'bigcode/starcoder2-15b',
+#   ...
+# ]
+```
+
+```python
+prompt = "# Function that does quicksort written in Rust without comments:"
+for chunk in completions_llm.stream(prompt):
+    print(chunk, end="", flush=True)
+```
+
+
 ## Embeddings
 
 You can also connect to embeddings models through this package. Below is an example:

diff --git a/libs/ai-endpoints/docs/llms/nvidia_ai_endpoints.ipynb b/libs/ai-endpoints/docs/llms/nvidia_ai_endpoints.ipynb
@@ -0,0 +1,250 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# NVIDIA NIMs\n",
+    "\n",
+    ":::caution\n",
+    "You are currently on a page documenting the use of models as [text completion models](/docs/concepts/#llms).\n",
+    "Many popular models are [chat completion models](/docs/concepts/#chat-models).\n",
+    "\n",
+    "To use chat completion models, use [ChatNVIDIA](/docs/integrations/chat/nvidia_ai_endpoints/) instead.\n",
+    ":::\n",
+    "\n",
+    "The `langchain-nvidia-ai-endpoints` package contains LangChain integrations for building applications with models on \n",
+    "the NVIDIA NIM inference microservice. NIM supports models across domains like chat, completion, embedding, and re-ranking, \n",
+    "from the community as well as NVIDIA. These models are optimized by NVIDIA to deliver the best performance on NVIDIA \n",
+    "accelerated infrastructure and deployed as NIMs: easy-to-use, prebuilt containers that deploy anywhere using a single \n",
+    "command on NVIDIA accelerated infrastructure.\n",
+    "\n",
+    "NVIDIA hosted deployments of NIMs are available to test on the [NVIDIA API catalog](https://build.nvidia.com/). After testing, \n",
+    "NIMs can be exported from NVIDIA’s API catalog using the NVIDIA AI Enterprise license and run on-premises or in the cloud, \n",
+    "giving enterprises ownership and full control of their IP and AI application.\n",
+    "\n",
+    "NIMs are packaged as container images on a per model basis and are distributed as NGC container images through the NVIDIA NGC Catalog. \n",
+    "At their core, NIMs provide easy, consistent, and familiar APIs for running inference on an AI model.\n",
+    "\n",
+    "This example goes over how to use LangChain to interact with NVIDIA-supported models via the `NVIDIA` class.\n",
+    "\n",
+    "For more information on accessing the completion models through this API, check out the [NVIDIA](https://python.langchain.com/docs/integrations/llms/nvidia_ai_endpoints/) documentation.\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Installation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#%pip install -qU langchain-nvidia-ai-endpoints"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Setup\n",
+    "\n",
+    "**To get started:**\n",
+    "\n",
+    "1. Create a free account with [NVIDIA](https://build.nvidia.com/), which hosts NVIDIA AI Foundation models.\n",
+    "\n",
+    "2. Click on your model of choice.\n",
+    "\n",
+    "3. Under `Input` select the `Python` tab, and click `Get API Key`. Then click `Generate Key`.\n",
+    "\n",
+    "4. Copy and save the generated key as `NVIDIA_API_KEY`. From there, you should have access to the endpoints."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from getpass import getpass\n",
+    "\n",
+    "# del os.environ['NVIDIA_API_KEY']  ## delete key and reset\n",
+    "if os.environ.get(\"NVIDIA_API_KEY\", \"\").startswith(\"nvapi-\"):\n",
+    "    print(\"Valid NVIDIA_API_KEY already in environment. Delete to reset\")\n",
+    "else:\n",
+    "    candidate_api_key = getpass(\"NVAPI Key (starts with nvapi-): \")\n",
+    "    assert candidate_api_key.startswith(\"nvapi-\"), f\"{candidate_api_key[:5]}... is not a valid key\"\n",
+    "    os.environ[\"NVIDIA_API_KEY\"] = candidate_api_key"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Usage\n",
+    "\n",
+    "See [LLM](/docs/how_to#llms) for full functionality."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_nvidia_ai_endpoints import NVIDIA"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "llm = NVIDIA().bind(max_tokens=256)\n",
+    "llm"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "prompt = \"# Function that does quicksort written in Rust without comments:\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(llm.invoke(prompt))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Stream, Batch, and Async\n",
+    "\n",
+    "These models natively support streaming and, as is the case with all LangChain LLMs, they expose a batch method to handle concurrent requests, as well as async methods for invoke, stream, and batch. Below are a few examples."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for chunk in llm.stream(prompt):\n",
+    "    print(chunk, end=\"\", flush=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "llm.batch([prompt])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "await llm.ainvoke(prompt)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "async for chunk in llm.astream(prompt):\n",
+    "    print(chunk, end=\"\", flush=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "await llm.abatch([prompt])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "async for chunk in llm.astream_log(prompt):\n",
+    "    print(chunk)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "response = llm.invoke(\n",
+    "    \"X_train, y_train, X_test, y_test = train_test_split(X, y, test_size=0.1) #Train a logistic regression model, predict the labels on the test set and compute the accuracy score\"\n",
+    ")\n",
+    "print(response)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Supported models\n",
+    "\n",
+    "Querying `available_models` will still give you all of the other models offered by your API credentials."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "NVIDIA.get_available_models()\n",
+    "# llm.get_available_models()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "langchain-nvidia-ai-endpoints-m0-Y4aGr-py3.10",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.14"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/__init__.py b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/__init__.py
@@ -42,6 +42,14 @@
 from langchain_nvidia_ai_endpoints._statics import Model, register_model
 from langchain_nvidia_ai_endpoints.chat_models import ChatNVIDIA
 from langchain_nvidia_ai_endpoints.embeddings import NVIDIAEmbeddings
+from langchain_nvidia_ai_endpoints.llm import NVIDIA
 from langchain_nvidia_ai_endpoints.reranking import NVIDIARerank
 
-__all__ = ["ChatNVIDIA", "NVIDIAEmbeddings", "NVIDIARerank", "register_model", "Model"]
+__all__ = [
+    "ChatNVIDIA",
+    "NVIDIA",
+    "NVIDIAEmbeddings",
+    "NVIDIARerank",
+    "register_model",
+    "Model",
+]
diff --git a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_common.py b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_common.py
@@ -17,7 +17,7 @@
     Tuple,
     Union,
 )
-from urllib.parse import urlparse, urlunparse
+from urllib.parse import urlparse
 
 import requests
 from langchain_core.pydantic_v1 import (
@@ -124,7 +124,7 @@ def _preprocess_args(cls, values: Dict[str, Any]) -> Dict[str, Any]:
 
         ## Making sure /v1 in added to the url, followed by infer_path
         if "base_url" in values:
-            base_url = values["base_url"]
+            base_url = values["base_url"].strip("/")
             parsed = urlparse(base_url)
             expected_format = "Expected format is: http://host:port"
 
@@ -133,24 +133,11 @@ def _preprocess_args(cls, values: Dict[str, Any]) -> Dict[str, Any]:
                     f"Invalid base_url format. {expected_format} Got: {base_url}"
                 )
 
-            if parsed.path:
-                normalized_path = parsed.path.strip("/")
-                if normalized_path == "v1":
-                    pass
-                elif normalized_path in [
-                    "v1/embeddings",
-                    "v1/completions",
-                    "v1/rankings",
-                ]:
-                    warnings.warn(f"Using {base_url}, ignoring the rest")
-                else:
-                    raise ValueError(
-                        f"Base URL path is not recognized. {expected_format}"
-                    )
+            if base_url.endswith(
+                ("/embeddings", "/completions", "/rankings", "/reranking")
+            ):
+                warnings.warn(f"Using {base_url}, ignoring the rest")
 
-            base_url = urlunparse(
-                (parsed.scheme, parsed.netloc, "v1", None, None, None)
-            )
             values["base_url"] = base_url
             values["infer_path"] = values["infer_path"].format(base_url=base_url)