Commit

nits and minor changes
dglogo committed May 28, 2024
1 parent aecdf0f commit 540d24c
Showing 5 changed files with 22 additions and 22 deletions.
12 changes: 6 additions & 6 deletions libs/ai-endpoints/README.md
@@ -50,14 +50,14 @@ When ready to deploy, you can self-host models with NVIDIA NIM—which is includ
 ```python
 from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings, NVIDIARerank

-# connect to an chat NIM running at localhost:8000
-llm = ChatNVIDIA(base_url="http://localhost:8000/v1")
+# connect to a chat NIM running at localhost:8000, specifying a model
+llm = ChatNVIDIA(base_url="http://localhost:8000/v1", model="meta-llama3-8b-instruct")

-# connect to an embedding NIM running at localhost:2016
-embedder = NVIDIAEmbeddings(base_url="http://localhost:2016/v1")
+# connect to an embedding NIM running at localhost:8080
+embedder = NVIDIAEmbeddings(base_url="http://localhost:8080/v1")

-# connect to a reranking NIM running at localhost:1976
-ranker = NVIDIARerank(base_url="http://localhost:1976/v1")
+# connect to a reranking NIM running at localhost:2016
+ranker = NVIDIARerank(base_url="http://localhost:2016/v1")
 ```

 ## Stream, Batch, and Async
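The hunk is truncated where the "Stream, Batch, and Async" section begins. For orientation, here is a minimal sketch of streaming with the chat client configured above, assuming the chat NIM is actually running at localhost:8000; the prompt is illustrative:

```python
from langchain_nvidia_ai_endpoints import ChatNVIDIA

# assumes the chat NIM from the snippet above is running locally
llm = ChatNVIDIA(base_url="http://localhost:8000/v1", model="meta-llama3-8b-instruct")

# stream() yields message chunks as tokens arrive, instead of waiting for the full reply
for chunk in llm.stream("Write a haiku about GPUs."):
    print(chunk.content, end="", flush=True)
```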
6 changes: 3 additions & 3 deletions libs/ai-endpoints/docs/chat/nvidia_ai_endpoints.ipynb
@@ -22,7 +22,7 @@
"NIMs are packaged as container images on a per model basis and are distributed as NGC container images through the NVIDIA NGC Catalog. \n",
"At their core, NIMs provide easy, consistent, and familiar APIs for running inference on an AI model.\n",
"\n",
"This example goes over how to use LangChain to interact with the supported [NVIDIA Retrieval QA Embedding Model](https://build.nvidia.com/nvidia/embed-qa-4) for [retrieval-augmented generation](https://developer.nvidia.com/blog/build-enterprise-retrieval-augmented-generation-apps-with-nvidia-retrieval-qa-embedding-model/) via the `NVIDIAEmbeddings` class.\n",
"This example goes over how to use LangChain to interact with NVIDIA supported via the `ChatNVIDIA` class.\n",
"\n",
"For more information on accessing the chat models through this api, check out the [ChatNVIDIA](https://python.langchain.com/docs/integrations/chat/nvidia_ai_endpoints/) documentation."
]
@@ -133,8 +133,8 @@
"source": [
"from langchain_nvidia_ai_endpoints import ChatNVIDIA\n",
"\n",
"# connect to an embedding NIM running at localhost:8000\n",
"llm = ChatNVIDIA(base_url=\"http://localhost:8000/v1\")"
"# connect to an embedding NIM running at localhost:8000, specifying a specific model\n",
"llm = ChatNVIDIA(base_url=\"http://localhost:8000/v1\", model=\"meta-llama3-8b-instruct\")"
]
},
{
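For context, the `ChatNVIDIA` instance built in this cell is a regular LangChain chat model, so a plain `invoke` call is the simplest smoke test. A minimal sketch, with an illustrative question and the same assumed local NIM:

```python
from langchain_nvidia_ai_endpoints import ChatNVIDIA

llm = ChatNVIDIA(base_url="http://localhost:8000/v1", model="meta-llama3-8b-instruct")

# invoke() returns an AIMessage; .content holds the generated text
response = llm.invoke("What is a NIM in one sentence?")
print(response.content)
```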
12 changes: 6 additions & 6 deletions libs/ai-endpoints/docs/providers/nvidia.mdx
@@ -61,14 +61,14 @@ When ready to deploy, you can self-host models with NVIDIA NIM—which is includ
 ```python
 from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings, NVIDIARerank

-# connect to an chat NIM running at localhost:8000
-llm = ChatNVIDIA(base_url="http://localhost:8000/v1")
+# connect to a chat NIM running at localhost:8000, specifying a model
+llm = ChatNVIDIA(base_url="http://localhost:8000/v1", model="meta-llama3-8b-instruct")

-# connect to an embedding NIM running at localhost:2016
-embedder = NVIDIAEmbeddings(base_url="http://localhost:2016/v1")
+# connect to an embedding NIM running at localhost:8080
+embedder = NVIDIAEmbeddings(base_url="http://localhost:8080/v1")

-# connect to a reranking NIM running at localhost:1976
-ranker = NVIDIARerank(base_url="http://localhost:1976/v1")
+# connect to a reranking NIM running at localhost:2016
+ranker = NVIDIARerank(base_url="http://localhost:2016/v1")
 ```

 ## Using NVIDIA AI Foundation Endpoints
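As a point of reference, the embedding client reconfigured above follows LangChain's standard `Embeddings` interface. A small sketch, assuming the embedding NIM is running at localhost:8080; the texts are illustrative:

```python
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings

# assumes the embedding NIM from the snippet above is running locally
embedder = NVIDIAEmbeddings(base_url="http://localhost:8080/v1")

# embed_query() for single queries, embed_documents() for batches of passages
query_vector = embedder.embed_query("What is a NIM?")
doc_vectors = embedder.embed_documents(["NIMs are packaged as container images."])
print(len(query_vector), len(doc_vectors))
```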
8 changes: 4 additions & 4 deletions libs/ai-endpoints/docs/retrievers/nvidia_rerank.ipynb
@@ -107,11 +107,11 @@
"source": [
"from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings, NVIDIARerank\n",
"\n",
"# connect to an embedding NIM running at localhost:2016\n",
"embedder = NVIDIAEmbeddings(base_url=\"http://localhost:2016/v1\")\n",
"# connect to an embedding NIM running at localhost:8080\n",
"embedder = NVIDIAEmbeddings(base_url=\"http://localhost:8080/v1\")\n",
"\n",
"# connect to a reranking NIM running at localhost:1976\n",
"ranker = NVIDIARerank(base_url=\"http://localhost:1976/v1\")"
"# connect to a reranking NIM running at localhost:2016\n",
"reranker = NVIDIARerank(base_url=\"http://localhost:2016/v1\")"
]
},
{
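For reference, a brief sketch of applying the renamed `reranker`: `compress_documents` is the LangChain document-compressor entry point, the sample documents and query are illustrative, and the `relevance_score` metadata key is an assumption about how the client reports scores:

```python
from langchain_core.documents import Document
from langchain_nvidia_ai_endpoints import NVIDIARerank

# assumes the reranking NIM from the cell above is running at localhost:2016
reranker = NVIDIARerank(base_url="http://localhost:2016/v1")

docs = [
    Document(page_content="NIMs are packaged as container images."),
    Document(page_content="FAISS is a library for vector similarity search."),
]

# reorders the documents by relevance to the query; scores land in metadata
ranked = reranker.compress_documents(documents=docs, query="What is a NIM?")
for doc in ranked:
    print(doc.metadata.get("relevance_score"), doc.page_content)
```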
6 changes: 3 additions & 3 deletions
@@ -149,8 +149,8 @@
"source": [
"from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings\n",
"\n",
"# connect to an embedding NIM running at localhost:2016\n",
"embedder = NVIDIAEmbeddings(base_url=\"http://localhost:2016/v1\")"
"# connect to an embedding NIM running at localhost:8080\n",
"embedder = NVIDIAEmbeddings(base_url=\"http://localhost:8080/v1\")"
]
},
{
@@ -437,7 +437,7 @@
"source": [
"vectorstore = FAISS.from_texts(\n",
" [\"harrison worked at kensho\"],\n",
" embedding=NVIDIAEmbeddings(model=\"ai-embed-qa-4\"),\n",
" embedding=NVIDIAEmbeddings(model=\"NV-Embed-QA\"),\n",
")\n",
"retriever = vectorstore.as_retriever()\n",
"\n",
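To round out the hunk above, a sketch of querying the retriever built on the corrected `NV-Embed-QA` model. The FAISS import and question are assumptions based on the surrounding notebook, and without a `base_url` this hits NVIDIA's hosted endpoint, so an `NVIDIA_API_KEY` would need to be set:

```python
from langchain_community.vectorstores import FAISS
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings

# no base_url, so this uses the hosted endpoint (assumes NVIDIA_API_KEY is set)
vectorstore = FAISS.from_texts(
    ["harrison worked at kensho"],
    embedding=NVIDIAEmbeddings(model="NV-Embed-QA"),
)
retriever = vectorstore.as_retriever()

# similarity search over the indexed texts returns a list of Documents
print(retriever.invoke("Where did harrison work?"))
```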
