From 4c876f0e02abc492e334bfc1170070194577fdb1 Mon Sep 17 00:00:00 2001
From: Matthew Farrellee
Date: Fri, 24 May 2024 07:40:53 -0400
Subject: [PATCH] nemotron_qa_8b is no longer available

---
 libs/ai-endpoints/README.md                   | 28 --------------
 .../docs/chat/nvidia_ai_endpoints.ipynb       | 37 -------------------
 .../langchain_nvidia_ai_endpoints/_statics.py |  1 -
 .../tests/integration_tests/conftest.py       |  2 +-
 4 files changed, 1 insertion(+), 67 deletions(-)

diff --git a/libs/ai-endpoints/README.md b/libs/ai-endpoints/README.md
index d1b2649e..de2500c0 100644
--- a/libs/ai-endpoints/README.md
+++ b/libs/ai-endpoints/README.md
@@ -258,34 +258,6 @@ llm.invoke(
 )
 ```
 
-## RAG: Context models
-
-NVIDIA also has Q&A models that support a special "context" chat message containing retrieved context (such as documents within a RAG chain). This is useful to avoid prompt-injecting the model.
-
-**Note:** Only "user" (human) and "context" chat messages are supported for these models, not system or AI messages useful in conversational flows.
-
-The `_qa_` models like `nemotron_qa_8b` support this.
-
-```python
-from langchain_nvidia_ai_endpoints import ChatNVIDIA
-from langchain_core.prompts import ChatPromptTemplate
-from langchain_core.output_parsers import StrOutputParser
-from langchain_core.messages import ChatMessage
-prompt = ChatPromptTemplate.from_messages(
-    [
-        ChatMessage(role="context", content="Parrots and Cats have signed the peace accord."),
-        ("user", "{input}")
-    ]
-)
-llm = ChatNVIDIA(model="nemotron_qa_8b")
-chain = (
-    prompt
-    | llm
-    | StrOutputParser()
-)
-chain.invoke({"input": "What was signed?"})
-```
-
 ## Embeddings
 
 You can also connect to embeddings models through this package. Below is an example:
diff --git a/libs/ai-endpoints/docs/chat/nvidia_ai_endpoints.ipynb b/libs/ai-endpoints/docs/chat/nvidia_ai_endpoints.ipynb
index ee817725..ce0e706b 100644
--- a/libs/ai-endpoints/docs/chat/nvidia_ai_endpoints.ipynb
+++ b/libs/ai-endpoints/docs/chat/nvidia_ai_endpoints.ipynb
@@ -567,43 +567,6 @@
     "For more advanced or custom use-cases (i.e. supporting the diffusion models), you may be interested in leveraging the `NVEModel` client as a requests backbone. The `NVIDIAEmbeddings` class is a good source of inspiration for this. "
    ]
   },
-  {
-   "cell_type": "markdown",
-   "id": "1cd6249a-7ffa-4886-b7e8-5778dc93499e",
-   "metadata": {},
-   "source": [
-    "## RAG: Context models\n",
-    "\n",
-    "NVIDIA also has Q&A models that support a special \"context\" chat message containing retrieved context (such as documents within a RAG chain). This is useful to avoid prompt-injecting the model. The `_qa_` models like `nemotron_qa_8b` support this.\n",
-    "\n",
-    "**Note:** Only \"user\" (human) and \"context\" chat messages are supported for these models; System or AI messages that would useful in conversational flows are not supported."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "f994b4d3-c1b0-4e87-aad0-a7b487e2aa43",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from langchain_core.messages import ChatMessage\n",
-    "from langchain_core.output_parsers import StrOutputParser\n",
-    "from langchain_core.prompts import ChatPromptTemplate\n",
-    "from langchain_nvidia_ai_endpoints import ChatNVIDIA\n",
-    "\n",
-    "prompt = ChatPromptTemplate.from_messages(\n",
-    "    [\n",
-    "        ChatMessage(\n",
-    "            role=\"context\", content=\"Parrots and Cats have signed the peace accord.\"\n",
-    "        ),\n",
-    "        (\"user\", \"{input}\"),\n",
-    "    ]\n",
-    ")\n",
-    "llm = ChatNVIDIA(model=\"nemotron_qa_8b\")\n",
-    "chain = prompt | llm | StrOutputParser()\n",
-    "chain.invoke({\"input\": \"What was signed?\"})"
-   ]
-  },
   {
    "cell_type": "markdown",
    "id": "137662a6",
diff --git a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py
index 55eada05..afc6434c 100644
--- a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py
+++ b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py
@@ -24,7 +24,6 @@ class Model(BaseModel):
         "api_type": "aifm",
         "alternative": "meta/llama2-70b",
     },
-    "playground_nemotron_qa_8b": {"model_type": "qa", "api_type": "aifm"},
     "playground_gemma_7b": {
         "model_type": "chat",
         "api_type": "aifm",
diff --git a/libs/ai-endpoints/tests/integration_tests/conftest.py b/libs/ai-endpoints/tests/integration_tests/conftest.py
index 649d1032..98dde45a 100644
--- a/libs/ai-endpoints/tests/integration_tests/conftest.py
+++ b/libs/ai-endpoints/tests/integration_tests/conftest.py
@@ -87,7 +87,7 @@ def get_all_models() -> List[Model]:
         metafunc.parametrize("image_in_model", models, ids=models)
 
     if "qa_model" in metafunc.fixturenames:
-        models = ["nemotron_qa_8b"]
+        models = []
         if metafunc.config.getoption("all_models"):
             models = [
                 model.id for model in get_all_models() if model.model_type == "qa"