langchain-ai · mattf · Sep 19, 2024 · Aug 31, 2024 · Sep 6, 2024 · Sep 9, 2024
diff --git a/libs/ai-endpoints/docs/chat/nvidia_ai_endpoints.ipynb b/libs/ai-endpoints/docs/chat/nvidia_ai_endpoints.ipynb
@@ -270,6 +270,80 @@
    "outputs": [],
    "source": ["from langchain_core.messages import HumanMessage\n\nllm.invoke(\n    [\n        HumanMessage(\n            content=[\n                {\"type\": \"text\", \"text\": \"Describe this image:\"},\n                {\"type\": \"image_url\", \"image_url\": {\"url\": image_url}},\n            ]\n        )\n    ]\n)"]
   },
+  {
+   "cell_type": "markdown",
+   "id": "25e8db7c",
+   "metadata": {},
+   "source": [
+    "#### Passing an image as an NVCF asset\n",
+    "\n",
+    "If your image is sufficiently large or you will pass it multiple times in a chat conversation, you may upload it once and reference it in your chat conversation.\n",
+    "\n",
+    "See https://docs.nvidia.com/cloud-functions/user-guide/latest/cloud-function/assets.html for details about how to upload the image."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "091f7fce",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import requests\n",
+    "\n",
+    "content_type = \"image/jpg\"\n",
+    "description = \"example-image-from-lc-nv-ai-e-notebook\"\n",
+    "\n",
+    "create_response = requests.post(\n",
+    "    \"https://api.nvcf.nvidia.com/v2/nvcf/assets\",\n",
+    "    headers={\n",
+    "        \"Authorization\": f\"Bearer {os.environ['NVIDIA_API_KEY']}\",\n",
+    "        \"accept\": \"application/json\",\n",
+    "        \"Content-Type\": \"application/json\",\n",
+    "    },\n",
+    "    json={\n",
+    "        \"contentType\": content_type,\n",
+    "        \"description\": description\n",
+    "    }\n",
+    ")\n",
+    "create_response.raise_for_status()\n",
+    "\n",
+    "upload_response = requests.put(\n",
+    "    create_response.json()[\"uploadUrl\"],\n",
+    "    headers={\n",
+    "        \"Content-Type\": content_type,\n",
+    "        \"x-amz-meta-nvcf-asset-description\": description,\n",
+    "    },\n",
+    "    data=image_content,\n",
+    ")\n",
+    "upload_response.raise_for_status()\n",
+    "\n",
+    "asset_id = create_response.json()[\"assetId\"]\n",
+    "asset_id"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5c24be59",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "llm.invoke(\n",
+    "    [\n",
+    "        HumanMessage(\n",
+    "            content=[\n",
+    "                {\"type\": \"text\", \"text\": \"Describe this image\"},\n",
+    "                {\n",
+    "                    \"type\": \"image_url\",\n",
+    "                    \"image_url\": {\"url\": f\"data:{content_type};asset_id,{asset_id}\"},\n",
+    "                },\n",
+    "            ]\n",
+    "        )\n",
+    "    ]\n",
+    ")"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "0573dd1f-9a17-4c99-ab2a-8d930b89d283",

diff --git a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_common.py b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_common.py
@@ -331,11 +331,15 @@ def _post(
         self,
         invoke_url: str,
         payload: Optional[dict] = {},
+        extra_headers: dict = {},
     ) -> Tuple[Response, requests.Session]:
         """Method for posting to the AI Foundation Model Function API."""
         self.last_inputs = {
             "url": invoke_url,
-            "headers": self.headers_tmpl["call"],
+            "headers": {
+                **self.headers_tmpl["call"],
+                **extra_headers,
+            },
             "json": payload,
         }
         session = self.get_session_fn()
@@ -443,9 +447,12 @@ def _try_raise(self, response: Response) -> None:
     def get_req(
         self,
         payload: dict = {},
+        extra_headers: dict = {},
     ) -> Response:
         """Post to the API."""
-        response, session = self._post(self.infer_url, payload)
+        response, session = self._post(
+            self.infer_url, payload, extra_headers=extra_headers
+        )
         return self._wait(response, session)
 
     def postprocess(
@@ -519,10 +526,14 @@ def _aggregate_msgs(self, msg_list: Sequence[dict]) -> Tuple[dict, bool]:
     def get_req_stream(
         self,
         payload: dict,
+        extra_headers: dict = {},
     ) -> Iterator[Dict]:
         self.last_inputs = {
             "url": self.infer_url,
-            "headers": self.headers_tmpl["stream"],
+            "headers": {
+                **self.headers_tmpl["stream"],
+                **extra_headers,
+            },
             "json": payload,
         }
 

diff --git a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py
@@ -23,7 +23,7 @@ class Model(BaseModel):
     id: str
     # why do we have a model_type? because ChatNVIDIA can speak both chat and vlm.
     model_type: Optional[
-        Literal["chat", "vlm", "embedding", "ranking", "completions", "qa"]
+        Literal["chat", "vlm", "nv-vlm", "embedding", "ranking", "completions", "qa"]
     ] = None
     client: Optional[
         Literal["ChatNVIDIA", "NVIDIAEmbeddings", "NVIDIARerank", "NVIDIA"]
@@ -41,7 +41,7 @@ def __hash__(self) -> int:
     def validate_client(self) -> "Model":
         if self.client:
             supported = {
-                "ChatNVIDIA": ("chat", "vlm", "qa"),
+                "ChatNVIDIA": ("chat", "vlm", "nv-vlm", "qa"),
                 "NVIDIAEmbeddings": ("embedding",),
                 "NVIDIARerank": ("ranking",),
                 "NVIDIA": ("completions",),
@@ -427,63 +427,56 @@ def validate_client(self) -> "Model":
 VLM_MODEL_TABLE = {
     "adept/fuyu-8b": Model(
         id="adept/fuyu-8b",
-        model_type="vlm",
+        model_type="nv-vlm",
         client="ChatNVIDIA",
         endpoint="https://ai.api.nvidia.com/v1/vlm/adept/fuyu-8b",
         aliases=["ai-fuyu-8b", "playground_fuyu_8b", "fuyu_8b"],
     ),
     "google/deplot": Model(
         id="google/deplot",
-        model_type="vlm",
+        model_type="nv-vlm",
         client="ChatNVIDIA",
         endpoint="https://ai.api.nvidia.com/v1/vlm/google/deplot",
         aliases=["ai-google-deplot", "playground_deplot", "deplot"],
     ),
     "microsoft/kosmos-2": Model(
         id="microsoft/kosmos-2",
-        model_type="vlm",
+        model_type="nv-vlm",
         client="ChatNVIDIA",
         endpoint="https://ai.api.nvidia.com/v1/vlm/microsoft/kosmos-2",
         aliases=["ai-microsoft-kosmos-2", "playground_kosmos_2", "kosmos_2"],
     ),
     "nvidia/neva-22b": Model(
         id="nvidia/neva-22b",
-        model_type="vlm",
+        model_type="nv-vlm",
         client="ChatNVIDIA",
         endpoint="https://ai.api.nvidia.com/v1/vlm/nvidia/neva-22b",
         aliases=["ai-neva-22b", "playground_neva_22b", "neva_22b"],
     ),
     "google/paligemma": Model(
         id="google/paligemma",
-        model_type="vlm",
+        model_type="nv-vlm",
         client="ChatNVIDIA",
         endpoint="https://ai.api.nvidia.com/v1/vlm/google/paligemma",
         aliases=["ai-google-paligemma"],
     ),
     "microsoft/phi-3-vision-128k-instruct": Model(
         id="microsoft/phi-3-vision-128k-instruct",
-        model_type="vlm",
+        model_type="nv-vlm",
         client="ChatNVIDIA",
         endpoint="https://ai.api.nvidia.com/v1/vlm/microsoft/phi-3-vision-128k-instruct",
         aliases=["ai-phi-3-vision-128k-instruct"],
     ),
-    "liuhaotian/llava-v1.6-mistral-7b": Model(
-        id="liuhaotian/llava-v1.6-mistral-7b",
+    "microsoft/phi-3.5-vision-instruct": Model(
+        id="microsoft/phi-3.5-vision-instruct",
         model_type="vlm",
         client="ChatNVIDIA",
-        endpoint="https://ai.api.nvidia.com/v1/stg/vlm/community/llava16-mistral-7b",
-        aliases=[
-            "ai-llava16-mistral-7b",
-            "community/llava16-mistral-7b",
-            "liuhaotian/llava16-mistral-7b",
-        ],
     ),
-    "liuhaotian/llava-v1.6-34b": Model(
-        id="liuhaotian/llava-v1.6-34b",
+    "nvidia/vila": Model(
+        id="nvidia/vila",
         model_type="vlm",
         client="ChatNVIDIA",
-        endpoint="https://ai.api.nvidia.com/v1/stg/vlm/community/llava16-34b",
-        aliases=["ai-llava16-34b", "community/llava16-34b", "liuhaotian/llava16-34b"],
+        endpoint="https://ai.api.nvidia.com/v1/vlm/nvidia/vila",
     ),
 }