From 7207e0bb15a7cedfce8580e0cb3f67b189873ed5 Mon Sep 17 00:00:00 2001
From: ethan <ethan.yang@intel.com>
Date: Wed, 20 Nov 2024 21:55:41 -0800
Subject: [PATCH] replace llm test case

fix ci issues
---
 .../multimodal-rag-llamaindex.ipynb           | 141 +++++++++---------
 1 file changed, 68 insertions(+), 73 deletions(-)

diff --git a/notebooks/multimodal-rag/multimodal-rag-llamaindex.ipynb b/notebooks/multimodal-rag/multimodal-rag-llamaindex.ipynb
index eccdc78be06..fbf881aada2 100644
--- a/notebooks/multimodal-rag/multimodal-rag-llamaindex.ipynb
+++ b/notebooks/multimodal-rag/multimodal-rag-llamaindex.ipynb
@@ -56,7 +56,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "id": "ad6c48df",
    "metadata": {},
    "outputs": [
@@ -100,7 +100,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 3,
    "id": "2c61cb01-9c46-46e3-bf22-20c4ca0da417",
    "metadata": {},
    "outputs": [],
@@ -135,10 +135,36 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 4,
    "id": "b4d0e724",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "config.json: 100%|█████████████████████████| 1.26k/1.26k [00:00<00:00, 11.1MB/s]\n",
+      "model.safetensors: 100%|███████████████████| 1.51G/1.51G [00:35<00:00, 42.5MB/s]\n",
+      "generation_config.json: 100%|██████████████| 4.25k/4.25k [00:00<00:00, 11.3MB/s]\n",
+      "tokenizer_config.json: 100%|█████████████████| 283k/283k [00:00<00:00, 1.89MB/s]\n",
+      "vocab.json: 100%|██████████████████████████| 1.04M/1.04M [00:00<00:00, 22.5MB/s]\n",
+      "tokenizer.json: 100%|██████████████████████| 2.48M/2.48M [00:00<00:00, 6.70MB/s]\n",
+      "merges.txt: 100%|████████████████████████████| 494k/494k [00:00<00:00, 2.60MB/s]\n",
+      "normalizer.json: 100%|█████████████████████| 52.7k/52.7k [00:00<00:00, 13.1MB/s]\n",
+      "added_tokens.json: 100%|███████████████████| 34.6k/34.6k [00:00<00:00, 8.16MB/s]\n",
+      "special_tokens_map.json: 100%|█████████████| 2.07k/2.07k [00:00<00:00, 7.74MB/s]\n",
+      "preprocessor_config.json: 100%|████████████████| 340/340 [00:00<00:00, 2.10MB/s]\n",
+      "Moving the following attributes in the config to the generation config: {'max_length': 448, 'begin_suppress_tokens': [220, 50257]}. You are seeing this warning because you've set generation parameters in the model config, as opposed to in the generation config.\n",
+      "/home2/ethan/intel/openvino_notebooks/openvino_venv/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py:1071: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n",
+      "  if input_features.shape[-1] != expected_seq_length:\n",
+      "/home2/ethan/intel/openvino_notebooks/openvino_venv/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py:388: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n",
+      "  if attn_output.size() != (bsz, self.num_heads, tgt_len, self.head_dim):\n",
+      "Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.43.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.\n",
+      "/home2/ethan/intel/openvino_notebooks/openvino_venv/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py:101: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n",
+      "  if sequence_length != 1:\n"
+     ]
+    }
+   ],
    "source": [
     "asr_model_id = \"distil-whisper/distil-large-v3\"\n",
     "asr_model_path = asr_model_id.split(\"/\")[-1]\n",
@@ -163,16 +189,32 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 5,
    "id": "d2ea678c",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "open_clip_pytorch_model.bin: 100%|████████████| 605M/605M [00:05<00:00, 103MB/s]\n",
+      "open_clip_config.json: 100%|███████████████████| 604/604 [00:00<00:00, 4.71MB/s]\n",
+      "tokenizer_config.json: 100%|███████████████████| 904/904 [00:00<00:00, 3.29MB/s]\n",
+      "vocab.json: 100%|████████████████████████████| 862k/862k [00:00<00:00, 3.28MB/s]\n",
+      "merges.txt: 100%|████████████████████████████| 525k/525k [00:00<00:00, 2.37MB/s]\n",
+      "tokenizer.json: 100%|██████████████████████| 2.22M/2.22M [00:00<00:00, 23.0MB/s]\n",
+      "special_tokens_map.json: 100%|█████████████████| 389/389 [00:00<00:00, 1.48MB/s]\n",
+      "preprocessor_config.json: 100%|████████████████| 316/316 [00:00<00:00, 1.05MB/s]\n",
+      "config.json: 100%|█████████████████████████| 4.36k/4.36k [00:00<00:00, 16.0MB/s]\n"
+     ]
+    }
+   ],
    "source": [
     "clip_model_id = \"laion/CLIP-ViT-B-32-laion2B-s34B-b79K\"\n",
     "clip_model_path = clip_model_id.split(\"/\")[-1]\n",
     "\n",
     "if not Path(clip_model_path).exists():\n",
-    "    !optimum-cli export openvino -m {clip_model_id} {clip_model_path}"
+    "    !optimum-cli export openvino --model {clip_model_id} {clip_model_path}"
    ]
   },
   {
@@ -191,9 +233,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 6,
    "id": "eedcf36c",
-   "metadata": {},
+   "metadata": {
+    "test_replace": {
+     "microsoft/Phi-3.5-vision-instruct": "qnguyen3/nanoLLaVA"
+    }
+   },
    "outputs": [],
    "source": [
     "vlm_model_id = \"microsoft/Phi-3.5-vision-instruct\"\n",
@@ -205,71 +251,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 7,
    "id": "f4e131e3-0ab4-4e9e-ab0e-e68e7793cba5",
    "metadata": {},
    "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "939fa2c6f0a54623a74691926f6e5458",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Output()"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"></pre>\n"
-      ],
-      "text/plain": []
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "INFO:nncf:Statistics of the bitwidth distribution:\n",
-      "┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑\n",
-      "│ Weight compression mode   │ % all parameters (layers)   │ % ratio-defining parameters (layers)   │\n",
-      "┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥\n",
-      "│ int8_asym                 │ 42% (54 / 129)              │ 40% (53 / 128)                         │\n",
-      "├───────────────────────────┼─────────────────────────────┼────────────────────────────────────────┤\n",
-      "│ int4_sym                  │ 58% (75 / 129)              │ 60% (75 / 128)                         │\n",
-      "┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙\n"
+      "INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, onnx, openvino\n"
      ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "837ecab693704b029a9d35561a2b0237",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Output()"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"></pre>\n"
-      ],
-      "text/plain": []
-     },
-     "metadata": {},
-     "output_type": "display_data"
     }
    ],
    "source": [
@@ -320,7 +311,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 8,
    "id": "093464db-893e-4813-a6cc-19473a1a890c",
    "metadata": {},
    "outputs": [],
@@ -351,14 +342,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 9,
    "id": "b6636cd0",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "09ba16da2e5d488594675f073745d7d7",
+       "model_id": "90ac5e539754491394e19cf6410985aa",
        "version_major": 2,
        "version_minor": 0
       },
@@ -366,7 +357,7 @@
        "Dropdown(description='Device:', index=2, options=('CPU', 'GPU', 'AUTO'), value='AUTO')"
       ]
      },
-     "execution_count": 27,
+     "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -391,7 +382,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 33,
+   "execution_count": 10,
    "id": "534c83b8-a8f4-499f-bfad-6799fdbabe8c",
    "metadata": {},
    "outputs": [],
@@ -407,7 +398,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 34,
+   "execution_count": 11,
    "id": "d300f17f-bf8d-4cc2-a61a-86fbb2529b3d",
    "metadata": {},
    "outputs": [],
@@ -859,7 +850,11 @@
    "cell_type": "code",
    "execution_count": 51,
    "id": "d5866d53-7c5c-48da-b3e2-6f173509e9fd",
-   "metadata": {},
+   "metadata": {
+    "test_replace": {
+     "Phi-3.5-vision-instruct/INT4": "nanoLLaVA/INT4"
+    }
+   },
    "outputs": [],
    "source": [
     "from transformers import AutoProcessor, AutoTokenizer\n",