Fix Qwen2-VL image input bug: skip video frame sampling when `video_inputs` is None

EvolvingLMMs-Lab · Nov 2, 2024 · 967bbb4
1 parent 1d4a6c9
commit 967bbb4
Showing 1 changed file with 7 additions and 6 deletions.
diff --git a/lmms_eval/models/qwen2_vl.py b/lmms_eval/models/qwen2_vl.py
@@ -234,12 +234,13 @@ def _collate(x):
 
             texts = [self.processor.apply_chat_template(msg, tokenize=False, add_generation_prompt=True) for msg in messages]
             image_inputs, video_inputs = process_vision_info(messages)
-            total_frames = video_inputs[0].shape[0]
-            indices = np.linspace(0, total_frames - 1, self.max_num_frames, dtype=int)
-            # Append the last frame index if not already included
-            if total_frames - 1 not in indices:
-                indices = np.append(indices, total_frames - 1)
-            video_inputs[0] = video_inputs[0][indices]
+            if video_inputs is not None:
+                total_frames = video_inputs[0].shape[0]
+                indices = np.linspace(0, total_frames - 1, self.max_num_frames, dtype=int)
+                # Append the last frame index if not already included
+                if total_frames - 1 not in indices:
+                    indices = np.append(indices, total_frames - 1)
+                video_inputs[0] = video_inputs[0][indices]
             inputs = self.processor(text=texts, images=image_inputs, videos=video_inputs, padding=True, return_tensors="pt")
 
             if self.device_map == "auto":