Skip to content

Commit

Permalink
Fix qwen vl image input bug
Browse files Browse the repository at this point in the history
  • Loading branch information
brian.li committed Nov 2, 2024
1 parent 1d4a6c9 commit 967bbb4
Showing 1 changed file with 7 additions and 6 deletions.
13 changes: 7 additions & 6 deletions lmms_eval/models/qwen2_vl.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,12 +234,13 @@ def _collate(x):

texts = [self.processor.apply_chat_template(msg, tokenize=False, add_generation_prompt=True) for msg in messages]
image_inputs, video_inputs = process_vision_info(messages)
-total_frames = video_inputs[0].shape[0]
-indices = np.linspace(0, total_frames - 1, self.max_num_frames, dtype=int)
-# Append the last frame index if not already included
-if total_frames - 1 not in indices:
-    indices = np.append(indices, total_frames - 1)
-video_inputs[0] = video_inputs[0][indices]
+if video_inputs is not None:
+    total_frames = video_inputs[0].shape[0]
+    indices = np.linspace(0, total_frames - 1, self.max_num_frames, dtype=int)
+    # Append the last frame index if not already included
+    if total_frames - 1 not in indices:
+        indices = np.append(indices, total_frames - 1)
+    video_inputs[0] = video_inputs[0][indices]
inputs = self.processor(text=texts, images=image_inputs, videos=video_inputs, padding=True, return_tensors="pt")

if self.device_map == "auto":
Expand Down

0 comments on commit 967bbb4

Please sign in to comment.