Skip to content

Commit

Permalink
Fix neptune doc to visual logic
Browse files Browse the repository at this point in the history
  • Loading branch information
kcz358 committed Nov 23, 2024
1 parent 42faaee commit 4f18c85
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 28 deletions.
17 changes: 3 additions & 14 deletions lmms_eval/models/llava_onevision.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from lmms_eval.api.instance import Instance
from lmms_eval.api.model import lmms
from lmms_eval.api.registry import register_model
from lmms_eval.models.model_utils.load_video import read_video_pyav
from lmms_eval.models.model_utils.load_video import load_video_decord, read_video_pyav

# Suppress warnings
warnings.filterwarnings("ignore")
Expand Down Expand Up @@ -367,17 +367,6 @@ def flatten(self, input):
new_list.append(j)
return new_list

def load_video(self, video_path, max_frames_num):
    """Decode uniformly sampled frames from a video using decord.

    Args:
        video_path: Either a path string, or an indexable container whose
            first element is the path to open.
        max_frames_num: Number of frame indices to sample, evenly spaced
            between the first and the last frame of the video.

    Returns:
        The sampled frames as a NumPy array laid out as
        (frames, height, width, channels).
    """
    # isinstance (rather than an exact type comparison) also accepts str
    # subclasses; anything else is treated as a container holding the path.
    path = video_path if isinstance(video_path, str) else video_path[0]
    vr = VideoReader(path, ctx=cpu(0))
    total_frame_num = len(vr)
    # Evenly spaced positions over [0, total_frame_num - 1], cast to int.
    frame_idx = np.linspace(0, total_frame_num - 1, max_frames_num, dtype=int).tolist()
    return vr.get_batch(frame_idx).asnumpy()  # (frames, height, width, channels)

def generate_until(self, requests: List[Instance]) -> List[str]:
res = []

Expand Down Expand Up @@ -461,7 +450,7 @@ def _collate(x):
image_tensor = []
try:
if self.video_decode_backend == "decord":
frames = self.load_video(visual, self.max_frames_num)
frames = self.load_video_decord(visual, self.max_frames_num)
elif self.video_decode_backend == "pyav":
frames = read_video_pyav(visual[0], num_frm=self.max_frames_num)
frames = self._image_processor.preprocess(frames, return_tensors="pt")["pixel_values"].half().cuda()
Expand Down Expand Up @@ -672,7 +661,7 @@ def _collate(x):
image_tensor = []
try:
if self.video_decode_backend == "decord":
frames = self.load_video(visual, self.max_frames_num)
frames = load_video_decord(visual, self.max_frames_num)
elif self.video_decode_backend == "pyav":
frames = read_video_pyav(visual[0], num_frm=self.max_frames_num)
frames = self._image_processor.preprocess(frames, return_tensors="pt")["pixel_values"].half().cuda()
Expand Down
4 changes: 2 additions & 2 deletions lmms_eval/models/model_utils/load_video.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@

def load_video_decord(video_path, max_frames_num):
if type(video_path) == str:
vr = VideoReader(video_path, ctx=cpu(0))
vr = VideoReader(video_path, ctx=cpu(0), num_threads=1)
else:
vr = VideoReader(video_path[0], ctx=cpu(0))
vr = VideoReader(video_path[0], ctx=cpu(0), num_threads=1)
total_frame_num = len(vr)
uniform_sampled_frames = np.linspace(0, total_frame_num - 1, max_frames_num, dtype=int)
frame_idx = uniform_sampled_frames.tolist()
Expand Down
3 changes: 2 additions & 1 deletion lmms_eval/tasks/neptune/neptune_full.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@ dataset_path: lmms-lab/GoogleDeepMind-NEPTUNE
dataset_name: full
dataset_kwargs:
token: True
cache_dir: ./
cache_dir: neptune
video: True
task: "neptune_full"
test_split: test
output_type: generate_until
Expand Down
22 changes: 11 additions & 11 deletions lmms_eval/tasks/neptune/utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import json
import os
import sys
from glob import glob
from pathlib import Path

import yaml
import sys
from loguru import logger

from lmms_eval.tasks._task_utils.file_utils import generate_submission_file
Expand All @@ -24,17 +25,16 @@

def neptune_full_doc_to_visual(doc):
    """Resolve the cached video file for a NEPTUNE document.

    A glob pattern ``<stem>*.mp4`` is built under the task cache directory
    from ``doc["video_path"]`` (extension stripped), matches whose path
    contains ``"temp"`` are discarded, and the first remaining match is used.

    Args:
        doc: Dataset record; only ``doc["video_path"]`` is read.

    Returns:
        A one-element list. It holds the matched file path, or, when nothing
        matched, a placeholder message string naming the pattern searched.
    """
    cache_dir = os.path.join(base_cache_dir, cache_name)
    # splitext strips only the final extension, so dots in directory names
    # (or earlier in the file name) no longer truncate the stem.
    stem, _ext = os.path.splitext(doc["video_path"])
    pattern = os.path.join(cache_dir, stem + "*.mp4")
    # glob order is arbitrary; sort so the chosen file is deterministic when
    # several candidates match.
    matches = sorted(f for f in glob(pattern) if "temp" not in f)
    if matches:
        return matches[:1]
    # Deliberately return a placeholder instead of exiting so one missing
    # video does not abort the whole run.
    # NOTE(review): presumably the caller's video loading fails on this
    # placeholder and handles the error - confirm.
    # The message reports the searched pattern, not the (empty) match list.
    return [f"video path:{pattern} does not exist, please check"]


def neptune_full_doc_to_text(doc, lmms_eval_specific_kwargs):
Expand Down

0 comments on commit 4f18c85

Please sign in to comment.