
Eval/vbench #312

Merged · 18 commits · May 13, 2024
1 change: 1 addition & 0 deletions .github/workflows/unit-test.yml
@@ -30,6 +30,7 @@ jobs:
sudo apt-get install ffmpeg
python -m pip install --upgrade pip
pip install -v -e .[all]
pip install -v -e .[sandbox]
- name: Increase swapfile
run: |
df -h
1 change: 1 addition & 0 deletions Dockerfile
@@ -26,6 +26,7 @@ RUN cat environments/* | xargs pip install --default-timeout 1000
# install data-juicer then
COPY . .
RUN pip install -v -e .[all]
RUN pip install -v -e .[sandbox]

# install 3rd-party system dependencies
RUN apt-get update && apt-get install ffmpeg libsm6 libxext6 -y
4 changes: 2 additions & 2 deletions README.md
@@ -178,12 +178,12 @@ The dependency options are listed below:
| Tag | Description |
|------------------|----------------------------------------------------------------------------------------------|
| `.` or `.[mini]` | Install minimal dependencies for basic Data-Juicer. |
| `.[all]` | Install all optional dependencies (including minimal dependencies and all of the following). |
| `.[all]` | Install all dependencies except sandbox. |
| `.[sci]` | Install all dependencies for all OPs. |
| `.[sandbox]` | Install all dependencies for sandbox. |
| `.[dist]` | Install dependencies for distributed data processing. (Experimental) |
| `.[dev]` | Install dependencies for developing the package as contributors. |
| `.[tools]` | Install dependencies for dedicated tools, such as quality classifiers. |
| `.[sandbox]` | Install all dependencies for sandbox. |

### Using pip

4 changes: 2 additions & 2 deletions README_ZH.md
@@ -161,12 +161,12 @@ pip install -v -e .[tools] # 安装部分工具库的依赖
| 标签 | 描述 |
|------------------|------------------------------|
| `.` 或者 `.[mini]` | 安装支持 Data-Juicer 基础功能的最小依赖项 |
| `.[all]` | 安装所有可选依赖项(包括最小依赖项以及下面所有依赖项) |
| `.[all]` | 安装除了沙盒实验以外的所有依赖项 |
| `.[sci]` | 安装所有算子的全量依赖 |
| `.[sandbox]` | 安装沙盒实验室的基础依赖 |
| `.[dist]` | 安装以分布式方式进行数据处理的依赖(实验性功能) |
| `.[dev]` | 安装作为贡献者开发 Data-Juicer 所需的依赖项 |
| `.[tools]` | 安装专用工具库(如质量分类器)所需的依赖项 |
| `.[sandbox]` | 安装沙盒实验室的基础依赖 |

### 使用 pip 安装

27 changes: 27 additions & 0 deletions configs/demo/sandbox/vbench_eval_config.yaml
@@ -0,0 +1,27 @@
type: vbench_video_evaluator

# The vbench prompts for video generation.
prompt_path: ./tools/mm_eval/vbench_metrics/VBench_full_info.json

# The path to the dir of generated videos
videos_path: /path/to/the/generated/videos

# The dir to store the eval results
result_dir: ./outputs/demo-sandbox/vbench_eval_results

# Give a name for this eval
eval_name: <eval_name>

# If true, load the models required by VBench from the cache path given by the environment variable VBENCH_CACHE_DIR
load_ckpt_from_local: false

# The dimensions considered in this eval.
# All dimensions include: ['subject_consistency', 'background_consistency', 'temporal_flickering',
# 'motion_smoothness', 'dynamic_degree', 'aesthetic_quality', 'imaging_quality', 'object_class',
# 'multiple_objects', 'human_action', 'color', 'spatial_relationship', 'scene', 'temporal_style',
# 'appearance_style', 'overall_consistency']
# NOTE: The current version of vbench on PyPI lacks the third-party code needed for motion_smoothness.
# NOTE: Besides, when len(dimension_list) > 1, an error occurs during video loading.
dimension_list:
- subject_consistency
- dynamic_degree
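For reference, a minimal sketch of loading this demo config from Python, assuming plain PyYAML plus `argparse.Namespace` for the attribute-style access the evaluator expects; the actual sandbox pipeline may parse configs differently:

```python
# Minimal sketch: load the demo eval config and inspect the fields the
# evaluator reads. Assumes PyYAML; the real pipeline may use another parser.
import yaml
from argparse import Namespace

with open('configs/demo/sandbox/vbench_eval_config.yaml') as f:
    eval_cfg = Namespace(**yaml.safe_load(f))

print(eval_cfg.type)            # 'vbench_video_evaluator'
print(eval_cfg.dimension_list)  # ['subject_consistency', 'dynamic_degree']
```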
50 changes: 50 additions & 0 deletions data_juicer/core/sandbox/evaluators.py
@@ -1,6 +1,12 @@
import json
import os
import shutil

import torch
from loguru import logger
from vbench import VBench

from data_juicer import cuda_device_count
# TODO: tools cannot be imported correctly if DJ is installed from PyPI.
# We may need a different import mechanism.
from tools.quality_classifier.predict import predict_score
@@ -74,6 +80,50 @@ def run(self, eval_type, eval_obj, **kwargs):
'To be refactored from gpt4v related operators/tools.')


class VBenchEvaluator(BaseEvaluator):

    def get_score(self, result_path, dimension):
        # Each per-dimension result file stores [overall_score, per-video details].
        with open(result_path) as f:
            cur_result = json.load(f)
        return cur_result[dimension][0]

    def run(self, eval_type, eval_obj, **kwargs):
        if eval_type == 'data':
            prompt_path = self.eval_config.prompt_path
            videos_path = self.eval_config.videos_path
            result_dir = self.eval_config.result_dir
            name = self.eval_config.eval_name
            dimension_list = self.eval_config.dimension_list
            local = self.eval_config.load_ckpt_from_local
            if cuda_device_count() > 0:
                device = torch.device('cuda')
            else:
                device = torch.device('cpu')
            my_vbench = VBench(device, prompt_path, result_dir)
            result_dict = {'mean_score': 0, 'detail': {}}
            scores = []
            # Evaluate one dimension at a time: passing multiple dimensions
            # in a single call breaks video loading (see the NOTE in the
            # demo config).
            for dimension in dimension_list:
                logger.info(f'Evaluating for {dimension}')
                my_vbench.evaluate(videos_path=videos_path,
                                   name=f'{name}-{dimension}',
                                   dimension_list=[dimension],
                                   local=local)
                score = self.get_score(result_path=os.path.join(
                    result_dir, f'{name}-{dimension}_eval_results.json'),
                                       dimension=dimension)
                result_dict['detail'][dimension] = score
                scores.append(score)
            result_dict['mean_score'] = sum(scores) / len(scores)

            with open(os.path.join(result_dir, name + '_merged_results.json'),
                      'w') as f:
                json.dump(result_dict, f)

            return float(result_dict['mean_score'])
        else:
            raise NotImplementedError(
                'Unsupported evaluation type: {}'.format(eval_type))


class LmHarnessEvaluator(BaseEvaluator):

def run(self, eval_type, eval_obj, **kwargs):
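A hedged sketch of consuming the merged results file that `VBenchEvaluator.run` writes; the paths and example scores below are illustrative placeholders, not real outputs:

```python
# Hedged sketch: inspect the merged results written by VBenchEvaluator.run.
# The result_dir, eval name, and example contents are placeholders.
import json
import os

result_dir = './outputs/demo-sandbox/vbench_eval_results'
name = 'my_eval'  # whatever was set as eval_name in the config

with open(os.path.join(result_dir, name + '_merged_results.json')) as f:
    merged = json.load(f)

# Expected shape, e.g.:
# {'mean_score': 0.78, 'detail': {'subject_consistency': 0.95,
#                                 'dynamic_degree': 0.61}}
print(merged['mean_score'], merged['detail'])
```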
8 changes: 4 additions & 4 deletions data_juicer/core/sandbox/factories.py
@@ -1,4 +1,5 @@
from data_juicer.core.sandbox.evaluators import Gpt3QualityEvaluator
from data_juicer.core.sandbox.evaluators import (Gpt3QualityEvaluator,
                                                 VBenchEvaluator)
from data_juicer.core.sandbox.model_executors import (ModelscopeInferExecutor,
                                                      ModelscopeTrainExecutor)

@@ -15,9 +16,8 @@ def __call__(self, eval_cfg: dict = None, *args, **kwargs):
return None

        evaluator = None
        if eval_cfg.type == 'dj_video_evaluator':
            pass
            # evaluator = VideoEvaluator(eval_cfg)
        if eval_cfg.type == 'vbench_video_evaluator':
            evaluator = VBenchEvaluator(eval_cfg)
        if eval_cfg.type == 'dj_text_quality_classifier':
            evaluator = Gpt3QualityEvaluator(eval_cfg)

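Putting the factory and the evaluator together, a sandbox run might look roughly like the sketch below. The factory instance name `evaluator_factory` and the config loading are assumptions for illustration, not confirmed by this diff:

```python
# Hedged end-to-end sketch; the factory name below is an assumption.
import yaml
from argparse import Namespace

from data_juicer.core.sandbox.factories import evaluator_factory  # assumed name

with open('configs/demo/sandbox/vbench_eval_config.yaml') as f:
    eval_cfg = Namespace(**yaml.safe_load(f))

evaluator = evaluator_factory(eval_cfg)  # dispatches on eval_cfg.type
mean_score = evaluator.run(eval_type='data', eval_obj=None)  # eval_obj unused
print(f'VBench mean score: {mean_score}')
```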
1 change: 1 addition & 0 deletions docs/Sandbox-ZH.md
@@ -88,6 +88,7 @@ python tools/sandbox_starter.py --config configs/demo/sandbox/sandbox.yaml
| 组件 | 功能 | `run`方法说明 | 参考材料 |
| --- | --- | --- | --- |
| `Gpt3QualityEvaluator` | 使用Data-Juicer复现的GPT-3文本质量分类器对数据集进行质量评估 | <br />- `eval_type`:该评估器评估对象类型,目前只支持`"data"`<br />- `eval_obj`:待评估的数据集路径<br />- 返回值:待评估数据集样本质量打分均值<br /> | [Data-Juicer质量分类器工具集](https://github.com/modelscope/data-juicer/tree/main/tools/quality_classifier) |
| `VBenchEvaluator` | 使用VBench对基于prompt生成的视频进行多维度的评估 | <br />- `eval_type`:该评估器评估对象类型,目前只支持`"data"`<br />- `eval_obj`:未使用的参数<br />- 返回值:待评生成视频集各维度打分均值<br /> | [VBench论文](https://arxiv.org/abs/2311.17982) |

- 模型训练工厂 -- ModelTrainExecutorFactory

1 change: 1 addition & 0 deletions docs/Sandbox.md
@@ -88,6 +88,7 @@ The currently supported component factories and the components supported within
| Component | Function | Desc. of Method `run` | Reference Materials |
| --- | --- | --- | --- |
| `Gpt3QualityEvaluator` | Evaluate the quality of a dataset using the GPT-3 text quality classifier reproduced by Data-Juicer. | <br />- `eval_type`: The type of the object to be evaluated by the evaluator, currently only supports `"data"`.<br />- `eval_obj`: The path to the dataset to be evaluated.<br />- Return: The average quality score of the dataset samples.<br /> | [Data-Juicer Quality Classifier Toolkit](https://github.com/modelscope/data-juicer/tree/main/tools/quality_classifier) |
| `VBenchEvaluator` | Evaluate generated videos across multiple dimensions according to the given prompts. | <br />- `eval_type`: The type of the object to be evaluated by the evaluator, currently only supports `"data"`.<br />- `eval_obj`: An unused parameter.<br />- Return: The average score of the generated videos across the evaluated dimensions.<br /> | [VBench paper](https://arxiv.org/abs/2311.17982) |

- ModelTrainExecutorFactory

6 changes: 6 additions & 0 deletions environments/sandbox_requires.txt
@@ -1,3 +1,9 @@
torch>=1.11.0,<2.0.0
Collaborator: Is the `<2.0.0` pin here forced by vbench? The docker image provided for the competition uses torch 2.2.0.

wandb
fire
pyspark
# vbench-related
detectron2@git+https://github.com/facebookresearch/detectron2.git@b7c7f4ba82192ff06f2bbb162b9f67b00ea55867
vbench
# modelscope-related
modelscope
2 changes: 1 addition & 1 deletion environments/science_requires.txt
@@ -1,4 +1,4 @@
torch>=1.11.0
torch>=1.11.0,<2.0.0
torchaudio
easyocr
fasttext-wheel
3 changes: 1 addition & 2 deletions setup.py
@@ -41,10 +41,9 @@ def get_install_requirements(require_f_paths, env_dir='environments'):
    'tools':
    get_install_requirements(
        ['preprocess_requires.txt', 'quality_classifier_requires.txt']),
    'sandbox':
    get_install_requirements(['sandbox_requires.txt']),
}
extra_requires['all'] = [v for v in extra_requires.values()]
extra_requires['sandbox'] = get_install_requirements(['sandbox_requires.txt'])

with open('data_juicer/__init__.py', 'r') as f:
version = re.search(r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]', f.read(),
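Note that the ordering here is what makes `.[all]` exclude sandbox: `extra_requires['all']` snapshots `extra_requires.values()` before the `sandbox` key is registered. A simplified standalone sketch of the effect (placeholder requirement lists, not the project's real ones):

```python
# Simplified sketch of the setup.py ordering trick; the requirement lists
# are placeholders, not the project's real dependencies.
extra_requires = {
    'mini': ['datasets'],
    'sci': ['torch'],
}
# 'all' snapshots the values that exist *now* ...
extra_requires['all'] = [v for v in extra_requires.values()]
# ... so registering 'sandbox' afterwards keeps it out of 'all'.
extra_requires['sandbox'] = ['vbench']

print(extra_requires['all'])  # [['datasets'], ['torch']] -- no 'vbench'
```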
3 changes: 3 additions & 0 deletions tools/mm_eval/README.md
@@ -0,0 +1,3 @@
VBench is from the paper "VBench: Comprehensive Benchmark Suite for Video Generative Models".

Please refer to [GitHub](https://github.com/Vchitect/VBench) for more details.
3 changes: 3 additions & 0 deletions tools/mm_eval/README_ZH.md
@@ -0,0 +1,3 @@
VBench来自论文 "VBench: Comprehensive Benchmark Suite for Video Generative Models"。

请跳转[GitHub](https://github.com/Vchitect/VBench)查看更多信息。