From b803ae3d75c9fa0b4f501cb32f7e8410c3b4fa2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=81=93=E8=BE=95?= Date: Wed, 15 Nov 2023 09:36:37 +0800 Subject: [PATCH] minor fix according to suggestions by zhijian and yilun --- configs/config_all.yaml | 2 +- data_juicer/ops/filter/image_size_filter.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/configs/config_all.yaml b/configs/config_all.yaml index 25e518f2a..37c541922 100644 --- a/configs/config_all.yaml +++ b/configs/config_all.yaml @@ -122,7 +122,7 @@ process: any_or_all: any # keep this sample when any/all images meet the filter condition - image_size_filter: # filter samples according to the size of images (in bytes) within them min_size: "0" # the min size of filter range - max_ratio: "1TB" # the max size of filter range + max_size: "1TB" # the max size of filter range any_or_all: any # keep this sample when any/all images meet the filter condition - language_id_score_filter: # filter text in specific language with language scores larger than a specific max value lang: en # keep text in what language diff --git a/data_juicer/ops/filter/image_size_filter.py b/data_juicer/ops/filter/image_size_filter.py index 79e513ee3..254bf6141 100644 --- a/data_juicer/ops/filter/image_size_filter.py +++ b/data_juicer/ops/filter/image_size_filter.py @@ -4,11 +4,9 @@ from data_juicer.utils.mm_utils import get_image_size, size_to_bytes from ..base_op import OPERATORS, Filter -from ..op_fusion import LOADED_IMAGES @OPERATORS.register_module('image_size_filter') -@LOADED_IMAGES.register_module('image_size_filter') class ImageSizeFilter(Filter): """Keep data samples whose image size (in bytes/kb/MB/...) within a specific range.