From acaeb1088fd0e8742bfd013d9f3b9d5516e76320 Mon Sep 17 00:00:00 2001 From: "lielin.hyl" Date: Mon, 13 Nov 2023 16:17:09 +0800 Subject: [PATCH] * minor modifications --- configs/config_all.yaml | 2 +- data_juicer/config/config.py | 1 - data_juicer/ops/base_op.py | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/configs/config_all.yaml b/configs/config_all.yaml index ad2ea458d..708db5c22 100644 --- a/configs/config_all.yaml +++ b/configs/config_all.yaml @@ -116,7 +116,7 @@ process: use_words_aug: false # whether to augment words, especially for Chinese and Vietnamese words_aug_group_sizes: [2] # the group size of words to augment words_aug_join_char: "" # the join char between words to augment - - image_aspect_ratio_filter: # filter samples according to the aspect ratios of images in them + - image_aspect_ratio_filter: # filter samples according to the aspect ratios of images (a fraction of width by height, r=w/h) in them min_ratio: 0.333 # the min aspect ratio of filter range max_ratio: 3.0 # the max aspect ratio of filter range any_or_all: any # keep this sample when any/all images meet the filter condition diff --git a/data_juicer/config/config.py b/data_juicer/config/config.py index 4571b0718..f59d8cdc9 100644 --- a/data_juicer/config/config.py +++ b/data_juicer/config/config.py @@ -437,7 +437,6 @@ def sort_op_by_types_and_names(op_name_classes): def config_backup(cfg): cfg_path = cfg.config[0].absolute - cfg.cfg_path = os.path.dirname(cfg_path) # record the path of config work_dir = cfg.work_dir target_path = os.path.join(work_dir, os.path.basename(cfg_path)) logger.info(f'Back up the input config file [{cfg_path}] into the ' diff --git a/data_juicer/ops/base_op.py b/data_juicer/ops/base_op.py index b778b64d4..be45a9673 100644 --- a/data_juicer/ops/base_op.py +++ b/data_juicer/ops/base_op.py @@ -36,7 +36,7 @@ def __init__(self, image_key: str = None, ): """ - Base class that conducts text editing. + Base class that conducts data editing. :param text_key: the key name of field that stores sample texts to be processed.