From ff78d286ff0b94194f0d246d249d2a0dcb4dc762 Mon Sep 17 00:00:00 2001 From: "lielin.hyl" Date: Thu, 18 Apr 2024 13:02:02 +0800 Subject: [PATCH] * allow to change the config of opencc converter --- data_juicer/ops/mapper/chinese_convert_mapper.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/data_juicer/ops/mapper/chinese_convert_mapper.py b/data_juicer/ops/mapper/chinese_convert_mapper.py index 3ad124045..818f1b1d4 100644 --- a/data_juicer/ops/mapper/chinese_convert_mapper.py +++ b/data_juicer/ops/mapper/chinese_convert_mapper.py @@ -11,9 +11,15 @@ def prepare_converter(mode): + mode_path = mode + '.json' global OPENCC_CONVERTER if OPENCC_CONVERTER is None: - OPENCC_CONVERTER = opencc.OpenCC(mode + '.json') + # empty converter + OPENCC_CONVERTER = opencc.OpenCC(mode_path) + if not OPENCC_CONVERTER.config.endswith(mode_path): + # the config is actually a config path + # update and get a new converter with specified mode + OPENCC_CONVERTER = opencc.OpenCC(mode_path) @OPERATORS.register_module(OP_NAME)