Skip to content

Commit

Permalink
* fix for "undefined opencc" bug for chinese_convert_mapper
Browse files (browse the repository at this point in the history)
  • Loading branch information
HYLcool committed Apr 18, 2024
1 parent c1a8aa8 commit b244565
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions data_juicer/ops/mapper/chinese_convert_mapper.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -7,10 +7,13 @@
with AvailabilityChecking(['opencc'], OP_NAME):
import opencc # noqa: F401

# Converter used by the mapper's ``process``; rebound by ``prepare_converter``.
OPENCC_CONVERTER = None

# Converters that have already been built, keyed by conversion mode, so each
# mode's converter is constructed at most once per process.
_OPENCC_CONVERTERS = {}


def prepare_converter(mode):
    """Bind the module-level ``OPENCC_CONVERTER`` to a converter for ``mode``.

    A converter is built from the config name ``mode + '.json'`` the first
    time a mode is requested and reused afterwards. The global is rebound on
    every call, so mappers configured with different modes each get the
    converter matching their own mode instead of whichever was built first.

    :param mode: conversion mode name (e.g. ``'s2t'``); ``'.json'`` is
        appended to form the OpenCC config name.
    """
    global OPENCC_CONVERTER
    converter = _OPENCC_CONVERTERS.get(mode)
    if converter is None:
        # Built lazily: only modes that are actually requested are loaded.
        converter = opencc.OpenCC(mode + '.json')
        _OPENCC_CONVERTERS[mode] = converter
    OPENCC_CONVERTER = converter


@OPERATORS.register_module(OP_NAME)
Expand Down Expand Up @@ -70,9 +73,11 @@ def __init__(self, mode: str = 's2t', *args, **kwargs):
]
assert mode in mode_list, 'Please make sure mode is one of {}'.format(
mode_list)
prepare_converter(mode)
self.mode = mode
prepare_converter(self.mode)

def process(self, sample):
    """Convert the sample's text field and return the same sample.

    :param sample: mapping that holds the text under ``self.text_key``.
    :return: the same ``sample`` object, with the text field replaced by
        the converter's output.
    """
    # Make sure the module-level converter exists before it is used.
    prepare_converter(self.mode)

    converted = OPENCC_CONVERTER.convert(sample[self.text_key])
    sample[self.text_key] = converted
    return sample

0 comments on commit b244565

Please sign in to comment.