From d7f9d6908cba283ebef6c53cf6e84f49653b3d55 Mon Sep 17 00:00:00 2001 From: mshannon-sil <131058912+mshannon-sil@users.noreply.github.com> Date: Tue, 24 Oct 2023 08:00:02 -0400 Subject: [PATCH] dynamically determine project root for path to custom normalizer (#48) --- .../translation/huggingface/hugging_face_nmt_model_trainer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/machine/translation/huggingface/hugging_face_nmt_model_trainer.py b/machine/translation/huggingface/hugging_face_nmt_model_trainer.py index 743696b..ddc8dfc 100644 --- a/machine/translation/huggingface/hugging_face_nmt_model_trainer.py +++ b/machine/translation/huggingface/hugging_face_nmt_model_trainer.py @@ -201,7 +201,8 @@ def add_tokens(tokenizer: Any, missing_tokens: List[str]) -> Any: ) else: norm_tok = PreTrainedTokenizerFast.from_pretrained( - "./machine/translation/huggingface/custom_normalizer", use_fast=True + str(Path(os.path.dirname(os.path.abspath(__file__))) / "custom_normalizer"), + use_fast=True, ) # using unofficially supported behavior to set the normalizer tokenizer.backend_tokenizer.normalizer = norm_tok.backend_tokenizer.normalizer # type: ignore