Skip to content

Commit

Permalink
xtts/tokenizer: merge duplicate implementations of preprocess_text
Browse files Browse the repository at this point in the history
This was found via ruff:

> F811 Redefinition of unused `preprocess_text` from line 570
  • Loading branch information
akx committed Nov 8, 2023
1 parent 64bb609 commit 1fcb25a
Showing 1 changed file with 5 additions and 20 deletions.
25 changes: 5 additions & 20 deletions TTS/tts/layers/xtts/tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -567,14 +567,16 @@ def check_input_length(self, txt, lang):
print(f"[!] Warning: The text length exceeds the character limit of {limit} for language '{lang}', this might cause truncated audio.")

def preprocess_text(self, txt, lang):
    """Apply language-specific cleaning/transliteration to *txt*.

    Args:
        txt: Raw input text.
        lang: Language code (e.g. "en", "zh", "zh-cn", "ja", "ko").

    Returns:
        The cleaned text, ready for tokenization.

    Raises:
        NotImplementedError: If *lang* is not a supported language.
    """
    if lang in {"ar", "cs", "de", "en", "es", "fr", "hu", "it", "nl", "pl", "pt", "ru", "tr", "zh", "zh-cn"}:
        txt = multilingual_cleaners(txt, lang)
        # Chinese additionally gets transliteration after the generic cleaning pass.
        if lang in {"zh", "zh-cn"}:
            txt = chinese_transliterate(txt)
    elif lang == "ja":
        # NOTE(review): assumes self.katsu is initialized elsewhere — confirm against __init__.
        txt = japanese_cleaners(txt, self.katsu)
    elif lang == "ko":
        txt = korean_cleaners(txt)
    else:
        raise NotImplementedError(f"Language '{lang}' is not supported.")
    return txt

def encode(self, txt, lang):
Expand All @@ -593,23 +595,6 @@ def decode(self, seq):
txt = txt.replace("[UNK]", "")
return txt

def preprocess_text(self, txt, lang):
    """Clean and normalize *txt* according to *lang* before tokenization.

    Args:
        txt: Raw input text.
        lang: Language code (e.g. "en", "zh", "zh-cn", "ja", "ko").

    Returns:
        The preprocessed text.

    Raises:
        NotImplementedError: If *lang* is not a supported language.
    """
    # "zh" is handled by the generic branch; only "zh-cn" reaches the
    # transliteration branch below.
    if lang in {"en", "es", "fr", "de", "pt", "it", "pl", "zh", "ar", "cs", "ru", "nl", "tr", "hu"}:
        txt = multilingual_cleaners(txt, lang)
    elif lang == "ja":
        # Lazily build the Cutlet romanizer on first Japanese input;
        # importing here keeps cutlet an optional dependency.
        if self.katsu is None:
            import cutlet

            self.katsu = cutlet.Cutlet()
        txt = japanese_cleaners(txt, self.katsu)
    elif lang in {"zh-cn", "zh"}:
        txt = chinese_transliterate(txt)
    elif lang == "ko":
        txt = korean_cleaners(txt)
    else:
        # Was a bare NotImplementedError(); include the offending language
        # code so callers can diagnose unsupported input.
        raise NotImplementedError(f"Language '{lang}' is not supported.")
    return txt

def __len__(self):
return self.tokenizer.get_vocab_size()

Expand Down

0 comments on commit 1fcb25a

Please sign in to comment.