-
Notifications
You must be signed in to change notification settings - Fork 4.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add phonemizer for Belarusian language
- Loading branch information
Showing 6 changed files with 125 additions and 1 deletion.
There are no files selected for viewing
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
import os | ||
|
||
# Shared GrammarFinder instance; stays None until init() assigns it.
finder = None
|
||
|
||
def init():
    """Prepare the JVM and create the shared ``GrammarFinder``.

    Reads the path to ``fanetyka.jar`` from the ``BEL_FANETYKA_JAR``
    environment variable, makes sure a JVM is running with that jar on its
    class path, then builds a ``GrammarFinder`` from
    ``GrammarDB2.initializeFromJar()`` and stores it in the module-level
    ``finder`` variable.

    Raises:
        ModuleNotFoundError: If the optional ``jpype1`` package is not installed.
        KeyError: If the ``BEL_FANETYKA_JAR`` environment variable is not defined.
    """
    global finder

    try:
        import jpype
        import jpype.imports  # pylint: disable=unused-import
    except ModuleNotFoundError as err:
        raise ModuleNotFoundError(
            "Belarusian phonemizer requires to install module 'jpype1' manually. Try `pip install jpype1`."
        ) from err

    try:
        jar_path = os.environ["BEL_FANETYKA_JAR"]
    except KeyError as err:
        raise KeyError(
            "You need to define 'BEL_FANETYKA_JAR' environment variable as path to the fanetyka.jar file"
        ) from err

    if jpype.isJVMStarted():
        # A JVM cannot be started twice in one process. This branch is reached
        # when a previous init() failed after the JVM came up, or when other
        # code started the JVM first. Register the jar with the running JVM
        # instead of calling startJVM again.
        # NOTE(review): relies on JPype accepting class path additions after
        # startup -- confirm for the JPype version in use.
        jpype.addClassPath(jar_path)
    else:
        jpype.startJVM(classpath=[jar_path])

    # import the Java modules (resolved through the JVM, so only possible here)
    from org.alex73.korpus.base import GrammarDB2, GrammarFinder

    grammar_db = GrammarDB2.initializeFromJar()
    finder = GrammarFinder(grammar_db)
|
||
|
||
def belarusian_text_to_phonemes(text: str) -> str:
    """Convert Belarusian text to phonemes using the Java ``FanetykaText`` class.

    Args:
        text (str): Text to phonemize.

    Returns:
        str: The ``ipa`` value produced by ``FanetykaText``, as a Python string.
    """
    # Lazy setup: init() runs only while the shared finder is still unset.
    if finder is None:
        init()

    # Imported locally rather than at module level because the Java package
    # is only reachable after init() has prepared the JVM.
    from org.alex73.fanetyka.impl import FanetykaText

    transcription = FanetykaText(finder, text).ipa
    return str(transcription)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
from typing import Dict | ||
|
||
from TTS.tts.utils.text.phonemizers.base import BasePhonemizer | ||
from TTS.tts.utils.text.belarusian.phonemizer import belarusian_text_to_phonemes | ||
|
||
# Default punctuation characters used by BEL_Phonemizer.
_DEF_BE_PUNCS = ",!."  # TODO: review whether more punctuation marks are needed
|
||
|
||
class BEL_Phonemizer(BasePhonemizer):
    """🐸TTS be phonemizer using functions in `TTS.tts.utils.text.belarusian.phonemizer`

    Args:
        punctuations (str):
            Set of characters to be treated as punctuation. Defaults to `_DEF_BE_PUNCS`.

        keep_puncs (bool):
            If True, keep the punctuations after phonemization. Defaults to True.
    """

    language = "be"

    def __init__(self, punctuations=_DEF_BE_PUNCS, keep_puncs=True, **kwargs):  # pylint: disable=unused-argument
        # Extra keyword arguments are accepted and ignored; the language is
        # always the class-level ``language`` value.
        super().__init__(self.language, punctuations=punctuations, keep_puncs=keep_puncs)

    @staticmethod
    def name() -> str:
        """Return the identifier of this phonemizer."""
        return "be_phonemizer"

    @staticmethod
    def phonemize_be(text: str, separator: str = "|") -> str:  # pylint: disable=unused-argument
        """Phonemize ``text`` with ``belarusian_text_to_phonemes``.

        ``separator`` is accepted for interface compatibility and is not used.
        """
        return belarusian_text_to_phonemes(text)

    def _phonemize(self, text: str, separator: str) -> str:
        """Delegate to :meth:`phonemize_be`."""
        return self.phonemize_be(text, separator)

    @staticmethod
    def supported_languages() -> Dict[str, str]:
        """Return the mapping of supported language codes to language names."""
        return {"be": "Belarusian"}

    def version(self) -> str:
        """Return the version string of this phonemizer."""
        return "0.0.1"

    def is_available(self) -> bool:
        """Always report the phonemizer as available.

        No check of the ``jpype1`` package or the fanetyka jar is made here.
        """
        return True
|
||
|
||
if __name__ == "__main__":
    # Manual smoke test: print the phonemizer metadata, then a sample transcription.
    sample_text = "тэст"
    phonemizer = BEL_Phonemizer()
    print(phonemizer.supported_languages())
    print(phonemizer.version())
    print(phonemizer.language)
    print(phonemizer.name())
    print(phonemizer.is_available())
    phonemes = phonemizer.phonemize(sample_text)
    print("`" + phonemes + "`")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
import os | ||
import warnings | ||
import unittest | ||
|
||
from TTS.tts.utils.text.belarusian.phonemizer import belarusian_text_to_phonemes | ||
|
||
# Reference pairs, one per line, written as "<text>/<expected phonemes>".
_TEST_CASES = """
Фанетычны канвертар/fanʲɛˈtɨt͡ʂnɨ kanˈvʲɛrtar
Гэтак мы працавалі/ˈɣɛtak ˈmɨ prat͡saˈvalʲi
"""
|
||
|
||
class TestText(unittest.TestCase):
    """Checks the Belarusian phonemizer against the pairs in ``_TEST_CASES``."""

    def test_belarusian_text_to_phonemes(self):
        """Every ``text/phonemes`` line must phonemize to the expected string.

        The test is reported as skipped (not passed) when the
        ``BEL_FANETYKA_JAR`` environment variable is not defined, because
        nothing can be verified without the jar.
        """
        # Checked at run time so the variable may be set after import.
        if "BEL_FANETYKA_JAR" not in os.environ:
            self.skipTest(
                "You need to define 'BEL_FANETYKA_JAR' environment variable as path to the fanetyka.jar file to test Belarusian phonemizer"
            )

        for line in _TEST_CASES.strip().split("\n"):
            text, phonemes = line.split("/")
            # subTest keeps checking the remaining pairs after one mismatch.
            with self.subTest(text=text):
                self.assertEqual(belarusian_text_to_phonemes(text), phonemes)
|
||
|
||
# Run the tests when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()