From fe8c99739ecb0acc02ad16704af93b3da1675a69 Mon Sep 17 00:00:00 2001
From: JarbasAI <33701864+JarbasAl@users.noreply.github.com>
Date: Tue, 4 Jul 2023 12:11:50 +0100
Subject: [PATCH] feat/g2p (#9)

phonemes plugin, allows mouth for movements for all TTS in the mk1
---
 ovos_tts_plugin_mimic/__init__.py | 93 ++++++++++++++++++++++++++++++-
 setup.py                          |  4 ++-
 2 files changed, 95 insertions(+), 2 deletions(-)

diff --git a/ovos_tts_plugin_mimic/__init__.py b/ovos_tts_plugin_mimic/__init__.py
index b0d1b86..9dd324a 100755
--- a/ovos_tts_plugin_mimic/__init__.py
+++ b/ovos_tts_plugin_mimic/__init__.py
@@ -13,7 +13,8 @@
 import subprocess
+import tempfile
 from distutils.spawn import find_executable
 from os.path import join, isfile, expanduser
-
+from ovos_plugin_manager.templates.g2p import Grapheme2PhonemePlugin, OutOfVocabulary
 from ovos_plugin_manager.templates.tts import TTS, TTSValidator
 from ovos_utils.configuration import get_xdg_base
 from ovos_utils.configuration import read_mycroft_config
@@ -21,6 +22,96 @@ from ovos_utils.xdg_utils import xdg_config_home
 
 
 
+class MimicPhonemesPlugin(Grapheme2PhonemePlugin):
+    """Grapheme to phoneme plugin backed by the mimic command line binary."""
+
+    def __init__(self, config=None):
+        super().__init__(config)
+        # resolution order: configured path, mimic found on PATH, bare name
+        self.mimic_bin = expanduser(self.config.get("binary") or
+                                    find_executable("mimic") or
+                                    "mimic")
+
+    @staticmethod
+    def parse_phonemes(phonemes, normalize=False):
+        """Parse mimic phoneme string into a list of phone, duration pairs.
+
+        Arguments
+            phonemes (bytes): phoneme output from mimic
+            normalize (bool): drop silences found at the start, at the end
+                              or directly before another silence
+        Returns:
+            (list) list of [phoneme, duration] pairs, both as strings
+        """
+        # split on any whitespace: a newline in the mimic output must not
+        # end up glued to a phoneme or to a duration
+        pairs = phonemes.decode().replace("pau", ".").split()
+        # split on the first ":" only so every entry has exactly two items
+        phones = [pair.split(':', 1) for pair in pairs if ':' in pair]
+        if not normalize:
+            return phones
+        last = len(phones) - 1
+        # keep a silence only when it sits strictly inside the utterance
+        # and the phoneme right after it is not a silence as well
+        return [phone for idx, phone in enumerate(phones)
+                if phone[0] != "." or
+                (0 < idx < last and phones[idx + 1][0] != ".")]
+
+    def get_mimic_phonemes(self, sentence, normalize=True):
+        """Run mimic on a sentence and return its phoneme, duration pairs.
+
+        Arguments
+            sentence (str): text handed to mimic via -t (with -ssml set)
+            normalize (bool): forwarded to parse_phonemes
+        Returns:
+            (list) list of [phoneme, duration] pairs
+        """
+        # point -o at a private temporary file instead of a fixed,
+        # predictable path in /tmp that every caller would share
+        with tempfile.NamedTemporaryFile(suffix=".pho") as out:
+            args = [self.mimic_bin, '-psdur', '-ssml', '-t', sentence,
+                    '-o', out.name]
+            phonemes = subprocess.check_output(args)
+        return self.parse_phonemes(phonemes, normalize)
+
+    def get_arpa(self, word, lang, ignore_oov=True):
+        """Return the upper-cased mimic phonemes of an english word.
+
+        Arguments
+            word (str): text to convert
+            lang (str): language code, only codes starting with "en" work
+            ignore_oov (bool): return None instead of raising when the
+                               language is not supported
+        Returns:
+            (list) phoneme strings, or None
+        Raises:
+            OutOfVocabulary: unsupported language and ignore_oov is False
+        """
+        if lang.lower().startswith("en"):
+            return [p[0].upper() for p in self.get_mimic_phonemes(word)]
+        if ignore_oov:
+            return None
+        raise OutOfVocabulary
+
+    def utterance2visemes(self, utterance, lang="en", default_dur=0.4):
+        """Convert an utterance into a list of (viseme, duration) tuples.
+
+        Phonemes missing from VISIMES map to viseme '4'; lang and
+        default_dur are accepted but not used by this implementation.
+        """
+        phonemes = self.get_mimic_phonemes(utterance, normalize=False)
+        return [(VISIMES.get(pho[0], '4'), float(pho[1])) for pho in phonemes]
+
+    @property
+    def available_languages(self):
+        """Return languages supported by this G2P implementation.
+
+        Returns:
+            set: supported languages
+        """
+        return {"en"}
+
+
 class MimicTTSPlugin(TTS):
     """Interface to Mimic TTS."""
 
diff --git a/setup.py b/setup.py
index f469647..0b66bb2 100755
--- a/setup.py
+++ b/setup.py
@@ -51,6 +51,7 @@ def required(requirements_file):
 
 
 PLUGIN_ENTRY_POINT = 'ovos-tts-plugin-mimic = ovos_tts_plugin_mimic:MimicTTSPlugin'
+G2P_ENTRY_POINT = 'ovos-g2p-plugin-mimic = ovos_tts_plugin_mimic:MimicPhonemesPlugin'
 SAMPLE_CONFIGS = 'ovos-tts-plugin-mimic.config = ovos_tts_plugin_mimic:MimicTTSPluginConfig'
 
 setup(
@@ -83,5 +84,6 @@ def required(requirements_file):
     ],
     keywords='mycroft plugin tts OVOS OpenVoiceOS',
     entry_points={'mycroft.plugin.tts': PLUGIN_ENTRY_POINT,
-                  'mycroft.plugin.tts.config': SAMPLE_CONFIGS}
+                  'mycroft.plugin.tts.config': SAMPLE_CONFIGS,
+                  'ovos.plugin.g2p': G2P_ENTRY_POINT}
 )