diff --git a/ovos_padatious/opm.py b/ovos_padatious/opm.py index 39618ab..b93fd1b 100644 --- a/ovos_padatious/opm.py +++ b/ovos_padatious/opm.py @@ -25,9 +25,11 @@ from ovos_padatious import IntentContainer as PadatiousIntentContainer from ovos_padatious.match_data import MatchData as PadatiousIntent from ovos_utils import flatten_list +from ovos_utils.lang import standardize_lang_tag from ovos_utils.log import LOG from ovos_utils.xdg_utils import xdg_data_home from ovos_plugin_manager.templates.pipeline import PipelinePlugin, IntentMatch +from langcodes import closest_match class PadatiousMatcher: @@ -47,7 +49,7 @@ def _match_level(self, utterances, limit, lang=None, message: Optional[Message] LOG.debug(f'Padatious Matching confidence > {limit}') # call flatten in case someone is sending the old style list of tuples utterances = flatten_list(utterances) - lang = lang or self.service.lang + lang = standardize_lang_tag(lang or self.service.lang) padatious_intent = self.service.calc_intent(utterances, lang, message) if padatious_intent is not None and padatious_intent.conf > limit: skill_id = padatious_intent.name.split(':')[0] @@ -92,8 +94,9 @@ def __init__(self, bus, config): self.bus = bus core_config = Configuration() - self.lang = core_config.get("lang", "en-us") + self.lang = standardize_lang_tag(core_config.get("lang", "en-US")) langs = core_config.get('secondary_langs') or [] + langs = [standardize_lang_tag(l) for l in langs] if self.lang not in langs: langs.append(self.lang) @@ -211,7 +214,7 @@ def register_intent(self, message): message (Message): message triggering action """ lang = message.data.get('lang', self.lang) - lang = lang.lower() + lang = standardize_lang_tag(lang) if lang in self.containers: self.registered_intents.append(message.data['name']) self._register_object(message, 'intent', self.containers[lang].add_intent) @@ -223,7 +226,7 @@ def register_entity(self, message): message (Message): message triggering action """ lang = message.data.get('lang', self.lang) - lang = lang.lower() + lang = standardize_lang_tag(lang) if lang in self.containers: self.registered_entities.append(message.data) self._register_object(message, 'entity', @@ -247,16 +250,32 @@ def calc_intent(self, utterances: List[str], lang: str = None, return None lang = lang or self.lang - lang = lang.lower() + + lang = self._get_closest_lang(lang) + if lang is None: # no intents registered for this lang + return None + sess = SessionManager.get(message) - if lang in self.containers: - intent_container = self.containers.get(lang) - intents = [_calc_padatious_intent(utt, intent_container, sess) - for utt in utterances] - intents = [i for i in intents if i is not None] - # select best - if intents: - return max(intents, key=lambda k: k.conf) + + intent_container = self.containers.get(lang) + intents = [_calc_padatious_intent(utt, intent_container, sess) + for utt in utterances] + intents = [i for i in intents if i is not None] + # select best + if intents: + return max(intents, key=lambda k: k.conf) + + def _get_closest_lang(self, lang: str) -> Optional[str]: + if self.containers: + lang = standardize_lang_tag(lang) + closest, score = closest_match(lang, list(self.containers.keys())) + # https://langcodes-hickford.readthedocs.io/en/sphinx/index.html#distance-values + # 0 -> These codes represent the same language, possibly after filling in values and normalizing. + # 1- 3 -> These codes indicate a minor regional difference. + # 4 - 10 -> These codes indicate a significant but unproblematic regional difference. + if score < 10: + return closest + return None def shutdown(self): self.bus.remove('padatious:register_intent', self.register_intent) diff --git a/requirements.txt b/requirements.txt index 0c84e7c..ae04446 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,6 @@ fann2>=1.0.7, < 1.1.0 xxhash ovos-plugin-manager>=0.0.26 -ovos-workshop>=0.1.7,<2.0.0 \ No newline at end of file +ovos-workshop>=0.1.7,<2.0.0 +ovos-utils>=0.3.4,<1.0.0 +langcodes \ No newline at end of file diff --git a/tests/test_container.py b/tests/test_container.py index 57f5e20..cd99eb5 100644 --- a/tests/test_container.py +++ b/tests/test_container.py @@ -11,14 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from time import monotonic -import unittest import os -import pytest import random -from os import mkdir -from os.path import isdir, join -from shutil import rmtree +import unittest +from os.path import join +from time import monotonic + +import pytest from ovos_padatious.intent_container import IntentContainer @@ -34,7 +33,7 @@ class TestFromDisk(unittest.TestCase): other_entities = ['else\n', 'different\n'] def setUp(self): - self.cont = IntentContainer('temp') + self.cont = IntentContainer('/tmp/cache2') def _add_intent(self): self.cont.add_intent('test', self.test_lines) @@ -45,23 +44,19 @@ def _add_intent(self): self._write_train_data() def _write_train_data(self): - - if not isdir('temp'): - mkdir('temp') - - fn1 = join('temp', 'test.intent') + fn1 = join('/tmp/cache2', 'test.intent') with open(fn1, 'w') as f: f.writelines(self.test_lines_with_entities) - fn2 = join('temp', 'other.intent') + fn2 = join('/tmp/cache2', 'other.intent') with open(fn2, 'w') as f: f.writelines(self.other_lines_with_entities) - fn1 = join('temp', 'test.entity') + fn1 = join('/tmp/cache2', 'test.entity') with open(fn1, 'w') as f: f.writelines(self.test_entities) - fn2 = join('temp', 'other.entity') + fn2 = join('/tmp/cache2', 'other.entity') with open(fn2, 'w') as f: f.writelines(self.other_entities) @@ -70,7 +65,7 @@ def test_instantiate_from_disk(self): self._add_intent() # instantiate from disk (load cached files) - cont = IntentContainer('temp') + cont = IntentContainer('/tmp/cache2') cont.instantiate_from_disk() assert len(cont.intents.train_data.sent_lists) == 0 @@ -92,21 +87,18 @@ class TestIntentContainer(unittest.TestCase): other_entities = ['else\n', 'different\n'] def setUp(self): - self.cont = IntentContainer('temp') + self.cont = IntentContainer('/tmp/cache') def _add_intent(self): self.cont.add_intent('test', self.test_lines) self.cont.add_intent('other', self.other_lines) def test_load_intent(self): - if not isdir('temp'): - mkdir('temp') - - fn1 = join('temp', 'test.txt') + fn1 = join('/tmp', 'test.txt') with open(fn1, 'w') as f: f.writelines(self.test_lines) - fn2 = join('temp', 'other.txt') + fn2 = join('/tmp', 'other.txt') with open(fn2, 'w') as f: f.writelines(self.other_lines) @@ -122,7 +114,6 @@ def test(a, b): test(False, False) test(True, True) - def _create_large_intent(self, depth): if depth == 0: return '(a|b|)' @@ -183,8 +174,8 @@ def test_calc_intents(self): intents = self.cont.calc_intents('this is another test') assert ( - intents[0].conf > intents[1].conf) == ( - intents[0].name == 'test') + intents[0].conf > intents[1].conf) == ( + intents[0].name == 'test') assert self.cont.calc_intent('this is another test').name == 'test' def test_empty(self): @@ -252,7 +243,3 @@ def test_generalize(self): intent = self.cont.calc_intent('make a timer for 3 minute') assert intent.name == 'timer' assert intent.matches == {'time': '3'} - - def teardown(self): - if isdir('temp'): - rmtree('temp')