From 5aaf4243eb71b5c2800e77904c8f3b299e52893c Mon Sep 17 00:00:00 2001 From: JarbasAI <33701864+JarbasAl@users.noreply.github.com> Date: Wed, 16 Oct 2024 03:20:50 +0100 Subject: [PATCH 1/3] fix:standardize lang code (#254) * fix:standardize_lang * fix:standardize_lang * fix:standardize_lang * fix:standardize_lang --- ovos_workshop/skills/ovos.py | 46 ++++++++++++------- test/unittests/skills/test_base.py | 20 ++++---- test/unittests/skills/test_fallback_skill.py | 2 +- test/unittests/test_decorators.py | 16 +++---- .../test_intent_service_interface.py | 4 +- test/unittests/test_resource_files.py | 12 ++--- 6 files changed, 57 insertions(+), 43 deletions(-) diff --git a/ovos_workshop/skills/ovos.py b/ovos_workshop/skills/ovos.py index 09ecfb5d..8cffdad8 100644 --- a/ovos_workshop/skills/ovos.py +++ b/ovos_workshop/skills/ovos.py @@ -14,6 +14,7 @@ from typing import Dict, Callable, List, Optional, Union from json_database import JsonStorage +from langcodes import closest_match from lingua_franca.format import pronounce_number, join_list from lingua_franca.parse import yes_or_no, extract_number from ovos_bus_client import MessageBusClient @@ -33,12 +34,11 @@ from ovos_utils.file_utils import FileWatcher from ovos_utils.gui import get_ui_directories from ovos_utils.json_helper import merge_dict +from ovos_utils.lang import standardize_lang_tag from ovos_utils.log import LOG from ovos_utils.parse import match_one from ovos_utils.process_utils import RuntimeRequirements from ovos_utils.skills import get_non_properties -from padacioso import IntentContainer - from ovos_workshop.decorators.killable import AbortEvent, killable_event, \ AbortQuestion from ovos_workshop.decorators.layers import IntentLayers @@ -49,6 +49,7 @@ CoreResources, find_resource, SkillResources from ovos_workshop.settings import PrivateSettings from ovos_workshop.settings import SkillSettingsManager +from padacioso import IntentContainer def simple_trace(stack_trace: List[str]) -> str: @@ -225,13 +226,26 @@ def register_converse_intent(self, intent_file, handler): self.add_event(name, handler, 'mycroft.skill.handler') + def _get_closest_lang(self, lang: str) -> Optional[str]: + if self.converse_matchers: + lang = standardize_lang_tag(lang) + closest, score = closest_match(lang, list(self.converse_matchers.keys())) + # https://langcodes-hickford.readthedocs.io/en/sphinx/index.html#distance-values + # 0 -> These codes represent the same language, possibly after filling in values and normalizing. + # 1- 3 -> These codes indicate a minor regional difference. + # 4 - 10 -> These codes indicate a significant but unproblematic regional difference. + if score < 10: + return closest + return None + def _handle_converse_intents(self, message): """ called before converse method this gives active skills a chance to parse their own intents and consume the utterance, see conversational_intent decorator for usage """ - if self.lang not in self.converse_matchers: - return False + lang = self._get_closest_lang(self.lang) + if lang is None: # no intents registered for this lang + return None best_score = 0 response = None @@ -569,14 +583,14 @@ def lang(self) -> str: message = dig_for_message() if message: lang = get_message_lang(message) - return lang.lower() + return standardize_lang_tag(lang) @property def core_lang(self) -> str: """ Get the configured default language as a BCP-47 language code. """ - return self.config_core.get("lang", "en-us").lower() + return standardize_lang_tag(self.config_core.get("lang", "en-US")) @property def secondary_langs(self) -> List[str]: @@ -586,7 +600,7 @@ def secondary_langs(self) -> List[str]: to `core_lang`. A skill may override this method to specify which languages intents are registered in. """ - return [lang.lower() for lang in self.config_core.get('secondary_langs', []) + return [standardize_lang_tag(lang) for lang in self.config_core.get('secondary_langs', []) if lang != self.core_lang] @property @@ -596,8 +610,8 @@ def native_langs(self) -> List[str]: and explicitly supported). This is equivalent to normalized secondary_langs + core_lang. """ - valid = set([lang.lower() for lang in self.secondary_langs if '-' in - lang and lang != self.core_lang] + [self.core_lang]) + valid = set([standardize_lang_tag(lang) for lang in self.secondary_langs + if lang != self.core_lang] + [self.core_lang]) return list(valid) @property @@ -618,7 +632,7 @@ def load_lang(self, root_directory: Optional[str] = None, @param lang: language to get resources for (default self.lang) @return: SkillResources object """ - lang = lang or self.lang + lang = standardize_lang_tag(lang or self.lang) root_directory = root_directory or self.res_dir if lang not in self._lang_resources: self._lang_resources[lang] = SkillResources(root_directory, lang, @@ -706,7 +720,7 @@ def find_resource(self, res_name: str, res_dirname: Optional[str] = None, Returns: string: The full path to the resource file or None if not found """ - lang = lang or self.lang + lang = standardize_lang_tag(lang or self.lang) x = find_resource(res_name, self.res_dir, res_dirname, lang) if x: return str(x) @@ -1174,7 +1188,7 @@ def _handle_converse_request(self, message: Message): params = signature(self.converse).parameters kwargs = {"message": message, "utterances": message.data['utterances'], - "lang": message.data['lang']} + "lang": standardize_lang_tag(message.data['lang'])} kwargs = {k: v for k, v in kwargs.items() if k in params} # call skill converse method, conditionally activating the skill @@ -1234,7 +1248,7 @@ def __handle_stop(self, message): sess = SessionManager.get(message) try: stopped = self.stop_session(sess) or self.stop() - print(f"{self.skill_id} stopped: {stopped}") + LOG.debug(f"{self.skill_id} stopped: {stopped}") if stopped: self.bus.emit(message.reply("mycroft.stop.handled", {"by": "skill:" + self.skill_id})) @@ -1421,7 +1435,7 @@ def register_vocabulary(self, entity: str, entity_type: str, @param lang: language of `entity` (default self.lang) """ keyword_type = self.alphanumeric_skill_id + entity_type - lang = lang or self.lang + lang = standardize_lang_tag(lang or self.lang) self.intent_service.register_adapt_keyword(keyword_type, entity, lang=lang) @@ -1434,7 +1448,7 @@ def register_regex(self, regex_str: str, lang: Optional[str] = None): self.log.debug('registering regex string: ' + regex_str) regex = munge_regex(regex_str, self.skill_id) re.compile(regex) # validate regex - self.intent_service.register_adapt_regex(regex, lang=lang or self.lang) + self.intent_service.register_adapt_regex(regex, lang=standardize_lang_tag(lang or self.lang)) # event/intent registering internal handlers def handle_enable_intent(self, message: Message): @@ -2057,7 +2071,7 @@ def voc_list(self, voc_filename: str, @param lang: language to get vocab for (default self.lang) @return: list of string vocab options """ - lang = lang or self.lang + lang = standardize_lang_tag(lang or self.lang) cache_key = lang + voc_filename if cache_key not in self._voc_cache: diff --git a/test/unittests/skills/test_base.py b/test/unittests/skills/test_base.py index 01fa8886..fe30c1a3 100644 --- a/test/unittests/skills/test_base.py +++ b/test/unittests/skills/test_base.py @@ -332,22 +332,22 @@ def test_register_intent_file(self): uk_intent_file = join(skill.res_dir, "locale", "uk-ua", "time.intent") # No secondary languages - skill.config_core["lang"] = "en-us" + skill.config_core["lang"] = "en-US" skill.config_core["secondary_langs"] = [] skill.register_intent_file("time.intent", Mock(__name__="test")) skill.intent_service.register_padatious_intent.assert_called_once_with( - f"{skill.skill_id}:time.intent", en_intent_file, "en-us") + f"{skill.skill_id}:time.intent", en_intent_file, "en-US") # With secondary language skill.intent_service.register_padatious_intent.reset_mock() - skill.config_core["secondary_langs"] = ["en-us", "uk-ua"] + skill.config_core["secondary_langs"] = ["en-US", "uk-UA"] skill.register_intent_file("time.intent", Mock(__name__="test")) self.assertEqual( skill.intent_service.register_padatious_intent.call_count, 2) skill.intent_service.register_padatious_intent.assert_any_call( - f"{skill.skill_id}:time.intent", en_intent_file, "en-us") + f"{skill.skill_id}:time.intent", en_intent_file, "en-US") skill.intent_service.register_padatious_intent.assert_any_call( - f"{skill.skill_id}:time.intent", uk_intent_file, "uk-ua") + f"{skill.skill_id}:time.intent", uk_intent_file, "uk-UA") def test_register_entity_file(self): skill = OVOSSkill(bus=self.bus, skill_id=self.skill_id) @@ -358,25 +358,25 @@ def test_register_entity_file(self): uk_file = join(skill.res_dir, "locale", "uk-ua", "dow.entity") # No secondary languages - skill.config_core["lang"] = "en-us" + skill.config_core["lang"] = "en-US" skill.config_core["secondary_langs"] = [] skill.register_entity_file("dow") skill.intent_service.register_padatious_entity.assert_called_once_with( f"{skill.skill_id}:dow_d446b2a6e46e7d94cdf7787e21050ff9", - en_file, "en-us") + en_file, "en-US") # With secondary language skill.intent_service.register_padatious_entity.reset_mock() - skill.config_core["secondary_langs"] = ["en-us", "uk-ua"] + skill.config_core["secondary_langs"] = ["en-US", "uk-ua"] skill.register_entity_file("dow") self.assertEqual( skill.intent_service.register_padatious_entity.call_count, 2) skill.intent_service.register_padatious_entity.assert_any_call( f"{skill.skill_id}:dow_d446b2a6e46e7d94cdf7787e21050ff9", - en_file, "en-us") + en_file, "en-US") skill.intent_service.register_padatious_entity.assert_any_call( f"{skill.skill_id}:dow_d446b2a6e46e7d94cdf7787e21050ff9", - uk_file, "uk-ua") + uk_file, "uk-UA") def test_handle_enable_intent(self): # TODO diff --git a/test/unittests/skills/test_fallback_skill.py b/test/unittests/skills/test_fallback_skill.py index 8e2cbad3..d5021a4e 100644 --- a/test/unittests/skills/test_fallback_skill.py +++ b/test/unittests/skills/test_fallback_skill.py @@ -59,7 +59,7 @@ def test_priority(self): FallbackSkill.fallback_config = {} def test_can_answer(self): - self.assertFalse(self.fallback_skill.can_answer([""], "en-us")) + self.assertFalse(self.fallback_skill.can_answer([""], "en-US")) # TODO def test_register_system_event_handlers(self): diff --git a/test/unittests/test_decorators.py b/test/unittests/test_decorators.py index 9405f4a1..ac3b0a73 100644 --- a/test/unittests/test_decorators.py +++ b/test/unittests/test_decorators.py @@ -115,7 +115,7 @@ def test_skills_abort_event(self): speak_msg = {'type': 'speak', 'data': {'utterance': 'still here', 'expect_response': False, 'meta': {'skill': 'abort.test'}, - 'lang': 'en-us'}} + 'lang': 'en-US'}} self.assertIn(start_msg, self.bus.emitted_msgs) self.assertIn(speak_msg, self.bus.emitted_msgs) self.assertTrue(self.skill.instance.my_special_var == "changed") @@ -138,7 +138,7 @@ def test_skills_abort_event(self): speak_msg = {'type': 'speak', 'data': {'utterance': 'I am dead', 'expect_response': False, 'meta': {'skill': 'abort.test'}, - 'lang': 'en-us'}} + 'lang': 'en-US'}} self.assertIn(speak_msg, self.bus.emitted_msgs) self.assertTrue(self.skill.instance.my_special_var == "default") @@ -158,7 +158,7 @@ def test_skill_stop(self): 'data': {'name': 'KillableSkill.handle_test_abort_intent'}} speak_msg = {'type': 'speak', 'data': {'utterance': 'still here', 'expect_response': False, - 'meta': {'skill': 'abort.test'}, 'lang': 'en-us'}} + 'meta': {'skill': 'abort.test'}, 'lang': 'en-US'}} self.assertIn(start_msg, self.bus.emitted_msgs) self.assertIn(speak_msg, self.bus.emitted_msgs) self.assertTrue(self.skill.instance.my_special_var == "changed") @@ -180,7 +180,7 @@ def test_skill_stop(self): speak_msg = {'type': 'speak', 'data': {'utterance': 'I am dead', 'expect_response': False, 'meta': {'skill': 'abort.test'}, - 'lang': 'en-us'}} + 'lang': 'en-US'}} self.assertIn(speak_msg, self.bus.emitted_msgs) self.assertTrue(self.skill.instance.my_special_var == "default") @@ -206,7 +206,7 @@ def test_get_response(self): 'data': {'utterance': 'this is a question', 'expect_response': True, 'meta': {'dialog': 'question', 'data': {}, 'skill': 'abort.test'}, - 'lang': 'en-us'}} + 'lang': 'en-US'}} activate_msg = {'type': 'intent.service.skills.activate', 'data': {'skill_id': 'abort.test'}} sleep(0.5) # fake wait_while_speaking @@ -232,7 +232,7 @@ def test_get_response(self): 'data': {'utterance': 'question aborted', 'expect_response': False, 'meta': {'skill': 'abort.test'}, - 'lang': 'en-us'}} + 'lang': 'en-US'}} self.assertIn(speak_msg, self.bus.emitted_msgs) def test_developer_stop_msg(self): @@ -249,7 +249,7 @@ def test_developer_stop_msg(self): 'data': {'utterance': "you can't abort me", 'expect_response': False, 'meta': {'skill': 'abort.test'}, - 'lang': 'en-us'}} + 'lang': 'en-US'}} self.assertIn(start_msg, self.bus.emitted_msgs) self.assertIn(speak_msg, self.bus.emitted_msgs) @@ -278,7 +278,7 @@ def test_developer_stop_msg(self): speak_msg = {'type': 'speak', 'data': {'utterance': 'I am dead', 'expect_response': False, 'meta': {'skill': 'abort.test'}, - 'lang': 'en-us'}} + 'lang': 'en-US'}} self.assertIn(speak_msg, self.bus.emitted_msgs) self.assertTrue(self.skill.instance.my_special_var == "default") diff --git a/test/unittests/test_intent_service_interface.py b/test/unittests/test_intent_service_interface.py index 61fea26e..363c1ecc 100644 --- a/test/unittests/test_intent_service_interface.py +++ b/test/unittests/test_intent_service_interface.py @@ -82,8 +82,8 @@ def test_register_keyword_with_aliases(self): def test_register_regex(self): intent_service = IntentServiceInterface(self.emitter) - intent_service.register_adapt_regex('.*', lang="en-us") - self.check_emitter([{'regex': '.*', 'lang': 'en-us'}]) + intent_service.register_adapt_regex('.*', lang="en-US") + self.check_emitter([{'regex': '.*', 'lang': 'en-US'}]) class KeywordIntentRegistrationTest(unittest.TestCase): diff --git a/test/unittests/test_resource_files.py b/test/unittests/test_resource_files.py index f34b44fb..e2ae7148 100644 --- a/test/unittests/test_resource_files.py +++ b/test/unittests/test_resource_files.py @@ -24,12 +24,12 @@ def test_find_resource(self): test_dir = join(dirname(__file__), "test_res") # Test valid nested request - valid_dialog = find_resource("test.dialog", test_dir, "dialog", "en-us") + valid_dialog = find_resource("test.dialog", test_dir, "dialog", "en-US") self.assertEqual(valid_dialog, Path(test_dir, "en-us", "dialog", "test.dialog")) # Test valid top-level lang resource - valid_vocab = find_resource("test.voc", test_dir, "vocab", "en-us") + valid_vocab = find_resource("test.voc", test_dir, "vocab", "en-US") self.assertEqual(valid_vocab, Path(test_dir, "en-us", "test.voc")) # Test lang-agnostic resource @@ -181,9 +181,9 @@ def tearDownClass(cls) -> None: def test_core_resources(self): from ovos_workshop.resource_files import CoreResources, SkillResources - core_res = CoreResources("en-us") + core_res = CoreResources("en-US") self.assertIsInstance(core_res, SkillResources) - self.assertEqual(core_res.language, "en-us") + self.assertEqual(core_res.language, "en-US") self.assertTrue(isdir(core_res.skill_directory)) @@ -202,9 +202,9 @@ def tearDownClass(cls) -> None: def test_user_resources(self): from ovos_workshop.resource_files import UserResources, SkillResources - user_res = UserResources("en-us", "test.skill") + user_res = UserResources("en-US", "test.skill") self.assertIsInstance(user_res, SkillResources) - self.assertEqual(user_res.language, "en-us") + self.assertEqual(user_res.language, "en-US") self.assertEqual(user_res.skill_directory, join(self.test_data_path, "mycroft", "resources", "test.skill")) From f580d2384e3ce54d9bd1149b786539948fee5a4c Mon Sep 17 00:00:00 2001 From: JarbasAl Date: Wed, 16 Oct 2024 02:21:04 +0000 Subject: [PATCH 2/3] Increment Version to 1.0.1a1 --- ovos_workshop/version.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ovos_workshop/version.py b/ovos_workshop/version.py index a3d5a948..4aed6678 100644 --- a/ovos_workshop/version.py +++ b/ovos_workshop/version.py @@ -1,6 +1,6 @@ # START_VERSION_BLOCK VERSION_MAJOR = 1 VERSION_MINOR = 0 -VERSION_BUILD = 0 -VERSION_ALPHA = 0 +VERSION_BUILD = 1 +VERSION_ALPHA = 1 # END_VERSION_BLOCK From abf4fb3818200d12c141665941575527f4f9d14a Mon Sep 17 00:00:00 2001 From: JarbasAl Date: Wed, 16 Oct 2024 02:21:31 +0000 Subject: [PATCH 3/3] Update Changelog --- CHANGELOG.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index efcad2ff..4cdbb259 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,12 +1,12 @@ # Changelog -## [1.0.0a1](https://github.com/OpenVoiceOS/OVOS-workshop/tree/1.0.0a1) (2024-10-15) +## [1.0.1a1](https://github.com/OpenVoiceOS/OVOS-workshop/tree/1.0.1a1) (2024-10-16) -[Full Changelog](https://github.com/OpenVoiceOS/OVOS-workshop/compare/0.1.7...1.0.0a1) +[Full Changelog](https://github.com/OpenVoiceOS/OVOS-workshop/compare/1.0.0...1.0.1a1) -**Breaking changes:** +**Merged pull requests:** -- refactor!:drop mycroft [\#235](https://github.com/OpenVoiceOS/OVOS-workshop/pull/235) ([JarbasAl](https://github.com/JarbasAl)) +- fix:standardize lang code [\#254](https://github.com/OpenVoiceOS/OVOS-workshop/pull/254) ([JarbasAl](https://github.com/JarbasAl))