Skip to content

Commit

Permalink
std lang codes (#13)
Browse files Browse the repository at this point in the history
* fix:standardize_lang

* fix:standardize_lang

* fix:standardize_lang

* fix:standardize_lang

* fix:standardize_lang
  • Loading branch information
JarbasAl authored Oct 16, 2024
1 parent 193a3b6 commit b0c46b8
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 43 deletions.
45 changes: 32 additions & 13 deletions ovos_padatious/opm.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,11 @@
from ovos_padatious import IntentContainer as PadatiousIntentContainer
from ovos_padatious.match_data import MatchData as PadatiousIntent
from ovos_utils import flatten_list
from ovos_utils.lang import standardize_lang_tag
from ovos_utils.log import LOG
from ovos_utils.xdg_utils import xdg_data_home
from ovos_plugin_manager.templates.pipeline import PipelinePlugin, IntentMatch
from langcodes import closest_match


class PadatiousMatcher:
Expand All @@ -47,7 +49,7 @@ def _match_level(self, utterances, limit, lang=None, message: Optional[Message]
LOG.debug(f'Padatious Matching confidence > {limit}')
# call flatten in case someone is sending the old style list of tuples
utterances = flatten_list(utterances)
lang = lang or self.service.lang
lang = standardize_lang_tag(lang or self.service.lang)
padatious_intent = self.service.calc_intent(utterances, lang, message)
if padatious_intent is not None and padatious_intent.conf > limit:
skill_id = padatious_intent.name.split(':')[0]
Expand Down Expand Up @@ -92,8 +94,9 @@ def __init__(self, bus, config):
self.bus = bus

core_config = Configuration()
self.lang = core_config.get("lang", "en-us")
self.lang = standardize_lang_tag(core_config.get("lang", "en-US"))
langs = core_config.get('secondary_langs') or []
langs = [standardize_lang_tag(l) for l in langs]
if self.lang not in langs:
langs.append(self.lang)

Expand Down Expand Up @@ -211,7 +214,7 @@ def register_intent(self, message):
message (Message): message triggering action
"""
lang = message.data.get('lang', self.lang)
lang = lang.lower()
lang = standardize_lang_tag(lang)
if lang in self.containers:
self.registered_intents.append(message.data['name'])
self._register_object(message, 'intent', self.containers[lang].add_intent)
Expand All @@ -223,7 +226,7 @@ def register_entity(self, message):
message (Message): message triggering action
"""
lang = message.data.get('lang', self.lang)
lang = lang.lower()
lang = standardize_lang_tag(lang)
if lang in self.containers:
self.registered_entities.append(message.data)
self._register_object(message, 'entity',
Expand All @@ -247,16 +250,32 @@ def calc_intent(self, utterances: List[str], lang: str = None,
return None

lang = lang or self.lang
lang = lang.lower()

lang = self._get_closest_lang(lang)
if lang is None: # no intents registered for this lang
return None

sess = SessionManager.get(message)
if lang in self.containers:
intent_container = self.containers.get(lang)
intents = [_calc_padatious_intent(utt, intent_container, sess)
for utt in utterances]
intents = [i for i in intents if i is not None]
# select best
if intents:
return max(intents, key=lambda k: k.conf)

intent_container = self.containers.get(lang)
intents = [_calc_padatious_intent(utt, intent_container, sess)
for utt in utterances]
intents = [i for i in intents if i is not None]
# select best
if intents:
return max(intents, key=lambda k: k.conf)

def _get_closest_lang(self, lang: str) -> Optional[str]:
    """Pick the registered language that best matches ``lang``.

    Args:
        lang: language tag to resolve; it is passed through
            ``standardize_lang_tag`` before being compared.

    Returns:
        The tag that ``closest_match`` selects from the keys of
        ``self.containers`` when the reported distance is below 10,
        otherwise ``None``. ``None`` is also returned when there are no
        containers at all.
    """
    # Nothing registered -> nothing to match against.
    if not self.containers:
        return None

    normalized = standardize_lang_tag(lang)
    candidates = list(self.containers.keys())
    best_tag, distance = closest_match(normalized, candidates)

    # Distance scale, per
    # https://langcodes-hickford.readthedocs.io/en/sphinx/index.html#distance-values
    #   0      -> same language, possibly after filling in values and normalizing
    #   1 - 3  -> minor regional difference
    #   4 - 10 -> significant but unproblematic regional difference
    # NOTE: the comparison is strict, so a distance of exactly 10 is rejected.
    return best_tag if distance < 10 else None

def shutdown(self):
self.bus.remove('padatious:register_intent', self.register_intent)
Expand Down
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
fann2>=1.0.7, < 1.1.0
xxhash
ovos-plugin-manager>=0.0.26
ovos-workshop>=0.1.7,<2.0.0
ovos-workshop>=0.1.7,<2.0.0
ovos-utils>=0.3.4,<1.0.0
langcodes
45 changes: 16 additions & 29 deletions tests/test_container.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,13 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from time import monotonic
import unittest
import os
import pytest
import random
from os import mkdir
from os.path import isdir, join
from shutil import rmtree
import unittest
from os.path import join
from time import monotonic

import pytest

from ovos_padatious.intent_container import IntentContainer

Expand All @@ -34,7 +33,7 @@ class TestFromDisk(unittest.TestCase):
other_entities = ['else\n', 'different\n']

def setUp(self):
self.cont = IntentContainer('temp')
self.cont = IntentContainer('/tmp/cache2')

def _add_intent(self):
self.cont.add_intent('test', self.test_lines)
Expand All @@ -45,23 +44,19 @@ def _add_intent(self):
self._write_train_data()

def _write_train_data(self):

if not isdir('temp'):
mkdir('temp')

fn1 = join('temp', 'test.intent')
fn1 = join('/tmp/cache2', 'test.intent')
with open(fn1, 'w') as f:
f.writelines(self.test_lines_with_entities)

fn2 = join('temp', 'other.intent')
fn2 = join('/tmp/cache2', 'other.intent')
with open(fn2, 'w') as f:
f.writelines(self.other_lines_with_entities)

fn1 = join('temp', 'test.entity')
fn1 = join('/tmp/cache2', 'test.entity')
with open(fn1, 'w') as f:
f.writelines(self.test_entities)

fn2 = join('temp', 'other.entity')
fn2 = join('/tmp/cache2', 'other.entity')
with open(fn2, 'w') as f:
f.writelines(self.other_entities)

Expand All @@ -70,7 +65,7 @@ def test_instantiate_from_disk(self):
self._add_intent()

# instantiate from disk (load cached files)
cont = IntentContainer('temp')
cont = IntentContainer('/tmp/cache2')
cont.instantiate_from_disk()

assert len(cont.intents.train_data.sent_lists) == 0
Expand All @@ -92,21 +87,18 @@ class TestIntentContainer(unittest.TestCase):
other_entities = ['else\n', 'different\n']

def setUp(self):
self.cont = IntentContainer('temp')
self.cont = IntentContainer('/tmp/cache')

def _add_intent(self):
self.cont.add_intent('test', self.test_lines)
self.cont.add_intent('other', self.other_lines)

def test_load_intent(self):
if not isdir('temp'):
mkdir('temp')

fn1 = join('temp', 'test.txt')
fn1 = join('/tmp', 'test.txt')
with open(fn1, 'w') as f:
f.writelines(self.test_lines)

fn2 = join('temp', 'other.txt')
fn2 = join('/tmp', 'other.txt')
with open(fn2, 'w') as f:
f.writelines(self.other_lines)

Expand All @@ -122,7 +114,6 @@ def test(a, b):
test(False, False)
test(True, True)


def _create_large_intent(self, depth):
if depth == 0:
return '(a|b|)'
Expand Down Expand Up @@ -183,8 +174,8 @@ def test_calc_intents(self):

intents = self.cont.calc_intents('this is another test')
assert (
intents[0].conf > intents[1].conf) == (
intents[0].name == 'test')
intents[0].conf > intents[1].conf) == (
intents[0].name == 'test')
assert self.cont.calc_intent('this is another test').name == 'test'

def test_empty(self):
Expand Down Expand Up @@ -252,7 +243,3 @@ def test_generalize(self):
intent = self.cont.calc_intent('make a timer for 3 minute')
assert intent.name == 'timer'
assert intent.matches == {'time': '3'}

def teardown(self):
if isdir('temp'):
rmtree('temp')

0 comments on commit b0c46b8

Please sign in to comment.