From c060962c7c041abf09ba1c42aaedacaccfbb6eb5 Mon Sep 17 00:00:00 2001 From: JarbasAi Date: Sat, 3 Jun 2023 00:03:44 +0100 Subject: [PATCH 1/3] refactor and improve padatious matching merge _convert_intent into _match_level revert rename simplify dont rename import for compat --- ovos_core/intent_services/__init__.py | 4 +- ovos_core/intent_services/adapt_service.py | 3 +- ovos_core/intent_services/commonqa_service.py | 2 +- ovos_core/intent_services/converse_service.py | 2 +- ovos_core/intent_services/fallback_service.py | 4 +- .../intent_services/padatious_service.py | 185 ++++++------------ .../skills/test_utterance_intents.py | 7 +- 7 files changed, 74 insertions(+), 133 deletions(-) diff --git a/ovos_core/intent_services/__init__.py b/ovos_core/intent_services/__init__.py index 048c1c103de7..36ce93ba9f61 100644 --- a/ovos_core/intent_services/__init__.py +++ b/ovos_core/intent_services/__init__.py @@ -36,7 +36,7 @@ # skill_id: the skill this handler belongs to IntentMatch = namedtuple('IntentMatch', ['intent_service', 'intent_type', - 'intent_data', 'skill_id'] + 'intent_data', 'skill_id', 'utterance'] ) @@ -264,6 +264,8 @@ def handle_utterance(self, message): if match: break if match: + message.data["utterance"] = match.utterance + if match.skill_id: self.converse.activate_skill(match.skill_id) # If the service didn't report back the skill_id it diff --git a/ovos_core/intent_services/adapt_service.py b/ovos_core/intent_services/adapt_service.py index b59d795bf50b..ff371d219721 100644 --- a/ovos_core/intent_services/adapt_service.py +++ b/ovos_core/intent_services/adapt_service.py @@ -247,7 +247,8 @@ def take_best(intent, utt): self.update_context(best_intent) skill_id = best_intent['intent_type'].split(":")[0] ret = ovos_core.intent_services.IntentMatch( - 'Adapt', best_intent['intent_type'], best_intent, skill_id + 'Adapt', best_intent['intent_type'], best_intent, skill_id, + best_intent['utterance'] ) else: ret = None diff --git a/ovos_core/intent_services/commonqa_service.py b/ovos_core/intent_services/commonqa_service.py index 9a826ffcc6dd..a02acadb832a 100644 --- a/ovos_core/intent_services/commonqa_service.py +++ b/ovos_core/intent_services/commonqa_service.py @@ -104,7 +104,7 @@ def match(self, utterances, lang, message): message.data["utterance"] = utterance answered = self.handle_question(message) if answered: - match = ovos_core.intent_services.IntentMatch('CommonQuery', None, {}, None) + match = ovos_core.intent_services.IntentMatch('CommonQuery', None, {}, None, utterance) break return match diff --git a/ovos_core/intent_services/converse_service.py b/ovos_core/intent_services/converse_service.py index ba0d4918f048..bdac97671962 100644 --- a/ovos_core/intent_services/converse_service.py +++ b/ovos_core/intent_services/converse_service.py @@ -259,5 +259,5 @@ def converse_with_skills(self, utterances, lang, message): # check if any skill wants to handle utterance for skill_id in self._collect_converse_skills(): if self.converse(utterances, skill_id, lang, message): - return ovos_core.intent_services.IntentMatch('Converse', None, None, skill_id) + return ovos_core.intent_services.IntentMatch('Converse', None, None, skill_id, utterances[0]) return None diff --git a/ovos_core/intent_services/fallback_service.py b/ovos_core/intent_services/fallback_service.py index 29c65025b6f9..baac1ded0615 100644 --- a/ovos_core/intent_services/fallback_service.py +++ b/ovos_core/intent_services/fallback_service.py @@ -165,7 +165,7 @@ def _fallback_range(self, utterances, lang, message, fb_range): for skill_id, prio in sorted_handlers: result = self.attempt_fallback(utterances, skill_id, lang, message) if result: - return ovos_core.intent_services.IntentMatch('Fallback', None, {}, None) + return ovos_core.intent_services.IntentMatch('Fallback', None, {}, None, utterances[0]) # old style deprecated fallback skill singleton class LOG.debug("checking for FallbackSkillsV1") @@ -178,7 +178,7 @@ def _fallback_range(self, utterances, lang, message, fb_range): response = self.bus.wait_for_response(msg, timeout=10) if response and response.data['handled']: - return ovos_core.intent_services.IntentMatch('Fallback', None, {}, None) + return ovos_core.intent_services.IntentMatch('Fallback', None, {}, None, utterances[0]) return None def high_prio(self, utterances, lang, message): diff --git a/ovos_core/intent_services/padatious_service.py b/ovos_core/intent_services/padatious_service.py index 70047c8e3f19..acf3687de653 100644 --- a/ovos_core/intent_services/padatious_service.py +++ b/ovos_core/intent_services/padatious_service.py @@ -13,59 +13,58 @@ # limitations under the License. # """Intent service wrapping padatious.""" +from functools import lru_cache from multiprocessing import Pool from os import path from os.path import expanduser, isfile -from subprocess import call from threading import Event -from typing import List, Optional, Tuple +from time import time as get_time, sleep +from typing import List, Optional -from ovos_bus_client.message import Message from ovos_config.config import Configuration -from ovos_utils.xdg_utils import xdg_data_home from ovos_config.meta import get_xdg_base +from ovos_utils import flatten_list +from ovos_utils.log import LOG +from ovos_utils.xdg_utils import xdg_data_home from padacioso import IntentContainer as FallbackIntentContainer -from time import time as get_time, sleep import ovos_core.intent_services -from ovos_utils import flatten_list -from ovos_utils.log import LOG +from ovos_bus_client.message import Message try: import padatious as _pd - from padatious.match_data import MatchData as PadatiousIntent except ImportError: _pd = None - # padatious is optional, this class is just for compat - class PadatiousIntent: - """ - A set of data describing how a query fits into an intent - Attributes: - name (str): Name of matched intent - sent (str): The input utterance associated with the intent - conf (float): Confidence (from 0.0 to 1.0) - matches (dict of str -> str): Key is the name of the entity and - value is the extracted part of the sentence - """ - def __init__(self, name, sent, matches=None, conf=0.0): - self.name = name - self.sent = sent - self.matches = matches or {} - self.conf = conf +class PadatiousIntent: + """ + A set of data describing how a query fits into an intent + Attributes: + name (str): Name of matched intent + sent (str): The input utterance associated with the intent + conf (float): Confidence (from 0.0 to 1.0) + matches (dict of str -> str): Key is the name of the entity and + value is the extracted part of the sentence + """ - def __getitem__(self, item): - return self.matches.__getitem__(item) + def __init__(self, name, sent, matches=None, conf=0.0): + self.name = name + self.sent = sent + self.matches = matches or {} + self.conf = conf - def __contains__(self, item): - return self.matches.__contains__(item) + def __getitem__(self, item): + return self.matches.__getitem__(item) - def get(self, key, default=None): - return self.matches.get(key, default) + def __contains__(self, item): + return self.matches.__contains__(item) - def __repr__(self): - return repr(self.__dict__) + def get(self, key, default=None): + return self.matches.get(key, default) + + def __repr__(self): + return repr(self.__dict__) class PadatiousMatcher: @@ -74,10 +73,6 @@ class PadatiousMatcher: def __init__(self, service): self.service = service - self.has_result = False - self.ret = None - self.conf = None - def _match_level(self, utterances, limit, lang=None): """Match intent and make sure a certain level of confidence is reached. @@ -86,29 +81,21 @@ def _match_level(self, utterances, limit, lang=None): with optional normalized version. limit (float): required confidence level. """ + if self.service.is_regex_only: + LOG.debug(f'Padacioso Matching confidence > {limit}') + else: + LOG.debug(f'Padatious Matching confidence > {limit}') # call flatten in case someone is sending the old style list of tuples utterances = flatten_list(utterances) - if not self.has_result: - lang = lang or self.service.lang - LOG.debug(f'Padatious Matching confidence > {limit}') - if _pd: - LOG.info(f"Using legacy Padatious module") - padatious_intent = self._legacy_padatious_match(utterances, - lang) - else: - padatious_intent = self.service.threaded_calc_intent(utterances, - lang) - if padatious_intent: - skill_id = padatious_intent.name.split(':')[0] - self.ret = ovos_core.intent_services.IntentMatch( - 'Padatious', padatious_intent.name, - padatious_intent.matches, skill_id) - self.conf = padatious_intent.conf - self.has_result = True - if self.conf and self.conf > limit: - return self.ret - - def match_high(self, utterances, lang=None, __=None): + lang = lang or self.service.lang + padatious_intent = self.service.calc_intent(utterances, lang) + if padatious_intent is not None and padatious_intent.conf > limit: + skill_id = padatious_intent.name.split(':')[0] + return ovos_core.intent_services.IntentMatch( + 'Padatious', padatious_intent.name, + padatious_intent.matches, skill_id, padatious_intent.sent) + + def match_high(self, utterances, lang=None, message=None): """Intent matcher for high confidence. Args: @@ -117,7 +104,7 @@ def match_high(self, utterances, lang=None, __=None): """ return self._match_level(utterances, self.service.conf_high, lang) - def match_medium(self, utterances, lang=None, __=None): + def match_medium(self, utterances, lang=None, message=None): """Intent matcher for medium confidence. Args: @@ -126,7 +113,7 @@ def match_medium(self, utterances, lang=None, __=None): """ return self._match_level(utterances, self.service.conf_med, lang) - def match_low(self, utterances, lang=None, __=None): + def match_low(self, utterances, lang=None, message=None): """Intent matcher for low confidence. Args: @@ -135,24 +122,6 @@ def match_low(self, utterances, lang=None, __=None): """ return self._match_level(utterances, self.service.conf_low, lang) - def _legacy_padatious_match(self, utterances: List[str], - lang: str) -> Optional[PadatiousIntent]: - """ - Handle intent match with the Padatious intent parser. - @param utterances: List of string utterances to evaluate - @param lang: BCP-47 language of utterances - @return: PadatiousIntent if matched, else None - """ - padatious_intent = None - for utt in utterances: - intent = self.service.calc_intent(utt, lang) - if intent: - best = padatious_intent.conf if padatious_intent else 0.0 - if best < intent.conf: - padatious_intent = intent - padatious_intent.matches['utterance'] = utt - return padatious_intent - class PadatiousService: """Service class for padatious intent matching.""" @@ -172,20 +141,14 @@ def __init__(self, bus, config): self.conf_low = self.padatious_config.get("conf_low") or 0.5 if self.is_regex_only: - if not _pd: - LOG.info('Padatious not installed. ' - 'Falling back to Padacioso') - try: - call(['notify-send', 'Padatious not installed', - 'Falling back to Padacioso']) - except OSError: - pass LOG.debug('Using Padacioso intent parser.') self.containers = {lang: FallbackIntentContainer( self.padatious_config.get("fuzz")) - for lang in langs} + for lang in langs} else: - intent_cache = self.padatious_config.get('intent_cache') or f"{xdg_data_home()}/{get_xdg_base()}/intent_cache" + LOG.debug('Using Padatious intent parser.') + intent_cache = self.padatious_config.get( + 'intent_cache') or f"{xdg_data_home()}/{get_xdg_base()}/intent_cache" self.containers = { lang: _pd.IntentContainer(path.join(expanduser(intent_cache), lang)) for lang in langs} @@ -337,22 +300,7 @@ def register_entity(self, message): self._register_object(message, 'entity', self.containers[lang].add_entity) - def calc_intent(self, utt, lang=None): - """Cached version of container calc_intent. - - This improves speed when called multiple times for different confidence - levels. - - Args: - utt (str): utterance to calculate best intent for - """ - lang = lang or self.lang - lang = lang.lower() - if lang in self.containers: - return _calc_padatious_intent((utt, self.containers[lang])) - - def threaded_calc_intent(self, utterances: List[str], - lang: str = None) -> Optional[PadatiousIntent]: + def calc_intent(self, utterances: List[str], lang: str = None) -> Optional[PadatiousIntent]: """ Get the best intent match for the given list of utterances. Utilizes a thread pool for overall faster execution. Note that this method is NOT @@ -361,6 +309,8 @@ def threaded_calc_intent(self, utterances: List[str], @param lang: language of utterances @return: """ + if isinstance(utterances, str): + utterances = [utterances] # backwards compat when arg was a single string lang = lang or self.lang lang = lang.lower() if lang in self.containers: @@ -374,24 +324,15 @@ def threaded_calc_intent(self, utterances: List[str], pool.close() pool.join() else: - intents = (self.calc_intent(utt, lang) for utt in utterances) - padatious_intent = None - for intent in intents: - if intent: - best = \ - padatious_intent.conf if padatious_intent else 0.0 - if intent.conf > best: # this intent is the best so far - padatious_intent = intent - # TODO: This is for backwards-compat. but means that an - # entity "utterance" will be overridden - padatious_intent.matches['utterance'] = \ - padatious_intent.sent - if intent.conf == 1.0: - LOG.debug(f"Returning perfect match") - return intent - return padatious_intent + intents = [_calc_padatious_intent(utt, intent_container) for utt in utterances] + + intents = [i for i in intents if i is not None] + # select best + if intents: + return max(intents, key=lambda k: k.conf) +@lru_cache(maxsize=10) # repeat calls under different conf levels wont re-run code def _calc_padatious_intent(*args) -> \ Optional[PadatiousIntent]: """ @@ -400,7 +341,6 @@ def _calc_padatious_intent(*args) -> \ @return: matched PadatiousIntent """ try: - LOG.info(args) if len(args) == 1: args = args[0] utt = args[0] @@ -411,9 +351,8 @@ def _calc_padatious_intent(*args) -> \ intent["matches"] = intent.pop("entities") intent["sent"] = utt intent = PadatiousIntent(**intent) - else: - LOG.info(f"Overriding `intent.sent` with matched utterance") - intent.sent = utt + + intent.sent = utt return intent except Exception as e: LOG.error(e) diff --git a/test/unittests/skills/test_utterance_intents.py b/test/unittests/skills/test_utterance_intents.py index 8abe8b0d85be..1ef09d2908f6 100644 --- a/test/unittests/skills/test_utterance_intents.py +++ b/test/unittests/skills/test_utterance_intents.py @@ -126,16 +126,15 @@ def test_threaded_intent(self): utterances.append("tell me about Mycroft") intent_service.padatious_config['threaded_inference'] = False start = time() - intent = intent_service.threaded_calc_intent(utterances, "en-US") + intent = intent_service.calc_intent(utterances, "en-US") single_thread_time = time() - start self.assertEqual(intent.name, "test2") - self.assertEqual(intent.matches, {'thing': 'Mycroft', - 'utterance': utterances[0]}) + self.assertEqual(intent.matches, {'thing': 'Mycroft'}) self.assertEqual(intent.sent, utterances[0]) intent_service.padatious_config['threaded_inference'] = True start = time() - intent2 = intent_service.threaded_calc_intent(utterances, "en-US") + intent2 = intent_service.calc_intent(utterances, "en-US") multi_thread_time = time() - start self.assertEqual(intent.__dict__, intent2.__dict__) From 8042a96e53ca7a5f7eb7fc3ea27e33591c68294f Mon Sep 17 00:00:00 2001 From: JarbasAi Date: Wed, 12 Jul 2023 11:31:03 +0100 Subject: [PATCH 2/3] use ProcessPoolExecutor --- .../intent_services/padatious_service.py | 36 ++++++++++++++----- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/ovos_core/intent_services/padatious_service.py b/ovos_core/intent_services/padatious_service.py index acf3687de653..c15d9490e33d 100644 --- a/ovos_core/intent_services/padatious_service.py +++ b/ovos_core/intent_services/padatious_service.py @@ -13,8 +13,8 @@ # limitations under the License. # """Intent service wrapping padatious.""" +import concurrent.futures from functools import lru_cache -from multiprocessing import Pool from os import path from os.path import expanduser, isfile from threading import Event @@ -314,15 +314,35 @@ def calc_intent(self, utterances: List[str], lang: str = None) -> Optional[Padat lang = lang or self.lang lang = lang.lower() if lang in self.containers: + intents = [] intent_container = self.containers.get(lang) + if self.threaded_inference: - with Pool() as pool: - intents = (intent for intent in - pool.imap_unordered(_calc_padatious_intent, - ((utt, intent_container) - for utt in utterances))) - pool.close() - pool.join() + # differences between ThreadPoolExecutor and ProcessPoolExecutor + # ThreadPoolExecutor + # Uses Threads, not processes. + # Lightweight workers, not heavyweight workers. + # Shared Memory, not inter-process communication. + # Subject to the GIL, not parallel execution. + # Suited to IO-bound Tasks, not CPU-bound tasks. + # Create 10s to 1,000s Workers, not really constrained. + # + # ProcessPoolExecutor + # Uses Processes, not threads. + # Heavyweight Workers, not lightweight workers. + # Inter-Process Communication, not shared memory. + # Not Subject to the GIL, not constrained to sequential execution. + # Suited to CPU-bound Tasks, probably not IO-bound tasks. + # Create 10s of Workers, not 100s or 1,000s of tasks. + + self.workers = 4 # do the work in parallel instead of sequentially + with concurrent.futures.ProcessPoolExecutor(max_workers=self.workers) as executor: + future_to_source = { + executor.submit(_calc_padatious_intent, + (s, intent_container)): s + for s in utterances + } + intents = [future.result() for future in concurrent.futures.as_completed(future_to_source)] else: intents = [_calc_padatious_intent(utt, intent_container) for utt in utterances] From 6358661d43ebee4e64338959bebfeda6302e0aca Mon Sep 17 00:00:00 2001 From: JarbasAi Date: Wed, 12 Jul 2023 11:50:00 +0100 Subject: [PATCH 3/3] add logs to confirm speed up --- ovos_core/intent_services/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ovos_core/intent_services/__init__.py b/ovos_core/intent_services/__init__.py index 36ce93ba9f61..8b2e54b3b04f 100644 --- a/ovos_core/intent_services/__init__.py +++ b/ovos_core/intent_services/__init__.py @@ -263,6 +263,7 @@ def handle_utterance(self, message): match = match_func(utterances, lang, message) if match: break + LOG.debug(f"intent matching took: {stopwatch.time}") if match: message.data["utterance"] = match.utterance