From c060962c7c041abf09ba1c42aaedacaccfbb6eb5 Mon Sep 17 00:00:00 2001
From: JarbasAi <jarbasai@mailfence.com>
Date: Sat, 3 Jun 2023 00:03:44 +0100
Subject: [PATCH 1/3] refactor and improve padatious matching

merge _convert_intent into _match_level

revert rename

simplify

dont rename import for compat
---
 ovos_core/intent_services/__init__.py         |   4 +-
 ovos_core/intent_services/adapt_service.py    |   3 +-
 ovos_core/intent_services/commonqa_service.py |   2 +-
 ovos_core/intent_services/converse_service.py |   2 +-
 ovos_core/intent_services/fallback_service.py |   4 +-
 .../intent_services/padatious_service.py      | 185 ++++++------------
 .../skills/test_utterance_intents.py          |   7 +-
 7 files changed, 74 insertions(+), 133 deletions(-)

diff --git a/ovos_core/intent_services/__init__.py b/ovos_core/intent_services/__init__.py
index 048c1c103de7..36ce93ba9f61 100644
--- a/ovos_core/intent_services/__init__.py
+++ b/ovos_core/intent_services/__init__.py
@@ -36,7 +36,7 @@
 # skill_id: the skill this handler belongs to
 IntentMatch = namedtuple('IntentMatch',
                          ['intent_service', 'intent_type',
-                          'intent_data', 'skill_id']
+                          'intent_data', 'skill_id', 'utterance']
                          )
 
 
@@ -264,6 +264,8 @@ def handle_utterance(self, message):
                     if match:
                         break
             if match:
+                message.data["utterance"] = match.utterance
+
                 if match.skill_id:
                     self.converse.activate_skill(match.skill_id)
                     # If the service didn't report back the skill_id it
diff --git a/ovos_core/intent_services/adapt_service.py b/ovos_core/intent_services/adapt_service.py
index b59d795bf50b..ff371d219721 100644
--- a/ovos_core/intent_services/adapt_service.py
+++ b/ovos_core/intent_services/adapt_service.py
@@ -247,7 +247,8 @@ def take_best(intent, utt):
             self.update_context(best_intent)
             skill_id = best_intent['intent_type'].split(":")[0]
             ret = ovos_core.intent_services.IntentMatch(
-                'Adapt', best_intent['intent_type'], best_intent, skill_id
+                'Adapt', best_intent['intent_type'], best_intent, skill_id,
+                best_intent['utterance']
             )
         else:
             ret = None
diff --git a/ovos_core/intent_services/commonqa_service.py b/ovos_core/intent_services/commonqa_service.py
index 9a826ffcc6dd..a02acadb832a 100644
--- a/ovos_core/intent_services/commonqa_service.py
+++ b/ovos_core/intent_services/commonqa_service.py
@@ -104,7 +104,7 @@ def match(self, utterances, lang, message):
                 message.data["utterance"] = utterance
                 answered = self.handle_question(message)
                 if answered:
-                    match = ovos_core.intent_services.IntentMatch('CommonQuery', None, {}, None)
+                    match = ovos_core.intent_services.IntentMatch('CommonQuery', None, {}, None, utterance)
                 break
         return match
 
diff --git a/ovos_core/intent_services/converse_service.py b/ovos_core/intent_services/converse_service.py
index ba0d4918f048..bdac97671962 100644
--- a/ovos_core/intent_services/converse_service.py
+++ b/ovos_core/intent_services/converse_service.py
@@ -259,5 +259,5 @@ def converse_with_skills(self, utterances, lang, message):
         # check if any skill wants to handle utterance
         for skill_id in self._collect_converse_skills():
             if self.converse(utterances, skill_id, lang, message):
-                return ovos_core.intent_services.IntentMatch('Converse', None, None, skill_id)
+                return ovos_core.intent_services.IntentMatch('Converse', None, None, skill_id, utterances[0])
         return None
diff --git a/ovos_core/intent_services/fallback_service.py b/ovos_core/intent_services/fallback_service.py
index 29c65025b6f9..baac1ded0615 100644
--- a/ovos_core/intent_services/fallback_service.py
+++ b/ovos_core/intent_services/fallback_service.py
@@ -165,7 +165,7 @@ def _fallback_range(self, utterances, lang, message, fb_range):
         for skill_id, prio in sorted_handlers:
             result = self.attempt_fallback(utterances, skill_id, lang, message)
             if result:
-                return ovos_core.intent_services.IntentMatch('Fallback', None, {}, None)
+                return ovos_core.intent_services.IntentMatch('Fallback', None, {}, None, utterances[0])
 
         # old style deprecated fallback skill singleton class
         LOG.debug("checking for FallbackSkillsV1")
@@ -178,7 +178,7 @@ def _fallback_range(self, utterances, lang, message, fb_range):
         response = self.bus.wait_for_response(msg, timeout=10)
 
         if response and response.data['handled']:
-            return ovos_core.intent_services.IntentMatch('Fallback', None, {}, None)
+            return ovos_core.intent_services.IntentMatch('Fallback', None, {}, None, utterances[0])
         return None
 
     def high_prio(self, utterances, lang, message):
diff --git a/ovos_core/intent_services/padatious_service.py b/ovos_core/intent_services/padatious_service.py
index 70047c8e3f19..acf3687de653 100644
--- a/ovos_core/intent_services/padatious_service.py
+++ b/ovos_core/intent_services/padatious_service.py
@@ -13,59 +13,58 @@
 # limitations under the License.
 #
 """Intent service wrapping padatious."""
+from functools import lru_cache
 from multiprocessing import Pool
 from os import path
 from os.path import expanduser, isfile
-from subprocess import call
 from threading import Event
-from typing import List, Optional, Tuple
+from time import time as get_time, sleep
+from typing import List, Optional
 
-from ovos_bus_client.message import Message
 from ovos_config.config import Configuration
-from ovos_utils.xdg_utils import xdg_data_home
 from ovos_config.meta import get_xdg_base
+from ovos_utils import flatten_list
+from ovos_utils.log import LOG
+from ovos_utils.xdg_utils import xdg_data_home
 from padacioso import IntentContainer as FallbackIntentContainer
-from time import time as get_time, sleep
 
 import ovos_core.intent_services
-from ovos_utils import flatten_list
-from ovos_utils.log import LOG
+from ovos_bus_client.message import Message
 
 try:
     import padatious as _pd
-    from padatious.match_data import MatchData as PadatiousIntent
 except ImportError:
     _pd = None
-    # padatious is optional, this class is just for compat
 
-    class PadatiousIntent:
-        """
-        A set of data describing how a query fits into an intent
-        Attributes:
-            name (str): Name of matched intent
-            sent (str): The input utterance associated with the intent
-            conf (float): Confidence (from 0.0 to 1.0)
-            matches (dict of str -> str): Key is the name of the entity and
-                value is the extracted part of the sentence
-        """
 
-        def __init__(self, name, sent, matches=None, conf=0.0):
-            self.name = name
-            self.sent = sent
-            self.matches = matches or {}
-            self.conf = conf
+class PadatiousIntent:
+    """
+    A set of data describing how a query fits into an intent
+    Attributes:
+        name (str): Name of matched intent
+        sent (str): The input utterance associated with the intent
+        conf (float): Confidence (from 0.0 to 1.0)
+        matches (dict of str -> str): Key is the name of the entity and
+            value is the extracted part of the sentence
+    """
 
-        def __getitem__(self, item):
-            return self.matches.__getitem__(item)
+    def __init__(self, name, sent, matches=None, conf=0.0):
+        self.name = name
+        self.sent = sent
+        self.matches = matches or {}
+        self.conf = conf
 
-        def __contains__(self, item):
-            return self.matches.__contains__(item)
+    def __getitem__(self, item):
+        return self.matches.__getitem__(item)
 
-        def get(self, key, default=None):
-            return self.matches.get(key, default)
+    def __contains__(self, item):
+        return self.matches.__contains__(item)
 
-        def __repr__(self):
-            return repr(self.__dict__)
+    def get(self, key, default=None):
+        return self.matches.get(key, default)
+
+    def __repr__(self):
+        return repr(self.__dict__)
 
 
 class PadatiousMatcher:
@@ -74,10 +73,6 @@ class PadatiousMatcher:
     def __init__(self, service):
         self.service = service
 
-        self.has_result = False
-        self.ret = None
-        self.conf = None
-
     def _match_level(self, utterances, limit, lang=None):
         """Match intent and make sure a certain level of confidence is reached.
 
@@ -86,29 +81,21 @@ def _match_level(self, utterances, limit, lang=None):
                                          with optional normalized version.
             limit (float): required confidence level.
         """
+        if self.service.is_regex_only:
+            LOG.debug(f'Padacioso Matching confidence > {limit}')
+        else:
+            LOG.debug(f'Padatious Matching confidence > {limit}')
         # call flatten in case someone is sending the old style list of tuples
         utterances = flatten_list(utterances)
-        if not self.has_result:
-            lang = lang or self.service.lang
-            LOG.debug(f'Padatious Matching confidence > {limit}')
-            if _pd:
-                LOG.info(f"Using legacy Padatious module")
-                padatious_intent = self._legacy_padatious_match(utterances,
-                                                                lang)
-            else:
-                padatious_intent = self.service.threaded_calc_intent(utterances,
-                                                                     lang)
-            if padatious_intent:
-                skill_id = padatious_intent.name.split(':')[0]
-                self.ret = ovos_core.intent_services.IntentMatch(
-                    'Padatious', padatious_intent.name,
-                    padatious_intent.matches, skill_id)
-                self.conf = padatious_intent.conf
-            self.has_result = True
-        if self.conf and self.conf > limit:
-            return self.ret
-
-    def match_high(self, utterances, lang=None, __=None):
+        lang = lang or self.service.lang
+        padatious_intent = self.service.calc_intent(utterances, lang)
+        if padatious_intent is not None and padatious_intent.conf > limit:
+            skill_id = padatious_intent.name.split(':')[0]
+            return ovos_core.intent_services.IntentMatch(
+                'Padatious', padatious_intent.name,
+                padatious_intent.matches, skill_id, padatious_intent.sent)
+
+    def match_high(self, utterances, lang=None, message=None):
         """Intent matcher for high confidence.
 
         Args:
@@ -117,7 +104,7 @@ def match_high(self, utterances, lang=None, __=None):
         """
         return self._match_level(utterances, self.service.conf_high, lang)
 
-    def match_medium(self, utterances, lang=None, __=None):
+    def match_medium(self, utterances, lang=None, message=None):
         """Intent matcher for medium confidence.
 
         Args:
@@ -126,7 +113,7 @@ def match_medium(self, utterances, lang=None, __=None):
         """
         return self._match_level(utterances, self.service.conf_med, lang)
 
-    def match_low(self, utterances, lang=None, __=None):
+    def match_low(self, utterances, lang=None, message=None):
         """Intent matcher for low confidence.
 
         Args:
@@ -135,24 +122,6 @@ def match_low(self, utterances, lang=None, __=None):
         """
         return self._match_level(utterances, self.service.conf_low, lang)
 
-    def _legacy_padatious_match(self, utterances: List[str],
-                                lang: str) -> Optional[PadatiousIntent]:
-        """
-        Handle intent match with the Padatious intent parser.
-        @param utterances: List of string utterances to evaluate
-        @param lang: BCP-47 language of utterances
-        @return: PadatiousIntent if matched, else None
-        """
-        padatious_intent = None
-        for utt in utterances:
-            intent = self.service.calc_intent(utt, lang)
-            if intent:
-                best = padatious_intent.conf if padatious_intent else 0.0
-                if best < intent.conf:
-                    padatious_intent = intent
-                    padatious_intent.matches['utterance'] = utt
-        return padatious_intent
-
 
 class PadatiousService:
     """Service class for padatious intent matching."""
@@ -172,20 +141,14 @@ def __init__(self, bus, config):
         self.conf_low = self.padatious_config.get("conf_low") or 0.5
 
         if self.is_regex_only:
-            if not _pd:
-                LOG.info('Padatious not installed. '
-                         'Falling back to Padacioso')
-                try:
-                    call(['notify-send', 'Padatious not installed',
-                          'Falling back to Padacioso'])
-                except OSError:
-                    pass
             LOG.debug('Using Padacioso intent parser.')
             self.containers = {lang: FallbackIntentContainer(
                 self.padatious_config.get("fuzz"))
-                               for lang in langs}
+                for lang in langs}
         else:
-            intent_cache = self.padatious_config.get('intent_cache') or f"{xdg_data_home()}/{get_xdg_base()}/intent_cache"
+            LOG.debug('Using Padatious intent parser.')
+            intent_cache = self.padatious_config.get(
+                'intent_cache') or f"{xdg_data_home()}/{get_xdg_base()}/intent_cache"
             self.containers = {
                 lang: _pd.IntentContainer(path.join(expanduser(intent_cache), lang))
                 for lang in langs}
@@ -337,22 +300,7 @@ def register_entity(self, message):
             self._register_object(message, 'entity',
                                   self.containers[lang].add_entity)
 
-    def calc_intent(self, utt, lang=None):
-        """Cached version of container calc_intent.
-
-        This improves speed when called multiple times for different confidence
-        levels.
-
-        Args:
-            utt (str): utterance to calculate best intent for
-        """
-        lang = lang or self.lang
-        lang = lang.lower()
-        if lang in self.containers:
-            return _calc_padatious_intent((utt, self.containers[lang]))
-
-    def threaded_calc_intent(self, utterances: List[str],
-                             lang: str = None) -> Optional[PadatiousIntent]:
+    def calc_intent(self, utterances: List[str], lang: str = None) -> Optional[PadatiousIntent]:
         """
         Get the best intent match for the given list of utterances. Utilizes a
         thread pool for overall faster execution. Note that this method is NOT
@@ -361,6 +309,8 @@ def threaded_calc_intent(self, utterances: List[str],
         @param lang: language of utterances
         @return:
         """
+        if isinstance(utterances, str):
+            utterances = [utterances]  # backwards compat when arg was a single string
         lang = lang or self.lang
         lang = lang.lower()
         if lang in self.containers:
@@ -374,24 +324,15 @@ def threaded_calc_intent(self, utterances: List[str],
                     pool.close()
                     pool.join()
             else:
-                intents = (self.calc_intent(utt, lang) for utt in utterances)
-            padatious_intent = None
-            for intent in intents:
-                if intent:
-                    best = \
-                        padatious_intent.conf if padatious_intent else 0.0
-                    if intent.conf > best:  # this intent is the best so far
-                        padatious_intent = intent
-                        # TODO: This is for backwards-compat. but means that an
-                        #   entity "utterance" will be overridden
-                        padatious_intent.matches['utterance'] = \
-                            padatious_intent.sent
-                        if intent.conf == 1.0:
-                            LOG.debug(f"Returning perfect match")
-                            return intent
-            return padatious_intent
+                intents = [_calc_padatious_intent(utt, intent_container) for utt in utterances]
+
+            intents = [i for i in intents if i is not None]
+            # select best
+            if intents:
+                return max(intents, key=lambda k: k.conf)
 
 
+@lru_cache(maxsize=10)  # repeat calls under different conf levels wont re-run code
 def _calc_padatious_intent(*args) -> \
         Optional[PadatiousIntent]:
     """
@@ -400,7 +341,6 @@ def _calc_padatious_intent(*args) -> \
     @return: matched PadatiousIntent
     """
     try:
-        LOG.info(args)
         if len(args) == 1:
             args = args[0]
         utt = args[0]
@@ -411,9 +351,8 @@ def _calc_padatious_intent(*args) -> \
                 intent["matches"] = intent.pop("entities")
             intent["sent"] = utt
             intent = PadatiousIntent(**intent)
-        else:
-            LOG.info(f"Overriding `intent.sent` with matched utterance")
-            intent.sent = utt
+
+        intent.sent = utt
         return intent
     except Exception as e:
         LOG.error(e)
diff --git a/test/unittests/skills/test_utterance_intents.py b/test/unittests/skills/test_utterance_intents.py
index 8abe8b0d85be..1ef09d2908f6 100644
--- a/test/unittests/skills/test_utterance_intents.py
+++ b/test/unittests/skills/test_utterance_intents.py
@@ -126,16 +126,15 @@ def test_threaded_intent(self):
             utterances.append("tell me about Mycroft")
         intent_service.padatious_config['threaded_inference'] = False
         start = time()
-        intent = intent_service.threaded_calc_intent(utterances, "en-US")
+        intent = intent_service.calc_intent(utterances, "en-US")
         single_thread_time = time() - start
         self.assertEqual(intent.name, "test2")
-        self.assertEqual(intent.matches, {'thing': 'Mycroft',
-                                          'utterance': utterances[0]})
+        self.assertEqual(intent.matches, {'thing': 'Mycroft'})
         self.assertEqual(intent.sent, utterances[0])
 
         intent_service.padatious_config['threaded_inference'] = True
         start = time()
-        intent2 = intent_service.threaded_calc_intent(utterances, "en-US")
+        intent2 = intent_service.calc_intent(utterances, "en-US")
         multi_thread_time = time() - start
         self.assertEqual(intent.__dict__, intent2.__dict__)
 

From 8042a96e53ca7a5f7eb7fc3ea27e33591c68294f Mon Sep 17 00:00:00 2001
From: JarbasAi <jarbasai@mailfence.com>
Date: Wed, 12 Jul 2023 11:31:03 +0100
Subject: [PATCH 2/3] use ProcessPoolExecutor

---
 .../intent_services/padatious_service.py      | 36 ++++++++++++++-----
 1 file changed, 28 insertions(+), 8 deletions(-)

diff --git a/ovos_core/intent_services/padatious_service.py b/ovos_core/intent_services/padatious_service.py
index acf3687de653..c15d9490e33d 100644
--- a/ovos_core/intent_services/padatious_service.py
+++ b/ovos_core/intent_services/padatious_service.py
@@ -13,8 +13,8 @@
 # limitations under the License.
 #
 """Intent service wrapping padatious."""
+import concurrent.futures
 from functools import lru_cache
-from multiprocessing import Pool
 from os import path
 from os.path import expanduser, isfile
 from threading import Event
@@ -314,15 +314,35 @@ def calc_intent(self, utterances: List[str], lang: str = None) -> Optional[Padat
         lang = lang or self.lang
         lang = lang.lower()
         if lang in self.containers:
+            intents = []
             intent_container = self.containers.get(lang)
+
             if self.threaded_inference:
-                with Pool() as pool:
-                    intents = (intent for intent in
-                               pool.imap_unordered(_calc_padatious_intent,
-                                                   ((utt, intent_container)
-                                                    for utt in utterances)))
-                    pool.close()
-                    pool.join()
+                # differences between ThreadPoolExecutor and ProcessPoolExecutor
+                # ThreadPoolExecutor
+                #     Uses Threads, not processes.
+                #     Lightweight workers, not heavyweight workers.
+                #     Shared Memory, not inter-process communication.
+                #     Subject to the GIL, not parallel execution.
+                #     Suited to IO-bound Tasks, not CPU-bound tasks.
+                #     Create 10s to 1,000s Workers, not really constrained.
+                #
+                # ProcessPoolExecutor
+                #     Uses Processes, not threads.
+                #     Heavyweight Workers, not lightweight workers.
+                #     Inter-Process Communication, not shared memory.
+                #     Not Subject to the GIL, not constrained to sequential execution.
+                #     Suited to CPU-bound Tasks, probably not IO-bound tasks.
+                #     Create 10s of Workers, not 100s or 1,000s of tasks.
+
+                self.workers = 4  # do the work in parallel instead of sequentially
+                with concurrent.futures.ProcessPoolExecutor(max_workers=self.workers) as executor:
+                    future_to_source = {
+                        executor.submit(_calc_padatious_intent,
+                                        (s, intent_container)): s
+                        for s in utterances
+                    }
+                    intents = [future.result() for future in concurrent.futures.as_completed(future_to_source)]
             else:
                 intents = [_calc_padatious_intent(utt, intent_container) for utt in utterances]
 

From 6358661d43ebee4e64338959bebfeda6302e0aca Mon Sep 17 00:00:00 2001
From: JarbasAi <jarbasai@mailfence.com>
Date: Wed, 12 Jul 2023 11:50:00 +0100
Subject: [PATCH 3/3] add logs to confirm speed up

---
 ovos_core/intent_services/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ovos_core/intent_services/__init__.py b/ovos_core/intent_services/__init__.py
index 36ce93ba9f61..8b2e54b3b04f 100644
--- a/ovos_core/intent_services/__init__.py
+++ b/ovos_core/intent_services/__init__.py
@@ -263,6 +263,7 @@ def handle_utterance(self, message):
                     match = match_func(utterances, lang, message)
                     if match:
                         break
+            LOG.debug(f"intent matching took: {stopwatch.time}")
             if match:
                 message.data["utterance"] = match.utterance