Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor and improve padatious matching #326

Merged
merged 3 commits into from
Jul 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion ovos_core/intent_services/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
# skill_id: the skill this handler belongs to
IntentMatch = namedtuple('IntentMatch',
['intent_service', 'intent_type',
'intent_data', 'skill_id']
'intent_data', 'skill_id', 'utterance']
)


Expand Down Expand Up @@ -263,7 +263,10 @@ def handle_utterance(self, message):
match = match_func(utterances, lang, message)
if match:
break
LOG.debug(f"intent matching took: {stopwatch.time}")
if match:
message.data["utterance"] = match.utterance

if match.skill_id:
self.converse.activate_skill(match.skill_id)
# If the service didn't report back the skill_id it
Expand Down
3 changes: 2 additions & 1 deletion ovos_core/intent_services/adapt_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,8 @@ def take_best(intent, utt):
self.update_context(best_intent)
skill_id = best_intent['intent_type'].split(":")[0]
ret = ovos_core.intent_services.IntentMatch(
'Adapt', best_intent['intent_type'], best_intent, skill_id
'Adapt', best_intent['intent_type'], best_intent, skill_id,
best_intent['utterance']
)
else:
ret = None
Expand Down
2 changes: 1 addition & 1 deletion ovos_core/intent_services/commonqa_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def match(self, utterances, lang, message):
message.data["utterance"] = utterance
answered = self.handle_question(message)
if answered:
match = ovos_core.intent_services.IntentMatch('CommonQuery', None, {}, None)
match = ovos_core.intent_services.IntentMatch('CommonQuery', None, {}, None, utterance)
break
return match

Expand Down
2 changes: 1 addition & 1 deletion ovos_core/intent_services/converse_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,5 +259,5 @@ def converse_with_skills(self, utterances, lang, message):
# check if any skill wants to handle utterance
for skill_id in self._collect_converse_skills():
if self.converse(utterances, skill_id, lang, message):
return ovos_core.intent_services.IntentMatch('Converse', None, None, skill_id)
return ovos_core.intent_services.IntentMatch('Converse', None, None, skill_id, utterances[0])
return None
4 changes: 2 additions & 2 deletions ovos_core/intent_services/fallback_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def _fallback_range(self, utterances, lang, message, fb_range):
for skill_id, prio in sorted_handlers:
result = self.attempt_fallback(utterances, skill_id, lang, message)
if result:
return ovos_core.intent_services.IntentMatch('Fallback', None, {}, None)
return ovos_core.intent_services.IntentMatch('Fallback', None, {}, None, utterances[0])

# old style deprecated fallback skill singleton class
LOG.debug("checking for FallbackSkillsV1")
Expand All @@ -178,7 +178,7 @@ def _fallback_range(self, utterances, lang, message, fb_range):
response = self.bus.wait_for_response(msg, timeout=10)

if response and response.data['handled']:
return ovos_core.intent_services.IntentMatch('Fallback', None, {}, None)
return ovos_core.intent_services.IntentMatch('Fallback', None, {}, None, utterances[0])
return None

def high_prio(self, utterances, lang, message):
Expand Down
221 changes: 90 additions & 131 deletions ovos_core/intent_services/padatious_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,59 +13,58 @@
# limitations under the License.
#
"""Intent service wrapping padatious."""
from multiprocessing import Pool
import concurrent.futures
from functools import lru_cache
from os import path
from os.path import expanduser, isfile
from subprocess import call
from threading import Event
from typing import List, Optional, Tuple
from time import time as get_time, sleep
from typing import List, Optional

from ovos_bus_client.message import Message
from ovos_config.config import Configuration
from ovos_utils.xdg_utils import xdg_data_home
from ovos_config.meta import get_xdg_base
from ovos_utils import flatten_list
from ovos_utils.log import LOG
from ovos_utils.xdg_utils import xdg_data_home
from padacioso import IntentContainer as FallbackIntentContainer
from time import time as get_time, sleep

import ovos_core.intent_services
from ovos_utils import flatten_list
from ovos_utils.log import LOG
from ovos_bus_client.message import Message

try:
import padatious as _pd
from padatious.match_data import MatchData as PadatiousIntent
except ImportError:
_pd = None
# padatious is optional, this class is just for compat

class PadatiousIntent:
"""
A set of data describing how a query fits into an intent
Attributes:
name (str): Name of matched intent
sent (str): The input utterance associated with the intent
conf (float): Confidence (from 0.0 to 1.0)
matches (dict of str -> str): Key is the name of the entity and
value is the extracted part of the sentence
"""

def __init__(self, name, sent, matches=None, conf=0.0):
self.name = name
self.sent = sent
self.matches = matches or {}
self.conf = conf
class PadatiousIntent:
"""
A set of data describing how a query fits into an intent
Attributes:
name (str): Name of matched intent
sent (str): The input utterance associated with the intent
conf (float): Confidence (from 0.0 to 1.0)
matches (dict of str -> str): Key is the name of the entity and
value is the extracted part of the sentence
"""

def __getitem__(self, item):
return self.matches.__getitem__(item)
def __init__(self, name, sent, matches=None, conf=0.0):
self.name = name
self.sent = sent
self.matches = matches or {}
self.conf = conf

def __contains__(self, item):
return self.matches.__contains__(item)
def __getitem__(self, item):
return self.matches.__getitem__(item)

def get(self, key, default=None):
return self.matches.get(key, default)
def __contains__(self, item):
return self.matches.__contains__(item)

def __repr__(self):
return repr(self.__dict__)
def get(self, key, default=None):
return self.matches.get(key, default)

def __repr__(self):
return repr(self.__dict__)


class PadatiousMatcher:
Expand All @@ -74,10 +73,6 @@ class PadatiousMatcher:
def __init__(self, service):
self.service = service

self.has_result = False
self.ret = None
self.conf = None

def _match_level(self, utterances, limit, lang=None):
"""Match intent and make sure a certain level of confidence is reached.

Expand All @@ -86,29 +81,21 @@ def _match_level(self, utterances, limit, lang=None):
with optional normalized version.
limit (float): required confidence level.
"""
if self.service.is_regex_only:
LOG.debug(f'Padacioso Matching confidence > {limit}')
else:
LOG.debug(f'Padatious Matching confidence > {limit}')
# call flatten in case someone is sending the old style list of tuples
utterances = flatten_list(utterances)
if not self.has_result:
lang = lang or self.service.lang
LOG.debug(f'Padatious Matching confidence > {limit}')
if _pd:
LOG.info(f"Using legacy Padatious module")
padatious_intent = self._legacy_padatious_match(utterances,
lang)
else:
padatious_intent = self.service.threaded_calc_intent(utterances,
lang)
if padatious_intent:
skill_id = padatious_intent.name.split(':')[0]
self.ret = ovos_core.intent_services.IntentMatch(
'Padatious', padatious_intent.name,
padatious_intent.matches, skill_id)
self.conf = padatious_intent.conf
self.has_result = True
if self.conf and self.conf > limit:
return self.ret

def match_high(self, utterances, lang=None, __=None):
lang = lang or self.service.lang
padatious_intent = self.service.calc_intent(utterances, lang)
if padatious_intent is not None and padatious_intent.conf > limit:
skill_id = padatious_intent.name.split(':')[0]
return ovos_core.intent_services.IntentMatch(
'Padatious', padatious_intent.name,
padatious_intent.matches, skill_id, padatious_intent.sent)

def match_high(self, utterances, lang=None, message=None):
"""Intent matcher for high confidence.

Args:
Expand All @@ -117,7 +104,7 @@ def match_high(self, utterances, lang=None, __=None):
"""
return self._match_level(utterances, self.service.conf_high, lang)

def match_medium(self, utterances, lang=None, __=None):
def match_medium(self, utterances, lang=None, message=None):
"""Intent matcher for medium confidence.

Args:
Expand All @@ -126,7 +113,7 @@ def match_medium(self, utterances, lang=None, __=None):
"""
return self._match_level(utterances, self.service.conf_med, lang)

def match_low(self, utterances, lang=None, __=None):
def match_low(self, utterances, lang=None, message=None):
"""Intent matcher for low confidence.

Args:
Expand All @@ -135,24 +122,6 @@ def match_low(self, utterances, lang=None, __=None):
"""
return self._match_level(utterances, self.service.conf_low, lang)

def _legacy_padatious_match(self, utterances: List[str],
lang: str) -> Optional[PadatiousIntent]:
"""
Handle intent match with the Padatious intent parser.
@param utterances: List of string utterances to evaluate
@param lang: BCP-47 language of utterances
@return: PadatiousIntent if matched, else None
"""
padatious_intent = None
for utt in utterances:
intent = self.service.calc_intent(utt, lang)
if intent:
best = padatious_intent.conf if padatious_intent else 0.0
if best < intent.conf:
padatious_intent = intent
padatious_intent.matches['utterance'] = utt
return padatious_intent


class PadatiousService:
"""Service class for padatious intent matching."""
Expand All @@ -172,20 +141,14 @@ def __init__(self, bus, config):
self.conf_low = self.padatious_config.get("conf_low") or 0.5

if self.is_regex_only:
if not _pd:
LOG.info('Padatious not installed. '
'Falling back to Padacioso')
try:
call(['notify-send', 'Padatious not installed',
'Falling back to Padacioso'])
except OSError:
pass
LOG.debug('Using Padacioso intent parser.')
self.containers = {lang: FallbackIntentContainer(
self.padatious_config.get("fuzz"))
for lang in langs}
for lang in langs}
else:
intent_cache = self.padatious_config.get('intent_cache') or f"{xdg_data_home()}/{get_xdg_base()}/intent_cache"
LOG.debug('Using Padatious intent parser.')
intent_cache = self.padatious_config.get(
'intent_cache') or f"{xdg_data_home()}/{get_xdg_base()}/intent_cache"
self.containers = {
lang: _pd.IntentContainer(path.join(expanduser(intent_cache), lang))
for lang in langs}
Expand Down Expand Up @@ -337,22 +300,7 @@ def register_entity(self, message):
self._register_object(message, 'entity',
self.containers[lang].add_entity)

def calc_intent(self, utt, lang=None):
"""Cached version of container calc_intent.

This improves speed when called multiple times for different confidence
levels.

Args:
utt (str): utterance to calculate best intent for
"""
lang = lang or self.lang
lang = lang.lower()
if lang in self.containers:
return _calc_padatious_intent((utt, self.containers[lang]))

def threaded_calc_intent(self, utterances: List[str],
lang: str = None) -> Optional[PadatiousIntent]:
def calc_intent(self, utterances: List[str], lang: str = None) -> Optional[PadatiousIntent]:
"""
Get the best intent match for the given list of utterances. Utilizes a
thread pool for overall faster execution. Note that this method is NOT
Expand All @@ -361,37 +309,50 @@ def threaded_calc_intent(self, utterances: List[str],
@param lang: language of utterances
@return:
"""
if isinstance(utterances, str):
utterances = [utterances] # backwards compat when arg was a single string
lang = lang or self.lang
lang = lang.lower()
if lang in self.containers:
intents = []
intent_container = self.containers.get(lang)

if self.threaded_inference:
with Pool() as pool:
intents = (intent for intent in
pool.imap_unordered(_calc_padatious_intent,
((utt, intent_container)
for utt in utterances)))
pool.close()
pool.join()
# differences between ThreadPoolExecutor and ProcessPoolExecutor
# ThreadPoolExecutor
# Uses Threads, not processes.
# Lightweight workers, not heavyweight workers.
# Shared Memory, not inter-process communication.
# Subject to the GIL, not parallel execution.
# Suited to IO-bound Tasks, not CPU-bound tasks.
# Create 10s to 1,000s Workers, not really constrained.
#
# ProcessPoolExecutor
# Uses Processes, not threads.
# Heavyweight Workers, not lightweight workers.
# Inter-Process Communication, not shared memory.
# Not Subject to the GIL, not constrained to sequential execution.
# Suited to CPU-bound Tasks, probably not IO-bound tasks.
# Create 10s of Workers, not 100s or 1,000s of tasks.

self.workers = 4 # do the work in parallel instead of sequentially
JarbasAl marked this conversation as resolved.
Show resolved Hide resolved
with concurrent.futures.ProcessPoolExecutor(max_workers=self.workers) as executor:
future_to_source = {
executor.submit(_calc_padatious_intent,
(s, intent_container)): s
for s in utterances
}
intents = [future.result() for future in concurrent.futures.as_completed(future_to_source)]
else:
intents = (self.calc_intent(utt, lang) for utt in utterances)
padatious_intent = None
for intent in intents:
if intent:
best = \
padatious_intent.conf if padatious_intent else 0.0
if intent.conf > best: # this intent is the best so far
padatious_intent = intent
# TODO: This is for backwards-compat. but means that an
# entity "utterance" will be overridden
padatious_intent.matches['utterance'] = \
padatious_intent.sent
if intent.conf == 1.0:
LOG.debug(f"Returning perfect match")
return intent
return padatious_intent
intents = [_calc_padatious_intent(utt, intent_container) for utt in utterances]

intents = [i for i in intents if i is not None]
# select best
if intents:
return max(intents, key=lambda k: k.conf)


@lru_cache(maxsize=10) # repeat calls under different conf levels wont re-run code
def _calc_padatious_intent(*args) -> \
Optional[PadatiousIntent]:
"""
Expand All @@ -400,7 +361,6 @@ def _calc_padatious_intent(*args) -> \
@return: matched PadatiousIntent
"""
try:
LOG.info(args)
if len(args) == 1:
args = args[0]
utt = args[0]
Expand All @@ -411,9 +371,8 @@ def _calc_padatious_intent(*args) -> \
intent["matches"] = intent.pop("entities")
intent["sent"] = utt
intent = PadatiousIntent(**intent)
else:
LOG.info(f"Overriding `intent.sent` with matched utterance")
intent.sent = utt

intent.sent = utt
return intent
except Exception as e:
LOG.error(e)
Loading