Skip to content

Commit

Permalink
feat/common_qa_class (#61)
Browse files Browse the repository at this point in the history
  • Loading branch information
JarbasAl authored Apr 14, 2023
1 parent 480603c commit 210c568
Show file tree
Hide file tree
Showing 4 changed files with 321 additions and 0 deletions.
24 changes: 24 additions & 0 deletions ovos_workshop/res/text/cs-cz/noise_words.list
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
kde
co je
který
jim
oni
kdy
co
to
bude
od
z
že
také
kdo
jak
a
ale
také
proč
pro
je
to
nebo
do
52 changes: 52 additions & 0 deletions ovos_workshop/res/text/de-de/noise_words.list
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
wo
wohin
sie
ihnen
sie
man
wann
als
wo
was
welcher
welche
welches
der
die
das
dass
daß
werden
werde
wirst
wird
werdet
wollen
willst
von
auch
wer
wie
tat
taten
und
aber
auch
warum
für
ist
es
tun
tut
oder
zu
auf
bis
von
aus
um
ein
einer
eines
mal
bitte
30 changes: 30 additions & 0 deletions ovos_workshop/res/text/en-us/noise_words.list
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
where
what's
which
them
they
when
what
that
will
from
that
also
who
how
did
and
but
the
too
why
for
is
it
do
or
to
of
a


215 changes: 215 additions & 0 deletions ovos_workshop/skills/common_query_skill.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from abc import abstractmethod
from enum import IntEnum
from os.path import dirname

from ovos_utils.file_utils import resolve_resource_file
from ovos_utils.log import LOG

from ovos_workshop.skills.ovos import OVOSSkill


class CQSMatchLevel(IntEnum):
    """How specifically a skill was able to answer a common query."""

    # Skill could find a specific answer for the question
    EXACT = 1
    # Skill could find an answer from a category in the query
    CATEGORY = 2
    # The query could be processed as a general query
    GENERAL = 3


# Parallel enum with the same member names as CQSMatchLevel, intended for
# skills whose answer comes with visual media.
CQSVisualMatchLevel = IntEnum(
    'CQSVisualMatchLevel',
    [level.name for level in CQSMatchLevel],
)

"""these are for the confidence calculation"""
# Tuning constants for the answer-confidence calculation.

# Score credited for every topic word that also occurs in the answer.
TOPIC_MATCH_RELEVANCE = 5

# Factor applied to the relevance score so it outweighs the other terms.
RELEVANCE_MULTIPLIER = 2

# Longer answers are preferred, but words past this count earn no credit.
MAX_ANSWER_LEN_FOR_CONFIDENCE = 50

# Divisor for the word-count bonus; a larger value means the answer
# length influences the confidence less.
WORD_COUNT_DIVISOR = 100


class CommonQuerySkill(OVOSSkill):
    """Question answering skills should be based on this class.

    The skill author needs to implement `CQS_match_query_phrase` returning an
    answer and can optionally implement `CQS_action` to perform additional
    actions if the skill's answer is selected.

    This class works in conjunction with skill-query which collects
    answers from several skills presenting the best one available.
    """

    def __init__(self, name=None, bus=None):
        """Load the noise word list for the skill language and set up the
        per-level base confidences.

        Args:
            name: forwarded unchanged to the base skill constructor
            bus: forwarded unchanged to the base skill constructor
        """
        super().__init__(name, bus)
        noise_words_filepath = f"text/{self.lang}/noise_words.list"
        default_res = f"{dirname(dirname(__file__))}/res/text/{self.lang}/noise_words.list"
        noise_words_filename = resolve_resource_file(noise_words_filepath) or \
            resolve_resource_file(default_res)

        # Words stripped from a matched phrase before relevance scoring.
        self.translated_noise_words = []
        if noise_words_filename:
            # The bundled lists contain non-ASCII words, so do not depend on
            # the platform default encoding.
            with open(noise_words_filename, encoding="utf-8") as f:
                # Whitespace-separated tokens (multi-word lines are split
                # into their individual words).
                self.translated_noise_words = f.read().split()

        # these should probably be configurable
        self.level_confidence = {
            CQSMatchLevel.EXACT: 0.9,
            CQSMatchLevel.CATEGORY: 0.6,
            CQSMatchLevel.GENERAL: 0.5
        }

    def bind(self, bus):
        """Overrides the default bind method of the base skill.

        This registers messagebus handlers for the skill during startup
        but is nothing the skill author needs to consider.

        Args:
            bus: messagebus client; nothing is registered when it is falsy
        """
        if bus:
            super().bind(bus)
            self.add_event('question:query', self.__handle_question_query, speak_errors=False)
            self.add_event('question:action', self.__handle_query_action, speak_errors=False)

    def __handle_question_query(self, message):
        """Message handler for question:query.

        Announces that the skill is searching, runs `CQS_match_query_phrase`
        and replies with either the answer and its confidence or a
        "not searching anymore" notice.
        """
        search_phrase = message.data["phrase"]
        message.context["skill_id"] = self.skill_id
        # First, notify the requestor that we are attempting to handle
        # (this extends a timeout while this skill looks for a match)
        self.bus.emit(message.response({"phrase": search_phrase,
                                        "skill_id": self.skill_id,
                                        "searching": True}))

        # Now invoke the CQS handler to let the skill perform its search
        try:
            result = self.CQS_match_query_phrase(search_phrase)
        except Exception:
            # A failing skill must not break the query round, but
            # SystemExit/KeyboardInterrupt are allowed to propagate.
            LOG.exception(f"error matching {search_phrase} with {self.skill_id}")
            result = None

        if result:
            match = result[0]
            level = result[1]
            answer = result[2]
            callback = result[3] if len(result) > 3 else None
            confidence = self.__calc_confidence(
                match, search_phrase, level, answer)
            self.bus.emit(message.response({"phrase": search_phrase,
                                            "skill_id": self.skill_id,
                                            "answer": answer,
                                            "callback_data": callback,
                                            "conf": confidence}))
        else:
            # Signal we are done (can't handle it)
            self.bus.emit(message.response({"phrase": search_phrase,
                                            "skill_id": self.skill_id,
                                            "searching": False}))

    def remove_noise(self, phrase):
        """Remove noise words to produce the essence of the question.

        Args:
            phrase (str): text to clean up

        Returns:
            str: the phrase without any word listed in
                `self.translated_noise_words`, with whitespace collapsed to
                single spaces and no leading/trailing whitespace.
        """
        # Normalise to single-space separated words padded on both sides so
        # every word, including the first and last, is framed by spaces.
        phrase = ' ' + ' '.join(phrase.split()) + ' '
        for word in self.translated_noise_words:
            mtch = ' ' + word + ' '
            # str.replace skips overlapping occurrences, so adjacent
            # repeats ("the the") need more than one pass.
            while mtch in phrase:
                phrase = phrase.replace(mtch, ' ')
        return phrase.strip()

    def __calc_confidence(self, match, phrase, level, answer):
        """Score how good `answer` is for the query.

        The score is the base confidence of `level` plus bonuses for the
        share of the phrase that was matched, the number of sentences in
        the answer, how many topic words occur in the answer, and the
        answer length.

        Args:
            match (str): part of the phrase the skill matched
            phrase (str): the full query phrase
            level: key into `self.level_confidence`
            answer (str): the answer text

        Returns:
            float: confidence score (not clamped to 1.0)
        """
        # Assume the more of the words that get consumed, the better the match
        phrase_words = phrase.split()
        if phrase_words:
            consumed_pct = min(len(match.split()) / len(phrase_words), 1.0)
        else:
            # Empty phrase: nothing could be consumed (avoids division by 0)
            consumed_pct = 0.0
        consumed_pct /= 10

        # bonus for more sentences
        num_sentences = len(answer.split(".")) / 10

        # extract topic
        topic = self.remove_noise(match)

        # calculate relevance
        # NOTE(review): topic words are compared case-sensitively against
        # the lowercased answer - confirm whether that is intended.
        answer = answer.lower()
        # split() (not split(' ')) so an empty topic contributes no words;
        # an empty string would otherwise "match" every answer.
        matches = sum(TOPIC_MATCH_RELEVANCE
                      for word in topic.split() if word in answer)

        answer_size = min(MAX_ANSWER_LEN_FOR_CONFIDENCE,
                          len(answer.split(" ")))

        relevance = 0.0
        if answer_size > 0:
            relevance = matches / answer_size
        relevance = relevance * RELEVANCE_MULTIPLIER

        # extra credit for more words up to a point
        wc_mod = answer_size / WORD_COUNT_DIVISOR * 2

        return (self.level_confidence[level] + consumed_pct +
                num_sentences + relevance + wc_mod)

    def __handle_query_action(self, message):
        """Message handler for question:action.

        Extracts phrase and data from the message and forwards them to the
        skill's CQS_action method.
        """
        # .get(): a message without a skill_id is simply not for us
        if message.data.get("skill_id") != self.skill_id:
            # Not for this skill!
            return
        phrase = message.data["phrase"]
        data = message.data.get("callback_data")
        # Invoke derived class to perform its follow-up action
        self.CQS_action(phrase, data)

    @abstractmethod
    def CQS_match_query_phrase(self, phrase):
        """Analyze phrase to see if this skill can answer it.

        Needs to be implemented by the skill.

        Args:
            phrase (str): User phrase, "What is an aardvark"

        Returns:
            (match, CQSMatchLevel, answer[, callback_data]) or None: Tuple
                containing a string with the appropriate matching phrase,
                the match level, the answer text, and optionally data to
                return in the callback if the match is selected.
        """
        # Derived classes must implement this, e.g.
        return None

    def CQS_action(self, phrase, data):
        """Take additional action IF the skill is selected.

        The speech is handled by the common query but if the chosen skill
        wants to display media, set a context or prepare for sending
        information over e-mail this can be implemented here.

        Args:
            phrase (str): User phrase from the query
            data (dict): Callback data specified in CQS_match_query_phrase()
        """
        # Derived classes may implement this if they use additional media
        # or wish to set context after being called.
        return None

0 comments on commit 210c568

Please sign in to comment.