Skip to content

Commit

Permalink
Fix error when saving utterances (#140)
Browse files Browse the repository at this point in the history
* Fix error when saving utterances

* Initial filename templating config

* wip [skip ci]

* wip [skip ci]

* wip [skip ci]

* Handle timezones

* wip

* wip

* Moved _TemplateFilenameFormatter to a util

* Rename special template keys and config options
  • Loading branch information
Erotemic authored Sep 16, 2024
1 parent 6111971 commit c959f2e
Show file tree
Hide file tree
Showing 2 changed files with 134 additions and 1 deletion.
109 changes: 109 additions & 0 deletions ovos_dinkum_listener/_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
import uuid
import string
from ovos_utils.time import now_local, now_utc


class _TemplateFilenameFormatter:
"""
Helper to dynamically filename parts based on a user-specified template.
Each instance of this builder can be customized to support different keys,
but some common ones are builtin like "uuid4", "now", and "utcnow"
Example:
>>> # Simple now and uuid4 keys are available by default.
>>> template = 'my_filename_{now}_{uuid4}'
>>> self = _TemplateFilenameFormatter()
>>> name = self.format(template)
>>> # xdoctest: +IGNORE_WANT
>>> print(f'name={name}')
name=my_filename_2024-09-14 18:53:22.619838-05:00_7fe91270-3266-42c1-89d9-0809b9facb9e
Example:
>>> # The now can use standard python format-string semantics
>>> template = 'my_filename_{now:%Y-%m-%dT%H%M%S%z}_{uuid4}'
>>> self = _TemplateFilenameFormatter()
>>> name = self.format(template)
>>> # xdoctest: +IGNORE_WANT
>>> print(f'name={name}')
name=my_filename_2024-09-14T185354-0500_6f0f6daf-cd81-4c5b-bf38-76a4466161c6
Example:
>>> # You can define how to handle custom keys
>>> template = '{mykey}.bar.{now:%Y-%z}-{uuid4}'
>>> self = _TemplateFilenameFormatter()
>>> @self.register('mykey')
>>> def custom_func():
... return 'myval'
>>> name = self.format(template)
>>> # xdoctest: +IGNORE_WANT
>>> print(f'name={name}')
name=myval.bar.2024--765176fa-7c80-431c-b43d-2ad14a58a249
Example:
>>> # should raise an error if template contains an unknown field
>>> template = '{doesnotexist}.bar.{now:%Y-%z}-{uuid4}'
>>> self = _TemplateFilenameFormatter()
>>> import pytest
>>> with pytest.raises(KeyError) as ex:
... name = self.format(template)
>>> # xdoctest: +IGNORE_WANT
>>> print(str(ex.value))
"Template string contained unsupported keys ['doesnotexist']. Supported keys are: ['uuid4', 'now', 'utcnow']"
"""
def __init__(self):
# import datetime as datetime_mod
# mapping of key to functions that build content for those keys
self.builders = {
'uuid4': uuid.uuid4,
'now': now_local,
'utcnow': now_utc,
}

def register(self, key):
"""
Decorator which will register a function called when the template
string contains ``key``.
"""
def _decor(func):
self.builders[key] = func
return func
return _decor

def _build_fmtkw(self, template, **kwargs):
"""
Builds the dictionary that can be passed to :func:`str.format`.
"""
builders = self.builders | kwargs

# Build the information requested for the file string.
formatter = string.Formatter()
fmtiter = formatter.parse(template)
fmtkw = {}
missing = []
for fmttup in fmtiter:
key = fmttup[1]

if key in builders:
builder = builders[key]
if callable(builder):
fmtkw[key] = builder()
else:
fmtkw[key] = builder
else:
missing.append(key)
if missing:
raise KeyError(
f'Template string contained unsupported keys {missing}. '
f'Supported keys are: {list(builders.keys())}'
)
return fmtkw

def format(self, template, **kwargs):
"""
Substitutes known keys with dynamically constructed values
"""
fmtkw = self._build_fmtkw(template, **kwargs)
text = template.format(**fmtkw)
return text
26 changes: 25 additions & 1 deletion ovos_dinkum_listener/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
from ovos_dinkum_listener.transformers import AudioTransformersService
from ovos_dinkum_listener.voice_loop import DinkumVoiceLoop, ListeningMode, ListeningState
from ovos_dinkum_listener.voice_loop.hotwords import HotwordContainer
from ovos_dinkum_listener._util import _TemplateFilenameFormatter
try:
from ovos_backend_client.api import DatasetApi
except ImportError:
Expand Down Expand Up @@ -681,7 +682,30 @@ def _save_stt(self, audio_bytes, stt_meta, save_path=None):
stt_audio_dir = Path(f"{self.default_save_path}/utterances")
stt_audio_dir.mkdir(parents=True, exist_ok=True)

filename = hash_sentence(stt_meta["transcription"])
listener = self.config.get("listener", {})

# Documented in ovos_config/mycroft.conf
default_template = "{md5}-{uuid4}"
utterance_filename = listener.get("utterance_filename", default_template)
formatter = _TemplateFilenameFormatter()

@formatter.register('md5')
def transcription_md5():
# Build a hash of the transcription
try:
# handles legacy API
text = stt_meta.get('transcription')
except KeyError:
# handles new API
# transcriptions should be : List[Tuple[str, int]]
try:
text = stt_meta.get('transcriptions')[0][0]
except IndexError:
return 'null'
return hash_sentence(text)

filename = formatter.format(utterance_filename)

mic = self.voice_loop.mic
wav_path = stt_audio_dir / f"{filename}.wav"
meta_path = stt_audio_dir / f"{filename}.json"
Expand Down

0 comments on commit c959f2e

Please sign in to comment.