Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix/playback_order #74

Merged
merged 1 commit into from
Jun 18, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 55 additions & 52 deletions ovos_audio/service.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import base64
import binascii
import json
import os
import os.path
import time
Expand Down Expand Up @@ -62,6 +61,7 @@ def __init__(self, ready_hook=on_ready, error_hook=on_error,
on_stopping=stopping_hook,
on_alive=alive_hook,
on_started=started_hook)
self.playback_lock = Lock()
self.status = ProcessStatus('audio', callback_map=callbacks)
self.status.set_started()

Expand Down Expand Up @@ -293,40 +293,42 @@ def handle_speak(self, message):

Parse sentences and invoke text to speech service.
"""

# if the message is targeted and audio is not the target,
# don't synthesise speech
message.context = message.context or {}

# Get conversation ID
if 'ident' in message.context:
LOG.warning("'ident' context metadata is deprecated, use session_id instead")

sess = SessionManager.get(message)

stopwatch = Stopwatch()
stopwatch.start()

utterance = message.data['utterance']

# allow dialog transformers to rewrite speech
skill_id = message.data.get("meta", {}).get("skill") or message.context.get("skill_id")
if skill_id and skill_id not in self.dialog_transform.blacklisted_skills:
utt2, message.context = self.dialog_transform.transform(dialog=utterance,
context=message.context,
sess=sess)
if utterance != utt2:
LOG.debug(f"original dialog: {utterance}")
LOG.info(f"dialog transformed to: {utt2}")
utterance = utt2

listen = message.data.get('expect_response', False)
self.execute_tts(utterance, sess.session_id, listen, message)

stopwatch.stop()
report_timing(sess.session_id, stopwatch,
{'utterance': utterance,
'tts': self.tts.plugin_id})
# NOTE: lock is needed to avoid race conditions,
# don't allow queuing until TTS synth finishes
with self.playback_lock:
# if the message is targeted and audio is not the target,
# don't synthesise speech
message.context = message.context or {}

# Get conversation ID
if 'ident' in message.context:
LOG.warning("'ident' context metadata is deprecated, use session_id instead")

sess = SessionManager.get(message)

stopwatch = Stopwatch()
stopwatch.start()

utterance = message.data['utterance']

# allow dialog transformers to rewrite speech
skill_id = message.data.get("meta", {}).get("skill") or message.context.get("skill_id")
if skill_id and skill_id not in self.dialog_transform.blacklisted_skills:
utt2, message.context = self.dialog_transform.transform(dialog=utterance,
context=message.context,
sess=sess)
if utterance != utt2:
LOG.debug(f"original dialog: {utterance}")
LOG.info(f"dialog transformed to: {utt2}")
utterance = utt2

listen = message.data.get('expect_response', False)
self.execute_tts(utterance, sess.session_id, listen, message)

stopwatch.stop()
report_timing(sess.session_id, stopwatch,
{'utterance': utterance,
'tts': self.tts.plugin_id})

def _maybe_reload_tts(self):
"""
Expand Down Expand Up @@ -481,23 +483,24 @@ def _path_from_hexdata(hex_audio, audio_ext=None):
def handle_queue_audio(self, message):
""" Queue a sound file to play in speech thread
ensures it doesn't play over TTS """
viseme = message.data.get("viseme")
audio_file = message.data.get("uri") or \
message.data.get("filename") # backwards compat
hex_audio = message.data.get("binary_data")
audio_ext = message.data.get("audio_ext")
if hex_audio:
audio_file = self._path_from_hexdata(hex_audio, audio_ext)

if not audio_file:
raise ValueError(f"message.data needs to provide 'uri' or 'binary_data': {message.data}")
audio_file = self._resolve_sound_uri(audio_file)

listen = message.data.get("listen", False)

# expected queue contents: (data, visemes, listen, tts_id, message)
# a sound does not have a tts_id, assign that to "sounds"
TTS.queue.put((str(audio_file), viseme, listen, "sounds", message))
with self.playback_lock:
viseme = message.data.get("viseme")
audio_file = message.data.get("uri") or \
message.data.get("filename") # backwards compat
hex_audio = message.data.get("binary_data")
audio_ext = message.data.get("audio_ext")
if hex_audio:
audio_file = self._path_from_hexdata(hex_audio, audio_ext)

if not audio_file:
raise ValueError(f"message.data needs to provide 'uri' or 'binary_data': {message.data}")
audio_file = self._resolve_sound_uri(audio_file)

listen = message.data.get("listen", False)

# expected queue contents: (data, visemes, listen, tts_id, message)
# a sound does not have a tts_id, assign that to "sounds"
TTS.queue.put((str(audio_file), viseme, listen, "sounds", message))

@require_native_source()
def handle_instant_play(self, message):
Expand Down
Loading