From f5372f73321d560de9ee6fb45a6bac2447836b06 Mon Sep 17 00:00:00 2001 From: Michael Hansen Date: Thu, 24 Aug 2023 16:07:27 -0500 Subject: [PATCH] Refactoring + UDP --- .gitignore | 11 +++ assist_microphone/config.yaml | 8 +- assist_microphone/hass_satellite/__main__.py | 87 ++++++++++--------- assist_microphone/hass_satellite/mic.py | 27 +++++- assist_microphone/hass_satellite/snd.py | 48 +++++++++- assist_microphone/hass_satellite/state.py | 16 ++++ .../s6-overlay/s6-rc.d/assist_microphone/run | 8 ++ 7 files changed, 159 insertions(+), 46 deletions(-) create mode 100644 .gitignore create mode 100644 assist_microphone/hass_satellite/state.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..794cd0f --- /dev/null +++ b/.gitignore @@ -0,0 +1,11 @@ +.DS_Store +.idea +*.log +tmp/ + +*.py[cod] +*.egg +build +htmlcov + +.venv/ diff --git a/assist_microphone/config.yaml b/assist_microphone/config.yaml index f80f5ff..5417bdc 100644 --- a/assist_microphone/config.yaml +++ b/assist_microphone/config.yaml @@ -1,5 +1,5 @@ --- -version: 0.2.9 +version: 0.2.10 slug: assist_microphone name: assist_microphone description: Stream microphone audio to Assist @@ -16,6 +16,8 @@ options: awake_sound: true done_sound: true wake_buffer_seconds: 0.0 + udp_mic: false + udp_snd: false debug_logging: false schema: token: str @@ -26,8 +28,12 @@ schema: awake_sound: bool done_sound: bool wake_buffer_seconds: float + udp_mic: bool + udp_snd: bool debug_logging: bool init: false audio: true homeassistant_api: true +ports: + "5000/udp": null homeassistant: 2023.9.0.dev20230809 diff --git a/assist_microphone/hass_satellite/__main__.py b/assist_microphone/hass_satellite/__main__.py index 8f3f0d6..22232e1 100644 --- a/assist_microphone/hass_satellite/__main__.py +++ b/assist_microphone/hass_satellite/__main__.py @@ -1,20 +1,22 @@ #!/usr/bin/env python3 import argparse import asyncio +import contextlib +import functools import logging import shutil +import socket import sys import threading from collections import deque -from dataclasses import dataclass -from enum import Enum, auto from typing import Deque, Optional, Tuple import sounddevice as sd -from .mic import record +from .mic import record_stream, record_udp from .remote import stream -from .snd import play +from .snd import play_stream, play_udp +from .state import State, MicState from .vad import ( SileroVoiceActivityDetector, VoiceActivityDetector, @@ -24,18 +26,6 @@ _LOGGER = logging.getLogger(__name__) -class MicState(str, Enum): - NOT_RECORDING = auto() - WAIT_FOR_VAD = auto() - RECORDING = auto() - - -@dataclass -class State: - is_running: bool = True - mic: MicState = MicState.NOT_RECORDING - - async def main() -> None: parser = argparse.ArgumentParser() parser.add_argument("host", help="Home Assistant server host") @@ -76,6 +66,9 @@ async def main() -> None: # parser.add_argument("--wake-buffer-seconds", type=float, default=0) # + parser.add_argument("--udp-mic", type=int, help="UDP port to receive input audio") + parser.add_argument("--udp-snd", type=int, help="UDP port to send output audio") + # parser.add_argument( "--debug", action="store_true", help="Print DEBUG messages to the console" ) @@ -130,21 +123,45 @@ async def main() -> None: ) mic_thread.start() + # Audio output + snd_socket: Optional[socket.socket] = None + try: while True: try: + if args.udp_snd is not None: + if snd_socket is None: + snd_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + snd_stream = contextlib.nullcontext() + play = functools.partial( + play_udp, + udp_socket=snd_socket, + udp_port=args.udp_snd, + state=state, + sample_rate=16000, + volume=args.volume, + ) + else: + snd_stream = sd.RawOutputStream( + device=args.snd_device, + samplerate=snd_sample_rate, + channels=1, + dtype="int16", + ) + play = functools.partial( + play_stream, + stream=snd_stream, + sample_rate=snd_sample_rate, + volume=args.volume, + ) + if args.vad: _LOGGER.debug("Waiting for speech") await speech_detected.wait() speech_detected.clear() _LOGGER.debug("Speech detected") - with sd.RawOutputStream( - device=args.snd_device, - samplerate=snd_sample_rate, - channels=1, - dtype="int16", - ) as snd_stream: + with snd_stream: async for _timestamp, event_type, event_data in stream( host=args.host, token=args.token, @@ -157,32 +174,19 @@ async def main() -> None: if event_type == "wake_word-end": if args.awake_sound: state.mic = MicState.NOT_RECORDING - play( - media=args.awake_sound, - stream=snd_stream, - sample_rate=snd_sample_rate, - volume=args.volume, - ) + play(media=args.awake_sound) state.mic = MicState.RECORDING elif event_type == "stt-end": # Stop recording until run ends state.mic = MicState.NOT_RECORDING if args.done_sound: - play( - media=args.done_sound, - stream=snd_stream, - sample_rate=snd_sample_rate, - volume=args.volume, - ) + play(media=args.done_sound) elif event_type == "tts-end": # Play TTS output tts_url = event_data.get("tts_output", {}).get("url") if tts_url: play( - media=f"{args.protocol}://{args.host}:{args.port}{tts_url}", - stream=snd_stream, - sample_rate=snd_sample_rate, - volume=args.volume, + media=f"{args.protocol}://{args.host}:{args.port}{tts_url}" ) elif event_type in ("run-end", "error"): # Start recording for next wake word @@ -220,7 +224,12 @@ def _mic_proc( else: _LOGGER.debug("No VAD") - for ts_chunk in record(args.mic_device): + if args.udp_mic is not None: + mic_stream = record_udp(args.udp_mic, state) + else: + mic_stream = record_stream(args.mic_device) + + for ts_chunk in mic_stream: if not state.is_running: break diff --git a/assist_microphone/hass_satellite/mic.py b/assist_microphone/hass_satellite/mic.py index 5d21b61..cd58aac 100644 --- a/assist_microphone/hass_satellite/mic.py +++ b/assist_microphone/hass_satellite/mic.py @@ -1,17 +1,18 @@ -import argparse -import asyncio -import sys +import socket import time from typing import Final, Iterable, Optional, Tuple, Union import sounddevice as sd +from .state import State + _RATE: Final = 16000 +_WIDTH: Final = 2 _CHANNELS: Final = 1 _SAMPLES_PER_CHUNK = int(0.03 * _RATE) # 30ms -def record( +def record_stream( device: Optional[Union[str, int]], samples_per_chunk: int = _SAMPLES_PER_CHUNK, ) -> Iterable[Tuple[int, bytes]]: @@ -27,3 +28,21 @@ def record( chunk, _overflowed = stream.read(samples_per_chunk) chunk = bytes(chunk) yield time.monotonic_ns(), chunk + + +def record_udp( + port: int, + state: State, + host: str = "0.0.0.0", + samples_per_chunk: int = _SAMPLES_PER_CHUNK, +) -> Iterable[Tuple[int, bytes]]: + udp_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + udp_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + udp_socket.bind((host, port)) + + while True: + chunk, addr = udp_socket.recvfrom(samples_per_chunk * _WIDTH) + if state.mic_host is None: + state.mic_host = addr[0] + + yield time.monotonic_ns(), chunk diff --git a/assist_microphone/hass_satellite/snd.py b/assist_microphone/hass_satellite/snd.py index be4d2b4..b9ffdfc 100644 --- a/assist_microphone/hass_satellite/snd.py +++ b/assist_microphone/hass_satellite/snd.py @@ -1,12 +1,16 @@ import logging +import socket import subprocess import wave import sounddevice as sd -_LOGGER = logging.getLogger() +from .state import State -def play( +_LOGGER = logging.getLogger() + + +def play_stream( media: str, stream: sd.RawOutputStream, sample_rate: int, @@ -40,3 +44,43 @@ def play( while chunk: stream.write(chunk) chunk = wav_file.readframes(samples_per_chunk) + + +def play_udp( + media: str, + udp_socket: socket.socket, + udp_port: int, + state: State, + sample_rate: int, + samples_per_chunk: int = 1024, + volume: float = 1.0, +) -> None: + assert state.mic_host is not None + + cmd = [ + "ffmpeg", + "-i", + media, + "-f", + "wav", + "-ar", + str(sample_rate), + "-ac", + "1", + "-filter:a", + f"volume={volume}", + "-", + ] + _LOGGER.debug("play: %s", cmd) + + with subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL, + ) as proc: + with wave.open(proc.stdout, "rb") as wav_file: + assert wav_file.getsampwidth() == 2 + chunk = wav_file.readframes(samples_per_chunk) + while chunk: + udp_socket.sendto(chunk, (state.mic_host, udp_port)) + chunk = wav_file.readframes(samples_per_chunk) diff --git a/assist_microphone/hass_satellite/state.py b/assist_microphone/hass_satellite/state.py new file mode 100644 index 0000000..b0142be --- /dev/null +++ b/assist_microphone/hass_satellite/state.py @@ -0,0 +1,16 @@ +from dataclasses import dataclass +from enum import Enum, auto +from typing import Optional + + +class MicState(str, Enum): + NOT_RECORDING = auto() + WAIT_FOR_VAD = auto() + RECORDING = auto() + + +@dataclass +class State: + is_running: bool = True + mic: MicState = MicState.NOT_RECORDING + mic_host: Optional[str] = None diff --git a/assist_microphone/rootfs/etc/s6-overlay/s6-rc.d/assist_microphone/run b/assist_microphone/rootfs/etc/s6-overlay/s6-rc.d/assist_microphone/run index 5da9e0d..bc85422 100755 --- a/assist_microphone/rootfs/etc/s6-overlay/s6-rc.d/assist_microphone/run +++ b/assist_microphone/rootfs/etc/s6-overlay/s6-rc.d/assist_microphone/run @@ -18,6 +18,14 @@ if bashio::config.true 'done_sound'; then extra_args+=('--done-sound' '/usr/src/sounds/done.wav') fi +if bashio::config.true 'udp_mic'; then + extra_args+=('--udp-mic' 5000) +fi + +if bashio::config.true 'udp_snd'; then + extra_args+=('--udp-snd' 6055) +fi + exec python3 -m hass_satellite \ 'homeassistant' \ "$(bashio::config 'token')" \