From 66f5cdc0558dd8579147f811bc5e7d97088cafac Mon Sep 17 00:00:00 2001 From: Eric Date: Tue, 3 Sep 2019 05:35:28 -0500 Subject: [PATCH 1/2] add device index to client --- README.md | 10 ++++++++-- client.py | 13 ++++++++----- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index b7694e5..dc9b411 100644 --- a/README.md +++ b/README.md @@ -110,7 +110,7 @@ Recognized: delta echo foxtrot ``` λ py client.py -h usage: client.py [-h] [-s SERVER] [-a AGGRESSIVENESS] [--nospinner] - [-w SAVEWAV] + [-w SAVEWAV] [-d DEVICE] [-v] Streams raw audio data from microphone with VAD to server via WebSocket @@ -124,7 +124,13 @@ optional arguments: speech, 3 the most aggressive. Default: 3 --nospinner Disable spinner -w SAVEWAV, --savewav SAVEWAV - Save .wav files of utterences to given directory + Save .wav files of utterences to given directory. + Example for current directory: -w . + -d DEVICE, --device DEVICE + Set audio device for input, according to system. The + default utilizes system-specified recording device. + -v, --verbose Print debugging info + ``` ## Contributions diff --git a/client.py b/client.py index 8c96bd5..0c0ce2a 100644 --- a/client.py +++ b/client.py @@ -25,7 +25,7 @@ class Audio(object): CHANNELS = 1 BLOCKS_PER_SECOND = 50 - def __init__(self, callback=None, buffer_s=0, flush_queue=True): + def __init__(self, callback=None, buffer_s=0, flush_queue=True, device_index=None): def proxy_callback(in_data, frame_count, time_info, status): callback(in_data) return (None, pyaudio.paContinue) @@ -38,6 +38,7 @@ def proxy_callback(in_data, frame_count, time_info, status): channels=self.CHANNELS, rate=self.sample_rate, input=True, + input_device_index=device_index, frames_per_buffer=self.block_size, stream_callback=proxy_callback) self.stream.start_stream() @@ -89,8 +90,8 @@ def write_wav(self, filename, data): class VADAudio(Audio): """Filter & segment audio with voice activity detection.""" - def __init__(self, aggressiveness=3): - super().__init__() + def __init__(self, aggressiveness=3, device_index=None): + super().__init__(device_index=device_index) self.vad = webrtcvad.Vad(aggressiveness) def vad_collector_simple(self, pre_padding_ms, blocks=None): @@ -225,7 +226,7 @@ def main(): # TODO: compress? print_output("Connecting to '%s'..." % websocket.url) - vad_audio = VADAudio(aggressiveness=ARGS.aggressiveness) + vad_audio = VADAudio(aggressiveness=ARGS.aggressiveness, device_index=ARGS.device) print_output("Listening (ctrl-C to exit)...") audio_consumer_thread = threading.Thread(target=lambda: audio_consumer(vad_audio, websocket)) audio_consumer_thread.start() @@ -246,7 +247,7 @@ def consumer(self, blocks): else: print('.', end='', flush=True) length_ms = 0 - VADAudio(consumer) + VADAudio(consumer, device_index=ARGS.device) elif 1: VADAudio.test_vad(3) @@ -261,6 +262,8 @@ def consumer(self, blocks): help="Disable spinner") parser.add_argument('-w', '--savewav', help="Save .wav files of utterences to given directory. Example for current directory: -w .") + parser.add_argument('-d', '--device', type=int, default=None, + help="Set audio device for input, according to system. The default utilizes system-specified recording device.") parser.add_argument('-v', '--verbose', action='store_true', help="Print debugging info") ARGS = parser.parse_args() From dd46142f093188c6cc853c36b0aa67aa29a5640e Mon Sep 17 00:00:00 2001 From: Eric Date: Sat, 21 Sep 2019 06:29:40 -0500 Subject: [PATCH 2/2] add device list function --- client.py | 38 +++++++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/client.py b/client.py index 0c0ce2a..b77b5f7 100644 --- a/client.py +++ b/client.py @@ -3,6 +3,7 @@ import threading, collections, queue, os, os.path import wave import pyaudio +import pprint import webrtcvad from lomond import WebSocket, events from halo import Halo @@ -62,6 +63,21 @@ def read_loop(self, callback): for block in iter(self): callback(block) + @staticmethod + def device_list(): + """Iterate and return the audio devices in the system.""" + local_pa = pyaudio.PyAudio() + device_info = { "Input":[], "Output":[] } + device_count = local_pa.get_device_count() + for idx_dev in range(device_count): + local_info = local_pa.get_device_info_by_index(idx_dev) + for local_type in ["Output", "Input"]: + local_channels = f"max{local_type}Channels" + if local_channels in local_info and local_info[local_channels] > 0: + device_info[local_type].append({"device":idx_dev, "name":local_info["name"], + "channels":local_info[local_channels]}) + return device_info + def __iter__(self): """Generator that yields all audio blocks from microphone.""" while True: @@ -210,7 +226,11 @@ def on_event(event): print_output("Connected!") ready = True elif isinstance(event, events.Text): - if 1: print_output("Recognized: %s" % event.text) + # TODO: modify for inclusion of timing information? + # TODO: what do we do with a rich / metadata return instead? + + if len(event.text): + print_output("Recognized: %s" % event.text) elif 1: logging.debug(event) @@ -222,15 +242,21 @@ def on_event(event): websocket.close() def main(): - websocket = WebSocket(ARGS.server) - # TODO: compress? - print_output("Connecting to '%s'..." % websocket.url) + if ARGS.listdevice: + dict_devices = Audio.device_list() + print_output("Available devices...") + print_output(pprint.pprint(dict_devices)) + return 0 vad_audio = VADAudio(aggressiveness=ARGS.aggressiveness, device_index=ARGS.device) - print_output("Listening (ctrl-C to exit)...") + + websocket = WebSocket(ARGS.server) + # TODO: compress? audio_consumer_thread = threading.Thread(target=lambda: audio_consumer(vad_audio, websocket)) + print_output("Listening (ctrl-C to exit)...") audio_consumer_thread.start() + print_output("Connecting to '%s'..." % websocket.url) websocket_runner(websocket) @@ -266,6 +292,8 @@ def consumer(self, blocks): help="Set audio device for input, according to system. The default utilizes system-specified recording device.") parser.add_argument('-v', '--verbose', action='store_true', help="Print debugging info") + parser.add_argument('-l', '--listdevice', action='store_true', + help="List available devices for live capture") ARGS = parser.parse_args() if ARGS.verbose: logging.getLogger().setLevel(10)