Skip to content

Commit

Permalink
[pre-commit.ci] auto fixes from pre-commit.com hooks
Browse files Browse the repository at this point in the history
for more information, see https://pre-commit.ci
  • Loading branch information
pre-commit-ci[bot] committed Mar 14, 2024
1 parent 608d4ba commit 9b51826
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 81 deletions.
55 changes: 18 additions & 37 deletions tools/speech_data_explorer/run_data_explorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,70 +4,50 @@

from sde.run_inside_docker import run_sde_inside_docker


def parse_args():
parser = argparse.ArgumentParser(description='Speech Data Explorer')
parser.add_argument(
'manifest',
help='path to JSON manifest file',
)
parser.add_argument(
'--vocab',
help='optional vocabulary to highlight OOV words'
)
parser.add_argument(
'--port',
default='8050',
help='serving port for establishing connection'
)
'manifest', help='path to JSON manifest file',
)
parser.add_argument('--vocab', help='optional vocabulary to highlight OOV words')
parser.add_argument('--port', default='8050', help='serving port for establishing connection')
parser.add_argument(
'--disable-caching-metrics',
action='store_true',
help='disable caching metrics for errors analysis'
)
'--disable-caching-metrics', action='store_true', help='disable caching metrics for errors analysis'
)
parser.add_argument(
'--estimate-audio-metrics',
'-a',
action='store_true',
help='estimate frequency bandwidth and signal level of audio recordings',
)
)
parser.add_argument(
'--audio-base-path',
default=None,
type=str,
help='A base path for the relative paths in manifest. It defaults to manifest path.',
)
parser.add_argument(
'--debug',
'-d',
action='store_true',
help='enable debug mode'
)
)
parser.add_argument('--debug', '-d', action='store_true', help='enable debug mode')
parser.add_argument(
'--names_compared',
'-nc',
nargs=2,
type=str,
help='names of the two fields that will be compared, example: pred_text_contextnet pred_text_conformer. "pred_text_" prefix IS IMPORTANT!',
)
)
parser.add_argument(
'--show_statistics',
'-shst',
type=str,
help='field name for which you want to see statistics (optional). Example: pred_text_contextnet.',
)
parser.add_argument(
'--gpu',
'-gpu',
action='store_true',
help='use GPU-acceleration',
'--gpu', '-gpu', action='store_true', help='use GPU-acceleration',
)
parser.add_argument(
'--inside_docker',
'-dckr',
action='store_true',
help='run SDE inside Docker container',
'--inside_docker', '-dckr', action='store_true', help='run SDE inside Docker container',
)

args = parser.parse_args()

if args.inside_docker:
Expand All @@ -81,15 +61,16 @@ def parse_args():
attr_value = getattr(args, arg.replace("-", "_"))
if attr_value:
sde_args.append(f"--{arg}={attr_value}")

for arg in ["disable-caching-metrics", "estimate-audio-metrics", "debug", "gpu"]:
attr_value = getattr(args, arg.replace("-", "_"))
if attr_value:
sde_args.append(f"--{arg}")

cmd.extend(sde_args)

subprocess.run(cmd)


if __name__ == "__main__":
parse_args()
parse_args()
89 changes: 45 additions & 44 deletions tools/speech_data_explorer/sde/run_inside_docker.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import docker
import json
import logging
import os
from pathlib import Path
import logging
import json

import docker


class DockerSDE:
Expand All @@ -16,7 +17,7 @@ def __init__(self, sde_args, image_tag: str = "rapidsai-nemo-sde", data_dir_path
self.container_name = Path(self.manifest_filename).stem
self.volume = None
self.container = None

self.gpus = self.sde_args.gpu
if self.gpus:
self._set_gpus_param()
Expand All @@ -27,24 +28,22 @@ def __init__(self, sde_args, image_tag: str = "rapidsai-nemo-sde", data_dir_path
self.data_dir_path = data_dir_path
if self.data_dir_path is None:
self._set_data_dir_path()

self.sde_args_line = f"/manifest/manifest.json --port={self.sde_args.port} "
self.set_sde_args_line()


def _set_gpus_param(self):
    """Normalize ``self.gpus`` into a GPU selector string.

    ``True`` is rewritten to ``"all"`` and an integer index ``N`` is
    rewritten to ``"device=N"``.

    Raises:
        ValueError: if ``self.gpus`` is neither ``True`` nor an ``int``.
    """
    # ``bool`` is a subclass of ``int``, so the flag form must be tested
    # before the integer form.
    if self.gpus is True:
        self.gpus = "all"
    elif isinstance(self.gpus, int) and not isinstance(self.gpus, bool):
        self.gpus = f"device={self.gpus}"
    else:
        # Only exception instances can be raised; raising a plain string is
        # itself a TypeError.
        raise ValueError(f'Invalid value of "gpus" = {self.gpus}')



def _set_data_dir_path(self):
if self.sde_args.audio_base_path is not None:
self.data_dir_path = self.sde_args.audio_base_path

else:
with open(self.sde_args.manifest, 'r') as manifest:
line = manifest.readline()
Expand All @@ -65,31 +64,29 @@ def _set_data_dir_path(self):

logging.info(f"Data dir {self.data_dir_path} will be mounted to \"/data\" dir in container (mode: read-only).")


def build_docker_image(self):
    """Build the image from ``self.dockerfile_dir`` and tag it ``self.image_tag``."""
    build_kwargs = {
        "path": self.dockerfile_dir,
        "dockerfile": 'Dockerfile',
        "tag": self.image_tag,
    }
    # The build call returns a pair; neither element is used here.
    _image, _build_logs = self.client.images.build(**build_kwargs)
    logging.info(f"Image {self.image_tag} successfully built.")


def create_docker_volume(self):
    """Create the volume named ``self.manifest_volume_name`` and store it on ``self.volume``."""
    volume_name = self.manifest_volume_name
    created = self.client.volumes.create(volume_name)
    self.volume = created
    logging.info(f"Volume {self.manifest_volume_name} successfully built.")



def copy_manifest_to_volume(self):
    """Copy the host manifest into the manifest volume as ``/manifest/manifest.json``.

    A throw-away ``busybox`` container is started with the manifest's
    directory mounted read-only at ``/host`` and the volume named
    ``self.manifest_volume_name`` mounted read-write at ``/manifest``; it
    runs a single ``cp`` and is removed afterwards.
    """
    # Resolve to an absolute path first: for a bare relative name such as
    # "manifest.json" the directory part would otherwise be an empty string,
    # which is not a usable host path for a bind mount.
    manifest_path = os.path.abspath(self.sde_args.manifest)
    manifest_dirpath, manifest_filename = os.path.split(manifest_path)

    self.client.containers.run(
        image="busybox",
        remove=True,
        volumes={
            manifest_dirpath: {"bind": "/host", "mode": "ro"},
            self.manifest_volume_name: {"bind": "/manifest", "mode": "rw"},
        },
        command=["cp", f"/host/{manifest_filename}", "/manifest/manifest.json"],
    )

    logging.info(f"Manifest {self.sde_args.manifest} successfully copied to docker volume.")


def set_sde_args_line(self):
for sde_arg in ["vocab", "names_compared", "show_statistics"]:
Expand All @@ -102,35 +99,39 @@ def set_sde_args_line(self):
if attr_value:
sde_arg = sde_arg.replace("_", "-")
self.sde_args_line += f"--{sde_arg} "


def run_docker_container(self):
    """Start the SDE container detached and launch the data explorer inside it.

    The container is created from ``self.image_tag`` under the name
    ``self.container_name`` with ``self.data_dir_path`` mounted read-only at
    ``/data`` and the manifest volume mounted at ``/manifest``. The serving
    port is published on the same host port. ``SDE_ARGS`` and
    ``INIT_DATA_DIR`` are exported into the container environment. The
    resulting container handle is stored on ``self.container``.
    """
    environment_vars = {
        'SDE_ARGS': self.sde_args_line,
        'INIT_DATA_DIR': self.data_dir_path,
    }

    # Request GPU devices only when GPU use was asked for; an unconditional
    # request makes container start-up depend on a GPU runtime being present
    # on the host. ``None`` leaves the option unset.
    # NOTE(review): the device ids are still hard-coded to "0,1" - confirm
    # whether they should be derived from ``self.gpus`` instead.
    device_requests = None
    if self.gpus:
        device_requests = [docker.types.DeviceRequest(device_ids=["0,1"], capabilities=[['gpu']])]

    self.container = self.client.containers.run(
        image=self.image_tag,
        name=self.container_name,
        remove=True,
        volumes={
            self.data_dir_path: {"bind": "/data", "mode": "ro"},
            self.manifest_volume_name: {"bind": "/manifest", "mode": "rw"},
        },
        environment=environment_vars,
        tty=True,
        shm_size="8g",
        ports={self.sde_args.port: self.sde_args.port},
        ulimits=[
            docker.types.Ulimit(name='memlock', soft=-1, hard=-1),
            docker.types.Ulimit(name='stack', soft=67108864, hard=67108864),
        ],
        device_requests=device_requests,
        detach=True,
    )

    # First command runs synchronously; the explorer itself is started
    # detached, so its result is not inspected.
    self.container.exec_run("python /workspace/speech_data_explorer/sde/paths.py")
    self.container.exec_run(
        f"python /workspace/speech_data_explorer/data_explorer.py {self.sde_args_line}", stream=True, detach=True
    )

    logging.info(f"Docker container {self.container_name} successfully started.")

def run_docker_sde(self):
self.build_docker_image()
self.create_docker_volume()
Expand All @@ -144,8 +145,8 @@ def run_docker_sde(self):
responce = input("To stop it enter 'no': ")
else:
self.container.stop()


def run_sde_inside_docker(sde_args):
    """Create a ``DockerSDE`` for ``sde_args`` and run it."""
    DockerSDE(sde_args=sde_args).run_docker_sde()

0 comments on commit 9b51826

Please sign in to comment.