From f5c89e7e9b7864be6b01b082bd25a18a58d25293 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Mon, 30 Dec 2024 14:01:08 -0500 Subject: [PATCH] deleted more functions from workspace.py --- benchmark/tpch/cli.py | 8 +--- dbms/postgres/cli.py | 6 +-- env/tests/gymlib_integtest_util.py | 4 +- util/workspace.py | 66 +----------------------------- 4 files changed, 8 insertions(+), 76 deletions(-) diff --git a/benchmark/tpch/cli.py b/benchmark/tpch/cli.py index 014401da..9276b7ed 100644 --- a/benchmark/tpch/cli.py +++ b/benchmark/tpch/cli.py @@ -2,6 +2,7 @@ import click from gymlib.symlinks_paths import ( + get_scale_factor_string, get_tables_dirname, get_tables_symlink_path, get_workload_suffix, @@ -13,12 +14,7 @@ from benchmark.tpch.constants import DEFAULT_TPCH_SEED, NUM_TPCH_QUERIES from util.log import DBGYM_LOGGER_NAME from util.shell import subprocess_run -from util.workspace import ( - DBGymWorkspace, - fully_resolve_path, - get_scale_factor_string, - is_fully_resolved, -) +from util.workspace import DBGymWorkspace, fully_resolve_path, is_fully_resolved TPCH_KIT_DIRNAME = "tpch-kit" diff --git a/dbms/postgres/cli.py b/dbms/postgres/cli.py index 702285ea..28dd1620 100644 --- a/dbms/postgres/cli.py +++ b/dbms/postgres/cli.py @@ -38,7 +38,7 @@ WORKSPACE_PATH_PLACEHOLDER, DBGymWorkspace, fully_resolve_path, - get_default_dbdata_parent_path, + get_tmp_path_from_workspace_path, is_fully_resolved, is_ssd, ) @@ -118,7 +118,7 @@ def _postgres_build(dbgym_workspace: DBGymWorkspace, rebuild: bool) -> None: "--dbdata-parent-path", default=None, type=Path, - help=f"The path to the parent directory of the dbdata which will be actively tuned. The default is {get_default_dbdata_parent_path(WORKSPACE_PATH_PLACEHOLDER)}.", + help=f"The path to the parent directory of the dbdata which will be actively tuned. The default is {get_tmp_path_from_workspace_path(WORKSPACE_PATH_PLACEHOLDER)}.", ) def postgres_dbdata( dbgym_workspace: DBGymWorkspace, @@ -153,7 +153,7 @@ def _postgres_dbdata( if pgbin_path is None: pgbin_path = get_pgbin_symlink_path(dbgym_workspace.dbgym_workspace_path) if dbdata_parent_path is None: - dbdata_parent_path = get_default_dbdata_parent_path( + dbdata_parent_path = get_tmp_path_from_workspace_path( dbgym_workspace.dbgym_workspace_path ) diff --git a/env/tests/gymlib_integtest_util.py b/env/tests/gymlib_integtest_util.py index 577d1ffc..3c71b498 100644 --- a/env/tests/gymlib_integtest_util.py +++ b/env/tests/gymlib_integtest_util.py @@ -16,7 +16,7 @@ from util.workspace import ( DBGymWorkspace, fully_resolve_path, - get_default_dbdata_parent_path, + get_tmp_path_from_workspace_path, get_workspace_path_from_config, ) @@ -105,7 +105,7 @@ def get_default_metadata() -> TuningMetadata: ), ), dbdata_parent_path=fully_resolve_path( - get_default_dbdata_parent_path(dbgym_workspace.dbgym_workspace_path), + get_tmp_path_from_workspace_path(dbgym_workspace.dbgym_workspace_path), ), pgbin_path=fully_resolve_path( get_pgbin_symlink_path(dbgym_workspace.dbgym_workspace_path), diff --git a/util/workspace.py b/util/workspace.py index a49547eb..59784aa2 100644 --- a/util/workspace.py +++ b/util/workspace.py @@ -11,20 +11,10 @@ from pathlib import Path from typing import IO, Any, Optional -import redis import yaml -from benchmark.tpch.constants import DEFAULT_TPCH_SEED from util.log import DBGYM_LOGGER_NAME -from util.shell import subprocess_run -# Relative paths of different folders in the codebase -DBMS_PATH = Path("dbms") -POSTGRES_PATH = DBMS_PATH / "postgres" -TUNE_PATH = Path("tune") - -# Paths of different parts of the workspace -# I made these Path objects even though they're not real paths just so they can work correctly with my other helper functions WORKSPACE_PATH_PLACEHOLDER = Path("[workspace]") @@ -47,24 +37,7 @@ def get_latest_run_path_from_workspace_path(workspace_path: Path) -> Path: # Paths of config files in the codebase. These are always relative paths. # The reason these can be relative paths instead of functions taking in codebase_path as input is because relative paths are relative to the codebase root -DEFAULT_BOOT_CONFIG_PATH = POSTGRES_PATH / "default_boot_config.yaml" - - -SCALE_FACTOR_PLACEHOLDER: str = "[scale_factor]" - - -def get_scale_factor_string(scale_factor: float | str) -> str: - if type(scale_factor) is str and scale_factor == SCALE_FACTOR_PLACEHOLDER: - return scale_factor - else: - if float(int(scale_factor)) == scale_factor: - return str(int(scale_factor)) - else: - return str(scale_factor).replace(".", "point") - - -def get_default_dbdata_parent_path(workspace_path: Path) -> Path: - return get_tmp_path_from_workspace_path(workspace_path) +DEFAULT_BOOT_CONFIG_PATH = Path("dbms") / "postgres" / "default_boot_config.yaml" class DBGymWorkspace: @@ -508,43 +481,6 @@ def try_remove_file(path: Path) -> None: pass -# TODO: move this stuff to shell.py -def restart_ray(redis_port: int) -> None: - """ - Stop and start Ray. - This is good to do between each stage to avoid bugs from carrying over across stages - """ - subprocess_run("ray stop -f") - ncpu = os.cpu_count() - # --disable-usage-stats avoids a Y/N prompt - subprocess_run( - f"OMP_NUM_THREADS={ncpu} ray start --head --port={redis_port} --num-cpus={ncpu} --disable-usage-stats" - ) - - -def make_redis_started(port: int) -> None: - """ - Start Redis if it's not already started. - Note that Ray uses Redis but does *not* use this function. It starts Redis on its own. - One current use for this function to start/stop Redis for Boot. - """ - try: - r = redis.Redis(port=port) - r.ping() - # This means Redis is running, so we do nothing - do_start_redis = False - except (redis.ConnectionError, redis.TimeoutError): - # This means Redis is not running, so we start it - do_start_redis = True - - # I'm starting Redis outside of except so that errors in r.ping get propagated correctly - if do_start_redis: - subprocess_run(f"redis-server --port {port} --daemonize yes") - # When you start Redis in daemon mode, it won't let you know if it's started, so we ping again to check - r = redis.Redis(port=port) - r.ping() - - def is_ssd(path: Path) -> bool: try: device = (