Skip to content

Commit

Permalink
deleted more functions from workspace.py
Browse files Browse the repository at this point in the history
  • Loading branch information
wangpatrick57 committed Dec 30, 2024
1 parent e3ac705 commit f5c89e7
Show file tree
Hide file tree
Showing 4 changed files with 8 additions and 76 deletions.
8 changes: 2 additions & 6 deletions benchmark/tpch/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import click
from gymlib.symlinks_paths import (
get_scale_factor_string,
get_tables_dirname,
get_tables_symlink_path,
get_workload_suffix,
Expand All @@ -13,12 +14,7 @@
from benchmark.tpch.constants import DEFAULT_TPCH_SEED, NUM_TPCH_QUERIES
from util.log import DBGYM_LOGGER_NAME
from util.shell import subprocess_run
from util.workspace import (
DBGymWorkspace,
fully_resolve_path,
get_scale_factor_string,
is_fully_resolved,
)
from util.workspace import DBGymWorkspace, fully_resolve_path, is_fully_resolved

TPCH_KIT_DIRNAME = "tpch-kit"

Expand Down
6 changes: 3 additions & 3 deletions dbms/postgres/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
WORKSPACE_PATH_PLACEHOLDER,
DBGymWorkspace,
fully_resolve_path,
get_default_dbdata_parent_path,
get_tmp_path_from_workspace_path,
is_fully_resolved,
is_ssd,
)
Expand Down Expand Up @@ -118,7 +118,7 @@ def _postgres_build(dbgym_workspace: DBGymWorkspace, rebuild: bool) -> None:
"--dbdata-parent-path",
default=None,
type=Path,
help=f"The path to the parent directory of the dbdata which will be actively tuned. The default is {get_default_dbdata_parent_path(WORKSPACE_PATH_PLACEHOLDER)}.",
help=f"The path to the parent directory of the dbdata which will be actively tuned. The default is {get_tmp_path_from_workspace_path(WORKSPACE_PATH_PLACEHOLDER)}.",
)
def postgres_dbdata(
dbgym_workspace: DBGymWorkspace,
Expand Down Expand Up @@ -153,7 +153,7 @@ def _postgres_dbdata(
if pgbin_path is None:
pgbin_path = get_pgbin_symlink_path(dbgym_workspace.dbgym_workspace_path)
if dbdata_parent_path is None:
dbdata_parent_path = get_default_dbdata_parent_path(
dbdata_parent_path = get_tmp_path_from_workspace_path(
dbgym_workspace.dbgym_workspace_path
)

Expand Down
4 changes: 2 additions & 2 deletions env/tests/gymlib_integtest_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from util.workspace import (
DBGymWorkspace,
fully_resolve_path,
get_default_dbdata_parent_path,
get_tmp_path_from_workspace_path,
get_workspace_path_from_config,
)

Expand Down Expand Up @@ -105,7 +105,7 @@ def get_default_metadata() -> TuningMetadata:
),
),
dbdata_parent_path=fully_resolve_path(
get_default_dbdata_parent_path(dbgym_workspace.dbgym_workspace_path),
get_tmp_path_from_workspace_path(dbgym_workspace.dbgym_workspace_path),
),
pgbin_path=fully_resolve_path(
get_pgbin_symlink_path(dbgym_workspace.dbgym_workspace_path),
Expand Down
66 changes: 1 addition & 65 deletions util/workspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,10 @@
from pathlib import Path
from typing import IO, Any, Optional

import redis
import yaml

from benchmark.tpch.constants import DEFAULT_TPCH_SEED
from util.log import DBGYM_LOGGER_NAME
from util.shell import subprocess_run

# Relative paths of different folders in the codebase
DBMS_PATH = Path("dbms")
POSTGRES_PATH = DBMS_PATH / "postgres"
TUNE_PATH = Path("tune")

# Paths of different parts of the workspace
# These are Path objects, even though they are not real paths, so that they work with the path helper functions
WORKSPACE_PATH_PLACEHOLDER = Path("[workspace]")


Expand All @@ -47,24 +37,7 @@ def get_latest_run_path_from_workspace_path(workspace_path: Path) -> Path:

# Paths of config files in the codebase. These are always relative paths.
# These can be plain relative paths, rather than functions that take codebase_path as input, because they are relative to the codebase root
DEFAULT_BOOT_CONFIG_PATH = POSTGRES_PATH / "default_boot_config.yaml"


SCALE_FACTOR_PLACEHOLDER: str = "[scale_factor]"


def get_scale_factor_string(scale_factor: float | str) -> str:
if type(scale_factor) is str and scale_factor == SCALE_FACTOR_PLACEHOLDER:
return scale_factor
else:
if float(int(scale_factor)) == scale_factor:
return str(int(scale_factor))
else:
return str(scale_factor).replace(".", "point")


def get_default_dbdata_parent_path(workspace_path: Path) -> Path:
    """
    Return the default parent directory for dbdata.

    The default is whatever get_tmp_path_from_workspace_path() returns for the
    given workspace path.
    """
    default_parent_path = get_tmp_path_from_workspace_path(workspace_path)
    return default_parent_path
DEFAULT_BOOT_CONFIG_PATH = Path("dbms") / "postgres" / "default_boot_config.yaml"


class DBGymWorkspace:
Expand Down Expand Up @@ -508,43 +481,6 @@ def try_remove_file(path: Path) -> None:
pass


# TODO: move this stuff to shell.py
def restart_ray(redis_port: int) -> None:
    """
    Stop and start Ray.
    This is good to do between each stage to avoid bugs from carrying over across stages.

    Args:
        redis_port: The port passed to `ray start --head` via `--port`.
    """
    subprocess_run("ray stop -f")
    # os.cpu_count() returns None when the CPU count cannot be determined.
    # Fall back to 1 so the command never contains the literal string "None".
    ncpu = os.cpu_count() or 1
    # --disable-usage-stats avoids a Y/N prompt
    subprocess_run(
        f"OMP_NUM_THREADS={ncpu} ray start --head --port={redis_port} --num-cpus={ncpu} --disable-usage-stats"
    )


def make_redis_started(port: int) -> None:
    """
    Ensure a Redis server is running on the given port, starting one if needed.

    Ray also uses Redis, but it launches its own instance and does *not* go
    through this function. One current use of this function is managing Redis
    for Boot.
    """
    needs_start = False
    try:
        redis.Redis(port=port).ping()
    except (redis.ConnectionError, redis.TimeoutError):
        # No server answered on this port, so one has to be launched.
        needs_start = True

    # A successful ping means Redis is already up, so there is nothing to do.
    if not needs_start:
        return

    # The launch deliberately happens outside the except block so that a failure
    # in the follow-up ping propagates on its own rather than chained to the
    # connection error handled above.
    subprocess_run(f"redis-server --port {port} --daemonize yes")
    # A daemonized redis-server does not report whether it came up, so ping
    # once more to check.
    redis.Redis(port=port).ping()


def is_ssd(path: Path) -> bool:
try:
device = (
Expand Down

0 comments on commit f5c89e7

Please sign in to comment.