Skip to content

Commit

Permalink
removed no startup check option
Browse files Browse the repository at this point in the history
  • Loading branch information
wangpatrick57 committed Jul 7, 2024
1 parent 4496145 commit 6a9ea38
Show file tree
Hide file tree
Showing 7 changed files with 50 additions and 65 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ These steps were tested on a fresh repository clone, Ubuntu ??.04.
./dependency/install_dependencies.sh
# Compile a custom fork of PostgreSQL, load TPC-H, train the Proto-X agent, and tune.
./scripts/quickstart.sh postgres tpch protox
./scripts/quickstart.sh postgres path/to/put/pgdata/in tpch 0.01 protox
```

## Overview
Expand Down
24 changes: 12 additions & 12 deletions experiments/protox_tpch_sf0point1/main.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,26 +8,26 @@ INTENDED_PGDATA_HARDWARE=ssd
echo $PGDATA_PARENT_DPATH

# space for testing. uncomment this to run individual commands from the script (copy pasting is harder because there are envvars)
# python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 4 --max-concurrent 4 --workload-timeout 100 --query-timeout 15 --tune-duration-during-hpo 0.1 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH
python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --tune-duration-during-tune 0.2
python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR
# python3 task.py tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 4 --max-concurrent 4 --workload-timeout 100 --query-timeout 15 --tune-duration-during-hpo 0.1 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH
python3 task.py tune protox agent tune tpch --scale-factor $SCALE_FACTOR --tune-duration-during-tune 0.2
python3 task.py tune protox agent replay tpch --scale-factor $SCALE_FACTOR
exit 0

# benchmark
python3 task.py --no-startup-check benchmark tpch data $SCALE_FACTOR
python3 task.py --no-startup-check benchmark tpch workload --scale-factor $SCALE_FACTOR
python3 task.py benchmark tpch data $SCALE_FACTOR
python3 task.py benchmark tpch workload --scale-factor $SCALE_FACTOR

# postgres
python3 task.py --no-startup-check dbms postgres build
python3 task.py --no-startup-check dbms postgres pgdata tpch --scale-factor $SCALE_FACTOR --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH
python3 task.py dbms postgres build
python3 task.py dbms postgres pgdata tpch --scale-factor $SCALE_FACTOR --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH

exit 0

# embedding
python3 task.py --no-startup-check tune protox embedding datagen tpch --scale-factor $SCALE_FACTOR --override-sample-limits "lineitem,32768" --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH # long datagen so that train doesn't crash
python3 task.py --no-startup-check tune protox embedding train tpch --scale-factor $SCALE_FACTOR --iterations-per-epoch 1 --num-points-to-sample 1 --num-batches 1 --batch-size 64 --start-epoch 15 --num-samples 4 --train-max-concurrent 4 --num-curate 2
python3 task.py tune protox embedding datagen tpch --scale-factor $SCALE_FACTOR --override-sample-limits "lineitem,32768" --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH # long datagen so that train doesn't crash
python3 task.py tune protox embedding train tpch --scale-factor $SCALE_FACTOR --iterations-per-epoch 1 --num-points-to-sample 1 --num-batches 1 --batch-size 64 --start-epoch 15 --num-samples 4 --train-max-concurrent 4 --num-curate 2

# agent
python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 4 --max-concurrent 4 --workload-timeout 100 --query-timeout 15 --tune-duration-during-hpo 1 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --build-space-good-for-boot
python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR
python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR
python3 task.py tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 4 --max-concurrent 4 --workload-timeout 100 --query-timeout 15 --tune-duration-during-hpo 1 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --build-space-good-for-boot
python3 task.py tune protox agent tune tpch --scale-factor $SCALE_FACTOR
python3 task.py tune protox agent replay tpch --scale-factor $SCALE_FACTOR
26 changes: 13 additions & 13 deletions experiments/protox_tpch_sf10/main.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,25 +7,25 @@ INTENDED_PGDATA_HARDWARE=ssd
. ./experiments/load_per_machine_envvars.sh

# space for testing. uncomment this to run individual commands from the script (copy pasting is harder because there are envvars)
python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --max-concurrent 4 --tune-duration-during-hpo 4 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --build-space-good-for-boot
# python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --tune-duration-during-tune 4
# python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --enable-boot-during-tune --tune-duration-during-tune 4
# python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR
# python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR --boot-enabled-during-tune
python3 task.py tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --max-concurrent 4 --tune-duration-during-hpo 4 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --build-space-good-for-boot
# python3 task.py tune protox agent tune tpch --scale-factor $SCALE_FACTOR --tune-duration-during-tune 4
# python3 task.py tune protox agent tune tpch --scale-factor $SCALE_FACTOR --enable-boot-during-tune --tune-duration-during-tune 4
# python3 task.py tune protox agent replay tpch --scale-factor $SCALE_FACTOR
# python3 task.py tune protox agent replay tpch --scale-factor $SCALE_FACTOR --boot-enabled-during-tune
exit 0

# benchmark
python3 task.py --no-startup-check benchmark tpch data $SCALE_FACTOR
python3 task.py --no-startup-check benchmark tpch workload --scale-factor $SCALE_FACTOR
python3 task.py benchmark tpch data $SCALE_FACTOR
python3 task.py benchmark tpch workload --scale-factor $SCALE_FACTOR

# postgres
python3 task.py --no-startup-check dbms postgres build
python3 task.py --no-startup-check dbms postgres pgdata tpch --scale-factor $SCALE_FACTOR --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH
python3 task.py dbms postgres build
python3 task.py dbms postgres pgdata tpch --scale-factor $SCALE_FACTOR --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH

# embedding
python3 task.py --no-startup-check tune protox embedding datagen tpch --scale-factor $SCALE_FACTOR --override-sample-limits "lineitem,32768" --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH
python3 task.py --no-startup-check tune protox embedding train tpch --scale-factor $SCALE_FACTOR --train-max-concurrent 10
python3 task.py tune protox embedding datagen tpch --scale-factor $SCALE_FACTOR --override-sample-limits "lineitem,32768" --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH
python3 task.py tune protox embedding train tpch --scale-factor $SCALE_FACTOR --train-max-concurrent 10

# agent
python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --max-concurrent 4 --tune-duration-during-hpo 4 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --build-space-good-for-boot
python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR
python3 task.py tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --max-concurrent 4 --tune-duration-during-hpo 4 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --build-space-good-for-boot
python3 task.py tune protox agent tune tpch --scale-factor $SCALE_FACTOR
16 changes: 1 addition & 15 deletions misc/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,13 +166,11 @@ class DBGymConfig:
Global configurations that apply to all parts of DB-Gym
"""

def __init__(self, config_path, startup_check=False):
def __init__(self, config_path):
"""
Parameters
----------
config_path : Path
startup_check : bool
True if startup_check shoul
"""
assert is_base_git_dir(
os.getcwd()
Expand All @@ -188,18 +186,6 @@ def __init__(self, config_path, startup_check=False):
Path(yaml_config["dbgym_workspace_path"]).resolve().absolute()
)

# Quickly display options.
if startup_check:
msg = (
"💩💩💩 CMU-DB Database Gym: github.com/cmu-db/dbgym 💩💩💩\n"
f"\tdbgym_workspace_path: {dbgym_workspace_path}\n"
"\n"
"Proceed?"
)
if not click.confirm(msg):
print("Goodbye.")
sys.exit(0)

self.path: Path = config_path
self.cur_path_list: list[str] = ["dbgym"]
self.root_yaml: dict = yaml_config
Expand Down
26 changes: 13 additions & 13 deletions scripts/pat_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,27 +7,27 @@ INTENDED_PGDATA_HARDWARE=ssd
. ./experiments/load_per_machine_envvars.sh

# space for testing. uncomment this to run individual commands from the script (copy pasting is harder because there are envvars)
python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --tune-duration-during-hpo 0.01 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --build-space-good-for-boot
python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --tune-duration-during-tune 0.02
python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR
python3 task.py tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --tune-duration-during-hpo 0.01 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --build-space-good-for-boot
python3 task.py tune protox agent tune tpch --scale-factor $SCALE_FACTOR --tune-duration-during-tune 0.02
python3 task.py tune protox agent replay tpch --scale-factor $SCALE_FACTOR
exit 0

# benchmark
python3 task.py --no-startup-check benchmark tpch data $SCALE_FACTOR
python3 task.py --no-startup-check benchmark tpch workload --scale-factor $SCALE_FACTOR
python3 task.py benchmark tpch data $SCALE_FACTOR
python3 task.py benchmark tpch workload --scale-factor $SCALE_FACTOR

# postgres
python3 task.py --no-startup-check dbms postgres build
python3 task.py --no-startup-check dbms postgres pgdata tpch --scale-factor $SCALE_FACTOR --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH
python3 task.py dbms postgres build
python3 task.py dbms postgres pgdata tpch --scale-factor $SCALE_FACTOR --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH

exit 0

# embedding
# python3 task.py --no-startup-check tune protox embedding datagen tpch --scale-factor $SCALE_FACTOR --default-sample-limit 64 --file-limit 64 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH # short datagen for testing
python3 task.py --no-startup-check tune protox embedding datagen tpch --scale-factor $SCALE_FACTOR --override-sample-limits "lineitem,32768" --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH # long datagen so that train doesn't crash
python3 task.py --no-startup-check tune protox embedding train tpch --scale-factor $SCALE_FACTOR --iterations-per-epoch 1 --num-points-to-sample 1 --num-batches 1 --batch-size 64 --start-epoch 15 --num-samples 4 --train-max-concurrent 4 --num-curate 2
# python3 task.py tune protox embedding datagen tpch --scale-factor $SCALE_FACTOR --default-sample-limit 64 --file-limit 64 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH # short datagen for testing
python3 task.py tune protox embedding datagen tpch --scale-factor $SCALE_FACTOR --override-sample-limits "lineitem,32768" --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH # long datagen so that train doesn't crash
python3 task.py tune protox embedding train tpch --scale-factor $SCALE_FACTOR --iterations-per-epoch 1 --num-points-to-sample 1 --num-batches 1 --batch-size 64 --start-epoch 15 --num-samples 4 --train-max-concurrent 4 --num-curate 2

# agent
python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --tune-duration-during-hpo 0.01 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --build-space-good-for-boot
python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR
python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR
python3 task.py tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --tune-duration-during-hpo 0.01 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --build-space-good-for-boot
python3 task.py tune protox agent tune tpch --scale-factor $SCALE_FACTOR
python3 task.py tune protox agent replay tpch --scale-factor $SCALE_FACTOR
16 changes: 8 additions & 8 deletions scripts/wan_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,17 @@
set -euxo pipefail

# Build Postgres
python3 task.py --no-startup-check dbms postgres repo
python3 task.py dbms postgres repo

# Generate TPC-H
python3 task.py --no-startup-check benchmark tpch generate-data 1
python3 task.py --no-startup-check benchmark tpch generate-workload queries_15721_15723 15721 15723
python3 task.py benchmark tpch generate-data 1
python3 task.py benchmark tpch generate-workload queries_15721_15723 15721 15723

# Create tpch_sf1.tgz
python3 task.py --no-startup-check dbms postgres pgdata tpch --scale-factor 1
python3 task.py dbms postgres pgdata tpch --scale-factor 1

# Run Proto-X
python3 task.py --no-startup-check dbms postgres start
python3 task.py --no-startup-check tune protox embedding datagen tpch queries_15721_15723 --connection-str "host=localhost port=15721 dbname=tpch_sf1 user=noisepage_user password=noisepage_pass" --override-sample-limits "lineitem,32768"
python3 task.py --no-startup-check tune protox embedding train tpch queries_15721_15723 --iterations-per-epoch 1 --num-samples 4 --train-max-concurrent 4 --num-points-to-sample 32 --max-segments 3
python3 task.py --no-startup-check dbms postgres stop
python3 task.py dbms postgres start
python3 task.py tune protox embedding datagen tpch queries_15721_15723 --connection-str "host=localhost port=15721 dbname=tpch_sf1 user=noisepage_user password=noisepage_pass" --override-sample-limits "lineitem,32768"
python3 task.py tune protox embedding train tpch queries_15721_15723 --iterations-per-epoch 1 --num-samples 4 --train-max-concurrent 4 --num-points-to-sample 32 --max-segments 3
python3 task.py dbms postgres stop
5 changes: 2 additions & 3 deletions task.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,10 @@

@click.group()
@click.option("--config-path", default="config.yaml")
@click.option("--no-startup-check", is_flag=True)
@click.pass_context
def task(ctx, config_path, no_startup_check):
def task(ctx, config_path):
"""💩💩💩 CMU-DB Database Gym: github.com/cmu-db/dbgym 💩💩💩"""
ctx.obj = DBGymConfig(config_path, startup_check=not no_startup_check)
ctx.obj = DBGymConfig(config_path)


@click.group(name="config")
Expand Down

0 comments on commit 6a9ea38

Please sign in to comment.