Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

draft PR for CI #42

Closed
wants to merge 23 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/workflows/tests_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ on:

jobs:
ci:
# The code for the self-hosted runners is at https://github.com/wangpatrick57/dbgym-runners.
runs-on: self-hosted

steps:
Expand Down Expand Up @@ -47,9 +48,8 @@ jobs:

- name: Run integration tests
# Delete the workspace. Run once with a clean workspace. Run again from the existing workspace.
# Note that we need to run with a non-root user in order to start Postgres.
# Note that we need to run with a non-root user in order to start Postgres. This is configured in the .yaml
# file for our self-hosted GHA runners.
run: |
. "$HOME/.cargo/env"
rm -rf ../dbgym_integtest_workspace
./scripts/integration_test.sh ssd
./scripts/integration_test.sh ssd
python -m scripts.run_protox_integration_test ssd
File renamed without changes.
79 changes: 79 additions & 0 deletions analyze/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import json
import logging
import re
from pathlib import Path
from typing import Any

import click
import tensorflow
from google.protobuf.json_format import MessageToJson
from tensorflow.core.util.event_pb2 import Event

from util.log import DBGYM_OUTPUT_LOGGER_NAME


# Top-level `analyze` CLI group; subcommands (e.g. `tfevents`) are registered
# onto it at the bottom of this module via `analyze_group.add_command(...)`.
# NOTE: intentionally no docstring — click would surface it as --help text.
@click.group(name="analyze")
def analyze_group() -> None:
    pass


@click.command(name="tfevents")
@click.argument("tfevents-path", type=Path)
def analyze_tfevents(tfevents_path: Path) -> None:
    """Log how many seconds were spent in each instrumented phase of a run.

    Reads the tfevents file at TFEVENTS_PATH, unwraps its summaries into
    "minimal json", and logs the total time for each phase of interest to the
    dbgym output logger.
    """
    minimal_json = tboard_to_minimal_json(tfevents_path)
    output_logger = logging.getLogger(DBGYM_OUTPUT_LOGGER_NAME)
    # (label, tag regex) pairs — each regex fully matches the "tag" of the
    # instr_time events belonging to that phase.
    phases = [
        ("resetting", r".*PostgresEnv_reset$"),
        ("reconfiguring", r".*PostgresEnv_shift_state$"),
        ("evaluating workload", r".*Workload_execute$"),
        ("training agent", r".*(WolpPolicy_train_actor|WolpPolicy_train_critic)$"),
    ]
    for label, event_regex in phases:
        output_logger.info(
            f"seconds spent {label}: {get_total_instr_time_event(minimal_json, event_regex)}"
        )


# The "minimal json" unwraps each summary so that we're left only with the
# parts that differ between summaries.
def tboard_to_minimal_json(tfevent_fpath: Path) -> list[dict[str, Any]]:
    """Parse a tfevents file into a flat list of single-value summary dicts."""
    summaries: list[dict[str, Any]] = []

    for raw_record in tensorflow.data.TFRecordDataset(tfevent_fpath):
        event = Event()
        event.ParseFromString(raw_record.numpy())

        # Convert the protobuf summary to a plain JSON-derived dict.
        parsed = json.loads(MessageToJson(event.summary))

        # The very first record has an empty summary ({}); skip it.
        if not parsed:
            continue

        assert "value" in parsed
        values = parsed["value"]
        assert len(values) == 1
        summaries.append(values[0])

    return summaries


# An "instr_time_event" is an event with a "tag" that looks like "instr_time/*"
def get_total_instr_time_event(
    minimal_json: list[dict[str, Any]], event_regex: str
) -> float:
    """Sum the "simpleValue" of every summary whose "tag" fully matches event_regex.

    Returns 0.0 when no tag matches. (The original initialized the accumulator
    to the int 0, so an empty match set returned an int despite the -> float
    annotation; wrapping the sum in float() guarantees the annotated type.)
    """
    event_pattern = re.compile(event_regex)
    return float(
        sum(
            json_summary["simpleValue"]
            for json_summary in minimal_json
            if event_pattern.fullmatch(json_summary["tag"]) is not None
        )
    )


analyze_group.add_command(analyze_tfevents)
Empty file added analyze/tests/__init__.py
Empty file.
31 changes: 31 additions & 0 deletions analyze/tests/test_analyze.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import unittest
from pathlib import Path

from analyze.cli import get_total_instr_time_event, tboard_to_minimal_json


class AnalyzeTests(unittest.TestCase):
    """Checks tfevents parsing/summing against a pre-recorded fixture file."""

    def test_tfevents(self) -> None:
        tfevents_path = Path("analyze/tests/unittest_analysis_files/out.tfevents")
        minimal_json = tboard_to_minimal_json(tfevents_path)
        # Expected total seconds per phase, keyed by the tag regex used to
        # select that phase's instr_time events in the fixture.
        expected_totals = {
            r".*PostgresEnv_reset$": 8.0046994,
            r".*PostgresEnv_shift_state$": 12.4918935,
            r".*Workload_execute$": 31.831543260000004,
            r".*(WolpPolicy_train_actor|WolpPolicy_train_critic)$": 19.9834938712,
        }
        for event_regex, expected_total in expected_totals.items():
            self.assertAlmostEqual(
                get_total_instr_time_event(minimal_json, event_regex),
                expected_total,
            )


# Allow running this test module directly (tests are usually run via a runner,
# which does not go through this codepath).
if __name__ == "__main__":
    unittest.main()
Binary file not shown.
2 changes: 1 addition & 1 deletion benchmark/cli.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import click

from benchmark.tpch.cli import tpch_group
from misc.utils import DBGymConfig
from util.workspace import DBGymConfig


@click.group(name="benchmark")
Expand Down
11 changes: 6 additions & 5 deletions benchmark/tpch/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@

import click

from misc.utils import (
from util.log import DBGYM_LOGGER_NAME
from util.shell import subprocess_run
from util.workspace import (
DBGymConfig,
default_tables_dname,
get_scale_factor_string,
link_result,
workload_name_fn,
)
from util.log import DBGYM_LOGGER_NAME
from util.shell import subprocess_run


@click.group(name="tpch")
Expand Down Expand Up @@ -134,7 +135,7 @@ def _generate_data(dbgym_cfg: DBGymConfig, scale_factor: float) -> None:
tpch_kit_dpath = _get_tpch_kit_dpath(dbgym_cfg)
data_path = dbgym_cfg.cur_symlinks_data_path(mkdir=True)
expected_tables_symlink_dpath = (
data_path / f"tables_sf{get_scale_factor_string(scale_factor)}.link"
data_path / f"{default_tables_dname(scale_factor)}.link"
)
if expected_tables_symlink_dpath.exists():
logging.getLogger(DBGYM_LOGGER_NAME).info(
Expand All @@ -147,7 +148,7 @@ def _generate_data(dbgym_cfg: DBGymConfig, scale_factor: float) -> None:
)
subprocess_run(f"./dbgen -vf -s {scale_factor}", cwd=tpch_kit_dpath / "dbgen")
real_dir = dbgym_cfg.cur_task_runs_data_path(
f"tables_sf{get_scale_factor_string(scale_factor)}", mkdir=True
default_tables_dname(scale_factor), mkdir=True
)
subprocess_run(f"mv ./*.tbl {real_dir}", cwd=tpch_kit_dpath / "dbgen")

Expand Down
4 changes: 2 additions & 2 deletions benchmark/tpch/load_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from typing import Optional

from dbms.load_info_base_class import LoadInfoBaseClass
from misc.utils import DBGymConfig, get_scale_factor_string
from util.workspace import DBGymConfig, default_tables_dname

TPCH_SCHEMA_FNAME = "tpch_schema.sql"
TPCH_CONSTRAINTS_FNAME = "tpch_constraints.sql"
Expand Down Expand Up @@ -45,7 +45,7 @@ def __init__(self, dbgym_cfg: DBGymConfig, scale_factor: float):
dbgym_cfg.dbgym_symlinks_path / TpchLoadInfo.CODEBASE_DNAME / "data"
)
tables_symlink_dpath = (
data_root_dpath / f"tables_sf{get_scale_factor_string(scale_factor)}.link"
data_root_dpath / f"{default_tables_dname(scale_factor)}.link"
)
tables_dpath = tables_symlink_dpath.resolve()
assert (
Expand Down
2 changes: 1 addition & 1 deletion dbms/cli.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import click

from dbms.postgres.cli import postgres_group
from misc.utils import DBGymConfig
from util.workspace import DBGymConfig


@click.group(name="dbms")
Expand Down
24 changes: 12 additions & 12 deletions dbms/postgres/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,6 @@

from benchmark.tpch.load_info import TpchLoadInfo
from dbms.load_info_base_class import LoadInfoBaseClass
from misc.utils import (
WORKSPACE_PATH_PLACEHOLDER,
DBGymConfig,
conv_inputpath_to_realabspath,
default_dbdata_parent_dpath,
default_pgbin_path,
get_dbdata_tgz_name,
is_ssd,
link_result,
open_and_save,
save_file,
)
from util.log import DBGYM_LOGGER_NAME
from util.pg import (
DBGYM_POSTGRES_DBNAME,
Expand All @@ -42,6 +30,18 @@
sqlalchemy_conn_execute,
)
from util.shell import subprocess_run
from util.workspace import (
WORKSPACE_PATH_PLACEHOLDER,
DBGymConfig,
conv_inputpath_to_realabspath,
default_dbdata_parent_dpath,
default_pgbin_path,
get_dbdata_tgz_name,
is_ssd,
link_result,
open_and_save,
save_file,
)


@click.group(name="postgres")
Expand Down
1 change: 1 addition & 0 deletions dependencies/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ tomlkit==0.13.2
torch==2.4.0
tqdm==4.66.1
triton==3.0.0
types-protobuf==5.28.0.20240924
types-python-dateutil==2.9.0.20240821
types-pytz==2024.1.0.20240417
types-PyYAML==6.0.12.20240808
Expand Down
4 changes: 2 additions & 2 deletions manage/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@

import click

from misc.utils import (
from util.log import DBGYM_LOGGER_NAME, DBGYM_OUTPUT_LOGGER_NAME
from util.workspace import (
DBGymConfig,
get_runs_path_from_workspace_path,
get_symlinks_path_from_workspace_path,
is_child_path,
parent_dpath_of_path,
)
from util.log import DBGYM_LOGGER_NAME, DBGYM_OUTPUT_LOGGER_NAME


# This is used in test_clean.py. It's defined here to avoid a circular import.
Expand Down
2 changes: 1 addition & 1 deletion manage/tests/test_clean.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from typing import Any, NewType, cast

from manage.cli import MockDBGymConfig, clean_workspace
from misc.utils import path_exists_dont_follow_symlinks
from util.workspace import path_exists_dont_follow_symlinks

# This is here instead of on `if __name__ == "__main__"` because we often run individual tests, which
# does not go through the `if __name__ == "__main__"` codepath.
Expand Down
Empty file added scripts/__init__.py
Empty file.
26 changes: 0 additions & 26 deletions scripts/integration_test.sh

This file was deleted.

2 changes: 2 additions & 0 deletions scripts/pipfreeze.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/bin/bash
# Snapshot the currently-installed packages into the pinned requirements file.
pip freeze >dependencies/requirements.txt
Loading
Loading