Integration test now checks for files created (#43)
**Summary**: The old integration test only checked that Proto-X didn't
crash. Now it also checks that the expected files are actually created.

**Details**:
* The test is now Python-based instead of shell-based (a sketch of the
file-creation check follows this description).
* As part of this work, I added functionality for analyzing a `.tfevents`
file (a script I had previously written) and added a test for that as
well.
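
For context, a minimal sketch of what such a file-creation check can look like. The helper name and structure here are hypothetical; the actual logic lives in `scripts/run_protox_integration_test.py`, invoked from CI below:

```python
# Hypothetical sketch only; the real checks live in scripts/run_protox_integration_test.py.
from pathlib import Path


def assert_files_created(workspace_dpath: Path, expected_rel_fpaths: list[str]) -> None:
    """Fail if Proto-X 'succeeded' without actually producing its artifacts."""
    for rel_fpath in expected_rel_fpaths:
        fpath = workspace_dpath / rel_fpath
        assert fpath.exists(), f"expected file was not created: {fpath}"
```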
wangpatrick57 authored Oct 29, 2024
1 parent 9e652d1 commit 6f27be5
Showing 46 changed files with 473 additions and 148 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/tests_ci.yml
@@ -7,6 +7,7 @@ on:

jobs:
ci:
+    # The code for the self-hosted runners is at https://github.com/wangpatrick57/dbgym-runners.
runs-on: self-hosted

steps:
@@ -47,9 +48,8 @@ jobs:
- name: Run integration tests
# Delete the workspace. Run once with a clean workspace. Run again from the existing workspace.
-        # Note that we need to run with a non-root user in order to start Postgres.
+        # Note that we need to run with a non-root user in order to start Postgres. This is configured in the .yaml
+        # file for our self-hosted GHA runners.
run: |
. "$HOME/.cargo/env"
rm -rf ../dbgym_integtest_workspace
-          ./scripts/integration_test.sh ssd
-          ./scripts/integration_test.sh ssd
+          python -m scripts.run_protox_integration_test ssd
File renamed without changes.
79 changes: 79 additions & 0 deletions analyze/cli.py
@@ -0,0 +1,79 @@
import json
import logging
import re
from pathlib import Path
from typing import Any

import click
import tensorflow
from google.protobuf.json_format import MessageToJson
from tensorflow.core.util.event_pb2 import Event

from util.log import DBGYM_OUTPUT_LOGGER_NAME


@click.group(name="analyze")
def analyze_group() -> None:
pass


@click.command(name="tfevents")
@click.argument("tfevents-path", type=Path)
def analyze_tfevents(tfevents_path: Path) -> None:
minimal_json = tboard_to_minimal_json(tfevents_path)
logging.getLogger(DBGYM_OUTPUT_LOGGER_NAME).info(
f"seconds spent resetting: {get_total_instr_time_event(minimal_json, r'.*PostgresEnv_reset$')}"
)
logging.getLogger(DBGYM_OUTPUT_LOGGER_NAME).info(
f"seconds spent reconfiguring: {get_total_instr_time_event(minimal_json, r'.*PostgresEnv_shift_state$')}"
)
logging.getLogger(DBGYM_OUTPUT_LOGGER_NAME).info(
f"seconds spent evaluating workload: {get_total_instr_time_event(minimal_json, r'.*Workload_execute$')}"
)
logging.getLogger(DBGYM_OUTPUT_LOGGER_NAME).info(
f"seconds spent training agent: {get_total_instr_time_event(minimal_json, r'.*(WolpPolicy_train_actor|WolpPolicy_train_critic)$')}"
)


# The "minimal json" unwraps each summary so that we're left only with the parts that differ between summaries
def tboard_to_minimal_json(tfevent_fpath: Path) -> list[dict[str, Any]]:
minimal_json = []

raw_dataset = tensorflow.data.TFRecordDataset(tfevent_fpath)

for raw_record in raw_dataset:
event = Event()
event.ParseFromString(raw_record.numpy())

# Convert event to JSON
json_summary = json.loads(MessageToJson(event.summary))

# We get a {} at the very start
if json_summary == {}:
continue

assert "value" in json_summary
json_summary = json_summary["value"]
assert len(json_summary) == 1
json_summary = json_summary[0]

minimal_json.append(json_summary)

return minimal_json


# An "instr_time_event" is an event with a "tag" that looks like "instr_time/*"
def get_total_instr_time_event(
minimal_json: list[dict[str, Any]], event_regex: str
) -> float:
event_pattern = re.compile(event_regex)
total_time = 0

for json_summary in minimal_json:
if event_pattern.fullmatch(json_summary["tag"]) is not None:
total_time += json_summary["simpleValue"]

return total_time


analyze_group.add_command(analyze_tfevents)
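
To make the data flow concrete, here is an illustrative sketch (made-up values) of the "minimal json" that `tboard_to_minimal_json` produces and `get_total_instr_time_event` consumes — just the `tag`/`simpleValue` pairs unwrapped from each TensorBoard summary:

```python
from analyze.cli import get_total_instr_time_event

# Made-up values, in the shape returned by tboard_to_minimal_json.
minimal_json = [
    {"tag": "instr_time/PostgresEnv_reset", "simpleValue": 1.25},
    {"tag": "instr_time/Workload_execute", "simpleValue": 3.5},
    {"tag": "instr_time/Workload_execute", "simpleValue": 2.0},
]

# Sums every entry whose tag fully matches the regex: 3.5 + 2.0 == 5.5.
assert get_total_instr_time_event(minimal_json, r".*Workload_execute$") == 5.5
```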
Empty file added analyze/tests/__init__.py
31 changes: 31 additions & 0 deletions analyze/tests/test_analyze.py
@@ -0,0 +1,31 @@
import unittest
from pathlib import Path

from analyze.cli import get_total_instr_time_event, tboard_to_minimal_json


class AnalyzeTests(unittest.TestCase):
def test_tfevents(self) -> None:
tfevents_path = Path("analyze/tests/unittest_analysis_files/out.tfevents")
minimal_json = tboard_to_minimal_json(tfevents_path)
self.assertAlmostEqual(
get_total_instr_time_event(minimal_json, r".*PostgresEnv_reset$"), 8.0046994
)
self.assertAlmostEqual(
get_total_instr_time_event(minimal_json, r".*PostgresEnv_shift_state$"),
12.4918935,
)
self.assertAlmostEqual(
get_total_instr_time_event(minimal_json, r".*Workload_execute$"),
31.831543260000004,
)
self.assertAlmostEqual(
get_total_instr_time_event(
minimal_json, r".*(WolpPolicy_train_actor|WolpPolicy_train_critic)$"
),
19.9834938712,
)


if __name__ == "__main__":
unittest.main()
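
With the binary fixture checked in, this suite runs like any other unittest module (e.g. `python -m unittest analyze.tests.test_analyze`), or directly as a script via the `unittest.main()` guard above.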
Binary file added analyze/tests/unittest_analysis_files/out.tfevents (not shown).
2 changes: 1 addition & 1 deletion benchmark/cli.py
@@ -1,7 +1,7 @@
import click

from benchmark.tpch.cli import tpch_group
-from misc.utils import DBGymConfig
+from util.workspace import DBGymConfig


@click.group(name="benchmark")
11 changes: 6 additions & 5 deletions benchmark/tpch/cli.py
@@ -3,14 +3,15 @@

import click

-from misc.utils import (
+from util.log import DBGYM_LOGGER_NAME
+from util.shell import subprocess_run
+from util.workspace import (
DBGymConfig,
+    default_tables_dname,
get_scale_factor_string,
link_result,
workload_name_fn,
)
-from util.log import DBGYM_LOGGER_NAME
-from util.shell import subprocess_run


@click.group(name="tpch")
@@ -134,7 +135,7 @@ def _generate_data(dbgym_cfg: DBGymConfig, scale_factor: float) -> None:
tpch_kit_dpath = _get_tpch_kit_dpath(dbgym_cfg)
data_path = dbgym_cfg.cur_symlinks_data_path(mkdir=True)
expected_tables_symlink_dpath = (
-        data_path / f"tables_sf{get_scale_factor_string(scale_factor)}.link"
+        data_path / f"{default_tables_dname(scale_factor)}.link"
)
if expected_tables_symlink_dpath.exists():
logging.getLogger(DBGYM_LOGGER_NAME).info(
@@ -147,7 +148,7 @@ def _generate_data(dbgym_cfg: DBGymConfig, scale_factor: float) -> None:
)
subprocess_run(f"./dbgen -vf -s {scale_factor}", cwd=tpch_kit_dpath / "dbgen")
real_dir = dbgym_cfg.cur_task_runs_data_path(
f"tables_sf{get_scale_factor_string(scale_factor)}", mkdir=True
default_tables_dname(scale_factor), mkdir=True
)
subprocess_run(f"mv ./*.tbl {real_dir}", cwd=tpch_kit_dpath / "dbgen")

4 changes: 2 additions & 2 deletions benchmark/tpch/load_info.py
@@ -2,7 +2,7 @@
from typing import Optional

from dbms.load_info_base_class import LoadInfoBaseClass
-from misc.utils import DBGymConfig, get_scale_factor_string
+from util.workspace import DBGymConfig, default_tables_dname

TPCH_SCHEMA_FNAME = "tpch_schema.sql"
TPCH_CONSTRAINTS_FNAME = "tpch_constraints.sql"
@@ -45,7 +45,7 @@ def __init__(self, dbgym_cfg: DBGymConfig, scale_factor: float):
dbgym_cfg.dbgym_symlinks_path / TpchLoadInfo.CODEBASE_DNAME / "data"
)
tables_symlink_dpath = (
-        data_root_dpath / f"tables_sf{get_scale_factor_string(scale_factor)}.link"
+        data_root_dpath / f"{default_tables_dname(scale_factor)}.link"
)
tables_dpath = tables_symlink_dpath.resolve()
assert (
2 changes: 1 addition & 1 deletion dbms/cli.py
@@ -1,7 +1,7 @@
import click

from dbms.postgres.cli import postgres_group
-from misc.utils import DBGymConfig
+from util.workspace import DBGymConfig


@click.group(name="dbms")
24 changes: 12 additions & 12 deletions dbms/postgres/cli.py
Expand Up @@ -17,18 +17,6 @@

from benchmark.tpch.load_info import TpchLoadInfo
from dbms.load_info_base_class import LoadInfoBaseClass
-from misc.utils import (
-    WORKSPACE_PATH_PLACEHOLDER,
-    DBGymConfig,
-    conv_inputpath_to_realabspath,
-    default_dbdata_parent_dpath,
-    default_pgbin_path,
-    get_dbdata_tgz_name,
-    is_ssd,
-    link_result,
-    open_and_save,
-    save_file,
-)
from util.log import DBGYM_LOGGER_NAME
from util.pg import (
DBGYM_POSTGRES_DBNAME,
@@ -42,6 +30,18 @@
sqlalchemy_conn_execute,
)
from util.shell import subprocess_run
+from util.workspace import (
+    WORKSPACE_PATH_PLACEHOLDER,
+    DBGymConfig,
+    conv_inputpath_to_realabspath,
+    default_dbdata_parent_dpath,
+    default_pgbin_path,
+    get_dbdata_tgz_name,
+    is_ssd,
+    link_result,
+    open_and_save,
+    save_file,
+)


@click.group(name="postgres")
1 change: 1 addition & 0 deletions dependencies/requirements.txt
@@ -123,6 +123,7 @@ tomlkit==0.13.2
torch==2.4.0
tqdm==4.66.1
triton==3.0.0
+types-protobuf==5.28.0.20240924
types-python-dateutil==2.9.0.20240821
types-pytz==2024.1.0.20240417
types-PyYAML==6.0.12.20240808
4 changes: 2 additions & 2 deletions manage/cli.py
@@ -6,14 +6,14 @@

import click

-from misc.utils import (
+from util.log import DBGYM_LOGGER_NAME, DBGYM_OUTPUT_LOGGER_NAME
+from util.workspace import (
DBGymConfig,
get_runs_path_from_workspace_path,
get_symlinks_path_from_workspace_path,
is_child_path,
parent_dpath_of_path,
)
-from util.log import DBGYM_LOGGER_NAME, DBGYM_OUTPUT_LOGGER_NAME


# This is used in test_clean.py. It's defined here to avoid a circular import.
2 changes: 1 addition & 1 deletion manage/tests/test_clean.py
@@ -7,7 +7,7 @@
from typing import Any, NewType, cast

from manage.cli import MockDBGymConfig, clean_workspace
-from misc.utils import path_exists_dont_follow_symlinks
+from util.workspace import path_exists_dont_follow_symlinks

# This is here instead of on `if __name__ == "__main__"` because we often run individual tests, which
# does not go through the `if __name__ == "__main__"` codepath.
Empty file added scripts/__init__.py
26 changes: 0 additions & 26 deletions scripts/integration_test.sh

This file was deleted.

2 changes: 2 additions & 0 deletions scripts/pipfreeze.sh
@@ -0,0 +1,2 @@
#!/bin/bash
pip freeze >dependencies/requirements.txt