You can invoke these from the root of the repo like:

    ./scripts/benchmark.sh ./tests/data/index/dataset_500/dataset_500_observations.csv

This will:
- create a precovery database in a temporary directory
- load the data into the database, measuring the time that takes
- do a precovery search on 10 orbits, measuring the time that takes
- print some results to stdout
- write detailed results to two JSON files (precovery_create_benchmark_{timestamp}.json and precovery_search_benchmark_{timestamp}.json) in the current directory

This code was largely written by GPT-4, with some minor manual tweaks.
Showing 3 changed files with 330 additions and 0 deletions.
scripts/benchmark.sh
@@ -0,0 +1,50 @@
#!/bin/bash

# Defaults: healpix order 10 and 8 timing iterations. Both can be
# overridden via the environment or the -o/-n flags below.
: "${ORDER:=10}"
: "${N_ITER:=8}"

while getopts ":d:o:n:" opt; do
  case $opt in
    d)
      DB_DIR=$OPTARG
      ;;
    n)
      N_ITER=$OPTARG
      ;;
    o)
      ORDER=$OPTARG
      ;;
    \?)
      echo "Invalid option -$OPTARG" >&2
      exit 1
      ;;
  esac
done

shift $((OPTIND-1))

# If no database directory was given, use a temporary one and remove it afterwards.
if [[ -z "${DB_DIR}" ]]; then
  DB_DIR=$(mktemp -d)
  CLEANUP_DB_DIR=true
fi

DATA_FILE=$1

if [[ -z "${DATA_FILE}" ]]; then
  echo "Usage: $0 [-d <database directory>] [-o <healpix order>] [-n <iterations>] <data file>"
  exit 1
fi

NOW=$(date +"%Y-%m-%d_%H-%M-%S")

echo "Creating precovery database in ${DB_DIR} with ORDER=${ORDER}..."
python scripts/measure_precovery_db_creation_time.py "${DB_DIR}" "${DATA_FILE}" "testdata" -p "${ORDER}" -n "${N_ITER}" -o "precovery_create_benchmark_${NOW}.json"

echo "Running precovery benchmark on ${DB_DIR}..."
python scripts/measure_precover_search_time.py "${DB_DIR}" -o "precovery_search_benchmark_${NOW}.json"

if [[ "${CLEANUP_DB_DIR}" == "true" ]]; then
  echo "Cleaning up temporary database directory..."
  rm -rf "${DB_DIR}"
fi
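Because the script parses flags with getopts, the options must come before the positional data file. For example, a run against a persistent database directory with a coarser healpix order and fewer timing iterations might look like this (paths and flag values here are illustrative):

    ./scripts/benchmark.sh -d ./bench_db -o 8 -n 4 ./tests/data/index/dataset_500/dataset_500_observations.csv

Since ORDER and N_ITER have environment-variable defaults, the same run can also be written as:

    ORDER=8 N_ITER=4 ./scripts/benchmark.sh -d ./bench_db ./tests/data/index/dataset_500/dataset_500_observations.csv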
scripts/measure_precover_search_time.py
@@ -0,0 +1,140 @@
import argparse
import json
import os
import time
from datetime import datetime
from typing import List

import numpy as np
import pandas as pd

from precovery.orbit import EpochTimescale, Orbit
from precovery.precovery_db import FrameCandidate, PrecoveryCandidate, PrecoveryDatabase


def sample_orbits():
    """Load the sample orbits that ship with the repository's test data."""
    sample_orbits_file = os.path.join(
        os.path.dirname(__file__), "..", "tests", "data", "sample_orbits.csv"
    )
    df = pd.read_csv(sample_orbits_file)
    orbits = []
    for i in range(len(df)):
        orbit = Orbit.keplerian(
            i,
            df["a"].values[i],
            df["e"].values[i],
            df["i"].values[i],
            df["om"].values[i],
            df["w"].values[i],
            df["ma"].values[i],
            df["mjd_tt"].values[i],
            EpochTimescale.TT,
            df["H"].values[i],
            df["G"].values[i],
        )
        orbits.append(orbit)
    return orbits


def measure_precover_performance(
    database_directory: str, orbits: List[Orbit]
) -> tuple[PrecoveryDatabase, List[dict]]:
    db = PrecoveryDatabase.from_dir(database_directory)
    results = []

    for orbit in orbits:
        start_time = time.time()
        # Materialize the results inside the timed region so the timing is
        # meaningful (and the counts below stay correct) even if precover
        # yields its results lazily.
        precover_results = list(db.precover(orbit))
        elapsed_time = time.time() - start_time

        precovery_count = sum(
            isinstance(res, PrecoveryCandidate) for res in precover_results
        )
        frame_count = sum(isinstance(res, FrameCandidate) for res in precover_results)

        results.append(
            {
                "elapsed_time": elapsed_time,
                "precovery_count": precovery_count,
                "frame_count": frame_count,
            }
        )

    return db, results


def calculate_statistics(results: List[dict]):
    elapsed_times = [res["elapsed_time"] for res in results]

    mean_time = np.mean(elapsed_times)
    median_time = np.median(elapsed_times)
    p10 = np.percentile(elapsed_times, 10)
    p90 = np.percentile(elapsed_times, 90)
    stdev = np.std(elapsed_times)

    return {
        "mean_time": mean_time,
        "median_time": median_time,
        "p10": p10,
        "p90": p90,
        "stdev": stdev,
    }


def save_results_to_json(
    db: PrecoveryDatabase,
    output_json_file: str,
    results: List[dict],
    database_directory: str,
    run_timestamp: str,
):
    statistics = calculate_statistics(results)

    report = {
        "results": results,
        "statistics": statistics,
        "database_directory": database_directory,
        "run_timestamp": run_timestamp,
        "database": {
            "size_bytes": db.frames.idx.n_bytes(),
            "n_frames": db.frames.idx.n_frames(),
            "dataset_ids": list(db.frames.idx.get_dataset_ids()),
        },
    }

    with open(output_json_file, "w") as f:
        json.dump(report, f, indent=4)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Measure the performance of the precover method on a PrecoveryDatabase."
    )
    parser.add_argument(
        "database_directory", help="Path to the PrecoveryDatabase directory."
    )
    parser.add_argument(
        "-o",
        "--output_json_file",
        default="precover_bench.json",
        help="Path to the output JSON file.",
    )
    args = parser.parse_args()

    orbits_to_test = sample_orbits()[:10]  # Test with 10 sample orbits

    db, results = measure_precover_performance(args.database_directory, orbits_to_test)

    print("\nResults for precover method:")
    for i, result in enumerate(results, start=1):
        print(
            f"Orbit {i}: {result['elapsed_time']:.2f} seconds, "
            f"{result['precovery_count']} PrecoveryCandidates, {result['frame_count']} FrameCandidates"
        )

    run_timestamp = datetime.now().strftime("%Y-%m-%dT%H-%M-%S")
    save_results_to_json(
        db, args.output_json_file, results, args.database_directory, run_timestamp
    )
    print(f"\nResults saved to JSON file: {args.output_json_file}")
scripts/measure_precovery_db_creation_time.py
@@ -0,0 +1,140 @@
import argparse
import json
import shutil
import time
from datetime import datetime

import numpy as np

from precovery.precovery_db import PrecoveryDatabase


def count_lines(file_path):
    # Note: counts every line in the file, including the CSV header if present.
    with open(file_path) as f:
        return sum(1 for _ in f)


def measure_db_creation_time(database_directory, csv_file_path, dataset_id, nside):
    start_time = time.time()

    db = PrecoveryDatabase.create(database_directory, nside)
    db.frames.add_dataset(dataset_id)
    db.frames.load_csv(csv_file_path, dataset_id)

    elapsed_time = time.time() - start_time
    db.frames.close()

    return elapsed_time


def print_statistics(elapsed_times):
    elapsed_times_array = np.array(elapsed_times)
    print("Summary statistics:")
    print(f"  Mean time: {np.mean(elapsed_times_array):.2f} seconds")
    print(f"  Median time: {np.median(elapsed_times_array):.2f} seconds")
    print(f"  P10 time: {np.percentile(elapsed_times_array, 10):.2f} seconds")
    print(f"  P90 time: {np.percentile(elapsed_times_array, 90):.2f} seconds")
    print(f"  Standard deviation: {np.std(elapsed_times_array):.2f} seconds")


def save_results_to_json(
    db, output_json_file, elapsed_times, num_lines, input_data_filename, run_timestamp
):
    elapsed_times_array = np.array(elapsed_times)
    results = {
        "input_data_filename": input_data_filename,
        "number_of_rows": num_lines,
        "healpix_nside": db.frames.healpix_nside,
        "run_timestamp": run_timestamp,
        "database_statistics": {
            "number_of_frames": db.frames.idx.n_frames(),
            "database_size_bytes": db.frames.idx.n_bytes(),
            "unique_datasets": list(db.frames.idx.get_dataset_ids()),
        },
        "summary_statistics": {
            "mean_time": np.mean(elapsed_times_array),
            "median_time": np.median(elapsed_times_array),
            "p10_time": np.percentile(elapsed_times_array, 10),
            "p90_time": np.percentile(elapsed_times_array, 90),
            "stdev_time": np.std(elapsed_times_array),
        },
        "execution_times": elapsed_times,
    }

    with open(output_json_file, "w") as json_file:
        json.dump(results, json_file, indent=4)


def main(args):
    elapsed_times = []
    num_lines = count_lines(args.csv_file_path)

    for i in range(args.num_iterations):
        print(f"Running iteration {i + 1}/{args.num_iterations}")
        # Remove any previous database so every iteration measures a full build.
        shutil.rmtree(args.database_directory, ignore_errors=True)
        elapsed_time = measure_db_creation_time(
            args.database_directory,
            args.csv_file_path,
            args.dataset_id,
            2**args.healpixel_order,  # HEALPix nside = 2**order
        )
        elapsed_times.append(elapsed_time)

    run_timestamp = datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
    if args.output_json_file is not None:
        output_json_file = args.output_json_file
    else:
        output_json_file = "precovery_create_benchmark.json"

    # Reopen the last-created database to gather its statistics
    db = PrecoveryDatabase.from_dir(args.database_directory)

    save_results_to_json(
        db,
        output_json_file,
        elapsed_times,
        num_lines,
        args.csv_file_path,
        run_timestamp,
    )
    print(f"Results saved to {output_json_file}")

    print_statistics(elapsed_times)

    # Close the database instance
    db.frames.close()


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Measure Precovery database creation time."
    )
    parser.add_argument(
        "database_directory", help="Directory path for the Precovery database."
    )
    parser.add_argument("csv_file_path", help="Path to the input CSV file.")
    parser.add_argument("dataset_id", help="Dataset ID for the input data.")
    parser.add_argument(
        "-p", "--healpixel_order", type=int, default=12, help="Healpixel order to use."
    )
    parser.add_argument(
        "-n",
        "--num_iterations",
        type=int,
        default=20,
        help="Number of times to create the database (default: 20).",
    )
    parser.add_argument(
        "-o",
        "--output_json_file",
        help="Output JSON file (default: precovery_create_benchmark.json).",
    )
    parser.add_argument(
        "-v", "--verbose", action="store_true", help="Display verbose output."
    )
    args = parser.parse_args()

    if args.verbose:
        print(f"Running with the following arguments: {args}")

    main(args)
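Likewise, the creation benchmark can be invoked directly. A minimal sketch with hypothetical paths, matching the defaults that benchmark.sh passes through:

    # Builds the database 8 times at healpix order 10 (nside = 2**10 = 1024),
    # timing each build and writing per-iteration results to JSON.
    python scripts/measure_precovery_db_creation_time.py /tmp/precovery_db ./tests/data/index/dataset_500/dataset_500_observations.csv testdata -p 10 -n 8 -o create_results.json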