From a940d261fe5bbff3c0b7ffc3c6c45c00ef7555b4 Mon Sep 17 00:00:00 2001
From: Alexius Wadell <awadell@gmail.com>
Date: Thu, 14 Dec 2023 20:59:22 -0500
Subject: [PATCH 1/3] fix: rework testing framework

Started as a small tweak `leaves` -> `BenchmarkTools.leaves` (symbol no
longer exported)

Then CI wouldn't pass and now we're here.

On the plus side, the new system:
- uses ReTestItems
- actually creates a fake Example project
- more or less cleans up after itself
---
 Project.toml                        |  11 ++
 test/Project.toml                   |  12 ---
 test/backward_compat.jl             |  48 ---------
 test/backward_compat_test.jl        |  49 +++++++++
 test/ci.jl                          |  85 ---------------
 test/ci_test.jl                     |  11 ++
 test/{judging.jl => judge_tests.jl} |  26 ++---
 test/locate_benchmarks.jl           |  76 -------------
 test/locate_benchmarks_tests.jl     | 102 ++++++++++++++++++
 test/runtests.jl                    |  37 +------
 test/{smoke.jl => smoke_tests.jl}   |  70 ++++++------
 test/{tune.jl => tune_tests.jl}     |  44 ++++----
 test/utils.jl                       | 117 --------------------
 test/utils_test.jl                  | 162 ++++++++++++++++++++++++++++
 14 files changed, 407 insertions(+), 443 deletions(-)
 delete mode 100644 test/Project.toml
 delete mode 100644 test/backward_compat.jl
 create mode 100644 test/backward_compat_test.jl
 delete mode 100644 test/ci.jl
 create mode 100644 test/ci_test.jl
 rename test/{judging.jl => judge_tests.jl} (66%)
 delete mode 100644 test/locate_benchmarks.jl
 create mode 100644 test/locate_benchmarks_tests.jl
 rename test/{smoke.jl => smoke_tests.jl} (70%)
 rename test/{tune.jl => tune_tests.jl} (66%)
 delete mode 100644 test/utils.jl
 create mode 100644 test/utils_test.jl

diff --git a/Project.toml b/Project.toml
index e7996ff..62cad62 100644
--- a/Project.toml
+++ b/Project.toml
@@ -20,8 +20,19 @@ BSON = "0.3"
 BenchmarkTools = "1"
 CodecZlib = "0.7"
 JSON = "0.21"
+Pkg = "1.9"
 Revise = "3"
+Statistics = "1.9"
 julia = "1.6"
 
 [extras]
+Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+ReTestItems = "817f1d60-ba6b-4fd5-9520-3cf149f6a823"
 Revise = "295af30f-e4ad-537b-8983-00126c2a3abe"
+TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
+
+[targets]
+test = ["Test", "ReTestItems", "Revise", "UUIDs", "TOML", "Random"]
diff --git a/test/Project.toml b/test/Project.toml
deleted file mode 100644
index 6093fd7..0000000
--- a/test/Project.toml
+++ /dev/null
@@ -1,12 +0,0 @@
-[deps]
-BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
-CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193"
-Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
-Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
-JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
-Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
-Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
-Revise = "295af30f-e4ad-537b-8983-00126c2a3abe"
-SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f"
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
-UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
diff --git a/test/backward_compat.jl b/test/backward_compat.jl
deleted file mode 100644
index bfff1c8..0000000
--- a/test/backward_compat.jl
+++ /dev/null
@@ -1,48 +0,0 @@
-using Test
-using JSON
-using PkgJogger
-using Example
-using CodecZlib
-using Dates
-
-include("utils.jl")
-
-# Get Benchmarking results
-@jog Example
-b = JogExample.benchmark()
-
-# Save using JSON
-function save_benchmarks(filename, results::BenchmarkTools.BenchmarkGroup)
-    # Collect system information to save
-    mkpath(dirname(filename))
-    out = Dict(
-        "julia" => PkgJogger.julia_info(),
-        "system" => PkgJogger.system_info(),
-        "datetime" => string(Dates.now()),
-        "benchmarks" => results,
-        "git" => PkgJogger.git_info(filename),
-    )
-
-    # Write benchmark to disk
-    open(GzipCompressorStream, filename, "w") do io
-        JSON.print(io, out)
-    end
-end
-
-@testset "Compat *.json.gz" begin
-    f = tempname(; cleanup=false) * ".json.gz"
-    finalizer(rm, f)
-    save_benchmarks(f, b)
-
-    # Check that the deprecated warming is logged
-    local b2
-    @test_logs (:warn, r"Legacy `\*\.json\.gz` format is deprecated.*") begin
-        b2 = JogExample.load_benchmarks(f)
-    end
-
-    # Check that benchmarks are still there
-    @test b2 isa Dict
-    @test haskey(b2, "benchmarks")
-    @test b2["benchmarks"] isa BenchmarkTools.BenchmarkGroup
-end
-
diff --git a/test/backward_compat_test.jl b/test/backward_compat_test.jl
new file mode 100644
index 0000000..13fc096
--- /dev/null
+++ b/test/backward_compat_test.jl
@@ -0,0 +1,49 @@
+@testitem "compat" setup=[ExamplePkg] begin
+    using Test
+    using JSON
+    using PkgJogger
+    using CodecZlib
+    using BenchmarkTools
+    using Dates
+
+    # Get Benchmarking results
+    jogger, cleanup = ExamplePkg.create_jogger()
+    b = jogger.benchmark()
+
+    # Save using JSON
+    function save_benchmarks(filename, results::BenchmarkTools.BenchmarkGroup)
+        # Collect system information to save
+        mkpath(dirname(filename))
+        out = Dict(
+            "julia" => PkgJogger.julia_info(),
+            "system" => PkgJogger.system_info(),
+            "datetime" => string(Dates.now()),
+            "benchmarks" => results,
+            "git" => PkgJogger.git_info(filename),
+        )
+
+        # Write benchmark to disk
+        open(GzipCompressorStream, filename, "w") do io
+            JSON.print(io, out)
+        end
+    end
+
+    @testset "Compat *.json.gz" begin
+        f = tempname(; cleanup=false) * ".json.gz"
+        finalizer(rm, f)
+        save_benchmarks(f, b)
+
+        # Check that the deprecation warning is logged
+        local b2
+        b2 = @test_logs (:warn, r"Legacy `\*\.json\.gz` format is deprecated.*") begin
+            jogger.load_benchmarks(f)
+        end
+
+        # Check that benchmarks are still there
+        @test b2 isa Dict
+        @test haskey(b2, "benchmarks")
+        @test b2["benchmarks"] isa BenchmarkTools.BenchmarkGroup
+    end
+
+    cleanup()
+end
diff --git a/test/ci.jl b/test/ci.jl
deleted file mode 100644
index 9076b54..0000000
--- a/test/ci.jl
+++ /dev/null
@@ -1,85 +0,0 @@
-using Test
-using PkgJogger
-
-include("utils.jl")
-
-function run_ci_workflow(pkg_dir)
-    # Create temporary default project
-    mktempdir() do temp_project
-        # Copy pkg_dir to temp_project, and fix permissions
-        cp(pkg_dir, temp_project; force=true)
-        chmod(temp_project, 0o700; recursive=true)
-
-        # Construct CI Command
-        pkgjogger_path = escape_string(PKG_JOGGER_PATH)
-        cli_script = """
-            using Pkg
-            Pkg.activate(temp=true)
-            Pkg.develop(path=\"$pkgjogger_path\")
-            using PkgJogger
-            PkgJogger.ci()
-        """
-        cmd = ignorestatus(Cmd(Vector{String}(filter(!isnothing, [
-            joinpath(Sys.BINDIR, "julia"),
-            "--startup-file=no",
-            Base.JLOptions().code_coverage > 0 ? "--code-coverage=all" : nothing,
-            "--eval",
-            cli_script,
-        ]))))
-
-        # Enable user project + stdlib but remove additional entries from JULIA_LOAD_PATH
-        # This replicates the behavior of `] test`
-        sep = Sys.iswindows() ? ";" : ":"
-        cmd = setenv(cmd, "JULIA_LOAD_PATH" => join(["@", "@stdlib"], sep))
-
-        # Check things are setup
-        @test isdir(temp_project)
-        @test isdir(Sys.BINDIR)
-        @test isdir(PKG_JOGGER_PATH)
-
-       # Capture stdout and stderror
-        cmd_stdout =  IOBuffer(;append=true)
-        cmd_stderr = IOBuffer(;append=true)
-        cmd = Cmd(cmd; dir=temp_project)
-        cmd = pipeline(cmd; stdout=cmd_stdout, stderr=cmd_stderr)
-
-        # Run workflow and return output
-        proc = run(cmd)
-        if proc.exitcode != 0
-            @info read(cmd_stdout, String)
-            @info read(cmd_stderr, String)
-            error("$cmd exited with $proc.exitcode")
-        end
-
-        # Check if benchmark results were saved
-        logs = read(cmd_stderr, String)
-        m = match(r"Saved benchmarks to (.*)\n", logs)
-        m !== nothing || print(logs)
-        @test m !== nothing
-
-        @test length(m.captures) == 1
-        results_file = m.captures[1]
-        @test isfile(results_file)
-
-        # Check that results are in the right place
-        trial_dir = joinpath(PkgJogger.benchmark_dir(temp_project), "trial")
-        @test isfile(joinpath(trial_dir, splitpath(results_file)[end]))
-
-        # Check that results file is valid
-        results = PkgJogger.load_benchmarks(results_file)
-        test_loaded_results(results)
-
-        # Further checks
-        return results
-    end
-end
-
-@testset "Example.jl" begin
-    project = joinpath(@__DIR__, "Example.jl")
-    results = run_ci_workflow(project)
-    cleanup_example()
-
-    # Check timer results are decent (sleep isn't very accurate)
-    isapprox((time∘minimum)(results["benchmarks"][["bench_timer.jl", "1ms"]]), 1e6; atol=3e6)
-    isapprox((time∘minimum)(results["benchmarks"][["bench_timer.jl", "2ms"]]), 2e6; atol=3e6)
-end
diff --git a/test/ci_test.jl b/test/ci_test.jl
new file mode 100644
index 0000000..47552d6
--- /dev/null
+++ b/test/ci_test.jl
@@ -0,0 +1,15 @@
+@testitem "ci" setup=[ExamplePkg] begin
+    # Run `PkgJogger.ci()` in a separate Julia process, started from the parent of the
+    # example jogger's benchmark directory, and check that the process exits cleanly.
+    pkgjogger_env = Base.active_project()
+    jogger, cleanup = ExamplePkg.create_jogger()
+    dir = abspath(joinpath(jogger.BENCHMARK_DIR, ".."))
+    env = copy(ENV)
+    # Append the currently active project to the child's load path
+    env["JULIA_LOAD_PATH"] = join(["@", "@stdlib", pkgjogger_env], Sys.iswindows() ? ";" : ":")
+    cmd = Cmd(`$(Base.julia_cmd()) --startup-file=no -e 'using PkgJogger; PkgJogger.ci()'`; env, dir)
+    # `run` throws on a non-zero exit unless the command is wrapped in `ignorestatus`;
+    # wrapping lets `@test success(p)` record the failure and still reach `cleanup()`.
+    p = run(ignorestatus(cmd); wait=true)
+    @test success(p)
+    cleanup()
+end
diff --git a/test/judging.jl b/test/judge_tests.jl
similarity index 66%
rename from test/judging.jl
rename to test/judge_tests.jl
index 7d809d2..647f774 100644
--- a/test/judging.jl
+++ b/test/judge_tests.jl
@@ -1,14 +1,13 @@
+@testitem "judging" setup=[ExamplePkg, BenchmarkTests] begin
 using PkgJogger
 using BenchmarkTools
-using Example
-@jog Example
 
-include("utils.jl")
+jogger, cleanup = ExamplePkg.create_jogger()
 
 function gen_example()
-    results = JogExample.benchmark()
-    filename = JogExample.save_benchmarks(results)
-    dict = JogExample.load_benchmarks(filename)
+    results = jogger.benchmark()
+    filename = jogger.save_benchmarks(results)
+    dict = jogger.load_benchmarks(filename)
     uuid = get_uuid(filename)
     return results, filename, dict, uuid
 end
@@ -25,7 +24,7 @@ new = gen_example()
 old = gen_example()
 
 @testset "JogPkgName.judge($(typeof(n)), $(typeof(o)))" for (n, o) in Iterators.product(new, old)
-    test_judge(JogExample.judge, n, o)
+    test_judge(jogger.judge, n, o)
 end
 
 @testset "PkgJogger.judge($(typeof(n)), $(typeof(o)))" for (n, o) in Iterators.product(new[1:3], old[1:3])
@@ -35,24 +34,25 @@ end
 @testset "Missing Results - $(typeof(n))" for n in new
     @testset "Empty Suite" begin
         # Expect an empty judgement
-        judgement = test_judge(JogExample.judge, n, BenchmarkGroup())
+        judgement = test_judge(jogger.judge, n, BenchmarkGroup())
         isempty(judgement)
     end
     @testset "Missing Benchmark Judgement" begin
         # Get a suite of results to modify
         ref = deepcopy(first(new))
-        ref_leaves = first.(leaves(ref))
+        ref_leaves = first.(BenchmarkTools.leaves(ref))
 
         # Add a new Trial results
-        name, trial = first(leaves(ref))
+        name, trial = first(BenchmarkTools.leaves(ref))
         name[end] = rand()
         ref[name] = deepcopy(trial)
 
         # Expect the extra benchmark to be skipped
-        judgement = test_judge(JogExample.judge, n, ref)
-        judgement_leaves = first.(leaves(judgement))
+        judgement = test_judge(jogger.judge, n, ref)
+        judgement_leaves = first.(BenchmarkTools.leaves(judgement))
         @test Set(judgement_leaves) == Set(ref_leaves)
     end
 end
+cleanup()
 
-cleanup_example()
+end
diff --git a/test/locate_benchmarks.jl b/test/locate_benchmarks.jl
deleted file mode 100644
index a6155df..0000000
--- a/test/locate_benchmarks.jl
+++ /dev/null
@@ -1,76 +0,0 @@
-using Test
-using BenchmarkTools
-using PkgJogger
-using Example
-
-include("utils.jl")
-
-function check_suite(jogger; add=nothing)
-    # Default Suite
-    reference = [
-        ["bench_timer.jl", "1ms"],
-        ["bench_timer.jl", "2ms"],
-    ] |> Set
-
-    if add !== nothing
-        reference = union(reference, add)
-    end
-
-    # Get suite of jogger
-    suite = Set(jogger.suite() |> leaves |> x -> map(first, x))
-    @test suite == reference
-end
-
-@testset "default suite" begin
-    jogger = @eval @jog Example
-    check_suite(jogger)
-end
-
-@testset "Add benchmarks" begin
-    suite, cleanup = add_benchmark(Example, "bench_foo_$(rand(UInt16)).jl")
-    jogger = @eval @jog Example
-    check_suite(jogger; add=suite)
-
-    # Add a non-benchmarking file (Should not be added)
-    cleanup2 = add_benchmark(Example, "foo_$(rand(UInt16)).jl")[2]
-    check_suite(jogger; add=suite)
-
-    # Add another file (should not be added)
-    suite3, cleanup3 = add_benchmark(Example, "bench_foo_$(rand(UInt16)).jl")
-    check_suite(jogger; add=suite)
-
-    # Regenerate jogger to get new suite -> Should now just be suite3 + suite
-    jogger = @eval @jog Example
-    check_suite(jogger; add=union(suite, suite3))
-
-    cleanup()
-    cleanup2()
-    cleanup3()
-end
-
-@testset "Benchmarks in subfolders" begin
-    # Add a subfolder -> Don't track
-    jogger = @eval @jog Example
-    tempdir = mktempdir(jogger.BENCHMARK_DIR; cleanup=false)
-    check_suite(jogger)
-    rm(tempdir)
-
-    # Add an empty bench_ subfolder -> Ignore
-    tempdir = mktempdir(jogger.BENCHMARK_DIR; prefix="bench_", cleanup=false)
-    check_suite(jogger)
-    rm(tempdir)
-
-    # Add a benchmark to a subfolder -> track
-    path = joinpath("bench_subdir_$(rand(UInt16))", "bench_foo_$(rand(UInt16)).jl")
-    suite, cleanup = add_benchmark(Example, path)
-    check_suite(@eval @jog Example; add=suite)
-    cleanup()
-
-    # Two Levels Deep
-    dir = "bench_subdir_$(rand(UInt16))"
-    suite, cleanup = add_benchmark(Example, joinpath(dir, "bench_foo_$(rand(UInt16)).jl"))
-    union!(suite, add_benchmark(Example, joinpath(dir, "bench_l2", "bench_foo_$(rand(UInt16)).jl"))[1])
-    add_benchmark(Example, joinpath(dir, "skip_me.jl"))
-    check_suite(@eval @jog Example; add=suite)
-    cleanup()
-end
diff --git a/test/locate_benchmarks_tests.jl b/test/locate_benchmarks_tests.jl
new file mode 100644
index 0000000..5fdd402
--- /dev/null
+++ b/test/locate_benchmarks_tests.jl
@@ -0,0 +1,102 @@
+@testsetup module SuiteCheck
+using Test
+using BenchmarkTools
+using PkgJogger
+
+export add_benchmark, check_suite
+
+function check_suite(jogger; add=nothing)
+    # Default Suite
+    reference = [
+        ["bench_timer.jl", "1ms"],
+        ["bench_timer.jl", "2ms"],
+    ] |> Set
+
+    if add !== nothing
+        reference = union(reference, add)
+    end
+
+    # Get suite of jogger
+    suite = Set(jogger.suite() |> BenchmarkTools.leaves |> x -> map(first, x))
+    @test suite == reference
+end
+
+function add_benchmark(jogger, path)
+    filename = joinpath(jogger.BENCHMARK_DIR, path)
+    dir = dirname(filename)
+    mkpath(dir)
+
+    open(filename, "w") do io
+        """
+        using BenchmarkTools
+
+        suite = BenchmarkGroup()
+        suite["foo"] = @benchmarkable sin(rand())
+        """ |> s -> write(io, s)
+    end
+
+    return Set{Vector{String}}([[splitpath(path)..., "foo"]])
+end
+
+end
+
+@testitem "default suite" setup=[ExamplePkg, SuiteCheck] begin
+    jogger, cleanup = ExamplePkg.create_jogger()
+    check_suite(jogger)
+    cleanup()
+end
+
+@testitem "Add benchmarks" setup=[ExamplePkg, SuiteCheck] begin
+    fakepkg, cleanup = ExamplePkg.create_example()
+    jogger = ExamplePkg.create_jogger(fakepkg)
+    suite = add_benchmark(jogger, "bench_foo_$(rand(UInt16)).jl")
+    jogger = ExamplePkg.create_jogger(fakepkg)
+    check_suite(jogger; add=suite)
+
+    # Add a non-benchmarking file (Should not be added)
+    add_benchmark(jogger, "foo_$(rand(UInt16)).jl")
+    check_suite(jogger; add=suite)
+
+    # Add another file (should not be added)
+    suite3 = add_benchmark(jogger, "bench_foo_$(rand(UInt16)).jl")
+    check_suite(jogger; add=suite)
+
+    # Regenerate jogger to get new suite -> Should now just be suite3 + suite
+    jogger = ExamplePkg.create_jogger(fakepkg)
+    check_suite(jogger; add=union(suite, suite3))
+
+    cleanup()
+end
+
+@testitem "Benchmarks in subfolders" setup=[ExamplePkg, SuiteCheck] begin
+    # Add a subfolder -> Don't track
+    fakepkg, cleanup = ExamplePkg.create_example()
+    jogger = ExamplePkg.create_jogger(fakepkg)
+    tempdir = mktempdir(jogger.BENCHMARK_DIR; cleanup=false)
+    jogger = ExamplePkg.create_jogger(fakepkg)
+    check_suite(jogger)
+    rm(tempdir)
+
+    # Add an empty bench_ subfolder -> Ignore
+    tempdir = mktempdir(jogger.BENCHMARK_DIR; prefix="bench_", cleanup=false)
+    jogger = ExamplePkg.create_jogger(fakepkg)
+    check_suite(jogger)
+
+    # Add a benchmark to a subfolder -> track
+    path = joinpath("bench_subdir_$(rand(UInt16))", "bench_foo_$(rand(UInt16)).jl")
+    suite = add_benchmark(jogger, path)
+    jogger = ExamplePkg.create_jogger(fakepkg)
+    check_suite(jogger; add=suite)
+
+    # Two Levels Deep
+    dir = "bench_subdir_$(rand(UInt16))"
+    suite = union(
+        suite,
+        add_benchmark(jogger, joinpath(dir, "bench_foo_$(rand(UInt16)).jl")),
+        add_benchmark(jogger, joinpath(dir, "bench_l2", "bench_foo_$(rand(UInt16)).jl")),
+    )
+    add_benchmark(jogger, joinpath(dir, "skip_me.jl"))
+    jogger = ExamplePkg.create_jogger(fakepkg)
+    check_suite(jogger; add=suite)
+    cleanup()
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index cd9ad67..ebb5bb8 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,35 +1,2 @@
-using Test
-using SafeTestsets
-using PkgJogger
-
-@testset "PkgJogger.jl" begin
-    # Check that all modules listed in PkgJogger.JOGGER_PKGS are loaded
-    # Do this first so later loadings don't pollute loaded_modules
-    @testset "Loaded Modules" begin
-        @testset "Check $(m.name) was loaded" for m in PkgJogger.JOGGER_PKGS
-            @test haskey(Base.loaded_modules, m)
-        end
-    end
-
-    # Load Example
-    example_path = joinpath(@__DIR__, "Example.jl")
-    if example_path ∉ LOAD_PATH
-        push!(LOAD_PATH, example_path)
-    end
-
-    # Run the rest of the unit testing suite
-    @safetestset "Smoke Tests" begin include("smoke.jl") end
-    @safetestset "Judging" begin include("judging.jl") end
-    @safetestset "CI Workflow" begin include("ci.jl") end
-    @safetestset "Locate Benchmarks" begin include("locate_benchmarks.jl") end
-    @safetestset "Tuning Suites" begin include("tune.jl") end
-    @safetestset "Backwards Compatibility" begin include("backward_compat.jl") end
-    @safetestset "Doc Tests" begin
-        using PkgJogger
-        using Documenter
-        doctest(PkgJogger)
-    end
-end
-
-# Tests should cleanup trials after finishing
-@test !isdir(joinpath(@__DIR__, "Example.jl", "benchmark", "trial"))
+using ReTestItems, PkgJogger
+runtests(PkgJogger)
diff --git a/test/smoke.jl b/test/smoke_tests.jl
similarity index 70%
rename from test/smoke.jl
rename to test/smoke_tests.jl
index 0a5f41a..b1d4272 100644
--- a/test/smoke.jl
+++ b/test/smoke_tests.jl
@@ -1,16 +1,13 @@
-using Test
-using Test: TestLogger
-using Logging: with_logger
-using PkgJogger
-using UUIDs
-using Example
-import BenchmarkTools
+@testitem "canonical" setup=[ExamplePkg, BenchmarkTests] begin
+    using PkgJogger
+    import Test
+    import UUIDs
+    import BenchmarkTools
 
-include("utils.jl")
-
-@testset "canonical" begin
-    @jog Example
-    @test @isdefined JogExample
+    # Create a jogger
+    Example, cleanup = create_example()
+    eval(Expr(:macrocall, Symbol("@jog"), LineNumberNode(@__LINE__, @__FILE__), Example))
+    JogExample = getproperty(@__MODULE__, Symbol(:Jog, Example))
 
     # Run Benchmarks
     r = JogExample.benchmark()
@@ -23,9 +20,6 @@ include("utils.jl")
     r = JogExample.run()
     @test typeof(r) <: BenchmarkTools.BenchmarkGroup
 
-    # BENCHMARK_DIR
-    @test JogExample.BENCHMARK_DIR == PkgJogger.benchmark_dir(Example)
-
     # Saving and Loading
     file = JogExample.save_benchmarks(r)
     @test isfile(file)
@@ -37,12 +31,13 @@ include("utils.jl")
     @testset "Jogger's load_benchmarks" begin
         uuid = get_uuid(file)
         r3 = JogExample.load_benchmarks(uuid)
-        r4 = JogExample.load_benchmarks(UUID(uuid))
+        r4 = JogExample.load_benchmarks(UUIDs.UUID(uuid))
         r5 = JogExample.load_benchmarks(:latest)
-        @test r3 == r4
+        @test results_match(r2, r3)
+        @test results_match(r3, r4)
+        @test results_match(r4, r5)
         @test r3["benchmarks"] == r
         @test r4["benchmarks"] == r
-        @test r2 == r3 == r4 == r5
 
         # Check that we error for invalid uuids
         @test_throws ErrorException JogExample.load_benchmarks("not-a-uuid")
@@ -62,11 +57,6 @@ include("utils.jl")
     # Test Judging
     @test_nowarn JogExample.judge(file, file)
 
-    # If this is a git repo, there should be a git entry
-    if isdir(joinpath(PKG_JOGGER_PATH, ".git"))
-        @test r2["git"] !== nothing
-    end
-
     # Test results location
     trial_dir = joinpath(JogExample.BENCHMARK_DIR, "trial")
     test_subfile(trial_dir, file)
@@ -77,22 +67,31 @@ include("utils.jl")
 
     # Test @test_benchmarks
     @testset "test_benchmarks" begin
-        ts = @test_benchmarks Example
+        ts = eval(Expr(
+            :macrocall,
+            Symbol("@test_benchmarks"),
+            LineNumberNode(@__LINE__, @__FILE__),
+            Example
+        ))
         @test ts isa Vector
         @test all(map(x -> x isa Test.AbstractTestSet, ts))
     end
 
     # No Benchmarks
     @test_throws LoadError @eval(@jog PkgJogger)
+
+    cleanup()
 end
 
-@testset "benchmark and save" begin
-    @jog Example
-    @test @isdefined JogExample
-    cleanup_example()
+@testitem "benchmark and save" setup=[ExamplePkg, BenchmarkTests] begin
+    using PkgJogger
+    using Test
+    Example, cleanup = ExamplePkg.create_example()
+    eval(Expr(:macrocall, Symbol("@jog"), LineNumberNode(@__LINE__, @__FILE__), Example))
+    JogExample = getproperty(@__MODULE__, Symbol(:Jog, Example))
 
-    logger = TestLogger()
-    with_logger(logger) do
+    logger = Test.TestLogger()
+    Test.with_logger(logger) do
         JogExample.benchmark(save = true)
     end
 
@@ -109,7 +108,7 @@ end
     # Currently only have one result Saved
     r_latest = JogExample.load_benchmarks(:latest)
     r_oldest = JogExample.load_benchmarks(:oldest)
-    @test r == r_latest == r_oldest
+    @test results_match(r, r_latest) && results_match(r, r_oldest)
 
     # Check that :latest and :oldest return different results
     # Now have two results saved, so :latest and :oldest should return different results
@@ -117,10 +116,7 @@ end
     JogExample.save_benchmarks(r["benchmarks"])
     r_latest = JogExample.load_benchmarks(:latest)
     r_oldest = JogExample.load_benchmarks(:oldest)
-    @test r != r_latest
-    @test r == r_oldest
-    @test r["benchmarks"] == r_latest["benchmarks"] == r_oldest["benchmarks"]
-
+    @test !results_match(r, r_latest)
+    @test results_match(r, r_oldest)
+    cleanup()
 end
-
-cleanup_example()
diff --git a/test/tune.jl b/test/tune_tests.jl
similarity index 66%
rename from test/tune.jl
rename to test/tune_tests.jl
index b1a8dcf..99dd155 100644
--- a/test/tune.jl
+++ b/test/tune_tests.jl
@@ -1,10 +1,7 @@
+@testsetup module TestTune
 using Test
 using BenchmarkTools
-using PkgJogger
-using Example
-
-include("utils.jl")
-
+export random_tune, random_tune!, test_tune
 macro test_tune(s, ref)
     quote
         s = $(esc(s))
@@ -12,7 +9,7 @@ macro test_tune(s, ref)
         s_keys = collect(keys(s))
         ref_keys = collect(keys(ref))
         @test isempty(setdiff(s_keys, ref_keys))
-        for ((k1, v1), (k2, v2)) in zip(leaves(s), leaves(ref))
+        for ((k1, v1), (k2, v2)) in zip(BenchmarkTools.leaves(s), BenchmarkTools.leaves(ref))
             @test v1.params.evals == v2.params.evals
         end
     end
@@ -25,29 +22,34 @@ function random_tune(suite)
 end
 
 function random_tune!(suite)
-    for (_, b) in leaves(suite)
+    for (_, b) in BenchmarkTools.leaves(suite)
         b.params.evals = rand(1:typemax(Int))
     end
     return suite
 end
+end
 
-@testset "unit tests" begin
-    jogger = @eval @jog Example
-    ref_suite = () -> deepcopy(jogger.suite())
+@testitem "unit tests" setup=[ExamplePkg, TestTune] begin
+    using Test
+    using BenchmarkTools
+    using PkgJogger
 
+    # Create Reference Tune
+    jogger, cleanup = ExamplePkg.create_jogger()
+    ref_suite = () -> deepcopy(jogger.suite())
     ref_tune = ref_suite()
     tune!(ref_tune)
 
     @testset "Fall back to BenchmarkTools.tune!" begin
-        @test_tune PkgJogger.tune!(ref_suite()) ref_tune
-        @test_tune PkgJogger.tune!(ref_suite(), nothing) ref_tune
+        TestTune.@test_tune PkgJogger.tune!(ref_suite()) ref_tune
+        TestTune.@test_tune PkgJogger.tune!(ref_suite(), nothing) ref_tune
         @test_throws AssertionError PkgJogger.tune!(ref_suite(), Dict())
     end
 
     @testset "Reuse prior tune" begin
         rand_tune = random_tune(ref_tune)
-        @test_tune PkgJogger.tune!(ref_suite(), rand_tune) rand_tune
-        @test_tune PkgJogger.tune!(ref_suite(), Dict("benchmarks" => rand_tune)) rand_tune
+        TestTune.@test_tune PkgJogger.tune!(ref_suite(), rand_tune) rand_tune
+        TestTune.@test_tune PkgJogger.tune!(ref_suite(), Dict("benchmarks" => rand_tune)) rand_tune
     end
 
     @testset "Partial Tune" begin
@@ -63,7 +65,7 @@ end
         expected_tune["bench_tune.jl"] = deepcopy(new_suite["bench_tune.jl"])
         tune!(expected_tune["bench_tune.jl"])
 
-        @test_tune PkgJogger.tune!(new_suite, rand_tune) expected_tune
+        TestTune.@test_tune PkgJogger.tune!(new_suite, rand_tune) expected_tune
     end
 
     @testset "Missing Tune" begin
@@ -71,7 +73,7 @@ end
         @testset "Empty Suite" begin
             new_suite = random_tune(ref_suite())
             ref = BenchmarkGroup()
-            @test_tune PkgJogger.tune!(new_suite, ref) ref_tune
+            TestTune.@test_tune PkgJogger.tune!(new_suite, ref) ref_tune
         end
 
         # Retune using a missing benchmark -> Only it should be tuned
@@ -80,13 +82,13 @@ end
             ref = random_tune(ref_suite())
 
             # Add a new benchmark to new_suite to be tunned
-            n, b = first(leaves(new_suite))
+            n, b = first(BenchmarkTools.leaves(new_suite))
             n[end] = rand()
             new_suite[n] = deepcopy(b)
 
             # Everything except the new benchmark should be tuned
             r = PkgJogger.tune!(new_suite, ref)
-            @test_tune r[ref] ref
+            TestTune.@test_tune r[ref] ref
         end
     end
 
@@ -97,7 +99,9 @@ end
         expected_tune = deepcopy(rand_tune)
         rand_tune["bench_tune.jl"] = deepcopy(first(rand_tune)[2])
 
-        @test_tune PkgJogger.tune!(ref_suite(), rand_tune) expected_tune
-        @test_tune PkgJogger.tune!(ref_suite(), Dict("benchmarks" => rand_tune)) expected_tune
+        TestTune.@test_tune PkgJogger.tune!(ref_suite(), rand_tune) expected_tune
+        TestTune.@test_tune PkgJogger.tune!(ref_suite(), Dict("benchmarks" => rand_tune)) expected_tune
     end
+
+    cleanup()
 end
diff --git a/test/utils.jl b/test/utils.jl
deleted file mode 100644
index 3d5800d..0000000
--- a/test/utils.jl
+++ /dev/null
@@ -1,117 +0,0 @@
-using BenchmarkTools
-using Test
-
-# Reduce Benchmarking Duration for faster testing
-BenchmarkTools.DEFAULT_PARAMETERS.seconds = 0.1
-
-const PKG_JOGGER_PATH = joinpath(@__DIR__, "..") |> abspath
-
-function test_loaded_results(r::Dict)
-    @test haskey(r, "julia")
-    @test haskey(r, "system")
-    @test haskey(r, "datetime")
-    @test haskey(r, "benchmarks")
-    @test r["benchmarks"] isa BenchmarkTools.BenchmarkGroup
-    @testset "git" begin
-        @test haskey(r, "git")
-        if r["git"] !== nothing
-            @test haskey(r["git"], "commit")
-            @test haskey(r["git"], "is_dirty")
-            @test haskey(r["git"], "datetime")
-        end
-    end
-end
-
-"""
-    test_subfile(parent, child)
-
-Test that `child` is a child of `parent`
-"""
-function test_subfile(parent, child)
-    @testset "$child in $parent" begin
-        @test isfile(child)
-        @test isdir(parent)
-
-        # Get full path split into parts
-        parent_path = splitpath(abspath(parent))
-        child_path = splitpath(abspath(child))
-
-        # Check that parent is a root of child
-        n = length(parent_path)
-        @assert n < length(child_path)
-        @test all( parent_path .== child_path[1:n])
-    end
-end
-
-"""
-    get_uuid(filename)
-
-Extract benchmark UUID from filename
-"""
-function get_uuid(filename)
-    splitpath(filename)[end] |> x -> split(x, ".")[1]
-end
-
-"""
-    test_benchmark(target::BenchmarkGroup, ref)
-
-Checks that target and ref are from equivalent benchmarking suite
-"""
-function test_benchmark(target, ref::BenchmarkGroup)
-    @test typeof(target) <: BenchmarkGroup
-    @test isempty(setdiff(keys(target), keys(ref)))
-    map(test_benchmark, target, ref)
-end
-test_benchmark(target, ref) = @test typeof(target) <: typeof(ref)
-function test_benchmark(target, ref::BenchmarkTools.Trial)
-    @test typeof(target) <: BenchmarkTools.Trial
-    @test params(target) == params(ref)
-end
-
-function add_benchmark(pkg, path)
-
-    # Create Dummy Benchmark
-    filename = joinpath(PkgJogger.benchmark_dir(pkg), path)
-    dir = dirname(filename)
-    cleanup = isdir(dir) ? () -> rm(filename) : () -> rm(dir; recursive=true)
-    mkpath(dir)
-
-    open(filename, "w") do io
-        """
-        using BenchmarkTools
-
-        suite = BenchmarkGroup()
-        suite["foo"] = @benchmarkable sin(rand())
-        """ |> s -> write(io, s)
-    end
-
-    suite = Set([[splitpath(path)..., "foo"]])
-    return suite, cleanup
-end
-
-"""
-    cleanup_example()
-
-Remove generated files from Example.jl
-"""
-function cleanup_example()
-    example_dir = joinpath(PKG_JOGGER_PATH, "test", "Example.jl")
-    rm(joinpath(example_dir, "benchmark", "trial"); force=true, recursive=true)
-end
-
-import Base: ==
-"""
-    ==(a::Base.Sys.CPUinfo, b::Base.Sys.CPUinfo)
-
-Mark two CPUinfo objects as equal if all of their fields match
-"""
-function ==(a::Base.Sys.CPUinfo, b::Base.Sys.CPUinfo)
-    for f in propertynames(a)
-        af = getproperty(a, f)
-        bf = getproperty(b, f)
-        if af != bf
-            return false
-        end
-    end
-    return true
-end
diff --git a/test/utils_test.jl b/test/utils_test.jl
new file mode 100644
index 0000000..3f9c623
--- /dev/null
+++ b/test/utils_test.jl
@@ -0,0 +1,162 @@
+@testsetup module BenchmarkTests
+
+using Test
+using BenchmarkTools
+
+export test_loaded_results, test_subfile, get_uuid, test_benchmark, results_match
+
+# Reduce benchmarking duration for faster testing (mutates BenchmarkTools' process-wide defaults)
+BenchmarkTools.DEFAULT_PARAMETERS.seconds = 0.1
+
+# Check that `r` provides every top-level entry expected of a loaded results dictionary
+function test_loaded_results(r::Dict)
+    for key in ("julia", "system", "datetime", "benchmarks")
+        @test haskey(r, key)
+    end
+    @test r["benchmarks"] isa BenchmarkTools.BenchmarkGroup
+    @testset "git" begin
+        @test haskey(r, "git")
+        git = r["git"]
+        fields = git === nothing ? () : ("commit", "is_dirty", "datetime")
+        for field in fields
+            @test haskey(git, field)
+        end
+    end
+end
+
+# Two result dictionaries match when all four of the compared entries are equal
+function results_match(x::Dict, y::Dict)
+    for key in ("benchmarks", "julia", "pkgjogger", "datetime")
+        x[key] == y[key] || return false
+    end
+    return true
+end
+
+"""
+    test_subfile(parent, child)
+
+Test that the file `child` is located inside of the directory `parent`
+"""
+function test_subfile(parent, child)
+    @testset "subfile-check" begin
+        @test isfile(child)
+        @test isdir(parent)
+
+        # Compare absolute paths component-wise, so "/a/b" is not a parent of "/a/bc"
+        parent_path = splitpath(abspath(parent))
+        child_path = splitpath(abspath(child))
+
+        # `@test` (not `@assert`) so that a too-short child is recorded as a test failure
+        n = length(parent_path)
+        @test n < length(child_path)
+        @test parent_path == first(child_path, n)
+    end
+end
+
+"""
+    get_uuid(filename)
+
+Return the benchmark UUID of `filename`: its last path component up to the first "."
+"""
+function get_uuid(filename)
+    return first(split(last(splitpath(filename)), "."))
+end
+
+"""
+    test_benchmark(target, ref)
+
+Test that `target` comes from a benchmarking suite equivalent to that of `ref`
+"""
+function test_benchmark(target, ref::BenchmarkGroup)
+    @test target isa BenchmarkGroup
+    @test issubset(keys(target), keys(ref))
+    return map(test_benchmark, target, ref)
+end
+function test_benchmark(target, ref::BenchmarkTools.Trial)
+    @test target isa BenchmarkTools.Trial
+    @test params(target) == params(ref)
+end
+test_benchmark(target, ref) = @test target isa typeof(ref)
+
+end
+
+@testsetup module ExamplePkg
+using PkgJogger
+using TOML
+using UUIDs
+using Random
+using Pkg
+export create_example, add_benchmark
+
+# Write a copy of `src` to `dst` with every "Example" replaced by `fake`; returns `dst`
+function copy_fake(src, dst, fake)
+    renamed = [replace(line, r"Example" => fake) for line in eachline(src)]
+    open(dst, "w") do io
+        foreach(line -> println(io, line), renamed)
+    end
+    return dst
+end
+
+# Build a fresh example package; return its jogger together with the cleanup callback
+function create_jogger()
+    pkg, cleanup = create_example()
+    return create_jogger(pkg), cleanup
+end
+
+function create_jogger(pkg::Symbol)  # evaluate `@jog <pkg>` in this module, then fetch the result
+    eval(Expr(:macrocall, Symbol("@jog"), LineNumberNode(@__LINE__, @__FILE__), pkg))
+    return getproperty(@__MODULE__, Symbol(:Jog, pkg))  # binding `Jog<pkg>`, presumably defined by `@jog`
+end
+
+# Create a uniquely named copy of the Example package in a temp directory and import it
+function create_example()
+    # Copy Example package to temp directory, renaming "Example" to the fake name
+    dir = mktempdir()
+    fakename = "Example" * randstring(8)
+    chmod(dir, 0o700)
+    src_dir = joinpath(PkgJogger.pkgdir(PkgJogger), "test", "Example.jl")
+    for (root, _, files) in walkdir(src_dir)
+        dst_root = abspath(joinpath(dir, relpath(root, src_dir)))
+        mkpath(dst_root; mode=0o700)
+        for file in files
+            dst = joinpath(dst_root, file)
+            copy_fake(joinpath(root, file), dst, fakename)
+            chmod(dst, 0o600)
+        end
+    end
+
+    # Returned to the caller: deletes the copy and drops it from LOAD_PATH
+    function cleanup()
+        rm(dir; force=true, recursive=true)
+        filter!(!=(dir), LOAD_PATH)
+    end
+
+    # Fake a new module: give the copy its own name and UUID
+    project_file = joinpath(dir, "Project.toml")
+    project = TOML.parsefile(project_file)
+    project["name"] = fakename
+    project["uuid"] = string(UUIDs.uuid4())
+    open(io -> TOML.print(io, project), project_file, "w")
+    mv(joinpath(dir, "src", "Example.jl"), joinpath(dir, "src", fakename * ".jl"))
+
+    # Instantiate and import the copy; always switch back to the caller's project
+    name = Symbol(fakename)
+    previous = Base.active_project()
+    try
+        @info "Creating $fakename for testing in $dir"
+        Pkg.activate(dir)
+        Pkg.instantiate(; verbose=false)
+        push!(LOAD_PATH, dir)
+        @eval import $name
+    catch
+        cleanup()
+        rethrow()
+    finally
+        Pkg.activate(previous)
+    end
+    return name, cleanup
+end
+
+
+
+end

From dd71030eea6f31679a6e43403f1acf036d23fc4b Mon Sep 17 00:00:00 2001
From: Alexius Wadell <awadell@gmail.com>
Date: Thu, 14 Dec 2023 21:18:18 -0500
Subject: [PATCH 2/3] breaking: increase min julia to 1.9

---
 .github/workflows/CI.yml | 1 -
 Project.toml             | 4 ++--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 269a1e7..111f07e 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -12,7 +12,6 @@ jobs:
       matrix:
         version:
           - '*'     # Latest Release
-          - '~1.6'  # Current LTS
         os:
           - ubuntu-latest
           - windows-latest
diff --git a/Project.toml b/Project.toml
index 62cad62..413e628 100644
--- a/Project.toml
+++ b/Project.toml
@@ -23,7 +23,7 @@ JSON = "0.21"
 Pkg = "1.9"
 Revise = "3"
 Statistics = "1.9"
-julia = "1.6"
+julia = "1.9"
 
 [extras]
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
@@ -35,4 +35,4 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
 
 [targets]
-test = ["Test", "ReTestItems", "Revise", "UUIDs", "TOML", "Random"]
+test = ["Test", "ReTestItems", "Revise", "UUIDs", "TOML", "Random", "Pkg"]

From 3da62ff230b0d296660018485a6c9f33d31511bb Mon Sep 17 00:00:00 2001
From: Alexius Wadell <awadell@gmail.com>
Date: Fri, 15 Dec 2023 09:28:54 -0500
Subject: [PATCH 3/3] fix: drop strict (deprecated) and add compat for docs

---
 docs/Project.toml | 3 +++
 docs/make.jl      | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/docs/Project.toml b/docs/Project.toml
index 2054432..22b972f 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -3,3 +3,6 @@ BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 Example = "4b09cd0b-9172-4840-a79f-b48550c7f881"
 PkgJogger = "10150987-6cc1-4b76-abee-b1c1cbd91c01"
+
+[compat]
+Documenter = "1.2"
diff --git a/docs/make.jl b/docs/make.jl
index f291f1a..9675630 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -45,7 +45,7 @@ makedocs(;
         "Continuous Benchmarking" => "ci.md",
         "Reference" => "reference.md",
     ],
-    strict=true,
+    checkdocs=:all,
 )
 
 deploydocs(;