From c54672d18307ef4a2e800e7380a58ecc38d1dafc Mon Sep 17 00:00:00 2001
From: Alexius Wadell <awadell@gmail.com>
Date: Wed, 16 Aug 2023 16:15:42 -0400
Subject: [PATCH 1/6] breaking: Add support for running a subset of the suite

Can now do `JogAwesome.benchmark("bench_foo.jl")` to only run
benchmarks within `bench_foo.jl`.

The downside is that specifying arguments to `BenchmarkTools.run` is
no longer possible. That is a breaking change, but all that is lost is
the ability to run with different parameters, so hopefully it is minor.
---
 docs/src/reference.md |   1 +
 src/jogger.jl         | 310 ++++++++++++++++++++++++++----------------
 src/utils.jl          |  37 ++++-
 3 files changed, 228 insertions(+), 120 deletions(-)

diff --git a/docs/src/reference.md b/docs/src/reference.md
index 35abad3..fb71ac4 100644
--- a/docs/src/reference.md
+++ b/docs/src/reference.md
@@ -6,6 +6,7 @@ PkgJogger.locate_benchmarks
 PkgJogger.judge
 PkgJogger.test_benchmarks
 PkgJogger.tune!
+PkgJogger.getsuite
 ```
 
 ## Internal
diff --git a/src/jogger.jl b/src/jogger.jl
index 19c5fdb..0f9e8d4 100644
--- a/src/jogger.jl
+++ b/src/jogger.jl
@@ -31,6 +31,12 @@ using AwesomePkg, PkgJogger
 results = JogAwesomePkg.benchmark()
 file = JogAwesomePkg.save_benchmarks(results)
 ```
+
+Compare benchmarking results to the latest saved results
+```julia
+results = JogAwesomePkg.benchmark()
+JogAwesomePkg.judge(results, :latest)
+```
 """
 macro jog(pkg)
     # Module Name
@@ -41,7 +47,6 @@ macro jog(pkg)
     if !isdir(bench_dir)
         error("No benchmark directory found for $pkg. Expected: $bench_dir")
     end
-
     # Generate Using Statements
     using_statements = Expr[]
     for pkg in JOGGER_PKGS
@@ -68,150 +73,217 @@ macro jog(pkg)
     # Generate Module for Jogging pkg
     quote
         @eval module $modname
-            using $pkg
-            $(using_statements...)
+        using $pkg
+        $(using_statements...)
 
-            # Set Revise Mode and put submodules here
-            __revise_mode__ = :eval
-            $(suite_exp...)
+        # Set Revise Mode and put submodules here
+        __revise_mode__ = :eval
+        $(suite_exp...)
 
-            """
-            BENCHMARK_DIR
+        """
+        BENCHMARK_DIR
 
-            Directory of benchmarks for $($pkg)
-            """
-            const BENCHMARK_DIR = $bench_dir
+        Directory of where benchmarking results are saved for $($pkg)
+        """
+        const BENCHMARK_DIR = $bench_dir
 
-            """
-                suite()::BenchmarkGroup
+        """
+            suite()::BenchmarkGroup
 
-            The BenchmarkTools suite for $($pkg)
-            """
-            function suite()
-                suite = BenchmarkTools.BenchmarkGroup()
-                $(suite_expressions...)
-                suite
-            end
+        The BenchmarkTools suite for $($pkg)
+        """
+        function suite()
+            suite = BenchmarkTools.BenchmarkGroup()
+            $(suite_expressions...)
+            suite
+        end
 
-            # Dispatch calls to tune! here so we can use the jogger variant of load_benchmarks
-            __tune!(group::BenchmarkTools.BenchmarkGroup, ref::BenchmarkTools.BenchmarkGroup; kwargs...) = PkgJogger.tune!(group, ref; kwargs...)
-            __tune!(group::BenchmarkTools.BenchmarkGroup, ref; kwargs...) = PkgJogger.tune!(group, load_benchmarks(ref); kwargs...)
-            __tune!(group::BenchmarkTools.BenchmarkGroup, ::Nothing; kwargs...) = BenchmarkTools.tune!(group; kwargs...)
-
-            """
-                benchmark(; verbose = false, save = false, ref = nothing)
-
-            Warmup, tune and run the benchmarking suite for $($pkg).
-
-            If `save = true`, will save the results using [`$($mod_str).save_benchmarks`](@ref)
-            and display the filename using `@info`.
-
-            To reuse prior tuning results set `ref` to a BenchmarkGroup or suitable identifier
-            for [`$($mod_str).load_benchmarks`](@ref). See [`PkgJogger.tune!`](@ref) for
-            more information about re-using tuning results.
-            """
-            function benchmark(; verbose = false, save = false, ref = nothing)
-                s = suite()
-                __tune!(s, ref; verbose = verbose)
-                results = BenchmarkTools.run(s; verbose = verbose)
-                if save
-                    filename = save_benchmarks(results)
-                    @info "Saved results to $filename"
-                end
-                return results
-            end
+        """
+            suite(select...)
 
-            """
-                run(args...; verbose::Bool = false, kwargs)
+        Returns the benchmarking suite for $($pkg), optionally filtering based on `select...`.
+        At it's simplest, `$($mod_str).suite(a, b, ...)` is equivalent to `$($mod_str).suite()[a][b]...`
 
-            Run the benchmarking suite for $($pkg). See
-            [`BenchmarkTools.run`](https://juliaci.github.io/BenchmarkTools.jl/stable/reference/#Base.run)
-            for more options
-            """
-            function run(args...; verbose = false, kwargs...)
-                BenchmarkTools.run(suite(), args...; verbose = verbose, kwargs...)
-            end
+        ## Supported Indices
+
+        - `:` - Accepts any entry at that level in the tree
+        - `r"Regexp"` - Accepts any entry matching the regular-expression
+        - `key::Any` - Accepts any entry with a matching `key`
+        - `@tagged` - Filters the suite to only include `BenchmarkGroup`s with a matching tag.
+        See [Indexing into a BenchmarkGroup using @tagged](https://juliaci.github.io/BenchmarkTools.jl/stable/manual/#Indexing-into-a-BenchmarkGroup-using-@tagged)
 
-            """
-                save_benchmarks(results::BenchmarkGroup)::String
+        !!! warning
+            An entry in `suite` must match all indices to be returned. For example,
+            `$($mod_str).suite(:, "bar")` would exclude a benchmark at `suite["bat"]` as
+            the benchmark isn't matched by **both** `:` and `"bar"`.
 
-            Saves benchmarking results for $($pkg) to `BENCHMARK_DIR/trial/uuid4().bson.gz`,
-            and returns the path to the saved results
+        ## Examples
+        - The suite in `bench_foo.jl`: `$($mod_str).suite("bench_foo.jl")`
+        - Any benchmark matching `r"feature"` in any `bench_*.jl`: `$($mod_str).suite(:, r"feature")`
 
-            > Meta Data such as cpu load, time stamp, etc. are collected on save, not during
-            > benchmarking. For representative metadata, results should be saved immediately
-            > after benchmarking.
+        """
+        suite(select...) = PkgJogger.getsuite(suite(), select...)
 
-            Results can be loaded with [`PkgJogger.load_benchmarks`](@ref) or
-            [`$($mod_str).load_benchmarks`](@ref)
+        # Dispatch calls to tune! here so we can use the jogger variant of load_benchmarks
+        __tune!(group::BenchmarkTools.BenchmarkGroup, ref::BenchmarkTools.BenchmarkGroup; kwargs...) = PkgJogger.tune!(group, ref; kwargs...)
+        __tune!(group::BenchmarkTools.BenchmarkGroup, ref; kwargs...) = PkgJogger.tune!(group, load_benchmarks(ref); kwargs...)
+        __tune!(group::BenchmarkTools.BenchmarkGroup, ::Nothing; kwargs...) = BenchmarkTools.tune!(group; kwargs...)
 
-            ## Example
+        """
+            benchmark([select...]; verbose = false, save = false, ref = nothing)
 
-            Running a benchmark suite and then saving the results
+        Warmup, tune and run the benchmarking suite for $($pkg).
 
-            ```julia
-            r = $($mod_str).benchmark()
-            filename = $($mod_str).save_benchmarks(r)
-            ```
+        If `save = true`, will save the results using [`$($mod_str).save_benchmarks`](@ref)
+        and display the filename using `@info`.
 
-            > Equivalently: `$($mod_str).benchmark(; save = true)`
+        To reuse prior tuning results set `ref` to a BenchmarkGroup or suitable identifier
+        for [`$($mod_str).load_benchmarks`](@ref). See [`PkgJogger.tune!`](@ref) for
+        more information about re-using tuning results.
 
-            """
-            function save_benchmarks(results)
-                filename = joinpath(BENCHMARK_DIR, "trial", "$(UUIDs.uuid4()).bson.gz")
-                PkgJogger.save_benchmarks(filename, results)
-                filename
+        Optionally, benchmark a subset of the full suite by providing a set of filters.
+        See [`PkgJogger.getsuite`](@ref) for more information.
+        """
+        function benchmark(select...; verbose=false, save=false, ref=nothing)
+            s = suite(select...)
+            BenchmarkTools.warmup(s; verbose)
+            __tune!(s, ref; verbose=verbose)
+            results = BenchmarkTools.run(s; verbose=verbose)
+            if save
+                filename = save_benchmarks(results)
+                @info "Saved results to $filename"
             end
+            return results
+        end
 
-            """
-                load_benchmarks(id)::Dict
+        """
+            run([select...]; verbose::Bool = false, kwargs)
 
-            Loads benchmarking results for $($pkg) from `BENCHMARK_DIR/trial` based on `id`.
-            The following are supported `id` types:
+        Run the benchmarking suite for $($pkg). See
+        [`BenchmarkTools.run`](https://juliaci.github.io/BenchmarkTools.jl/stable/reference/#Base.run)
+        for more options
 
-                - `filename::String`: Loads results from `filename`
-                - `uuid::Union{String, UUID}`: Loads results with the given UUID
-                - `:latest` loads the latest (By mtime) results from `BENCHMARK_DIR/trial`
-                - `:oldest` loads the oldest (By mtime) results from `BENCHMARK_DIR/trial`
-            """
-            load_benchmarks(id) = PkgJogger.load_benchmarks(joinpath(BENCHMARK_DIR, "trial"), id)
+        Optionally, run a subset of the full suite by providing a set of filters.
+        See [`PkgJogger.getsuite`](@ref) for more information.
+        """
+        function run(select...; verbose=false, kwargs...)
+            BenchmarkTools.run(suite(select...); verbose=verbose, kwargs...)
+        end
 
-            """
-                judge(new, old; metric=Statistics.median, kwargs...)
+        """
+            save_benchmarks(results::BenchmarkGroup)::String
 
-            Compares benchmarking results from `new` vs `old` for regressions/improvements
-            using `metric` as a basis. Additional `kwargs` are passed to `BenchmarkTools.judge`
+        Saves benchmarking results for $($pkg) to `BENCHMARK_DIR/trial/uuid4().bson.gz`,
+        and returns the path to the saved results
 
-            Identical to [`PkgJogger.judge`](@ref), but accepts any identifier supported by
-            [`$($mod_str).load_benchmarks`](@ref)
+        > Meta Data such as cpu load, time stamp, etc. are collected on save, not during
+        > benchmarking. For representative metadata, results should be saved immediately
+        > after benchmarking.
 
-            ## Examples
+        Results can be loaded with [`PkgJogger.load_benchmarks`](@ref) or
+        [`$($mod_str).load_benchmarks`](@ref)
 
-            ```julia
-            # Judge the latest results vs. the oldest
-            $($mod_str).judge(:latest, :oldest)
-            [...]
-            ```
+        ## Examples
 
-            ```julia
-            # Judge results by UUID
-            $($mod_str).judge("$(UUIDs.uuid4())", "$(UUIDs.uuid4())")
-            [...]
-            ```
+        Running a benchmark suite and then saving the results
 
-            ```julia
-            # Judge using the minimum, instead of the median, time
-            $($mod_str).judge("path/to/results.bson.gz", "$(UUIDs.uuid4())"; metric=minimum)
-            [...]
-            ```
+        ```julia
+        r = $($mod_str).benchmark()
+        filename = $($mod_str).save_benchmarks(r)
+        ```
 
-            """
-            function judge(new, old; kwargs...)
-                PkgJogger.judge(_get_benchmarks(new), _get_benchmarks(old); kwargs...)
-            end
-            _get_benchmarks(b) = load_benchmarks(b)
-            _get_benchmarks(b::Dict) = PkgJogger._get_benchmarks(b)
-            _get_benchmarks(b::BenchmarkTools.BenchmarkGroup) = b
+        > Equivalently: `$($mod_str).benchmark(; save = true)`
+
+        """
+        function save_benchmarks(results)
+            filename = joinpath(BENCHMARK_DIR, "trial", "$(UUIDs.uuid4()).bson.gz")
+            PkgJogger.save_benchmarks(filename, results)
+            filename
+        end
+
+        """
+            load_benchmarks(id)::Dict
+
+        Loads benchmarking results for $($pkg) from `BENCHMARK_DIR/trial` based on `id`.
+        The following are supported `id` types:
+
+            - `filename::String`: Loads results from `filename`
+            - `uuid::Union{String, UUID}`: Loads results with the given UUID
+            - `:latest` loads the latest (By mtime) results from `BENCHMARK_DIR/trial`
+            - `:oldest` loads the oldest (By mtime) results from `BENCHMARK_DIR/trial`
+        """
+        load_benchmarks(id) = PkgJogger.load_benchmarks(joinpath(BENCHMARK_DIR, "trial"), id)
+
+        """
+            judge(new, old; metric=Statistics.median, kwargs...)
+
+        Compares benchmarking results from `new` vs `old` for regressions/improvements
+        using `metric` as a basis. Additional `kwargs` are passed to `BenchmarkTools.judge`
+
+        Identical to [`PkgJogger.judge`](@ref), but accepts any identifier supported by
+        [`$($mod_str).load_benchmarks`](@ref)
+
+        ## Examples
+
+        ```julia
+        # Judge the latest results vs. the oldest
+        $($mod_str).judge(:latest, :oldest)
+        [...]
+        ```
+
+        ```julia
+        # Judge results by UUID
+        $($mod_str).judge("$(UUIDs.uuid4())", "$(UUIDs.uuid4())")
+        [...]
+        ```
+
+        ```julia
+        # Judge using the minimum, instead of the median, time
+        $($mod_str).judge("path/to/results.bson.gz", "$(UUIDs.uuid4())"; metric=minimum)
+        [...]
+        ```
+        """
+        function judge(new, old; kwargs...)
+            PkgJogger.judge(_get_benchmarks(new), _get_benchmarks(old); kwargs...)
+        end
+        _get_benchmarks(b) = load_benchmarks(b)
+        _get_benchmarks(b::Dict) = PkgJogger._get_benchmarks(b)
+        _get_benchmarks(b::BenchmarkTools.BenchmarkGroup) = b
+
+        """
+            profile(select...; profiler=:cpu, verbose=false, ref=nothing, kwargs...)
+
+        Profile the benchmarking suite using the given `profiler`, the benchmark is
+        warmed up, tuned and then ran under the profile.
+
+        Like [`$($mod_str).benchmark`](@ref), `ref` can be used to reuse the results
+        of a prior run during tuning.
+
+        Some profilers support additional keyword arguments, see below for details.
+
+        !!! info
+            At this time, `PkgJogger` only supports profiling a single benchmark
+            at a time. Automated saving is not supported.
+
+        # Available Profilers
+        The following profilers are currently supported. Additional profilers
+        are available via package extensions.
+
+
+        $(@doc PkgJogger.profile)
+
+        ---
+
+        !!! info
+            This list was generated on jogger creation (`@jog $($pkg)`),
+            and may not reflect all loaded extensions. See [`PkgJogger.profile`](@ref)
+            or regenerate the jogger for additional information
+
+        """
+        function profile(select...; profiler::Symbol=:cpu, kwargs...)
+            s = suite(select...)
+            PkgJogger.profile(s, profiler; kwargs...)
+        end
 
         end
     end
@@ -230,15 +302,15 @@ function build_module(s::BenchModule)
     # benchmarking module. Otherwise, don't track changes.
     revise_id = PkgId(UUID("295af30f-e4ad-537b-8983-00126c2a3abe"), "Revise")
     if haskey(Base.loaded_modules, revise_id)
-        revise_exp = :( Base.loaded_modules[$revise_id].track($modname, $(s.filename)) )
+        revise_exp = :(Base.loaded_modules[$revise_id].track($modname, $(s.filename)))
     else
         revise_exp = :()
     end
 
     module_expr = quote
         module $modname
-            __revise_mode__ = :eval
-            include($(s.filename))
+        __revise_mode__ = :eval
+        include($(s.filename))
         end
         $(revise_exp)
     end
diff --git a/src/utils.jl b/src/utils.jl
index 0695499..80b5870 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -71,6 +71,41 @@ function locate_benchmarks(path, name=String[])
 end
 locate_benchmarks(pkg::Module) = benchmark_dir(pkg) |> locate_benchmarks
 
+"""
+    getsuite(suite, [select...])
+
+Index into `suite` and return the matching entries in suite.
+At it's simplest, `getsuite(suite, "foo", "bar",...)` is the same as `suite["foo"]["bar"]...`
+
+# Supported Indices
+
+- `:` - Accepts any entry at that level in the tree
+- `r"Regexp"` - Accepts any entry matching the regular-expression
+- `key::Any` - Accepts any entry with a matching `key`
+- `@tagged` - Filters the suite to only include `BenchmarkGroup`s with a matching tag.
+  See [Indexing into a BenchmarkGroup using @tagged](https://juliaci.github.io/BenchmarkTools.jl/stable/manual/#Indexing-into-a-BenchmarkGroup-using-@tagged)
+
+!!! warning
+    An entry in `suite` must match all indices to be returned. For example,
+    `getsuite(s, :, "bar")` would exclude a benchmark at `s["bat"]` as
+    the benchmark isn't matched by **both** `:` and `"bar"`.
+"""
+getsuite(suite::BenchmarkGroup) = suite
+getsuite(suite::BenchmarkGroup, ::Colon) = suite
+getsuite(suite::BenchmarkGroup, r::Regex) = filter(!isnothing ∘ Base.Fix1(match, r) ∘ first, suite)
+getsuite(suite::BenchmarkGroup, f::BenchmarkTools.TagFilter) = suite[f]
+getsuite(::BenchmarkTools.Benchmark, ::Any, ::Any...) = nothing  # a leaf benchmark has no children: one or more remaining selectors can never match
+getsuite(suite::BenchmarkGroup, idx) = !haskey(suite, idx) ? BenchmarkGroup() : BenchmarkGroup(idx => suite[idx])
+function getsuite(suite::BenchmarkGroup, idx, rest...)
+    src = getsuite(suite, idx)
+    dst = similar(src)
+    for (k, v) in src
+        v = getsuite(v, rest...)
+        !isnothing(v) && !isempty(v) && setindex!(dst, v, k)
+    end
+    return dst
+end
+
 """
     judge(new, old; metric=Statistics.median, kwargs...)
 
@@ -87,7 +122,7 @@ Effectively a convenience wrapper around `load_benchmarks` and `BenchmarkTools.j
 function judge(
     new::BenchmarkTools.BenchmarkGroup,
     old::BenchmarkTools.BenchmarkGroup;
-    metric = Statistics.median,
+    metric=Statistics.median,
     kwargs...
 )
     new_estimate = metric(new)

From 5250e2220291f7c11ba54121c7cf9307e1a9ec1f Mon Sep 17 00:00:00 2001
From: Alexius Wadell <awadell@gmail.com>
Date: Thu, 17 Aug 2023 11:04:57 -0400
Subject: [PATCH 2/6] feat: support profiling individual benchmarks

Built-in support for `Profile.@profile` and `Profile.Allocs.@profile`,
plus support for `CUDA.@profile` via a package extension.

Package Extensions are supported on 1.9 forward (Won't be backporting
with Requires)

Quick demo:

```julia
@jog Example
using PkgJogger, Example
JogExample.profile("bench_timer.jl", "1ms") # CPU Profiling
JogExample.profile("bench_timer.jl", "1ms"; profiler=:allocs) # Allocs
```
---
 Project.toml            | 21 ++++++++++++--
 ext/PkgJoggerCUDAExt.jl | 26 +++++++++++++++++
 src/PkgJogger.jl        |  2 ++
 src/jogger.jl           |  1 -
 src/profile.jl          | 64 +++++++++++++++++++++++++++++++++++++++++
 src/utils.jl            | 18 ++----------
 test/profile.jl         | 40 ++++++++++++++++++++++++++
 7 files changed, 154 insertions(+), 18 deletions(-)
 create mode 100644 ext/PkgJoggerCUDAExt.jl
 create mode 100644 src/profile.jl
 create mode 100644 test/profile.jl

diff --git a/Project.toml b/Project.toml
index 7f68fd6..15f3c45 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "PkgJogger"
 uuid = "10150987-6cc1-4b76-abee-b1c1cbd91c01"
 authors = ["Alexius Wadell <awadell@gmail.com> and contributors"]
-version = "0.5.1"
+version = "0.6.0"
 
 [deps]
 BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0"
@@ -11,21 +11,38 @@ Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
 JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
 LibGit2 = "76f85450-5226-5b5a-8eaa-529ad045b433"
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
+Profile = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
 
+[weakdeps]
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+NVTX = "5da4648a-3479-48b8-97b9-01cb529c0a1f"
+
+[extensions]
+PkgJoggerCUDAExt = ["CUDA", "NVTX"]
+
 [compat]
 BSON = "0.3"
 BenchmarkTools = "1.5"
+CUDA = "5"
 CodecZlib = "0.7"
+Dates = "1.9"
 JSON = "0.21"
+LibGit2 = "1.9"
+NVTX = "0.3"
 Pkg = "1.9"
+Profile = "1.9"
 Revise = "3"
 Statistics = "1.9"
+Test = "1.9"
+UUIDs = "1.9"
 julia = "1.9"
 
 [extras]
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+NVTX = "5da4648a-3479-48b8-97b9-01cb529c0a1f"
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 ReTestItems = "817f1d60-ba6b-4fd5-9520-3cf149f6a823"
@@ -35,4 +52,4 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
 
 [targets]
-test = ["Test", "ReTestItems", "Revise", "UUIDs", "TOML", "Random", "Pkg"]
+test = ["Test", "ReTestItems", "Revise", "UUIDs", "TOML", "Random", "Pkg", "NVTX", "CUDA"]
diff --git a/ext/PkgJoggerCUDAExt.jl b/ext/PkgJoggerCUDAExt.jl
new file mode 100644
index 0000000..8ee8027
--- /dev/null
+++ b/ext/PkgJoggerCUDAExt.jl
@@ -0,0 +1,26 @@
+module PkgJoggerCUDAExt
+
+using PkgJogger
+using CUDA
+using NVTX
+
+"""
+    profiler=:cuda
+
+Profiles the benchmark using [`CUDA.@profile`](@ref).
+
+!!! warning
+    This only activates the CUDA profiler, you need to launch the profiler externally.
+    See [CUDA Profiling](https://cuda.juliagpu.org/stable/development/profiling/) for documentation.
+
+"""
+function PkgJogger.profile(::Val{Symbol(:cuda)}, id, b::PkgJogger.BenchmarkTools.Benchmark; verbose)
+    id_str = join(id, "/")
+    CUDA.@profile begin
+        NVTX.@range id_str begin
+            PkgJogger.BenchmarkTools.run(b)
+        end
+    end
+end
+
+end
diff --git a/src/PkgJogger.jl b/src/PkgJogger.jl
index dd5dc27..132fe6c 100644
--- a/src/PkgJogger.jl
+++ b/src/PkgJogger.jl
@@ -10,6 +10,7 @@ using Dates
 using LibGit2
 using Statistics
 using Test
+using Profile
 
 export @jog, @test_benchmarks
 
@@ -30,6 +31,7 @@ const PKG_JOGGER_VER = VersionNumber(
 )
 
 include("utils.jl")
+include("profile.jl")
 include("jogger.jl")
 include("ci.jl")
 
diff --git a/src/jogger.jl b/src/jogger.jl
index 0f9e8d4..7db1bf8 100644
--- a/src/jogger.jl
+++ b/src/jogger.jl
@@ -146,7 +146,6 @@ macro jog(pkg)
         """
         function benchmark(select...; verbose=false, save=false, ref=nothing)
             s = suite(select...)
-            BenchmarkTools.warmup(s; verbose)
             __tune!(s, ref; verbose=verbose)
             results = BenchmarkTools.run(s; verbose=verbose)
             if save
diff --git a/src/profile.jl b/src/profile.jl
new file mode 100644
index 0000000..288258e
--- /dev/null
+++ b/src/profile.jl
@@ -0,0 +1,64 @@
+function profile(suite, profiler::Symbol; verbose=false, ref=nothing, kwargs...)
+    leaf = leaves(suite)
+    @assert length(leaf) == 1 "Profiling Support is limited to one benchmark at a time"
+    id, benchmark = first(leaf)
+    warmup(suite; verbose)
+    tune!(suite, ref)
+    profile(Val(profiler), id, benchmark; verbose, kwargs...)
+end
+
+profile(p::Val, args...; kwargs...) = error(  # fallback for unknown profilers; must accept keywords (it is always called with `verbose`), otherwise a MethodError hides this message
+    """Unknown profiler $p.
+    Did you forget to load it's dependencies?
+    See [`PkgJogger.profile`](@ref) for more information
+    """)
+
+function __profiling_loop(start, stop, benchmark)  # run `benchmark` samples, bracketing each one with the profiler's `start()`/`stop()` callbacks
+    start_time = time()
+    params = benchmark.params
+    quote_vals = benchmark.quote_vals
+    sample = 0  # number of samples taken so far
+    while (time() - start_time) <= params.seconds && sample < params.samples  # stop at the time budget or after exactly `params.samples` samples
+        params.gcsample && BenchmarkTools.gcscrub()
+        start()
+        try
+            benchmark.samplefunc(quote_vals, params)
+        finally
+            stop()  # always stop the profiler, even if the sample throws
+        end
+        sample += 1
+    end
+    return nothing
+end
+
+"""
+    profiler=:cpu
+
+Profiles the benchmark using [`Profile.@profile`](@ref)
+"""
+function profile(::Val{Symbol(:cpu)}, id, b::BenchmarkTools.Benchmark; verbose)
+    Profile.clear()
+    __profiling_loop(Profile.start_timer, Profile.stop_timer, b)
+    verbose && Profile.print()
+    return nothing
+end
+
+if isdefined(Profile, :Allocs)
+    @doc """
+         profiler=:allocs
+
+    Profiles memory allocations using the built-in [`Profile.Allocs.@profile`](@ref)
+
+    Accepts `sample_rate` as a kwarg to control the rate of recordings. A rate of 1.0 will
+    record everything; 0.0 will record nothing. See [`Profile.Allocs.@profile`](@ref) for more.
+
+    !!! compat "Julia 1.8"
+        The allocation profiler was added in Julia 1.8
+    """
+    function profile(::Val{Symbol(:allocs)}, id, b::BenchmarkTools.Benchmark; verbose, sample_rate=0.0001)
+        Profile.Allocs.clear()
+        start = () -> Profile.Allocs.start(; sample_rate)
+        __profiling_loop(start, Profile.Allocs.stop, b)
+        return nothing
+    end
+end
diff --git a/src/utils.jl b/src/utils.jl
index 80b5870..27baec5 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -71,24 +71,12 @@ function locate_benchmarks(path, name=String[])
 end
 locate_benchmarks(pkg::Module) = benchmark_dir(pkg) |> locate_benchmarks
 
+_SELECT_DOCS = """
+"""
 """
     getsuite(suite, [select...])
 
-Index into `suite` and return the matching entries in suite.
-At it's simplest, `getsuite(suite, "foo", "bar",...)` is the same as `suite["foo"]["bar"]...`
-
-# Supported Indices
-
-- `:` - Accepts any entry at that level in the tree
-- `r"Regexp"` - Accepts any entry matching the regular-expression
-- `key::Any` - Accepts any entry with a matching `key`
-- `@tagged` - Filters the suite to only include `BenchmarkGroup`s with a matching tag.
-  See [Indexing into a BenchmarkGroup using @tagged](https://juliaci.github.io/BenchmarkTools.jl/stable/manual/#Indexing-into-a-BenchmarkGroup-using-@tagged)
-
-!!! warning
-    An entry in `suite` must match all indices to be returned. For example,
-    `getsuite(s, :, "bar")` would exclude a benchmark at `s["bat"]` as
-    the benchmark isn't matched by **both** `:` and `"bar"`.
+$(_SELECT_DOCS)
 """
 getsuite(suite::BenchmarkGroup) = suite
 getsuite(suite::BenchmarkGroup, ::Colon) = suite
diff --git a/test/profile.jl b/test/profile.jl
new file mode 100644
index 0000000..b718605
--- /dev/null
+++ b/test/profile.jl
@@ -0,0 +1,40 @@
+using Test
+using Profile
+using CUDA
+using NVTX
+
+include("utils.jl")
+
+@testset "CPU profiler" begin
+    @jog Example
+    Profile.clear()
+    JogExample.profile("bench_timer.jl", "1ms")
+    @test Profile.is_buffer_full() == false
+    @test Profile.len_data() > 0
+    @test occursin("profiler=:cpu", string(@doc(JogExample.profile)))
+end
+
+@testset "Allocs profiler" begin
+    @jog Example
+    Profile.Allocs.clear()
+    @test isempty(Profile.Allocs.fetch().allocs)
+    JogExample.profile("bench_timer.jl", "1ms"; profiler=:allocs, sample_rate=1)
+    @test !isempty(Profile.Allocs.fetch().allocs)
+    @test occursin("profiler=:allocs", string(@doc(JogExample.profile)))
+
+    @testset "sample_rate" begin
+        Profile.Allocs.clear()
+        JogExample.profile("bench_timer.jl", "1ms"; profiler=:allocs, sample_rate=0)
+        @test isempty(Profile.Allocs.fetch().allocs)
+    end
+end
+
+@testset "CUDA profiler" begin
+    @jog Example
+    mktempdir() do cwd
+        cd(cwd) do
+            JogExample.profile("bench_timer.jl", "1ms"; profiler=:cuda)
+        end
+    end
+    @test true # Nothing errored (Yay?)
+end

From 24bd19b8934407e05a9ab02c6e3865fdba4868a4 Mon Sep 17 00:00:00 2001
From: Alexius Wadell <awadell@gmail.com>
Date: Tue, 21 Nov 2023 08:27:29 -0500
Subject: [PATCH 3/6] wip: docs

---
 docs/Project.toml     |  1 +
 docs/make.jl          | 16 ++++++++++------
 docs/src/.gitignore   |  1 +
 docs/src/jogger.md    | 12 ++++++++++++
 docs/src/profiling.md | 27 +++++++++++++++++++++++++++
 src/jogger.jl         | 13 ++++++++++---
 6 files changed, 61 insertions(+), 9 deletions(-)
 create mode 100644 docs/src/.gitignore
 create mode 100644 docs/src/profiling.md

diff --git a/docs/Project.toml b/docs/Project.toml
index 22b972f..e45133a 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -3,6 +3,7 @@ BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 Example = "4b09cd0b-9172-4840-a79f-b48550c7f881"
 PkgJogger = "10150987-6cc1-4b76-abee-b1c1cbd91c01"
+Profile = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
 
 [compat]
 Documenter = "1.2"
diff --git a/docs/make.jl b/docs/make.jl
index 127f3e1..8b5ca75 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -20,11 +20,14 @@ DocMeta.setdocmeta!(PkgJogger, :DocTestSetup, :(using PkgJogger); recursive=true
 index_md = joinpath(@__DIR__, "src", "index.md")
 readme_md = joinpath(@__DIR__, "..", "README.md")
 open(index_md, "w") do io
-    write(io, """
-    ```@meta
-    EditURL = "$readme_md"
-    ```
-    """)
+    write(
+        io,
+        """
+```@meta
+EditURL = "$readme_md"
+```
+"""
+    )
     write(io, read(readme_md, String))
 end
 
@@ -37,13 +40,14 @@ makedocs(;
         prettyurls=get(ENV, "CI", "false") == "true",
         canonical="https://awadell1.github.io/PkgJogger.jl",
         assets=String[],
-        analytics = "G-V9E0Q8BDHR",
+        analytics="G-V9E0Q8BDHR",
     ),
     pages=[
         "Home" => "index.md",
         "Jogger" => "jogger.md",
         "Saving Results" => "io.md",
         "Continuous Benchmarking" => "ci.md",
+        "Profiling" => "profiling.md",
         "Reference" => "reference.md",
     ],
     checkdocs=:all,
diff --git a/docs/src/.gitignore b/docs/src/.gitignore
new file mode 100644
index 0000000..fc0ab8a
--- /dev/null
+++ b/docs/src/.gitignore
@@ -0,0 +1 @@
+index.md
diff --git a/docs/src/jogger.md b/docs/src/jogger.md
index 9d16987..81a6d41 100644
--- a/docs/src/jogger.md
+++ b/docs/src/jogger.md
@@ -83,6 +83,17 @@ would have keys of `["bench_subdir", "bench_filename.jl", ...]`, instead of
 > run the suite of a single file, you can `include` the file and run it with:
 > `tune!(suite); run(suite)`
 
+## Filtering Benchmarks
+Often it's useful to run only a subset of the full benchmarking suite.
+For example, to run only the benchmarks within `benchmark/bench_filename.jl`:
+`JogExample.run("bench_filename")`.
+
+Most jogger functions support filtering ([`judge`](@ref JogExample.judge) is a notable exception).
+Greater support is planned.
+
+!!! compat PkgJogger 0.6.0
+    Support for filtering via [`JogExample.suite`](@ref) was added in v0.6.0
+
 ## Jogger Reference
 
 Jogger modules provide helper methods for working with their package's
@@ -93,6 +104,7 @@ Example`.
 JogExample.suite
 JogExample.benchmark
 JogExample.run
+JogExample.profile
 JogExample.save_benchmarks
 JogExample.load_benchmarks
 JogExample.judge
diff --git a/docs/src/profiling.md b/docs/src/profiling.md
new file mode 100644
index 0000000..342726b
--- /dev/null
+++ b/docs/src/profiling.md
@@ -0,0 +1,27 @@
+# Profiling Benchmarks
+
+PkgJogger has support for profiling existing benchmarks using one of the [Supported Profilers](#supported-profilers),
+support for profiling is currently limited. Notably:
+
+1. Only a single benchmark can be profiled at a time
+2. Automated saving or loading of profile results is not supported
+
+## Supported Profilers
+
+
+### CPU
+```@docs
+PkgJogger.profile(::Val{:cpu}, ::Any, ::PkgJogger.BenchmarkTools.Benchmark)
+```
+
+### Allocations
+
+```@docs
+PkgJogger.profile(::Val{:allocs}, ::Any, ::PkgJogger.BenchmarkTools.Benchmark)
+```
+
+### GPU
+
+```@docs
+PkgJogger.profile(::Val{:cuda}, ::Any, ::PkgJogger.BenchmarkTools.Benchmark)
+```
diff --git a/src/jogger.jl b/src/jogger.jl
index 7db1bf8..8b4fcec 100644
--- a/src/jogger.jl
+++ b/src/jogger.jl
@@ -264,10 +264,17 @@ macro jog(pkg)
             At this time, `PkgJogger` only supports profiling a single benchmark
             at a time. Automated saving is not supported.
 
-        # Available Profilers
-        The following profilers are currently supported. Additional profilers
-        are available via package extensions.
+        ## Available Profilers
+        The following profilers have been implemented, but may not be currently
+        loaded (See [Loaded Profilers](#loaded-profilers)).
+
+        - `:cpu` - loaded by default
+        - `:allocs` - loaded if `Profile.Allocs` exists (Julia >= 1.8)
+        - `:cuda` - loaded if the CUDA and NVTX packages are loaded
 
+        ## Loaded Profilers
+        The following profilers are currently loaded. Additional profilers
+        are available via package extensions.
 
         $(@doc PkgJogger.profile)
 

From ff4c2d080d31e5367ee766159674b71ece8d1766 Mon Sep 17 00:00:00 2001
From: Alexius Wadell <awadell@gmail.com>
Date: Sun, 5 May 2024 11:51:07 -0400
Subject: [PATCH 4/6] feat: add support for select... in judge

---
 docs/src/jogger.md  |  7 +++++--
 src/jogger.jl       | 14 +++++++++++---
 src/utils.jl        |  6 +++---
 test/judge_tests.jl |  7 ++++---
 4 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/docs/src/jogger.md b/docs/src/jogger.md
index 81a6d41..0856c0e 100644
--- a/docs/src/jogger.md
+++ b/docs/src/jogger.md
@@ -88,8 +88,11 @@ Often it's useful to run only a subset of the full benchmarking suite.
 For example, to run only the benchmarks within `benchmark/bench_filename.jl`:
 `JogExample.run("bench_filename")`.
 
-Most jogger functions support filtering ([`judge`](@ref JogExample.judge) is a notable exception).
-Greater support is planned.
+```julia
+using PkgJogger, Example
+@jog Example
+JogExample.run("bench_filename.jl")
+```
 
 !!! compat PkgJogger 0.6.0
     Support for filtering via [`JogExample.suite`](@ref) was added in v0.6.0
diff --git a/src/jogger.jl b/src/jogger.jl
index 8b4fcec..dd658b4 100644
--- a/src/jogger.jl
+++ b/src/jogger.jl
@@ -214,11 +214,14 @@ macro jog(pkg)
         load_benchmarks(id) = PkgJogger.load_benchmarks(joinpath(BENCHMARK_DIR, "trial"), id)
 
         """
-            judge(new, old; metric=Statistics.median, kwargs...)
+            judge(new, old, [select...]; metric=Statistics.median, kwargs...)
 
         Compares benchmarking results from `new` vs `old` for regressions/improvements
         using `metric` as a basis. Additional `kwargs` are passed to `BenchmarkTools.judge`
 
+        Optionally, filter results using `select...`; see [`$($mod_str).suite`](@ref) for
+        details.
+
         Identical to [`PkgJogger.judge`](@ref), but accepts any identifier supported by
         [`$($mod_str).load_benchmarks`](@ref)
 
@@ -230,6 +233,11 @@ macro jog(pkg)
         [...]
         ```
 
+        ```julia
+        # Only judge results in `bench_foo.jl`
+        $($mod_str).judge(:latest, :oldest, "bench_foo.jl")
+        ```
+
         ```julia
         # Judge results by UUID
         $($mod_str).judge("$(UUIDs.uuid4())", "$(UUIDs.uuid4())")
@@ -242,8 +250,8 @@ macro jog(pkg)
         [...]
         ```
         """
-        function judge(new, old; kwargs...)
-            PkgJogger.judge(_get_benchmarks(new), _get_benchmarks(old); kwargs...)
+        function judge(new, old, select...; kwargs...)
+            PkgJogger.judge(_get_benchmarks(new), _get_benchmarks(old), select...; kwargs...)
         end
         _get_benchmarks(b) = load_benchmarks(b)
         _get_benchmarks(b::Dict) = PkgJogger._get_benchmarks(b)
diff --git a/src/utils.jl b/src/utils.jl
index 27baec5..bb4fe23 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -117,9 +117,9 @@ function judge(
     old_estimate = metric(old)
     BenchmarkTools.judge(new_estimate, old_estimate; kwargs...)
 end
-function judge(new, old; kwargs...)
-    new_results = _get_benchmarks(new)
-    old_results = _get_benchmarks(old)
+function judge(new, old, select...; kwargs...)
+    new_results = getsuite(_get_benchmarks(new), select...)
+    old_results = getsuite(_get_benchmarks(old), select...)
     judge(new_results, old_results; kwargs...)
 end
 
diff --git a/test/judge_tests.jl b/test/judge_tests.jl
index 647f774..1c436c3 100644
--- a/test/judge_tests.jl
+++ b/test/judge_tests.jl
@@ -12,9 +12,9 @@ function gen_example()
     return results, filename, dict, uuid
 end
 
-function test_judge(f, new, old)
-    @inferred f(new, old)
-    judgement = f(new, old)
+function test_judge(f, new, old, args...)
+    @inferred f(new, old, args...)
+    judgement = f(new, old, args...)
     @test typeof(judgement) <: BenchmarkGroup
     return judgement
 end
@@ -25,6 +25,7 @@ old = gen_example()
 
 @testset "JogPkgName.judge($(typeof(n)), $(typeof(o)))" for (n, o) in Iterators.product(new, old)
     test_judge(jogger.judge, n, o)
+    test_judge(jogger.judge, n, o, :, "1ms")
 end
 
 @testset "PkgJogger.judge($(typeof(n)), $(typeof(o)))" for (n, o) in Iterators.product(new[1:3], old[1:3])

From 3ceeb6e79350dbfd6740b4c095bf08e991bf7462 Mon Sep 17 00:00:00 2001
From: Alexius Wadell <awadell@gmail.com>
Date: Thu, 6 Jun 2024 15:56:31 -0400
Subject: [PATCH 5/6] misc: expand compat for cuda

---
 .github/workflows/CI.yml | 20 +++++++++++++++++---
 Project.toml             |  2 +-
 docs/Project.toml        |  2 +-
 ext/PkgJoggerCUDAExt.jl  |  2 +-
 src/profile.jl           |  2 +-
 5 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 111f07e..64a9596 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -19,8 +19,8 @@ jobs:
         arch:
           - x64
     steps:
-      - uses: actions/checkout@v3
-      - uses: julia-actions/setup-julia@v1
+      - uses: actions/checkout@v4
+      - uses: julia-actions/setup-julia@v2
         with:
           version: ${{ matrix.version }}
           arch: ${{ matrix.arch }}
@@ -32,11 +32,25 @@ jobs:
         env:
           COVERALLS_TOKEN: ${{ secrets.GITHUB_TOKEN }}
 
+  legacy:
+    name: Test Min Compat Bounds
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v4
+    - uses: julia-actions/setup-julia@v2
+      with:
+        version: '1.9'
+    # Downgrade only after checkout and Julia setup; this job has no matrix to gate on.
+    - uses: julia-actions/julia-downgrade-compat@v1
+    - uses: julia-actions/cache@v1
+    - uses: julia-actions/julia-buildpkg@v1
+    - uses: julia-actions/julia-runtest@v1
+
   action:
     name: Test Github action
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - uses: julia-actions/setup-julia@v1
       - uses: julia-actions/cache@v1
       - uses: "./"
diff --git a/Project.toml b/Project.toml
index 15f3c45..fd2a84a 100644
--- a/Project.toml
+++ b/Project.toml
@@ -31,10 +31,10 @@ CodecZlib = "0.7"
 Dates = "1.9"
 JSON = "0.21"
 LibGit2 = "1.9"
+Revise = "3"
 NVTX = "0.3"
 Pkg = "1.9"
 Profile = "1.9"
-Revise = "3"
 Statistics = "1.9"
 Test = "1.9"
 UUIDs = "1.9"
diff --git a/docs/Project.toml b/docs/Project.toml
index e45133a..28fc406 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -6,4 +6,4 @@ PkgJogger = "10150987-6cc1-4b76-abee-b1c1cbd91c01"
 Profile = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
 
 [compat]
-Documenter = "1.2"
+Documenter = "1.4"
diff --git a/ext/PkgJoggerCUDAExt.jl b/ext/PkgJoggerCUDAExt.jl
index 8ee8027..4de966a 100644
--- a/ext/PkgJoggerCUDAExt.jl
+++ b/ext/PkgJoggerCUDAExt.jl
@@ -16,7 +16,7 @@ Profiles the benchmark using [`CUDA.@profile`](@ref).
 """
 function PkgJogger.profile(::Val{Symbol(:cuda)}, id, b::PkgJogger.BenchmarkTools.Benchmark; verbose)
     id_str = join(id, "/")
-    CUDA.@profile begin
+    CUDA.@profile external=true begin
         NVTX.@range id_str begin
             PkgJogger.BenchmarkTools.run(b)
         end
diff --git a/src/profile.jl b/src/profile.jl
index 288258e..6472f80 100644
--- a/src/profile.jl
+++ b/src/profile.jl
@@ -1,5 +1,5 @@
 function profile(suite, profiler::Symbol; verbose=false, ref=nothing, kwargs...)
-    leaf = leaves(suite)
+    leaf = BenchmarkTools.leaves(suite)
     @assert length(leaf) == 1 "Profiling Support is limited to one benchmark at a time"
     id, benchmark = first(leaf)
     warmup(suite; verbose)

From e1283af71eb1c66025e0bbbd20061229738c212b Mon Sep 17 00:00:00 2001
From: Alexius Wadell <awadell@gmail.com>
Date: Thu, 6 Jun 2024 17:09:02 -0400
Subject: [PATCH 6/6] doc: add interlinks

---
 docs/Project.toml       |  5 +++++
 docs/make.jl            | 18 ++++++++++++++++--
 docs/src/profiling.md   |  5 ++---
 ext/PkgJoggerCUDAExt.jl |  4 ++--
 src/profile.jl          |  7 ++++---
 5 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/docs/Project.toml b/docs/Project.toml
index 28fc406..866dbef 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -1,9 +1,14 @@
 [deps]
 BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
+DocumenterInterLinks = "d12716ef-a0f6-4df4-a9f1-a5a34e75c656"
 Example = "4b09cd0b-9172-4840-a79f-b48550c7f881"
 PkgJogger = "10150987-6cc1-4b76-abee-b1c1cbd91c01"
 Profile = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
 
 [compat]
 Documenter = "1.4"
+
+[preferences.CUDA_Runtime_jll]
+local = true
diff --git a/docs/make.jl b/docs/make.jl
index 8b5ca75..d8f884e 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -10,7 +10,10 @@ Pkg.instantiate()
 
 # Load Code
 using Documenter
+using Documenter.Remotes: GitHub
+using DocumenterInterLinks
 using PkgJogger
+using CUDA
 using Example
 @jog Example
 
@@ -31,10 +34,20 @@ EditURL = "$readme_md"
     write(io, read(readme_md, String))
 end
 
+# Interproject Links
+links = InterLinks(
+    "Julia" => "https://docs.julialang.org/en/v1/",
+    "CUDA" => "https://cuda.juliagpu.org/stable/",
+)
+
 makedocs(;
-    modules=[PkgJogger, JogExample],
+    modules=[
+        PkgJogger,
+        JogExample,
+        Base.get_extension(PkgJogger, :PkgJoggerCUDAExt),
+    ],
     authors="Alexius Wadell <awadell@gmail.com> and contributors",
-    repo="https://github.com/awadell1/PkgJogger.jl/blob/{commit}{path}#{line}",
+    repo=GitHub("awadell1", "PkgJogger.jl"),
     sitename="PkgJogger.jl",
     format=Documenter.HTML(;
         prettyurls=get(ENV, "CI", "false") == "true",
@@ -50,6 +63,7 @@ makedocs(;
         "Profiling" => "profiling.md",
         "Reference" => "reference.md",
     ],
+    plugins=[links],
     checkdocs=:all,
 )
 
diff --git a/docs/src/profiling.md b/docs/src/profiling.md
index 342726b..f2e9496 100644
--- a/docs/src/profiling.md
+++ b/docs/src/profiling.md
@@ -1,14 +1,13 @@
 # Profiling Benchmarks
 
-PkgJogger has support for profiling existing benchmarks using one of the [Supported Profilers](#supported-profilers),
-support for profiling is currently limited. Notably:
+PkgJogger has support for profiling existing benchmarks using one of the [Supported Profilers](#supported-profilers).
+Profiling support is currently limited; notably:
 
 1. Only a single benchmark can be profiled at a time
 2. Automated saving or loading of profile results is not supported
 
 ## Supported Profilers
 
-
 ### CPU
 ```@docs
 PkgJogger.profile(::Val{:cpu}, ::Any, ::PkgJogger.BenchmarkTools.Benchmark)
diff --git a/ext/PkgJoggerCUDAExt.jl b/ext/PkgJoggerCUDAExt.jl
index 4de966a..ea90b2f 100644
--- a/ext/PkgJoggerCUDAExt.jl
+++ b/ext/PkgJoggerCUDAExt.jl
@@ -7,11 +7,11 @@ using NVTX
 """
     profiler=:cuda
 
-Profiles the benchmark using [`CUDA.@profile`](@ref).
+Profiles the benchmark using [`CUDA.@profile`](@extref).
 
 !!! warning
     This only activates the CUDA profiler, you need to launch the profiler externally.
-    See [CUDA Profiling](https://cuda.juliagpu.org/stable/development/profiling/) for documentation.
+    See [CUDA Profiling](@extref CUDA development/profiling) for documentation.
 
 """
 function PkgJogger.profile(::Val{Symbol(:cuda)}, id, b::PkgJogger.BenchmarkTools.Benchmark; verbose)
diff --git a/src/profile.jl b/src/profile.jl
index 6472f80..1f02a7e 100644
--- a/src/profile.jl
+++ b/src/profile.jl
@@ -34,7 +34,8 @@ end
 """
     profiler=:cpu
 
-Profiles the benchmark using [`Profile.@profile`](@ref)
+
+Profiles the benchmark using Julia's built-in profiler: [`Profile.@profile`](@extref)
 """
 function profile(::Val{Symbol(:cpu)}, id, b::BenchmarkTools.Benchmark; verbose)
     Profile.clear()
@@ -47,10 +48,10 @@ if isdefined(Profile, :Allocs)
     @doc """
          profiler=:allocs
 
-    Profiles memory allocations using the built-in [`Profile.Allocs.@profile`](@ref)
+    Profiles memory allocations using the built-in [`Profile.Allocs.@profile`](@extref)
 
     Accepts `sample_rate` as a kwarg to control the rate of recordings. A rate of 1.0 will
-    record everything; 0.0 will record nothing. See [`Profile.Allocs.@profile`](@ref) for more.
+    record everything; 0.0 will record nothing. See [`Profile.Allocs.@profile`](@extref) for more.
 
     !!! compat "Julia 1.8"
         The allocation profiler was added in Julia 1.8