From 5250e2220291f7c11ba54121c7cf9307e1a9ec1f Mon Sep 17 00:00:00 2001
From: Alexius Wadell
Date: Thu, 17 Aug 2023 11:04:57 -0400
Subject: [PATCH] feat: support profiling individual benchmarks

Built-in support for `Profile.@profile` and `Profile.Allocs.@profile`, plus
support for `CUDA.@profile` via a package extension

Package Extensions are supported on 1.9 forward (Won't be backporting with
Requires)

Quick demo:

```julia
using PkgJogger, Example
@jog Example
JogExample.profile("bench_timer.jl", "1ms") # CPU Profiling
JogExample.profile("bench_timer.jl", "1ms"; profiler=:allocs) # Allocs
```
---
 Project.toml            | 21 +++++++++++--
 ext/PkgJoggerCUDAExt.jl | 26 +++++++++++++++
 src/PkgJogger.jl        |  2 ++
 src/jogger.jl           |  1 -
 src/profile.jl          | 72 +++++++++++++++++++++++++++++++++++++++++
 src/utils.jl            | 28 +++++++++-------
 test/profile.jl         | 40 +++++++++++++++++++++++
 7 files changed, 175 insertions(+), 15 deletions(-)
 create mode 100644 ext/PkgJoggerCUDAExt.jl
 create mode 100644 src/profile.jl
 create mode 100644 test/profile.jl

diff --git a/Project.toml b/Project.toml
index 7f68fd6..15f3c45 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "PkgJogger"
 uuid = "10150987-6cc1-4b76-abee-b1c1cbd91c01"
 authors = ["Alexius Wadell and contributors"]
-version = "0.5.1"
+version = "0.6.0"
 
 [deps]
 BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0"
@@ -11,21 +11,38 @@ Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
 JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
 LibGit2 = "76f85450-5226-5b5a-8eaa-529ad045b433"
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
+Profile = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
 
+[weakdeps]
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+NVTX = "5da4648a-3479-48b8-97b9-01cb529c0a1f"
+
+[extensions]
+PkgJoggerCUDAExt = ["CUDA", "NVTX"]
+
 [compat]
 BSON = "0.3"
 BenchmarkTools = "1.5"
+CUDA = "5"
 CodecZlib = "0.7"
+Dates = "1.9"
 JSON = "0.21"
+LibGit2 = "1.9"
+NVTX = "0.3"
 Pkg = "1.9"
+Profile = "1.9"
 Revise = "3"
 Statistics = "1.9"
+Test = "1.9"
+UUIDs = "1.9"
 julia = "1.9"
 
 [extras]
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+NVTX = "5da4648a-3479-48b8-97b9-01cb529c0a1f"
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 ReTestItems = "817f1d60-ba6b-4fd5-9520-3cf149f6a823"
@@ -35,4 +52,4 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
 
 [targets]
-test = ["Test", "ReTestItems", "Revise", "UUIDs", "TOML", "Random", "Pkg"]
+test = ["Test", "ReTestItems", "Revise", "UUIDs", "TOML", "Random", "Pkg", "NVTX", "CUDA"]
diff --git a/ext/PkgJoggerCUDAExt.jl b/ext/PkgJoggerCUDAExt.jl
new file mode 100644
index 0000000..8ee8027
--- /dev/null
+++ b/ext/PkgJoggerCUDAExt.jl
@@ -0,0 +1,26 @@
+module PkgJoggerCUDAExt
+
+using PkgJogger
+using CUDA
+using NVTX
+
+"""
+    profiler=:cuda
+
+Profiles the benchmark using [`CUDA.@profile`](@ref).
+
+!!! warning
+    This only activates the CUDA profiler, you need to launch the profiler externally.
+    See [CUDA Profiling](https://cuda.juliagpu.org/stable/development/profiling/) for documentation.
+
+"""
+function PkgJogger.profile(::Val{Symbol(:cuda)}, id, b::PkgJogger.BenchmarkTools.Benchmark; verbose)
+    id_str = join(id, "/")
+    CUDA.@profile begin
+        NVTX.@range id_str begin
+            PkgJogger.BenchmarkTools.run(b)
+        end
+    end
+end
+
+end
diff --git a/src/PkgJogger.jl b/src/PkgJogger.jl
index dd5dc27..132fe6c 100644
--- a/src/PkgJogger.jl
+++ b/src/PkgJogger.jl
@@ -10,6 +10,7 @@ using Dates
 using LibGit2
 using Statistics
 using Test
+using Profile
 
 export @jog, @test_benchmarks
 
@@ -30,6 +31,7 @@ const PKG_JOGGER_VER = VersionNumber(
 )
 
 include("utils.jl")
+include("profile.jl")
 include("jogger.jl")
 include("ci.jl")
 
diff --git a/src/jogger.jl b/src/jogger.jl
index 0f9e8d4..7db1bf8 100644
--- a/src/jogger.jl
+++ b/src/jogger.jl
@@ -146,7 +146,6 @@ macro jog(pkg)
             """
             function benchmark(select...; verbose=false, save=false, ref=nothing)
                 s = suite(select...)
-                BenchmarkTools.warmup(s; verbose)
                 __tune!(s, ref; verbose=verbose)
                 results = BenchmarkTools.run(s; verbose=verbose)
                 if save
diff --git a/src/profile.jl b/src/profile.jl
new file mode 100644
index 0000000..288258e
--- /dev/null
+++ b/src/profile.jl
@@ -0,0 +1,72 @@
+# Profile the single benchmark selected by `suite` using `profiler`.
+# The suite is warmed up and tuned before dispatching on `Val(profiler)`.
+function profile(suite, profiler::Symbol; verbose=false, ref=nothing, kwargs...)
+    leaf = leaves(suite)
+    @assert length(leaf) == 1 "Profiling Support is limited to one benchmark at a time"
+    id, benchmark = first(leaf)
+    warmup(suite; verbose)
+    tune!(suite, ref)
+    profile(Val(profiler), id, benchmark; verbose, kwargs...)
+end
+
+# Fallback for unknown profilers. It must accept (and ignore) keyword arguments:
+# the dispatcher above always forwards `verbose`, so a keyword-less method would
+# raise a `MethodError` instead of reporting the message below.
+profile(p::Val, args...; kwargs...) = error(
+    """Unknown profiler $p.
+    Did you forget to load it's dependencies?
+    See [`PkgJogger.profile`](@ref) for more information
+    """)
+
+# Run the benchmark's sample function with `start()` called before and `stop()`
+# after every sample, until `params.seconds` has elapsed or `params.samples`
+# samples have run. `stop()` is in `finally` so it also runs when a sample throws.
+function __profiling_loop(start, stop, benchmark)
+    start_time = time()
+    params = benchmark.params
+    quote_vals = benchmark.quote_vals
+    sample = 0
+    while (time() - start_time) <= params.seconds && sample < params.samples
+        params.gcsample && BenchmarkTools.gcscrub()
+        start()
+        try
+            benchmark.samplefunc(quote_vals, params)
+        finally
+            stop()
+        end
+        sample += 1
+    end
+    return nothing
+end
+
+"""
+    profiler=:cpu
+
+Profiles the benchmark using [`Profile.@profile`](@ref)
+"""
+function profile(::Val{Symbol(:cpu)}, id, b::BenchmarkTools.Benchmark; verbose)
+    Profile.clear()
+    __profiling_loop(Profile.start_timer, Profile.stop_timer, b)
+    verbose && Profile.print()
+    return nothing
+end
+
+if isdefined(Profile, :Allocs)
+    @doc """
+        profiler=:allocs
+
+    Profiles memory allocations using the built-in [`Profile.Allocs.@profile`](@ref)
+
+    Accepts `sample_rate` as a kwarg to control the rate of recordings. A rate of 1.0 will
+    record everything; 0.0 will record nothing. See [`Profile.Allocs.@profile`](@ref) for more.
+
+    !!! compat "Julia 1.8"
+        The allocation profiler was added in Julia 1.8
+    """
+    function profile(::Val{Symbol(:allocs)}, id, b::BenchmarkTools.Benchmark; verbose, sample_rate=0.0001)
+        Profile.Allocs.clear()
+        start = () -> Profile.Allocs.start(; sample_rate)
+        __profiling_loop(start, Profile.Allocs.stop, b)
+        return nothing
+    end
+end
diff --git a/src/utils.jl b/src/utils.jl
index 80b5870..27baec5 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -71,24 +71,28 @@ function locate_benchmarks(path, name=String[])
 end
 locate_benchmarks(pkg::Module) = benchmark_dir(pkg) |> locate_benchmarks
 
+const _SELECT_DOCS = """
+# Supported Indices
+
+- `:` - Accepts any entry at that level in the tree
+- `r"Regexp"` - Accepts any entry matching the regular-expression
+- `key::Any` - Accepts any entry with a matching `key`
+- `@tagged` - Filters the suite to only include `BenchmarkGroup`s with a matching tag.
+    See [Indexing into a BenchmarkGroup using @tagged](https://juliaci.github.io/BenchmarkTools.jl/stable/manual/#Indexing-into-a-BenchmarkGroup-using-@tagged)
+
+!!! warning
+    An entry in `suite` must match all indices to be returned. For example,
+    `getsuite(s, :, "bar")` would exclude a benchmark at `s["bat"]` as
+    the benchmark isn't matched by **both** `:` and `"bar"`.
+"""
+
 """
     getsuite(suite, [select...])
 
 Index into `suite` and return the matching entries in suite.
 At it's simplest, `getsuite(suite, "foo", "bar",...)` is the same as `suite["foo"]["bar"]...`
 
-# Supported Indices
-
-- `:` - Accepts any entry at that level in the tree
-- `r"Regexp"` - Accepts any entry matching the regular-expression
-- `key::Any` - Accepts any entry with a matching `key`
-- `@tagged` - Filters the suite to only include `BenchmarkGroup`s with a matching tag.
-    See [Indexing into a BenchmarkGroup using @tagged](https://juliaci.github.io/BenchmarkTools.jl/stable/manual/#Indexing-into-a-BenchmarkGroup-using-@tagged)
-
-!!! warning
-    An entry in `suite` must match all indices to be returned. For example,
-    `getsuite(s, :, "bar")` would exclude a benchmark at `s["bat"]` as
-    the benchmark isn't matched by **both** `:` and `"bar"`.
+$(_SELECT_DOCS)
 """
 getsuite(suite::BenchmarkGroup) = suite
 getsuite(suite::BenchmarkGroup, ::Colon) = suite
diff --git a/test/profile.jl b/test/profile.jl
new file mode 100644
index 0000000..b718605
--- /dev/null
+++ b/test/profile.jl
@@ -0,0 +1,40 @@
+using Test
+using Profile
+using CUDA
+using NVTX
+
+include("utils.jl")
+
+@testset "CPU profiler" begin
+    @jog Example
+    Profile.clear()
+    JogExample.profile("bench_timer.jl", "1ms")
+    @test !Profile.is_buffer_full()
+    @test Profile.len_data() > 0
+    @test occursin("profiler=:cpu", string(@doc(JogExample.profile)))
+end
+
+@testset "Allocs profiler" begin
+    @jog Example
+    Profile.Allocs.clear()
+    @test isempty(Profile.Allocs.fetch().allocs)
+    JogExample.profile("bench_timer.jl", "1ms"; profiler=:allocs, sample_rate=1)
+    @test !isempty(Profile.Allocs.fetch().allocs)
+    @test occursin("profiler=:allocs", string(@doc(JogExample.profile)))
+
+    @testset "sample_rate" begin
+        Profile.Allocs.clear()
+        JogExample.profile("bench_timer.jl", "1ms"; profiler=:allocs, sample_rate=0)
+        @test isempty(Profile.Allocs.fetch().allocs)
+    end
+end
+
+@testset "CUDA profiler" begin
+    @jog Example
+    mktempdir() do cwd
+        cd(cwd) do
+            JogExample.profile("bench_timer.jl", "1ms"; profiler=:cuda)
+        end
+    end
+    @test true # Nothing errored (Yay?)
+end