Skip to content

Commit

Permalink
feat: support profiling individual benchmarks
Browse files Browse the repository at this point in the history
Built-in support for `Profile.@profile` and `Profile.Allocs.@profile`,
plus support for `CUDA.@profile` via a package extension

Package extensions are supported from Julia 1.9 onward (this won't be
backported to older versions using Requires)

Quick demo:

```julia
@jog Example
using PkgJogger, Example
JogExample.profile("bench_timer.jl", "1ms") # CPU Profiling
JogExample.profile("bench_timer.jl", "1ms"; profiler=:allocs) # Allocs
```
  • Loading branch information
awadell1 committed May 5, 2024
1 parent c54672d commit 5250e22
Show file tree
Hide file tree
Showing 7 changed files with 154 additions and 18 deletions.
21 changes: 19 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "PkgJogger"
uuid = "10150987-6cc1-4b76-abee-b1c1cbd91c01"
authors = ["Alexius Wadell <[email protected]> and contributors"]
version = "0.5.1"
version = "0.6.0"

[deps]
BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0"
Expand All @@ -11,21 +11,38 @@ Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
LibGit2 = "76f85450-5226-5b5a-8eaa-529ad045b433"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
Profile = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"

[weakdeps]
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
NVTX = "5da4648a-3479-48b8-97b9-01cb529c0a1f"

[extensions]
PkgJoggerCUDAExt = ["CUDA", "NVTX"]

[compat]
BSON = "0.3"
BenchmarkTools = "1.5"
CUDA = "5"
CodecZlib = "0.7"
Dates = "1.9"
JSON = "0.21"
LibGit2 = "1.9"
NVTX = "0.3"
Pkg = "1.9"
Profile = "1.9"
Revise = "3"
Statistics = "1.9"
Test = "1.9"
UUIDs = "1.9"
julia = "1.9"

[extras]
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
NVTX = "5da4648a-3479-48b8-97b9-01cb529c0a1f"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
ReTestItems = "817f1d60-ba6b-4fd5-9520-3cf149f6a823"
Expand All @@ -35,4 +52,4 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"

[targets]
test = ["Test", "ReTestItems", "Revise", "UUIDs", "TOML", "Random", "Pkg"]
test = ["Test", "ReTestItems", "Revise", "UUIDs", "TOML", "Random", "Pkg", "NVTX", "CUDA"]
26 changes: 26 additions & 0 deletions ext/PkgJoggerCUDAExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
module PkgJoggerCUDAExt

using PkgJogger
using CUDA
using NVTX

"""
    profiler=:cuda

Profiles the benchmark using [`CUDA.@profile`](@ref).

!!! warning
    This only activates the CUDA profiler, you need to launch the profiler externally.
    See [CUDA Profiling](https://cuda.juliagpu.org/stable/development/profiling/) for documentation.
"""
function PkgJogger.profile(
    ::Val{:cuda}, id, b::PkgJogger.BenchmarkTools.Benchmark; verbose
)
    # Name the NVTX range after the benchmark's id, with its parts joined by "/"
    range_name = join(id, "/")
    # The whole benchmark run happens inside the CUDA profiler and the NVTX range;
    # `verbose` is accepted for interface parity but is not used by this method.
    return CUDA.@profile begin
        NVTX.@range range_name begin
            PkgJogger.BenchmarkTools.run(b)
        end
    end
end

end
2 changes: 2 additions & 0 deletions src/PkgJogger.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ using Dates
using LibGit2
using Statistics
using Test
using Profile

export @jog, @test_benchmarks

Expand All @@ -30,6 +31,7 @@ const PKG_JOGGER_VER = VersionNumber(
)

include("utils.jl")
include("profile.jl")
include("jogger.jl")
include("ci.jl")

Expand Down
1 change: 0 additions & 1 deletion src/jogger.jl
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,6 @@ macro jog(pkg)
"""
function benchmark(select...; verbose=false, save=false, ref=nothing)
s = suite(select...)
BenchmarkTools.warmup(s; verbose)
__tune!(s, ref; verbose=verbose)
results = BenchmarkTools.run(s; verbose=verbose)
if save
Expand Down
64 changes: 64 additions & 0 deletions src/profile.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Entry point for profiling a single benchmark.
#
# `suite` must contain exactly one leaf benchmark. The suite is warmed up and tuned
# (using `ref`) before the leaf is handed to the `profile(::Val{profiler}, ...)` method
# selected by `profiler`. `verbose` goes to `warmup` and to the profiler method; any
# remaining keyword arguments are forwarded to the profiler method unchanged.
function profile(suite, profiler::Symbol; verbose=false, ref=nothing, kwargs...)
    candidates = leaves(suite)
    @assert length(candidates) == 1 "Profiling Support is limited to one benchmark at a time"
    (id, benchmark) = first(candidates)
    warmup(suite; verbose=verbose)
    tune!(suite, ref)
    return profile(Val(profiler), id, benchmark; verbose=verbose, kwargs...)
end

# Fallback for profilers that have no matching `profile(::Val{...}, ...)` method.
#
# Keyword arguments are accepted and ignored on purpose: the
# `profile(suite, profiler::Symbol; ...)` entry point always forwards `verbose`
# (plus any extra keywords), and a fallback without `kwargs...` would fail with a
# `MethodError` about an unsupported keyword instead of raising this message.
profile(p::Val, args...; kwargs...) = error(
    """Unknown profiler $p.
    Did you forget to load its dependencies?
    See [`PkgJogger.profile`](@ref) for more information
    """)

# Repeatedly sample `benchmark`, wrapping every sample in `start()` / `stop()`.
#
# - `start`, `stop`: zero-argument callables that turn the profiler on and off.
# - `benchmark`: any object exposing `params` (with `seconds`, `samples` and
#   `gcsample`), `quote_vals`, and a `samplefunc(quote_vals, params)` callable.
#
# Sampling ends once `params.seconds` of wall-clock time has elapsed or
# `params.samples` samples have been taken, whichever comes first. Always returns
# `nothing`; results are left in whatever state the profiler itself maintains.
function __profiling_loop(start, stop, benchmark)
    params = benchmark.params
    quote_vals = benchmark.quote_vals
    start_time = time()
    taken = 0
    # `taken` counts completed samples from zero, so the bound is strict: a
    # non-strict comparison would take one sample more than `params.samples`.
    while (time() - start_time) <= params.seconds && taken < params.samples
        params.gcsample && BenchmarkTools.gcscrub()
        start()
        try
            benchmark.samplefunc(quote_vals, params)
        finally
            # Always switch the profiler off again, even if the sample throws
            stop()
        end
        taken += 1
    end
    return nothing
end

"""
profiler=:cpu
Profiles the benchmark using [`Profile.@profile`](@ref)
"""
function profile(::Val{Symbol(:cpu)}, id, b::BenchmarkTools.Benchmark; verbose)
Profile.clear()
__profiling_loop(Profile.start_timer, Profile.stop_timer, b)
verbose && Profile.print()
return nothing
end

# Only define the allocation profiler when this Julia provides `Profile.Allocs`
if isdefined(Profile, :Allocs)
    @doc """
        profiler=:allocs

    Profiles memory allocations using the built-in [`Profile.Allocs.@profile`](@ref)

    Accepts `sample_rate` as a kwarg to control the rate of recordings. A rate of 1.0 will
    record everything; 0.0 will record nothing. See [`Profile.Allocs.@profile`](@ref) for more.

    !!! compat "Julia 1.8"
        The allocation profiler was added in Julia 1.8
    """
    function profile(
        ::Val{:allocs}, id, b::BenchmarkTools.Benchmark; verbose, sample_rate=0.0001
    )
        # Drop allocations recorded by earlier runs before sampling this benchmark
        Profile.Allocs.clear()
        # `Profile.Allocs.start` needs the sample rate, so wrap it in a zero-argument
        # closure that matches what `__profiling_loop` expects for `start`
        begin_recording = () -> Profile.Allocs.start(; sample_rate=sample_rate)
        __profiling_loop(begin_recording, Profile.Allocs.stop, b)
        return nothing
    end
end
18 changes: 3 additions & 15 deletions src/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -71,24 +71,12 @@ function locate_benchmarks(path, name=String[])
end
# Module form: resolve the module's benchmark directory, then search it
locate_benchmarks(pkg::Module) = locate_benchmarks(benchmark_dir(pkg))

_SELECT_DOCS = """
"""
"""
getsuite(suite, [select...])
Index into `suite` and return the matching entries in suite.
At its simplest, `getsuite(suite, "foo", "bar",...)` is the same as `suite["foo"]["bar"]...`
# Supported Indices
- `:` - Accepts any entry at that level in the tree
- `r"Regexp"` - Accepts any entry matching the regular-expression
- `key::Any` - Accepts any entry with a matching `key`
- `@tagged` - Filters the suite to only include `BenchmarkGroup`s with a matching tag.
See [Indexing into a BenchmarkGroup using @tagged](https://juliaci.github.io/BenchmarkTools.jl/stable/manual/#Indexing-into-a-BenchmarkGroup-using-@tagged)
!!! warning
An entry in `suite` must match all indices to be returned. For example,
`getsuite(s, :, "bar")` would exclude a benchmark at `s["bat"]` as
the benchmark isn't matched by **both** `:` and `"bar"`.
$(_SELECT_DOCS)
"""
getsuite(suite::BenchmarkGroup) = suite  # no selectors: return the suite unchanged
getsuite(suite::BenchmarkGroup, ::Colon) = suite  # a lone `:` selector also returns the suite unchanged
Expand Down
40 changes: 40 additions & 0 deletions test/profile.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
using Test
using Profile
using CUDA
using NVTX

include("utils.jl")

# Profiles one benchmark without a `profiler` keyword and checks that samples were
# recorded, and that the `profiler=:cpu` docs show up in `JogExample.profile`'s docs.
@testset "CPU profiler" begin
@jog Example
# Start from an empty buffer so the checks below only see data from this run
Profile.clear()
JogExample.profile("bench_timer.jl", "1ms")
# Data must have been recorded, and the buffer must not have filled up
@test Profile.is_buffer_full() == false
@test Profile.len_data() > 0
@test occursin("profiler=:cpu", string(@doc(JogExample.profile)))
end

# Profiles one benchmark with `profiler=:allocs` and checks that allocations are
# recorded at `sample_rate=1` and that none are recorded at `sample_rate=0`.
@testset "Allocs profiler" begin
@jog Example
# Confirm the starting state is empty so the later non-empty check is meaningful
Profile.Allocs.clear()
@test isempty(Profile.Allocs.fetch().allocs)
JogExample.profile("bench_timer.jl", "1ms"; profiler=:allocs, sample_rate=1)
@test !isempty(Profile.Allocs.fetch().allocs)
@test occursin("profiler=:allocs", string(@doc(JogExample.profile)))

@testset "sample_rate" begin
# Clear the allocations recorded above before profiling again with sampling off
Profile.Allocs.clear()
JogExample.profile("bench_timer.jl", "1ms"; profiler=:allocs, sample_rate=0)
@test isempty(Profile.Allocs.fetch().allocs)
end
end

# Smoke test for `profiler=:cuda`: it only checks that the call finishes without
# throwing; nothing about the profiler's output is inspected.
@testset "CUDA profiler" begin
@jog Example
# Run from a temporary working directory
# NOTE(review): presumably so any files the profiler writes do not land in the
# repository — confirm
mktempdir() do cwd
cd(cwd) do
JogExample.profile("bench_timer.jl", "1ms"; profiler=:cuda)
end
end
@test true # Nothing errored (Yay?)
end

0 comments on commit 5250e22

Please sign in to comment.