Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP measure per-task CPU time #191

Closed
wants to merge 21 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions base/condition.jl
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ in line to wake up on `notify`. Otherwise, `wait` has first-in-first-out (FIFO)
"""
function wait(c::GenericCondition; first::Bool=false)
ct = current_task()
record_cpu_time!(ct)
_wait2(c, ct, first)
token = unlockall(c.lock)
try
Expand Down
1 change: 1 addition & 0 deletions base/options.jl
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ struct JLOptions
heap_size_hint::UInt64
trace_compile_timing::Int8
trim::Int8
task_timing::Int8
end

# This runs early in the sysimage != is not defined yet
Expand Down
49 changes: 48 additions & 1 deletion base/task.jl
Original file line number Diff line number Diff line change
Expand Up @@ -810,6 +810,12 @@ end
# runtime system hook called when a task finishes
function task_done_hook(t::Task)
# `finish_task` sets `sigatomic` before entering this function
if t.is_timing_enabled
# user_time -task-finished-> wait_time
now = time_ns()
record_cpu_time!(t, now)
record_wall_time!(t, now)
end
err = istaskfailed(t)
result = task_result(t)
handled = false
Expand Down Expand Up @@ -972,7 +978,13 @@ function enq_work(t::Task)
return t
end

schedule(t::Task) = enq_work(t)
function schedule(t::Task)
# user_time -task-(re)scheduled-> wait_time
if t.is_timing_enabled && t.first_enqueued_at == 0
t.first_enqueued_at = time_ns()
end
enq_work(t)
end

"""
schedule(t::Task, [val]; error=false)
Expand Down Expand Up @@ -1018,6 +1030,10 @@ true
function schedule(t::Task, @nospecialize(arg); error=false)
# schedule a task to be (re)started with the given value or exception
t._state === task_state_runnable || Base.error("schedule: Task not runnable")
# user_time -task-(re)scheduled-> wait_time
if t.is_timing_enabled && t.first_enqueued_at == 0
t.first_enqueued_at = time_ns()
end
if error
q = t.queue; q === nothing || Base.list_deletefirst!(q::IntrusiveLinkedList{Task}, t)
setfield!(t, :result, arg)
Expand All @@ -1039,6 +1055,7 @@ tasks.
"""
function yield()
ct = current_task()
record_cpu_time!(ct)
enq_work(ct)
try
wait()
Expand All @@ -1060,6 +1077,7 @@ Throws a `ConcurrencyViolationError` if `t` is the currently running task.
"""
function yield(t::Task, @nospecialize(x=nothing))
current = current_task()
record_cpu_time!(current)
t === current && throw(ConcurrencyViolationError("Cannot yield to currently running task!"))
(t._state === task_state_runnable && t.queue === nothing) || throw(ConcurrencyViolationError("yield: Task not runnable"))
t.result = x
Expand All @@ -1084,6 +1102,9 @@ function yieldto(t::Task, @nospecialize(x=nothing))
elseif t._state === task_state_failed
throw(t.result)
end
if t.is_timing_enabled && t.first_enqueued_at == 0
t.first_enqueued_at = time_ns()
end
t.result = x
set_next_task(t)
return try_yieldto(identity)
Expand All @@ -1097,6 +1118,12 @@ function try_yieldto(undo)
rethrow()
end
ct = current_task()
# scheduler -task-started-> user
# scheduler -task-resumed-> user
if ct.is_timing_enabled
# @assert ct.last_dequeued_at == 0
ct.last_dequeued_at = time_ns()
end
if ct._isexception
exc = ct.result
ct.result = nothing
Expand All @@ -1110,6 +1137,9 @@ end

# yield to a task, throwing an exception in it
function throwto(t::Task, @nospecialize exc)
if t.is_timing_enabled && t.first_enqueued_at == 0
t.first_enqueued_at = time_ns()
end
t.result = exc
t._isexception = true
set_next_task(t)
Expand Down Expand Up @@ -1176,3 +1206,20 @@ if Sys.iswindows()
else
pause() = ccall(:pause, Cvoid, ())
end

function record_cpu_time!(t::Task, stopped_at::UInt64=time_ns())
if t.is_timing_enabled
@assert t.last_dequeued_at != 0
t.cpu_time_ns += stopped_at - t.last_dequeued_at
t.last_dequeued_at = 0
end
return t
end

function record_wall_time!(t::Task, done_at::UInt64=time_ns())
if t.is_timing_enabled
@assert t.first_enqueued_at != 0
t.wall_time_ns = done_at - t.first_enqueued_at
end
return t
end
58 changes: 58 additions & 0 deletions base/timing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,64 @@ end
# total time spend in garbage collection, in nanoseconds
gc_time_ns() = ccall(:jl_gc_total_hrtime, UInt64, ())

"""
task_cpu_time_ns(t::Task) -> UInt64

Return the total nanoseconds that the task `t` has spent running.
This metric is only updated when the task yields or completes.
See also [`task_wall_time_ns`](@ref).

Will be `UInt64(0)` if task timings are not enabled.
See [`Base.task_timing`](@ref).

!!! note "This metric is from the Julia scheduler"
A task may be running on an OS thread that is descheduled by the OS
scheduler, this time still counts towards the metric.

!!! compat "Julia 1.12"
This method was added in Julia 1.12.
"""
function task_cpu_time_ns(t::Task)
return t.cpu_time_ns
end

"""
task_wall_time_ns(t::Task) -> UInt64

Return the total nanoseconds that the task `t` was runnable.
This is the time since the task entered the run queue until the time at which it completed,
or until the current time if the task has not yet completed.
See also [`task_cpu_time_ns`](@ref).

Will be `UInt64(0)` if task timings are not enabled.
See [`Base.task_timing`](@ref).

!!! compat "Julia 1.12"
This method was added in Julia 1.12.
"""
function task_wall_time_ns(t::Task)
# TODO: report up til current time if not done? too racy?
# return istaskdone(t) ? t.wall_time_ns : time_ns() - t.first_enqueued_at
return t.wall_time_ns
end

"""
Base.task_timing(::Bool)

Enable or disable the collection of per-task timing information.
Task created when Base.task_timing(true) is in effect will have [`task_cpu_time_ns`](@ref)
and [`task_wall_time_ns`](@ref) timing information available.
"""
function task_timing(b::Bool)
if b
ccall(:jl_task_timing_enable, Cvoid, ())
else
# TODO: prevent decrementing the counter below zero
ccall(:jl_task_timing_disable, Cvoid, ())
end
return nothing
end

"""
Base.gc_live_bytes()

Expand Down
4 changes: 4 additions & 0 deletions doc/man/julia.1
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,10 @@ If --trace-compile is enabled show how long each took to compile in ms
--trace-dispatch={stderr|name}
Print precompile statements for methods dispatched during execution or save to stderr or a path.

.TP
--task-timing
Enabled the collection of per-task timing metrics.

.TP
-image-codegen
Force generate code in imaging mode
Expand Down
1 change: 1 addition & 0 deletions doc/src/manual/command-line-interface.md
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,7 @@ The following is a complete list of command-line switches available when launchi
|`--trace-compile={stderr\|name}` |Print precompile statements for methods compiled during execution or save to stderr or a path. Methods that were recompiled are printed in yellow or with a trailing comment if color is not supported|
|`--trace-compile-timing` |If --trace-compile is enabled show how long each took to compile in ms|
|`--trace-dispatch={stderr\|name}` |Print precompile statements for methods dispatched during execution or save to stderr or a path.|
|`--task-timing` |Enable tasks the collection of per-task timing metrics|
|`--image-codegen` |Force generate code in imaging mode|
|`--permalloc-pkgimg={yes\|no*}` |Copy the data section of package images into memory|
|`--trim={no*\|safe\|unsafe\|unsafe-warn}` |Build a sysimage including only code provably reachable from methods marked by calling `entrypoint`. The three non-default options differ in how they handle dynamic call sites. In safe mode, such sites result in compile-time errors. In unsafe mode, such sites are allowed but the resulting binary might be missing needed code and can throw runtime errors. With unsafe-warn, such sites will trigger warnings at compile-time and might error at runtime.|
Expand Down
3 changes: 3 additions & 0 deletions src/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -874,6 +874,9 @@ static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_
post_boot_hooks();
}

if (jl_options.task_timing) {
jl_atomic_fetch_add(&jl_task_timing_enabled, 1);
}
if (jl_base_module == NULL) {
// nthreads > 1 requires code in Base
jl_atomic_store_relaxed(&jl_n_threads, 1);
Expand Down
27 changes: 27 additions & 0 deletions src/jlapi.c
Original file line number Diff line number Diff line change
Expand Up @@ -809,6 +809,33 @@ JL_DLLEXPORT uint64_t jl_cumulative_recompile_time_ns(void)
return jl_atomic_load_relaxed(&jl_cumulative_recompile_time);
}

/**
* @brief Enable per-task timing.
*/
JL_DLLEXPORT void jl_task_timing_enable(void)
{
// Increment the flag to allow reentrant callers.
jl_atomic_fetch_add(&jl_task_timing_enabled, 1);
}

/**
* @brief Disable per-task timing.
*/
JL_DLLEXPORT void jl_task_timing_disable(void)
{
jl_atomic_fetch_add(&jl_task_timing_enabled, -1);
}

// TODO: remove this
JL_DLLEXPORT void jl_is_task_timing_enabled(void)
{
jl_atomic_load_relaxed(&jl_task_timing_enabled);
}
JL_DLLEXPORT void jl_is_compile_timing_enabled(void)
{
jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
}

/**
* @brief Retrieve floating-point environment constants.
*
Expand Down
7 changes: 7 additions & 0 deletions src/jloptions.c
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ JL_DLLEXPORT void jl_init_options(void)
0, // heap-size-hint
0, // trace_compile_timing
0, // trim
0, // task_timing
};
jl_options_initialized = 1;
}
Expand Down Expand Up @@ -265,6 +266,7 @@ static const char opts_hidden[] =
" comment if color is not supported\n"
" --trace-compile-timing If --trace-compile is enabled show how long each took to\n"
" compile in ms\n"
" --task-timing Enable collection of per-task timing data.\n"
" --image-codegen Force generate code in imaging mode\n"
" --permalloc-pkgimg={yes|no*} Copy the data section of package images into memory\n"
" --trim={no*|safe|unsafe|unsafe-warn}\n"
Expand Down Expand Up @@ -296,6 +298,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
opt_trace_compile,
opt_trace_compile_timing,
opt_trace_dispatch,
opt_task_timing,
opt_math_mode,
opt_worker,
opt_bind_to,
Expand Down Expand Up @@ -375,6 +378,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
{ "trace-compile", required_argument, 0, opt_trace_compile },
{ "trace-compile-timing", no_argument, 0, opt_trace_compile_timing },
{ "trace-dispatch", required_argument, 0, opt_trace_dispatch },
{ "task-timing", no_argument, 0, opt_task_timing },
{ "math-mode", required_argument, 0, opt_math_mode },
{ "handle-signals", required_argument, 0, opt_handle_signals },
// hidden command line options
Expand Down Expand Up @@ -964,6 +968,9 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
else
jl_errorf("julia: invalid argument to --trim={safe|no|unsafe|unsafe-warn} (%s)", optarg);
break;
case opt_task_timing:
jl_options.task_timing = 1;
break;
default:
jl_errorf("julia: unhandled option -- %c\n"
"This is a bug, please report it.", c);
Expand Down
1 change: 1 addition & 0 deletions src/jloptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ typedef struct {
uint64_t heap_size_hint;
int8_t trace_compile_timing;
int8_t trim;
int8_t task_timing;
} jl_options_t;

#endif
18 changes: 14 additions & 4 deletions src/jltypes.c
Original file line number Diff line number Diff line change
Expand Up @@ -3744,7 +3744,7 @@ void jl_init_types(void) JL_GC_DISABLED
NULL,
jl_any_type,
jl_emptysvec,
jl_perm_symsvec(16,
jl_perm_symsvec(21,
"next",
"queue",
"storage",
Expand All @@ -3760,8 +3760,13 @@ void jl_init_types(void) JL_GC_DISABLED
"_state",
"sticky",
"_isexception",
"priority"),
jl_svec(16,
"priority",
"is_timing_enabled",
"first_enqueued_at",
"last_dequeued_at",
"cpu_time_ns",
"wall_time_ns"),
jl_svec(21,
jl_any_type,
jl_any_type,
jl_any_type,
Expand All @@ -3777,7 +3782,12 @@ void jl_init_types(void) JL_GC_DISABLED
jl_uint8_type,
jl_bool_type,
jl_bool_type,
jl_uint16_type),
jl_uint16_type,
jl_bool_type,
jl_uint64_type,
jl_uint64_type,
jl_uint64_type,
jl_uint64_type),
jl_emptysvec,
0, 1, 6);
XX(task);
Expand Down
10 changes: 10 additions & 0 deletions src/julia.h
Original file line number Diff line number Diff line change
Expand Up @@ -2289,6 +2289,16 @@ typedef struct _jl_task_t {
// uint8_t padding1;
// multiqueue priority
uint16_t priority;
// flag indicating whether or not this to measure the wall and cpu time of this task
uint8_t is_timing_enabled;
// timestamp this task first entered the run queue (TODO: int32 of ms instead?)
uint64_t first_enqueued_at;
// timestamp this task was most recently scheduled to run
uint64_t last_dequeued_at;
// time this task has spent running; updated when it yields
uint64_t cpu_time_ns;
// time between first entering the run queue and being done/failed.
uint64_t wall_time_ns;

// hidden state:
// cached floating point environment
Expand Down
3 changes: 3 additions & 0 deletions src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,9 @@ extern JL_DLLEXPORT _Atomic(uint8_t) jl_measure_compile_time_enabled;
extern JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_compile_time;
extern JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_recompile_time;

// Global *atomic* integer controlling *process-wide* task timing.
extern JL_DLLEXPORT _Atomic(uint8_t) jl_task_timing_enabled;

#define jl_return_address() ((uintptr_t)__builtin_return_address(0))

STATIC_INLINE uint32_t jl_int32hash_fast(uint32_t a)
Expand Down
Loading