Skip to content

Commit

Permalink
Ability to enable/disable task timings globally
Browse files Browse the repository at this point in the history
  • Loading branch information
nickrobinson251 committed Oct 24, 2024
1 parent 1ba7444 commit 7299fe3
Show file tree
Hide file tree
Showing 16 changed files with 144 additions and 34 deletions.
1 change: 1 addition & 0 deletions base/options.jl
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ struct JLOptions
heap_size_hint::UInt64
trace_compile_timing::Int8
trim::Int8
task_timing::Int8
end

# This runs early in the sysimage != is not defined yet
Expand Down
26 changes: 16 additions & 10 deletions base/task.jl
Original file line number Diff line number Diff line change
Expand Up @@ -985,7 +985,7 @@ end

function schedule(t::Task)
# user_time -task-(re)scheduled-> wait_time
if t.first_enqueued_at == 0
if t.is_timing_enabled && t.first_enqueued_at == 0
t.first_enqueued_at = time_ns()
end
enq_work(t)
Expand Down Expand Up @@ -1036,7 +1036,7 @@ function schedule(t::Task, @nospecialize(arg); error=false)
# schedule a task to be (re)started with the given value or exception
t._state === task_state_runnable || Base.error("schedule: Task not runnable")
# user_time -task-(re)scheduled-> wait_time
if t.first_enqueued_at == 0
if t.is_timing_enabled && t.first_enqueued_at == 0
t.first_enqueued_at = time_ns()
end
if error
Expand Down Expand Up @@ -1107,7 +1107,7 @@ function yieldto(t::Task, @nospecialize(x=nothing))
elseif t._state === task_state_failed
throw(t.result)
end
if t.first_enqueued_at == 0
if t.is_timing_enabled && t.first_enqueued_at == 0
t.first_enqueued_at = time_ns()
end
t.result = x
Expand All @@ -1125,8 +1125,10 @@ function try_yieldto(undo)
ct = current_task()
# scheduler -task-started-> user
# scheduler -task-resumed-> user
# @assert ct.last_dequeued_at == 0
ct.last_dequeued_at = time_ns()
if ct.is_timing_enabled
# @assert ct.last_dequeued_at == 0
ct.last_dequeued_at = time_ns()
end
if ct._isexception
exc = ct.result
ct.result = nothing
Expand Down Expand Up @@ -1211,14 +1213,18 @@ else
end

function record_cpu_time!(t::Task, stopped_at::UInt64=time_ns())
@assert t.last_dequeued_at != 0
t.cpu_time_ns += stopped_at - t.last_dequeued_at
t.last_dequeued_at = 0
if t.is_timing_enabled
@assert t.last_dequeued_at != 0
t.cpu_time_ns += stopped_at - t.last_dequeued_at
t.last_dequeued_at = 0
end
return t
end

function record_wall_time!(t::Task, done_at::UInt64=time_ns())
@assert t.first_enqueued_at != 0
t.wall_time_ns = done_at - t.first_enqueued_at
if t.is_timing_enabled
@assert t.first_enqueued_at != 0
t.wall_time_ns = done_at - t.first_enqueued_at
end
return t
end
24 changes: 24 additions & 0 deletions base/timing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@ Return the total nanoseconds that the task `t` has spent running.
This metric is only updated when the task yields or completes.
See also [`task_wall_time_ns`](@ref).
Will be `UInt64(0)` if task timings are not enabled.
See [`Base.task_timing`](@ref).
!!! note "This metric is from the Julia scheduler"
A task may be running on an OS thread that is descheduled by the OS
scheduler, this time still counts towards the metric.
Expand All @@ -110,14 +113,35 @@ This is the time since the task entered the run queue until the time at which it
or until the current time if the task has not yet completed.
See also [`task_cpu_time_ns`](@ref).
Will be `UInt64(0)` if task timings are not enabled.
See [`Base.task_timing`](@ref).
!!! compat "Julia 1.12"
This method was added in Julia 1.12.
"""
function task_wall_time_ns(t::Task)
# TODO: report up til current time if not done? too racy?
# return istaskdone(t) ? t.wall_time_ns : time_ns() - t.first_enqueued_at
return t.wall_time_ns
end

"""
Base.task_timing(::Bool)
Enable or disable the collection of per-task timing information.
Task created when Base.task_timing(true) is in effect will have [`task_cpu_time_ns`](@ref)
and [`task_wall_time_ns`](@ref) timing information available.
"""
function task_timing(b::Bool)
if b
ccall(:jl_task_timing_enable, Cvoid, ())
else
# TODO: prevent decrementing the counter below zero
ccall(:jl_task_timing_disable, Cvoid, ())
end
return nothing
end

"""
Base.gc_live_bytes()
Expand Down
4 changes: 4 additions & 0 deletions doc/man/julia.1
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,10 @@ If --trace-compile is enabled show how long each took to compile in ms
--trace-dispatch={stderr|name}
Print precompile statements for methods dispatched during execution or save to stderr or a path.

.TP
--task-timing
Enabled the collection of per-task timing metrics.

.TP
-image-codegen
Force generate code in imaging mode
Expand Down
1 change: 1 addition & 0 deletions doc/src/manual/command-line-interface.md
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,7 @@ The following is a complete list of command-line switches available when launchi
|`--trace-compile={stderr\|name}` |Print precompile statements for methods compiled during execution or save to stderr or a path. Methods that were recompiled are printed in yellow or with a trailing comment if color is not supported|
|`--trace-compile-timing` |If --trace-compile is enabled show how long each took to compile in ms|
|`--trace-dispatch={stderr\|name}` |Print precompile statements for methods dispatched during execution or save to stderr or a path.|
|`--task-timing` |Enable tasks the collection of per-task timing metrics|
|`--image-codegen` |Force generate code in imaging mode|
|`--permalloc-pkgimg={yes\|no*}` |Copy the data section of package images into memory|
|`--trim={no*\|safe\|unsafe\|unsafe-warn}` |Build a sysimage including only code provably reachable from methods marked by calling `entrypoint`. The three non-default options differ in how they handle dynamic call sites. In safe mode, such sites result in compile-time errors. In unsafe mode, such sites are allowed but the resulting binary might be missing needed code and can throw runtime errors. With unsafe-warn, such sites will trigger warnings at compile-time and might error at runtime.|
Expand Down
3 changes: 3 additions & 0 deletions src/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -874,6 +874,9 @@ static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_
post_boot_hooks();
}

if (jl_options.task_timing) {
jl_atomic_fetch_add(&jl_task_timing_enabled, 1);
}
if (jl_base_module == NULL) {
// nthreads > 1 requires code in Base
jl_atomic_store_relaxed(&jl_n_threads, 1);
Expand Down
27 changes: 27 additions & 0 deletions src/jlapi.c
Original file line number Diff line number Diff line change
Expand Up @@ -809,6 +809,33 @@ JL_DLLEXPORT uint64_t jl_cumulative_recompile_time_ns(void)
return jl_atomic_load_relaxed(&jl_cumulative_recompile_time);
}

/**
* @brief Enable per-task timing.
*/
JL_DLLEXPORT void jl_task_timing_enable(void)
{
// Increment the flag to allow reentrant callers.
jl_atomic_fetch_add(&jl_task_timing_enabled, 1);
}

/**
* @brief Disable per-task timing.
*/
JL_DLLEXPORT void jl_task_timing_disable(void)
{
jl_atomic_fetch_add(&jl_task_timing_enabled, -1);
}

// TODO: remove this
JL_DLLEXPORT void jl_is_task_timing_enabled(void)
{
jl_atomic_load_relaxed(&jl_task_timing_enabled);
}
JL_DLLEXPORT void jl_is_compile_timing_enabled(void)
{
jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
}

/**
* @brief Retrieve floating-point environment constants.
*
Expand Down
8 changes: 7 additions & 1 deletion src/jloptions.c
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ JL_DLLEXPORT void jl_init_options(void)
0, // heap-size-hint
0, // trace_compile_timing
0, // trim
0, // task_timing
};
jl_options_initialized = 1;
}
Expand Down Expand Up @@ -258,13 +259,13 @@ static const char opts_hidden[] =
" --output-bc <name> Generate LLVM bitcode (.bc)\n"
" --output-asm <name> Generate an assembly file (.s)\n"
" --output-incremental={yes|no*} Generate an incremental output file (rather than\n"
" complete)\n"
" --trace-compile={stderr|name} Print precompile statements for methods compiled\n"
" during execution or save to stderr or a path. Methods that\n"
" were recompiled are printed in yellow or with a trailing\n"
" comment if color is not supported\n"
" --trace-compile-timing If --trace-compile is enabled show how long each took to\n"
" compile in ms\n"
" --task-timing Enable collection of per-task timing data.\n"
" --image-codegen Force generate code in imaging mode\n"
" --permalloc-pkgimg={yes|no*} Copy the data section of package images into memory\n"
" --trim={no*|safe|unsafe|unsafe-warn}\n"
Expand Down Expand Up @@ -296,6 +297,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
opt_trace_compile,
opt_trace_compile_timing,
opt_trace_dispatch,
opt_task_timing,
opt_math_mode,
opt_worker,
opt_bind_to,
Expand Down Expand Up @@ -375,6 +377,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
{ "trace-compile", required_argument, 0, opt_trace_compile },
{ "trace-compile-timing", no_argument, 0, opt_trace_compile_timing },
{ "trace-dispatch", required_argument, 0, opt_trace_dispatch },
{ "task-timing", no_argument, 0, opt_task_timing },
{ "math-mode", required_argument, 0, opt_math_mode },
{ "handle-signals", required_argument, 0, opt_handle_signals },
// hidden command line options
Expand Down Expand Up @@ -964,6 +967,9 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
else
jl_errorf("julia: invalid argument to --trim={safe|no|unsafe|unsafe-warn} (%s)", optarg);
break;
case opt_task_timing:
jl_options.task_timing = 1;
break;
default:
jl_errorf("julia: unhandled option -- %c\n"
"This is a bug, please report it.", c);
Expand Down
1 change: 1 addition & 0 deletions src/jloptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ typedef struct {
uint64_t heap_size_hint;
int8_t trace_compile_timing;
int8_t trim;
int8_t task_timing;
} jl_options_t;

#endif
6 changes: 4 additions & 2 deletions src/jltypes.c
Original file line number Diff line number Diff line change
Expand Up @@ -3744,7 +3744,7 @@ void jl_init_types(void) JL_GC_DISABLED
NULL,
jl_any_type,
jl_emptysvec,
jl_perm_symsvec(20,
jl_perm_symsvec(21,
"next",
"queue",
"storage",
Expand All @@ -3761,11 +3761,12 @@ void jl_init_types(void) JL_GC_DISABLED
"sticky",
"_isexception",
"priority",
"is_timing_enabled",
"first_enqueued_at",
"last_dequeued_at",
"cpu_time_ns",
"wall_time_ns"),
jl_svec(20,
jl_svec(21,
jl_any_type,
jl_any_type,
jl_any_type,
Expand All @@ -3782,6 +3783,7 @@ void jl_init_types(void) JL_GC_DISABLED
jl_bool_type,
jl_bool_type,
jl_uint16_type,
jl_bool_type,
jl_uint64_type,
jl_uint64_type,
jl_uint64_type,
Expand Down
2 changes: 2 additions & 0 deletions src/julia.h
Original file line number Diff line number Diff line change
Expand Up @@ -2274,6 +2274,8 @@ typedef struct _jl_task_t {
// uint8_t padding1;
// multiqueue priority
uint16_t priority;
// bool indicating whether or not this to measure the wall and cpu time of this task
uint8_t is_timing_enabled;
// timestamp this task first entered the run queue (TODO: int32 of ms instead?)
uint64_t first_enqueued_at;
// timestamp this task was most recently scheduled to run
Expand Down
8 changes: 2 additions & 6 deletions src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,8 @@ extern JL_DLLEXPORT void jl_typeinf_timing_end(uint64_t start, int is_recompile)
extern JL_DLLEXPORT _Atomic(uint8_t) jl_measure_compile_time_enabled;
extern JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_compile_time;
extern JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_recompile_time;
// Global *atomic* integer controlling *process-wide* task timing.
extern JL_DLLEXPORT _Atomic(uint8_t) jl_task_timing_enabled;

#define jl_return_address() ((uintptr_t)__builtin_return_address(0))

Expand All @@ -295,7 +297,6 @@ STATIC_INLINE uint32_t jl_int32hash_fast(uint32_t a)
return a; // identity hashing seems to work well enough here
}


// this is a version of memcpy that preserves atomic memory ordering
// which makes it safe to use for objects that can contain memory references
// without risk of creating pointers out of thin air
Expand Down Expand Up @@ -376,7 +377,6 @@ jl_value_t *jl_gc_big_alloc_noinline(jl_ptls_t ptls, size_t allocsz);
JL_DLLEXPORT int jl_gc_classify_pools(size_t sz, int *osize) JL_NOTSAFEPOINT;
void gc_sweep_sysimg(void);


// pools are 16376 bytes large (GC_POOL_SZ - GC_PAGE_OFFSET)
static const int jl_gc_sizeclasses[] = {
#ifdef _P64
Expand Down Expand Up @@ -513,7 +513,6 @@ STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass_align8(unsigned sz) JL_NOTSAFE
#define GC_MAX_SZCLASS (2032-sizeof(void*))
static_assert(ARRAY_CACHE_ALIGN_THRESHOLD > GC_MAX_SZCLASS, "");


// Size does NOT include the type tag!!
STATIC_INLINE jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty)
{
Expand Down Expand Up @@ -834,7 +833,6 @@ JL_DLLEXPORT jl_value_t *jl_matching_methods(jl_tupletype_t *types, jl_value_t *
size_t world, size_t *min_valid, size_t *max_valid, int *ambig);
JL_DLLEXPORT jl_value_t *jl_gf_invoke_lookup_worlds(jl_value_t *types, jl_value_t *mt, size_t world, size_t *min_world, size_t *max_world);


jl_datatype_t *jl_nth_argument_datatype(jl_value_t *argtypes JL_PROPAGATES_ROOT, int n) JL_NOTSAFEPOINT;
JL_DLLEXPORT jl_value_t *jl_argument_datatype(jl_value_t *argt JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
JL_DLLEXPORT jl_methtable_t *jl_method_table_for(
Expand Down Expand Up @@ -1479,7 +1477,6 @@ JL_DLLEXPORT jl_value_t *jl_get_cfunction_trampoline(
void *(*init_trampoline)(void *tramp, void **nval),
jl_unionall_t *env, jl_value_t **vals);


// Special filenames used to refer to internal julia libraries
#define JL_EXE_LIBNAME ((const char*)1)
#define JL_LIBJULIA_DL_LIBNAME ((const char*)2)
Expand Down Expand Up @@ -1924,7 +1921,6 @@ JL_DLLIMPORT uint64_t jl_getUnwindInfo(uint64_t dwBase);

#pragma GCC visibility pop


#ifdef USE_DTRACE
// Generated file, needs to be searched in include paths so that the builddir
// retains priority
Expand Down
21 changes: 15 additions & 6 deletions src/task.c
Original file line number Diff line number Diff line change
Expand Up @@ -1146,6 +1146,7 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion
t->ptls = NULL;
t->world_age = ct->world_age;
t->reentrant_timing = 0;
t->is_timing_enabled = jl_atomic_load_relaxed(&jl_task_timing_enabled) != 0;
t->first_enqueued_at = 0;
t->last_dequeued_at = 0;
t->cpu_time_ns = 0;
Expand Down Expand Up @@ -1249,9 +1250,11 @@ CFI_NORETURN

ct->ctx.started = 1;
// wait_time -task-started-> user_time
assert(ct->first_enqueued_at != 0);
assert(ct->last_dequeued_at == 0);
ct->last_dequeued_at = jl_hrtime();
if (ct->is_timing_enabled) {
assert(ct->first_enqueued_at != 0);
assert(ct->last_dequeued_at == 0);
ct->last_dequeued_at = jl_hrtime();
}
JL_PROBE_RT_START_TASK(ct);
jl_timing_block_task_enter(ct, ptls, NULL);
if (jl_atomic_load_relaxed(&ct->_isexception)) {
Expand Down Expand Up @@ -1597,9 +1600,15 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi)
ct->reentrant_timing = 0;
ct->cpu_time_ns = 0;
ct->wall_time_ns = 0;
uint64_t now = jl_hrtime();
ct->first_enqueued_at = now;
ct->last_dequeued_at = now;
if (ct->is_timing_enabled) {
uint64_t now = jl_hrtime();
ct->first_enqueued_at = now;
ct->last_dequeued_at = now;
}
else {
ct->first_enqueued_at = 0;
ct->last_dequeued_at = 0;
}
ptls->root_task = ct;
jl_atomic_store_relaxed(&ptls->current_task, ct);
JL_GC_PROMISE_ROOTED(ct);
Expand Down
2 changes: 2 additions & 0 deletions src/threading.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ JL_DLLEXPORT _Atomic(uint8_t) jl_measure_compile_time_enabled = 0;
JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_compile_time = 0;
JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_recompile_time = 0;

JL_DLLEXPORT _Atomic(uint8_t) jl_task_timing_enabled = 0;

JL_DLLEXPORT void *jl_get_ptls_states(void)
{
// mostly deprecated: use current_task instead
Expand Down
Loading

0 comments on commit 7299fe3

Please sign in to comment.