Skip to content

Commit

Permalink
Add per-Task cpu time metric
Browse files Browse the repository at this point in the history
initialize to zero

make new task field visible from Julia

add start time

fix typo

add test

add a wait

import `LinearAlgebra` for `peakflops`

update last scheduled on task finish as well

no need to update last scheduled in finish

add test

fix test

Add per-task wall-time

Test individual task cpu_time less than wall_time

More tests

Move task timings mostly to julia

Record cpu time on entrance to `wait/yield`

Rename 'scheduled_at' -> '[en/de]queued_at'

Ability to enable/disable task timings globally

fixup whitespace

Enable task timing in test

Enable task timing for root task

Prevent `task_timing(false)` decrementing below zero

Change task-timing flag to require yes/no arg

Rename `timings`->`metrics`, `dequeued`->`started_running`

Update task-state-transition comments

Remove unused debug function

Add NEWS

Add fallback recording of cpu time in `wait()`

Instrument async io

Record task cpu time in `wait()`

Make task metrics flag const and counters atomic

Mark the new APIs experimental

Return task metrics as Int or nothing if disabled

Test metrics updating as expected

More tests
  • Loading branch information
nickrobinson251 committed Oct 31, 2024
1 parent fc4ae84 commit 2b61c72
Show file tree
Hide file tree
Showing 16 changed files with 468 additions and 8 deletions.
70 changes: 70 additions & 0 deletions base/experimental.jl
Original file line number Diff line number Diff line change
Expand Up @@ -368,4 +368,74 @@ adding them to the global method table.
"""
:@MethodTable

### Task metrics

"""
Base.Experimental.task_metrics(::Bool)
Enable or disable the collection of per-task metrics.
A `Task` created when `Base.task_metrics(true)` is in effect will have
[`Base.Experimental.task_cpu_time_ns`](@ref) and [`Base.Experimental.task_wall_time_ns`](@ref)
timing information available.
!!! note
Task metrics can be enabled at start-up via the `--task-metrics=yes` command line option.
"""
function task_metrics(b::Bool)
if b
ccall(:jl_task_metrics_enable, Cvoid, ())
else
ccall(:jl_task_metrics_disable, Cvoid, ())
end
return nothing
end

"""
Base.Experimental.task_cpu_time_ns(t::Task) -> Union{Int, Nothing}
Return the total nanoseconds that the task `t` has spent running.
See also [`Base.Experimental.task_wall_time_ns`](@ref).
Will be `nothing` if task timings are not enabled.
See [`Base.Experimental.task_metrics`](@ref).
!!! note "This metric is from the Julia scheduler"
A task may be running on an OS thread that is descheduled by the OS
scheduler, this time still counts towards the metric.
!!! compat "Julia 1.12"
This method was added in Julia 1.12.
"""
function task_cpu_time_ns(t::Task)
t.metrics_enabled || return nothing
if t.last_started_running_at == 0
return Int(t.cpu_time_ns)
else
return Int(t.cpu_time_ns + (time_ns() - t.last_started_running_at))
end
end

"""
Base.Experimental.task_wall_time_ns(t::Task) -> Union{Int, Nothing}
Return the total nanoseconds that the task `t` was runnable.
This is the time since the task entered the run queue until the time at which it completed,
or until the current time if the task has not yet completed.
See also [`task_cpu_time_ns`](@ref).
Will be `nothing` if task timings are not enabled.
See [`Base.task_metrics`](@ref).
!!! compat "Julia 1.12"
This method was added in Julia 1.12.
"""
function task_wall_time_ns(t::Task)
t.metrics_enabled || return nothing
start_at = t.first_enqueued_at
start_at == 0 && return 0
end_at = t.finished_at
end_at == 0 && return Int(time_ns() - start_at)
return Int(end_at - start_at)
end

end # module
1 change: 1 addition & 0 deletions base/options.jl
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ struct JLOptions
heap_size_hint::UInt64
trace_compile_timing::Int8
safe_crash_log_file::Ptr{UInt8}
task_metrics::Int8
end

# This runs early in the sysimage != is not defined yet
Expand Down
52 changes: 50 additions & 2 deletions base/task.jl
Original file line number Diff line number Diff line change
Expand Up @@ -810,7 +810,11 @@ function enq_work(t::Task)
return t
end

schedule(t::Task) = enq_work(t)
function schedule(t::Task)
# [task] created -scheduled-> wait_time
maybe_record_enqueued!(t)
enq_work(t)
end

"""
schedule(t::Task, [val]; error=false)
Expand Down Expand Up @@ -857,6 +861,8 @@ function schedule(t::Task, @nospecialize(arg); error=false)
t.queue === nothing || Base.error("schedule: Task not runnable")
setfield!(t, :result, arg)
end
# [task] created -scheduled-> wait_time
maybe_record_enqueued!(t)
enq_work(t)
return t
end
Expand Down Expand Up @@ -888,7 +894,13 @@ A fast, unfair-scheduling version of `schedule(t, arg); yield()` which
immediately yields to `t` before calling the scheduler.
"""
function yield(t::Task, @nospecialize(x=nothing))
(t._state === task_state_runnable && t.queue === nothing) || error("yield: Task not runnable")
current = current_task()
# [task] user_time -yield-> wait_time
record_cpu_time!(current)
t === current && throw(ConcurrencyViolationError("Cannot yield to currently running task!"))
(t._state === task_state_runnable && t.queue === nothing) || throw(ConcurrencyViolationError("yield: Task not runnable"))
# [task] created -scheduled-> wait_time
maybe_record_enqueued!(t)
t.result = x
enq_work(current_task())
set_next_task(t)
Expand All @@ -904,13 +916,17 @@ call to `yieldto`. This is a low-level call that only switches tasks, not consid
or scheduling in any way. Its use is discouraged.
"""
function yieldto(t::Task, @nospecialize(x=nothing))
# [task] user_time -yield-> wait_time
record_cpu_time!(current_task())
# TODO: these are legacy behaviors; these should perhaps be a scheduler
# state error instead.
if t._state === task_state_done
return x
elseif t._state === task_state_failed
throw(t.result)
end
# [task] created -scheduled-unfairly-> wait_time
maybe_record_enqueued!(t)
t.result = x
set_next_task(t)
return try_yieldto(identity)
Expand All @@ -924,6 +940,11 @@ function try_yieldto(undo)
rethrow()
end
ct = current_task()
# [task] wait_time -(re)started-> user_time
if ct.metrics_enabled
@assert ct.last_started_running_at == 0
@atomic :monotonic ct.last_started_running_at = time_ns()
end
if ct._isexception
exc = ct.result
ct.result = nothing
Expand All @@ -937,6 +958,10 @@ end

# yield to a task, throwing an exception in it
function throwto(t::Task, @nospecialize exc)
# [task] user_time -yield-> wait_time
record_cpu_time!(current_task())
# [task] created -scheduled-unfairly-> wait_time
maybe_record_enqueued!(t)
t.result = exc
t._isexception = true
set_next_task(t)
Expand Down Expand Up @@ -989,6 +1014,8 @@ checktaskempty = Partr.multiq_check_empty
end

function wait()
# [task] user_time -yield-or-done-> wait_time
record_cpu_time!(current_task())
GC.safepoint()
W = workqueue_for(Threads.threadid())
poptask(W)
Expand All @@ -1003,3 +1030,24 @@ if Sys.iswindows()
else
pause() = ccall(:pause, Cvoid, ())
end

# update the `cpu_time_ns` field of `t` to include the time since it last started running.
function record_cpu_time!(t::Task)
if t.metrics_enabled
stopped_at = t.finished_at == 0 ? time_ns() : t.finished_at
@assert t.last_started_running_at != 0
@atomic :monotonic t.cpu_time_ns += stopped_at - t.last_started_running_at
# set to 0 to indicate that the task is not running
@atomic :monotonic t.last_started_running_at = 0
end
return t
end
# if this is the first time `t` has been added to the run queue
# (or the first time it has been unfairly yielded to without being added to the run queue)
# then set the `first_enqueued_at` field to the current time.
function maybe_record_enqueued!(t::Task)
if t.metrics_enabled && t.first_enqueued_at == 0
@atomic :monotonic t.first_enqueued_at = time_ns()
end
return t
end
4 changes: 3 additions & 1 deletion doc/man/julia.1
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,9 @@ Print precompile statements for methods compiled during execution or save to a p
If --trace-compile is enabled show how long each took to compile in ms

.TP
--task-metrics={yes|no*}
Enable the collection of per-task metrics.

-image-codegen
Force generate code in imaging mode

Expand All @@ -281,6 +284,5 @@ See https://docs.julialang.org/en/v1/manual/environment-variables/
Please report any bugs using the GitHub issue tracker:
https://github.com/julialang/julia/issues?state=open


.SH AUTHORS
Contributors: https://github.com/JuliaLang/julia/graphs/contributors
1 change: 1 addition & 0 deletions doc/src/manual/command-line-interface.md
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ The following is a complete list of command-line switches available when launchi
|`--code-coverage=tracefile.info` |Append coverage information to the LCOV tracefile (filename supports format tokens).|
|`--track-allocation[={none*\|user\|all}]` |Count bytes allocated by each source line (omitting setting is equivalent to "user")|
|`--track-allocation=@<path>` |Count bytes but only in files that fall under the given file path/directory. The `@` prefix is required to select this option. A `@` with no path will track the current directory.|
|`--task-metrics={yes\|no*}` |Enable the collection of per-task metrics|
|`--bug-report=KIND` |Launch a bug report session. It can be used to start a REPL, run a script, or evaluate expressions. It first tries to use BugReporting.jl installed in current environment and falls back to the latest compatible BugReporting.jl if not. For more information, see `--bug-report=help`.|
|`--compile={yes*\|no\|all\|min}` |Enable or disable JIT compiler, or request exhaustive or minimal compilation|
|`--output-o <name>` |Generate an object file (including system image data)|
Expand Down
4 changes: 4 additions & 0 deletions src/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -851,6 +851,10 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel)
#if defined(_COMPILER_GCC_) && __GNUC__ >= 12
#pragma GCC diagnostic ignored "-Wdangling-pointer"
#endif
if (jl_options.task_metrics == JL_OPTIONS_TASK_METRICS_ON) {
// enable before creating the root task so it gets timings too.
jl_atomic_fetch_add(&jl_task_metrics_enabled, 1);
}
// warning: this changes `jl_current_task`, so be careful not to call that from this function
jl_task_t *ct = jl_init_root_task(ptls, stack_lo, stack_hi);
#pragma GCC diagnostic pop
Expand Down
22 changes: 22 additions & 0 deletions src/jlapi.c
Original file line number Diff line number Diff line change
Expand Up @@ -509,6 +509,28 @@ JL_DLLEXPORT uint64_t jl_cumulative_recompile_time_ns(void)
return jl_atomic_load_relaxed(&jl_cumulative_recompile_time);
}

/**
* @brief Enable per-task timing.
*/
JL_DLLEXPORT void jl_task_metrics_enable(void)
{
// Increment the flag to allow reentrant callers.
jl_atomic_fetch_add(&jl_task_metrics_enabled, 1);
}

/**
* @brief Disable per-task timing.
*/
JL_DLLEXPORT void jl_task_metrics_disable(void)
{
// Prevent decrementing the counter below zero
uint8_t enabled = jl_atomic_load_relaxed(&jl_task_metrics_enabled);
while (enabled > 0) {
if (jl_atomic_cmpswap(&jl_task_metrics_enabled, &enabled, enabled-1))
break;
}
}

JL_DLLEXPORT void jl_get_fenv_consts(int *ret)
{
ret[0] = FE_INEXACT;
Expand Down
14 changes: 13 additions & 1 deletion src/jloptions.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ JL_DLLEXPORT void jl_init_options(void)
0, // heap-size-hint
0, // trace_compile_timing
NULL, // safe_crash_log_file
0, // task_metrics
};
jl_options_initialized = 1;
}
Expand Down Expand Up @@ -206,7 +207,7 @@ static const char opts_hidden[] =
" --strip-metadata Remove docstrings and source location info from system image\n"
" --strip-ir Remove IR (intermediate representation) of compiled functions\n\n"

// compiler debugging (see the devdocs for tips on using these options)
// compiler debugging and experimental (see the devdocs for tips on using these options)
" --output-unopt-bc <name> Generate unoptimized LLVM bitcode (.bc)\n"
" --output-bc <name> Generate LLVM bitcode (.bc)\n"
" --output-asm <name> Generate an assembly file (.s)\n"
Expand All @@ -215,6 +216,7 @@ static const char opts_hidden[] =
" --trace-compile={stderr,name}\n"
" Print precompile statements for methods compiled during execution or save to a path\n"
" --trace-compile-timing If --trace-compile is enabled show how long each took to compile in ms\n"
" --task-metrics={yes|no*} Enable collection of per-task timing data.\n"
" --image-codegen Force generate code in imaging mode\n"
" --permalloc-pkgimg={yes|no*} Copy the data section of package images into memory\n"
;
Expand All @@ -239,6 +241,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
opt_trace_compile,
opt_trace_compile_timing,
opt_trace_dispatch,
opt_task_metrics,
opt_math_mode,
opt_worker,
opt_bind_to,
Expand Down Expand Up @@ -316,6 +319,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
{ "polly", required_argument, 0, opt_polly },
{ "trace-compile", required_argument, 0, opt_trace_compile },
{ "trace-dispatch", required_argument, 0, opt_trace_dispatch },
{ "task-metrics", required_argument, 0, opt_task_metrics },
{ "math-mode", required_argument, 0, opt_math_mode },
{ "handle-signals", required_argument, 0, opt_handle_signals },
// hidden command line options
Expand Down Expand Up @@ -872,6 +876,14 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
if (jl_options.safe_crash_log_file == NULL)
jl_error("julia: failed to allocate memory for --safe-crash-log-file");
break;
case opt_task_metrics:
if (!strcmp(optarg, "no"))
jl_options.task_metrics = JL_OPTIONS_TASK_METRICS_OFF;
else if (!strcmp(optarg, "yes"))
jl_options.task_metrics = JL_OPTIONS_TASK_METRICS_ON;
else
jl_errorf("julia: invalid argument to --task-metrics={yes|no} (%s)", optarg);
break;
default:
jl_errorf("julia: unhandled option -- %c\n"
"This is a bug, please report it.", c);
Expand Down
1 change: 1 addition & 0 deletions src/jloptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ typedef struct {
uint64_t heap_size_hint;
int8_t trace_compile_timing;
const char *safe_crash_log_file;
int8_t task_metrics;
} jl_options_t;

#endif
24 changes: 20 additions & 4 deletions src/jltypes.c
Original file line number Diff line number Diff line change
Expand Up @@ -3222,7 +3222,7 @@ void jl_init_types(void) JL_GC_DISABLED
NULL,
jl_any_type,
jl_emptysvec,
jl_perm_symsvec(16,
jl_perm_symsvec(21,
"next",
"queue",
"storage",
Expand All @@ -3238,8 +3238,13 @@ void jl_init_types(void) JL_GC_DISABLED
"_state",
"sticky",
"_isexception",
"priority"),
jl_svec(16,
"priority",
"metrics_enabled",
"first_enqueued_at",
"last_started_running_at",
"cpu_time_ns",
"finished_at"),
jl_svec(21,
jl_any_type,
jl_any_type,
jl_any_type,
Expand All @@ -3255,12 +3260,23 @@ void jl_init_types(void) JL_GC_DISABLED
jl_uint8_type,
jl_bool_type,
jl_bool_type,
jl_uint16_type),
jl_uint16_type,
jl_bool_type,
jl_uint64_type,
jl_uint64_type,
jl_uint64_type,
jl_uint64_type),
jl_emptysvec,
0, 1, 6);
XX(task);
jl_value_t *listt = jl_new_struct(jl_uniontype_type, jl_task_type, jl_nothing_type);
jl_svecset(jl_task_type->types, 0, listt);
// Set field 17 (metrics_enabled) as const
// Set fields 13 (_state) and 18-21 (metric counters) as atomic
const static uint32_t task_constfields[1] = { 0b000010000000000000000 };
const static uint32_t task_atomicfields[1] = { 0b111100001000000000000 };
jl_task_type->name->constfields = task_constfields;
jl_task_type->name->atomicfields = task_atomicfields;

jl_binding_type =
jl_new_datatype(jl_symbol("Binding"), core, jl_any_type, jl_emptysvec,
Expand Down
Loading

0 comments on commit 2b61c72

Please sign in to comment.