Skip to content

Commit

Permalink
Add LinuxPerf extension for branch + instruction counts
Browse files Browse the repository at this point in the history
This updates the core BenchmarkTools types to include `instructions` and
`branches` fields. These fields support serialization and all of the usual
stats / judgements via the Trial / TrialEstimate / TrialRatio interface.

If the extension is not available or `perf` is not configured correctly on
your system, these are `NaN`.

This also keeps the serialization format backwards-compatible, reporting any
missing measurements as `NaN`.
  • Loading branch information
topolarity committed Oct 1, 2024
1 parent b9f4c5e commit 21ca9cd
Show file tree
Hide file tree
Showing 15 changed files with 626 additions and 93 deletions.
8 changes: 1 addition & 7 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,14 @@ jobs:
fail-fast: false
matrix:
version:
- '1.6'
- '1.10'
- '1'
- 'nightly'
arch:
- x64
os:
- ubuntu-latest
include:
- version: '1.7'
arch: x64
os: ubuntu-20.04
- version: '1.8'
arch: x64
os: ubuntu-22.04
- version: '1.9'
arch: x64
os: ubuntu-22.04
Expand Down
11 changes: 9 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ Profile = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"

[weakdeps]
LinuxPerf = "b4c46c6c-4fb0-484d-a11a-41bc3392d094"

[extensions]
LinuxPerfExt = "LinuxPerf"

[compat]
Aqua = "0.8"
Compat = ">= 4.11.0"
Expand All @@ -22,7 +28,8 @@ Profile = "<0.0.1, 1"
Statistics = "<0.0.1, 1"
Test = "<0.0.1, 1"
UUIDs = "<0.0.1, 1"
julia = "1.6"
julia = "1.9"
LinuxPerf = ">= 0.4"

[extras]
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
Expand All @@ -31,4 +38,4 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Aqua", "JuliaFormatter", "Statistics", "Test"]
test = ["Aqua", "JuliaFormatter", "Statistics", "Test", "LinuxPerf"]
47 changes: 47 additions & 0 deletions ext/LinuxPerfExt/LinuxPerfExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
module LinuxPerfExt

import BenchmarkTools: PerfInterface
import LinuxPerf: LinuxPerf, PerfBench, EventGroup, EventType
import LinuxPerf: enable!, disable!, enable_all!, disable_all!, close, read!

"""
    interface() -> PerfInterface

Probe whether Linux `perf` hardware counters are usable on this system and, if
so, return a fully-populated `PerfInterface` that measures retired instructions
and branches around a benchmark run. If `perf` is unavailable or cannot open
both hardware events, return the default no-op `PerfInterface()`, whose `read`
hook reports `(NaN, NaN)`.
"""
function interface()
    # Probe once up front: try to open an event group containing both hardware
    # counters we need, then close it again immediately.
    let g = try
            EventGroup([EventType(:hw, :instructions), EventType(:hw, :branches)])
        catch
            # If perf is not working on the system, the above constructor will throw an
            # ioctl or perf_event_open error (after presenting a warning to the user)
            return PerfInterface()
        end
        close(g)
        # Both events must actually have been opened (one fd per event);
        # otherwise fall back to the no-op interface.
        length(g.fds) != 2 && return PerfInterface()
    end

    # If we made it here, perf seems to be working on this system
    return PerfInterface(;
        # Open a fresh counter group for each sample. NOTE(review): the first
        # PerfBench argument is presumably the pid, with 0 meaning the current
        # process — confirm against LinuxPerf.jl.
        setup=() ->
            let g = EventGroup([EventType(:hw, :instructions), EventType(:hw, :branches)])
                PerfBench(0, EventGroup[g])
            end,
        start=(bench) -> enable_all!(),
        stop=(bench) -> disable_all!(),
        # NOTE(review): the per-bench enable!/disable! variants below are kept
        # here for reference; enable_all!/disable_all! appear to toggle all
        # counters rather than just `bench`'s — confirm intent with LinuxPerf.jl.
        # start=(bench) -> enable!(bench),
        # stop=(bench) -> disable!(bench),
        teardown=(bench) -> close(bench),
        # Read all counters from the group leader's fd in one go. The five
        # UInt64s destructured here follow perf's group read layout:
        # (nr_events, time_enabled, time_running, value_1, value_2).
        read=(bench) -> let g = only(bench.groups)
            (N, time_enabled, time_running, insts, branches) = read!(g.leader_io, Vector{UInt64}(undef, 5))
            if 2 * time_running <= time_enabled
                # enabled less than 50% of the time
                # (most likely due to PMU contention with other perf events)
                return (NaN, NaN)
            else
                # account for partially-active measurement
                k = time_enabled / time_running
                estimated_instructions = Float64(insts) * k
                estimated_branches = Float64(branches) * k
                return (estimated_instructions, estimated_branches)
            end
        end,
    )
end

end # module LinuxPerfExt
2 changes: 2 additions & 0 deletions src/BenchmarkTools.jl
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ export loadparams!
include("trials.jl")

export gctime,
instructions,
branches,
memory,
allocs,
params,
Expand Down
57 changes: 48 additions & 9 deletions src/execution.jl
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,24 @@ macro benchmarkable(args...)
end
end

"""
    PerfInterface(; setup, start, stop, read, teardown)

A bundle of hooks used to collect hardware performance counters around
benchmark evaluations. Each field is a callable:

  - `setup()` creates the measurement state handed to the other hooks
  - `start(bench)` / `stop(bench)` bracket the measured region
  - `read(bench)` returns an `(instructions, branches)` tuple of `Float64`s
  - `teardown(bench)` releases whatever `setup` created

Every hook defaults to a no-op; in particular the default `read` reports
`(NaN, NaN)`, which is what callers observe when no perf backend (e.g. the
LinuxPerf package extension) is available on the system.
"""
struct PerfInterface
    setup::Function
    start::Function
    stop::Function
    read::Function
    teardown::Function

    # Keyword-only construction: every hook is optional and falls back to a
    # do-nothing callable, so a bare `PerfInterface()` is always valid.
    function PerfInterface(;
        read=Returns((NaN, NaN)),
        setup=Returns(nothing),
        teardown=Returns(nothing),
        start=Returns(nothing),
        stop=Returns(nothing),
    )
        return new(setup, start, stop, read, teardown)
    end
end

# `eval` an expression that forcibly defines the specified benchmark at
# top-level in order to allow transfer of locally-scoped variables into
# benchmark scope.
Expand Down Expand Up @@ -553,6 +571,8 @@ function generate_benchmark_definition(
end
)
end
ext = Base.get_extension(BenchmarkTools, :LinuxPerfExt)
LinuxPerf = isnothing(ext) ? PerfInterface() : ext.interface()
return Core.eval(
eval_module,
quote
Expand All @@ -563,17 +583,34 @@ function generate_benchmark_definition(
$(Expr(:tuple, quote_vars...)), __params::$BenchmarkTools.Parameters
)
$(setup)
__perf_bench = $(LinuxPerf.setup)()
__gcdiff = nothing
__return_val = nothing
__sample_time::Int64 = 0
__sample_instructions::Float64 = 0
__sample_branches::Float64 = 0
__evals = __params.evals
__gc_start = Base.gc_num()
__start_time = time_ns()
__return_val = $(invocation)
for __iter in 2:__evals
$(invocation)
try
__gc_start = Base.gc_num()
$(LinuxPerf.start)(__perf_bench)
__start_time = time_ns()
__return_val = $(invocation)
for __iter in 2:__evals
$(invocation)
end
__sample_time = time_ns() - __start_time
$(LinuxPerf.stop)(__perf_bench)
__gcdiff = Base.GC_Diff(Base.gc_num(), __gc_start)
__sample_instructions, __sample_branches = $(LinuxPerf.read)(
__perf_bench
)
finally
$(LinuxPerf.teardown)(__perf_bench)
$(teardown)
end
__sample_time = time_ns() - __start_time
__gcdiff = Base.GC_Diff(Base.gc_num(), __gc_start)
$(teardown)
__time = max((__sample_time / __evals) - __params.overhead, 0.001)
__instructions = max(__sample_instructions / __evals, 0.0) # may be NaN
__branches = max(__sample_branches / __evals, 0.0) # may be NaN
__gctime = max((__gcdiff.total_time / __evals) - __params.overhead, 0.0)
__memory = Int(Base.fld(__gcdiff.allocd, __evals))
__allocs = Int(
Expand All @@ -585,7 +622,9 @@ function generate_benchmark_definition(
__evals,
),
)
return __time, __gctime, __memory, __allocs, __return_val
return __time,
__instructions, __branches, __gctime, __memory, __allocs,
__return_val
end
$BenchmarkTools.Benchmark($(samplefunc), $(quote_vals), $(params))
end,
Expand Down
2 changes: 2 additions & 0 deletions src/groups.jl
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,8 @@ Base.min(groups::BenchmarkGroup...) = mapvals(min, groups...)
Base.max(groups::BenchmarkGroup...) = mapvals(max, groups...)

# Per-benchmark accessors lifted to whole groups: each one maps itself over
# every entry, returning a like-shaped BenchmarkGroup of values.
Base.time(group::BenchmarkGroup) = mapvals(time, group)
# Estimated retired-instruction count per entry (NaN when perf data is unavailable).
instructions(group::BenchmarkGroup) = mapvals(instructions, group)
# Estimated retired-branch count per entry (NaN when perf data is unavailable).
branches(group::BenchmarkGroup) = mapvals(branches, group)
gctime(group::BenchmarkGroup) = mapvals(gctime, group)
memory(group::BenchmarkGroup) = mapvals(memory, group)
allocs(group::BenchmarkGroup) = mapvals(allocs, group)
Expand Down
29 changes: 27 additions & 2 deletions src/parameters.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,14 @@ mutable struct Parameters
gctrial::Bool
gcsample::Bool
time_tolerance::Float64
instruction_tolerance::Float64
branch_tolerance::Float64
memory_tolerance::Float64
end

const DEFAULT_PARAMETERS = Parameters(5.0, 10000, 1, false, 0, true, false, 0.05, 0.01)
# Positional arguments must stay in `Parameters` field order — presumably:
# seconds, samples, evals, evals_set, overhead, gctrial, gcsample,
# time_tolerance, instruction_tolerance, branch_tolerance, memory_tolerance
# (TODO confirm the leading fields against the struct definition above).
const DEFAULT_PARAMETERS = Parameters(
    5.0, 10000, 1, false, 0, true, false, 0.05, 0.05, 0.05, 0.01
)

function Parameters(;
seconds=DEFAULT_PARAMETERS.seconds,
Expand All @@ -28,6 +32,8 @@ function Parameters(;
gctrial=DEFAULT_PARAMETERS.gctrial,
gcsample=DEFAULT_PARAMETERS.gcsample,
time_tolerance=DEFAULT_PARAMETERS.time_tolerance,
instruction_tolerance=DEFAULT_PARAMETERS.instruction_tolerance,
branch_tolerance=DEFAULT_PARAMETERS.branch_tolerance,
memory_tolerance=DEFAULT_PARAMETERS.memory_tolerance,
)
return Parameters(
Expand All @@ -39,6 +45,8 @@ function Parameters(;
gctrial,
gcsample,
time_tolerance,
instruction_tolerance,
branch_tolerance,
memory_tolerance,
)
end
Expand All @@ -52,6 +60,8 @@ function Parameters(
gctrial=nothing,
gcsample=nothing,
time_tolerance=nothing,
instruction_tolerance=nothing,
branch_tolerance=nothing,
memory_tolerance=nothing,
)
params = Parameters()
Expand All @@ -63,6 +73,13 @@ function Parameters(
params.gcsample = gcsample != nothing ? gcsample : default.gcsample
params.time_tolerance =
time_tolerance != nothing ? time_tolerance : default.time_tolerance
params.instruction_tolerance = if instruction_tolerance != nothing
instruction_tolerance
else
default.instruction_tolerance
end
params.branch_tolerance =
branch_tolerance != nothing ? branch_tolerance : default.branch_tolerance
params.memory_tolerance =
memory_tolerance != nothing ? memory_tolerance : default.memory_tolerance
return params::BenchmarkTools.Parameters
Expand All @@ -76,6 +93,8 @@ function Base.:(==)(a::Parameters, b::Parameters)
a.gctrial == b.gctrial &&
a.gcsample == b.gcsample &&
a.time_tolerance == b.time_tolerance &&
a.instruction_tolerance == b.instruction_tolerance &&
a.branch_tolerance == b.branch_tolerance &&
a.memory_tolerance == b.memory_tolerance
end

Expand All @@ -89,6 +108,8 @@ function Base.copy(p::Parameters)
p.gctrial,
p.gcsample,
p.time_tolerance,
p.instruction_tolerance,
p.branch_tolerance,
p.memory_tolerance,
)
end
Expand All @@ -109,7 +130,11 @@ end

@noinline function overhead_sample(evals)
start_time = time_ns()
for _ in 1:evals
try
for _ in 1:evals
nullfunc()
end
finally
nullfunc()
end
sample_time = time_ns() - start_time
Expand Down
34 changes: 32 additions & 2 deletions src/serialization.jl
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,38 @@ function recover(x::Vector)
else
xsi = if fn == "evals_set" && !haskey(fields, fn)
false
elseif fn in ("seconds", "overhead", "time_tolerance", "memory_tolerance") &&
fields[fn] === nothing
elseif fn in ("instructions", "branches")
# JSON spec doesn't support NaN, so handle it specially here
if !haskey(fields, fn)
if ft === Vector{Float64}
Float64[NaN for _ in length(fields["time"])]
elseif ft === Float64
NaN
else
@assert false
end
else
if ft === Vector{Float64}
Float64[
elem === nothing ? NaN : convert(Float64, elem) for
elem in fields[fn]
]
else
fields[fn] === nothing ? NaN : convert(ft, fields[fn])
end
end
elseif fn == "instruction_tolerance" && !haskey(fields, fn)
DEFAULT_PARAMETERS.instruction_tolerance
elseif fn == "branch_tolerance" && !haskey(fields, fn)
DEFAULT_PARAMETERS.branch_tolerance
elseif fn in (
"seconds",
"overhead",
"time_tolerance",
"instruction_tolerance",
"branch_tolerance",
"memory_tolerance",
) && fields[fn] === nothing
# JSON spec doesn't support Inf
# These fields should all be >= 0, so we can ignore -Inf case
typemax(ft)
Expand Down
Loading

0 comments on commit 21ca9cd

Please sign in to comment.