From 21ca9cd4f1d8077ec94aad77a7c2c56fcc04da1a Mon Sep 17 00:00:00 2001 From: Cody Tapscott Date: Mon, 30 Sep 2024 20:22:10 +0000 Subject: [PATCH] Add `LinuxPerf` extension for branch + instruction counts This updates the core BenchmarkTools types to include `instructions` and `branches` fields. These fields support serialization and all of the usual stats / judgements via the Trial / TrialEstimate / TrialRatio interface. If the extension is not available or `perf` is not configured correctly on your system, these are `NaN`. This also keeps the serialization format backwards-compatible, reporting any missing measurements as `NaN`. --- .github/workflows/CI.yml | 8 +- Project.toml | 11 +- ext/LinuxPerfExt/LinuxPerfExt.jl | 47 ++++++ src/BenchmarkTools.jl | 2 + src/execution.jl | 57 +++++-- src/groups.jl | 2 + src/parameters.jl | 29 +++- src/serialization.jl | 34 +++- src/trials.jl | 258 ++++++++++++++++++++++++++----- test/ExecutionTests.jl | 10 +- test/GroupsTests.jl | 32 ++-- test/ParametersTests.jl | 8 + test/SerializationTests.jl | 55 ++++++- test/TrialsTests.jl | 158 ++++++++++++++++--- test/runtests.jl | 8 + 15 files changed, 626 insertions(+), 93 deletions(-) create mode 100644 ext/LinuxPerfExt/LinuxPerfExt.jl diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index b22812a9..1e25821c 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -16,7 +16,7 @@ jobs: fail-fast: false matrix: version: - - '1.6' + - '1.10' - '1' - 'nightly' arch: @@ -24,12 +24,6 @@ jobs: os: - ubuntu-latest include: - - version: '1.7' - arch: x64 - os: ubuntu-20.04 - - version: '1.8' - arch: x64 - os: ubuntu-22.04 - version: '1.9' arch: x64 os: ubuntu-22.04 diff --git a/Project.toml b/Project.toml index 24139691..f80c690f 100644 --- a/Project.toml +++ b/Project.toml @@ -11,6 +11,12 @@ Profile = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" +[weakdeps] +LinuxPerf = "b4c46c6c-4fb0-484d-a11a-41bc3392d094" + +[extensions] +LinuxPerfExt = "LinuxPerf" + [compat] Aqua = "0.8" Compat = ">= 4.11.0" @@ -22,7 +28,8 @@ Profile = "<0.0.1, 1" Statistics = "<0.0.1, 1" Test = "<0.0.1, 1" UUIDs = "<0.0.1, 1" -julia = "1.6" +julia = "1.9" +LinuxPerf = ">= 0.4" [extras] Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" @@ -31,4 +38,4 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Aqua", "JuliaFormatter", "Statistics", "Test"] +test = ["Aqua", "JuliaFormatter", "Statistics", "Test", "LinuxPerf"] diff --git a/ext/LinuxPerfExt/LinuxPerfExt.jl b/ext/LinuxPerfExt/LinuxPerfExt.jl new file mode 100644 index 00000000..205bae88 --- /dev/null +++ b/ext/LinuxPerfExt/LinuxPerfExt.jl @@ -0,0 +1,47 @@ +module LinuxPerfExt + +import BenchmarkTools: PerfInterface +import LinuxPerf: LinuxPerf, PerfBench, EventGroup, EventType +import LinuxPerf: enable!, disable!, enable_all!, disable_all!, close, read! 
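+
+# `interface()` runs when BenchmarkTools generates a benchmark definition (see
+# src/execution.jl). It first opens a throwaway EventGroup to verify that perf
+# is actually usable on this system; on any failure it falls back to the no-op
+# PerfInterface, whose `read` hook reports (NaN, NaN).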
+ +function interface() + let g = try + EventGroup([EventType(:hw, :instructions), EventType(:hw, :branches)]) + catch + # If perf is not working on the system, the above constructor will throw an + # ioctl or perf_event_open error (after presenting a warning to the user) + return PerfInterface() + end + close(g) + length(g.fds) != 2 && return PerfInterface() + end + + # If we made it here, perf seems to be working on this system + return PerfInterface(; + setup=() -> + let g = EventGroup([EventType(:hw, :instructions), EventType(:hw, :branches)]) + PerfBench(0, EventGroup[g]) + end, + start=(bench) -> enable_all!(), + stop=(bench) -> disable_all!(), + # start=(bench) -> enable!(bench), + # stop=(bench) -> disable!(bench), + teardown=(bench) -> close(bench), + read=(bench) -> let g = only(bench.groups) + (N, time_enabled, time_running, insts, branches) = read!(g.leader_io, Vector{UInt64}(undef, 5)) + if 2 * time_running <= time_enabled + # enabled less than 50% of the time + # (most likely due to PMU contention with other perf events) + return (NaN, NaN) + else + # account for partially-active measurement + k = time_enabled / time_running + estimated_instructions = Float64(insts) * k + estimated_branches = Float64(branches) * k + return (estimated_instructions, estimated_branches) + end + end, + ) +end + +end diff --git a/src/BenchmarkTools.jl b/src/BenchmarkTools.jl index 37102cbe..8ac42347 100644 --- a/src/BenchmarkTools.jl +++ b/src/BenchmarkTools.jl @@ -25,6 +25,8 @@ export loadparams! include("trials.jl") export gctime, + instructions, + branches, memory, allocs, params, diff --git a/src/execution.jl b/src/execution.jl index a9c3e25b..9bd840f6 100644 --- a/src/execution.jl +++ b/src/execution.jl @@ -506,6 +506,24 @@ macro benchmarkable(args...) end end +struct PerfInterface + setup::Function + start::Function + stop::Function + read::Function + teardown::Function + + function PerfInterface(; + setup=Returns(nothing), + start=Returns(nothing), + stop=Returns(nothing), + read=Returns((NaN, NaN)), + teardown=Returns(nothing), + ) + return new(setup, start, stop, read, teardown) + end +end + # `eval` an expression that forcibly defines the specified benchmark at # top-level in order to allow transfer of locally-scoped variables into # benchmark scope. @@ -553,6 +571,8 @@ function generate_benchmark_definition( end ) end + ext = Base.get_extension(BenchmarkTools, :LinuxPerfExt) + LinuxPerf = isnothing(ext) ? 
PerfInterface() : ext.interface() return Core.eval( eval_module, quote @@ -563,17 +583,34 @@ function generate_benchmark_definition( $(Expr(:tuple, quote_vars...)), __params::$BenchmarkTools.Parameters ) $(setup) + __perf_bench = $(LinuxPerf.setup)() + __gcdiff = nothing + __return_val = nothing + __sample_time::Int64 = 0 + __sample_instructions::Float64 = 0 + __sample_branches::Float64 = 0 __evals = __params.evals - __gc_start = Base.gc_num() - __start_time = time_ns() - __return_val = $(invocation) - for __iter in 2:__evals - $(invocation) + try + __gc_start = Base.gc_num() + $(LinuxPerf.start)(__perf_bench) + __start_time = time_ns() + __return_val = $(invocation) + for __iter in 2:__evals + $(invocation) + end + __sample_time = time_ns() - __start_time + $(LinuxPerf.stop)(__perf_bench) + __gcdiff = Base.GC_Diff(Base.gc_num(), __gc_start) + __sample_instructions, __sample_branches = $(LinuxPerf.read)( + __perf_bench + ) + finally + $(LinuxPerf.teardown)(__perf_bench) + $(teardown) end - __sample_time = time_ns() - __start_time - __gcdiff = Base.GC_Diff(Base.gc_num(), __gc_start) - $(teardown) __time = max((__sample_time / __evals) - __params.overhead, 0.001) + __instructions = max(__sample_instructions / __evals, 0.0) # may be NaN + __branches = max(__sample_branches / __evals, 0.0) # may be NaN __gctime = max((__gcdiff.total_time / __evals) - __params.overhead, 0.0) __memory = Int(Base.fld(__gcdiff.allocd, __evals)) __allocs = Int( @@ -585,7 +622,9 @@ function generate_benchmark_definition( __evals, ), ) - return __time, __gctime, __memory, __allocs, __return_val + return __time, + __instructions, __branches, __gctime, __memory, __allocs, + __return_val end $BenchmarkTools.Benchmark($(samplefunc), $(quote_vals), $(params)) end, diff --git a/src/groups.jl b/src/groups.jl index c1022a80..7f1aafbd 100644 --- a/src/groups.jl +++ b/src/groups.jl @@ -113,6 +113,8 @@ Base.min(groups::BenchmarkGroup...) = mapvals(min, groups...) Base.max(groups::BenchmarkGroup...) = mapvals(max, groups...) 
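+# `instructions` and `branches` below follow the same `mapvals` pattern as
+# `time`/`gctime`; entries evaluate to `nothing` wherever the underlying
+# hardware counters were unavailable (NaN at the Trial level).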
Base.time(group::BenchmarkGroup) = mapvals(time, group) +instructions(group::BenchmarkGroup) = mapvals(instructions, group) +branches(group::BenchmarkGroup) = mapvals(branches, group) gctime(group::BenchmarkGroup) = mapvals(gctime, group) memory(group::BenchmarkGroup) = mapvals(memory, group) allocs(group::BenchmarkGroup) = mapvals(allocs, group) diff --git a/src/parameters.jl b/src/parameters.jl index ff1bc615..f6a0f789 100644 --- a/src/parameters.jl +++ b/src/parameters.jl @@ -14,10 +14,14 @@ mutable struct Parameters gctrial::Bool gcsample::Bool time_tolerance::Float64 + instruction_tolerance::Float64 + branch_tolerance::Float64 memory_tolerance::Float64 end -const DEFAULT_PARAMETERS = Parameters(5.0, 10000, 1, false, 0, true, false, 0.05, 0.01) +const DEFAULT_PARAMETERS = Parameters( + 5.0, 10000, 1, false, 0, true, false, 0.05, 0.05, 0.05, 0.01 +) function Parameters(; seconds=DEFAULT_PARAMETERS.seconds, @@ -28,6 +32,8 @@ function Parameters(; gctrial=DEFAULT_PARAMETERS.gctrial, gcsample=DEFAULT_PARAMETERS.gcsample, time_tolerance=DEFAULT_PARAMETERS.time_tolerance, + instruction_tolerance=DEFAULT_PARAMETERS.instruction_tolerance, + branch_tolerance=DEFAULT_PARAMETERS.branch_tolerance, memory_tolerance=DEFAULT_PARAMETERS.memory_tolerance, ) return Parameters( @@ -39,6 +45,8 @@ function Parameters(; gctrial, gcsample, time_tolerance, + instruction_tolerance, + branch_tolerance, memory_tolerance, ) end @@ -52,6 +60,8 @@ function Parameters( gctrial=nothing, gcsample=nothing, time_tolerance=nothing, + instruction_tolerance=nothing, + branch_tolerance=nothing, memory_tolerance=nothing, ) params = Parameters() @@ -63,6 +73,13 @@ function Parameters( params.gcsample = gcsample != nothing ? gcsample : default.gcsample params.time_tolerance = time_tolerance != nothing ? time_tolerance : default.time_tolerance + params.instruction_tolerance = if instruction_tolerance != nothing + instruction_tolerance + else + default.instruction_tolerance + end + params.branch_tolerance = + branch_tolerance != nothing ? branch_tolerance : default.branch_tolerance params.memory_tolerance = memory_tolerance != nothing ? 
memory_tolerance : default.memory_tolerance
     return params::BenchmarkTools.Parameters
@@ -76,6 +93,8 @@ function Base.:(==)(a::Parameters, b::Parameters)
            a.gctrial == b.gctrial &&
            a.gcsample == b.gcsample &&
            a.time_tolerance == b.time_tolerance &&
+           a.instruction_tolerance == b.instruction_tolerance &&
+           a.branch_tolerance == b.branch_tolerance &&
            a.memory_tolerance == b.memory_tolerance
 end
 
@@ -89,6 +108,8 @@ function Base.copy(p::Parameters)
         p.gctrial,
         p.gcsample,
         p.time_tolerance,
+        p.instruction_tolerance,
+        p.branch_tolerance,
         p.memory_tolerance,
     )
 end
@@ -109,7 +130,11 @@ end
 @noinline function overhead_sample(evals)
     start_time = time_ns()
-    for _ in 1:evals
+    try
+        for _ in 1:evals
+            nullfunc()
+        end
+    finally
         nullfunc()
     end
     sample_time = time_ns() - start_time
diff --git a/src/serialization.jl b/src/serialization.jl
index 7bec2c8d..36b83809 100644
--- a/src/serialization.jl
+++ b/src/serialization.jl
@@ -55,8 +55,38 @@ function recover(x::Vector)
         else
             xsi = if fn == "evals_set" && !haskey(fields, fn)
                 false
-            elseif fn in ("seconds", "overhead", "time_tolerance", "memory_tolerance") &&
-                fields[fn] === nothing
+            elseif fn in ("instructions", "branches")
+                # JSON spec doesn't support NaN, so handle it specially here
+                if !haskey(fields, fn)
+                    if ft === Vector{Float64}
+                        Float64[NaN for _ in 1:length(fields["time"])]
+                    elseif ft === Float64
+                        NaN
+                    else
+                        @assert false
+                    end
+                else
+                    if ft === Vector{Float64}
+                        Float64[
+                            elem === nothing ? NaN : convert(Float64, elem) for
+                            elem in fields[fn]
+                        ]
+                    else
+                        fields[fn] === nothing ? NaN : convert(ft, fields[fn])
+                    end
+                end
+            elseif fn == "instruction_tolerance" && !haskey(fields, fn)
+                DEFAULT_PARAMETERS.instruction_tolerance
+            elseif fn == "branch_tolerance" && !haskey(fields, fn)
+                DEFAULT_PARAMETERS.branch_tolerance
+            elseif fn in (
+                "seconds",
+                "overhead",
+                "time_tolerance",
+                "instruction_tolerance",
+                "branch_tolerance",
+                "memory_tolerance",
+            ) && fields[fn] === nothing
                 # JSON spec doesn't support Inf
                 # These fields should all be >= 0, so we can ignore -Inf case
                 typemax(ft)
diff --git a/src/trials.jl b/src/trials.jl
index 67382813..4518de01 100644
--- a/src/trials.jl
+++ b/src/trials.jl
@@ -5,27 +5,53 @@ mutable struct Trial
     params::Parameters
     times::Vector{Float64}
+    instructions::Vector{Float64}
+    branches::Vector{Float64}
     gctimes::Vector{Float64}
     memory::Int
     allocs::Int
 end
 
-Trial(params::Parameters) = Trial(params, Float64[], Float64[], typemax(Int), typemax(Int))
+function Trial(params::Parameters)
+    return Trial(
+        params, Float64[], Float64[], Float64[], Float64[], typemax(Int), typemax(Int)
+    )
+end
+
+function eq_including_nan(x::Float64, y::Float64)
+    return x === y
+end
+
+function eq_including_nan(x::Vector{Float64}, y::Vector{Float64})
+    return all(eq_including_nan.(x, y))
+end
 
 function Base.:(==)(a::Trial, b::Trial)
     return a.params == b.params &&
            a.times == b.times &&
+           eq_including_nan(a.instructions, b.instructions) &&
+           eq_including_nan(a.branches, b.branches) &&
            a.gctimes == b.gctimes &&
            a.memory == b.memory &&
            a.allocs == b.allocs
 end
 
 function Base.copy(t::Trial)
-    return Trial(copy(t.params), copy(t.times), copy(t.gctimes), t.memory, t.allocs)
+    return Trial(
+        copy(t.params),
+        copy(t.times),
+        copy(t.instructions),
+        copy(t.branches),
+        copy(t.gctimes),
+        t.memory,
+        t.allocs,
+    )
 end
 
-function Base.push!(t::Trial, time, gctime, memory, allocs)
+function Base.push!(t::Trial, time, instructions, branches, gctime, memory, allocs)
     push!(t.times, time)
+    push!(t.instructions, instructions)
+    push!(t.branches, 
branches) push!(t.gctimes, gctime) memory < t.memory && (t.memory = memory) allocs < t.allocs && (t.allocs = allocs) @@ -34,20 +60,42 @@ end function Base.deleteat!(t::Trial, i) deleteat!(t.times, i) + deleteat!(t.instructions, i) + deleteat!(t.branches, i) deleteat!(t.gctimes, i) return t end Base.length(t::Trial) = length(t.times) function Base.getindex(t::Trial, i::Number) - return push!(Trial(t.params), t.times[i], t.gctimes[i], t.memory, t.allocs) + return push!( + Trial(t.params), + t.times[i], + t.instructions[i], + t.branches[i], + t.gctimes[i], + t.memory, + t.allocs, + ) +end +function Base.getindex(t::Trial, i) + return Trial( + t.params, + t.times[i], + t.instructions[i], + t.branches[i], + t.gctimes[i], + t.memory, + t.allocs, + ) end -Base.getindex(t::Trial, i) = Trial(t.params, t.times[i], t.gctimes[i], t.memory, t.allocs) Base.lastindex(t::Trial) = length(t) function Base.sort!(t::Trial) inds = sortperm(t.times) t.times = t.times[inds] + t.instructions = t.instructions[inds] + t.branches = t.branches[inds] t.gctimes = t.gctimes[inds] return t end @@ -55,6 +103,8 @@ end Base.sort(t::Trial) = sort!(copy(t)) Base.time(t::Trial) = time(minimum(t)) +instructions(t::Trial) = instructions(minimum(t)) +branches(t::Trial) = branches(minimum(t)) gctime(t::Trial) = gctime(minimum(t)) memory(t::Trial) = t.memory allocs(t::Trial) = t.allocs @@ -95,47 +145,91 @@ end mutable struct TrialEstimate params::Parameters time::Float64 + instructions::Float64 + branches::Float64 gctime::Float64 memory::Int allocs::Int end -function TrialEstimate(trial::Trial, t, gct) - return TrialEstimate(params(trial), t, gct, memory(trial), allocs(trial)) +function TrialEstimate(trial::Trial, t, instructions, branches, gctime) + return TrialEstimate( + params(trial), t, instructions, branches, gctime, memory(trial), allocs(trial) + ) end function Base.:(==)(a::TrialEstimate, b::TrialEstimate) return a.params == b.params && a.time == b.time && + eq_including_nan(a.instructions, b.instructions) && + eq_including_nan(a.branches, b.branches) && a.gctime == b.gctime && a.memory == b.memory && a.allocs == b.allocs end function Base.copy(t::TrialEstimate) - return TrialEstimate(copy(t.params), t.time, t.gctime, t.memory, t.allocs) + return TrialEstimate( + copy(t.params), t.time, t.instructions, t.branches, t.gctime, t.memory, t.allocs + ) end function Base.minimum(trial::Trial) i = argmin(trial.times) - return TrialEstimate(trial, trial.times[i], trial.gctimes[i]) + return TrialEstimate( + trial, trial.times[i], trial.instructions[i], trial.branches[i], trial.gctimes[i] + ) end function Base.maximum(trial::Trial) i = argmax(trial.times) - return TrialEstimate(trial, trial.times[i], trial.gctimes[i]) + return TrialEstimate( + trial, trial.times[i], trial.instructions[i], trial.branches[i], trial.gctimes[i] + ) end function Statistics.median(trial::Trial) - return TrialEstimate(trial, median(trial.times), median(trial.gctimes)) + return TrialEstimate( + trial, + median(trial.times), + any(!isnan, trial.instructions) ? median(filter(!isnan, trial.instructions)) : NaN, + any(!isnan, trial.branches) ? 
median(filter(!isnan, trial.branches)) : NaN, + median(trial.gctimes), + ) +end +function Statistics.mean(trial::Trial) + return TrialEstimate( + trial, + mean(trial.times), + mean(filter(!isnan, trial.instructions)), + mean(filter(!isnan, trial.branches)), + mean(trial.gctimes), + ) +end +function Statistics.var(trial::Trial) + return TrialEstimate( + trial, + var(trial.times), + var(filter(!isnan, trial.instructions)), + var(filter(!isnan, trial.branches)), + var(trial.gctimes), + ) +end +function Statistics.std(trial::Trial) + return TrialEstimate( + trial, + std(trial.times), + std(filter(!isnan, trial.instructions)), + std(filter(!isnan, trial.branches)), + std(trial.gctimes), + ) end -Statistics.mean(trial::Trial) = TrialEstimate(trial, mean(trial.times), mean(trial.gctimes)) -Statistics.var(trial::Trial) = TrialEstimate(trial, var(trial.times), var(trial.gctimes)) -Statistics.std(trial::Trial) = TrialEstimate(trial, std(trial.times), std(trial.gctimes)) Base.isless(a::TrialEstimate, b::TrialEstimate) = isless(time(a), time(b)) Base.time(t::TrialEstimate) = t.time +instructions(t::TrialEstimate) = !isnan(t.instructions) ? t.instructions : nothing +branches(t::TrialEstimate) = !isnan(t.branches) ? t.branches : nothing gctime(t::TrialEstimate) = t.gctime memory(t::TrialEstimate) = t.memory allocs(t::TrialEstimate) = t.allocs @@ -148,6 +242,8 @@ params(t::TrialEstimate) = t.params mutable struct TrialRatio params::Parameters time::Float64 + instructions::Float64 + branches::Float64 gctime::Float64 memory::Float64 allocs::Float64 @@ -156,14 +252,22 @@ end function Base.:(==)(a::TrialRatio, b::TrialRatio) return a.params == b.params && a.time == b.time && + eq_including_nan(a.instructions, b.instructions) && + eq_including_nan(a.branches, b.branches) && a.gctime == b.gctime && a.memory == b.memory && a.allocs == b.allocs end -Base.copy(t::TrialRatio) = TrialRatio(copy(t.params), t.time, t.gctime, t.memory, t.allocs) +function Base.copy(t::TrialRatio) + return TrialRatio( + copy(t.params), t.time, t.instructions, t.branches, t.gctime, t.memory, t.allocs + ) +end Base.time(t::TrialRatio) = t.time +instructions(t::TrialRatio) = !isnan(t.instructions) ? t.instructions : nothing +branches(t::TrialRatio) = !isnan(t.branches) ? 
t.branches : nothing gctime(t::TrialRatio) = t.gctime memory(t::TrialRatio) = t.memory allocs(t::TrialRatio) = t.allocs @@ -180,9 +284,21 @@ function ratio(a::TrialEstimate, b::TrialEstimate) ttol = max(params(a).time_tolerance, params(b).time_tolerance) mtol = max(params(a).memory_tolerance, params(b).memory_tolerance) p = Parameters(params(a); time_tolerance=ttol, memory_tolerance=mtol) + instruction_ratio = if (instructions(a) === nothing || instructions(b) === nothing) + NaN + else + ratio(instructions(a), instructions(b)) + end + branch_ratio = if (branches(a) === nothing || branches(b) === nothing) + NaN + else + ratio(branches(a), branches(b)) + end return TrialRatio( p, ratio(time(a), time(b)), + instruction_ratio, + branch_ratio, ratio(gctime(a), gctime(b)), ratio(memory(a), memory(b)), ratio(allocs(a), allocs(b)), @@ -198,22 +314,38 @@ gcratio(t::TrialEstimate) = ratio(gctime(t), time(t)) struct TrialJudgement ratio::TrialRatio time::Symbol + instructions::Symbol + branches::Symbol memory::Symbol end function TrialJudgement(r::TrialRatio) ttol = params(r).time_tolerance + itol = params(r).instruction_tolerance + btol = params(r).branch_tolerance mtol = params(r).memory_tolerance - return TrialJudgement(r, judge(time(r), ttol), judge(memory(r), mtol)) + return TrialJudgement( + r, + judge(time(r), ttol), + judge(instructions(r), itol), + judge(branches(r), btol), + judge(memory(r), mtol), + ) end function Base.:(==)(a::TrialJudgement, b::TrialJudgement) - return a.ratio == b.ratio && a.time == b.time && a.memory == b.memory + return a.ratio == b.ratio && + a.time == b.time && + a.instructions == b.instructions && + a.branches == b.branches && + a.memory == b.memory end Base.copy(t::TrialJudgement) = TrialJudgement(copy(t.params), t.time, t.memory) Base.time(t::TrialJudgement) = t.time +instructions(t::TrialJudgement) = t.instructions +branches(t::TrialJudgement) = t.branches memory(t::TrialJudgement) = t.memory ratio(t::TrialJudgement) = t.ratio params(t::TrialJudgement) = params(ratio(t)) @@ -226,8 +358,12 @@ function judge(r::TrialRatio; kwargs...) 
return TrialJudgement(newr) end +judge(ratio::Nothing, tolerance::Float64) = :unknown + function judge(ratio::Real, tolerance::Float64) - if isnan(ratio) || (ratio - tolerance) > 1.0 + if isnan(ratio) + return :unknown + elseif (ratio - tolerance) > 1.0 return :regression elseif (ratio + tolerance) < 1.0 return :improvement @@ -237,21 +373,31 @@ function judge(ratio::Real, tolerance::Float64) end isimprovement(f, t::TrialJudgement) = f(t) == :improvement -isimprovement(t::TrialJudgement) = isimprovement(time, t) || isimprovement(memory, t) +function isimprovement(t::TrialJudgement) + return isimprovement(time, t) || + isimprovement(instructions, t) || + isimprovement(branches, t) || + isimprovement(memory, t) +end isregression(f, t::TrialJudgement) = f(t) == :regression -isregression(t::TrialJudgement) = isregression(time, t) || isregression(memory, t) - -isinvariant(f, t::TrialJudgement) = f(t) == :invariant -isinvariant(t::TrialJudgement) = isinvariant(time, t) && isinvariant(memory, t) - -const colormap = (regression=:red, improvement=:green, invariant=:normal) +function isregression(t::TrialJudgement) + return isregression(time, t) || + isregression(instructions, t) || + isregression(branches, t) || + isregression(memory, t) +end -printtimejudge(io, t::TrialJudgement) = printstyled(io, time(t); color=colormap[time(t)]) -function printmemoryjudge(io, t::TrialJudgement) - return printstyled(io, memory(t); color=colormap[memory(t)]) +isinvariant(f, t::TrialJudgement) = f(t) == :invariant || f(t) == :unknown +function isinvariant(t::TrialJudgement) + return isinvariant(time, t) && + isinvariant(instructions, t) && + isinvariant(branches, t) && + isinvariant(memory, t) end +const colormap = (regression=:red, improvement=:green, invariant=:normal, unknown=:gray) + ################### # Pretty Printing # ################### @@ -289,6 +435,19 @@ function prettymemory(b) return string(@sprintf("%.2f", value), " ", units) end +function prettycount(b; base_unit="") + if b < 1000 + value, units = b, base_unit + elseif b < 1000^2 + value, units = b / 1000, "K" * base_unit + elseif b < 1000^3 + value, units = b / 1000^2, "M" * base_unit + else + value, units = b / 1000^3, "G" * base_unit + end + return string(@sprintf("%.2f", value), " ", units) +end + function withtypename(f, io, t) needtype = get(io, :typeinfo, Nothing) !== typeof(t) if needtype @@ -339,11 +498,12 @@ _summary(io, t, args...) 
= withtypename(() -> print(io, args...), io, t)
 
 Base.summary(io::IO, t::Trial) = _summary(io, t, prettytime(time(t)))
 Base.summary(io::IO, t::TrialEstimate) = _summary(io, t, prettytime(time(t)))
 Base.summary(io::IO, t::TrialRatio) = _summary(io, t, prettypercent(time(t)))
-Base.summary(io::IO, t::TrialJudgement) =
+function Base.summary(io::IO, t::TrialJudgement)
     withtypename(io, t) do
         print(io, prettydiff(time(ratio(t))), " => ")
-        printtimejudge(io, t)
+        printstyled(io, time(t); color=colormap[time(t)])
     end
+end
 
 _show(io, t) =
     if get(io, :compact, true)
@@ -382,6 +542,8 @@ function Base.show(io::IO, ::MIME"text/plain", t::Trial)
 
     perm = sortperm(t.times)
     times = t.times[perm]
+    instructions = t.instructions[perm]
+    branches = t.branches[perm]
     gctimes = t.gctimes[perm]
 
     if length(t) > 1
@@ -565,9 +727,19 @@ function Base.show(io::IO, ::MIME"text/plain", t::Trial)
 end
 
 function Base.show(io::IO, ::MIME"text/plain", t::TrialEstimate)
-    println(io, "BenchmarkTools.TrialEstimate: ")
+    println(io, "BenchmarkTools.TrialEstimate:")
     pad = get(io, :pad, "")
     println(io, pad, " time: ", prettytime(time(t)))
+    if instructions(t) !== nothing
+        println(
+            io, pad, " instructions: ", prettycount(instructions(t); base_unit="insts")
+        )
+    end
+    if branches(t) !== nothing
+        println(
+            io, pad, " branches: ", prettycount(branches(t); base_unit="branches")
+        )
+    end
     println(
         io,
         pad,
@@ -585,6 +757,12 @@ function Base.show(io::IO, ::MIME"text/plain", t::TrialRatio)
     println(io, "BenchmarkTools.TrialRatio: ")
     pad = get(io, :pad, "")
     println(io, pad, " time: ", time(t))
+    if instructions(t) !== nothing
+        println(io, pad, " instructions: ", instructions(t))
+    end
+    if branches(t) !== nothing
+        println(io, pad, " branches: ", branches(t))
+    end
     println(io, pad, " gctime: ", gctime(t))
     println(io, pad, " memory: ", memory(t))
     return print(io, pad, " allocs: ", allocs(t))
@@ -593,10 +771,20 @@ end
 
 function Base.show(io::IO, ::MIME"text/plain", t::TrialJudgement)
     println(io, "BenchmarkTools.TrialJudgement: ")
     pad = get(io, :pad, "")
-    print(io, pad, " time: ", prettydiff(time(ratio(t))), " => ")
-    printtimejudge(io, t)
+    print(io, pad, " time: ", prettydiff(time(ratio(t))), " => ")
+    printstyled(io, time(t); color=colormap[time(t)])
     println(io, " (", prettypercent(params(t).time_tolerance), " tolerance)")
-    print(io, pad, " memory: ", prettydiff(memory(ratio(t))), " => ")
-    printmemoryjudge(io, t)
+    if instructions(t) !== :unknown
+        print(io, pad, " instructions: ", prettydiff(instructions(ratio(t))), " => ")
+        printstyled(io, instructions(t); color=colormap[instructions(t)])
+        println(io, " (", prettypercent(params(t).instruction_tolerance), " tolerance)")
+    end
+    if branches(t) !== :unknown
+        print(io, pad, " branches: ", prettydiff(branches(ratio(t))), " => ")
+        printstyled(io, branches(t); color=colormap[branches(t)])
+        println(io, " (", prettypercent(params(t).branch_tolerance), " tolerance)")
+    end
+    print(io, pad, " memory: ", prettydiff(memory(ratio(t))), " => ")
+    printstyled(io, memory(t); color=colormap[memory(t)])
     return println(io, " (", prettypercent(params(t).memory_tolerance), " tolerance)")
 end
diff --git a/test/ExecutionTests.jl b/test/ExecutionTests.jl
index 8dea039c..b9b79c55 100644
--- a/test/ExecutionTests.jl
+++ b/test/ExecutionTests.jl
@@ -261,7 +261,13 @@ for (tf, rex1, rex2) in (
     ioctx = IOContext(io, :logbins => tf)
     # A flat distribution won't trigger log by default
     b = BenchmarkTools.Trial(
-        BenchmarkTools.DEFAULT_PARAMETERS, 0.001 * (1:100) * 1e9, zeros(100), 0, 0
+        
BenchmarkTools.DEFAULT_PARAMETERS, + 0.001 * (1:100) * 1e9, + zeros(100), + zeros(100), + zeros(100), + 0, + 0, ) show(ioctx, MIME("text/plain"), b) str = String(take!(io)) @@ -273,6 +279,8 @@ for (tf, rex1, rex2) in ( BenchmarkTools.DEFAULT_PARAMETERS, t / sum(t) * 1e9 * BenchmarkTools.DEFAULT_PARAMETERS.seconds, zeros(100), + zeros(100), + zeros(100), 0, 0, ) diff --git a/test/GroupsTests.jl b/test/GroupsTests.jl index 8a7a5dcd..2b6b9432 100644 --- a/test/GroupsTests.jl +++ b/test/GroupsTests.jl @@ -15,11 +15,13 @@ seteq(a, b) = length(a) == length(b) == length(intersect(a, b)) g1 = BenchmarkGroup(["1", "2"]) -t1a = TrialEstimate(Parameters(; time_tolerance=0.05, memory_tolerance=0.05), 32, 1, 2, 3) +t1a = TrialEstimate( + Parameters(; time_tolerance=0.05, memory_tolerance=0.05), 32, 0, 0, 1, 2, 3 +) t1b = TrialEstimate( - Parameters(; time_tolerance=0.40, memory_tolerance=0.40), 4123, 123, 43, 9 + Parameters(; time_tolerance=0.40, memory_tolerance=0.40), 4123, 0, 0, 123, 43, 9 ) -tc = TrialEstimate(Parameters(; time_tolerance=1.0, memory_tolerance=1.0), 1, 1, 1, 1) +tc = TrialEstimate(Parameters(; time_tolerance=1.0, memory_tolerance=1.0), 1, 0, 0, 1, 1, 1) g1["a"] = t1a g1["b"] = t1b @@ -30,16 +32,26 @@ g1similar = similar(g1) g2 = BenchmarkGroup(["2", "3"]) -t2a = TrialEstimate(Parameters(; time_tolerance=0.05, memory_tolerance=0.05), 323, 1, 2, 3) +t2a = TrialEstimate( + Parameters(; time_tolerance=0.05, memory_tolerance=0.05), 323, 0, 0, 1, 2, 3 +) t2b = TrialEstimate( - Parameters(; time_tolerance=0.40, memory_tolerance=0.40), 1002, 123, 43, 9 + Parameters(; time_tolerance=0.40, memory_tolerance=0.40), 1002, 0, 0, 123, 43, 9 ) g2["a"] = t2a g2["b"] = t2b g2["c"] = tc -trial = BenchmarkTools.Trial(Parameters(), [1, 2, 5], [0, 1, 1], 3, 56) +trial = BenchmarkTools.Trial( + Parameters(), + Float64[1, 2, 5], + Float64[0, 0, 0], + Float64[0, 0, 0], + Float64[0, 1, 1], + 3, + 56, +) gtrial = BenchmarkGroup([], Dict("t" => trial)) @@ -151,10 +163,10 @@ groupsa["g1"] = g1 groupsa["g2"] = g2 g3a = addgroup!(groupsa, "g3", ["3", "4"]) g3a["c"] = TrialEstimate( - Parameters(; time_tolerance=0.05, memory_tolerance=0.05), 6341, 23, 41, 536 + Parameters(; time_tolerance=0.05, memory_tolerance=0.05), 6341, 0, 0, 23, 41, 536 ) g3a["d"] = TrialEstimate( - Parameters(; time_tolerance=0.13, memory_tolerance=0.13), 12341, 3013, 2, 150 + Parameters(; time_tolerance=0.13, memory_tolerance=0.13), 12341, 0, 0, 3013, 2, 150 ) groups_copy = copy(groupsa) @@ -165,10 +177,10 @@ groupsb["g1"] = g1 groupsb["g2"] = g2 g3b = addgroup!(groupsb, "g3", ["3", "4"]) g3b["c"] = TrialEstimate( - Parameters(; time_tolerance=0.05, memory_tolerance=0.05), 1003, 23, 41, 536 + Parameters(; time_tolerance=0.05, memory_tolerance=0.05), 1003, 0, 0, 23, 41, 536 ) g3b["d"] = TrialEstimate( - Parameters(; time_tolerance=0.23, memory_tolerance=0.23), 25341, 3013, 2, 150 + Parameters(; time_tolerance=0.23, memory_tolerance=0.23), 25341, 0, 0, 3013, 2, 150 ) groupstrial = BenchmarkGroup() diff --git a/test/ParametersTests.jl b/test/ParametersTests.jl index 9fa07027..8736946d 100644 --- a/test/ParametersTests.jl +++ b/test/ParametersTests.jl @@ -25,11 +25,15 @@ p = Parameters(; overhead=42, gcsample=false, time_tolerance=0.043, + instruction_tolerance=0.030, + branch_tolerance=0.01, memory_tolerance=0.15, ) oldseconds = BenchmarkTools.DEFAULT_PARAMETERS.seconds oldgctrial = BenchmarkTools.DEFAULT_PARAMETERS.gctrial old_time_tolerance = BenchmarkTools.DEFAULT_PARAMETERS.time_tolerance +old_instruction_tolerance = 
BenchmarkTools.DEFAULT_PARAMETERS.instruction_tolerance +old_branch_tolerance = BenchmarkTools.DEFAULT_PARAMETERS.branch_tolerance old_memory_tolerance = BenchmarkTools.DEFAULT_PARAMETERS.memory_tolerance oldsamples = BenchmarkTools.DEFAULT_PARAMETERS.samples oldevals = BenchmarkTools.DEFAULT_PARAMETERS.evals @@ -38,6 +42,8 @@ oldgcsample = BenchmarkTools.DEFAULT_PARAMETERS.gcsample BenchmarkTools.DEFAULT_PARAMETERS.seconds = p.seconds BenchmarkTools.DEFAULT_PARAMETERS.gctrial = p.gctrial BenchmarkTools.DEFAULT_PARAMETERS.time_tolerance = p.time_tolerance +BenchmarkTools.DEFAULT_PARAMETERS.instruction_tolerance = p.instruction_tolerance +BenchmarkTools.DEFAULT_PARAMETERS.branch_tolerance = p.branch_tolerance BenchmarkTools.DEFAULT_PARAMETERS.memory_tolerance = p.memory_tolerance BenchmarkTools.DEFAULT_PARAMETERS.samples = p.samples BenchmarkTools.DEFAULT_PARAMETERS.evals = p.evals @@ -48,6 +54,8 @@ BenchmarkTools.DEFAULT_PARAMETERS.gcsample = p.gcsample BenchmarkTools.DEFAULT_PARAMETERS.seconds = oldseconds BenchmarkTools.DEFAULT_PARAMETERS.gctrial = oldgctrial BenchmarkTools.DEFAULT_PARAMETERS.time_tolerance = old_time_tolerance +BenchmarkTools.DEFAULT_PARAMETERS.instruction_tolerance = old_instruction_tolerance +BenchmarkTools.DEFAULT_PARAMETERS.branch_tolerance = old_branch_tolerance BenchmarkTools.DEFAULT_PARAMETERS.memory_tolerance = old_memory_tolerance BenchmarkTools.DEFAULT_PARAMETERS.samples = oldsamples BenchmarkTools.DEFAULT_PARAMETERS.evals = oldevals diff --git a/test/SerializationTests.jl b/test/SerializationTests.jl index e24314a1..e54fafc1 100644 --- a/test/SerializationTests.jl +++ b/test/SerializationTests.jl @@ -6,7 +6,8 @@ using Test function eq(x::T, y::T) where {T<:Union{values(BenchmarkTools.SUPPORTED_TYPES)...}} return all(i -> eq(getfield(x, i), getfield(y, i)), 1:fieldcount(T)) end -eq(x::T, y::T) where {T} = isapprox(x, y) +eq(x::Vector{Float64}, y::Vector{Float64}) = all(eq.(x, y)) +eq(x::T, y::T) where {T} = (x === y) || isapprox(x, y) function withtempdir(f::Function) d = mktempdir() @@ -103,18 +104,26 @@ end json_string = "[{\"Julia\":\"1.11.0-DEV.1116\",\"BenchmarkTools\":\"1.4.0\"},[[\"Parameters\",{\"gctrial\":true,\"time_tolerance\":0.05,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":5.0,\"overhead\":0.0,\"memory_tolerance\":0.01}]]]" json_io = IOBuffer(json_string) - @test BenchmarkTools.load(json_io) == - [BenchmarkTools.Parameters(5.0, 10000, 1, false, 0.0, true, false, 0.05, 0.01)] + @test BenchmarkTools.load(json_io) == [ + BenchmarkTools.Parameters( + 5.0, 10000, 1, false, 0.0, true, false, 0.05, 0.05, 0.05, 0.01 + ), + ] json_string = "[{\"Julia\":\"1.11.0-DEV.1116\",\"BenchmarkTools\":\"1.4.0\"},[[\"Parameters\",{\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":true,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":5.0,\"overhead\":0.0,\"memory_tolerance\":0.01}]]]" json_io = IOBuffer(json_string) - @test BenchmarkTools.load(json_io) == - [BenchmarkTools.Parameters(5.0, 10000, 1, true, 0.0, true, false, 0.05, 0.01)] + @test BenchmarkTools.load(json_io) == [ + BenchmarkTools.Parameters( + 5.0, 10000, 1, true, 0.0, true, false, 0.05, 0.05, 0.05, 0.01 + ), + ] end -@testset "Inf in Paramters struct" begin - params = BenchmarkTools.Parameters(Inf, 10000, 1, false, Inf, true, false, Inf, Inf) +@testset "Inf in Parameters struct" begin + params = BenchmarkTools.Parameters( + Inf, 10000, 1, false, Inf, true, false, Inf, Inf, Inf, Inf + ) io = IOBuffer() BenchmarkTools.save(io, params) @@ -124,4 +133,36 @@ end @test 
BenchmarkTools.load(json_io) == [params] end +@testset "NaN in Trial" begin + trial1 = BenchmarkTools.Trial( + BenchmarkTools.Parameters(), [0.49, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], 2, 1 + ) + trial2 = BenchmarkTools.Trial( + BenchmarkTools.Parameters(), [0.49, 0.0], [NaN, NaN], [NaN, NaN], [0.0, 0.0], 2, 1 + ) + + io = IOBuffer() + BenchmarkTools.save(io, trial1, trial2) + json_string = String(take!(io)) + json_io = IOBuffer(json_string) + + @test BenchmarkTools.load(json_io) == [trial1, trial2] +end + +@testset "NaN in TrialEstimate" begin + trial_estimate1 = BenchmarkTools.TrialEstimate( + BenchmarkTools.Parameters(), 0.49, 0, 0, 0.0, 2, 1 + ) + trial_estimate2 = BenchmarkTools.TrialEstimate( + BenchmarkTools.Parameters(), 0.49, NaN, NaN, 0.0, 2, 1 + ) + + io = IOBuffer() + BenchmarkTools.save(io, trial_estimate1, trial_estimate2) + json_string = String(take!(io)) + json_io = IOBuffer(json_string) + + @test BenchmarkTools.load(json_io) == [trial_estimate1, trial_estimate2] +end + end # module diff --git a/test/TrialsTests.jl b/test/TrialsTests.jl index c7ab8396..88a2285c 100644 --- a/test/TrialsTests.jl +++ b/test/TrialsTests.jl @@ -8,14 +8,14 @@ using Test ######### trial1 = BenchmarkTools.Trial(BenchmarkTools.Parameters(; evals=2)) -push!(trial1, 2, 1, 4, 5) -push!(trial1, 21, 0, 41, 51) +push!(trial1, 2, 15, 2, 1, 4, 5) +push!(trial1, 21, 17, 3, 0, 41, 51) trial2 = BenchmarkTools.Trial(BenchmarkTools.Parameters(; time_tolerance=0.15)) -push!(trial2, 21, 0, 41, 51) -push!(trial2, 2, 1, 4, 5) +push!(trial2, 21, 17, 3, 0, 41, 51) +push!(trial2, 2, 15, 2, 1, 4, 5) -push!(trial2, 21, 0, 41, 51) +push!(trial2, 21, 17, 3, 0, 41, 51) @test length(trial2) == 3 deleteat!(trial2, 3) @test length(trial1) == length(trial2) == 2 @@ -25,6 +25,8 @@ sort!(trial2) @test trial2.params == BenchmarkTools.Parameters(; time_tolerance=trial2.params.time_tolerance) @test trial1.times == trial2.times == [2.0, 21.0] +@test trial1.instructions == trial2.instructions == [15.0, 17.0] +@test trial1.branches == trial2.branches == [2.0, 3.0] @test trial1.gctimes == trial2.gctimes == [1.0, 0.0] @test trial1.memory == trial2.memory == 4 @test trial1.allocs == trial2.allocs == 5 @@ -34,10 +36,12 @@ trial2.params = trial1.params @test trial1 == trial2 @test trial1[2] == - push!(BenchmarkTools.Trial(BenchmarkTools.Parameters(; evals=2)), 21, 0, 4, 5) + push!(BenchmarkTools.Trial(BenchmarkTools.Parameters(; evals=2)), 21, 17, 3, 0, 4, 5) @test trial1[1:end] == trial1 @test time(trial1) == time(trial2) == 2.0 +@test instructions(trial1) == instructions(trial2) == 15.0 +@test branches(trial1) == branches(trial2) == 2.0 @test gctime(trial1) == gctime(trial2) == 1.0 @test memory(trial1) == memory(trial2) == trial1.memory @test allocs(trial1) == allocs(trial2) == trial1.allocs @@ -45,7 +49,13 @@ trial2.params = trial1.params # outlier trimming trial3 = BenchmarkTools.Trial( - BenchmarkTools.Parameters(), [1, 2, 3, 10, 11], [1, 1, 1, 1, 1], 1, 1 + BenchmarkTools.Parameters(), + [1, 2, 3, 10, 11], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [1, 1, 1, 1, 1], + 1, + 1, ) trimtrial3 = rmskew(trial3) @@ -61,11 +71,11 @@ rmskew!(trial3) randtrial = BenchmarkTools.Trial(BenchmarkTools.Parameters()) for _ in 1:40 - push!(randtrial, rand(1:20), 1, 1, 1) + push!(randtrial, rand(1:20), 1, 1, 1, 1, 1) end while mean(randtrial) <= median(randtrial) - push!(randtrial, rand(10:20), 1, 1, 1) + push!(randtrial, rand(10:20), 1, 1, 1, 1, 1) end rmskew!(randtrial) @@ -78,6 +88,8 @@ tstd = std(randtrial) tmax = maximum(randtrial) @test 
time(tmin) == time(randtrial) +@test instructions(tmin) == instructions(randtrial) +@test branches(tmin) == branches(randtrial) @test gctime(tmin) == gctime(randtrial) @test memory(tmin) == memory(tmed) == @@ -117,14 +129,16 @@ x, y = rand(randrange), rand(randrange) @test ratio(0.0, 0.0) == 1.0 ta = BenchmarkTools.TrialEstimate( - BenchmarkTools.Parameters(), rand(), rand(), rand(Int), rand(Int) + BenchmarkTools.Parameters(), rand(), rand(), rand(), rand(), rand(Int), rand(Int) ) tb = BenchmarkTools.TrialEstimate( - BenchmarkTools.Parameters(), rand(), rand(), rand(Int), rand(Int) + BenchmarkTools.Parameters(), rand(), rand(), rand(), rand(), rand(Int), rand(Int) ) tr = ratio(ta, tb) @test time(tr) == ratio(time(ta), time(tb)) +@test instructions(tr) == ratio(instructions(ta), instructions(tb)) +@test branches(tr) == ratio(branches(ta), branches(tb)) @test gctime(tr) == ratio(gctime(ta), gctime(tb)) @test memory(tr) == ratio(memory(ta), memory(tb)) @test allocs(tr) == ratio(allocs(ta), allocs(tb)) @@ -133,15 +147,46 @@ tr = ratio(ta, tb) @test BenchmarkTools.gcratio(ta) == ratio(gctime(ta), time(ta)) @test BenchmarkTools.gcratio(tb) == ratio(gctime(tb), time(tb)) +ta_nan = BenchmarkTools.TrialEstimate( + BenchmarkTools.Parameters(), rand(), NaN, NaN, rand(), rand(Int), rand(Int) +) +tb_nan = BenchmarkTools.TrialEstimate( + BenchmarkTools.Parameters(), rand(), NaN, NaN, rand(), rand(Int), rand(Int) +) +tr_nan = ratio(ta_nan, tb_nan) + +@test time(tr_nan) == ratio(time(ta_nan), time(tb_nan)) +@test instructions(tr_nan) === nothing +@test branches(tr_nan) === nothing +@test gctime(tr_nan) == ratio(gctime(ta_nan), gctime(tb_nan)) +@test memory(tr_nan) == ratio(memory(ta_nan), memory(tb_nan)) +@test allocs(tr_nan) == ratio(allocs(ta_nan), allocs(tb_nan)) +@test params(tr_nan) == params(ta_nan) == params(tb_nan) + +@test BenchmarkTools.gcratio(ta_nan) == ratio(gctime(ta_nan), time(ta_nan)) +@test BenchmarkTools.gcratio(tb_nan) == ratio(gctime(tb_nan), time(tb_nan)) + ################## # TrialJudgement # ################## ta = BenchmarkTools.TrialEstimate( - BenchmarkTools.Parameters(; time_tolerance=0.50, memory_tolerance=0.50), 0.49, 0.0, 2, 1 + BenchmarkTools.Parameters(; time_tolerance=0.50, memory_tolerance=0.50), + 0.49, + 0.49, + 0.49, + 0.0, + 2, + 1, ) tb = BenchmarkTools.TrialEstimate( - BenchmarkTools.Parameters(; time_tolerance=0.05, memory_tolerance=0.05), 1.00, 0.0, 1, 1 + BenchmarkTools.Parameters(; time_tolerance=0.05, memory_tolerance=0.05), + 1.00, + 1.00, + 1.00, + 0.0, + 1, + 1, ) tr = ratio(ta, tb) tj_ab = judge(ta, tb) @@ -149,15 +194,32 @@ tj_r = judge(tr) @test ratio(tj_ab) == ratio(tj_r) == tr @test time(tj_ab) == time(tj_r) == :improvement +@test instructions(tj_ab) == instructions(tj_r) == :improvement +@test branches(tj_ab) == branches(tj_r) == :improvement @test memory(tj_ab) == memory(tj_r) == :regression @test tj_ab == tj_r -tj_ab_2 = judge(ta, tb; time_tolerance=2.0, memory_tolerance=2.0) -tj_r_2 = judge(tr; time_tolerance=2.0, memory_tolerance=2.0) +tj_ab_2 = judge( + ta, + tb; + time_tolerance=2.0, + instruction_tolerance=2.0, + branch_tolerance=2.0, + memory_tolerance=2.0, +) +tj_r_2 = judge( + tr; + time_tolerance=2.0, + instruction_tolerance=2.0, + branch_tolerance=2.0, + memory_tolerance=2.0, +) @test tj_ab_2 == tj_r_2 @test ratio(tj_ab_2) == ratio(tj_r_2) @test time(tj_ab_2) == time(tj_r_2) == :invariant +@test instructions(tj_ab_2) == instructions(tj_r_2) == :invariant +@test branches(tj_ab_2) == branches(tj_r_2) == :invariant @test memory(tj_ab_2) == 
memory(tj_r_2) == :invariant @test !(isinvariant(tj_ab)) @@ -170,6 +232,16 @@ tj_r_2 = judge(tr; time_tolerance=2.0, memory_tolerance=2.0) @test isinvariant(time, tj_ab_2) @test isinvariant(time, tj_r_2) +@test !(isinvariant(instructions, tj_ab)) +@test !(isinvariant(instructions, tj_r)) +@test isinvariant(instructions, tj_ab_2) +@test isinvariant(instructions, tj_r_2) + +@test !(isinvariant(branches, tj_ab)) +@test !(isinvariant(branches, tj_r)) +@test isinvariant(branches, tj_ab_2) +@test isinvariant(branches, tj_r_2) + @test !(isinvariant(memory, tj_ab)) @test !(isinvariant(memory, tj_r)) @test isinvariant(memory, tj_ab_2) @@ -185,6 +257,16 @@ tj_r_2 = judge(tr; time_tolerance=2.0, memory_tolerance=2.0) @test !(isregression(time, tj_ab_2)) @test !(isregression(time, tj_r_2)) +@test !(isregression(instructions, tj_ab)) +@test !(isregression(instructions, tj_r)) +@test !(isregression(instructions, tj_ab_2)) +@test !(isregression(instructions, tj_r_2)) + +@test !(isregression(branches, tj_ab)) +@test !(isregression(branches, tj_r)) +@test !(isregression(branches, tj_ab_2)) +@test !(isregression(branches, tj_r_2)) + @test isregression(memory, tj_ab) @test isregression(memory, tj_r) @test !(isregression(memory, tj_ab_2)) @@ -200,6 +282,16 @@ tj_r_2 = judge(tr; time_tolerance=2.0, memory_tolerance=2.0) @test !(isimprovement(time, tj_ab_2)) @test !(isimprovement(time, tj_r_2)) +@test isimprovement(instructions, tj_ab) +@test isimprovement(instructions, tj_r) +@test !(isimprovement(instructions, tj_ab_2)) +@test !(isimprovement(instructions, tj_r_2)) + +@test isimprovement(branches, tj_ab) +@test isimprovement(branches, tj_r) +@test !(isimprovement(branches, tj_ab_2)) +@test !(isimprovement(branches, tj_r_2)) + @test !(isimprovement(memory, tj_ab)) @test !(isimprovement(memory, tj_r)) @test !(isimprovement(memory, tj_ab_2)) @@ -222,6 +314,13 @@ tj_r_2 = judge(tr; time_tolerance=2.0, memory_tolerance=2.0) @test BenchmarkTools.prettytime(999_999_999) == "1000.000 ms" @test BenchmarkTools.prettytime(1_000_000_000) == "1.000 s" +@test BenchmarkTools.prettycount(999; base_unit="trials") == "999.00 trials" +@test BenchmarkTools.prettycount(1000; base_unit="trials") == "1.00 Ktrials" +@test BenchmarkTools.prettycount(999_999; base_unit="trials") == "1000.00 Ktrials" +@test BenchmarkTools.prettycount(1_000_000; base_unit="trials") == "1.00 Mtrials" +@test BenchmarkTools.prettycount(999_999_999; base_unit="trials") == "1000.00 Mtrials" +@test BenchmarkTools.prettycount(1_000_000_000; base_unit="trials") == "1.00 Gtrials" + @test BenchmarkTools.prettymemory(1023) == "1023 bytes" @test BenchmarkTools.prettymemory(1024) == "1.00 KiB" @test BenchmarkTools.prettymemory(1048575) == "1024.00 KiB" @@ -230,7 +329,26 @@ tj_r_2 = judge(tr; time_tolerance=2.0, memory_tolerance=2.0) @test BenchmarkTools.prettymemory(1073741824) == "1.00 GiB" @test sprint(show, "text/plain", ta) == sprint(show, ta; context=:compact => false) == """ -BenchmarkTools.TrialEstimate: +BenchmarkTools.TrialEstimate: + time: 0.490 ns + instructions: 0.49 insts + branches: 0.49 branches + gctime: 0.000 ns (0.00%) + memory: 2 bytes + allocs: 1""" + +tc = BenchmarkTools.TrialEstimate( + BenchmarkTools.Parameters(; time_tolerance=0.50, memory_tolerance=0.50), + 0.49, + NaN, + NaN, + 0.0, + 2, + 1, +) + +@test sprint(show, "text/plain", tc) == """ +BenchmarkTools.TrialEstimate: time: 0.490 ns gctime: 0.000 ns (0.00%) memory: 2 bytes @@ -245,7 +363,9 @@ BenchmarkTools.TrialEstimate: @test sprint(show, [ta, tb]) == 
"BenchmarkTools.TrialEstimate[0.490 ns, 1.000 ns]" -trial1sample = BenchmarkTools.Trial(BenchmarkTools.Parameters(), [1], [1], 1, 1) +trial1sample = BenchmarkTools.Trial( + BenchmarkTools.Parameters(), [1.0], [1.0], [1.0], [1.0], 1, 1 +) @test try display(trial1sample) true @@ -266,7 +386,9 @@ else 1.000 ns""" end -trial = BenchmarkTools.Trial(BenchmarkTools.Parameters(), [1.0, 1.01], [0.0, 0.0], 0, 0) +trial = BenchmarkTools.Trial( + BenchmarkTools.Parameters(), [1.0, 1.01], [0.0, 0.0], [0, 0], [0.0, 0.0], 0, 0 +) @test sprint(show, "text/plain", trial) == """ BenchmarkTools.Trial: 2 samples with 1 evaluation. Range (min … max): 1.000 ns … 1.010 ns ┊ GC (min … max): 0.00% … 0.00% diff --git a/test/runtests.jl b/test/runtests.jl index 6f58393a..98c20fb4 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -34,3 +34,11 @@ println("done (took ", took_seconds, " seconds)") print("Testing serialization...") took_seconds = @elapsed include("SerializationTests.jl") println("done (took ", took_seconds, " seconds)") + +@static if Sys.islinux() + using LinuxPerf + + print("Testing execution (w/ LinuxPerf)...") + took_seconds = @elapsed include("ExecutionTests.jl") + println("done (took ", took_seconds, " seconds)") +end