Change seed to a benchmark parameter
gustaphe committed Oct 1, 2023
1 parent 453d503 commit 49fb200
Showing 6 changed files with 54 additions and 78 deletions.
53 changes: 15 additions & 38 deletions docs/src/manual.md
@@ -85,6 +85,7 @@ You can pass the following keyword arguments to `@benchmark`, `@benchmarkable`,
- `gcsample`: If `true`, run `gc()` before each sample. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.gcsample = false`.
- `time_tolerance`: The noise tolerance for the benchmark's time estimate, as a percentage. This is utilized after benchmark execution, when analyzing results. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.time_tolerance = 0.05`.
- `memory_tolerance`: The noise tolerance for the benchmark's memory estimate, as a percentage. This is utilized after benchmark execution, when analyzing results. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.memory_tolerance = 0.01`.
- `seed`: If non-negative, a seed to which the global RNG is reset before every benchmark run. This makes comparisons between benchmarks actionable even when the running time depends on random numbers. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.seed = -1` (indicating no seed reset).
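As a minimal sketch of the new parameter (illustrative only; `sum(rand(1000))` is an arbitrary example workload), the seed can be passed as a keyword to `@benchmarkable`:

```julia
using BenchmarkTools

# The global RNG is reset to 1234 before each run, so every run of
# this benchmark draws the same random input vector.
b = @benchmarkable sum(rand(1000)) seed = 1234
run(b)
```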

To change the default values of the above fields, one can mutate the fields of `BenchmarkTools.DEFAULT_PARAMETERS`, for example:

@@ -255,6 +256,20 @@ julia> @btime exp(x) setup = (x=1,) # errors
ERROR: UndefVarError: `x` not defined
```

### Consistent random numbers between runs

You can supply the `seed` parameter to have the global RNG reset between runs, yielding a consistent series of pseudorandom numbers.
This is useful when comparing benchmarks: it guarantees that they operate on the same data, without requiring you to construct those datasets manually.

```julia
julia> bg = BenchmarkGroup(
"a" => @benchmarkable(sleep(rand([0, 0.5]))),
"b" => @benchmarkable(sleep(rand([0, 0.5]))),
);
julia> run(bg); # shows different results for "a" and "b", as the sleep time varies
julia> run(bg; seed=42); # shows similar results for "a" and "b"
```
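The seed can also be attached to individual benchmarks as a `@benchmarkable` keyword, as this commit's tests do, pinning two benchmarks to the same random stream; a minimal sketch (the `sum(rand(1000))` workload is illustrative):

```julia
using BenchmarkTools

# Both benchmarks reset the global RNG to the same seed before each
# run, so `rand` produces identical inputs for "a" and "b".
bg = BenchmarkGroup(
    "a" => @benchmarkable(sum(rand(1000)), seed = 1234),
    "b" => @benchmarkable(sum(rand(1000)), seed = 1234),
)
run(bg)
```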

### Understanding compiler optimizations

It's possible for LLVM and Julia's compiler to perform optimizations on `@benchmarkable` expressions. In some cases, these optimizations can elide a computation altogether, resulting in unexpectedly "fast" benchmarks. For example, the following expression is non-allocating:
@@ -596,44 +611,6 @@ BenchmarkTools.BenchmarkGroup:
"trig" => BenchmarkGroup(["math", "triangles"])
```

### Consistent randomness within a `BenchmarkGroup`

When benchmarking using random data, it is sometimes important to ensure that
the same random numbers are used for each benchmark in a group. To do this, you
can supply a `seed` to the `BenchmarkGroup`, and the global seed will then be
reset for each benchmark, meaning each benchmark will receive the same set of
random numbers:

```julia
julia> bg1 = BenchmarkGroup();

julia> bg1["a"] = @benchmarkable sleep(k) setup=(k=rand(0.001:0.001:0.1));

julia> bg1["b"] = @benchmarkable sleep(k) setup=(k=rand(0.001:0.001:0.1));

julia> res1 = run(bg1)
2-element BenchmarkTools.BenchmarkGroup:
tags: []
"b" => Trial(3.171 ms)
"a" => Trial(2.233 ms)

julia> bg2 = BenchmarkGroup(seed=1);

julia> bg2["a"] = @benchmarkable sleep(k) setup=(k=rand(0.001:0.001:0.1));

julia> bg2["b"] = @benchmarkable sleep(k) setup=(k=rand(0.001:0.001:0.1));

julia> res = run(bg2)
2-element BenchmarkTools.BenchmarkGroup:
tags: []
"b" => Trial(2.228 ms)
"a" => Trial(2.172 ms)
```

Note how the identical benchmarks in `bg1` clock in at different times, due to
the random numbers, while the results in `bg2` end up nearly the same, since
both benchmarks operated on the same list of random numbers.

### Working with trial data in a `BenchmarkGroup`

Following from the previous section, we see that running our benchmark suite returns a
6 changes: 1 addition & 5 deletions src/execution.jl
@@ -103,6 +103,7 @@ function _run(b::Benchmark, p::Parameters; verbose=false, pad="", kwargs...)
params = Parameters(p; kwargs...)
@assert params.seconds > 0.0 "time limit must be greater than 0.0"
params.gctrial && gcscrub()
params.seed >= 0 && Random.seed!(params.seed)
start_time = Base.time()
trial = Trial(params)
params.gcsample && gcscrub()
@@ -138,17 +139,13 @@ end
run(group::BenchmarkGroup[, args...]; verbose::Bool = false, pad = "", kwargs...)
Run the benchmark group, with benchmark parameters set to `group`'s by default.
If the benchmark group has a non-negative `seed`, the global seed will be reset before
running each benchmark.
"""
function Base.run(group::BenchmarkGroup, args...; verbose::Bool=false, pad="", kwargs...)
_withprogress("Benchmarking", group; kwargs...) do progressid, nleaves, ndone
result = similar(group)
gcscrub() # run GC before running group, even if individual benchmarks don't manually GC
i = 1
for id in keys(group)
group.seed >= 0 && Random.seed!(group.seed)
@logmsg(
ProgressLevel, "Benchmarking", progress = ndone / nleaves, _id = progressid
)
@@ -249,7 +246,6 @@ function tune!(group::BenchmarkGroup; verbose::Bool=false, pad="", kwargs...)
gcscrub() # run GC before running group, even if individual benchmarks don't manually GC
i = 1
for id in keys(group)
group.seed >= 0 && Random.seed!(group.seed)
@logmsg(ProgressLevel, "Tuning", progress = ndone / nleaves, _id = progressid)
verbose && println(pad, "($(i)/$(length(group))) tuning ", repr(id), "...")
took_seconds = @elapsed tune!(
22 changes: 7 additions & 15 deletions src/groups.jl
@@ -16,14 +16,12 @@ makekey(v::Any) = string(v)::String
struct BenchmarkGroup
tags::Vector{Any}
data::Dict{Any,Any}
seed::Int
end

function BenchmarkGroup(tags::Vector, args::Pair...; seed=-1)
return BenchmarkGroup(tags, Dict{Any,Any}((makekey(k) => v for (k, v) in args)), seed)
function BenchmarkGroup(tags::Vector, args::Pair...)
return BenchmarkGroup(tags, Dict{Any,Any}((makekey(k) => v for (k, v) in args)))
end
BenchmarkGroup(args::Pair...; seed=-1) = BenchmarkGroup([], args...; seed=seed)
BenchmarkGroup(tags, args; seed=-1) = BenchmarkGroup(tags, args, seed)
BenchmarkGroup(args::Pair...) = BenchmarkGroup([], args...)

function addgroup!(suite::BenchmarkGroup, id, args...)
g = BenchmarkGroup(args...)
@@ -53,15 +51,9 @@ clear_empty!(x) = x
# Dict-like methods #
#-------------------#

function Base.:(==)(a::BenchmarkGroup, b::BenchmarkGroup)
return a.tags == b.tags && a.data == b.data && a.seed == b.seed
end
function Base.copy(group::BenchmarkGroup)
return BenchmarkGroup(copy(group.tags), copy(group.data), group.seed)
end
function Base.similar(group::BenchmarkGroup)
return BenchmarkGroup(copy(group.tags), empty(group.data), group.seed)
end
Base.:(==)(a::BenchmarkGroup, b::BenchmarkGroup) = a.tags == b.tags && a.data == b.data
Base.copy(group::BenchmarkGroup) = BenchmarkGroup(copy(group.tags), copy(group.data))
Base.similar(group::BenchmarkGroup) = BenchmarkGroup(copy(group.tags), empty(group.data))

"""
isempty(group::BenchmarkGroup)
@@ -318,7 +310,7 @@ end
#----------------------------#

function Base.getindex(group::BenchmarkGroup, x::BenchmarkGroup)
result = BenchmarkGroup(; seed=group.seed)
result = BenchmarkGroup()
for (k, v) in x
result[k] = isa(v, BenchmarkGroup) ? group[k][v] : group[k]
end
11 changes: 9 additions & 2 deletions src/parameters.jl
@@ -15,9 +15,10 @@ mutable struct Parameters
gcsample::Bool
time_tolerance::Float64
memory_tolerance::Float64
seed::Int
end

const DEFAULT_PARAMETERS = Parameters(5.0, 10000, 1, false, 0, true, false, 0.05, 0.01)
const DEFAULT_PARAMETERS = Parameters(5.0, 10000, 1, false, 0, true, false, 0.05, 0.01, -1)

function Parameters(;
seconds=DEFAULT_PARAMETERS.seconds,
@@ -29,6 +30,7 @@ function Parameters(;
gcsample=DEFAULT_PARAMETERS.gcsample,
time_tolerance=DEFAULT_PARAMETERS.time_tolerance,
memory_tolerance=DEFAULT_PARAMETERS.memory_tolerance,
seed=DEFAULT_PARAMETERS.seed,
)
return Parameters(
seconds,
@@ -40,6 +42,7 @@
gcsample,
time_tolerance,
memory_tolerance,
seed,
)
end

@@ -53,6 +56,7 @@ function Parameters(
gcsample=nothing,
time_tolerance=nothing,
memory_tolerance=nothing,
seed=nothing,
)
params = Parameters()
params.seconds = seconds != nothing ? seconds : default.seconds
@@ -65,6 +69,7 @@
time_tolerance != nothing ? time_tolerance : default.time_tolerance
params.memory_tolerance =
memory_tolerance != nothing ? memory_tolerance : default.memory_tolerance
params.seed = seed != nothing ? seed : default.seed
return params::BenchmarkTools.Parameters
end

@@ -76,7 +81,8 @@ function Base.:(==)(a::Parameters, b::Parameters)
a.gctrial == b.gctrial &&
a.gcsample == b.gcsample &&
a.time_tolerance == b.time_tolerance &&
a.memory_tolerance == b.memory_tolerance
a.memory_tolerance == b.memory_tolerance &&
a.seed == b.seed
end

function Base.copy(p::Parameters)
@@ -90,6 +96,7 @@ function Base.copy(p::Parameters)
p.gcsample,
p.time_tolerance,
p.memory_tolerance,
p.seed,
)
end

22 changes: 22 additions & 0 deletions test/ExecutionTests.jl
@@ -357,4 +357,26 @@ b = x = nothing
GC.gc()
@test x_finalized

# Set seed
results = Dict("a" => Int[], "b" => Int[], "c" => Int[], "d" => Int[])
bg = BenchmarkGroup(
"a" => @benchmarkable(
push!(results["a"], rand(Int)), samples = 10, evals = 1, seed = 1234
),
"b" => @benchmarkable(
push!(results["b"], rand(Int)), samples = 10, evals = 1, seed = 1234
),
"c" => @benchmarkable(
push!(results["c"], rand(Int)), samples = 10, evals = 1, seed = 1235
),
"d" => @benchmarkable(push!(results["d"], rand(Int)), samples = 10, evals = 1),
)
run(bg)
@test results["a"] == results["b"]
@test results["a"] != results["c"]
@test results["a"] != results["d"]
results = Dict("a" => Int[], "b" => Int[], "c" => Int[], "d" => Int[])
run(bg; seed=1)
@test results["a"] == results["b"] == results["c"] == results["d"]

end # module
18 changes: 0 additions & 18 deletions test/GroupsTests.jl
@@ -432,22 +432,4 @@ for T in [Float32, Float64], n in [10, 100], m in [5, 20]
@test typeof(g1["sum"][T][n][m]) == BenchmarkTools.Benchmark
end

results = Dict("a" => Int[], "b" => Int[])
bg = BenchmarkGroup(
"a" => @benchmarkable(push!(results["a"], rand(Int)), samples = 10, evals = 1),
"b" => @benchmarkable(push!(results["b"], rand(Int)), samples = 10, evals = 1),
;
seed=1234,
)
run(bg)
@test results["a"] == results["b"]
bg = BenchmarkGroup(
"a" => @benchmarkable(push!(results["a"], rand(Int)), samples = 10, evals = 1),
"b" => @benchmarkable(push!(results["b"], rand(Int)), samples = 10, evals = 1),
;
# No seed specified
)
run(bg)
@test results["a"] != results["b"]

# end # module
