From 8608df37eed7d4de79728dcd53ad05f03d8797cf Mon Sep 17 00:00:00 2001 From: Graeme A Stewart Date: Thu, 28 Mar 2024 12:04:25 +0100 Subject: [PATCH 1/5] Improved enum use and generic algorithm Better use of the strategy enum (with conversion to/from strings as needed) Introduce the generic_jet_reconstruction wrapper that supports also the "Best" strategy, which dynamically switches algorithm based on the event cluster density. --- Project.toml | 1 + examples/jetreco.jl | 52 ++++++++++++++++------------------------ src/GenericAlgo.jl | 16 +++++++++++++ src/JetRecoStrategies.jl | 18 ++++++++++++++ src/JetReconstruction.jl | 17 +++++++------ 5 files changed, 65 insertions(+), 39 deletions(-) create mode 100644 src/GenericAlgo.jl create mode 100644 src/JetRecoStrategies.jl diff --git a/Project.toml b/Project.toml index d1cd28e..7b3a43c 100644 --- a/Project.toml +++ b/Project.toml @@ -13,6 +13,7 @@ Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890" LorentzVectorHEP = "f612022c-142a-473f-8cfd-a09cf3793c6c" LorentzVectors = "3f54b04b-17fc-5cd4-9758-90c048d965e3" +MLStyle = "d8e11817-5142-5d16-987a-aa16d5891078" Profile = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79" ProfileSVG = "132c30aa-f267-4189-9183-c8a63c7e05e6" StatProfilerHTML = "a8a75453-ed82-57c9-9e16-4cd1196ecbf5" diff --git a/examples/jetreco.jl b/examples/jetreco.jl index 0f092ad..6954fbe 100644 --- a/examples/jetreco.jl +++ b/examples/jetreco.jl @@ -54,7 +54,15 @@ function profile_code(jet_reconstruction, events, niters) """, ) end +""" +Top level call funtion for demonstrating the use of jet reconstruction + +This uses the "generic_jet_reconstruct" wrapper, so algorithm swutching +happens inside the JetReconstruction package itself. +Some other ustilities are also supported here, such as profiling and +serialising the reconstructed jet outputs. +""" function jet_process( events::Vector{Vector{PseudoJet}}; ptmin::Float64 = 5.0, @@ -69,22 +77,6 @@ function jet_process( ) @info "Will process $(size(events)[1]) events" - # Strategy - if (strategy == N2Plain) - jet_reconstruction = plain_jet_reconstruct - elseif (strategy == N2Tiled || stragegy == Best) - jet_reconstruction = tiled_jet_reconstruct - else - throw(ErrorException("Strategy not yet implemented")) - end - - # Build internal EDM structures for timing measurements, if needed - # For N2Tiled and N2Plain this is unnecessary as both these reconstruction - # methods can process PseudoJets directly - if (strategy == N2Tiled) || (strategy == N2Plain) - event_vector = events - end - # If we are dumping the results, setup the JSON structure if !isnothing(dump) jet_collection = FinalJets[] @@ -93,21 +85,21 @@ function jet_process( # Warmup code if we are doing a multi-sample timing run if nsamples > 1 || profile @info "Doing initial warm-up run" - for event in event_vector - finaljets, _ = jet_reconstruction(event, R = distance, p = power) + for event in events + finaljets, _ = generic_jet_reconstruct(event, R = distance, p = power, strategy = strategy) final_jets(finaljets, ptmin) end end if profile - profile_code(jet_reconstruction, event_vector, nsamples) + profile_code(generic_jet_reconstruct, events, nsamples) return nothing end if alloc println("Memory allocation statistics:") - @timev for event in event_vector - finaljets, _ = jet_reconstruction(event, R = distance, p = power) + @timev for event in events + finaljets, _ = generic_jet_reconstruct(event, R = distance, p = power, strategy = strategy) final_jets(finaljets, ptmin) end return nothing @@ -121,8 +113,8 @@ function jet_process( for irun ∈ 1:nsamples gcoff && GC.enable(false) t_start = time_ns() - for (ievt, event) in enumerate(event_vector) - finaljets, _ = jet_reconstruction(event, R = distance, p = power, ptmin=ptmin) + for (ievt, event) in enumerate(events) + finaljets, _ = generic_jet_reconstruct(event, R = distance, p = power, ptmin=ptmin, strategy = strategy) fj = final_jets(finaljets, ptmin) # Only print the jet content once if irun == 1 @@ -206,9 +198,9 @@ parse_command_line(args) = begin default = -1 "--strategy" - help = "Strategy for the algorithm, valid values: Best, N2Plain, N2Tiled, N2TiledSoAGlobal, N2TiledSoATile" + help = """Strategy for the algorithm, valid values: $(join(JetReconstruction.AllJetRecoStrategies, ", "))""" arg_type = JetRecoStrategy - default = N2Plain + default = JetReconstruction.N2Plain::JetRecoStrategy "--nsamples", "-m" help = "Number of measurement points to acquire." @@ -247,15 +239,11 @@ end function ArgParse.parse_item(::Type{JetRecoStrategy}, x::AbstractString) - if (x == "Best") - return JetRecoStrategy(0) - elseif (x == "N2Plain") - return JetRecoStrategy(1) - elseif (x == "N2Tiled") - return JetRecoStrategy(2) - else + s = tryparse(JetRecoStrategy, x) + if s === nothing throw(ErrorException("Invalid value for strategy: $(x)")) end + s end main() = begin diff --git a/src/GenericAlgo.jl b/src/GenericAlgo.jl new file mode 100644 index 0000000..c27685e --- /dev/null +++ b/src/GenericAlgo.jl @@ -0,0 +1,16 @@ +# This is the generic reconstruction algorithm that will +# switch based on the strategy, or based on the event density +# if the "Best" strategy is to be employed + +function generic_jet_reconstruct(particles; p = -1, R = 1.0, recombine = +, ptmin = 0.0, strategy = Best::JetRecoStrategy) + # Either map to the fixed algorithm corresponding to the strategy + # or to an optimal choice based on the density of initial particles + algorithm = @match strategy begin + N2Plain => plain_jet_reconstruct + N2Tiled => tiled_jet_reconstruct + Best => length(particles) > 50 ? tiled_jet_reconstruct : plain_jet_reconstruct + end + + # Now call the chosen algorithm, passing through the other parameters + algorithm(particles; p = p, R = R, recombine = recombine, ptmin = ptmin) +end diff --git a/src/JetRecoStrategies.jl b/src/JetRecoStrategies.jl new file mode 100644 index 0000000..2088d39 --- /dev/null +++ b/src/JetRecoStrategies.jl @@ -0,0 +1,18 @@ +using MLStyle + +# Valid strategy enum +@enum JetRecoStrategy Best N2Plain N2Tiled + +# Map from string to strategy +Base.tryparse(E::Type{<:Enum}, str::String) = + let insts = instances(E) , + p = findfirst(==(Symbol(str)) ∘ Symbol, insts) ; + p !== nothing ? insts[p] : nothing + end + +const AllJetRecoStrategies = [ String(Symbol(x)) for x in instances(JetRecoStrategy) ] + +# We will use the MLStyle package for a "switch", so needs a few tweaks for Julia enums +# https://thautwarm.github.io/MLStyle.jl/latest/syntax/pattern.html#support-pattern-matching-for-julia-enums +MLStyle.is_enum(::JetRecoStrategy) = true +MLStyle.enum_matcher(e::JetRecoStrategy, expr) = :($e === $expr) diff --git a/src/JetReconstruction.jl b/src/JetReconstruction.jl index e59346d..1b2d868 100644 --- a/src/JetReconstruction.jl +++ b/src/JetReconstruction.jl @@ -4,6 +4,7 @@ Jet reconstruction (reclustering) in Julia. module JetReconstruction using LorentzVectorHEP +using MLStyle # Import from LorentzVectorHEP methods for those 4-vector types pt2(p::LorentzVector) = LorentzVectorHEP.pt2(p) @@ -22,13 +23,17 @@ py(p::LorentzVectorCyl) = LorentzVectorHEP.py(p) pz(p::LorentzVectorCyl) = LorentzVectorHEP.pz(p) energy(p::LorentzVectorCyl) = LorentzVectorHEP.energy(p) -# Philipp's pseudojet +# Philipp's pseudojet type include("Pseudojet.jl") export PseudoJet # Simple HepMC3 reader include("HepMC3.jl") +# Jet reconstruction strategies +include("JetRecoStrategies.jl") +export JetRecoStrategy + ## N2Plain algorithm # Algorithmic part for simple sequential implementation include("PlainAlgo.jl") @@ -37,11 +42,14 @@ export plain_jet_reconstruct ## N2Tiled algorithm # Common pieces include("TiledAlgoUtils.jl") - # Algorithmic part, tiled reconstruction strategy with linked list jet objects include("TiledAlgoLL.jl") export tiled_jet_reconstruct +## Generic algorithm, which can switch strategy dynamically +include("GenericAlgo.jl") +export generic_jet_reconstruct + # jet serialisation (saving to file) include("Serialize.jl") export savejets, loadjets!, loadjets @@ -58,9 +66,4 @@ export jetsplot include("JSONresults.jl") export FinalJet, FinalJets, JSON3 -# Strategy to be used -## Maybe an enum is not the best idea, use type dispatch instead? -@enum JetRecoStrategy Best N2Plain N2Tiled -export JetRecoStrategy, Best, N2Plain, N2Tiled - end From f52c55ce1a0cf27a43bcf72e480ba9c4eb1c1acf Mon Sep 17 00:00:00 2001 From: Graeme A Stewart Date: Wed, 3 Apr 2024 13:12:34 +0200 Subject: [PATCH 2/5] Switch to use EnumX EnumX provides a scoped enum, which is much nicer when the enum names are quite generic (like "Best") This doesn't work with MLStyle, so drop the switching back to a simple "if .. elseif ..." construct --- Project.toml | 2 +- examples/jetreco.jl | 10 +++++----- src/GenericAlgo.jl | 15 ++++++++++----- src/JetRecoStrategies.jl | 14 +++++--------- 4 files changed, 21 insertions(+), 20 deletions(-) diff --git a/Project.toml b/Project.toml index 7b3a43c..677ece7 100644 --- a/Project.toml +++ b/Project.toml @@ -7,13 +7,13 @@ version = "0.2.0" Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697" ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63" Colors = "5ae59095-9a9b-59fe-a467-6f913c188581" +EnumX = "4e289a0a-7415-4d19-859d-a7e5c4648b56" FlameGraphs = "08572546-2f56-4bcf-ba4e-bab62c3a3f89" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890" LorentzVectorHEP = "f612022c-142a-473f-8cfd-a09cf3793c6c" LorentzVectors = "3f54b04b-17fc-5cd4-9758-90c048d965e3" -MLStyle = "d8e11817-5142-5d16-987a-aa16d5891078" Profile = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79" ProfileSVG = "132c30aa-f267-4189-9183-c8a63c7e05e6" StatProfilerHTML = "a8a75453-ed82-57c9-9e16-4cd1196ecbf5" diff --git a/examples/jetreco.jl b/examples/jetreco.jl index 6954fbe..48e98fb 100644 --- a/examples/jetreco.jl +++ b/examples/jetreco.jl @@ -68,7 +68,7 @@ function jet_process( ptmin::Float64 = 5.0, distance::Float64 = 0.4, power::Integer = -1, - strategy::JetRecoStrategy, + strategy::JetRecoStrategy.T, nsamples::Integer = 1, gcoff::Bool = false, profile::Bool = false, @@ -199,8 +199,8 @@ parse_command_line(args) = begin "--strategy" help = """Strategy for the algorithm, valid values: $(join(JetReconstruction.AllJetRecoStrategies, ", "))""" - arg_type = JetRecoStrategy - default = JetReconstruction.N2Plain::JetRecoStrategy + arg_type = JetRecoStrategy.T + default = JetRecoStrategy.Best "--nsamples", "-m" help = "Number of measurement points to acquire." @@ -238,8 +238,8 @@ parse_command_line(args) = begin end -function ArgParse.parse_item(::Type{JetRecoStrategy}, x::AbstractString) - s = tryparse(JetRecoStrategy, x) +function ArgParse.parse_item(::Type{JetRecoStrategy.T}, x::AbstractString) + s = tryparse(JetRecoStrategy.T, x) if s === nothing throw(ErrorException("Invalid value for strategy: $(x)")) end diff --git a/src/GenericAlgo.jl b/src/GenericAlgo.jl index c27685e..5dd7e14 100644 --- a/src/GenericAlgo.jl +++ b/src/GenericAlgo.jl @@ -2,13 +2,18 @@ # switch based on the strategy, or based on the event density # if the "Best" strategy is to be employed -function generic_jet_reconstruct(particles; p = -1, R = 1.0, recombine = +, ptmin = 0.0, strategy = Best::JetRecoStrategy) +function generic_jet_reconstruct(particles; p = -1, R = 1.0, recombine = +, ptmin = 0.0, strategy = JetRecoStrategy.Best) # Either map to the fixed algorithm corresponding to the strategy # or to an optimal choice based on the density of initial particles - algorithm = @match strategy begin - N2Plain => plain_jet_reconstruct - N2Tiled => tiled_jet_reconstruct - Best => length(particles) > 50 ? tiled_jet_reconstruct : plain_jet_reconstruct + + if strategy == JetRecoStrategy.Best + algorithm = length(particles) > 50 ? tiled_jet_reconstruct : plain_jet_reconstruct + elseif strategy == JetRecoStrategy.N2Plain + algorithm = plain_jet_reconstruct + elseif strategy == JetRecoStrategy.N2Tiled + algorithm = tiled_jet_reconstruct + else + throw(ErrorException("Invalid strategy: $(strategy)")) end # Now call the chosen algorithm, passing through the other parameters diff --git a/src/JetRecoStrategies.jl b/src/JetRecoStrategies.jl index 2088d39..4e357dd 100644 --- a/src/JetRecoStrategies.jl +++ b/src/JetRecoStrategies.jl @@ -1,18 +1,14 @@ -using MLStyle +using EnumX -# Valid strategy enum -@enum JetRecoStrategy Best N2Plain N2Tiled +# Valid strategy enum (this is a scoped enum) +@enumx JetRecoStrategy Best N2Plain N2Tiled -# Map from string to strategy +# Map from string to an enum value (used for CLI parsing) Base.tryparse(E::Type{<:Enum}, str::String) = let insts = instances(E) , p = findfirst(==(Symbol(str)) ∘ Symbol, insts) ; p !== nothing ? insts[p] : nothing end -const AllJetRecoStrategies = [ String(Symbol(x)) for x in instances(JetRecoStrategy) ] +const AllJetRecoStrategies = [ String(Symbol(x)) for x in instances(JetRecoStrategy.T) ] -# We will use the MLStyle package for a "switch", so needs a few tweaks for Julia enums -# https://thautwarm.github.io/MLStyle.jl/latest/syntax/pattern.html#support-pattern-matching-for-julia-enums -MLStyle.is_enum(::JetRecoStrategy) = true -MLStyle.enum_matcher(e::JetRecoStrategy, expr) = :($e === $expr) From c9080f6d1363066acbf4d0058037ca219f9f8c55 Mon Sep 17 00:00:00 2001 From: Graeme A Stewart Date: Wed, 3 Apr 2024 13:16:50 +0200 Subject: [PATCH 3/5] Use better name for emun Use the explicit name "Strategy" for the emun type instead of the generic "T" (this works better with the --help option) --- examples/jetreco.jl | 8 ++++---- src/JetRecoStrategies.jl | 4 ++-- src/JetReconstruction.jl | 1 - 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/examples/jetreco.jl b/examples/jetreco.jl index 48e98fb..61d8a3f 100644 --- a/examples/jetreco.jl +++ b/examples/jetreco.jl @@ -68,7 +68,7 @@ function jet_process( ptmin::Float64 = 5.0, distance::Float64 = 0.4, power::Integer = -1, - strategy::JetRecoStrategy.T, + strategy::JetRecoStrategy.Strategy, nsamples::Integer = 1, gcoff::Bool = false, profile::Bool = false, @@ -199,7 +199,7 @@ parse_command_line(args) = begin "--strategy" help = """Strategy for the algorithm, valid values: $(join(JetReconstruction.AllJetRecoStrategies, ", "))""" - arg_type = JetRecoStrategy.T + arg_type = JetRecoStrategy.Strategy default = JetRecoStrategy.Best "--nsamples", "-m" @@ -238,8 +238,8 @@ parse_command_line(args) = begin end -function ArgParse.parse_item(::Type{JetRecoStrategy.T}, x::AbstractString) - s = tryparse(JetRecoStrategy.T, x) +function ArgParse.parse_item(::Type{JetRecoStrategy.Strategy}, x::AbstractString) + s = tryparse(JetRecoStrategy.Strategy, x) if s === nothing throw(ErrorException("Invalid value for strategy: $(x)")) end diff --git a/src/JetRecoStrategies.jl b/src/JetRecoStrategies.jl index 4e357dd..0e42c22 100644 --- a/src/JetRecoStrategies.jl +++ b/src/JetRecoStrategies.jl @@ -1,7 +1,7 @@ using EnumX # Valid strategy enum (this is a scoped enum) -@enumx JetRecoStrategy Best N2Plain N2Tiled +@enumx T=Strategy JetRecoStrategy Best N2Plain N2Tiled # Map from string to an enum value (used for CLI parsing) Base.tryparse(E::Type{<:Enum}, str::String) = @@ -10,5 +10,5 @@ Base.tryparse(E::Type{<:Enum}, str::String) = p !== nothing ? insts[p] : nothing end -const AllJetRecoStrategies = [ String(Symbol(x)) for x in instances(JetRecoStrategy.T) ] +const AllJetRecoStrategies = [ String(Symbol(x)) for x in instances(JetRecoStrategy.Strategy) ] diff --git a/src/JetReconstruction.jl b/src/JetReconstruction.jl index 1b2d868..c2a0ab5 100644 --- a/src/JetReconstruction.jl +++ b/src/JetReconstruction.jl @@ -4,7 +4,6 @@ Jet reconstruction (reclustering) in Julia. module JetReconstruction using LorentzVectorHEP -using MLStyle # Import from LorentzVectorHEP methods for those 4-vector types pt2(p::LorentzVector) = LorentzVectorHEP.pt2(p) From 61d63b3facf14ebb6aee6010e5783c11b88c328b Mon Sep 17 00:00:00 2001 From: Graeme A Stewart Date: Wed, 3 Apr 2024 13:33:16 +0200 Subject: [PATCH 4/5] Update algorithm switching point Results indicated that N2Tiled gets faster than N2Plain between 65 and 112 initial clusters, so 90 is a good intermediate pick --- src/GenericAlgo.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/GenericAlgo.jl b/src/GenericAlgo.jl index 5dd7e14..3fc3037 100644 --- a/src/GenericAlgo.jl +++ b/src/GenericAlgo.jl @@ -7,7 +7,8 @@ function generic_jet_reconstruct(particles; p = -1, R = 1.0, recombine = +, ptmi # or to an optimal choice based on the density of initial particles if strategy == JetRecoStrategy.Best - algorithm = length(particles) > 50 ? tiled_jet_reconstruct : plain_jet_reconstruct + # The breakpoint of ~90 is determined empirically on e+e- -> H and 0.5TeV pp -> 5GeV jets + algorithm = length(particles) > 80 ? tiled_jet_reconstruct : plain_jet_reconstruct elseif strategy == JetRecoStrategy.N2Plain algorithm = plain_jet_reconstruct elseif strategy == JetRecoStrategy.N2Tiled From c567599f6e7d377b4138624243f36974b9b910e0 Mon Sep 17 00:00:00 2001 From: Graeme A Stewart Date: Wed, 3 Apr 2024 13:47:36 +0200 Subject: [PATCH 5/5] Fix test script Update to new strategy enum --- test/runtests.jl | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index ae2b9f0..0b0c566 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -43,26 +43,26 @@ function main() # Test each stratgy... for power in keys(algorithms) - do_test_compare_to_fastjet(N2Plain, fastjet_data[power], algname = algorithms[power], power = power) - do_test_compare_to_fastjet(N2Tiled, fastjet_data[power], algname = algorithms[power], power = power) + do_test_compare_to_fastjet(JetRecoStrategy.N2Plain, fastjet_data[power], algname = algorithms[power], power = power) + do_test_compare_to_fastjet(JetRecoStrategy.N2Tiled, fastjet_data[power], algname = algorithms[power], power = power) end # Compare inputing data in PseudoJet with using a LorentzVector - do_test_compare_types(N2Plain, algname = algorithms[-1], power = -1) - do_test_compare_types(N2Tiled, algname = algorithms[-1], power = -1) + do_test_compare_types(JetRecoStrategy.N2Plain, algname = algorithms[-1], power = -1) + do_test_compare_types(JetRecoStrategy.N2Tiled, algname = algorithms[-1], power = -1) end -function do_test_compare_to_fastjet(strategy::JetRecoStrategy, fastjet_jets; +function do_test_compare_to_fastjet(strategy::JetRecoStrategy.Strategy, fastjet_jets; algname = "Unknown", ptmin::Float64 = 5.0, distance::Float64 = 0.4, power::Integer = -1) # Strategy - if (strategy == N2Plain) + if (strategy == JetRecoStrategy.N2Plain) jet_reconstruction = plain_jet_reconstruct strategy_name = "N2Plain" - elseif (strategy == N2Tiled) + elseif (strategy == JetRecoStrategy.N2Tiled) jet_reconstruction = tiled_jet_reconstruct strategy_name = "N2Tiled" else @@ -104,17 +104,17 @@ function do_test_compare_to_fastjet(strategy::JetRecoStrategy, fastjet_jets; end end -function do_test_compare_types(strategy::JetRecoStrategy; +function do_test_compare_types(strategy::JetRecoStrategy.Strategy; algname = "Unknown", ptmin::Float64 = 5.0, distance::Float64 = 0.4, power::Integer = -1) # Strategy - if (strategy == N2Plain) + if (strategy == JetRecoStrategy.N2Plain) jet_reconstruction = plain_jet_reconstruct strategy_name = "N2Plain" - elseif (strategy == N2Tiled) + elseif (strategy == JetRecoStrategy.N2Tiled) jet_reconstruction = tiled_jet_reconstruct strategy_name = "N2Tiled" else