diff --git a/.gitignore b/.gitignore index a046575..d1458d2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,38 @@ -/Manifest.toml -/jetsavetest*.dat -/demo.jl -.vscode +# Files generated by invoking Julia with --code-coverage +*.jl.cov +*.jl.*.cov + +# Files generated by invoking Julia with --track-allocation +*.jl.mem + +# System-specific files and directories generated by the BinaryProvider and BinDeps packages +# They contain absolute paths specific to the host computer, and so should not be committed +deps/deps.jl +deps/build.log +deps/downloads/ +deps/usr/ +deps/src/ + +# Build artifacts for creating documentation generated by the Documenter package +docs/build/ +docs/site/ + +# File generated by Pkg, the package manager, based on a corresponding Project.toml +# It records a fixed state of all packages used by the project. As such, it should not be +# committed for packages, but should be committed for applications that require a static +# environment. +Manifest.toml + +# Editor files +.vscode/* + +# Test data files and benchmarking +jetsavetest*.dat +benchmark/* + +# Misc files +.DS_Store /notebooks/* +/profile/* /statprof/* /debug/* -*.mem -/benchmark/* diff --git a/examples/jetreco.jl b/examples/jetreco.jl index 61d8a3f..874ef32 100644 --- a/examples/jetreco.jl +++ b/examples/jetreco.jl @@ -17,18 +17,20 @@ using JSON using LorentzVectorHEP using JetReconstruction -function profile_code(jet_reconstruction, events, niters) +function profile_code(profile, jet_reconstruction, events, niters; R=0.4, p=-1, strategy=JetRecoStrategy.N2Tiled) Profile.init(n = 5*10^6, delay = 0.00001) profile_events(events) = begin for evt in events - jet_reconstruction(evt, R = 0.4) + jet_reconstruction(evt, R=R, p=p, strategy=strategy) end end profile_events(events[1:1]) @profile for i ∈ 1:niters profile_events(events) end - statprofilehtml() + profile_path = joinpath("profile", profile, "profsvg.svg") + mkpath(dirname(profile_path)) + statprofilehtml(path=dirname(profile_path)) fcolor = FlameGraphs.FlameColors( reverse(colormap("Blues", 15))[1:5], colorant"slategray4", @@ -38,14 +40,14 @@ function profile_code(jet_reconstruction, events, niters) ) ProfileSVG.save( fcolor, - joinpath("statprof", "profsvg.svg"); + profile_path, combine = true, timeunit = :ms, font = "Arial, Helvetica, sans-serif", ) println( "Flame graph from ProfileSVG.jl at file://", - abspath("statprof/profsvg.svg"), + abspath(profile_path), "\n", """ \tRed tint: Runtime dispatch @@ -71,7 +73,7 @@ function jet_process( strategy::JetRecoStrategy.Strategy, nsamples::Integer = 1, gcoff::Bool = false, - profile::Bool = false, + profile = nothing, alloc::Bool = false, dump::Union{String, Nothing} = nothing, ) @@ -83,24 +85,22 @@ function jet_process( end # Warmup code if we are doing a multi-sample timing run - if nsamples > 1 || profile + if nsamples > 1 || !isnothing(profile) @info "Doing initial warm-up run" for event in events - finaljets, _ = generic_jet_reconstruct(event, R = distance, p = power, strategy = strategy) - final_jets(finaljets, ptmin) + _ = inclusive_jets(generic_jet_reconstruct(event, R = distance, p = power, strategy = strategy), ptmin) end end - if profile - profile_code(generic_jet_reconstruct, events, nsamples) + if !isnothing(profile) + profile_code(profile, generic_jet_reconstruct, events, nsamples, R = distance, p = power, strategy = strategy) return nothing end if alloc println("Memory allocation statistics:") @timev for event in events - finaljets, _ = generic_jet_reconstruct(event, R = distance, p = power, strategy = strategy) - final_jets(finaljets, ptmin) + _ = inclusive_jets(generic_jet_reconstruct(event, R = distance, p = power, strategy = strategy), ptmin) end return nothing end @@ -114,19 +114,18 @@ function jet_process( gcoff && GC.enable(false) t_start = time_ns() for (ievt, event) in enumerate(events) - finaljets, _ = generic_jet_reconstruct(event, R = distance, p = power, ptmin=ptmin, strategy = strategy) - fj = final_jets(finaljets, ptmin) + finaljets = inclusive_jets(generic_jet_reconstruct(event, R = distance, p = power, strategy = strategy), ptmin) # Only print the jet content once if irun == 1 @info begin jet_output = "Event $(ievt)\n" - for (ijet, jet) in enumerate(fj) + for (ijet, jet) in enumerate(finaljets) jet_output *= " $(ijet) - $(jet)\n" end "$(jet_output)" end if !isnothing(dump) - push!(jet_collection, FinalJets(ievt, fj)) + push!(jet_collection, FinalJets(ievt, finaljets)) end end end @@ -212,8 +211,9 @@ parse_command_line(args) = begin action = :store_true "--profile" - help = "Profile code and generate a flame graph." - action = :store_true + help = """Profile code and generate a flame graph; the results + will be stored in 'profile/PROFILE' subdirectory, where PROFILE is the value + given to this option)""" "--alloc" help = "Provide memory allocation statistics." diff --git a/src/ClusterSequence.jl b/src/ClusterSequence.jl new file mode 100644 index 0000000..74c9e8d --- /dev/null +++ b/src/ClusterSequence.jl @@ -0,0 +1,135 @@ +# Structure definitions for the Tiled algorithm, with linked list +# and TiledJets (a la FastJet) +# Original Julia implementation by Philippe Gras, +# ported to this package by Graeme Stewart + +using Accessors + +# One cannot use an ENUM here as it's a different type +# I don't know any good way to keep this lean and to have these necessary +# flags other than using these "magic numbers" +const Invalid=-3 +const NonexistentParent=-2 +const BeamJet=-1 + +"""A struct holding a record of jet mergers and finalisations""" +struct HistoryElement + """Index in _history where first parent of this jet + was created (NonexistentParent if this jet is an + original particle)""" + parent1::Int + + """Index in _history where second parent of this jet + was created (NonexistentParent if this jet is an + original particle); BeamJet if this history entry + just labels the fact that the jet has recombined + with the beam)""" + parent2::Int + + """Index in _history where the current jet is + recombined with another jet to form its child. It + is Invalid if this jet does not further + recombine.""" + child::Int + + """Index in the _jets vector where we will find the + PseudoJet object corresponding to this jet + (i.e. the jet created at this entry of the + history). NB: if this element of the history + corresponds to a beam recombination, then + jetp_index=Invalid.""" + jetp_index::Int + + """The distance corresponding to the recombination + at this stage of the clustering.""" + dij::Float64 + + """The largest recombination distance seen + so far in the clustering history.""" + max_dij_so_far::Float64 +end + +"""Used for initial particles""" +HistoryElement(jetp_index) = HistoryElement(NonexistentParent, NonexistentParent, Invalid, jetp_index, 0.0, 0.0) + + +""" +Convienence structure holding all of the relevant parameters for +the jet reconstruction +""" +struct ClusterSequence + """ + This contains the physical PseudoJets; for each PseudoJet one can find + the corresponding position in the _history by looking at + _jets[i].cluster_hist_index() + """ + jets::Vector{PseudoJet} + + """ + This vector will contain the branching history; for each stage, + history[i].jetp_index indicates where to look in the _jets + vector to get the physical PseudoJet. + """ + history::Vector{HistoryElement} + + """Total energy of the event""" + Qtot +end + +"""Return all inclusive jets of a ClusterSequence with pt > ptmin""" +function inclusive_jets(clusterseq::ClusterSequence, ptmin = 0.0) + dcut = ptmin * ptmin + jets_local = Vector{LorentzVectorCyl}(undef, 0) + # sizehint!(jets_local, length(clusterseq.jets)) + # For inclusive jets with a plugin algorithm, we make no + # assumptions about anything (relation of dij to momenta, + # ordering of the dij, etc.) + # for elt in Iterators.reverse(clusterseq.history) + for elt in clusterseq.history + elt.parent2 == BeamJet || continue + iparent_jet = clusterseq.history[elt.parent1].jetp_index + jet = clusterseq.jets[iparent_jet] + if pt2(jet) >= dcut + push!(jets_local, LorentzVectorCyl(pt(jet), rapidity(jet), phi(jet), mass(jet))) + end + end + jets_local +end + +"""Add a new jet's history into the recombination sequence""" +add_step_to_history!(clusterseq::ClusterSequence, parent1, parent2, jetp_index, dij) = begin + max_dij_so_far = max(dij, clusterseq.history[end].max_dij_so_far) + push!(clusterseq.history, HistoryElement(parent1, parent2, Invalid, + jetp_index, dij, max_dij_so_far)) + + local_step = length(clusterseq.history) + + # Sanity check: make sure the particles have not already been recombined + # + # Note that good practice would make this an assert (since this is + # a serious internal issue). However, we decided to throw an + # InternalError so that the end user can decide to catch it and + # retry the clustering with a different strategy. + @assert parent1 >= 1 + if clusterseq.history[parent1].child != Invalid + error("Internal error. Trying to recombine an object that has previsously been recombined.") + end + + hist_elem = clusterseq.history[parent1] + clusterseq.history[parent1] = @set hist_elem.child = local_step + + if parent2 >= 1 + clusterseq.history[parent2].child == Invalid || error( + "Internal error. Trying to recombine an object that has previsously been recombined. Parent " * string(parent2) * "'s child index " * string(clusterseq.history[parent1].child) * ". Parent jet index: " * + string(clusterseq.history[parent2].jetp_index) * ".", + ) + hist_elem = clusterseq.history[parent2] + clusterseq.history[parent2] = @set hist_elem.child = local_step + end + + # Get cross-referencing right from PseudoJets + if jetp_index != Invalid + @assert jetp_index >= 1 + clusterseq.jets[jetp_index]._cluster_hist_index = local_step + end +end diff --git a/src/GenericAlgo.jl b/src/GenericAlgo.jl index 3fc3037..bd4eec5 100644 --- a/src/GenericAlgo.jl +++ b/src/GenericAlgo.jl @@ -2,7 +2,7 @@ # switch based on the strategy, or based on the event density # if the "Best" strategy is to be employed -function generic_jet_reconstruct(particles; p = -1, R = 1.0, recombine = +, ptmin = 0.0, strategy = JetRecoStrategy.Best) +function generic_jet_reconstruct(particles; p = -1, R = 1.0, recombine = +, strategy = JetRecoStrategy.Best) # Either map to the fixed algorithm corresponding to the strategy # or to an optimal choice based on the density of initial particles @@ -18,5 +18,5 @@ function generic_jet_reconstruct(particles; p = -1, R = 1.0, recombine = +, ptmi end # Now call the chosen algorithm, passing through the other parameters - algorithm(particles; p = p, R = R, recombine = recombine, ptmin = ptmin) + algorithm(particles; p = p, R = R, recombine = recombine) end diff --git a/src/JetReconstruction.jl b/src/JetReconstruction.jl index c2a0ab5..597aac5 100644 --- a/src/JetReconstruction.jl +++ b/src/JetReconstruction.jl @@ -22,12 +22,13 @@ py(p::LorentzVectorCyl) = LorentzVectorHEP.py(p) pz(p::LorentzVectorCyl) = LorentzVectorHEP.pz(p) energy(p::LorentzVectorCyl) = LorentzVectorHEP.energy(p) -# Philipp's pseudojet type +# Pseudojet type include("Pseudojet.jl") export PseudoJet -# Simple HepMC3 reader -include("HepMC3.jl") +# ClusterSequence type +include("ClusterSequence.jl") +export ClusterSequence, inclusive_jets # Jet reconstruction strategies include("JetRecoStrategies.jl") @@ -49,6 +50,9 @@ export tiled_jet_reconstruct include("GenericAlgo.jl") export generic_jet_reconstruct +# Simple HepMC3 reader +include("HepMC3.jl") + # jet serialisation (saving to file) include("Serialize.jl") export savejets, loadjets!, loadjets diff --git a/src/PlainAlgo.jl b/src/PlainAlgo.jl index 3aff600..5d53029 100644 --- a/src/PlainAlgo.jl +++ b/src/PlainAlgo.jl @@ -88,119 +88,131 @@ Examples of suitable types are JetReconstruction.PseudoJet and LorentzVectorHEP. N.B. these methods need to exist in the namespace of this package, i.e. JetReconstruction.pt2(::T), which is already done for the two types above. """ -function plain_jet_reconstruct(particles::Vector{T}; p = -1, R = 1.0, recombine = +, ptmin = 0.0) where T +function plain_jet_reconstruct(particles::Vector{T}; p = -1, R = 1.0, recombine = +) where T # Integer p if possible p = (round(p) == p) ? Int(p) : p - # We make sure these arrays are type stable - have seen issues where, depending on the values - # returned by the methods, they can become unstable and performance degrades - kt2_array::Vector{Float64} = pt2.(particles) .^ p - phi_array::Vector{Float64} = phi.(particles) - rapidity_array::Vector{Float64} = rapidity.(particles) - - objects_array = copy(particles) + if T == PseudoJet + # recombination_particles will become part of the cluster sequence, so size it for + # the starting particles and all N recombinations + recombination_particles = copy(particles) + sizehint!(recombination_particles, length(particles) * 2) + else + recombination_particles = PseudoJet[] + sizehint!(recombination_particles, length(particles) * 2) + for i in eachindex(particles) + push!(recombination_particles, PseudoJet(px(particles[i]), py(particles[i]), pz(particles[i]), energy(particles[i]))) + end + end # Now call the actual reconstruction method, tuned for our internal EDM - plain_jet_reconstruct(objects_array=objects_array, kt2_array=kt2_array, phi_array=phi_array, - rapidity_array=rapidity_array, p=p, R=R, recombine=recombine, ptmin=ptmin) + _plain_jet_reconstruct(particles=recombination_particles, p=p, R=R, recombine=recombine) end -function plain_jet_reconstruct(;objects_array::Vector{J}, kt2_array::Vector{F}, - phi_array::Vector{F}, rapidity_array::Vector{F}, p = -1, R = 1.0, recombine = +, ptmin = 0.0) where {J, F<:AbstractFloat} +function _plain_jet_reconstruct(;particles::Vector{PseudoJet}, p = -1, R = 1.0, recombine = +) # Bounds - N::Int = length(objects_array) - - # Returned values - jets = PseudoJet[] - sequences = Vector{Int}[] # recombination sequences, WARNING: first index in the sequence is not necessarily the seed - + N::Int = length(particles) # Parameters R2 = R^2 - ptmin2 = ptmin^2 - # Data - nn = Vector(1:N) # nearest neighbours - nndist = fill(float(R2), N) # distances to the nearest neighbour - sequences = Vector{Int}[[x] for x in 1:N] + # Optimised compact arrays for determining the next merge step + # We make sure these arrays are type stable - have seen issues where, depending on the values + # returned by the methods, they can become unstable and performance degrades + kt2_array::Vector{Float64} = pt2.(particles) .^ p + phi_array::Vector{Float64} = phi.(particles) + rapidity_array::Vector{Float64} = rapidity.(particles) + nn::Vector{Int} = Vector(1:N) # nearest neighbours + nndist::Vector{Float64} = fill(float(R2), N) # geometric distances to the nearest neighbour + nndij::Vector{Float64} = zeros(N) # dij metric distance + + # Maps index from the compact array to the clusterseq jet vector + clusterseq_index::Vector{Int} = collect(1:N) - # initialize _nn + # Setup the initial history and get the total energy + history, Qtot = initial_history(particles) + # Current implementation mutates the particles vector, so need to copy it + # for the cluster sequence (there is too much copying happening, so this + # needs to be rethought and reoptimised) + clusterseq = ClusterSequence(particles, history, Qtot) + + # Initialize nearest neighbours @simd for i in 1:N upd_nn_crosscheck!(i, 1, i - 1, rapidity_array, phi_array, R2, nndist, nn) end - # diJ table *_R2 - nndij::Vector{Float64} = zeros(N) + # diJ table * R2 @inbounds @simd for i in 1:N nndij[i] = dij(i, kt2_array, nn, nndist) end iteration::Int = 1 while N != 0 - # findmin - dij_min, i = fast_findmin(nndij, N) + # Extremely odd - having these @debug statements present causes a performance + # degradation of ~140μs per event on my M2 mac (20%!), even when no debugging is used + # so they need to be completely commented out... + #@debug "Beginning iteration $iteration" + # Findmin and add back renormalisation to distance + dij_min, i = fast_findmin(nndij, N) + dij_min *= R2 j::Int = nn[i] + #@debug "Closest compact jets are $i ($(clusterseq_index[i])) and $j ($(clusterseq_index[j]))" - ## Needed for certain tricky debugging situations - # if iteration==1 - # debug_jets(_nn, _nndist, _nndij) - # end - - if i != j + if i != j # Merge jets i and j # swap if needed if j < i i, j = j, i end - # update ith jet, replacing it with the new one - objects_array[i] = recombine(objects_array[i], objects_array[j]) - phi_array[i] = phi(objects_array[i]) - rapidity_array[i] = rapidity(objects_array[i]) - kt2_array[i] = pt2(objects_array[i]) ^ p - + # Source "history" for merge + hist_i = clusterseq.jets[clusterseq_index[i]]._cluster_hist_index + hist_j = clusterseq.jets[clusterseq_index[j]]._cluster_hist_index + + # Recombine i and j into the next jet + push!(clusterseq.jets, + recombine(clusterseq.jets[clusterseq_index[i]], clusterseq.jets[clusterseq_index[j]])) + # Get its index and the history index + newjet_k = length(clusterseq.jets) + newstep_k = length(clusterseq.history) + 1 + clusterseq.jets[newjet_k]._cluster_hist_index = newstep_k + # Update history + add_step_to_history!(clusterseq, minmax(hist_i, hist_j)..., newjet_k, dij_min) + + # Update the compact arrays, reusing the i-th slot + kt2_array[i] = pt2(clusterseq.jets[newjet_k]) ^ p + rapidity_array[i] = rapidity(clusterseq.jets[newjet_k]) + phi_array[i] = phi(clusterseq.jets[newjet_k]) + clusterseq_index[i] = newjet_k nndist[i] = R2 nn[i] = i - - @inbounds for x in sequences[j] # WARNING: first index in the sequence is not necessarily the seed - push!(sequences[i], x) - end - else # i == j - if (pt2(objects_array[i]) >= ptmin2) - # We return PseudoJets, so if we were not passed these then we need to convert (N.B. this is costly!) - if J == PseudoJet - push!(jets, objects_array[i]) - else - push!(jets, PseudoJet(px(objects_array[i]), py(objects_array[i]), pz(objects_array[i]), energy(objects_array[i]))) - end - end - push!(sequences, sequences[i]) + else # i == j, this is a final jet ("merged with beam") + add_step_to_history!(clusterseq, clusterseq.jets[clusterseq_index[i]]._cluster_hist_index, BeamJet, Invalid, dij_min) end - # copy jet N to j - objects_array[j] = objects_array[N] - - phi_array[j] = phi_array[N] - rapidity_array[j] = rapidity_array[N] - kt2_array[j] = kt2_array[N] - nndist[j] = nndist[N] - nn[j] = nn[N] - nndij[j] = nndij[N] - - sequences[j] = sequences[N] + # Squash step - copy the final jet's compact data into the j-th slot + if j != N + phi_array[j] = phi_array[N] + rapidity_array[j] = rapidity_array[N] + kt2_array[j] = kt2_array[N] + nndist[j] = nndist[N] + nn[j] = nn[N] + nndij[j] = nndij[N] + clusterseq_index[j] = clusterseq_index[N] + end Nn::Int = N N -= 1 iteration += 1 - # update nearest neighbours step + # Update nearest neighbours step @inbounds @simd for k in 1:N upd_nn_step!(i, j, k, N, Nn, kt2_array, rapidity_array, phi_array, R2, nndist, nn, nndij) end - # @infiltrate nndij[i] = dij(i, kt2_array, nn, nndist) end - jets, sequences + # Return the final cluster sequence structure + clusterseq end diff --git a/src/Pseudojet.jl b/src/Pseudojet.jl index d8a712e..f37b14f 100644 --- a/src/Pseudojet.jl +++ b/src/Pseudojet.jl @@ -50,7 +50,7 @@ PseudoJet(px::Float64, py::Float64, import Base.show show(io::IO, jet::PseudoJet) = begin print(io, "Pseudojet(px: ", jet.px, " py: ", jet.py, " pz: ", jet.pz, " E: ", jet.E, "; ", - "pt: ", sqrt(jet._pt2), " eta: ", rap(jet), " phi: ", phi(jet), ", m: ", m(jet), ")") + "pt: ", sqrt(jet._pt2), " eta: ", rapidity(jet), " phi: ", phi(jet), ", m: ", m(jet), ")") end diff --git a/src/TiledAlgoLL.jl b/src/TiledAlgoLL.jl index a979cf3..4003ec5 100644 --- a/src/TiledAlgoLL.jl +++ b/src/TiledAlgoLL.jl @@ -8,7 +8,7 @@ using Accessors using LoopVectorization # Include struct definitions and basic operations -include("TiledAlglLLStructs.jl") +include("TiledAlgoLLStructs.jl") """ Initialise the clustering history in a standard way, @@ -63,7 +63,7 @@ end """Initialise a tiled jet from a PseudoJet (using an index into our ClusterSequence)""" -tiledjet_set_jetinfo!(jet::TiledJet, clusterseq::ClusterSequence, jets_index, R2, p) = begin +tiledjet_set_jetinfo!(jet::TiledJet, clusterseq::ClusterSequence, tiling::Tiling, jets_index, R2, p) = begin @inbounds jet.eta = rapidity(clusterseq.jets[jets_index]) @inbounds jet.phi = phi_02pi(clusterseq.jets[jets_index]) @inbounds jet.kt2 = pt2(clusterseq.jets[jets_index]) > 1.e-300 ? pt2(clusterseq.jets[jets_index])^p : 1.e300 @@ -73,23 +73,23 @@ tiledjet_set_jetinfo!(jet::TiledJet, clusterseq::ClusterSequence, jets_index, R2 jet.NN = noTiledJet # Find out which tile it belonds to - jet.tile_index = tile_index(clusterseq.tiling.setup, jet.eta, jet.phi) + jet.tile_index = tile_index(tiling.setup, jet.eta, jet.phi) # Insert it into the tile's linked list of jets (at the beginning) jet.previous = noTiledJet - @inbounds jet.next = clusterseq.tiling.tiles[jet.tile_index] + @inbounds jet.next = tiling.tiles[jet.tile_index] if isvalid(jet.next) jet.next.previous = jet end - @inbounds clusterseq.tiling.tiles[jet.tile_index] = jet + @inbounds tiling.tiles[jet.tile_index] = jet nothing end """Full scan for nearest neighbours""" -function set_nearest_neighbours!(clusterseq::ClusterSequence, tiledjets::Vector{TiledJet}) +function set_nearest_neighbours!(clusterseq::ClusterSequence, tiling::Tiling, tiledjets::Vector{TiledJet}) # Setup the initial nearest neighbour information - for tile in clusterseq.tiling.tiles + for tile in tiling.tiles isvalid(tile) || continue for jetA in tile for jetB in tile @@ -109,9 +109,9 @@ function set_nearest_neighbours!(clusterseq::ClusterSequence, tiledjets::Vector{ end # Look for neighbour jets n the neighbour tiles - for rtile_index in rightneighbours(tile.tile_index, clusterseq.tiling) + for rtile_index in rightneighbours(tile.tile_index, tiling) for jetA in tile - for jetB in @inbounds clusterseq.tiling.tiles[rtile_index] + for jetB in @inbounds tiling.tiles[rtile_index] dist = _tj_dist(jetA, jetB) if (dist < jetA.NN_dist) jetA.NN_dist = dist @@ -177,43 +177,6 @@ do_iB_recombination_step!(clusterseq::ClusterSequence, jet_i, diB) = begin Invalid, diB) end -"""Add a new jet's history into the recombination sequence""" -add_step_to_history!(clusterseq::ClusterSequence, parent1, parent2, jetp_index, dij) = begin - max_dij_so_far = max(dij, clusterseq.history[end].max_dij_so_far) - push!(clusterseq.history, HistoryElement(parent1, parent2, Invalid, - jetp_index, dij, max_dij_so_far)) - - local_step = length(clusterseq.history) - - # Sanity check: make sure the particles have not already been recombined - # - # Note that good practice would make this an assert (since this is - # a serious internal issue). However, we decided to throw an - # InternalError so that the end user can decide to catch it and - # retry the clustering with a different strategy. - @assert parent1 >= 1 - if clusterseq.history[parent1].child != Invalid - throw(ErrorException("Internal error. Trying to recombine an object that has previsously been recombined.")) - end - - hist_elem = clusterseq.history[parent1] - clusterseq.history[parent1] = @set hist_elem.child = local_step - - if parent2 >= 1 - clusterseq.history[parent2].child == Invalid || error( - "Internal error. Trying to recombine an object that has previsously been recombined. Parent " * string(parent2) * "'s child index " * string(clusterseq.history[parent1].child) * ". Parent jet index: " * - string(clusterseq.history[parent2].jetp_index) * ".", - ) - hist_elem = clusterseq.history[parent2] - clusterseq.history[parent2] = @set hist_elem.child = local_step - end - - # Get cross-referencing right from PseudoJets - if jetp_index != Invalid - @assert jetp_index >= 1 - clusterseq.jets[jetp_index]._cluster_hist_index = local_step - end -end """Adds to the vector tile_union the tiles that are in the neighbourhood of the specified tile_index, including itself and whose tagged status are @@ -260,28 +223,6 @@ function find_tile_neighbours!(tile_union, jetA, jetB, oldB, tiling) end -"""Return all inclusive jets of a ClusterSequence with pt > ptmin""" -function inclusive_jets(clusterseq::ClusterSequence, ptmin = 0.0) - dcut = ptmin * ptmin - jets_local = Vector{LorentzVectorCyl}(undef, 0) - # sizehint!(jets_local, length(clusterseq.jets)) - # For inclusive jets with a plugin algorithm, we make no - # assumptions about anything (relation of dij to momenta, - # ordering of the dij, etc.) - # for elt in Iterators.reverse(clusterseq.history) - for elt in clusterseq.history - elt.parent2 == BeamJet || continue - iparent_jet = clusterseq.history[elt.parent1].jetp_index - jet = clusterseq.jets[iparent_jet] - if pt2(jet) >= dcut - push!(jets_local, LorentzVectorCyl(pt(jet), rapidity(jet), phi(jet), mass(jet))) - # push!(jets_local, jet) - end - end - jets_local -end - - """ Main jet reconstruction algorithm entry point for generic data types @@ -292,7 +233,7 @@ be used. If a non-standard recombination is used, it must be defined for JetReconstruction.PseudoJet, as this struct is used internally. """ -function tiled_jet_reconstruct(particles::Vector{T}; p = -1, R = 1.0, recombine = +, ptmin = 0.0) where {T} +function tiled_jet_reconstruct(particles::Vector{T}; p = -1, R = 1.0, recombine = +) where {T} # Here we need to populate the vector of PseudoJets that are the internal # EDM for the main algorithm, then we call the reconstruction pseudojets = Vector{PseudoJet}(undef, length(particles)) @@ -300,22 +241,30 @@ function tiled_jet_reconstruct(particles::Vector{T}; p = -1, R = 1.0, recombine pseudojets[i] = PseudoJet(px(particle), py(particle), pz(particle), energy(particle)) end - tiled_jet_reconstruct(pseudojets, p = p, R = R, recombine = recombine, ptmin = ptmin) + tiled_jet_reconstruct(pseudojets, p = p, R = R, recombine = recombine) end + """ Main jet reconstruction algorithm, using PseudoJet objects """ -function tiled_jet_reconstruct(particles::Vector{PseudoJet}; p = -1, R = 1.0, recombine = +, ptmin = 0.0) +function tiled_jet_reconstruct(particles::Vector{PseudoJet}; p = -1, R = 1.0, recombine = +) # Bounds N::Int = length(particles) + + # Extremely odd - having these @debug statements present causes a performance + # degradation of ~20μs per event on my M2 mac (12%!), even when no debugging is used + # so they need to be completely commented out... + # + # There are a few reports of this in, e.g., https://github.com/JuliaLang/julia/issues/28147 + # It does seem to have improved, but it's far from perfect! # @debug "Initial particles: $(N)" # Algorithm parameters R2::Float64 = R * R p = (round(p) == p) ? Int(p) : p # integer p if possible - # This will be used quite deep inside loops, but declare it here so that + # This will be used quite deep inside loops, so declare it here so that # memory (de)allocation gets done only once tile_union = Vector{Int}(undef, 3 * _n_tile_neighbours) @@ -340,17 +289,17 @@ function tiled_jet_reconstruct(particles::Vector{PseudoJet}; p = -1, R = 1.0, re tiling = Tiling(setup_tiling(_eta, R)) # ClusterSequence is a convenience struct that holds the state of the reconstruction - clusterseq = ClusterSequence(jets, history, tiling, Qtot) + clusterseq = ClusterSequence(jets, history, Qtot) # Tiled jets is a structure that has additional variables for tracking which tile a jet is in tiledjets = similar(clusterseq.jets, TiledJet) for ijet in eachindex(tiledjets) tiledjets[ijet] = TiledJet(ijet) - tiledjet_set_jetinfo!(tiledjets[ijet], clusterseq, ijet, R2, p) + tiledjet_set_jetinfo!(tiledjets[ijet], clusterseq, tiling, ijet, R2, p) end # Now initalise all of the nearest neighbour tiles - NNs, dij = set_nearest_neighbours!(clusterseq, tiledjets) + NNs, dij = set_nearest_neighbours!(clusterseq, tiling, tiledjets) # Main loop of the reconstruction # Each iteration we either merge 2→1 or finalise a jet, so it takes N iterations @@ -382,21 +331,21 @@ function tiled_jet_reconstruct(particles::Vector{PseudoJet}; p = -1, R = 1.0, re # Recombine jetA and jetB and retrieves the new index, nn nn = do_ij_recombination_step!(clusterseq, jetA.jets_index, jetB.jets_index, dij_min, recombine) - tiledjet_remove_from_tiles!(clusterseq.tiling, jetA) + tiledjet_remove_from_tiles!(tiling, jetA) oldB = copy(jetB) # take a copy because we will need it... - tiledjet_remove_from_tiles!(clusterseq.tiling, jetB) - tiledjet_set_jetinfo!(jetB, clusterseq, nn, R2, p) # cause jetB to become _jets[nn] + tiledjet_remove_from_tiles!(tiling, jetB) + tiledjet_set_jetinfo!(jetB, clusterseq, tiling, nn, R2, p) # cause jetB to become _jets[nn] # (in addition, registers the jet in the tiling) else # Jet-beam recombination do_iB_recombination_step!(clusterseq, jetA.jets_index, dij_min) - tiledjet_remove_from_tiles!(clusterseq.tiling, jetA) + tiledjet_remove_from_tiles!(tiling, jetA) oldB = jetB end # Find all the neighbour tiles that hold candidate jets for Updates - n_near_tiles = find_tile_neighbours!(tile_union, jetA, jetB, oldB, clusterseq.tiling) + n_near_tiles = find_tile_neighbours!(tile_union, jetA, jetB, oldB, tiling) # Firstly compactify the diJ by taking the last of the diJ and copying # it to the position occupied by the diJ for jetA @@ -460,5 +409,5 @@ function tiled_jet_reconstruct(particles::Vector{PseudoJet}; p = -1, R = 1.0, re @inbounds dij[jetB.dij_posn] = _tj_diJ(jetB) end end - inclusive_jets(clusterseq, ptmin), clusterseq.history + clusterseq end diff --git a/src/TiledAlglLLStructs.jl b/src/TiledAlgoLLStructs.jl similarity index 72% rename from src/TiledAlglLLStructs.jl rename to src/TiledAlgoLLStructs.jl index 4d55520..95857b0 100644 --- a/src/TiledAlglLLStructs.jl +++ b/src/TiledAlgoLLStructs.jl @@ -1,53 +1,4 @@ -# Structure definitions for the Tiled algorithm, with linked list -# and TiledJets (a la FastJet) -# Original Julia implementation by Philippe Gras, -# ported to this package by Graeme Stewart - -# TODO: Consider ENUM here, rather than magic numbers -const Invalid=-3 -const NonexistentParent=-2 -const BeamJet=-1 - -"""A struct holding a record of jet mergers and finalisations""" -struct HistoryElement - """Index in _history where first parent of this jet - was created (NonexistentParent if this jet is an - original particle)""" - parent1::Int - - """Index in _history where second parent of this jet - was created (NonexistentParent if this jet is an - original particle); BeamJet if this history entry - just labels the fact that the jet has recombined - with the beam)""" - parent2::Int - - """Index in _history where the current jet is - recombined with another jet to form its child. It - is Invalid if this jet does not further - recombine.""" - child::Int - - """Index in the _jets vector where we will find the - PseudoJet object corresponding to this jet - (i.e. the jet created at this entry of the - history). NB: if this element of the history - corresponds to a beam recombination, then - jetp_index=Invalid.""" - jetp_index::Int - - """The distance corresponding to the recombination - at this stage of the clustering.""" - dij::Float64 - - """The largest recombination distance seen - so far in the clustering history.""" - max_dij_so_far::Float64 -end - -"""Used for initial particles""" -HistoryElement(jetp_index) = HistoryElement(NonexistentParent, NonexistentParent, Invalid, jetp_index, 0.0, 0.0) - +# Structures used internally by the tiled algorithm """Structure analogous to BriefJet, but with the extra information needed for dealing with tiles""" @@ -242,30 +193,3 @@ tiledjet_remove_from_tiles!(tiling, jet) = begin jet.next = jet.previous = noTiledJet # To be clean, but not mandatory end - -""" -Convienence structure holding all of the relevant parameters for -the jet reconstruction -""" -struct ClusterSequence - """ - This contains the physical PseudoJets; for each PseudoJet one can find - the corresponding position in the _history by looking at - _jets[i].cluster_hist_index() - """ - jets::Vector{PseudoJet} - - """ - This vector will contain the branching history; for each stage, - history[i].jetp_index indicates where to look in the _jets - vector to get the physical PseudoJet. - """ - history::Vector{HistoryElement} - - """PseudoJet tiling""" - tiling::Tiling - - """Total energy of the event""" - Qtot -end - diff --git a/src/Utils.jl b/src/Utils.jl index f17fbf0..62c71e4 100644 --- a/src/Utils.jl +++ b/src/Utils.jl @@ -20,6 +20,7 @@ function read_final_state_particles(fname; maxevents = -1, skipevents = 0) push!(events, input_particles) ipart += 1 end + close(f) @info "Total Events: $(length(events))" @debug events @@ -46,6 +47,7 @@ function read_final_state_particles_lv(fname; maxevents = -1, skipevents = 0) push!(events, input_particles) ipart += 1 end + close(f) @info "Total Events: $(length(events))" @debug events @@ -70,8 +72,13 @@ function pseudojets2vectors(events::Vector{Vector{PseudoJet}}) event_vector end -"""Return the list of jets passing a pt cut""" -function final_jets(jets::Vector{Vector{Float64}}, ptmin::AbstractFloat) +""" +Return the list of jets passing a pt cut + +The ptmin cut in these functions is slightly legacy as often the +input jets were already filtered on pt +""" +function final_jets(jets::Vector{Vector{Float64}}, ptmin::AbstractFloat=0.0) count = 0 final_jets = Vector{FinalJet}() sizehint!(final_jets, 6) @@ -86,7 +93,7 @@ function final_jets(jets::Vector{Vector{Float64}}, ptmin::AbstractFloat) final_jets end -function final_jets(jets::Vector{PseudoJet}, ptmin::AbstractFloat) +function final_jets(jets::Vector{PseudoJet}, ptmin::AbstractFloat=0.0) count = 0 final_jets = Vector{FinalJet}() sizehint!(final_jets, 6) @@ -100,7 +107,7 @@ function final_jets(jets::Vector{PseudoJet}, ptmin::AbstractFloat) final_jets end -function final_jets(jets::Vector{LorentzVector}, ptmin::AbstractFloat) +function final_jets(jets::Vector{LorentzVector}, ptmin::AbstractFloat=0.0) count = 0 final_jets = Vector{FinalJet}() sizehint!(final_jets, 6) @@ -114,7 +121,7 @@ function final_jets(jets::Vector{LorentzVector}, ptmin::AbstractFloat) final_jets end -function final_jets(jets::Vector{LorentzVectorCyl}, ptmin::AbstractFloat) +function final_jets(jets::Vector{LorentzVectorCyl}, ptmin::AbstractFloat=0.0) count = 0 final_jets = Vector{FinalJet}() sizehint!(final_jets, 6) diff --git a/test/data/events.hepmc3.gz b/test/data/events.hepmc3.gz deleted file mode 100644 index fb9067f..0000000 Binary files a/test/data/events.hepmc3.gz and /dev/null differ diff --git a/test/data/events.hepmc3.xz b/test/data/events.hepmc3.xz new file mode 100644 index 0000000..c6b4841 Binary files /dev/null and b/test/data/events.hepmc3.xz differ diff --git a/test/runtests.jl b/test/runtests.jl index 0b0c566..acbcb73 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -6,8 +6,26 @@ using JSON using LorentzVectorHEP using Logging +const events_file = joinpath(@__DIR__, "data", "events.hepmc3") + +"""Decompress data file, if necessary""" +function unpack_events(file) + # File already there? + if isfile(file) + return true + end + # LZMA source? + file_compressed = joinpath(dirname(file), basename(file)*".xz") + if isfile(file_compressed) + @debug "Unpacking $(file_compressed)" + run(pipeline(`xzcat $file_compressed`, stdout=file)) + return true + end + false +end + """Read JSON file with fastjet jets in it""" -function read_fastjet_outputs(; fname = "test/data/jet_collections_fastjet_akt.json") +function read_fastjet_outputs(fname) f = open(fname) JSON.parse(f) end @@ -25,18 +43,26 @@ function sort_jets!(jet_array::Vector{FinalJet}) sort!(jet_array, by = jet_pt, rev = true) end +function sort_jets!(jet_array::Vector{LorentzVectorCyl}) + jet_pt(jet) = jet.pt + sort!(jet_array, by = jet_pt, rev = true) +end + function main() + # If necessary, unzip the events data file + unpack_events(events_file) + # Read our fastjet outputs (we read for anti-kt, cambridge/achen, inclusive-kt) algorithms = Dict(-1 => "Anti-kt", 0 => "Cambridge/Achen", 1 => "Inclusive-kt") - fastjet_alg_files = Dict(-1 => "test/data/jet_collections_fastjet_akt.json", - 0 => "test/data/jet_collections_fastjet_ca.json", - 1 => "test/data/jet_collections_fastjet_ikt.json") + fastjet_alg_files = Dict(-1 => joinpath(@__DIR__, "data", "jet_collections_fastjet_akt.json"), + 0 => joinpath(@__DIR__, "data", "jet_collections_fastjet_ca.json"), + 1 => joinpath(@__DIR__, "data", "jet_collections_fastjet_ikt.json")) fastjet_data = Dict{Int, Vector{Any}}() for (k, v) in pairs(fastjet_alg_files) - fastjet_jets = read_fastjet_outputs(fname = v) + fastjet_jets = read_fastjet_outputs(v) sort_jets!(fastjet_jets) fastjet_data[k] = fastjet_jets end @@ -71,13 +97,12 @@ function do_test_compare_to_fastjet(strategy::JetRecoStrategy.Strategy, fastjet_ # Now run our jet reconstruction... # From PseudoJets - events::Vector{Vector{PseudoJet}} = read_final_state_particles("test/data/events.hepmc3") + events::Vector{Vector{PseudoJet}} = read_final_state_particles(events_file) jet_collection = FinalJets[] for (ievt, event) in enumerate(events) - finaljets, _ = jet_reconstruction(event, R = distance, p = power, ptmin = ptmin) - fj = final_jets(finaljets, ptmin) - sort_jets!(fj) - push!(jet_collection, FinalJets(ievt, fj)) + finaljets = final_jets(inclusive_jets(jet_reconstruction(event, R = distance, p = power), ptmin)) + sort_jets!(finaljets) + push!(jet_collection, FinalJets(ievt, finaljets)) end @testset "Jet Reconstruction compare to FastJet: Strategy $strategy_name, Algorithm $algname" begin @@ -123,23 +148,21 @@ function do_test_compare_types(strategy::JetRecoStrategy.Strategy; # Now run our jet reconstruction... # From PseudoJets - events::Vector{Vector{PseudoJet}} = read_final_state_particles("test/data/events.hepmc3") + events::Vector{Vector{PseudoJet}} = read_final_state_particles(events_file) jet_collection = FinalJets[] for (ievt, event) in enumerate(events) - finaljets, _ = jet_reconstruction(event, R = distance, p = power, ptmin = ptmin) - fj = final_jets(finaljets, ptmin) - sort_jets!(fj) - push!(jet_collection, FinalJets(ievt, fj)) + finaljets = final_jets(inclusive_jets(jet_reconstruction(event, R = distance, p = power), ptmin)) + sort_jets!(finaljets) + push!(jet_collection, FinalJets(ievt, finaljets)) end # From LorentzVector - events_lv::Vector{Vector{LorentzVector}} = read_final_state_particles_lv("test/data/events.hepmc3") + events_lv::Vector{Vector{LorentzVector}} = read_final_state_particles_lv(events_file) jet_collection_lv = FinalJets[] for (ievt, event) in enumerate(events_lv) - finaljets, _ = jet_reconstruction(event, R = distance, p = power, ptmin = ptmin) - fj = final_jets(finaljets, ptmin) - sort_jets!(fj) - push!(jet_collection_lv, FinalJets(ievt, fj)) + finaljets = final_jets(inclusive_jets(jet_reconstruction(event, R = distance, p = power), ptmin)) + sort_jets!(finaljets) + push!(jet_collection_lv, FinalJets(ievt, finaljets)) end @testset "Jet Reconstruction Compare PseudoJet and LorentzVector, Strategy $strategy_name, Algorithm $algname" begin @@ -158,9 +181,6 @@ function do_test_compare_types(strategy::JetRecoStrategy.Strategy; end end - -if abspath(PROGRAM_FILE) == @__FILE__ - logger = ConsoleLogger(stdout, Logging.Warn) - global_logger(logger) - main() -end +logger = ConsoleLogger(stdout, Logging.Warn) +global_logger(logger) +main()