From a6eb80a5b8688bb102e7d8b4a46f09601786df7f Mon Sep 17 00:00:00 2001 From: Graeme A Stewart Date: Tue, 26 Mar 2024 16:51:59 +0100 Subject: [PATCH] Add summary script and trim package content Add script that summarises HepMC3 file content Add more exclusions to .gitignore to prevent package getting bloated with auxiliary files --- .gitignore | 2 + test/data/.gitignore | 15 +++- test/data/Project.toml | 5 ++ test/data/README.md | 2 +- test/data/hepmc32summary.jl | 136 ++++++++++++++++++++++++++++++++++++ 5 files changed, 158 insertions(+), 2 deletions(-) create mode 100644 test/data/Project.toml create mode 100644 test/data/hepmc32summary.jl diff --git a/.gitignore b/.gitignore index cd76dd6..a046575 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,5 @@ /notebooks/* /statprof/* /debug/* +*.mem +/benchmark/* diff --git a/test/data/.gitignore b/test/data/.gitignore index d1e2634..50abdfb 100644 --- a/test/data/.gitignore +++ b/test/data/.gitignore @@ -1 +1,14 @@ -events.hepmc3 +# Ignore unpacked HepMC3 event files (commit only compressed!) +*.hepmc3 + +# Additional HepMC3 files should not be committed into this repository +# because they are far too large for the package itself - they should be +# in the JetReconstructionBenchmarks.jl repo instead. +# +# Prefix these event files with "benchmark" if they are stashed here +# temporarily (previously they had names like "events-pp-20TeV-50GeV.hepmc3") +benchmark-* +events-* + +# Ignore manifest for the helpert script +Manifest.toml diff --git a/test/data/Project.toml b/test/data/Project.toml new file mode 100644 index 0000000..66f0c9e --- /dev/null +++ b/test/data/Project.toml @@ -0,0 +1,5 @@ +[deps] +ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63" +LorentzVectorHEP = "f612022c-142a-473f-8cfd-a09cf3793c6c" +Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228" diff --git a/test/data/README.md b/test/data/README.md index 09b37fe..5bb520c 100644 --- a/test/data/README.md +++ b/test/data/README.md @@ -2,7 +2,7 @@ ## Files -- `events.hepmc3.gz` - 100 Pythia 13TeV envents in HepMC3 format (compressed) +- `events.hepmc3.gz` - 100 Pythia 13TeV events in HepMC3 format (compressed) - Unzip to `evemts.hepmc3` for actual use - `jet_collections_fastjet_akt.json` - JSON output from running anti-kt jet reconstruction using FastJet 3.4.1, with R=0.4 and p_t_min=5.0 diff --git a/test/data/hepmc32summary.jl b/test/data/hepmc32summary.jl new file mode 100644 index 0000000..1fbacb8 --- /dev/null +++ b/test/data/hepmc32summary.jl @@ -0,0 +1,136 @@ +#! /usr/bin/env julia + +using ArgParse +using LorentzVectorHEP +using Statistics +using UnicodePlots + +struct Particle{T} + momentum::LorentzVector{T} + status::Integer + pdgid::Integer + barcode::Integer + vertex::Integer +end + +Particle{T}() where T = Particle(LorentzVector{T}(0., 0., 0., 0.), 0, 0, 0, 0) + +""" Read a [HepMC3](https://doi.org/10.1016/j.cpc.2020.107310) ascii file. + + Each event is passed to the provided function f as a vector of Particles. A + maximum number of events to read (value -1 to read all availble events) and + a number of events to skip at the beginning of the file can be provided. +""" +function read_events(f, fin; maxevents=-1, skipevents=0) + T = Float64 + particles = Particle{T}[] + ievent = 0 + ipart = 0 + toskip = skipevents + + for (il, l) in enumerate(eachline(fin)) + if occursin(r"HepMC::.*-END_EVENT_LISTING", l) + break + end + + tok = split(l) + + if tok[1] == "E" + ievent += 1 + (maxevents >= 0 && ievent > maxevents) && break + if ievent > 1 && toskip == 0 + f(particles) + end + if toskip > 0 + toskip -= 1 + end + resize!(particles, 0) + ipart = 0 + elseif tok[1] == "P" && toskip == 0 + ipart += 1 + if ipart > length(particles) + push!(particles, Particle{T}()) + end + barcode = parse(Int, tok[2]) + vertex = parse(Int, tok[3]) + pdgid = parse(Int, tok[4]) + px = parse(T, tok[5]) + py = parse(T, tok[6]) + pz = parse(T, tok[7]) + e = parse(T, tok[8]) + status = parse(Int, tok[10]) + push!(particles, Particle{T}(LorentzVector(e,px,py,pz), status, pdgid, barcode, vertex)) + end + end + #processing the last event: + ievent > 0 && f(particles) +end + + +read_events(fname, maxevents=-1, skipevents=0) = begin + f = open(fname) + + events = Vector{LorentzVector}[] + + read_events(f, maxevents=maxevents, skipevents=skipevents) do parts + input_particles = LorentzVector[] + for p in parts + if p.status == 1 + push!(input_particles, p.momentum) + end + end + push!(events, input_particles) + end + + events +end + +parse_command_line(args) = begin + s = ArgParseSettings(autofix_names = true) + @add_arg_table! s begin + "--summary" + help = "Print only summary information, filename and average density" + action = :store_true + + "--maxevents", "-n" + help = "Maximum number of events to read. -1 to read all events from the file." + arg_type = Int + default = -1 + + "--skip", "-s" + help = "Number of events to skip at beginning of the file." + arg_type = Int + default = 0 + + "files" + help = "The HepMC3 event files to read." + required = true + nargs = '+' + end + return parse_args(args, s; as_symbols = true) +end + + +function main() + args = parse_command_line(ARGS) + + for file in args[:files] + events = read_events(file, args[:maxevents], args[:skip]) + n_events = length(events) + n_particles = Int[] + for e in events + push!(n_particles, length(e)) + end + average_n = mean(n_particles) + if args[:summary] + println("$file,$average_n") + else + println("File $file") + println(" Number of events: $n_events") + println(" Average number of particles: ", mean(n_particles)) + println(histogram(n_particles)) + end + end +end + +main()