Skip to content

Commit

Permalink
Add summary script and trim package content
Browse files Browse the repository at this point in the history
Add script that summarises HepMC3 file content
Add more exclusions to .gitignore to prevent package
getting bloated with auxiliary files
  • Loading branch information
graeme-a-stewart committed Mar 26, 2024
1 parent eb2466c commit c3fa8df
Show file tree
Hide file tree
Showing 5 changed files with 158 additions and 2 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@
/notebooks/*
/statprof/*
/debug/*
*.mem
/benchmark/*
15 changes: 14 additions & 1 deletion test/data/.gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,14 @@
events.hepmc3
# Ignore unpacked HepMC3 event files (commit only compressed!)
*.hepmc3

# Additional HepMC3 files should not be committed into this repository
# because they are far too large for the package itself - they should be
# in the JetReconstructionBenchmarks.jl repo instead.
#
# Prefix these event files with "benchmark" if they are stashed here
# temporarily (previously they had names like "events-pp-20TeV-50GeV.hepmc3")
benchmark-*
events-*

# Ignore manifest for the helpert script
Manifest.toml
5 changes: 5 additions & 0 deletions test/data/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[deps]
ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
LorentzVectorHEP = "f612022c-142a-473f-8cfd-a09cf3793c6c"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228"
2 changes: 1 addition & 1 deletion test/data/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

## Files

- `events.hepmc3.gz` - 100 Pythia 13TeV envents in HepMC3 format (compressed)
- `events.hepmc3.gz` - 100 Pythia 13TeV events in HepMC3 format (compressed)
- Unzip to `evemts.hepmc3` for actual use

- `jet_collections_fastjet_akt.json` - JSON output from running anti-kt jet reconstruction using FastJet 3.4.1, with R=0.4 and p_t_min=5.0
Expand Down
136 changes: 136 additions & 0 deletions test/data/hepmc32summary.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
#! /usr/bin/env julia

using ArgParse
using LorentzVectorHEP
using Statistics
using UnicodePlots

struct Particle{T}
momentum::LorentzVector{T}
status::Integer
pdgid::Integer
barcode::Integer
vertex::Integer
end

Particle{T}() where T = Particle(LorentzVector{T}(0., 0., 0., 0.), 0, 0, 0, 0)

""" Read a [HepMC3](https://doi.org/10.1016/j.cpc.2020.107310) ascii file.
Each event is passed to the provided function f as a vector of Particles. A
maximum number of events to read (value -1 to read all availble events) and
a number of events to skip at the beginning of the file can be provided.
"""
function read_events(f, fin; maxevents=-1, skipevents=0)
T = Float64
particles = Particle{T}[]
ievent = 0
ipart = 0
toskip = skipevents

for (il, l) in enumerate(eachline(fin))
if occursin(r"HepMC::.*-END_EVENT_LISTING", l)
break
end

tok = split(l)

if tok[1] == "E"
ievent += 1
(maxevents >= 0 && ievent > maxevents) && break
if ievent > 1 && toskip == 0
f(particles)
end
if toskip > 0
toskip -= 1
end
resize!(particles, 0)
ipart = 0
elseif tok[1] == "P" && toskip == 0
ipart += 1
if ipart > length(particles)
push!(particles, Particle{T}())
end
barcode = parse(Int, tok[2])
vertex = parse(Int, tok[3])
pdgid = parse(Int, tok[4])
px = parse(T, tok[5])
py = parse(T, tok[6])
pz = parse(T, tok[7])
e = parse(T, tok[8])
status = parse(Int, tok[10])
push!(particles, Particle{T}(LorentzVector(e,px,py,pz), status, pdgid, barcode, vertex))
end
end
#processing the last event:
ievent > 0 && f(particles)
end


read_events(fname, maxevents=-1, skipevents=0) = begin
f = open(fname)

events = Vector{LorentzVector}[]

read_events(f, maxevents=maxevents, skipevents=skipevents) do parts
input_particles = LorentzVector[]
for p in parts
if p.status == 1
push!(input_particles, p.momentum)
end
end
push!(events, input_particles)
end

events
end

parse_command_line(args) = begin
s = ArgParseSettings(autofix_names = true)
@add_arg_table! s begin
"--summary"
help = "Print only summary information, filename and average density"
action = :store_true

"--maxevents", "-n"
help = "Maximum number of events to read. -1 to read all events from the file."
arg_type = Int
default = -1

"--skip", "-s"
help = "Number of events to skip at beginning of the file."
arg_type = Int
default = 0

"files"
help = "The HepMC3 event files to read."
required = true
nargs = '+'
end
return parse_args(args, s; as_symbols = true)
end


function main()
args = parse_command_line(ARGS)

for file in args[:files]
events = read_events(file, args[:maxevents], args[:skip])
n_events = length(events)
n_particles = Int[]
for e in events
push!(n_particles, length(e))
end
average_n = mean(n_particles)
if args[:summary]
println("$file,$average_n")
else
println("File $file")
println(" Number of events: $n_events")
println(" Average number of particles: ", mean(n_particles))
println(histogram(n_particles))
end
end
end

main()

0 comments on commit c3fa8df

Please sign in to comment.