Skip to content

Commit

Permalink
do not strictly depend on awkward 2.6.8
Browse files Browse the repository at this point in the history
  • Loading branch information
ikrommyd committed Sep 18, 2024
1 parent a85cb38 commit ab98731
Show file tree
Hide file tree
Showing 6 changed files with 43 additions and 28 deletions.
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ classifiers = [
"Topic :: Utilities",
]
dependencies = [
"awkward>=2.6.8",
"coffea[dask,rucio]>=2024.5.0",
"dask[array]<2024.9.0",
"distributed<2024.9.0",
Expand Down
3 changes: 2 additions & 1 deletion scripts/merge_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ def main():
files = paths

final_files = [fs.unstrip_protocol(f) for f in files]
ak.to_parquet_row_groups(generate(final_files), args.target, extensionarray=True)
# TODO: switch to extensionarray=True when coffea images are fixed
ak.to_parquet_row_groups(generate(final_files), args.target, extensionarray=False)
logger.info("Done.")


Expand Down
3 changes: 2 additions & 1 deletion src/egamma_tnp/utils/runner_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,8 @@ def save_array_to_parquet(array, output_dir, dataset, subdir, prefix=None, repar
array = array.repartition(n_to_one=repartition_n)

logger.info(f"Saving array from dataset {dataset} to Parquet file in {output_path}")
return dak.to_parquet(array, output_path, compute=False, prefix=prefix, extensionarray=True)
# TODO: switch to extensionarray=True when coffea images are fixed
return dak.to_parquet(array, output_path, compute=False, prefix=prefix, extensionarray=False)


def process_to_compute(to_compute, output_dir, repartition_n=5):
Expand Down
18 changes: 9 additions & 9 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def assert_histograms_equal(h1, h2, flow):
assert h1.sum(flow=flow).variance == h2.sum(flow=flow).variance


def asert_arrays_equal(a1, a2):
def assert_arrays_equal(a1, a2):
for i in a1.fields:
assert ak.all(a1[i] == a2[i])
for j in a2.fields:
Expand Down Expand Up @@ -119,41 +119,41 @@ def test_cli():
with ProgressBar():
(out,) = dask.compute(to_compute)

ak.array_equal(
assert_arrays_equal(
out["get_tnp_arrays_1"][0]["sample/1"],
ak.from_parquet("tests/output/sample_1/get_tnp_arrays_1/NTuples-part0.parquet"),
)
assert os.path.exists("tests/output/sample_1/get_tnp_arrays_1/report.json")
ak.array_equal(
assert_arrays_equal(
out["get_tnp_arrays_1"][0]["sample/2"],
ak.from_parquet("tests/output/sample_2/get_tnp_arrays_1/NTuples-part0.parquet"),
)
assert os.path.exists("tests/output/sample_2/get_tnp_arrays_1/report.json")

ak.array_equal(
assert_arrays_equal(
out["get_tnp_arrays_2"][0]["sample/1"],
ak.from_parquet("tests/output/sample_1/get_tnp_arrays_2/NTuples-part0.parquet"),
)
assert os.path.exists("tests/output/sample_1/get_tnp_arrays_2/report.json")
ak.array_equal(
assert_arrays_equal(
out["get_tnp_arrays_2"][0]["sample/2"],
ak.from_parquet("tests/output/sample_2/get_tnp_arrays_2/NTuples-part0.parquet"),
)
assert os.path.exists("tests/output/sample_2/get_tnp_arrays_2/report.json")

ak.array_equal(
assert_arrays_equal(
out["get_passing_and_failing_probes_1_hlt"]["sample/1"]["passing"],
ak.from_parquet("tests/output/sample_1/get_passing_and_failing_probes_1/passing_HLT_Ele30_WPTight_Gsf_NTuples-part0.parquet"),
)
ak.array_equal(
assert_arrays_equal(
out["get_passing_and_failing_probes_1_hlt"]["sample/1"]["failing"],
ak.from_parquet("tests/output/sample_1/get_passing_and_failing_probes_1/failing_HLT_Ele30_WPTight_Gsf_NTuples-part0.parquet"),
)
ak.array_equal(
assert_arrays_equal(
out["get_passing_and_failing_probes_1_hlt"]["sample/2"]["passing"],
ak.from_parquet("tests/output/sample_2/get_passing_and_failing_probes_1/passing_HLT_Ele30_WPTight_Gsf_NTuples-part0.parquet"),
)
ak.array_equal(
assert_arrays_equal(
out["get_passing_and_failing_probes_1_hlt"]["sample/2"]["failing"],
ak.from_parquet("tests/output/sample_2/get_passing_and_failing_probes_1/failing_HLT_Ele30_WPTight_Gsf_NTuples-part0.parquet"),
)
Expand Down
23 changes: 15 additions & 8 deletions tests/test_tag_and_probe_nanoaod.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,13 @@
from egamma_tnp import ElectronTagNProbeFromNanoAOD, PhotonTagNProbeFromNanoAOD


def assert_arrays_equal(a1, a2):
    """Assert that two record arrays have the same fields with equal contents.

    Field order is ignored. The field names of both arrays are compared first
    so that a missing or extra field is reported as an assertion failure
    naming the offending fields, instead of surfacing as a field-lookup error
    in the middle of the comparison. Each field is then compared exactly once.

    Parameters
    ----------
    a1, a2
        Arrays exposing ``.fields`` and supporting lookup by field name.

    Raises
    ------
    AssertionError
        If the field names differ or any field's values are not all equal.
    """
    fields_1 = set(a1.fields)
    fields_2 = set(a2.fields)
    assert fields_1 == fields_2, f"Arrays have different fields: {sorted(fields_1 ^ fields_2)}"
    # The field sets are identical, so a single pass over a1's fields covers both arrays.
    for field in a1.fields:
        assert ak.all(a1[field] == a2[field]), f"Field {field!r} differs between arrays"


def test_tag_and_probe_electrons_trigger():
fileset = {"sample": {"files": {os.path.abspath("tests/samples/DYto2E.root"): "Events"}}}

Expand All @@ -31,8 +38,8 @@ def test_tag_and_probe_electrons_trigger():
result = tag_n_probe.get_passing_and_failing_probes(
"HLT_Ele32_WPTight_Gsf", cut_and_count=False, vars=["Electron_pt", "tag_Ele_eta", "el_pt", "el_eta", "MET_pt", "event"], compute=True
)["sample"]
ak.array_equal(result["passing"], solution[0])
ak.array_equal(result["failing"], solution[1])
assert_arrays_equal(result["passing"], solution[0])
assert_arrays_equal(result["failing"], solution[1])
assert len(result["passing"]) == 467
assert len(result["failing"]) == 183
assert len(solution[0]) == 467
Expand All @@ -59,8 +66,8 @@ def test_tag_and_probe_electrons_id():
result = tag_n_probe.get_passing_and_failing_probes(
"cutBased >= 4", cut_and_count=False, vars=["Electron_pt", "tag_Ele_eta", "el_pt", "el_eta", "MET_pt", "event"], compute=True
)["sample"]
ak.array_equal(result["passing"], solution[0])
ak.array_equal(result["failing"], solution[1])
assert_arrays_equal(result["passing"], solution[0])
assert_arrays_equal(result["failing"], solution[1])
assert len(result["passing"]) == 649
assert len(result["failing"]) == 0
assert len(solution[0]) == 649
Expand Down Expand Up @@ -91,8 +98,8 @@ def test_tag_and_probe_photons_trigger(start_from_diphotons):
result = tag_n_probe.get_passing_and_failing_probes(
"HLT_Ele32_WPTight_Gsf", cut_and_count=False, vars=["Photon_pt", "tag_Ele_eta", "ph_pt", "ph_eta", "MET_pt", "event"], compute=True
)["sample"]
ak.array_equal(result["passing"], solution[0])
ak.array_equal(result["failing"], solution[1])
assert_arrays_equal(result["passing"], solution[0])
assert_arrays_equal(result["failing"], solution[1])
if start_from_diphotons:
assert len(result["passing"]) == 336
assert len(result["failing"]) == 101
Expand Down Expand Up @@ -128,8 +135,8 @@ def test_tag_and_probe_photons_id(start_from_diphotons):
result = tag_n_probe.get_passing_and_failing_probes(
"cutBased >= 3", cut_and_count=False, vars=["Photon_pt", "tag_Ele_eta", "ph_pt", "ph_eta", "MET_pt", "event"], compute=True
)["sample"]
ak.array_equal(result["passing"], solution[0])
ak.array_equal(result["failing"], solution[1])
assert_arrays_equal(result["passing"], solution[0])
assert_arrays_equal(result["failing"], solution[1])
if start_from_diphotons:
assert len(result["passing"]) == 436
assert len(result["failing"]) == 146
Expand Down
23 changes: 15 additions & 8 deletions tests/test_tag_and_probe_ntuples.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,13 @@
from egamma_tnp import ElectronTagNProbeFromMiniNTuples, ElectronTagNProbeFromNanoNTuples, PhotonTagNProbeFromMiniNTuples, PhotonTagNProbeFromNanoNTuples


def assert_arrays_equal(a1, a2):
    """Assert that two record arrays have the same fields with equal contents.

    Field order is ignored. The field names of both arrays are compared first
    so that a missing or extra field is reported as an assertion failure
    naming the offending fields, instead of surfacing as a field-lookup error
    in the middle of the comparison. Each field is then compared exactly once.

    Parameters
    ----------
    a1, a2
        Arrays exposing ``.fields`` and supporting lookup by field name.

    Raises
    ------
    AssertionError
        If the field names differ or any field's values are not all equal.
    """
    fields_1 = set(a1.fields)
    fields_2 = set(a2.fields)
    assert fields_1 == fields_2, f"Arrays have different fields: {sorted(fields_1 ^ fields_2)}"
    # The field sets are identical, so a single pass over a1's fields covers both arrays.
    for field in a1.fields:
        assert ak.all(a1[field] == a2[field]), f"Field {field!r} differs between arrays"


def test_mini_tag_and_probe_electrons():
fileset = {"sample": {"files": {os.path.abspath("tests/samples/TnPNTuples_el.root"): "fitter_tree"}}}

Expand All @@ -27,8 +34,8 @@ def test_mini_tag_and_probe_electrons():
result = tag_n_probe.get_passing_and_failing_probes(
"passingCutBasedTight122XV1", cut_and_count=False, vars=["el_pt", "el_eta", "truePU", "tag_Ele_eta"], compute=True
)["sample"]
ak.array_equal(result["passing"], solution[0])
ak.array_equal(result["failing"], solution[1])
assert_arrays_equal(result["passing"], solution[0])
assert_arrays_equal(result["failing"], solution[1])
assert len(result["passing"]) == 414
assert len(result["failing"]) == 113
assert len(solution[0]) == 414
Expand All @@ -53,8 +60,8 @@ def test_mini_tag_and_probe_photons():
result = tag_n_probe.get_passing_and_failing_probes(
"passingCutBasedTight122XV1", cut_and_count=False, vars=["ph_et", "ph_eta", "truePU", "tag_Ele_eta"], compute=True
)["sample"]
ak.array_equal(result["passing"], solution[0])
ak.array_equal(result["failing"], solution[1])
assert_arrays_equal(result["passing"], solution[0])
assert_arrays_equal(result["failing"], solution[1])
assert len(result["passing"]) == 372
assert len(result["failing"]) == 73
assert len(solution[0]) == 372
Expand All @@ -79,8 +86,8 @@ def test_nano_tag_and_probe_electrons():
result = tag_n_probe.get_passing_and_failing_probes("cutBased >= 4", cut_and_count=False, vars=["el_pt", "el_eta", "PV_npvs", "tag_Ele_eta"], compute=True)[
"sample"
]
ak.array_equal(result["passing"], solution[0])
ak.array_equal(result["failing"], solution[1])
assert_arrays_equal(result["passing"], solution[0])
assert_arrays_equal(result["failing"], solution[1])
assert len(result["passing"]) == 978
assert len(result["failing"]) == 0
assert len(solution[0]) == 978
Expand All @@ -105,8 +112,8 @@ def test_nano_tag_and_probe_photons():
result = tag_n_probe.get_passing_and_failing_probes("cutBased >= 3", cut_and_count=False, vars=["ph_pt", "ph_eta", "PV_npvs", "tag_Ele_eta"], compute=True)[
"sample"
]
ak.array_equal(result["passing"], solution[0])
ak.array_equal(result["failing"], solution[1])
assert_arrays_equal(result["passing"], solution[0])
assert_arrays_equal(result["failing"], solution[1])
assert len(result["passing"]) == 669
assert len(result["failing"]) == 135
assert len(solution[0]) == 669
Expand Down

0 comments on commit ab98731

Please sign in to comment.