diff --git a/pyproject.toml b/pyproject.toml index 2d084e0a..ba95b153 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,6 @@ classifiers = [ "Topic :: Utilities", ] dependencies = [ - "awkward>=2.6.8", "coffea[dask,rucio]>=2024.5.0", "dask[array]<2024.9.0", "distributed<2024.9.0", diff --git a/scripts/merge_parquet.py b/scripts/merge_parquet.py index 1b064e26..bfcf1585 100644 --- a/scripts/merge_parquet.py +++ b/scripts/merge_parquet.py @@ -34,7 +34,8 @@ def main(): files = paths final_files = [fs.unstrip_protocol(f) for f in files] - ak.to_parquet_row_groups(generate(final_files), args.target, extensionarray=True) + # TODO: switch to extensionarray=True when coffea images are fixed + ak.to_parquet_row_groups(generate(final_files), args.target, extensionarray=False) logger.info("Done.") diff --git a/src/egamma_tnp/utils/runner_utils.py b/src/egamma_tnp/utils/runner_utils.py index ebf0c3b5..07d032b5 100644 --- a/src/egamma_tnp/utils/runner_utils.py +++ b/src/egamma_tnp/utils/runner_utils.py @@ -191,7 +191,8 @@ def save_array_to_parquet(array, output_dir, dataset, subdir, prefix=None, repar array = array.repartition(n_to_one=repartition_n) logger.info(f"Saving array from dataset {dataset} to Parquet file in {output_path}") - return dak.to_parquet(array, output_path, compute=False, prefix=prefix, extensionarray=True) + # TODO: switch to extensionarray=True when coffea images are fixed + return dak.to_parquet(array, output_path, compute=False, prefix=prefix, extensionarray=False) def process_to_compute(to_compute, output_dir, repartition_n=5): diff --git a/tests/test_cli.py b/tests/test_cli.py index c2647c5c..2d6c6d4a 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -20,7 +20,7 @@ def assert_histograms_equal(h1, h2, flow): assert h1.sum(flow=flow).variance == h2.sum(flow=flow).variance -def asert_arrays_equal(a1, a2): +def assert_arrays_equal(a1, a2): for i in a1.fields: assert ak.all(a1[i] == a2[i]) for j in a2.fields: @@ -119,41 +119,41 @@ def test_cli(): with ProgressBar(): (out,) = dask.compute(to_compute) - ak.array_equal( + assert_arrays_equal( out["get_tnp_arrays_1"][0]["sample/1"], ak.from_parquet("tests/output/sample_1/get_tnp_arrays_1/NTuples-part0.parquet"), ) assert os.path.exists("tests/output/sample_1/get_tnp_arrays_1/report.json") - ak.array_equal( + assert_arrays_equal( out["get_tnp_arrays_1"][0]["sample/2"], ak.from_parquet("tests/output/sample_2/get_tnp_arrays_1/NTuples-part0.parquet"), ) assert os.path.exists("tests/output/sample_2/get_tnp_arrays_1/report.json") - ak.array_equal( + assert_arrays_equal( out["get_tnp_arrays_2"][0]["sample/1"], ak.from_parquet("tests/output/sample_1/get_tnp_arrays_2/NTuples-part0.parquet"), ) assert os.path.exists("tests/output/sample_1/get_tnp_arrays_2/report.json") - ak.array_equal( + assert_arrays_equal( out["get_tnp_arrays_2"][0]["sample/2"], ak.from_parquet("tests/output/sample_2/get_tnp_arrays_2/NTuples-part0.parquet"), ) assert os.path.exists("tests/output/sample_2/get_tnp_arrays_2/report.json") - ak.array_equal( + assert_arrays_equal( out["get_passing_and_failing_probes_1_hlt"]["sample/1"]["passing"], ak.from_parquet("tests/output/sample_1/get_passing_and_failing_probes_1/passing_HLT_Ele30_WPTight_Gsf_NTuples-part0.parquet"), ) - ak.array_equal( + assert_arrays_equal( out["get_passing_and_failing_probes_1_hlt"]["sample/1"]["failing"], ak.from_parquet("tests/output/sample_1/get_passing_and_failing_probes_1/failing_HLT_Ele30_WPTight_Gsf_NTuples-part0.parquet"), ) - ak.array_equal( + assert_arrays_equal( out["get_passing_and_failing_probes_1_hlt"]["sample/2"]["passing"], ak.from_parquet("tests/output/sample_2/get_passing_and_failing_probes_1/passing_HLT_Ele30_WPTight_Gsf_NTuples-part0.parquet"), ) - ak.array_equal( + assert_arrays_equal( out["get_passing_and_failing_probes_1_hlt"]["sample/2"]["failing"], ak.from_parquet("tests/output/sample_2/get_passing_and_failing_probes_1/failing_HLT_Ele30_WPTight_Gsf_NTuples-part0.parquet"), ) diff --git a/tests/test_tag_and_probe_nanoaod.py b/tests/test_tag_and_probe_nanoaod.py index 153db510..ae46a611 100644 --- a/tests/test_tag_and_probe_nanoaod.py +++ b/tests/test_tag_and_probe_nanoaod.py @@ -10,6 +10,13 @@ from egamma_tnp import ElectronTagNProbeFromNanoAOD, PhotonTagNProbeFromNanoAOD +def assert_arrays_equal(a1, a2): + for i in a1.fields: + assert ak.all(a1[i] == a2[i]) + for j in a2.fields: + assert ak.all(a1[j] == a2[j]) + + def test_tag_and_probe_electrons_trigger(): fileset = {"sample": {"files": {os.path.abspath("tests/samples/DYto2E.root"): "Events"}}} @@ -31,8 +38,8 @@ def test_tag_and_probe_electrons_trigger(): result = tag_n_probe.get_passing_and_failing_probes( "HLT_Ele32_WPTight_Gsf", cut_and_count=False, vars=["Electron_pt", "tag_Ele_eta", "el_pt", "el_eta", "MET_pt", "event"], compute=True )["sample"] - ak.array_equal(result["passing"], solution[0]) - ak.array_equal(result["failing"], solution[1]) + assert_arrays_equal(result["passing"], solution[0]) + assert_arrays_equal(result["failing"], solution[1]) assert len(result["passing"]) == 467 assert len(result["failing"]) == 183 assert len(solution[0]) == 467 @@ -59,8 +66,8 @@ def test_tag_and_probe_electrons_id(): result = tag_n_probe.get_passing_and_failing_probes( "cutBased >= 4", cut_and_count=False, vars=["Electron_pt", "tag_Ele_eta", "el_pt", "el_eta", "MET_pt", "event"], compute=True )["sample"] - ak.array_equal(result["passing"], solution[0]) - ak.array_equal(result["failing"], solution[1]) + assert_arrays_equal(result["passing"], solution[0]) + assert_arrays_equal(result["failing"], solution[1]) assert len(result["passing"]) == 649 assert len(result["failing"]) == 0 assert len(solution[0]) == 649 @@ -91,8 +98,8 @@ def test_tag_and_probe_photons_trigger(start_from_diphotons): result = tag_n_probe.get_passing_and_failing_probes( "HLT_Ele32_WPTight_Gsf", cut_and_count=False, vars=["Photon_pt", "tag_Ele_eta", "ph_pt", "ph_eta", "MET_pt", "event"], compute=True )["sample"] - ak.array_equal(result["passing"], solution[0]) - ak.array_equal(result["failing"], solution[1]) + assert_arrays_equal(result["passing"], solution[0]) + assert_arrays_equal(result["failing"], solution[1]) if start_from_diphotons: assert len(result["passing"]) == 336 assert len(result["failing"]) == 101 @@ -128,8 +135,8 @@ def test_tag_and_probe_photons_id(start_from_diphotons): result = tag_n_probe.get_passing_and_failing_probes( "cutBased >= 3", cut_and_count=False, vars=["Photon_pt", "tag_Ele_eta", "ph_pt", "ph_eta", "MET_pt", "event"], compute=True )["sample"] - ak.array_equal(result["passing"], solution[0]) - ak.array_equal(result["failing"], solution[1]) + assert_arrays_equal(result["passing"], solution[0]) + assert_arrays_equal(result["failing"], solution[1]) if start_from_diphotons: assert len(result["passing"]) == 436 assert len(result["failing"]) == 146 diff --git a/tests/test_tag_and_probe_ntuples.py b/tests/test_tag_and_probe_ntuples.py index ac898c68..57aa5b8f 100644 --- a/tests/test_tag_and_probe_ntuples.py +++ b/tests/test_tag_and_probe_ntuples.py @@ -9,6 +9,13 @@ from egamma_tnp import ElectronTagNProbeFromMiniNTuples, ElectronTagNProbeFromNanoNTuples, PhotonTagNProbeFromMiniNTuples, PhotonTagNProbeFromNanoNTuples +def assert_arrays_equal(a1, a2): + for i in a1.fields: + assert ak.all(a1[i] == a2[i]) + for j in a2.fields: + assert ak.all(a1[j] == a2[j]) + + def test_mini_tag_and_probe_electrons(): fileset = {"sample": {"files": {os.path.abspath("tests/samples/TnPNTuples_el.root"): "fitter_tree"}}} @@ -27,8 +34,8 @@ def test_mini_tag_and_probe_electrons(): result = tag_n_probe.get_passing_and_failing_probes( "passingCutBasedTight122XV1", cut_and_count=False, vars=["el_pt", "el_eta", "truePU", "tag_Ele_eta"], compute=True )["sample"] - ak.array_equal(result["passing"], solution[0]) - ak.array_equal(result["failing"], solution[1]) + assert_arrays_equal(result["passing"], solution[0]) + assert_arrays_equal(result["failing"], solution[1]) assert len(result["passing"]) == 414 assert len(result["failing"]) == 113 assert len(solution[0]) == 414 @@ -53,8 +60,8 @@ def test_mini_tag_and_probe_photons(): result = tag_n_probe.get_passing_and_failing_probes( "passingCutBasedTight122XV1", cut_and_count=False, vars=["ph_et", "ph_eta", "truePU", "tag_Ele_eta"], compute=True )["sample"] - ak.array_equal(result["passing"], solution[0]) - ak.array_equal(result["failing"], solution[1]) + assert_arrays_equal(result["passing"], solution[0]) + assert_arrays_equal(result["failing"], solution[1]) assert len(result["passing"]) == 372 assert len(result["failing"]) == 73 assert len(solution[0]) == 372 @@ -79,8 +86,8 @@ def test_nano_tag_and_probe_electrons(): result = tag_n_probe.get_passing_and_failing_probes("cutBased >= 4", cut_and_count=False, vars=["el_pt", "el_eta", "PV_npvs", "tag_Ele_eta"], compute=True)[ "sample" ] - ak.array_equal(result["passing"], solution[0]) - ak.array_equal(result["failing"], solution[1]) + assert_arrays_equal(result["passing"], solution[0]) + assert_arrays_equal(result["failing"], solution[1]) assert len(result["passing"]) == 978 assert len(result["failing"]) == 0 assert len(solution[0]) == 978 @@ -105,8 +112,8 @@ def test_nano_tag_and_probe_photons(): result = tag_n_probe.get_passing_and_failing_probes("cutBased >= 3", cut_and_count=False, vars=["ph_pt", "ph_eta", "PV_npvs", "tag_Ele_eta"], compute=True)[ "sample" ] - ak.array_equal(result["passing"], solution[0]) - ak.array_equal(result["failing"], solution[1]) + assert_arrays_equal(result["passing"], solution[0]) + assert_arrays_equal(result["failing"], solution[1]) assert len(result["passing"]) == 669 assert len(result["failing"]) == 135 assert len(solution[0]) == 669