From 218ce037826ad3802ac91dbb1f327b42d7e3dc6f Mon Sep 17 00:00:00 2001 From: Doug Davis Date: Wed, 29 Nov 2023 14:25:04 -0600 Subject: [PATCH 1/6] add report= to trigger dask-awkward creating graceful-failure-report --- src/uproot/_dask.py | 65 ++++++++++++++++++++++++++++++++------------- 1 file changed, 47 insertions(+), 18 deletions(-) diff --git a/src/uproot/_dask.py b/src/uproot/_dask.py index ae8ee8b69..137dbeeba 100644 --- a/src/uproot/_dask.py +++ b/src/uproot/_dask.py @@ -38,6 +38,7 @@ def dask( allow_missing=False, open_files=True, form_mapping=None, + report=None, **options, ): """ @@ -259,6 +260,7 @@ def dask( interp_options, form_mapping, steps_per_file, + report, ) else: return _get_dak_array_delay_open( @@ -274,6 +276,7 @@ def dask( interp_options, form_mapping, steps_per_file, + report, ) else: raise NotImplementedError() @@ -1151,6 +1154,7 @@ def _get_dak_array( interp_options, form_mapping, steps_per_file, + report, ): dask_awkward = uproot.extras.dask_awkward() awkward = uproot.extras.awkward() @@ -1306,15 +1310,27 @@ def real_filter_branch(branch): else: expected_form, form_mapping_info = form_mapping(base_form) + fn = _UprootRead( + ttrees, + common_keys, + interp_options, + base_form=base_form, + expected_form=expected_form, + form_mapping_info=form_mapping_info, + ) + + if report is not None: + return dask_awkward.from_map( + fn, + partition_args, + divisions=tuple(divisions), + label="from-uproot", + empty_on_raise=(Exception,), + empty_backend="cpu", + ) + return dask_awkward.from_map( - _UprootRead( - ttrees, - common_keys, - interp_options, - base_form=base_form, - expected_form=expected_form, - form_mapping_info=form_mapping_info, - ), + fn, partition_args, divisions=tuple(divisions), label="from-uproot", @@ -1334,6 +1350,7 @@ def _get_dak_array_delay_open( interp_options, form_mapping, steps_per_file, + report, ): dask_awkward = uproot.extras.dask_awkward() awkward = uproot.extras.awkward() @@ -1396,17 +1413,29 @@ def _get_dak_array_delay_open( else: expected_form, form_mapping_info = form_mapping(base_form) + fn = _UprootOpenAndRead( + custom_classes, + allow_missing, + real_options, + common_keys, + interp_options, + base_form=base_form, + expected_form=expected_form, + form_mapping_info=form_mapping_info, + ) + + if report is not None: + return dask_awkward.from_map( + fn, + partition_args, + divisions=None if divisions is None else tuple(divisions), + label="from-uproot", + empty_on_raise=(Exception,), + empty_backend="cpu", + ) + return dask_awkward.from_map( - _UprootOpenAndRead( - custom_classes, - allow_missing, - real_options, - common_keys, - interp_options, - base_form=base_form, - expected_form=expected_form, - form_mapping_info=form_mapping_info, - ), + fn, partition_args, divisions=None if divisions is None else tuple(divisions), label="from-uproot", From ff5bd940706e881e6f00c30f903c8c6e524b28f4 Mon Sep 17 00:00:00 2001 From: Doug Davis Date: Fri, 1 Dec 2023 09:07:07 -0600 Subject: [PATCH 2/6] add mock_empty --- src/uproot/_dask.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/uproot/_dask.py b/src/uproot/_dask.py index 137dbeeba..db122f2cd 100644 --- a/src/uproot/_dask.py +++ b/src/uproot/_dask.py @@ -949,6 +949,13 @@ def mock(self) -> AwkArray: behavior=self.form_mapping_info.behavior, ) + def mock_empty() -> AwkArray: + awkward = uproot.extras.awkward() + return ak.Array( + self.expected_form.length_zero_array(highlevel=False), + behavior=self.form_mapping_info.behavior, + ) + def prepare_for_projection(self) -> tuple[AwkArray, TypeTracerReport, dict]: awkward = uproot.extras.awkward() dask_awkward = uproot.extras.dask_awkward() From 0e0b045e52b4a2d21616309cb89af577a17cab36 Mon Sep 17 00:00:00 2001 From: Doug Davis Date: Fri, 1 Dec 2023 09:14:57 -0600 Subject: [PATCH 3/6] whoops --- src/uproot/_dask.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/uproot/_dask.py b/src/uproot/_dask.py index db122f2cd..ffbc0e031 100644 --- a/src/uproot/_dask.py +++ b/src/uproot/_dask.py @@ -949,9 +949,9 @@ def mock(self) -> AwkArray: behavior=self.form_mapping_info.behavior, ) - def mock_empty() -> AwkArray: + def mock_empty(self) -> AwkArray: awkward = uproot.extras.awkward() - return ak.Array( + return awkward.Array( self.expected_form.length_zero_array(highlevel=False), behavior=self.form_mapping_info.behavior, ) From f7eef9be99d4e8e24b6db7eb3f6d094e743f0dd6 Mon Sep 17 00:00:00 2001 From: Doug Davis Date: Fri, 1 Dec 2023 15:49:30 -0600 Subject: [PATCH 4/6] fix mock_empty --- src/uproot/_dask.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/uproot/_dask.py b/src/uproot/_dask.py index ffbc0e031..92b031c22 100644 --- a/src/uproot/_dask.py +++ b/src/uproot/_dask.py @@ -949,10 +949,12 @@ def mock(self) -> AwkArray: behavior=self.form_mapping_info.behavior, ) - def mock_empty(self) -> AwkArray: + def mock_empty(self, backend="cpu") -> AwkArray: awkward = uproot.extras.awkward() - return awkward.Array( + return awkward.to_backend( self.expected_form.length_zero_array(highlevel=False), + backend=backend, + highlevel=True, behavior=self.form_mapping_info.behavior, ) From d50f41c6429cca47dcf3543cab238f8432316e70 Mon Sep 17 00:00:00 2001 From: Doug Davis Date: Fri, 1 Dec 2023 16:07:28 -0600 Subject: [PATCH 5/6] backend is passed in upstream; use OSError --- src/uproot/_dask.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/uproot/_dask.py b/src/uproot/_dask.py index 92b031c22..eda499694 100644 --- a/src/uproot/_dask.py +++ b/src/uproot/_dask.py @@ -949,7 +949,7 @@ def mock(self) -> AwkArray: behavior=self.form_mapping_info.behavior, ) - def mock_empty(self, backend="cpu") -> AwkArray: + def mock_empty(self, backend) -> AwkArray: awkward = uproot.extras.awkward() return awkward.to_backend( self.expected_form.length_zero_array(highlevel=False), @@ -1334,7 +1334,7 @@ def real_filter_branch(branch): partition_args, divisions=tuple(divisions), label="from-uproot", - empty_on_raise=(Exception,), + empty_on_raise=(OSError,) empty_backend="cpu", ) @@ -1439,7 +1439,7 @@ def _get_dak_array_delay_open( partition_args, divisions=None if divisions is None else tuple(divisions), label="from-uproot", - empty_on_raise=(Exception,), + empty_on_raise=(OSError,), empty_backend="cpu", ) From 70445be1f60fbd2dd064985ce20de6878073a6a0 Mon Sep 17 00:00:00 2001 From: Doug Davis Date: Sat, 2 Dec 2023 09:02:44 -0600 Subject: [PATCH 6/6] Update src/uproot/_dask.py Co-authored-by: Lindsey Gray --- src/uproot/_dask.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uproot/_dask.py b/src/uproot/_dask.py index eda499694..5045a8fd9 100644 --- a/src/uproot/_dask.py +++ b/src/uproot/_dask.py @@ -1334,7 +1334,7 @@ def real_filter_branch(branch): partition_args, divisions=tuple(divisions), label="from-uproot", - empty_on_raise=(OSError,) + empty_on_raise=(OSError,), empty_backend="cpu", )