-
Notifications
You must be signed in to change notification settings - Fork 19
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: add computable report when graceful-read-failure is active in from_map
#415
Changes from all commits
3f4398d
eb3ea40
925412e
e26ba21
ead7fd1
3b16e8c
6fd0e6a
193da52
1113e67
ffe0f85
41379ff
4de29ea
06c5af2
a6a1d40
06fb97e
81bf584
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,14 +1,16 @@ | ||
from __future__ import annotations | ||
|
||
import functools | ||
import logging | ||
import math | ||
from collections.abc import Callable, Iterable, Mapping | ||
from dataclasses import dataclass | ||
from typing import TYPE_CHECKING, Any, cast | ||
from typing import TYPE_CHECKING, Any, cast, overload | ||
|
||
import awkward as ak | ||
import numpy as np | ||
from awkward.forms.listoffsetform import ListOffsetForm | ||
from awkward.forms.numpyform import NumpyForm | ||
from awkward.forms.recordform import RecordForm | ||
from awkward.types.numpytype import primitive_to_dtype | ||
from awkward.typetracer import length_zero_if_typetracer | ||
from dask.base import flatten, tokenize | ||
|
@@ -32,6 +34,7 @@ | |
new_array_object, | ||
typetracer_array, | ||
) | ||
from dask_awkward.utils import first, second | ||
|
||
if TYPE_CHECKING: | ||
from dask.array.core import Array as DaskArray | ||
|
@@ -496,29 +499,147 @@ def __call__(self, packed_arg): | |
) | ||
|
||
|
||
def _failure_report_string_form() -> ListOffsetForm:
    """Return a new form describing a single string.

    The string is a list (``i64`` offsets) tagged ``"string"`` whose
    content is ``uint8`` data tagged ``"char"``.
    """
    return ListOffsetForm(
        "i64",
        NumpyForm("uint8", parameters={"__array__": "char"}),
        parameters={"__array__": "string"},
    )


# Record layout of the report array; the contents below are listed in the
# same order as the field names that follow them.
_default_failure_array_form = RecordForm(
    [
        # "args": a list of strings
        ListOffsetForm("i64", _failure_report_string_form()),
        # "kwargs": a list of lists of strings
        ListOffsetForm(
            "i64",
            ListOffsetForm("i64", _failure_report_string_form()),
        ),
        # "exception": a single string
        _failure_report_string_form(),
        # "message": a single string
        _failure_report_string_form(),
    ],
    ["args", "kwargs", "exception", "message"],
)
|
||
|
||
def on_success_default(*args: Any, **kwargs: Any) -> ak.Array:
    """Build the default report entry for a call that did not raise.

    The positional and keyword arguments are accepted but not inspected.

    Returns
    -------
    ak.Array
        A length-one array generated from ``_default_failure_array_form``,
        so it has the same record fields as the failure report.
    """
    placeholder_layout = _default_failure_array_form.length_one_array(
        highlevel=False
    )
    return ak.Array(placeholder_layout)
|
||
|
||
def on_failure_default(
    exception: BaseException,
    *args: Any,
    **kwargs: Any,
) -> ak.Array:
    """Build the default report entry for a call that raised.

    Parameters
    ----------
    exception : BaseException
        The exception *instance* that was caught. Its class name and its
        ``str()`` are stored in the report.
    *args : Any
        Positional arguments of the failed call; each is stored as its
        ``repr``.
    **kwargs : Any
        Keyword arguments of the failed call; each is stored as a
        ``[key, repr(value)]`` pair.

    Returns
    -------
    ak.Array
        A length-one array of records with the fields ``args``,
        ``kwargs``, ``exception`` and ``message``.
    """
    return ak.Array(
        [
            {
                "args": [repr(a) for a in args],
                "kwargs": [[k, repr(v)] for k, v in kwargs.items()],
                # ``exception`` is an instance, so take the name of its class.
                "exception": type(exception).__name__,
                "message": str(exception),
            },
        ],
    )
|
||
|
||
class ReturnEmptyOnRaise:
    """Wrap a callable so that selected exceptions yield an empty result.

    Calling an instance returns a 2-tuple ``(result, report)``:

    - if the wrapped callable returns normally, ``result`` is its return
      value and ``report`` is ``on_success(*args, **kwargs)``;
    - if it raises one of ``allowed_exceptions``, ``result`` is
      ``fn.mock_empty(backend)`` and ``report`` is
      ``on_failure(err, *args, **kwargs)``.

    Any other exception propagates unchanged.

    Parameters
    ----------
    fn : Callable[..., ak.Array]
        The callable to wrap. It must also provide a
        ``mock_empty(backend)`` method, which is used on failure.
    allowed_exceptions : tuple[type[BaseException], ...]
        Exception classes that trigger the empty-result path.
    backend : BackendT
        Passed to ``fn.mock_empty`` on failure.
    on_success : Callable[..., ak.Array]
        Called with the original arguments to build the success report.
    on_failure : Callable[..., ak.Array]
        Called with the caught exception followed by the original
        arguments to build the failure report.
    """

    def __init__(
        self,
        fn: Callable[..., ak.Array],
        allowed_exceptions: tuple[type[BaseException], ...],
        backend: BackendT,
        on_success: Callable[..., ak.Array],
        on_failure: Callable[..., ak.Array],
    ):
        # NOTE(review): presumably a marker that lets other code find the
        # undecorated function on a wrapped instance -- confirm against users.
        self._empty_on_error_wrapped = fn
        self.fn = fn
        self.allowed_exceptions = allowed_exceptions
        self.backend = backend
        self.on_success = on_success
        self.on_failure = on_failure

    def recreate(self, fn):
        """Wrap a different ``fn`` using this instance's settings."""
        return return_empty_on_raise(
            fn,
            self.allowed_exceptions,
            self.backend,
            self.on_success,
            self.on_failure,
        )

    def __call__(self, *args, **kwargs):
        """Invoke the wrapped callable and return ``(result, report)``."""
        # Guard only the wrapped call: an exception raised while building
        # the success report must not be mistaken for a failure of ``fn``
        # (which would also throw away the result that was just produced).
        try:
            result = self.fn(*args, **kwargs)
        except self.allowed_exceptions as err:
            empty = self.fn.mock_empty(self.backend)
            return empty, self.on_failure(err, *args, **kwargs)
        return result, self.on_success(*args, **kwargs)
|
||
|
||
def return_empty_on_raise( | ||
fn: Callable, | ||
fn: Callable[..., ak.Array], | ||
allowed_exceptions: tuple[type[BaseException], ...], | ||
backend: BackendT, | ||
) -> Callable: | ||
@functools.wraps(fn) | ||
def wrapped(*args, **kwargs): | ||
try: | ||
return fn(*args, **kwargs) | ||
except allowed_exceptions as err: | ||
logmsg = ( | ||
"%s call failed with args %s and kwargs %s; empty array returned. %s" | ||
% ( | ||
str(fn), | ||
str(args), | ||
str(kwargs), | ||
str(err), | ||
) | ||
) | ||
logger.info(logmsg) | ||
return fn.mock_empty(backend) | ||
on_success: Callable[..., ak.Array], | ||
on_failure: Callable[..., ak.Array], | ||
) -> ReturnEmptyOnRaise: | ||
return ReturnEmptyOnRaise( | ||
fn, | ||
allowed_exceptions, | ||
backend, | ||
on_success, | ||
on_failure, | ||
) | ||
|
||
|
||
@overload | ||
def from_map( | ||
func: Callable, | ||
*iterables: Iterable, | ||
args: tuple[Any, ...] | None = None, | ||
label: str | None = None, | ||
token: str | None = None, | ||
divisions: tuple[int, ...] | tuple[None, ...] | None = None, | ||
meta: ak.Array | None = None, | ||
empty_on_raise: None = None, | ||
empty_backend: None = None, | ||
on_success: Callable[..., ak.Array] = on_success_default, | ||
on_failure: Callable[..., ak.Array] = on_failure_default, | ||
**kwargs: Any, | ||
) -> Array: | ||
... | ||
|
||
return wrapped | ||
|
||
@overload | ||
def from_map( | ||
func: Callable, | ||
*iterables: Iterable, | ||
empty_on_raise: tuple[type[BaseException], ...], | ||
empty_backend: BackendT, | ||
args: tuple[Any, ...] | None = None, | ||
label: str | None = None, | ||
token: str | None = None, | ||
divisions: tuple[int, ...] | tuple[None, ...] | None = None, | ||
meta: ak.Array | None = None, | ||
on_success: Callable[..., ak.Array] = on_success_default, | ||
on_failure: Callable[..., ak.Array] = on_failure_default, | ||
**kwargs: Any, | ||
) -> tuple[Array, Array]: | ||
... | ||
|
||
|
||
def from_map( | ||
|
@@ -531,8 +652,10 @@ def from_map( | |
meta: ak.Array | None = None, | ||
empty_on_raise: tuple[type[BaseException], ...] | None = None, | ||
empty_backend: BackendT | None = None, | ||
on_success: Callable[..., ak.Array] = on_success_default, | ||
on_failure: Callable[..., ak.Array] = on_failure_default, | ||
**kwargs: Any, | ||
) -> Array: | ||
) -> Array | tuple[Array, Array]: | ||
"""Create an Array collection from a custom mapping. | ||
|
||
Parameters | ||
|
@@ -654,6 +777,8 @@ def from_map( | |
io_func, | ||
allowed_exceptions=empty_on_raise, | ||
backend=empty_backend, | ||
on_success=on_success, | ||
on_failure=on_failure, | ||
) | ||
|
||
dsk = AwkwardInputLayer(name=name, inputs=inputs, io_func=io_func) | ||
|
@@ -664,6 +789,11 @@ def from_map( | |
else: | ||
result = new_array_object(hlg, name, meta=array_meta, npartitions=len(inputs)) | ||
|
||
if empty_on_raise and empty_backend: | ||
res = result.map_partitions(first, meta=array_meta, output_divisions=1) | ||
rep = result.map_partitions(second, meta=empty_typetracer()) | ||
Comment on lines
+793
to
+794
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Here And There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we not specify |
||
return res, rep | ||
|
||
return result | ||
|
||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd be tempted to remove the unwrapping here, and require that the caller unwrap the IO function. That way it's a bit more explicit?