From 4c7265dfcf0bea167bfe0885a57f0b660691cc6e Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Wed, 8 Nov 2023 18:23:49 +0000 Subject: [PATCH 01/12] feat: add attrs and highlevel --- src/dask_awkward/lib/core.py | 9 ++ src/dask_awkward/lib/io/io.py | 14 +- src/dask_awkward/lib/io/json.py | 1 + src/dask_awkward/lib/io/parquet.py | 37 ++++- src/dask_awkward/lib/operations.py | 5 +- src/dask_awkward/lib/reducers.py | 226 ++++++++++++++++++++++------- src/dask_awkward/lib/str.py | 50 +++++++ src/dask_awkward/lib/structure.py | 15 +- 8 files changed, 285 insertions(+), 72 deletions(-) diff --git a/src/dask_awkward/lib/core.py b/src/dask_awkward/lib/core.py index a7a6458f..79ba9f30 100644 --- a/src/dask_awkward/lib/core.py +++ b/src/dask_awkward/lib/core.py @@ -1482,6 +1482,7 @@ def new_array_object( *, meta: ak.Array | None = None, behavior: dict | None = None, + attrs: dict | None = None, npartitions: int | None = None, divisions: tuple[int, ...] | tuple[None, ...] | None = None, ) -> Array: @@ -1500,6 +1501,10 @@ def new_array_object( typetracer for the new Array. If the configuration option ``awkward.compute-unknown-meta`` is set to ``False``, undefined `meta` will be assigned an empty typetracer. + behavior : dict, optional + Custom #ak.behavior for the output array. + attrs : dict, optional + Custom attributes for the output array. npartitions : int, optional Total number of partitions; if used `divisions` will be a tuple of length `npartitions` + 1 with all elements``None``. @@ -1543,6 +1548,8 @@ def new_array_object( if behavior is not None: actual_meta.behavior = behavior + if attrs is not None: + actual_meta.attrs = attrs out = Array(dsk, name, actual_meta, divs) if actual_meta.__doc__ != actual_meta.__class__.__doc__: @@ -1872,6 +1879,8 @@ def non_trivial_reduction( keepdims: bool, mask_identity: bool, reducer: Callable, + behavior: dict | None = None, + attrs: dict | None = None, combiner: Callable | None = None, token: str | None = None, dtype: Any | None = None, diff --git a/src/dask_awkward/lib/io/io.py b/src/dask_awkward/lib/io/io.py index 6153021b..af23b662 100644 --- a/src/dask_awkward/lib/io/io.py +++ b/src/dask_awkward/lib/io/io.py @@ -104,14 +104,17 @@ def from_awkward( class _FromListsFn: - def __init__(self, behavior: dict | None = None): + def __init__(self, behavior: dict | None = None, attrs: dict | None = None): self.behavior = behavior + self.attrs = attrs def __call__(self, x: list) -> ak.Array: - return ak.Array(x, behavior=self.behavior) + return ak.Array(x, behavior=self.behavior, attrs=self.attrs) -def from_lists(source: list, behavior: dict | None = None) -> Array: +def from_lists( + source: list, behavior: dict | None = None, attrs: dict | None = None +) -> Array: """Create an Array collection from a list of lists. Parameters @@ -140,11 +143,10 @@ def from_lists(source: list, behavior: dict | None = None) -> Array: lists = list(source) divs = (0, *np.cumsum(list(map(len, lists)))) return from_map( - _FromListsFn(behavior=behavior), + _FromListsFn(behavior=behavior, attrs=attrs), lists, - meta=typetracer_array(ak.Array(lists[0])), + meta=typetracer_array(ak.Array(lists[0], attrs=attrs, behavior=behavior)), divisions=divs, - behavior=behavior, label="from-lists", ) diff --git a/src/dask_awkward/lib/io/json.py b/src/dask_awkward/lib/io/json.py index 43ba19e3..5c06cd1a 100644 --- a/src/dask_awkward/lib/io/json.py +++ b/src/dask_awkward/lib/io/json.py @@ -433,6 +433,7 @@ def from_json( resize: float = 8, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, blocksize: int | str | None = None, delimiter: bytes | None = None, compression: str | None = "infer", diff --git a/src/dask_awkward/lib/io/parquet.py b/src/dask_awkward/lib/io/parquet.py index 8a3c0417..265a20a4 100644 --- a/src/dask_awkward/lib/io/parquet.py +++ b/src/dask_awkward/lib/io/parquet.py @@ -42,6 +42,7 @@ def __init__( unnamed_root: bool = False, original_form: Form | None = None, behavior: dict | None = None, + attrs: dict | None = None, **kwargs: Any, ) -> None: self.fs = fs @@ -53,6 +54,7 @@ def __init__( self.columns = [f".{c}" for c in self.columns] self.original_form = original_form self.behavior = behavior + self.attrs = attrs self.kwargs = kwargs @abc.abstractmethod @@ -111,17 +113,22 @@ def __init__( ) def __call__(self, source: Any) -> Any: - array = ak_from_parquet._load( + layout = ak_from_parquet._load( [source], parquet_columns=self.columns, subrg=[None], subform=self.form, - highlevel=True, + highlevel=False, + attrs=None, + behavior=None, fs=self.fs, - behavior=self.behavior, **self.kwargs, ) - return ak.Array(unproject_layout(self.original_form, array.layout)) + return ak.Array( + unproject_layout(self.original_form, layout), + attrs=self.attrs, + behavior=self.behavior, + ) def project_columns(self, columns): return _FromParquetFileWiseFn( @@ -130,6 +137,7 @@ def project_columns(self, columns): listsep=self.listsep, unnamed_root=self.unnamed_root, original_form=self.form, + attrs=self.attrs, behavior=self.behavior, **self.kwargs, ) @@ -145,6 +153,7 @@ def __init__( unnamed_root: bool = False, original_form: Form | None = None, behavior: dict | None = None, + attrs: dict | None = None, **kwargs: Any, ) -> None: super().__init__( @@ -154,6 +163,7 @@ def __init__( unnamed_root=unnamed_root, original_form=original_form, behavior=behavior, + attrs=attrs, **kwargs, ) @@ -161,17 +171,22 @@ def __call__(self, pair: Any) -> ak.Array: subrg, source = pair if isinstance(subrg, int): subrg = [[subrg]] - array = ak_from_parquet._load( + layout = ak_from_parquet._load( [source], parquet_columns=self.columns, subrg=subrg, subform=self.form, - highlevel=True, + highlevel=False, + attrs=None, + behavior=None, fs=self.fs, - behavior=self.behavior, **self.kwargs, ) - return ak.Array(unproject_layout(self.original_form, array.layout)) + return ak.Array( + unproject_layout(self.original_form, layout), + behavior=self.behavior, + attrs=self.attrs, + ) def project_columns(self, columns): return _FromParquetFragmentWiseFn( @@ -180,6 +195,7 @@ def project_columns(self, columns): unnamed_root=self.unnamed_root, original_form=self.form, behavior=self.behavior, + attrs=self.attrs, **self.kwargs, ) @@ -194,6 +210,7 @@ def from_parquet( generate_bitmasks: bool = False, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ignore_metadata: bool = True, scan_files: bool = False, split_row_groups: bool | None = False, @@ -263,6 +280,8 @@ def from_parquet( ignore_metadata, scan_files, split_row_groups, + behavior, + attrs, ) ( @@ -307,6 +326,7 @@ def from_parquet( footer_sample_size=footer_sample_size, generate_bitmasks=generate_bitmasks, behavior=behavior, + attrs=attrs, ), actual_paths, label=label, @@ -345,6 +365,7 @@ def from_parquet( footer_sample_size=footer_sample_size, generate_bitmasks=generate_bitmasks, behavior=behavior, + attrs=attrs, ), pairs, label=label, diff --git a/src/dask_awkward/lib/operations.py b/src/dask_awkward/lib/operations.py index 32e94479..929ead25 100644 --- a/src/dask_awkward/lib/operations.py +++ b/src/dask_awkward/lib/operations.py @@ -33,6 +33,7 @@ def concatenate( mergebool: bool = True, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: label = "concatenate" token = tokenize(arrays, axis, mergebool, highlevel, behavior) @@ -49,7 +50,7 @@ def concatenate( g[(name, i)] = k i += 1 - meta = ak.concatenate(metas) + meta = ak.concatenate(metas, behavior=behavior, attrs=attrs) assert isinstance(meta, ak.Array) prev_names = [iarr.name for iarr in arrays] @@ -65,7 +66,7 @@ def concatenate( if partition_compatibility(*arrays) == PartitionCompatibility.NO: raise IncompatiblePartitions("concatenate", *arrays) - fn = _ConcatenateFnAxisGT0(axis=axis) + fn = _ConcatenateFnAxisGT0(axis=axis, behavior=behavior, attrs=attrs) return map_partitions(fn, *arrays) else: diff --git a/src/dask_awkward/lib/reducers.py b/src/dask_awkward/lib/reducers.py index 716327cc..ccf75d94 100644 --- a/src/dask_awkward/lib/reducers.py +++ b/src/dask_awkward/lib/reducers.py @@ -39,6 +39,8 @@ def all( axis: int | None = None, keepdims: bool = False, mask_identity: bool = False, + behavior: dict | None = None, + attrs: dict | None = None, ) -> Any: if axis is None or axis == 0 or axis == -1 * array.ndim: return non_trivial_reduction( @@ -49,6 +51,8 @@ def all( is_positional=False, keepdims=keepdims, mask_identity=mask_identity, + behavior=behavior, + attrs=attrs, ) else: return map_partitions( @@ -58,6 +62,8 @@ def all( axis=axis, keepdims=keepdims, mask_identity=mask_identity, + behavior=behavior, + attrs=attrs, ) @@ -67,6 +73,8 @@ def any( axis: int | None = None, keepdims: bool = False, mask_identity: bool = False, + behavior: dict | None = None, + attrs: dict | None = None, ) -> Any: if axis is None or axis == 0 or axis == -1 * array.ndim: return non_trivial_reduction( @@ -77,6 +85,8 @@ def any( is_positional=False, keepdims=keepdims, mask_identity=mask_identity, + behavior=behavior, + attrs=attrs, ) else: return map_partitions( @@ -86,6 +96,8 @@ def any( axis=axis, keepdims=keepdims, mask_identity=mask_identity, + behavior=behavior, + attrs=attrs, ) @@ -95,6 +107,8 @@ def argmax( axis: int | None = None, keepdims: bool = False, mask_identity: bool = True, + behavior: dict | None = None, + attrs: dict | None = None, ) -> Any: if axis is None or axis == 0 or axis == -1 * array.ndim: return non_trivial_reduction( @@ -105,6 +119,8 @@ def argmax( is_positional=True, keepdims=keepdims, mask_identity=mask_identity, + behavior=behavior, + attrs=attrs, ) else: return map_partitions( @@ -114,6 +130,8 @@ def argmax( axis=axis, keepdims=keepdims, mask_identity=mask_identity, + behavior=behavior, + attrs=attrs, ) @@ -123,6 +141,8 @@ def argmin( axis: int | None = None, keepdims: bool = False, mask_identity: bool = True, + behavior: dict | None = None, + attrs: dict | None = None, ) -> Any: if axis is None or axis == 0 or axis == -1 * array.ndim: return non_trivial_reduction( @@ -133,6 +153,8 @@ def argmin( is_positional=True, keepdims=keepdims, mask_identity=mask_identity, + behavior=behavior, + attrs=attrs, ) else: return map_partitions( @@ -142,18 +164,22 @@ def argmin( axis=axis, keepdims=keepdims, mask_identity=mask_identity, + behavior=behavior, + attrs=attrs, ) @borrow_docstring(ak.corr) def corr( - x, - y, - weight=None, - axis=None, - keepdims=False, - mask_identity=False, -): + x: Array, + y: Array, + weight: Array | int | float | complex | None = None, + axis: int | None = None, + keepdims: bool = False, + mask_identity: bool = False, + behavior: dict | None = None, + attrs: dict | None = None, +) -> Any: raise DaskAwkwardNotImplemented("TODO") @@ -163,6 +189,8 @@ def count( axis: int | None = None, keepdims: bool = False, mask_identity: bool = False, + behavior: dict | None = None, + attrs: dict | None = None, ) -> Any: if axis is None or axis == 0 or axis == -1 * array.ndim: return non_trivial_reduction( @@ -174,6 +202,8 @@ def count( is_positional=False, keepdims=keepdims, mask_identity=mask_identity, + behavior=behavior, + attrs=attrs, ) else: return map_partitions( @@ -183,6 +213,8 @@ def count( axis=axis, keepdims=keepdims, mask_identity=mask_identity, + behavior=behavior, + attrs=attrs, ) @@ -192,6 +224,8 @@ def count_nonzero( axis: int | None = None, keepdims: bool = False, mask_identity: bool = False, + behavior: dict | None = None, + attrs: dict | None = None, ) -> Any: if axis is None or axis == 0 or axis == -1 * array.ndim: return non_trivial_reduction( @@ -203,6 +237,8 @@ def count_nonzero( is_positional=False, keepdims=keepdims, mask_identity=mask_identity, + behavior=behavior, + attrs=attrs, ) else: return map_partitions( @@ -212,33 +248,47 @@ def count_nonzero( axis=axis, keepdims=keepdims, mask_identity=mask_identity, + behavior=behavior, + attrs=attrs, ) @borrow_docstring(ak.covar) def covar( - x, - y, - weight=None, - axis=None, - keepdims=False, - mask_identity=False, -): + x: Array, + y: Array, + weight: Array | int | float | complex | None = None, + axis: int | None = None, + keepdims: bool = False, + mask_identity: bool = False, + behavior: dict | None = None, + attrs: dict | None = None, +) -> Any: raise DaskAwkwardNotImplemented("TODO") @borrow_docstring(ak.linear_fit) def linear_fit( - x, - y, - weight=None, - axis=None, - keepdims=False, - mask_identity=False, -): + x: Array, + y: Array, + weight: Array | int | float | complex | None = None, + axis: int | None = None, + keepdims: bool = False, + mask_identity: bool = False, + behavior: dict | None = None, + attrs: dict | None = None, +) -> Any: raise DaskAwkwardNotImplemented("TODO") +class _MaxFn: + def __init__(self, **kwargs): + self.kwargs = kwargs + + def __call__(self, array, **kwargs): + return ak.max(array, **self.kwargs, **kwargs) + + @borrow_docstring(ak.max) def max( array: Array, @@ -246,36 +296,49 @@ def max( keepdims: bool = False, initial: float | None = None, mask_identity: bool = True, + behavior: dict | None = None, + attrs: dict | None = None, ) -> Any: if axis is None or axis == 0 or axis == -1 * array.ndim: return non_trivial_reduction( axis=axis, label="max", array=array, - reducer=ak.max, + reducer=_MaxFn(initial=initial), is_positional=False, keepdims=keepdims, mask_identity=mask_identity, + behavior=behavior, + attrs=attrs, ) else: return map_partitions( - ak.max, + _MaxFn(initial=initial), array, output_divisions=1, axis=axis, keepdims=keepdims, mask_identity=mask_identity, + behavior=behavior, + attrs=attrs, ) @borrow_docstring(ak.mean) def mean( - array, - weight=None, - axis=None, - keepdims=False, - mask_identity=False, -): + array: Array, + weight: Array | int | float | complex | None = None, + axis: int | None = None, + keepdims: bool = False, + mask_identity: bool = False, + behavior: dict | None = None, + attrs: dict | None = None, +) -> Array: + if weight is not None: + raise DaskAwkwardNotImplemented( + f"weight={weight} is not supported for this array yet." + ) + if axis == 0 or axis == -1 * array.ndim: raise DaskAwkwardNotImplemented( f"axis={axis} is not supported for this array yet." @@ -288,10 +351,20 @@ def mean( axis=axis, keepdims=keepdims, mask_identity=mask_identity, + behavior=behavior, + attrs=attrs, ) raise DaskAwkwardNotImplemented("TODO") +class _MinFn: + def __init__(self, **kwargs): + self.kwargs = kwargs + + def __call__(self, array, **kwargs): + return ak.min(array, **self.kwargs, **kwargs) + + @borrow_docstring(ak.min) def min( array: Array, @@ -299,42 +372,57 @@ def min( keepdims: bool = False, initial: float | None = None, mask_identity: bool = True, + behavior: dict | None = None, + attrs: dict | None = None, ) -> Any: if axis is None or axis == 0 or axis == -1 * array.ndim: return non_trivial_reduction( axis=axis, label="min", array=array, - reducer=ak.min, + reducer=_MinFn(initial=initial), is_positional=False, keepdims=keepdims, mask_identity=mask_identity, + behavior=behavior, + attrs=attrs, ) else: return map_partitions( - ak.min, + _MinFn(initial=initial), array, output_divisions=1, axis=axis, keepdims=keepdims, mask_identity=mask_identity, + behavior=behavior, + attrs=attrs, ) @borrow_docstring(ak.moment) def moment( - x, - n, - weight=None, - axis=None, - keepdims=False, - mask_identity=False, -): + x: Array, + n: int, + weight: Array | int | float | complex | None = None, + axis: int | None = None, + keepdims: bool = False, + mask_identity: bool = False, + behavior: dict | None = None, + attrs: dict | None = None, +) -> Any: raise DaskAwkwardNotImplemented("TODO") @borrow_docstring(ak.prod) -def prod(array, axis=None, keepdims=False, mask_identity=False): +def prod( + array: Array, + axis: int | None = None, + keepdims: bool = False, + mask_identity: bool = False, + behavior: dict | None = None, + attrs: dict | None = None, +) -> Any: if axis is None or axis == 0 or axis == -1 * array.ndim: return non_trivial_reduction( axis=axis, @@ -344,6 +432,8 @@ def prod(array, axis=None, keepdims=False, mask_identity=False): is_positional=False, keepdims=keepdims, mask_identity=mask_identity, + behavior=behavior, + attrs=attrs, ) else: return map_partitions( @@ -353,16 +443,28 @@ def prod(array, axis=None, keepdims=False, mask_identity=False): axis=axis, keepdims=keepdims, mask_identity=mask_identity, + behavior=behavior, + attrs=attrs, ) @borrow_docstring(ak.ptp) -def ptp(arr, axis=None, keepdims=False, mask_identity=True): +def ptp( + arr: Array, + axis: int | None = None, + keepdims: bool = False, + mask_identity: bool = True, +) -> Any: raise DaskAwkwardNotImplemented("TODO") @borrow_docstring(ak.softmax) -def softmax(x, axis=None, keepdims=False, mask_identity=False): +def softmax( + x: Array, + axis: int | None = None, + keepdims: bool = False, + mask_identity: bool = False, +) -> Any: raise DaskAwkwardNotImplemented("TODO") @@ -376,13 +478,15 @@ def __call__(self, array): @borrow_docstring(ak.std) def std( - x, - weight=None, - ddof=0, - axis=None, - keepdims=False, - mask_identity=False, -): + x: Array, + weight: Array | int | float | complex | None = None, + ddof: int = 0, + axis: int | None = None, + keepdims: bool = False, + mask_identity: bool = False, + behavior: dict | None = None, + attrs: dict | None = None, +) -> Any: if weight is not None: raise DaskAwkwardNotImplemented("weight argument is not supported.") if axis is None or axis == 0 or axis == -1 * x.ndim: @@ -396,6 +500,8 @@ def std( axis=axis, keepdims=keepdims, mask_identity=mask_identity, + behavior=behavior, + attrs=attrs, ), x, output_divisions=1, @@ -409,6 +515,8 @@ def sum( axis: int | None = None, keepdims: bool = False, mask_identity: bool = False, + behavior: dict | None = None, + attrs: dict | None = None, ) -> Any: if axis is None or axis == 0 or axis == -1 * array.ndim: return non_trivial_reduction( @@ -419,6 +527,8 @@ def sum( is_positional=False, keepdims=keepdims, mask_identity=mask_identity, + behavior=behavior, + attrs=attrs, ) else: return map_partitions( @@ -428,6 +538,8 @@ def sum( axis=axis, keepdims=keepdims, mask_identity=mask_identity, + behavior=behavior, + attrs=attrs, ) @@ -441,13 +553,15 @@ def __call__(self, array): @borrow_docstring(ak.var) def var( - x, - weight=None, - ddof=0, - axis=None, - keepdims=False, - mask_identity=False, -): + x: Array, + weight: Array | int | float | complex | None = None, + ddof: int = 0, + axis: int | None = None, + keepdims: bool = False, + mask_identity: bool = False, + behavior: dict | None = None, + attrs: dict | None = None, +) -> Any: if weight is not None: raise DaskAwkwardNotImplemented("weight argument is not supported.") if axis is None or axis == 0 or axis == -1 * x.ndim: @@ -461,6 +575,8 @@ def var( axis=axis, keepdims=keepdims, mask_identity=mask_identity, + behavior=behavior, + attrs=attrs, ), x, output_divisions=1, diff --git a/src/dask_awkward/lib/str.py b/src/dask_awkward/lib/str.py index 85324ae9..8fbfb203 100644 --- a/src/dask_awkward/lib/str.py +++ b/src/dask_awkward/lib/str.py @@ -29,6 +29,7 @@ def capitalize( *, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.capitalize, @@ -46,6 +47,7 @@ def center( *, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.center, @@ -65,6 +67,7 @@ def count_substring( ignore_case: bool = False, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.count_substring, @@ -84,6 +87,7 @@ def count_substring_regex( ignore_case: bool = False, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.count_substring_regex, @@ -103,6 +107,7 @@ def ends_with( ignore_case: bool = False, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.ends_with, @@ -121,6 +126,7 @@ def extract_regex( *, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.extract_regex, @@ -139,6 +145,7 @@ def find_substring( ignore_case: bool = False, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.find_substring, @@ -158,6 +165,7 @@ def find_substring_regex( ignore_case: bool = False, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.find_substring_regex, @@ -177,6 +185,7 @@ def index_in( skip_nones: bool = False, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.index_in, @@ -194,6 +203,7 @@ def is_alnum( *, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.is_alnum, @@ -209,6 +219,7 @@ def is_alpha( *, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.is_alpha, @@ -224,6 +235,7 @@ def is_ascii( *, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.is_ascii, @@ -239,6 +251,7 @@ def is_decimal( *, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.is_decimal, @@ -254,6 +267,7 @@ def is_digit( *, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.is_digit, @@ -271,6 +285,7 @@ def is_in( skip_nones: bool = False, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.is_in, @@ -288,6 +303,7 @@ def is_lower( *, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.is_lower, @@ -303,6 +319,7 @@ def is_numeric( *, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.is_numeric, @@ -318,6 +335,7 @@ def is_printable( *, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.is_printable, @@ -333,6 +351,7 @@ def is_space( *, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.is_space, @@ -348,6 +367,7 @@ def is_title( *, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.is_title, @@ -363,6 +383,7 @@ def is_upper( *, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.is_upper, @@ -379,6 +400,7 @@ def join( *, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.join, @@ -394,6 +416,7 @@ def join_element_wise( *arrays: Array, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.join_element_wise, @@ -409,6 +432,7 @@ def length( *, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.length, @@ -424,6 +448,7 @@ def lower( *, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.lower, @@ -441,6 +466,7 @@ def lpad( *, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.lpad, @@ -459,6 +485,7 @@ def ltrim( *, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.ltrim, @@ -475,6 +502,7 @@ def ltrim_whitespace( *, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.ltrim_whitespace, @@ -492,6 +520,7 @@ def match_like( ignore_case: bool = False, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.match_like, @@ -511,6 +540,7 @@ def match_substring( ignore_case: bool = False, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.match_substring, @@ -530,6 +560,7 @@ def match_substring_regex( ignore_case: bool = False, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.match_substring_regex, @@ -548,6 +579,7 @@ def repeat( *, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.repeat, @@ -567,6 +599,7 @@ def replace_slice( *, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.replace_slice, @@ -588,6 +621,7 @@ def replace_substring( max_replacements: int | None = None, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.replace_substring, @@ -609,6 +643,7 @@ def replace_substring_regex( max_replacements: int | None = None, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.replace_substring_regex, @@ -627,6 +662,7 @@ def reverse( *, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.reverse, @@ -644,6 +680,7 @@ def rpad( *, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.rpad, @@ -662,6 +699,7 @@ def rtrim( *, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.rtrim, @@ -678,6 +716,7 @@ def rtrim_whitespace( *, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.rtrim_whitespace, @@ -696,6 +735,7 @@ def slice( *, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.slice, @@ -717,6 +757,7 @@ def split_pattern( reverse: bool = False, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.split_pattern, @@ -738,6 +779,7 @@ def split_pattern_regex( reverse: bool = False, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.split_pattern_regex, @@ -758,6 +800,7 @@ def split_whitespace( reverse: bool = False, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.split_whitespace, @@ -776,6 +819,7 @@ def starts_with( ignore_case: bool = False, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.starts_with, @@ -793,6 +837,7 @@ def swapcase( *, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.swapcase, @@ -808,6 +853,7 @@ def title( *, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.title, @@ -823,6 +869,7 @@ def to_categorical( *, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.to_categorical, @@ -839,6 +886,7 @@ def trim( *, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.trim, @@ -855,6 +903,7 @@ def trim_whitespace( *, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.trim_whitespace, @@ -870,6 +919,7 @@ def upper( *, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( akstr.upper, diff --git a/src/dask_awkward/lib/structure.py b/src/dask_awkward/lib/structure.py index e781985a..83a7aa36 100644 --- a/src/dask_awkward/lib/structure.py +++ b/src/dask_awkward/lib/structure.py @@ -291,6 +291,7 @@ def combinations( with_name: str | None = None, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: if not highlevel: raise ValueError("Only highlevel=True is supported") @@ -341,6 +342,7 @@ def fill_none( axis: int = -1, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: if not highlevel: raise ValueError("Only highlevel=True is supported") @@ -363,6 +365,7 @@ def drop_none( axis: int | None = None, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: if not highlevel: raise ValueError("Only highlevel=True is supported") @@ -385,6 +388,7 @@ def firsts( axis: int = 1, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Any: if axis == 1: return map_partitions( @@ -416,6 +420,7 @@ def flatten( axis: int | None = 1, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: if not highlevel: raise ValueError("Only highlevel=True is supported") @@ -574,6 +579,7 @@ def num( axis: int = 1, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Any: if not highlevel: raise ValueError("Only highlevel=True is supported") @@ -596,7 +602,7 @@ def num( return new_scalar_object( hlg, name, - meta=TypeTracerArray._new(dtype=np.int64, shape=()), + meta=TypeTracerArray._new(dtype=np.dtype(np.int64), shape=()), ) else: return map_partitions( @@ -614,6 +620,7 @@ def ones_like( array: Array, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, dtype: DTypeLike | None = None, ) -> Array: if not highlevel: @@ -865,6 +872,7 @@ def where( mergebool: bool = True, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: if not highlevel: raise ValueError("Only highlevel=True is supported") @@ -952,6 +960,7 @@ def with_name( name: str, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: if not highlevel: raise ValueError("Only highlevel=True is supported") @@ -986,6 +995,7 @@ def with_parameter( value: Any, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( _WithParameterFn(parameter=parameter, value=value, behavior=behavior), @@ -1008,6 +1018,7 @@ def without_parameters( array: Array, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, ) -> Array: return map_partitions( _WithoutParameterFn(behavior=behavior), @@ -1022,6 +1033,7 @@ def zeros_like( array: Array, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, dtype: DTypeLike | None = None, ) -> Array: if not highlevel: @@ -1064,6 +1076,7 @@ def zip( with_name: str | None = None, highlevel: bool = True, behavior: dict | None = None, + attrs: dict | None = None, right_broadcast: bool = False, optiontype_outside_record: bool = False, ) -> Array: From 07c346413fc7c601285cc6426af349c7d616b5c4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 13 Nov 2023 16:02:04 +0000 Subject: [PATCH 02/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/dask_awkward/lib/structure.py | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/src/dask_awkward/lib/structure.py b/src/dask_awkward/lib/structure.py index eda53ba7..1073d768 100644 --- a/src/dask_awkward/lib/structure.py +++ b/src/dask_awkward/lib/structure.py @@ -108,7 +108,7 @@ def argcartesian( with_name=with_name, highlevel=highlevel, behavior=behavior, - attrs=attrs + attrs=attrs, ) return map_partitions(fn, *arrays, label="argcartesian", output_divisions=1) raise DaskAwkwardNotImplemented("TODO") @@ -157,7 +157,7 @@ def argcombinations( with_name=with_name, highlevel=highlevel, behavior=behavior, - attrs=attrs + attrs=attrs, ) return map_partitions( fn, @@ -191,11 +191,7 @@ def argsort( if axis == 0: raise DaskAwkwardNotImplemented("TODO") fn = _ArgsortFn( - axis=axis, - ascending=ascending, - stable=stable, - behavior=behavior, - attrs=attrs + axis=axis, ascending=ascending, stable=stable, behavior=behavior, attrs=attrs ) return map_partitions(fn, array, label="argsort", output_divisions=1) @@ -722,8 +718,8 @@ def pad_none( @borrow_docstring(ak.ravel) def ravel( - array: Array, - highlevel: bool = True, + array: Array, + highlevel: bool = True, behavior: dict | None = None, attrs: Mapping[str, Any] = None, ) -> Array: @@ -744,8 +740,8 @@ def ravel( @borrow_docstring(ak.run_lengths) def run_lengths( - array: Array, - highlevel: bool = True, + array: Array, + highlevel: bool = True, behavior: dict | None = None, attrs: Mapping[str, Any] = None, ) -> Array: @@ -779,11 +775,11 @@ def __call__(self, array): @borrow_docstring(ak.singletons) def singletons( - array: Array, - axis: int = 0, - highlevel: bool = True, + array: Array, + axis: int = 0, + highlevel: bool = True, behavior: Mapping | None = None, - attrs: Mapping[str, Any] | None = None + attrs: Mapping[str, Any] | None = None, ) -> Array: if not highlevel: raise ValueError("Only highlevel=True is supported") @@ -811,7 +807,7 @@ def sort( stable: bool = True, highlevel: bool = True, behavior: Mapping | None = None, - attrs: Mapping[str, Any] | None = None + attrs: Mapping[str, Any] | None = None, ) -> Array: if not highlevel: raise ValueError("Only highlevel=True is supported") @@ -833,7 +829,7 @@ def strings_astype( to: np.dtype | str, highlevel: bool = True, behavior: Mapping | None = None, - attrs: Mapping[str, Any] | None = None + attrs: Mapping[str, Any] | None = None, ) -> Array: raise DaskAwkwardNotImplemented("TODO") From bcc88445f3ce378d4eb355d0db5cf09d4bfe4dc4 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Mon, 13 Nov 2023 16:18:04 +0000 Subject: [PATCH 03/12] refactor: use new public API --- src/dask_awkward/lib/core.py | 18 ++++++++---------- src/dask_awkward/lib/structure.py | 31 +++++++++++++++++-------------- tests/test_structure.py | 6 +----- 3 files changed, 26 insertions(+), 29 deletions(-) diff --git a/src/dask_awkward/lib/core.py b/src/dask_awkward/lib/core.py index 79ba9f30..38c5464c 100644 --- a/src/dask_awkward/lib/core.py +++ b/src/dask_awkward/lib/core.py @@ -17,13 +17,14 @@ import dask.config import numpy as np from awkward._do import remove_structure as ak_do_remove_structure -from awkward._nplikes.typetracer import ( +from awkward.highlevel import NDArrayOperatorsMixin, _dir_pattern +from awkward.typetracer import ( MaybeNone, OneOf, TypeTracerArray, + create_unknown_scalar, is_unknown_scalar, ) -from awkward.highlevel import NDArrayOperatorsMixin, _dir_pattern from dask.base import ( DaskMethodsMixin, dont_optimize, @@ -140,7 +141,9 @@ def key(self) -> Key: return (self._name, 0) def _check_meta(self, m: Any) -> Any | None: - if isinstance(m, (MaybeNone, OneOf)) or is_unknown_scalar(m): + if m is None: + return m + elif isinstance(m, (MaybeNone, OneOf)) or is_unknown_scalar(m): return m elif isinstance(m, ak.Array) and len(m) == 1: return m @@ -348,12 +351,9 @@ def new_scalar_object(dsk: HighLevelGraph, name: str, *, meta: Any) -> Scalar: Resulting collection. """ - if meta is None: - meta = ak.Array(TypeTracerArray._new(dtype=np.dtype(None), shape=())) - if isinstance(meta, MaybeNone): meta = ak.Array(meta.content) - else: + elif meta is not None: try: if ak.backend(meta) != "typetracer": raise TypeError( @@ -411,9 +411,7 @@ def new_known_scalar( dtype = np.dtype(dtype) llg = AwkwardMaterializedLayer({(name, 0): s}, previous_layer_names=[]) hlg = HighLevelGraph.from_collections(name, llg, dependencies=()) - return Scalar( - hlg, name, meta=TypeTracerArray._new(dtype=dtype, shape=()), known_value=s - ) + return Scalar(hlg, name, meta=create_unknown_scalar(dtype), known_value=s) class Record(Scalar): diff --git a/src/dask_awkward/lib/structure.py b/src/dask_awkward/lib/structure.py index 1073d768..58200c2e 100644 --- a/src/dask_awkward/lib/structure.py +++ b/src/dask_awkward/lib/structure.py @@ -8,7 +8,7 @@ import awkward as ak import numpy as np -from awkward._nplikes.typetracer import TypeTracerArray +from awkward.typetracer import create_unknown_scalar from dask.base import is_dask_collection, tokenize from dask.highlevelgraph import HighLevelGraph @@ -94,7 +94,7 @@ def argcartesian( with_name: str | None = None, highlevel: bool = True, behavior: Mapping | None = None, - attrs: Mapping[str, Any] = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: if not highlevel: raise ValueError("Only highlevel=True is supported") @@ -135,7 +135,7 @@ def argcombinations( with_name: str | None = None, highlevel: bool = True, behavior: Mapping | None = None, - attrs: Mapping[str, Any] = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: if not highlevel: raise ValueError("Only highlevel=True is supported") @@ -184,7 +184,7 @@ def argsort( stable: bool = True, highlevel: bool = True, behavior: Mapping | None = None, - attrs: Mapping[str, Any] = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: if not highlevel: raise ValueError("Only highlevel=True is supported") @@ -362,7 +362,7 @@ def drop_none( axis: int | None = None, highlevel: bool = True, behavior: Mapping | None = None, - attrs: Mapping[str, Any] = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: if not highlevel: raise ValueError("Only highlevel=True is supported") @@ -491,6 +491,7 @@ def isclose( equal_nan: bool = False, highlevel: bool = True, behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: if not highlevel: raise ValueError("Only highlevel=True is supported") @@ -534,6 +535,7 @@ def local_index( axis: int = -1, highlevel: bool = True, behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: if not highlevel: raise ValueError("Only highlevel=True is supported") @@ -555,6 +557,7 @@ def mask( valid_when: bool = True, highlevel: bool = True, behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: if partition_compatibility(array, mask) == PartitionCompatibility.NO: raise IncompatiblePartitions("mask", array, mask) @@ -603,7 +606,7 @@ def num( axis: int = 1, highlevel: bool = True, behavior: dict | None = None, - attrs: Mapping[str, Any] = None, + attrs: Mapping[str, Any] | None = None, ) -> Any: if not highlevel: raise ValueError("Only highlevel=True is supported") @@ -623,11 +626,7 @@ def num( {(name, 0): (_numaxis0, *keys)}, previous_layer_names=[per_axis.name] ) hlg = HighLevelGraph.from_collections(name, matlayer, dependencies=(per_axis,)) - return new_scalar_object( - hlg, - name, - meta=TypeTracerArray._new(dtype=np.dtype(np.int64), shape=()), - ) + return new_scalar_object(hlg, name, meta=create_unknown_scalar(np.int64)) else: return map_partitions( ak.num, @@ -644,7 +643,7 @@ def ones_like( array: Array, highlevel: bool = True, behavior: dict | None = None, - attrs: Mapping[str, Any] = None, + attrs: Mapping[str, Any] | None = None, dtype: DTypeLike | None = None, ) -> Array: if not highlevel: @@ -697,6 +696,7 @@ def pad_none( clip: bool = False, highlevel: bool = True, behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: if not highlevel: raise ValueError("Only highlevel=True is supported") @@ -721,7 +721,7 @@ def ravel( array: Array, highlevel: bool = True, behavior: dict | None = None, - attrs: Mapping[str, Any] = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: if not highlevel: raise ValueError("Only highlevel=True is supported") @@ -743,7 +743,7 @@ def run_lengths( array: Array, highlevel: bool = True, behavior: dict | None = None, - attrs: Mapping[str, Any] = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: if not highlevel: raise ValueError("Only highlevel=True is supported") @@ -863,6 +863,7 @@ def unflatten( axis: int = 0, highlevel: bool = True, behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: if not highlevel: raise ValueError("Only highlevel=True is supported") @@ -911,6 +912,7 @@ def values_astype( to: np.dtype | str, highlevel: bool = True, behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: if not highlevel: raise ValueError("Only highlevel=True is supported") @@ -1005,6 +1007,7 @@ def with_field( where: str | Sequence[str] | None = None, highlevel: bool = True, behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: if not highlevel: raise ValueError("Only highlevel=True is supported") diff --git a/tests/test_structure.py b/tests/test_structure.py index 23a190b4..1967e551 100644 --- a/tests/test_structure.py +++ b/tests/test_structure.py @@ -388,11 +388,7 @@ def test_sort(daa, caa, ascending): def test_copy(daa): - with pytest.raises( - DaskAwkwardNotImplemented, - match="This function is not necessary in the context of dask-awkward.", - ): - dak.copy(daa) + assert dak.copy(daa) is daa @pytest.mark.parametrize( From 796614075fe479f427b634457e9902281ad5f7d2 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Mon, 13 Nov 2023 16:20:47 +0000 Subject: [PATCH 04/12] refactor: clean up type hints --- src/dask_awkward/lib/core.py | 10 +- src/dask_awkward/lib/io/io.py | 16 ++- src/dask_awkward/lib/io/json.py | 14 +- src/dask_awkward/lib/io/parquet.py | 15 ++- src/dask_awkward/lib/operations.py | 7 +- src/dask_awkward/lib/reducers.py | 69 +++++----- src/dask_awkward/lib/str.py | 202 ++++++++++++++--------------- src/dask_awkward/lib/structure.py | 32 ++--- 8 files changed, 187 insertions(+), 178 deletions(-) diff --git a/src/dask_awkward/lib/core.py b/src/dask_awkward/lib/core.py index 38c5464c..b24741be 100644 --- a/src/dask_awkward/lib/core.py +++ b/src/dask_awkward/lib/core.py @@ -7,7 +7,7 @@ import operator import sys import warnings -from collections.abc import Callable, Hashable, Sequence +from collections.abc import Callable, Hashable, Mapping, Sequence from enum import IntEnum from functools import cached_property, partial, wraps from numbers import Number @@ -1479,8 +1479,8 @@ def new_array_object( name: str, *, meta: ak.Array | None = None, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, npartitions: int | None = None, divisions: tuple[int, ...] | tuple[None, ...] | None = None, ) -> Array: @@ -1877,8 +1877,8 @@ def non_trivial_reduction( keepdims: bool, mask_identity: bool, reducer: Callable, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, combiner: Callable | None = None, token: str | None = None, dtype: Any | None = None, diff --git a/src/dask_awkward/lib/io/io.py b/src/dask_awkward/lib/io/io.py index 629d8c7e..d15d585f 100644 --- a/src/dask_awkward/lib/io/io.py +++ b/src/dask_awkward/lib/io/io.py @@ -3,7 +3,7 @@ import functools import logging import math -from collections.abc import Callable, Iterable +from collections.abc import Callable, Iterable, Mapping from dataclasses import dataclass from typing import TYPE_CHECKING, Any, cast @@ -65,7 +65,7 @@ def __call__(self, start: int, stop: int, **kwargs: Any) -> ak.Array: def from_awkward( source: ak.Array, npartitions: int, - behavior: dict | None = None, + behavior: Mapping | None = None, label: str | None = None, ) -> Array: """Create an Array collection from a concrete :class:`awkward.Array` object. @@ -116,7 +116,9 @@ def from_awkward( class _FromListsFn: - def __init__(self, behavior: dict | None = None, attrs: dict | None = None): + def __init__( + self, behavior: Mapping | None = None, attrs: Mapping[str, Any] | None = None + ): self.behavior = behavior self.attrs = attrs @@ -125,7 +127,9 @@ def __call__(self, x: list) -> ak.Array: def from_lists( - source: list, behavior: dict | None = None, attrs: dict | None = None + source: list, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: """Create an Array collection from a list of lists. @@ -166,7 +170,7 @@ def from_lists( def from_delayed( source: list[Delayed] | Delayed, meta: ak.Array | None = None, - behavior: dict | None = None, + behavior: Mapping | None = None, divisions: tuple[int, ...] | tuple[None, ...] | None = None, prefix: str = "from-delayed", ) -> Array: @@ -349,7 +353,7 @@ def to_dask_array( return new_da_object(graph, name, meta=None, chunks=chunks, dtype=dtype) -def from_dask_array(array: DaskArray, behavior: dict | None = None) -> Array: +def from_dask_array(array: DaskArray, behavior: Mapping | None = None) -> Array: """Convert a Dask Array collection to a Dask Awkard Array collection. Parameters diff --git a/src/dask_awkward/lib/io/json.py b/src/dask_awkward/lib/io/json.py index 5c06cd1a..9e83becf 100644 --- a/src/dask_awkward/lib/io/json.py +++ b/src/dask_awkward/lib/io/json.py @@ -3,7 +3,7 @@ import abc import logging import math -from collections.abc import Callable +from collections.abc import Callable, Mapping from typing import TYPE_CHECKING, Any, Literal, overload import awkward as ak @@ -43,7 +43,7 @@ def __init__( form: Form, compression: str | None = None, schema: str | dict | list | None = None, - behavior: dict | None = None, + behavior: Mapping | None = None, **kwargs: Any, ) -> None: self.compression = compression @@ -91,7 +91,7 @@ def __init__( form: Form, compression: str | None = None, schema: str | dict | list | None = None, - behavior: dict | None = None, + behavior: Mapping | None = None, **kwargs: Any, ) -> None: super().__init__( @@ -125,7 +125,7 @@ def __init__( form: Form, compression: str | None = None, schema: str | dict | list | None = None, - behavior: dict | None = None, + behavior: Mapping | None = None, **kwargs: Any, ) -> None: super().__init__( @@ -163,7 +163,7 @@ def __init__( form: Form, compression: str | None = None, schema: str | dict | list | None = None, - behavior: dict | None = None, + behavior: Mapping | None = None, **kwargs: Any, ) -> None: super().__init__( @@ -432,8 +432,8 @@ def from_json( initial: int = 1024, resize: float = 8, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, blocksize: int | str | None = None, delimiter: bytes | None = None, compression: str | None = "infer", diff --git a/src/dask_awkward/lib/io/parquet.py b/src/dask_awkward/lib/io/parquet.py index 265a20a4..e16de16f 100644 --- a/src/dask_awkward/lib/io/parquet.py +++ b/src/dask_awkward/lib/io/parquet.py @@ -5,6 +5,7 @@ import logging import math import operator +from collections.abc import Mapping from typing import TYPE_CHECKING, Any, Literal, TypeVar import awkward as ak @@ -41,8 +42,8 @@ def __init__( listsep: str = "list.item", unnamed_root: bool = False, original_form: Form | None = None, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, **kwargs: Any, ) -> None: self.fs = fs @@ -99,7 +100,7 @@ def __init__( listsep: str = "list.item", unnamed_root: bool = False, original_form: Form | None = None, - behavior: dict | None = None, + behavior: Mapping | None = None, **kwargs: Any, ) -> None: super().__init__( @@ -152,8 +153,8 @@ def __init__( listsep: str = "list.item", unnamed_root: bool = False, original_form: Form | None = None, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, **kwargs: Any, ) -> None: super().__init__( @@ -209,8 +210,8 @@ def from_parquet( footer_sample_size: int = 1_000_000, generate_bitmasks: bool = False, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ignore_metadata: bool = True, scan_files: bool = False, split_row_groups: bool | None = False, diff --git a/src/dask_awkward/lib/operations.py b/src/dask_awkward/lib/operations.py index 929ead25..2074ee2a 100644 --- a/src/dask_awkward/lib/operations.py +++ b/src/dask_awkward/lib/operations.py @@ -1,5 +1,8 @@ from __future__ import annotations +from collections.abc import Mapping +from typing import Any + import awkward as ak from dask.base import tokenize from dask.highlevelgraph import HighLevelGraph @@ -32,8 +35,8 @@ def concatenate( axis: int = 0, mergebool: bool = True, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: label = "concatenate" token = tokenize(arrays, axis, mergebool, highlevel, behavior) diff --git a/src/dask_awkward/lib/reducers.py b/src/dask_awkward/lib/reducers.py index ccf75d94..33a8d63c 100644 --- a/src/dask_awkward/lib/reducers.py +++ b/src/dask_awkward/lib/reducers.py @@ -1,5 +1,6 @@ from __future__ import annotations +from collections.abc import Mapping from typing import TYPE_CHECKING, Any import awkward as ak @@ -39,8 +40,8 @@ def all( axis: int | None = None, keepdims: bool = False, mask_identity: bool = False, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Any: if axis is None or axis == 0 or axis == -1 * array.ndim: return non_trivial_reduction( @@ -73,8 +74,8 @@ def any( axis: int | None = None, keepdims: bool = False, mask_identity: bool = False, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Any: if axis is None or axis == 0 or axis == -1 * array.ndim: return non_trivial_reduction( @@ -107,8 +108,8 @@ def argmax( axis: int | None = None, keepdims: bool = False, mask_identity: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Any: if axis is None or axis == 0 or axis == -1 * array.ndim: return non_trivial_reduction( @@ -141,8 +142,8 @@ def argmin( axis: int | None = None, keepdims: bool = False, mask_identity: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Any: if axis is None or axis == 0 or axis == -1 * array.ndim: return non_trivial_reduction( @@ -177,8 +178,8 @@ def corr( axis: int | None = None, keepdims: bool = False, mask_identity: bool = False, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Any: raise DaskAwkwardNotImplemented("TODO") @@ -189,8 +190,8 @@ def count( axis: int | None = None, keepdims: bool = False, mask_identity: bool = False, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Any: if axis is None or axis == 0 or axis == -1 * array.ndim: return non_trivial_reduction( @@ -224,8 +225,8 @@ def count_nonzero( axis: int | None = None, keepdims: bool = False, mask_identity: bool = False, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Any: if axis is None or axis == 0 or axis == -1 * array.ndim: return non_trivial_reduction( @@ -261,8 +262,8 @@ def covar( axis: int | None = None, keepdims: bool = False, mask_identity: bool = False, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Any: raise DaskAwkwardNotImplemented("TODO") @@ -275,8 +276,8 @@ def linear_fit( axis: int | None = None, keepdims: bool = False, mask_identity: bool = False, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Any: raise DaskAwkwardNotImplemented("TODO") @@ -296,8 +297,8 @@ def max( keepdims: bool = False, initial: float | None = None, mask_identity: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Any: if axis is None or axis == 0 or axis == -1 * array.ndim: return non_trivial_reduction( @@ -331,8 +332,8 @@ def mean( axis: int | None = None, keepdims: bool = False, mask_identity: bool = False, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: if weight is not None: raise DaskAwkwardNotImplemented( @@ -372,8 +373,8 @@ def min( keepdims: bool = False, initial: float | None = None, mask_identity: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Any: if axis is None or axis == 0 or axis == -1 * array.ndim: return non_trivial_reduction( @@ -408,8 +409,8 @@ def moment( axis: int | None = None, keepdims: bool = False, mask_identity: bool = False, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Any: raise DaskAwkwardNotImplemented("TODO") @@ -420,8 +421,8 @@ def prod( axis: int | None = None, keepdims: bool = False, mask_identity: bool = False, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Any: if axis is None or axis == 0 or axis == -1 * array.ndim: return non_trivial_reduction( @@ -484,8 +485,8 @@ def std( axis: int | None = None, keepdims: bool = False, mask_identity: bool = False, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Any: if weight is not None: raise DaskAwkwardNotImplemented("weight argument is not supported.") @@ -515,8 +516,8 @@ def sum( axis: int | None = None, keepdims: bool = False, mask_identity: bool = False, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Any: if axis is None or axis == 0 or axis == -1 * array.ndim: return non_trivial_reduction( @@ -559,8 +560,8 @@ def var( axis: int | None = None, keepdims: bool = False, mask_identity: bool = False, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Any: if weight is not None: raise DaskAwkwardNotImplemented("weight argument is not supported.") diff --git a/src/dask_awkward/lib/str.py b/src/dask_awkward/lib/str.py index 8fbfb203..719557a7 100644 --- a/src/dask_awkward/lib/str.py +++ b/src/dask_awkward/lib/str.py @@ -1,7 +1,7 @@ from __future__ import annotations import functools -from collections.abc import Callable +from collections.abc import Callable, Mapping from typing import Any, TypeVar import awkward.operations.str as akstr @@ -28,8 +28,8 @@ def capitalize( array: Array, *, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.capitalize, @@ -46,8 +46,8 @@ def center( padding: str | bytes = " ", *, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.center, @@ -66,8 +66,8 @@ def count_substring( *, ignore_case: bool = False, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.count_substring, @@ -86,8 +86,8 @@ def count_substring_regex( *, ignore_case: bool = False, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.count_substring_regex, @@ -106,8 +106,8 @@ def ends_with( *, ignore_case: bool = False, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.ends_with, @@ -125,8 +125,8 @@ def extract_regex( pattern: bytes | str, *, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.extract_regex, @@ -144,8 +144,8 @@ def find_substring( *, ignore_case: bool = False, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.find_substring, @@ -164,8 +164,8 @@ def find_substring_regex( *, ignore_case: bool = False, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.find_substring_regex, @@ -184,8 +184,8 @@ def index_in( *, skip_nones: bool = False, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.index_in, @@ -202,8 +202,8 @@ def is_alnum( array: Array, *, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.is_alnum, @@ -218,8 +218,8 @@ def is_alpha( array: Array, *, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.is_alpha, @@ -234,8 +234,8 @@ def is_ascii( array: Array, *, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.is_ascii, @@ -250,8 +250,8 @@ def is_decimal( array: Array, *, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.is_decimal, @@ -266,8 +266,8 @@ def is_digit( array: Array, *, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.is_digit, @@ -284,8 +284,8 @@ def is_in( *, skip_nones: bool = False, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.is_in, @@ -302,8 +302,8 @@ def is_lower( array: Array, *, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.is_lower, @@ -318,8 +318,8 @@ def is_numeric( array: Array, *, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.is_numeric, @@ -334,8 +334,8 @@ def is_printable( array: Array, *, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.is_printable, @@ -350,8 +350,8 @@ def is_space( array: Array, *, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.is_space, @@ -366,8 +366,8 @@ def is_title( array: Array, *, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.is_title, @@ -382,8 +382,8 @@ def is_upper( array: Array, *, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.is_upper, @@ -399,8 +399,8 @@ def join( separator: Any, *, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.join, @@ -415,8 +415,8 @@ def join( def join_element_wise( *arrays: Array, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.join_element_wise, @@ -431,8 +431,8 @@ def length( array: Array, *, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.length, @@ -447,8 +447,8 @@ def lower( array: Array, *, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.lower, @@ -465,8 +465,8 @@ def lpad( padding: str | bytes = " ", *, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.lpad, @@ -484,8 +484,8 @@ def ltrim( characters: str | bytes, *, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.ltrim, @@ -501,8 +501,8 @@ def ltrim_whitespace( array: Array, *, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.ltrim_whitespace, @@ -519,8 +519,8 @@ def match_like( *, ignore_case: bool = False, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.match_like, @@ -539,8 +539,8 @@ def match_substring( *, ignore_case: bool = False, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.match_substring, @@ -559,8 +559,8 @@ def match_substring_regex( *, ignore_case: bool = False, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.match_substring_regex, @@ -578,8 +578,8 @@ def repeat( num_repeats: Any, *, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.repeat, @@ -598,8 +598,8 @@ def replace_slice( replacement: str | bytes, *, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.replace_slice, @@ -620,8 +620,8 @@ def replace_substring( *, max_replacements: int | None = None, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.replace_substring, @@ -642,8 +642,8 @@ def replace_substring_regex( *, max_replacements: int | None = None, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.replace_substring_regex, @@ -661,8 +661,8 @@ def reverse( array: Array, *, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.reverse, @@ -679,8 +679,8 @@ def rpad( padding: str | bytes = " ", *, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.rpad, @@ -698,8 +698,8 @@ def rtrim( characters: str | bytes, *, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.rtrim, @@ -715,8 +715,8 @@ def rtrim_whitespace( array: Array, *, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.rtrim_whitespace, @@ -734,8 +734,8 @@ def slice( step: int = 1, *, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.slice, @@ -756,8 +756,8 @@ def split_pattern( max_splits: int | None = None, reverse: bool = False, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.split_pattern, @@ -778,8 +778,8 @@ def split_pattern_regex( max_splits: int | None = None, reverse: bool = False, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.split_pattern_regex, @@ -799,8 +799,8 @@ def split_whitespace( max_splits: int | None = None, reverse: bool = False, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.split_whitespace, @@ -818,8 +818,8 @@ def starts_with( *, ignore_case: bool = False, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.starts_with, @@ -836,8 +836,8 @@ def swapcase( array: Array, *, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.swapcase, @@ -852,8 +852,8 @@ def title( array: Array, *, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.title, @@ -868,8 +868,8 @@ def to_categorical( array: Array, *, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.to_categorical, @@ -885,8 +885,8 @@ def trim( characters: str | bytes, *, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.trim, @@ -902,8 +902,8 @@ def trim_whitespace( array: Array, *, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.trim_whitespace, @@ -918,8 +918,8 @@ def upper( array: Array, *, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: return map_partitions( akstr.upper, diff --git a/src/dask_awkward/lib/structure.py b/src/dask_awkward/lib/structure.py index 58200c2e..ede9e31f 100644 --- a/src/dask_awkward/lib/structure.py +++ b/src/dask_awkward/lib/structure.py @@ -605,7 +605,7 @@ def num( array: Any, axis: int = 1, highlevel: bool = True, - behavior: dict | None = None, + behavior: Mapping | None = None, attrs: Mapping[str, Any] | None = None, ) -> Any: if not highlevel: @@ -642,7 +642,7 @@ def num( def ones_like( array: Array, highlevel: bool = True, - behavior: dict | None = None, + behavior: Mapping | None = None, attrs: Mapping[str, Any] | None = None, dtype: DTypeLike | None = None, ) -> Array: @@ -720,7 +720,7 @@ def pad_none( def ravel( array: Array, highlevel: bool = True, - behavior: dict | None = None, + behavior: Mapping | None = None, attrs: Mapping[str, Any] | None = None, ) -> Array: if not highlevel: @@ -742,7 +742,7 @@ def ravel( def run_lengths( array: Array, highlevel: bool = True, - behavior: dict | None = None, + behavior: Mapping | None = None, attrs: Mapping[str, Any] | None = None, ) -> Array: if not highlevel: @@ -954,8 +954,8 @@ def where( y: Array, mergebool: bool = True, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: if not highlevel: raise ValueError("Only highlevel=True is supported") @@ -1048,8 +1048,8 @@ def with_name( array: Array, name: str | None, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: if not highlevel: raise ValueError("Only highlevel=True is supported") @@ -1083,8 +1083,8 @@ def with_parameter( parameter: str, value: Any, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: if not highlevel: raise ValueError("Only highlevel=True is supported") @@ -1108,8 +1108,8 @@ def __call__(self, array): def without_parameters( array: Array, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: if not highlevel: raise ValueError("Only highlevel=True is supported") @@ -1125,8 +1125,8 @@ def without_parameters( def zeros_like( array: Array, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, dtype: DTypeLike | None = None, ) -> Array: if not highlevel: @@ -1168,8 +1168,8 @@ def zip( parameters: Mapping[str, Any] | None = None, with_name: str | None = None, highlevel: bool = True, - behavior: dict | None = None, - attrs: dict | None = None, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, right_broadcast: bool = False, optiontype_outside_record: bool = False, ) -> Array: From 3fcfacc4310a743cfb3404d884b766e5227c5051 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Thu, 16 Nov 2023 18:56:53 +0000 Subject: [PATCH 05/12] fix: more attrs work --- src/dask_awkward/lib/reducers.py | 4 + src/dask_awkward/lib/structure.py | 167 ++++++++++++++++++------------ 2 files changed, 107 insertions(+), 64 deletions(-) diff --git a/src/dask_awkward/lib/reducers.py b/src/dask_awkward/lib/reducers.py index 33a8d63c..c3019778 100644 --- a/src/dask_awkward/lib/reducers.py +++ b/src/dask_awkward/lib/reducers.py @@ -455,6 +455,8 @@ def ptp( axis: int | None = None, keepdims: bool = False, mask_identity: bool = True, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Any: raise DaskAwkwardNotImplemented("TODO") @@ -465,6 +467,8 @@ def softmax( axis: int | None = None, keepdims: bool = False, mask_identity: bool = False, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Any: raise DaskAwkwardNotImplemented("TODO") diff --git a/src/dask_awkward/lib/structure.py b/src/dask_awkward/lib/structure.py index 4b7cb77f..0ceebf32 100644 --- a/src/dask_awkward/lib/structure.py +++ b/src/dask_awkward/lib/structure.py @@ -9,7 +9,7 @@ import awkward as ak import numpy as np -from awkward.typetracer import create_unknown_scalar +from awkward.typetracer import create_unknown_scalar, is_unknown_scalar from dask.base import is_dask_collection, tokenize from dask.highlevelgraph import HighLevelGraph @@ -310,6 +310,7 @@ def combinations( with_name=with_name, highlevel=highlevel, behavior=behavior, + attrs=attrs, ) return map_partitions( fn, @@ -356,7 +357,9 @@ def fill_none( if not highlevel: raise ValueError("Only highlevel=True is supported") - fn = _FillNoneFn(value, axis=axis, highlevel=highlevel, behavior=behavior) + fn = _FillNoneFn( + value, axis=axis, highlevel=highlevel, behavior=behavior, attrs=attrs + ) return map_partitions(fn, array, label="fill-none", output_divisions=1) @@ -379,7 +382,7 @@ def drop_none( if not highlevel: raise ValueError("Only highlevel=True is supported") - fn = _DropNoneFn(axis=axis, highlevel=highlevel, behavior=behavior) + fn = _DropNoneFn(axis=axis, highlevel=highlevel, behavior=behavior, attrs=attrs) return map_partitions(fn, array, label="drop-none", output_divisions=1) @@ -401,11 +404,7 @@ def firsts( ) -> Any: if axis == 1: return map_partitions( - _FirstsFn( - axis=axis, - highlevel=highlevel, - behavior=behavior, - ), + _FirstsFn(axis=axis, highlevel=highlevel, behavior=behavior, attrs=attrs), array, label="firsts", output_divisions=1, @@ -434,11 +433,7 @@ def flatten( if not highlevel: raise ValueError("Only highlevel=True is supported") return map_partitions( - _FlattenFn( - axis=axis, - highlevel=highlevel, - behavior=behavior, - ), + _FlattenFn(axis=axis, highlevel=highlevel, behavior=behavior, attrs=attrs), array, label="flatten", output_divisions=None, @@ -447,7 +442,11 @@ def flatten( @borrow_docstring(ak.from_regular) def from_regular( - array: Array, axis: int = 1, highlevel: bool = True, behavior: Mapping | None = None + array: Array, + axis: int = 1, + highlevel: bool = True, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: if not highlevel: raise ValueError("Only highlevel=True is supported") @@ -462,6 +461,7 @@ def from_regular( highlevel=highlevel, behavior=behavior, label="from-regular", + attrs=attrs, ) @@ -471,7 +471,8 @@ def full_like( fill_value: Any, highlevel: bool = True, behavior: Mapping | None = None, - dtype: np.dtype | str | None = None, + dtype: DTypeLike | str | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: if not highlevel: raise ValueError("Only highlevel=True is supported") @@ -489,6 +490,7 @@ def full_like( fill_value, highlevel=highlevel, behavior=behavior, + attrs=attrs, dtype=dtype, output_divisions=1, ) @@ -522,6 +524,7 @@ def isclose( behavior=behavior, label="is-close", output_divisions=1, + attrs=attrs, ) @@ -535,9 +538,13 @@ def __call__(self, array): @borrow_docstring(ak.is_none) def is_none( - array: Array, axis: int = 0, highlevel: bool = True, behavior: Mapping | None = None + array: Array, + axis: int = 0, + highlevel: bool = True, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: - fn = _IsNoneFn(axis=axis, highlevel=highlevel, behavior=behavior) + fn = _IsNoneFn(axis=axis, highlevel=highlevel, behavior=behavior, attrs=attrs) return map_partitions(fn, array, label="is-none", output_divisions=1) @@ -559,6 +566,7 @@ def local_index( axis=axis, highlevel=highlevel, behavior=behavior, + attrs=attrs, ) @@ -576,11 +584,7 @@ def mask( if not highlevel: raise ValueError("Only highlevel=True is supported") return map_partitions( - ak.mask, - array, - mask, - valid_when=valid_when, - behavior=behavior, + ak.mask, array, mask, valid_when=valid_when, behavior=behavior, attrs=attrs ) @@ -593,6 +597,7 @@ def nan_to_num( neginf: Any | None = None, highlevel: bool = True, behavior: Any | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: # return map_partitions( # ak.nan_to_num, @@ -610,7 +615,7 @@ def nan_to_num( def _numaxis0(*integers): f = first(integers) - if isinstance(f, TypeTracerArray): + if is_unknown_scalar(f): return f return np.sum(np.array(integers)) @@ -676,16 +681,15 @@ def ones_like( @borrow_docstring(ak.to_packed) def to_packed( - array: Array, highlevel: bool = True, behavior: Mapping | None = None + array: Array, + highlevel: bool = True, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: if not highlevel: raise ValueError("Only highlevel=True is supported") - return map_partitions( - ak.to_packed, - array, - behavior=behavior, - ) + return map_partitions(ak.to_packed, array, behavior=behavior, attrs=attrs) class _PadNoneFn: @@ -719,12 +723,7 @@ def pad_none( if axis == 0: DaskAwkwardNotImplemented("axis=0 for pad_none is not supported") return map_partitions( - _PadNoneFn( - target=target, - axis=axis, - clip=clip, - behavior=behavior, - ), + _PadNoneFn(target=target, axis=axis, clip=clip, behavior=behavior, attrs=attrs), array, label="pad-none", output_divisions=1, @@ -851,7 +850,11 @@ def strings_astype( @borrow_docstring(ak.to_regular) def to_regular( - array: Array, axis: int = 1, highlevel: bool = True, behavior: Mapping | None = None + array: Array, + axis: int = 1, + highlevel: bool = True, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: if not highlevel: raise ValueError("Only highlevel=True is supported") @@ -868,6 +871,7 @@ def to_regular( axis=axis, behavior=behavior, label="to-regular", + attrs=attrs, ) @@ -900,25 +904,36 @@ def unflatten( ) -def _array_with_behavior(array: Array, behavior: Mapping | None) -> Array: +def _array_with_rebuilt_meta( + array: Array, behavior: Mapping | None, attrs: Mapping[str, Any] | None +) -> Array: + if attrs is None: + attrs = array._meta.attrs + if behavior is None: - new_meta = array._meta - else: - new_meta = ak.Array(array._meta, behavior=behavior) + behavior = array._meta.behavior + + new_meta = ak.Array(array._meta, behavior=behavior, attrs=attrs) + return Array(array.dask, array.name, new_meta, array.divisions) @borrow_docstring(ak.unzip) def unzip( - array: Array, highlevel: bool = True, behavior: Mapping | None = None + array: Array, + highlevel: bool = True, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> tuple[Array, ...]: if not highlevel: raise ValueError("Only highlevel=True is supported") fields = ak.fields(array._meta) if len(fields) == 0: - return (_array_with_behavior(array, behavior),) + return (_array_with_rebuilt_meta(array, behavior, attrs),) else: - return tuple(_array_with_behavior(array[field], behavior) for field in fields) + return tuple( + _array_with_rebuilt_meta(array[field], behavior, attrs) for field in fields + ) @borrow_docstring(ak.values_astype) @@ -937,6 +952,7 @@ def values_astype( to=to, behavior=behavior, label="values-astype", + attrs=attrs, ) @@ -946,10 +962,12 @@ def __init__( mergebool: bool = True, highlevel: bool = True, behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, ) -> None: self.mergebool = mergebool self.highlevel = highlevel self.behavior = behavior + self.attrs = attrs def __call__(self, condition: ak.Array, x: ak.Array, y: ak.Array) -> ak.Array: return ak.where( @@ -959,6 +977,7 @@ def __call__(self, condition: ak.Array, x: ak.Array, y: ak.Array) -> ak.Array: mergebool=self.mergebool, highlevel=self.highlevel, behavior=self.behavior, + attrs=self.attrs, ) @@ -987,7 +1006,7 @@ def where( raise IncompatiblePartitions("where", *dask_args) return map_partitions( - _WhereFn(mergebool=mergebool, behavior=behavior), + _WhereFn(mergebool=mergebool, behavior=behavior, attrs=attrs), condition, x, y, @@ -998,20 +1017,19 @@ def where( class _WithFieldFn: def __init__( self, - where: str | Sequence[str] | None = None, - highlevel: bool = True, - behavior: Mapping | None = None, + where: str | Sequence[str] | None, + highlevel: bool, + behavior: Mapping | None, + attrs: Mapping[str, Any] | None, ) -> None: self.where = where self.highlevel = highlevel self.behavior = behavior + self.attrs = attrs def __call__(self, base: ak.Array, what: ak.Array) -> ak.Array: return ak.with_field( - base, - what, - where=self.where, - behavior=self.behavior, + base, what, where=self.where, behavior=self.behavior, attrs=self.attrs ) @@ -1041,7 +1059,7 @@ def with_field( if partition_compatibility(*dask_args) == PartitionCompatibility.NO: raise IncompatiblePartitions("with_field", *dask_args) return map_partitions( - _WithFieldFn(where=where, highlevel=highlevel, behavior=behavior), + _WithFieldFn(where=where, highlevel=highlevel, behavior=behavior, attrs=attrs), base, what, label="with-field", @@ -1050,12 +1068,18 @@ def with_field( class _WithNameFn: - def __init__(self, name: str | None, behavior: Mapping | None = None) -> None: + def __init__( + self, + name: str | None, + behavior: Mapping | None, + attrs: Mapping[str, Any] | None, + ) -> None: self.name = name self.behavior = behavior + self.attrs = attrs def __call__(self, array: ak.Array) -> ak.Array: - return ak.with_name(array, self.name, behavior=self.behavior) + return ak.with_name(array, self.name, behavior=self.behavior, attrs=self.attrs) @borrow_docstring(ak.with_name) @@ -1070,7 +1094,7 @@ def with_name( raise ValueError("Only highlevel=True is supported") return map_partitions( - _WithNameFn(name=name, behavior=behavior), + _WithNameFn(name=name, behavior=behavior, attrs=attrs), array, label="with-name", output_divisions=1, @@ -1078,10 +1102,17 @@ def with_name( class _WithParameterFn: - def __init__(self, parameter, value, behavior): + def __init__( + self, + parameter: str, + value: Any, + behavior: Mapping | None, + attrs: Mapping[str, Any] | None, + ): self.parameter = parameter self.value = value self.behavior = behavior + self.attrs = attrs def __call__(self, array): return ak.with_parameter( @@ -1089,6 +1120,7 @@ def __call__(self, array): parameter=self.parameter, value=self.value, behavior=self.behavior, + attrs=self.attrs, ) @@ -1104,7 +1136,9 @@ def with_parameter( if not highlevel: raise ValueError("Only highlevel=True is supported") return map_partitions( - _WithParameterFn(parameter=parameter, value=value, behavior=behavior), + _WithParameterFn( + parameter=parameter, value=value, behavior=behavior, attrs=attrs + ), array, label="with-parameter", output_divisions=1, @@ -1112,11 +1146,12 @@ def with_parameter( class _WithoutParameterFn: - def __init__(self, behavior): + def __init__(self, behavior: Mapping | None, attrs: Mapping[str, Any] | None): self.behavior = behavior + self.attrs = attrs def __call__(self, array): - return ak.without_parameters(array, behavior=self.behavior) + return ak.without_parameters(array, behavior=self.behavior, attrs=self.attrs) @borrow_docstring(ak.without_parameters) @@ -1129,7 +1164,7 @@ def without_parameters( if not highlevel: raise ValueError("Only highlevel=True is supported") return map_partitions( - _WithoutParameterFn(behavior=behavior), + _WithoutParameterFn(behavior=behavior, attrs=attrs), array, label="without-parameters", output_divisions=1, @@ -1153,6 +1188,7 @@ def zeros_like( behavior=behavior, dtype=dtype, output_divisions=1, + attrs=attrs, ) @@ -1184,14 +1220,14 @@ def zip( with_name: str | None = None, highlevel: bool = True, behavior: Mapping | None = None, - attrs: Mapping[str, Any] | None = None, right_broadcast: bool = False, optiontype_outside_record: bool = False, + attrs: Mapping[str, Any] | None = None, ) -> Array: if not highlevel: raise ValueError("Only highlevel=True is supported") - if isinstance(arrays, dict): + if isinstance(arrays, Mapping): keys, colls, metadict = [], [], {} for k, coll in arrays.items(): keys.append(k) @@ -1207,6 +1243,7 @@ def zip( behavior=behavior, right_broadcast=right_broadcast, optiontype_outside_record=optiontype_outside_record, + attrs=attrs, ) return map_partitions( @@ -1219,6 +1256,7 @@ def zip( behavior=behavior, right_broadcast=right_broadcast, optiontype_outside_record=optiontype_outside_record, + attrs=attrs, ), *colls, label="zip", @@ -1226,7 +1264,7 @@ def zip( opt_touch_all=True, ) - elif isinstance(arrays, (list, tuple)): + elif isinstance(arrays, Sequence): fn = _ZipListInputFn( depth_limit=depth_limit, parameters=parameters, @@ -1235,6 +1273,7 @@ def zip( behavior=behavior, right_broadcast=right_broadcast, optiontype_outside_record=optiontype_outside_record, + attrs=attrs, ) return map_partitions( fn, @@ -1244,7 +1283,7 @@ def zip( else: raise DaskAwkwardNotImplemented( - "only sized iterables are supported by dak.zip (dict, list, or tuple)" + "only mappings or sequences are supported by dak.zip (e.g. dict, list, or tuple)" ) From 2b5cef1eb2f41cfbd2777e7bc898ec2e826a1f25 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Thu, 16 Nov 2023 19:00:41 +0000 Subject: [PATCH 06/12] fix: more attrs --- src/dask_awkward/lib/core.py | 2 +- src/dask_awkward/lib/io/io.py | 32 ++++++++++++++++++++++++-------- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/src/dask_awkward/lib/core.py b/src/dask_awkward/lib/core.py index 0bf227b1..e894a2a6 100644 --- a/src/dask_awkward/lib/core.py +++ b/src/dask_awkward/lib/core.py @@ -1505,7 +1505,7 @@ def new_array_object( ``awkward.compute-unknown-meta`` is set to ``False``, undefined `meta` will be assigned an empty typetracer. behavior : dict, optional - Custom #ak.behavior for the output array. + Custom ak.behavior for the output array. attrs : dict, optional Custom attributes for the output array. npartitions : int, optional diff --git a/src/dask_awkward/lib/io/io.py b/src/dask_awkward/lib/io/io.py index d15d585f..3148e70f 100644 --- a/src/dask_awkward/lib/io/io.py +++ b/src/dask_awkward/lib/io/io.py @@ -173,6 +173,7 @@ def from_delayed( behavior: Mapping | None = None, divisions: tuple[int, ...] | tuple[None, ...] | None = None, prefix: str = "from-delayed", + attrs: Mapping[str, Any] | None = None, ) -> Array: """Create an Array collection from a set of :class:`~dask.delayed.Delayed` objects. @@ -186,10 +187,14 @@ def from_delayed( Metadata (typetracer array) if known, if ``None`` the first partition (first element of the list of ``Delayed`` objects) will be computed to determine the metadata. + behavior : dict, optional + Custom ak.behavior for the output array. divisions : tuple[int | None, ...], optional Partition boundaries (if known). prefix : str Prefix for the keys in the task graph. + attrs : mapping, optional + Custom attributes for the output array. Returns ------- @@ -213,11 +218,7 @@ def from_delayed( raise ValueError("divisions must be a tuple of length len(source) + 1") hlg = HighLevelGraph.from_collections(name, dsk, dependencies=parts) return new_array_object( - hlg, - name=name, - meta=meta, - behavior=behavior, - divisions=divs, + hlg, name=name, meta=meta, behavior=behavior, divisions=divs, attrs=attrs ) @@ -353,13 +354,21 @@ def to_dask_array( return new_da_object(graph, name, meta=None, chunks=chunks, dtype=dtype) -def from_dask_array(array: DaskArray, behavior: Mapping | None = None) -> Array: +def from_dask_array( + array: DaskArray, + behavior: Mapping | None = None, + attrs: Mapping[str, Any] | None = None, +) -> Array: """Convert a Dask Array collection to a Dask Awkard Array collection. Parameters ---------- array : dask.array.Array Array to convert. + behavior : dict, optional + Custom ak.behavior for the output array. + attrs : mapping, optional + Custom attributes for the output array. Returns ------- @@ -396,11 +405,18 @@ def from_dask_array(array: DaskArray, behavior: Mapping | None = None) -> Array: hlg = HighLevelGraph.from_collections(name, layer, dependencies=[array]) if np.any(np.isnan(array.chunks)): return new_array_object( - hlg, name, npartitions=array.npartitions, meta=meta, behavior=behavior + hlg, + name, + npartitions=array.npartitions, + meta=meta, + behavior=behavior, + attrs=attrs, ) else: divs = (0, *np.cumsum(array.chunks)) - return new_array_object(hlg, name, divisions=divs, meta=meta, behavior=behavior) + return new_array_object( + hlg, name, divisions=divs, meta=meta, behavior=behavior, attrs=attrs + ) def to_dataframe( From 66fc3090cfaf3ad6a6d42f5910c0bd39a16e31ee Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Thu, 16 Nov 2023 19:20:09 +0000 Subject: [PATCH 07/12] refactor: more docs / change signatures --- src/dask_awkward/lib/io/io.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/dask_awkward/lib/io/io.py b/src/dask_awkward/lib/io/io.py index 3148e70f..35d2a54b 100644 --- a/src/dask_awkward/lib/io/io.py +++ b/src/dask_awkward/lib/io/io.py @@ -116,9 +116,7 @@ def from_awkward( class _FromListsFn: - def __init__( - self, behavior: Mapping | None = None, attrs: Mapping[str, Any] | None = None - ): + def __init__(self, behavior: Mapping | None, attrs: Mapping[str, Any] | None): self.behavior = behavior self.attrs = attrs @@ -138,6 +136,10 @@ def from_lists( source : list[list[Any]] List of lists, each outer list will become a partition in the collection. + behavior : dict, optional + Custom ak.behavior for the output array. + attrs : mapping, optional + Custom attributes for the output array. Returns ------- @@ -545,6 +547,8 @@ def from_map( number of partitions in the output collection (only one element of each iterable will be passed to `func` for each partition). + args : tuple + Tuple of positional arguments to append after mapped arguments. label : str, optional String to use as the function-name label in the output collection-key names. From ce002fb9f24f91a60eca30ca76703fbbe09c0ba9 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Thu, 16 Nov 2023 19:35:47 +0000 Subject: [PATCH 08/12] test: fix anticipated error message --- tests/test_structure.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_structure.py b/tests/test_structure.py index 53d4efcf..e6ab781d 100644 --- a/tests/test_structure.py +++ b/tests/test_structure.py @@ -88,7 +88,9 @@ def test_zip_tuple_input(caa: ak.Array, daa: dak.Array) -> None: def test_zip_bad_input(daa: dak.Array) -> None: da1 = daa.points.x gd = (x for x in (da1, da1)) - with pytest.raises(DaskAwkwardNotImplemented, match="only sized iterables"): + with pytest.raises( + DaskAwkwardNotImplemented, match="only mappings or sequences are supported" + ): dak.zip(gd) From 8751e4696db49e681c4837f0a7a13a3052e511d6 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Thu, 16 Nov 2023 19:38:21 +0000 Subject: [PATCH 09/12] test: fix test? --- tests/test_getitem.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_getitem.py b/tests/test_getitem.py index 420b14ea..635bc28e 100644 --- a/tests/test_getitem.py +++ b/tests/test_getitem.py @@ -47,6 +47,7 @@ def test_list_with_ints_raise(daa: dak.Array) -> None: def test_single_int(daa: dak.Array, caa: ak.Array) -> None: + daa.eager_compute_divisions() total = len(daa) assert daa.known_divisions for i in range(total): From e96564db0625d186c42496bb02fe6c4b40a1e0eb Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Thu, 16 Nov 2023 19:58:41 +0000 Subject: [PATCH 10/12] test: don't modify fixture --- tests/test_getitem.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_getitem.py b/tests/test_getitem.py index 635bc28e..c6e36909 100644 --- a/tests/test_getitem.py +++ b/tests/test_getitem.py @@ -47,6 +47,7 @@ def test_list_with_ints_raise(daa: dak.Array) -> None: def test_single_int(daa: dak.Array, caa: ak.Array) -> None: + daa = dak.copy(daa) daa.eager_compute_divisions() total = len(daa) assert daa.known_divisions From a3ca4b100805ec71510e85bfa434ba60f4b234bf Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Thu, 16 Nov 2023 21:22:13 +0000 Subject: [PATCH 11/12] fix: propagate attrs --- src/dask_awkward/lib/core.py | 15 +++++++++++++-- src/dask_awkward/lib/io/io.py | 6 ++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/dask_awkward/lib/core.py b/src/dask_awkward/lib/core.py index e894a2a6..1711504f 100644 --- a/src/dask_awkward/lib/core.py +++ b/src/dask_awkward/lib/core.py @@ -817,7 +817,14 @@ def layout(self) -> Content: raise ValueError("This collection's meta is None; unknown layout.") @property - def behavior(self) -> dict: + def attrs(self) -> dict: + """awkward Array attrs dictionary.""" + if self._meta is not None: + return self._meta.attrs + raise ValueError("This collection's meta is None; no attrs property available.") + + @property + def behavior(self) -> Mapping: """awkward Array behavior dictionary.""" if self._meta is not None: return self._meta.behavior @@ -2232,7 +2239,11 @@ def typetracer_array(a: ak.Array | Array) -> ak.Array: if isinstance(a, Array): return a._meta elif isinstance(a, ak.Array): - return ak.Array(a.layout.to_typetracer(forget_length=True)) + return ak.Array( + a.layout.to_typetracer(forget_length=True), + behavior=a._behavior, + attrs=a._attrs, + ) else: msg = ( "`a` should be an awkward array or a Dask awkward collection.\n" diff --git a/src/dask_awkward/lib/io/io.py b/src/dask_awkward/lib/io/io.py index 35d2a54b..56956f27 100644 --- a/src/dask_awkward/lib/io/io.py +++ b/src/dask_awkward/lib/io/io.py @@ -67,6 +67,7 @@ def from_awkward( npartitions: int, behavior: Mapping | None = None, label: str | None = None, + attrs: Mapping[str, Any] | None = None, ) -> Array: """Create an Array collection from a concrete :class:`awkward.Array` object. @@ -76,8 +77,12 @@ def from_awkward( The concrete awkward array. npartitions : int The total number of partitions for the collection. + behavior : dict, optional + Custom ak.behavior for the output array. label : str, optional Label for the task. + attrs : mapping, optional + Custom attributes for the output array. Returns ------- @@ -112,6 +117,7 @@ def from_awkward( divisions=locs, meta=meta, behavior=behavior, + attrs=attrs, ) From 8e8e7575e6ecee1fea68be499ba9d66e3b9c3b4c Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Thu, 16 Nov 2023 21:35:23 +0000 Subject: [PATCH 12/12] fix: bump Awkward lower bound --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 3d50be57..0d92654b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ classifiers = [ "Topic :: Software Development", ] dependencies = [ - "awkward >=2.4.5", + "awkward >=2.5.0", "dask >=2023.04.0", "typing_extensions >=4.8.0", ]