From fb245f13057595c8134f9c687cecd2623a9e7aee Mon Sep 17 00:00:00 2001 From: Peter Fackeldey Date: Wed, 18 Dec 2024 20:34:17 -0500 Subject: [PATCH] improve error message when indexing placeholder arrays (non-trivially, only records) (#3353) --- src/awkward/_nplikes/array_module.py | 2 +- src/awkward/_nplikes/placeholder.py | 31 ++++-- src/awkward/operations/ak_from_buffers.py | 121 +++++++++++++++++++--- 3 files changed, 131 insertions(+), 23 deletions(-) diff --git a/src/awkward/_nplikes/array_module.py b/src/awkward/_nplikes/array_module.py index 568c7fc29e..65c132334c 100644 --- a/src/awkward/_nplikes/array_module.py +++ b/src/awkward/_nplikes/array_module.py @@ -306,7 +306,7 @@ def reshape( ) -> ArrayLikeT | PlaceholderArray: if isinstance(x, PlaceholderArray): next_shape = self._compute_compatible_shape(shape, x.shape) - return PlaceholderArray(self, next_shape, x.dtype) + return PlaceholderArray(self, next_shape, x.dtype, x._field_path) if copy is None: return self._module.reshape(x, shape) diff --git a/src/awkward/_nplikes/placeholder.py b/src/awkward/_nplikes/placeholder.py index 20a2af6ee7..92b8517326 100644 --- a/src/awkward/_nplikes/placeholder.py +++ b/src/awkward/_nplikes/placeholder.py @@ -17,10 +17,21 @@ class PlaceholderArray(ArrayLike): - def __init__(self, nplike: NumpyLike, shape: tuple[ShapeItem, ...], dtype: DType): + def __init__( + self, + nplike: NumpyLike, + shape: tuple[ShapeItem, ...], + dtype: DType, + field_path: tuple[str, ...] = (), + ): self._nplike = nplike self._shape = shape self._dtype = np.dtype(dtype) + self._field_path = field_path + + @property + def field_path(self) -> str: + return ".".join(self._field_path) @property def dtype(self) -> DType: @@ -67,7 +78,7 @@ def view(self, dtype: DTypeLike) -> Self: shape = self._shape[:-1] + (last,) else: shape = self._shape - return type(self)(self._nplike, shape, dtype) + return type(self)(self._nplike, shape, dtype, self._field_path) def __getitem__(self, index): # Typetracers permit slices that don't touch data or shapes @@ -92,11 +103,19 @@ def __getitem__(self, index): start, stop, step = index.indices(length) new_length = (stop - start) // step - return type(self)(self._nplike, (new_length,), self._dtype) - else: - raise TypeError( - f"{type(self).__name__} supports only trivial slices, not {type(index).__name__}" + return type(self)( + self._nplike, (new_length,), self._dtype, self._field_path ) + else: + msg = f"{type(self).__name__} supports only trivial slices, not {type(index).__name__}" + if self.field_path: + msg += f"\n\nAwkward-array attempted to access a field '{self.field_path}', but " + msg += ( + "it has been excluded during a pre-run phase (possibly by Dask). " + ) + msg += "If this was supposed to happen automatically (e.g. you're using Dask), " + msg += "please report it to the developers at: https://github.com/scikit-hep/awkward/issues" + raise TypeError(msg) def __setitem__(self, key, value): raise RuntimeError diff --git a/src/awkward/operations/ak_from_buffers.py b/src/awkward/operations/ak_from_buffers.py index 2b65d24913..b843c40b3f 100644 --- a/src/awkward/operations/ak_from_buffers.py +++ b/src/awkward/operations/ak_from_buffers.py @@ -147,13 +147,20 @@ def _impl( getkey = regularize_buffer_key(buffer_key) - out = _reconstitute(form, length, container, getkey, backend, byteorder, simplify) + out = _reconstitute( + form, length, container, getkey, backend, byteorder, simplify, field_path=() + ) return wrap_layout(out, highlevel=highlevel, attrs=attrs, behavior=behavior) def _from_buffer( - nplike: NumpyLike, buffer, dtype: np.dtype, count: ShapeItem, byteorder: str + nplike: NumpyLike, + buffer, + dtype: np.dtype, + count: ShapeItem, + byteorder: str, + field_path: tuple, ) -> ArrayLike: # Unknown-length information implies that we didn't load shape-buffers (offsets, etc) # for the parent of this node. Thus, this node and its children *must* only @@ -161,12 +168,12 @@ def _from_buffer( if count is unknown_length: # We may actually have a known buffer here, but as we do not know the length, # we cannot safely trim it. Thus, introduce a placeholder anyway - return PlaceholderArray(nplike, (unknown_length,), dtype) + return PlaceholderArray(nplike, (unknown_length,), dtype, field_path) # Known-length information implies that we should have known-length buffers here # We could choose to make this an error, and have the caller re-implement some # of #ak.from_buffers, or we can just introduce the known lengths where possible elif isinstance(buffer, PlaceholderArray) and buffer.size is unknown_length: - return PlaceholderArray(nplike, (count,), dtype) + return PlaceholderArray(nplike, (count,), dtype, field_path) elif isinstance(buffer, PlaceholderArray) or nplike.is_own_array(buffer): # Require 1D buffers array = nplike.reshape(buffer.view(dtype), shape=(-1,), copy=False) @@ -185,7 +192,9 @@ def _from_buffer( return array -def _reconstitute(form, length, container, getkey, backend, byteorder, simplify): +def _reconstitute( + form, length, container, getkey, backend, byteorder, simplify, field_path +): if isinstance(form, ak.forms.EmptyForm): if length != 0: raise ValueError(f"EmptyForm node, but the expected length is {length}") @@ -201,6 +210,7 @@ def _reconstitute(form, length, container, getkey, backend, byteorder, simplify) dtype=dtype, count=real_length, byteorder=byteorder, + field_path=field_path, ) if form.inner_shape != (): data = backend.nplike.reshape(data, (length, *form.inner_shape)) @@ -211,7 +221,14 @@ def _reconstitute(form, length, container, getkey, backend, byteorder, simplify) elif isinstance(form, ak.forms.UnmaskedForm): content = _reconstitute( - form.content, length, container, getkey, backend, byteorder, simplify + form.content, + length, + container, + getkey, + backend, + byteorder, + simplify, + field_path, ) if simplify: make = ak.contents.UnmaskedArray.simplified @@ -231,9 +248,17 @@ def _reconstitute(form, length, container, getkey, backend, byteorder, simplify) dtype=index_to_dtype[form.mask], count=next_length, byteorder=byteorder, + field_path=field_path, ) content = _reconstitute( - form.content, length, container, getkey, backend, byteorder, simplify + form.content, + length, + container, + getkey, + backend, + byteorder, + simplify, + field_path, ) if simplify: make = ak.contents.BitMaskedArray.simplified @@ -256,9 +281,17 @@ def _reconstitute(form, length, container, getkey, backend, byteorder, simplify) dtype=index_to_dtype[form.mask], count=length, byteorder=byteorder, + field_path=field_path, ) content = _reconstitute( - form.content, length, container, getkey, backend, byteorder, simplify + form.content, + length, + container, + getkey, + backend, + byteorder, + simplify, + field_path, ) if simplify: make = ak.contents.ByteMaskedArray.simplified @@ -279,6 +312,7 @@ def _reconstitute(form, length, container, getkey, backend, byteorder, simplify) dtype=index_to_dtype[form.index], count=length, byteorder=byteorder, + field_path=field_path, ) if isinstance(index, PlaceholderArray): next_length = unknown_length @@ -287,7 +321,14 @@ def _reconstitute(form, length, container, getkey, backend, byteorder, simplify) 0 if len(index) == 0 else max(0, backend.index_nplike.max(index) + 1) ) content = _reconstitute( - form.content, next_length, container, getkey, backend, byteorder, simplify + form.content, + next_length, + container, + getkey, + backend, + byteorder, + simplify, + field_path, ) if simplify: make = ak.contents.IndexedOptionArray.simplified @@ -307,6 +348,7 @@ def _reconstitute(form, length, container, getkey, backend, byteorder, simplify) dtype=index_to_dtype[form.index], count=length, byteorder=byteorder, + field_path=field_path, ) if isinstance(index, PlaceholderArray): next_length = unknown_length @@ -319,7 +361,14 @@ def _reconstitute(form, length, container, getkey, backend, byteorder, simplify) ) ) content = _reconstitute( - form.content, next_length, container, getkey, backend, byteorder, simplify + form.content, + next_length, + container, + getkey, + backend, + byteorder, + simplify, + field_path, ) if simplify: make = ak.contents.IndexedArray.simplified @@ -340,6 +389,7 @@ def _reconstitute(form, length, container, getkey, backend, byteorder, simplify) dtype=index_to_dtype[form.starts], count=length, byteorder=byteorder, + field_path=field_path, ) stops = _from_buffer( backend.index_nplike, @@ -347,6 +397,7 @@ def _reconstitute(form, length, container, getkey, backend, byteorder, simplify) dtype=index_to_dtype[form.stops], count=length, byteorder=byteorder, + field_path=field_path, ) if isinstance(stops, PlaceholderArray): next_length = unknown_length @@ -356,7 +407,14 @@ def _reconstitute(form, length, container, getkey, backend, byteorder, simplify) 0 if len(starts) == 0 else backend.index_nplike.max(reduced_stops) ) content = _reconstitute( - form.content, next_length, container, getkey, backend, byteorder, simplify + form.content, + next_length, + container, + getkey, + backend, + byteorder, + simplify, + field_path, ) return ak.contents.ListArray( ak.index.Index(starts), @@ -373,6 +431,7 @@ def _reconstitute(form, length, container, getkey, backend, byteorder, simplify) dtype=index_to_dtype[form.offsets], count=length + 1, byteorder=byteorder, + field_path=field_path, ) if isinstance(offsets, PlaceholderArray): @@ -380,7 +439,14 @@ def _reconstitute(form, length, container, getkey, backend, byteorder, simplify) else: next_length = 0 if len(offsets) == 1 else offsets[-1] content = _reconstitute( - form.content, next_length, container, getkey, backend, byteorder, simplify + form.content, + next_length, + container, + getkey, + backend, + byteorder, + simplify, + field_path, ) return ak.contents.ListOffsetArray( ak.index.Index(offsets), @@ -391,7 +457,14 @@ def _reconstitute(form, length, container, getkey, backend, byteorder, simplify) elif isinstance(form, ak.forms.RegularForm): next_length = length * form.size content = _reconstitute( - form.content, next_length, container, getkey, backend, byteorder, simplify + form.content, + next_length, + container, + getkey, + backend, + byteorder, + simplify, + field_path, ) return ak.contents.RegularArray( content, @@ -403,9 +476,16 @@ def _reconstitute(form, length, container, getkey, backend, byteorder, simplify) elif isinstance(form, ak.forms.RecordForm): contents = [ _reconstitute( - content, length, container, getkey, backend, byteorder, simplify + content, + length, + container, + getkey, + backend, + byteorder, + simplify, + (*field_path, field), ) - for content in form.contents + for content, field in zip(form.contents, form.fields) ] return ak.contents.RecordArray( contents, @@ -423,6 +503,7 @@ def _reconstitute(form, length, container, getkey, backend, byteorder, simplify) dtype=index_to_dtype[form.tags], count=length, byteorder=byteorder, + field_path=field_path, ) index = _from_buffer( backend.index_nplike, @@ -430,6 +511,7 @@ def _reconstitute(form, length, container, getkey, backend, byteorder, simplify) dtype=index_to_dtype[form.index], count=length, byteorder=byteorder, + field_path=field_path, ) if isinstance(index, PlaceholderArray) or isinstance(tags, PlaceholderArray): lengths = [unknown_length] * len(form.contents) @@ -443,7 +525,14 @@ def _reconstitute(form, length, container, getkey, backend, byteorder, simplify) lengths.append(backend.index_nplike.max(selected_index) + 1) contents = [ _reconstitute( - content, lengths[i], container, getkey, backend, byteorder, simplify + content, + lengths[i], + container, + getkey, + backend, + byteorder, + simplify, + field_path, ) for i, content in enumerate(form.contents) ]