Skip to content

Commit

Permalink
improve error message when indexing placeholder arrays (non-trivially, only records) (#3353)
Browse files Browse the repository at this point in the history
  • Loading branch information
pfackeldey authored Dec 19, 2024
1 parent 287347e commit fb245f1
Show file tree
Hide file tree
Showing 3 changed files with 131 additions and 23 deletions.
2 changes: 1 addition & 1 deletion src/awkward/_nplikes/array_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ def reshape(
) -> ArrayLikeT | PlaceholderArray:
if isinstance(x, PlaceholderArray):
next_shape = self._compute_compatible_shape(shape, x.shape)
return PlaceholderArray(self, next_shape, x.dtype)
return PlaceholderArray(self, next_shape, x.dtype, x._field_path)

if copy is None:
return self._module.reshape(x, shape)
Expand Down
31 changes: 25 additions & 6 deletions src/awkward/_nplikes/placeholder.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,21 @@


class PlaceholderArray(ArrayLike):
def __init__(self, nplike: NumpyLike, shape: tuple[ShapeItem, ...], dtype: DType):
def __init__(
self,
nplike: NumpyLike,
shape: tuple[ShapeItem, ...],
dtype: DType,
field_path: tuple[str, ...] = (),
):
self._nplike = nplike
self._shape = shape
self._dtype = np.dtype(dtype)
self._field_path = field_path

@property
def field_path(self) -> str:
return ".".join(self._field_path)

@property
def dtype(self) -> DType:
Expand Down Expand Up @@ -67,7 +78,7 @@ def view(self, dtype: DTypeLike) -> Self:
shape = self._shape[:-1] + (last,)
else:
shape = self._shape
return type(self)(self._nplike, shape, dtype)
return type(self)(self._nplike, shape, dtype, self._field_path)

def __getitem__(self, index):
# Typetracers permit slices that don't touch data or shapes
Expand All @@ -92,11 +103,19 @@ def __getitem__(self, index):
start, stop, step = index.indices(length)
new_length = (stop - start) // step

return type(self)(self._nplike, (new_length,), self._dtype)
else:
raise TypeError(
f"{type(self).__name__} supports only trivial slices, not {type(index).__name__}"
return type(self)(
self._nplike, (new_length,), self._dtype, self._field_path
)
else:
msg = f"{type(self).__name__} supports only trivial slices, not {type(index).__name__}"
if self.field_path:
msg += f"\n\nAwkward-array attempted to access a field '{self.field_path}', but "
msg += (
"it has been excluded during a pre-run phase (possibly by Dask). "
)
msg += "If this was supposed to happen automatically (e.g. you're using Dask), "
msg += "please report it to the developers at: https://github.com/scikit-hep/awkward/issues"
raise TypeError(msg)

def __setitem__(self, key, value):
raise RuntimeError
Expand Down
121 changes: 105 additions & 16 deletions src/awkward/operations/ak_from_buffers.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,26 +147,33 @@ def _impl(

getkey = regularize_buffer_key(buffer_key)

out = _reconstitute(form, length, container, getkey, backend, byteorder, simplify)
out = _reconstitute(
form, length, container, getkey, backend, byteorder, simplify, field_path=()
)

return wrap_layout(out, highlevel=highlevel, attrs=attrs, behavior=behavior)


def _from_buffer(
nplike: NumpyLike, buffer, dtype: np.dtype, count: ShapeItem, byteorder: str
nplike: NumpyLike,
buffer,
dtype: np.dtype,
count: ShapeItem,
byteorder: str,
field_path: tuple,
) -> ArrayLike:
# Unknown-length information implies that we didn't load shape-buffers (offsets, etc)
# for the parent of this node. Thus, this node and its children *must* only
# contain placeholders
if count is unknown_length:
# We may actually have a known buffer here, but as we do not know the length,
# we cannot safely trim it. Thus, introduce a placeholder anyway
return PlaceholderArray(nplike, (unknown_length,), dtype)
return PlaceholderArray(nplike, (unknown_length,), dtype, field_path)
# Known-length information implies that we should have known-length buffers here
# We could choose to make this an error, and have the caller re-implement some
# of #ak.from_buffers, or we can just introduce the known lengths where possible
elif isinstance(buffer, PlaceholderArray) and buffer.size is unknown_length:
return PlaceholderArray(nplike, (count,), dtype)
return PlaceholderArray(nplike, (count,), dtype, field_path)
elif isinstance(buffer, PlaceholderArray) or nplike.is_own_array(buffer):
# Require 1D buffers
array = nplike.reshape(buffer.view(dtype), shape=(-1,), copy=False)
Expand All @@ -185,7 +192,9 @@ def _from_buffer(
return array


def _reconstitute(form, length, container, getkey, backend, byteorder, simplify):
def _reconstitute(
form, length, container, getkey, backend, byteorder, simplify, field_path
):
if isinstance(form, ak.forms.EmptyForm):
if length != 0:
raise ValueError(f"EmptyForm node, but the expected length is {length}")
Expand All @@ -201,6 +210,7 @@ def _reconstitute(form, length, container, getkey, backend, byteorder, simplify)
dtype=dtype,
count=real_length,
byteorder=byteorder,
field_path=field_path,
)
if form.inner_shape != ():
data = backend.nplike.reshape(data, (length, *form.inner_shape))
Expand All @@ -211,7 +221,14 @@ def _reconstitute(form, length, container, getkey, backend, byteorder, simplify)

elif isinstance(form, ak.forms.UnmaskedForm):
content = _reconstitute(
form.content, length, container, getkey, backend, byteorder, simplify
form.content,
length,
container,
getkey,
backend,
byteorder,
simplify,
field_path,
)
if simplify:
make = ak.contents.UnmaskedArray.simplified
Expand All @@ -231,9 +248,17 @@ def _reconstitute(form, length, container, getkey, backend, byteorder, simplify)
dtype=index_to_dtype[form.mask],
count=next_length,
byteorder=byteorder,
field_path=field_path,
)
content = _reconstitute(
form.content, length, container, getkey, backend, byteorder, simplify
form.content,
length,
container,
getkey,
backend,
byteorder,
simplify,
field_path,
)
if simplify:
make = ak.contents.BitMaskedArray.simplified
Expand All @@ -256,9 +281,17 @@ def _reconstitute(form, length, container, getkey, backend, byteorder, simplify)
dtype=index_to_dtype[form.mask],
count=length,
byteorder=byteorder,
field_path=field_path,
)
content = _reconstitute(
form.content, length, container, getkey, backend, byteorder, simplify
form.content,
length,
container,
getkey,
backend,
byteorder,
simplify,
field_path,
)
if simplify:
make = ak.contents.ByteMaskedArray.simplified
Expand All @@ -279,6 +312,7 @@ def _reconstitute(form, length, container, getkey, backend, byteorder, simplify)
dtype=index_to_dtype[form.index],
count=length,
byteorder=byteorder,
field_path=field_path,
)
if isinstance(index, PlaceholderArray):
next_length = unknown_length
Expand All @@ -287,7 +321,14 @@ def _reconstitute(form, length, container, getkey, backend, byteorder, simplify)
0 if len(index) == 0 else max(0, backend.index_nplike.max(index) + 1)
)
content = _reconstitute(
form.content, next_length, container, getkey, backend, byteorder, simplify
form.content,
next_length,
container,
getkey,
backend,
byteorder,
simplify,
field_path,
)
if simplify:
make = ak.contents.IndexedOptionArray.simplified
Expand All @@ -307,6 +348,7 @@ def _reconstitute(form, length, container, getkey, backend, byteorder, simplify)
dtype=index_to_dtype[form.index],
count=length,
byteorder=byteorder,
field_path=field_path,
)
if isinstance(index, PlaceholderArray):
next_length = unknown_length
Expand All @@ -319,7 +361,14 @@ def _reconstitute(form, length, container, getkey, backend, byteorder, simplify)
)
)
content = _reconstitute(
form.content, next_length, container, getkey, backend, byteorder, simplify
form.content,
next_length,
container,
getkey,
backend,
byteorder,
simplify,
field_path,
)
if simplify:
make = ak.contents.IndexedArray.simplified
Expand All @@ -340,13 +389,15 @@ def _reconstitute(form, length, container, getkey, backend, byteorder, simplify)
dtype=index_to_dtype[form.starts],
count=length,
byteorder=byteorder,
field_path=field_path,
)
stops = _from_buffer(
backend.index_nplike,
raw_array2,
dtype=index_to_dtype[form.stops],
count=length,
byteorder=byteorder,
field_path=field_path,
)
if isinstance(stops, PlaceholderArray):
next_length = unknown_length
Expand All @@ -356,7 +407,14 @@ def _reconstitute(form, length, container, getkey, backend, byteorder, simplify)
0 if len(starts) == 0 else backend.index_nplike.max(reduced_stops)
)
content = _reconstitute(
form.content, next_length, container, getkey, backend, byteorder, simplify
form.content,
next_length,
container,
getkey,
backend,
byteorder,
simplify,
field_path,
)
return ak.contents.ListArray(
ak.index.Index(starts),
Expand All @@ -373,14 +431,22 @@ def _reconstitute(form, length, container, getkey, backend, byteorder, simplify)
dtype=index_to_dtype[form.offsets],
count=length + 1,
byteorder=byteorder,
field_path=field_path,
)

if isinstance(offsets, PlaceholderArray):
next_length = unknown_length
else:
next_length = 0 if len(offsets) == 1 else offsets[-1]
content = _reconstitute(
form.content, next_length, container, getkey, backend, byteorder, simplify
form.content,
next_length,
container,
getkey,
backend,
byteorder,
simplify,
field_path,
)
return ak.contents.ListOffsetArray(
ak.index.Index(offsets),
Expand All @@ -391,7 +457,14 @@ def _reconstitute(form, length, container, getkey, backend, byteorder, simplify)
elif isinstance(form, ak.forms.RegularForm):
next_length = length * form.size
content = _reconstitute(
form.content, next_length, container, getkey, backend, byteorder, simplify
form.content,
next_length,
container,
getkey,
backend,
byteorder,
simplify,
field_path,
)
return ak.contents.RegularArray(
content,
Expand All @@ -403,9 +476,16 @@ def _reconstitute(form, length, container, getkey, backend, byteorder, simplify)
elif isinstance(form, ak.forms.RecordForm):
contents = [
_reconstitute(
content, length, container, getkey, backend, byteorder, simplify
content,
length,
container,
getkey,
backend,
byteorder,
simplify,
(*field_path, field),
)
for content in form.contents
for content, field in zip(form.contents, form.fields)
]
return ak.contents.RecordArray(
contents,
Expand All @@ -423,13 +503,15 @@ def _reconstitute(form, length, container, getkey, backend, byteorder, simplify)
dtype=index_to_dtype[form.tags],
count=length,
byteorder=byteorder,
field_path=field_path,
)
index = _from_buffer(
backend.index_nplike,
raw_array2,
dtype=index_to_dtype[form.index],
count=length,
byteorder=byteorder,
field_path=field_path,
)
if isinstance(index, PlaceholderArray) or isinstance(tags, PlaceholderArray):
lengths = [unknown_length] * len(form.contents)
Expand All @@ -443,7 +525,14 @@ def _reconstitute(form, length, container, getkey, backend, byteorder, simplify)
lengths.append(backend.index_nplike.max(selected_index) + 1)
contents = [
_reconstitute(
content, lengths[i], container, getkey, backend, byteorder, simplify
content,
lengths[i],
container,
getkey,
backend,
byteorder,
simplify,
field_path,
)
for i, content in enumerate(form.contents)
]
Expand Down

0 comments on commit fb245f1

Please sign in to comment.