Skip to content

Commit

Permalink
feat: add enforce_concatenated_form (#2860)
Browse files Browse the repository at this point in the history
* feat: add enforce_concatenated_form

* fix: handle `NumpyArray` in `ListArray._mergemany`

* refactor: rename test

* refactor: improve error message

* test: update test for categorical changes

* test: fix merging backend requirement

* style: pre-commit fixes

* Update src/awkward/contents/content.py

Co-authored-by: Jim Pivarski <[email protected]>

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Jim Pivarski <[email protected]>
  • Loading branch information
3 people authored Dec 5, 2023
1 parent 123fa09 commit b2cb026
Show file tree
Hide file tree
Showing 16 changed files with 817 additions and 205 deletions.
9 changes: 2 additions & 7 deletions src/awkward/contents/bitmaskedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,6 @@
from awkward._nplikes.numpy_like import IndexType, NumpyMetadata
from awkward._nplikes.shape import ShapeItem, unknown_length
from awkward._nplikes.typetracer import MaybeNone, TypeTracer
from awkward._parameters import (
type_parameters_equal,
)
from awkward._regularize import is_integer, is_integer_like
from awkward._slicing import NO_HEAD
from awkward._typing import (
Expand Down Expand Up @@ -577,11 +574,9 @@ def _mergeable_next(self, other: Content, mergebool: bool) -> bool:
# Is the other content an identity, or a union?
if other.is_identity_like or other.is_union:
return True
# We can only combine option types whose array-record parameters agree
# Is the other array indexed or optional?
elif other.is_option or other.is_indexed:
return self._content._mergeable_next(
other.content, mergebool
) and type_parameters_equal(self._parameters, other._parameters)
return self._content._mergeable_next(other.content, mergebool)
else:
return self._content._mergeable_next(other, mergebool)

Expand Down
7 changes: 2 additions & 5 deletions src/awkward/contents/bytemaskedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
from awkward._nplikes.typetracer import MaybeNone, TypeTracer
from awkward._parameters import (
parameters_intersect,
type_parameters_equal,
)
from awkward._regularize import is_integer_like
from awkward._slicing import NO_HEAD
Expand Down Expand Up @@ -721,11 +720,9 @@ def _mergeable_next(self, other: Content, mergebool: bool) -> bool:
# Is the other content an identity, or a union?
if other.is_identity_like or other.is_union:
return True
# We can only combine option types whose array-record parameters agree
# Is the other array indexed or optional?
elif other.is_option or other.is_indexed:
return self._content._mergeable_next(
other.content, mergebool
) and type_parameters_equal(self._parameters, other._parameters)
return self._content._mergeable_next(other.content, mergebool)
else:
return self._content._mergeable_next(other, mergebool)

Expand Down
46 changes: 13 additions & 33 deletions src/awkward/contents/content.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
from awkward._nplikes.numpy import Numpy
from awkward._nplikes.numpy_like import IndexType, NumpyMetadata
from awkward._nplikes.shape import ShapeItem, unknown_length
from awkward._nplikes.typetracer import TypeTracer
from awkward._parameters import (
parameters_are_equal,
type_parameters_equal,
Expand Down Expand Up @@ -762,41 +761,22 @@ def _merging_strategy(

head = [self]
tail = []
i = 0

while i < len(others):
other = others[i]
if isinstance(
other,
(
ak.contents.IndexedArray,
ak.contents.IndexedOptionArray,
ak.contents.ByteMaskedArray,
ak.contents.BitMaskedArray,
ak.contents.UnmaskedArray,
ak.contents.UnionArray,
),
):

it_others = iter(others)

for other in it_others:
if other.is_indexed or other.is_option or other.is_union:
tail.append(other)
tail.extend(it_others)
break
else:
head.append(other)
i = i + 1

while i < len(others):
tail.append(others[i])
i = i + 1

if any(isinstance(x.backend.nplike, TypeTracer) for x in head + tail):
head = [
x if isinstance(x.backend.nplike, TypeTracer) else x.to_typetracer()
for x in head
]
tail = [
x if isinstance(x.backend.nplike, TypeTracer) else x.to_typetracer()
for x in tail
]

return (head, tail)

assert not any(x.backend.nplike.known_data for x in head + tail) or all(
x.backend.nplike.known_data for x in head + tail
)

return head, tail

def _local_index(self, axis: int, depth: int):
raise NotImplementedError
Expand Down
64 changes: 41 additions & 23 deletions src/awkward/contents/indexedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
from awkward._parameters import (
parameters_intersect,
parameters_union,
type_parameters_equal,
)
from awkward._regularize import is_integer_like
from awkward._slicing import NO_HEAD
Expand Down Expand Up @@ -500,11 +499,9 @@ def _mergeable_next(self, other: Content, mergebool: bool) -> bool:
# Is the other content an identity, or a union?
if other.is_identity_like or other.is_union:
return True
# We can only combine option/indexed types whose array-record parameters agree
# Is the other array indexed or optional?
elif other.is_option or other.is_indexed:
return self._content._mergeable_next(
other.content, mergebool
) and type_parameters_equal(self._parameters, other._parameters)
return self._content._mergeable_next(other.content, mergebool)
else:
return self._content._mergeable_next(other, mergebool)

Expand All @@ -517,32 +514,38 @@ def _merging_strategy(self, others):
head = [self]
tail = []

i = 0
while i < len(others):
other = others[i]
it_others = iter(others)
for other in it_others:
if isinstance(other, ak.contents.UnionArray):
tail.append(other)
tail.extend(it_others)
break
else:
head.append(other)
i = i + 1

while i < len(others):
tail.append(others[i])
i = i + 1

if any(isinstance(x.backend.nplike, TypeTracer) for x in head + tail):
head = [
x if isinstance(x.backend.nplike, TypeTracer) else x.to_typetracer()
for x in head
]
tail = [
x if isinstance(x.backend.nplike, TypeTracer) else x.to_typetracer()
for x in tail
]
if any(x.backend.nplike.known_data for x in head + tail) and not all(
x.backend.nplike.known_data for x in head + tail
):
raise RuntimeError

return (head, tail)
return head, tail

def _reverse_merge(self, other):
if isinstance(other, ak.contents.EmptyArray):
return self

# FIXME: support categorical-categorical merging
if (
other.is_indexed
and other.parameter("__array__")
== self.parameter("__array__")
== "categorical"
):
raise NotImplementedError(
"merging categorical arrays is currently not implemented. "
"Use `ak.enforce_type` to drop the categorical type and use general merging."
)

theirlength = other.length
mylength = self.length
index = ak.index.Index64.empty(
Expand Down Expand Up @@ -663,9 +666,24 @@ def _mergemany(self, others: Sequence[Content]) -> Content:
contentlength_so_far += array.length
length_so_far += array.length

# Categoricals may only survive if all contents are categorical
if (
parameters is not None
and parameters.get("__array__") == "categorical"
):
parameters = {**parameters}
del parameters["__array__"]

tail_contents = contents[1:]
nextcontent = contents[0]._mergemany(tail_contents)

# FIXME: support categorical merging?
if parameters is not None and parameters.get("__array__") == "categorical":
raise NotImplementedError(
"merging categorical arrays is currently not implemented. "
"Use `ak.enforce_type` to drop the categorical type and use general merging."
)

# Options win out!
if any(x.is_option for x in head):
next = ak.contents.IndexedOptionArray(
Expand Down
77 changes: 45 additions & 32 deletions src/awkward/contents/indexedoptionarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
from awkward._parameters import (
parameters_intersect,
parameters_union,
type_parameters_equal,
)
from awkward._regularize import is_integer_like
from awkward._slicing import NO_HEAD
Expand Down Expand Up @@ -633,11 +632,9 @@ def _mergeable_next(self, other: Content, mergebool: bool) -> bool:
# Is the other content an identity, or a union?
if other.is_identity_like or other.is_union:
return True
# We can only combine option/indexed types whose array-record parameters agree
# Is the other array indexed or optional?
elif other.is_option or other.is_indexed:
return self._content._mergeable_next(
other.content, mergebool
) and type_parameters_equal(self._parameters, other._parameters)
return self._content._mergeable_next(other.content, mergebool)
else:
return self._content._mergeable_next(other, mergebool)

Expand All @@ -650,35 +647,38 @@ def _merging_strategy(self, others):
head = [self]
tail = []

i = 0
while i < len(others):
other = others[i]
it_others = iter(others)
for other in it_others:
if isinstance(other, ak.contents.UnionArray):
tail.append(other)
tail.extend(it_others)
break
else:
head.append(other)
i = i + 1

while i < len(others):
tail.append(others[i])
i = i + 1

if any(isinstance(x.backend.nplike, TypeTracer) for x in head + tail):
head = [
x if isinstance(x.backend.nplike, TypeTracer) else x.to_typetracer()
for x in head
]
tail = [
x if isinstance(x.backend.nplike, TypeTracer) else x.to_typetracer()
for x in tail
]
if any(x.backend.nplike.known_data for x in head + tail) and not all(
x.backend.nplike.known_data for x in head + tail
):
raise RuntimeError

return (head, tail)
return head, tail

def _reverse_merge(self, other):
if isinstance(other, ak.contents.EmptyArray):
return self

# FIXME: support categorical-categorical merging
if (
other.is_indexed
and other.parameter("__array__")
== self.parameter("__array__")
== "categorical"
):
raise NotImplementedError(
"merging categorical arrays is currently not implemented. "
"Use `ak.enforce_type` to drop the categorical type and use general merging."
)

theirlength = other.length
mylength = self.length
index = ak.index.Index64.empty(
Expand All @@ -688,6 +688,7 @@ def _reverse_merge(self, other):

content = other._mergemany([self._content])

# Fill index::0→theirlength with arange(theirlength)
assert index.nplike is self._backend.index_nplike
self._backend.maybe_kernel_error(
self._backend["awkward_IndexedArray_fill_count", index.dtype.type](
Expand All @@ -697,30 +698,27 @@ def _reverse_merge(self, other):
0,
)
)
reinterpreted_index = ak.index.Index(
self._backend.index_nplike.asarray(self.index.data),
nplike=self._backend.index_nplike,
)

# Fill index::theirlength->end with self.index[:mylength]+theirlength
assert (
index.nplike is self._backend.index_nplike
and reinterpreted_index.nplike is self._backend.index_nplike
and self.index.nplike is self._backend.index_nplike
)
self._backend.maybe_kernel_error(
self._backend[
"awkward_IndexedArray_fill",
index.dtype.type,
reinterpreted_index.dtype.type,
self.index.dtype.type,
](
index.data,
theirlength,
reinterpreted_index.data,
self.index.data,
mylength,
theirlength,
)
)
# We can directly merge with other options, but we must merge parameters
if other.is_option:
# We can directly merge with other options and indexed types, but we must merge parameters
if other.is_option or other.is_indexed:
parameters = parameters_union(self._parameters, other._parameters)
# Otherwise, this option parameters win out
else:
Expand Down Expand Up @@ -806,12 +804,27 @@ def _mergemany(self, others: Sequence[Content]) -> Content:

length_so_far += array.length

# Categoricals may only survive if all contents are categorical
if (
parameters is not None
and parameters.get("__array__") == "categorical"
):
parameters = {**parameters}
del parameters["__array__"]

tail_contents = contents[1:]
nextcontent = contents[0]._mergemany(tail_contents)
next = ak.contents.IndexedOptionArray(
nextindex, nextcontent, parameters=parameters
)

# FIXME: support categorical merging?
if parameters is not None and parameters.get("__array__") == "categorical":
raise NotImplementedError(
"merging categorical arrays is currently not implemented. "
"Use `ak.enforce_type` to drop the categorical type and use general merging."
)

if len(tail) == 0:
return next

Expand Down
Loading

0 comments on commit b2cb026

Please sign in to comment.