Skip to content

Commit

Permalink
Assign fields to records (deeply, through all structure). (#107)
Browse files Browse the repository at this point in the history
Field assignment turned out not to need an `inverse` function on `IndexedArrays` and `UnionArrays`: in the end, the broadcasting logic was abstracted out of the ufunc handling so that it can also be used for field assignment.

An immutable version of field assignment is also available as `ak.withfield`.

* [WIP] Assign fields to records (deeply, through all structure).

* Stubs for setitem_field (immutable).

* Stub for tests.

* setitem_field behavior defined for RecordArray.

* Removed 'append' method so that RecordArray and RecordType are immutable (apart from 'identities' and 'parameters').

* Fixed tests.

* RegularArray::setitem_field.

* Fix tests.

* Pull broadcasting logic out of awkward1._numpy into a general function.

* ak.withfield and Array.__setitem__ automatically get broadcasting.
  • Loading branch information
jpivarski authored Feb 4, 2020
1 parent d840a69 commit b2e0042
Show file tree
Hide file tree
Showing 18 changed files with 452 additions and 331 deletions.
2 changes: 1 addition & 1 deletion VERSION_INFO
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.1.106
0.1.107
225 changes: 7 additions & 218 deletions awkward1/_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,230 +40,19 @@ def array_ufunc(ufunc, method, inputs, kwargs, classes, functions):
if method != "__call__" or len(inputs) == 0 or "out" in kwargs:
return NotImplemented

def unwrap(x):
    """Reduce one ufunc argument to a low-level layout, or pass it through.

    High-level arrays and records yield their ``layout``; fillable arrays
    are snapshotted first. Low-level ``Content`` and ``Record`` objects are
    returned unchanged. A NumPy array is accepted only when its dtype is a
    ``numpy.number`` subtype. Any other iterable is converted with
    ``numpy.array`` and unwrapped again. Everything else is returned as-is.

    Raises:
        ValueError: if a ``numpy.ndarray`` has a non-numeric dtype.
    """
    if isinstance(x, (awkward1.highlevel.Array, awkward1.highlevel.Record)):
        return x.layout

    if isinstance(x, awkward1.highlevel.FillableArray):
        return x.snapshot().layout

    if isinstance(x, awkward1.layout.FillableArray):
        return x.snapshot()

    if isinstance(x, (awkward1.layout.Content, awkward1.layout.Record)):
        return x

    if isinstance(x, numpy.ndarray):
        # Reject non-numeric dtypes up front; `ufunc` comes from the
        # enclosing scope and is only used for the error message.
        if not issubclass(x.dtype.type, numpy.number):
            raise ValueError("numpy.ndarray with {0} cannot be used in {1}".format(repr(x.dtype), ufunc))
        return awkward1.highlevel.Array(x).layout

    if isinstance(x, Iterable):
        # Route generic iterables through the ndarray branch above.
        return unwrap(numpy.array(x))

    return x

def checklength(inputs):
    """Verify that every item in ``inputs`` has the same length.

    Args:
        inputs: a sequence of sized objects (anything supporting ``len``).
            An empty sequence is accepted and trivially passes; the caller
            passes a filtered list, which may contain no items at all.

    Returns:
        None.

    Raises:
        ValueError: if any item's length differs from that of the first
            item; the message names both types and both lengths.
    """
    # Nothing to compare: avoid an IndexError on inputs[0].
    if not inputs:
        return

    first = inputs[0]
    length = len(first)
    for x in inputs[1:]:
        if len(x) != length:
            raise ValueError("cannot broadcast {0} of length {1} with {2} of length {3}".format(type(first).__name__, length, type(x).__name__, len(x)))

def apply(inputs):
# handle implicit right-broadcasting (i.e. NumPy-like)
if any(isinstance(x, awkward1._util.listtypes) for x in inputs):
maxdepth = max(x.purelist_depth for x in inputs if isinstance(x, awkward1.layout.Content))
if maxdepth > 0 and all(x.purelist_isregular for x in inputs if isinstance(x, awkward1.layout.Content)):
nextinputs = []
for x in inputs:
if isinstance(x, awkward1.layout.Content):
while x.purelist_depth < maxdepth:
x = awkward1.layout.RegularArray(x, 1)
nextinputs.append(x)
if any(x is not y for x, y in zip(inputs, nextinputs)):
return apply(nextinputs)

# now all lengths must agree
checklength([x for x in inputs if isinstance(x, awkward1.layout.Content)])
inputs = [awkward1.operations.convert.tolayout(x, allowrecord=True, allowother=True) for x in inputs]

def getfunction(inputs):
signature = (ufunc,) + tuple(x.parameters.get("__class__") if isinstance(x, awkward1.layout.Content) else type(x) for x in inputs)
custom = awkward1._util.regular_functions(functions).get(signature)

# the rest of this is one switch statement
if custom is not None:
return custom(*inputs, **kwargs)

elif any(isinstance(x, awkward1._util.unknowntypes) for x in inputs):
return apply([x if not isinstance(x, awkward1._util.unknowntypes) else awkward1.layout.NumpyArray(numpy.array([], dtype=numpy.int64)) for x in inputs])

elif any(isinstance(x, awkward1.layout.NumpyArray) and x.ndim > 1 for x in inputs):
return apply([x if not (isinstance(x, awkward1.layout.NumpyArray) and x.ndim > 1) else x.regularize_shape() for x in inputs])

elif any(isinstance(x, awkward1._util.indexedtypes) for x in inputs):
return apply([x if not isinstance(x, awkward1._util.indexedtypes) else x.project() for x in inputs])

elif any(isinstance(x, awkward1._util.uniontypes) for x in inputs):
tagslist = []
length = None
for x in inputs:
if isinstance(x, awkward1._util.uniontypes):
tagslist.append(numpy.asarray(x.tags))
if length is None:
length = len(tagslist[-1])
elif length != len(tagslist[-1]):
raise ValueError("cannot broadcast UnionArray of length {0} with UnionArray of length {1}".format(length, len(tagslist[-1])))

combos = numpy.stack(tagslist, axis=-1)
combos = combos.view([(str(i), combos.dtype) for i in range(len(tagslist))]).reshape(length)

tags = numpy.empty(length, dtype=numpy.int8)
index = numpy.empty(length, dtype=numpy.int64)
contents = []
for tag, combo in enumerate(numpy.unique(combos)):
mask = (combos == combo)
tags[mask] = tag
index[mask] = numpy.arange(numpy.count_nonzero(mask))
nextinputs = []
for i, x in enumerate(inputs):
if isinstance(x, awkward1._util.uniontypes):
nextinputs.append(x[mask].project(combo[str(i)]))
elif isinstance(x, awkward1.layout.Content):
nextinputs.append(x[mask])
else:
nextinputs.append(x)
contents.append(apply(nextinputs))

tags = awkward1.layout.Index8(tags)
index = awkward1.layout.Index64(index)
return awkward1.layout.UnionArray8_64(tags, index, contents)

elif any(isinstance(x, awkward1._util.optiontypes) for x in inputs):
mask = None
for x in inputs:
if isinstance(x, (awkward1.layout.IndexedOptionArray32, awkward1.layout.IndexedOptionArray64)):
m = numpy.asarray(x.index) < 0
if mask is None:
mask = m
else:
numpy.bitwise_or(mask, m, out=mask)

nextmask = awkward1.layout.Index8(mask.view(numpy.int8))
index = numpy.full(len(mask), -1, dtype=numpy.int64)
index[~mask] = numpy.arange(len(mask) - numpy.count_nonzero(mask), dtype=numpy.int64)
index = awkward1.layout.Index64(index)
if any(not isinstance(x, awkward1._util.optiontypes) for x in inputs):
nextindex = numpy.arange(len(mask), dtype=numpy.int64)
nextindex[mask] = -1
nextindex = awkward1.layout.Index64(nextindex)

nextinputs = []
for x in inputs:
if isinstance(x, awkward1._util.optiontypes):
nextinputs.append(x.project(nextmask))
else:
nextinputs.append(awkward1.layout.IndexedOptionArray64(nextindex, x).project(nextmask))

return awkward1.layout.IndexedOptionArray64(index, apply(nextinputs))

elif any(isinstance(x, awkward1._util.listtypes) for x in inputs):
if all(isinstance(x, awkward1.layout.RegularArray) or not isinstance(x, awkward1._util.listtypes) for x in inputs):
maxsize = max([x.size for x in inputs if isinstance(x, awkward1.layout.RegularArray)])
for x in inputs:
if isinstance(x, awkward1.layout.RegularArray):
if maxsize > 1 and x.size == 1:
tmpindex = awkward1.layout.Index64(numpy.repeat(numpy.arange(len(x), dtype=numpy.int64), maxsize))
nextinputs = []
for x in inputs:
if isinstance(x, awkward1.layout.RegularArray):
if maxsize > 1 and x.size == 1:
nextinputs.append(awkward1.layout.IndexedArray64(tmpindex, x.content).project())
elif x.size == maxsize:
nextinputs.append(x.content)
else:
raise ValueError("cannot broadcast RegularArray of size {0} with RegularArray of size {1}".format(x.size, maxsize))
else:
nextinputs.append(x)
return awkward1.layout.RegularArray(apply(nextinputs), maxsize)

else:
for x in inputs:
if isinstance(x, awkward1._util.listtypes) and not isinstance(x, awkward1.layout.RegularArray):
first = x
break
offsets = first.compact_offsets64()
nextinputs = []
for x in inputs:
if isinstance(x, awkward1._util.listtypes):
nextinputs.append(x.broadcast_tooffsets64(offsets).content)
# handle implicit left-broadcasting (unlike NumPy)
elif isinstance(x, awkward1.layout.Content):
nextinputs.append(awkward1.layout.RegularArray(x, 1).broadcast_tooffsets64(offsets).content)
else:
nextinputs.append(x)
return awkward1.layout.ListOffsetArray64(offsets, apply(nextinputs))

elif any(isinstance(x, awkward1._util.recordtypes) for x in inputs):
keys = None
length = None
istuple = True
for x in inputs:
if isinstance(x, awkward1._util.recordtypes):
if keys is None:
keys = x.keys()
elif set(keys) != set(x.keys()):
raise ValueError("cannot broadcast records because keys don't match:\n {0}\n {1}".format(", ".join(sorted(keys)), ", ".join(sorted(x.keys()))))
if length is None:
length = len(x)
elif length != len(x):
raise ValueError("cannot broadcast RecordArray of length {0} with RecordArray of length {1}".format(length, len(x)))
if not x.istuple:
istuple = False

if len(keys) == 0:
return awkward1.layout.RecordArray(length, istuple)
else:
contents = []
for key in keys:
contents.append(apply([x if not isinstance(x, awkward1._util.recordtypes) else x[key] for x in inputs]))
return awkward1.layout.RecordArray(contents, keys)

else:
assert all(isinstance(x, awkward1.layout.NumpyArray) or not isinstance(x, awkward1.layout.Content) for x in inputs)
result = getattr(ufunc, method)(*inputs, **kwargs)
return awkward1.layout.NumpyArray(result)

isscalar = []

def pack(inputs):
    """Wrap each input for broadcasting and record which ones are scalars.

    For every input, one flag is appended to the enclosing ``isscalar``
    list:

    * a ``Record`` is repeated ``maxlen`` times (by indexing its parent
      array with its own position) and wrapped in a ``RegularArray`` of
      size ``maxlen``; flagged ``True``;
    * any other ``Content`` is wrapped in a ``RegularArray`` whose size is
      its own length; flagged ``False``;
    * anything else is passed through unchanged; flagged ``True``.

    ``maxlen`` is the largest length among the ``Content`` inputs, or 1 if
    there are none.

    Returns:
        A new list with one (possibly wrapped) entry per input.
    """
    content_lengths = [len(x) for x in inputs if isinstance(x, awkward1.layout.Content)]
    maxlen = max(content_lengths) if content_lengths else 1

    packed = []
    for x in inputs:
        # Record is tested before Content, as in the original ordering.
        if isinstance(x, awkward1.layout.Record):
            repeated = numpy.full(maxlen, x.at, dtype=numpy.int64)
            wrapped = awkward1.layout.RegularArray(x.array[repeated], maxlen)
            scalar = True
        elif isinstance(x, awkward1.layout.Content):
            wrapped = awkward1.layout.RegularArray(x, len(x))
            scalar = False
        else:
            wrapped = x
            scalar = True
        packed.append(wrapped)
        isscalar.append(scalar)
    return packed

def unpack(x):
if all(isscalar):
if len(x) == 0:
return x.getitem_nothing().getitem_nothing()
else:
return x[0][0]
return lambda: custom(*inputs, **kwargs)
elif all(isinstance(x, awkward1.layout.NumpyArray) or not isinstance(x, awkward1.layout.Content) for x in inputs):
return lambda: awkward1.layout.NumpyArray(getattr(ufunc, method)(*inputs, **kwargs))
else:
if len(x) == 0:
return x.getitem_nothing()
else:
return x[0]
return None

return awkward1._util.wrap(unpack(apply(pack([unwrap(x) for x in inputs]))), classes, functions)
return awkward1._util.wrap(awkward1._util.broadcast_and_apply(inputs, getfunction), classes, functions)

try:
NDArrayOperatorsMixin = numpy.lib.mixins.NDArrayOperatorsMixin
Expand Down
Loading

0 comments on commit b2e0042

Please sign in to comment.