Skip to content

Commit

Permalink
Start using RegularArray everywhere it needs to be used. (#24)
Browse files Browse the repository at this point in the history
This check-in includes a commented-out attempt at `lower_getitem_nothing` in `awkward1._numba.array.numpyarray`; it will be removed in the next PR. I concluded that `__getitem__` on a `NumpyArray` cannot be typed without dependent types (that is, with the array's dimensionality as part of the type but its length not): for an empty array, the operation would have to reduce the dimensionality by one yet return nothing, but reducing the dimensionality of a one-dimensional `NumpyArray` returns a scalar, which is not nothing. Ironically, the C++ implementation could be fully transferred from a `ListArray` wrapper to a `RegularArray` wrapper because it is dynamically typed, whereas the Numba implementation could not because it is statically typed. The Numba implementation therefore wraps with `RegularArray` only if the types are sound.

* Start using RegularArray everywhere it needs to be used.

* [skip ci] ListArray64 wrapper -> RegularArray wrapper partly works.

* [skip ci] ListArray64 wrapper -> RegularArray wrapper still only works partially.

* ListArray64 wrapper -> RegularArray wrapper now works

* RegularArray::tojson_part and fix EmptyArray::tojson_part.

* Centralize lower_getitem_tuple and lower_getitem_other.

* Avoid selecting types based on a value (length of array) in Content::getitem(Slice).

* getitem_tuple for Numba mirrors C++ as well as possible: if `outtpe` is a `NumpyArrayType`, the result has to be lowered with `lower_getitem_int`, because `lower_getitem_nothing` is impossible for that type.

* ListArray and ListOffsetArray::getitem_next(array, not advanced) returns a RegularArray now (C++ and Numba).

* ListArray and ListOffsetArray::getitem_next(array) returns the same shape of RegularArrays as the index array.

* Now newaxis uses RegularArray, too.

* Ambiguous identities in ListArray are now refused with an error.

* [skip ci] writing awkward_identity*_from_listoffsetarray*.

* ListOffsetArray now uses awkward_identity*_from_listoffsetarray*.

* Addressed the FIXMEs in getitem.cpp; ready to merge PR.
  • Loading branch information
jpivarski authored Nov 20, 2019
1 parent a0e669a commit 4ce3970
Show file tree
Hide file tree
Showing 40 changed files with 584 additions and 348 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ Completed items are ☑check-marked. See [closed PRs](https://github.com/scikit-
* [ ] Start the `awkward → awkward0`, `awkward1 → awkward` transition.
* [ ] Translation to and from Apache Arrow and Parquet in C++.
* [ ] Persistence to any medium that stores named binary blobs, as before, but accessible via C++ (especially for writing). The persistence format might differ slightly from the existing one (break backward compatibility, if needed).
* [ ] Universal `array.get[...]` as a softer form of `array[...]` that skips non-existent indexes, rather than raising errors.
* [ ] Universal `array.get[...]` as a softer form of `array[...]` that inserts `None` for non-existent indexes, rather than raising errors.
* [ ] Explicit interface with [NumExpr](https://numexpr.readthedocs.io/en/latest/index.html).

### At some point in the future
Expand Down
2 changes: 1 addition & 1 deletion VERSION_INFO
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.1.23
0.1.24
7 changes: 7 additions & 0 deletions awkward1/_numba/array/emptyarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@ def carry(self):
def lower_len(self):
return lower_len

@property
def lower_getitem_nothing(self):
return lower_getitem_nothing

@property
def lower_getitem_range(self):
return lower_getitem_range
Expand Down Expand Up @@ -95,6 +99,9 @@ def box(tpe, val, c):
def lower_len(context, builder, sig, args):
return context.get_constant(numba.intp, 0)

def lower_getitem_nothing(context, builder, tpe, val):
return val

@numba.extending.lower_builtin(operator.getitem, EmptyArrayType, numba.types.slice2_type)
def lower_getitem_range(context, builder, sig, args):
rettpe, (tpe, wheretpe) = sig.return_type, sig.args
Expand Down
34 changes: 12 additions & 22 deletions awkward1/_numba/array/listarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def getitem_tuple(self, wheretpe):
return outtpe.getitem_int()

def getitem_next(self, wheretpe, isadvanced):
import awkward1._numba.array.listoffsetarray
import awkward1._numba.array.regulararray
if len(wheretpe.types) == 0:
return self
headtpe = wheretpe.types[0]
Expand All @@ -76,7 +76,7 @@ def getitem_next(self, wheretpe, isadvanced):
raise NotImplementedError("array.ndim != 1")
contenttpe = self.contenttpe.carry().getitem_next(tailtpe, True)
if not isadvanced:
return awkward1._numba.array.listoffsetarray.ListOffsetArrayType(util.indextpe(self.indexname), contenttpe, self.idtpe)
return awkward1._numba.array.regulararray.RegularArrayType(contenttpe, self.idtpe)
else:
return contenttpe

Expand All @@ -90,6 +90,10 @@ def carry(self):
def lower_len(self):
return lower_len

@property
def lower_getitem_nothing(self):
return content.lower_getitem_nothing

@property
def lower_getitem_int(self):
return lower_getitem_int
Expand Down Expand Up @@ -222,31 +226,19 @@ def lower_getitem_range(context, builder, sig, args):

@numba.extending.lower_builtin(operator.getitem, ListArrayType, numba.types.BaseTuple)
def lower_getitem_tuple(context, builder, sig, args):
rettpe, (arraytpe, wheretpe) = sig.return_type, sig.args
arrayval, whereval = args

wheretpe, whereval = util.preprocess_slicetuple(context, builder, wheretpe, whereval)
nexttpe, nextval = util.wrap_for_slicetuple(context, builder, arraytpe, arrayval)

outtpe = nexttpe.getitem_next(wheretpe, False)
outval = nexttpe.lower_getitem_next(context, builder, nexttpe, wheretpe, nextval, whereval, None)

return outtpe.lower_getitem_int(context, builder, rettpe(outtpe, numba.int64), (outval, context.get_constant(numba.int64, 0)))
return content.lower_getitem_tuple(context, builder, sig, args)

@numba.extending.lower_builtin(operator.getitem, ListArrayType, numba.types.Array)
@numba.extending.lower_builtin(operator.getitem, ListArrayType, numba.types.List)
@numba.extending.lower_builtin(operator.getitem, ListArrayType, numba.types.ArrayCompatible)
@numba.extending.lower_builtin(operator.getitem, ListArrayType, numba.types.EllipsisType)
@numba.extending.lower_builtin(operator.getitem, ListArrayType, type(numba.typeof(numpy.newaxis)))
def lower_getitem_other(context, builder, sig, args):
rettpe, (arraytpe, wheretpe) = sig.return_type, sig.args
arrayval, whereval = args
wrappedtpe = numba.types.Tuple((wheretpe,))
wrappedval = context.make_tuple(builder, wrappedtpe, (whereval,))
return lower_getitem_tuple(context, builder, rettpe(arraytpe, wrappedtpe), (arrayval, wrappedval))
return content.lower_getitem_other(context, builder, sig, args)

def lower_getitem_next(context, builder, arraytpe, wheretpe, arrayval, whereval, advanced):
import awkward1._numba.array.listoffsetarray
import awkward1._numba.array.regulararray

if len(wheretpe.types) == 0:
return arrayval
Expand Down Expand Up @@ -400,10 +392,8 @@ def lower_getitem_next(context, builder, arraytpe, wheretpe, arrayval, whereval,

nextcarry = util.newindex64(context, builder, numba.int64, lencarry)
nextadvanced = util.newindex64(context, builder, numba.int64, lencarry)
nextoffsets = util.newindex(arraytpe.indexname, context, builder, numba.int64, lenoffsets)
util.call(context, builder, kernel,
(util.arrayptr(context, builder, util.indextpe(arraytpe.indexname), nextoffsets),
util.arrayptr(context, builder, util.index64tpe, nextcarry),
(util.arrayptr(context, builder, util.index64tpe, nextcarry),
util.arrayptr(context, builder, util.index64tpe, nextadvanced),
util.arrayptr(context, builder, arraytpe.startstpe, proxyin.starts),
util.arrayptr(context, builder, arraytpe.stopstpe, proxyin.stops),
Expand All @@ -421,10 +411,10 @@ def lower_getitem_next(context, builder, arraytpe, wheretpe, arrayval, whereval,
contenttpe = nexttpe.getitem_next(tailtpe, True)
contentval = nexttpe.lower_getitem_next(context, builder, nexttpe, tailtpe, nextval, tailval, nextadvanced)

outtpe = awkward1._numba.array.listoffsetarray.ListOffsetArrayType(util.indextpe(arraytpe.indexname), contenttpe, arraytpe.idtpe)
outtpe = awkward1._numba.array.regulararray.RegularArrayType(contenttpe, arraytpe.idtpe)
proxyout = numba.cgutils.create_struct_proxy(outtpe)(context, builder)
proxyout.offsets = nextoffsets
proxyout.content = contentval
proxyout.size = lenflathead
if outtpe.idtpe != numba.none:
proxyout.id = awkward1._numba.identity.lower_getitem_any(context, builder, outtpe.idtpe, util.index64tpe, proxyin.id, flathead)
return proxyout._getvalue()
Expand Down
31 changes: 11 additions & 20 deletions awkward1/_numba/array/listoffsetarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ def getitem_tuple(self, wheretpe):
return out.getitem_int()

def getitem_next(self, wheretpe, isadvanced):
import awkward1._numba.array.regulararray
if len(wheretpe.types) == 0:
return self
headtpe = wheretpe.types[0]
Expand All @@ -70,7 +71,7 @@ def getitem_next(self, wheretpe, isadvanced):
raise NotImplementedError("array.ndim != 1")
contenttpe = self.contenttpe.carry().getitem_next(tailtpe, True)
if not isadvanced:
return ListOffsetArrayType(util.indextpe(self.indexname), contenttpe, self.idtpe)
return awkward1._numba.array.regulararray.RegularArrayType(contenttpe, self.idtpe)
else:
return contenttpe

Expand All @@ -85,6 +86,10 @@ def carry(self):
def lower_len(self):
return lower_len

@property
def lower_getitem_nothing(self):
return content.lower_getitem_nothing

@property
def lower_getitem_int(self):
return lower_getitem_int
Expand Down Expand Up @@ -219,27 +224,15 @@ def lower_getitem_range(context, builder, sig, args):

@numba.extending.lower_builtin(operator.getitem, ListOffsetArrayType, numba.types.BaseTuple)
def lower_getitem_tuple(context, builder, sig, args):
rettpe, (arraytpe, wheretpe) = sig.return_type, sig.args
arrayval, whereval = args

wheretpe, whereval = util.preprocess_slicetuple(context, builder, wheretpe, whereval)
nexttpe, nextval = util.wrap_for_slicetuple(context, builder, arraytpe, arrayval)

outtpe = nexttpe.getitem_next(wheretpe, False)
outval = nexttpe.lower_getitem_next(context, builder, nexttpe, wheretpe, nextval, whereval, None)
return outtpe.lower_getitem_int(context, builder, rettpe(outtpe, numba.int64), (outval, context.get_constant(numba.int64, 0)))
return content.lower_getitem_tuple(context, builder, sig, args)

@numba.extending.lower_builtin(operator.getitem, ListOffsetArrayType, numba.types.Array)
@numba.extending.lower_builtin(operator.getitem, ListOffsetArrayType, numba.types.List)
@numba.extending.lower_builtin(operator.getitem, ListOffsetArrayType, numba.types.ArrayCompatible)
@numba.extending.lower_builtin(operator.getitem, ListOffsetArrayType, numba.types.EllipsisType)
@numba.extending.lower_builtin(operator.getitem, ListOffsetArrayType, type(numba.typeof(numpy.newaxis)))
def lower_getitem_other(context, builder, sig, args):
rettpe, (arraytpe, wheretpe) = sig.return_type, sig.args
arrayval, whereval = args
wrappedtpe = numba.types.Tuple((wheretpe,))
wrappedval = context.make_tuple(builder, wrappedtpe, (whereval,))
return lower_getitem_tuple(context, builder, rettpe(arraytpe, wrappedtpe), (arrayval, wrappedval))
return content.lower_getitem_other(context, builder, sig, args)

def starts_stops(context, builder, offsetstpe, offsetsval, lenstarts, lenoffsets):
proxyslicestarts = numba.cgutils.create_struct_proxy(numba.types.slice2_type)(context, builder)
Expand Down Expand Up @@ -411,10 +404,8 @@ def lower_getitem_next(context, builder, arraytpe, wheretpe, arrayval, whereval,

nextcarry = util.newindex64(context, builder, numba.int64, lencarry)
nextadvanced = util.newindex64(context, builder, numba.int64, lencarry)
nextoffsets = util.newindex(arraytpe.indexname, context, builder, numba.int64, lenoffsets)
util.call(context, builder, kernel,
(util.arrayptr(context, builder, util.indextpe(arraytpe.indexname), nextoffsets),
util.arrayptr(context, builder, util.index64tpe, nextcarry),
(util.arrayptr(context, builder, util.index64tpe, nextcarry),
util.arrayptr(context, builder, util.index64tpe, nextadvanced),
util.arrayptr(context, builder, arraytpe.offsetstpe, starts),
util.arrayptr(context, builder, arraytpe.offsetstpe, stops),
Expand All @@ -432,10 +423,10 @@ def lower_getitem_next(context, builder, arraytpe, wheretpe, arrayval, whereval,
contenttpe = nexttpe.getitem_next(tailtpe, True)
contentval = nexttpe.lower_getitem_next(context, builder, nexttpe, tailtpe, nextval, tailval, nextadvanced)

outtpe = ListOffsetArrayType(util.indextpe(arraytpe.indexname), contenttpe, arraytpe.idtpe)
outtpe = awkward1._numba.array.regulararray.RegularArrayType(contenttpe, arraytpe.idtpe)
proxyout = numba.cgutils.create_struct_proxy(outtpe)(context, builder)
proxyout.offsets = nextoffsets
proxyout.content = contentval
proxyout.size = lenflathead
if outtpe.idtpe != numba.none:
proxyout.id = awkward1._numba.identity.lower_getitem_any(context, builder, outtpe.idtpe, util.index64tpe, proxyin.id, flathead)
return proxyout._getvalue()
Expand Down
38 changes: 38 additions & 0 deletions awkward1/_numba/array/numpyarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ def carry(self):
def lower_len(self):
return lower_len

@property
def lower_getitem_nothing(self):
return None

@property
def lower_getitem_int(self):
return lower_getitem
Expand Down Expand Up @@ -119,6 +123,40 @@ def lower_len(context, builder, sig, args):
proxyin = numba.cgutils.create_struct_proxy(tpe)(context, builder, value=val)
return numba.targets.arrayobj.array_len(context, builder, numba.intp(tpe.arraytpe), (proxyin.array,))

# def lower_getitem_nothing(context, builder, tpe, val):
# import awkward1._numba.identity
#
# proxyin = numba.cgutils.create_struct_proxy(tpe)(context, builder, value=val)
#
# proxyslice = numba.cgutils.create_struct_proxy(numba.types.slice2_type)(context, builder)
# proxyslice.start = context.get_constant(numba.intp, 0)
# proxyslice.stop = context.get_constant(numba.intp, 0)
# proxyslice.step = context.get_constant(numba.intp, 1)
# emptyslice = proxyslice._getvalue()
# emptyarray = numba.targets.arrayobj.getitem_arraynd_intp(context, builder, tpe.arraytpe(tpe.arraytpe, numba.types.slice2_type), (proxyin.array, emptyslice))
#
# if tpe.arraytpe.ndim > 1:
# shapetpe = numba.types.Tuple((numba.intp,) * tpe.arraytpe.ndim)
# shapeval = numba.targets.arrayobj.make_array(tpe.arraytpe)(context, builder, proxyin.array).shape
#
# newshapetpe = numba.types.Tuple((numba.intp,) * (tpe.arraytpe.ndim - 1))
# newshapeval = context.make_tuple(builder, newshapetpe, tuple(builder.extract_value(shapeval, i) for i in range(tpe.arraytpe.ndim - 1)))
#
# arraytpe = numba.types.Array(tpe.arraytpe.dtype, tpe.arraytpe.ndim - 1, tpe.arraytpe.layout)
# arrayval = numba.targets.arrayobj.array_reshape(context, builder, arraytpe(tpe.arraytpe, newshapetpe), (proxyin.array, newshapeval))
#
# else:
# arraytpe = tpe.arraytpe
# arrayval = emptyarray
#
# outtpe = NumpyArrayType(arraytpe, tpe.idtpe)
# proxyout = numba.cgutils.create_struct_proxy(outtpe)(context, builder)
# proxyout.array = arrayval
# if tpe.idtpe != numba.none:
# proxyout.id = awkward1._numba.identity.lower_getitem_any(context, builder, tpe.idtpe, numba.types.slice2_type, proxyin.id, emptyslice)
#
# return proxyout._getvalue()

@numba.extending.lower_builtin(operator.getitem, NumpyArrayType, numba.types.Integer)
@numba.extending.lower_builtin(operator.getitem, NumpyArrayType, numba.types.SliceType)
@numba.extending.lower_builtin(operator.getitem, NumpyArrayType, numba.types.Array)
Expand Down
21 changes: 6 additions & 15 deletions awkward1/_numba/array/regulararray.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@ def carry(self):
def lower_len(self):
return lower_len

@property
def lower_getitem_nothing(self):
return content.lower_getitem_nothing

@property
def lower_getitem_int(self):
return lower_getitem_int
Expand Down Expand Up @@ -188,28 +192,15 @@ def lower_getitem_range(context, builder, sig, args):

@numba.extending.lower_builtin(operator.getitem, RegularArrayType, numba.types.BaseTuple)
def lower_getitem_tuple(context, builder, sig, args):
rettpe, (arraytpe, wheretpe) = sig.return_type, sig.args
arrayval, whereval = args

wheretpe, whereval = util.preprocess_slicetuple(context, builder, wheretpe, whereval)
nexttpe, nextval = util.wrap_for_slicetuple(context, builder, arraytpe, arrayval)

outtpe = nexttpe.getitem_next(wheretpe, False)
outval = nexttpe.lower_getitem_next(context, builder, nexttpe, wheretpe, nextval, whereval, None)

return outtpe.lower_getitem_int(context, builder, rettpe(outtpe, numba.int64), (outval, context.get_constant(numba.int64, 0)))
return content.lower_getitem_tuple(context, builder, sig, args)

@numba.extending.lower_builtin(operator.getitem, RegularArrayType, numba.types.Array)
@numba.extending.lower_builtin(operator.getitem, RegularArrayType, numba.types.List)
@numba.extending.lower_builtin(operator.getitem, RegularArrayType, numba.types.ArrayCompatible)
@numba.extending.lower_builtin(operator.getitem, RegularArrayType, numba.types.EllipsisType)
@numba.extending.lower_builtin(operator.getitem, RegularArrayType, type(numba.typeof(numpy.newaxis)))
def lower_getitem_other(context, builder, sig, args):
rettpe, (arraytpe, wheretpe) = sig.return_type, sig.args
arrayval, whereval = args
wrappedtpe = numba.types.Tuple((wheretpe,))
wrappedval = context.make_tuple(builder, wrappedtpe, (whereval,))
return lower_getitem_tuple(context, builder, rettpe(arraytpe, wrappedtpe), (arrayval, wrappedval))
return content.lower_getitem_other(context, builder, sig, args)

def lower_getitem_next(context, builder, arraytpe, wheretpe, arrayval, whereval, advanced):
if len(wheretpe.types) == 0:
Expand Down
Loading

0 comments on commit 4ce3970

Please sign in to comment.