diff --git a/src/awkward/_connect/numba/layoutbuilder.py b/src/awkward/_connect/numba/layoutbuilder.py index bda1ef5ea2..fbebdc3fe7 100644 --- a/src/awkward/_connect/numba/layoutbuilder.py +++ b/src/awkward/_connect/numba/layoutbuilder.py @@ -7,10 +7,8 @@ import numba import numba.core.typing.npydecl import numpy as np -from numba.core.errors import NumbaTypeError -import awkward as ak -from awkward._connect.numba.growablebuffer import GrowableBufferType +# from awkward._connect.numba.growablebuffer import GrowableBufferType from awkward.numba.layoutbuilder import ( BitMasked, ByteMasked, @@ -64,7 +62,7 @@ def parameter(self, name): if name in self._parameters: return numba.types.StringLiteral(self._parameters[name]) else: - raise NumbaTypeError(f"LayoutBuilder.parameters does not have a {name!r}") + raise TypeError(f"LayoutBuilder.parameters does not have a {name!r}") @property def length(self): @@ -111,7 +109,14 @@ def dtype(self): @property def data(self): - return ak.numba.GrowableBufferType(self._dtype) + return numba.types.ListType(self.dtype) + + +# +# @numba.extending.typeof_impl.register(NumpyType) +# def typeof_NumpyType(val, c): +# return NumpyType(numba.from_dtype(val.dtype)) +# @numba.extending.register_model(NumpyType) @@ -127,6 +132,14 @@ def __init__(self, dmm, fe_type): numba.extending.make_attribute_wrapper(NumpyType, member, "_" + member) +@numba.extending.overload_attribute(NumpyType, "dtype") +def NumpyType_dtype(builder): + def getter(builder): + return builder._data._dtype + + return getter + + @numba.extending.unbox(NumpyType) def NumpyType_unbox(typ, obj, c): # get PyObjects @@ -176,13 +189,13 @@ def _from_buffer(): @numba.extending.type_callable(_from_buffer) def Numpy_from_buffer_typer(context): def typer(buffer): - if isinstance(buffer, GrowableBufferType): + if isinstance(buffer, numba.types.ListType): return NumpyType(buffer.dtype, parameters=None) return typer -@numba.extending.lower_builtin(_from_buffer, GrowableBufferType) +@numba.extending.lower_builtin(_from_buffer, numba.types.ListType) def Numpy_from_buffer_impl(context, builder, sig, args): out = numba.core.cgutils.create_struct_proxy(sig.return_type)(context, builder) out.data = args[0] @@ -194,7 +207,7 @@ def Numpy_from_buffer_impl(context, builder, sig, args): @numba.extending.overload(Numpy) -def Numpy_ctor(dtype, parameters=None, initial=1024, resize=8.0): +def Numpy_ctor(dtype, parameters=None): if isinstance(dtype, numba.types.StringLiteral): dt = np.dtype(dtype.literal_value) @@ -204,11 +217,10 @@ def Numpy_ctor(dtype, parameters=None, initial=1024, resize=8.0): else: return - def ctor_impl(dtype, parameters=None, initial=1024, resize=8.0): - panels = numba.typed.List([np.empty((initial,), dt)]) - length_pos = np.zeros((2,), dtype=np.int64) - data = ak._connect.numba.growablebuffer._from_data(panels, length_pos, resize) - + def ctor_impl(dtype, parameters=None): + data = numba.typed.List() + data.append(dt(0)) + data.pop() return _from_buffer(data) return ctor_impl @@ -217,7 +229,7 @@ def ctor_impl(dtype, parameters=None, initial=1024, resize=8.0): @numba.extending.overload_method(NumpyType, "_length_get", inline="always") def Numpy_length(builder): def getter(builder): - return builder.data._length_pos[0] + return len(builder._data) return getter @@ -225,7 +237,7 @@ def getter(builder): @numba.extending.overload_attribute(NumpyType, "dtype", inline="always") def Numpy_dtype(builder): def get(builder): - return builder._data.dtype + return builder._data._dtype return get @@ -243,7 +255,7 @@ def Numpy_append(builder, datum): if isinstance(builder, NumpyType): def append(builder, datum): - builder.data.append(datum) + builder.data.append(builder.data._dtype(datum)) # FIXME return append @@ -251,7 +263,8 @@ def append(builder, datum): @numba.extending.overload_method(NumpyType, "extend") def Numpy_extend(builder, data): def extend(builder, data): - builder.data.extend(data) + for x in data: + builder.data.append(x) return extend @@ -319,15 +332,19 @@ def append(builder, datum): class ListOffsetType(LayoutBuilderType): def __init__(self, dtype, content, parameters): super().__init__( - name=f"ak.lb.ListOffset({dtype}, {content.numbatype()}, parameters={parameters!r})" + name=f"ak.lb.ListOffset({dtype!r}, {content.numbatype()}, parameters={parameters!r})" ) self._dtype = dtype self._content = content self._init(parameters) + @property + def dtype(self): + return self._dtype + @property def offsets(self): - return ak.numba.GrowableBufferType(self._dtype) + return numba.types.ListType(self.dtype) @property def content(self): @@ -351,6 +368,14 @@ def __init__(self, dmm, fe_type): numba.extending.make_attribute_wrapper(ListOffsetType, member, "_" + member) +@numba.extending.overload_attribute(ListOffsetType, "dtype") +def ListOffsetType_dtype(builder): + def getter(builder): + return builder._offsets._dtype + + return getter + + @numba.extending.unbox(ListOffsetType) def ListOffsetType_unbox(typ, obj, c): # get PyObjects @@ -375,6 +400,7 @@ def ListOffsetType_unbox(typ, obj, c): def ListOffsetType_box(typ, val, c): # get PyObject of the ListOffset class ListOffset_obj = c.pyapi.unserialize(c.pyapi.serialize_object(ListOffset)) + dtype_obj = c.pyapi.object_getattr_string(ListOffset_obj, "dtype") builder = numba.core.cgutils.create_struct_proxy(typ)( c.context, c.builder, value=val @@ -385,14 +411,14 @@ def ListOffsetType_box(typ, val, c): out = c.pyapi.call_function_objargs( ListOffset_obj, ( - offsets_obj, + dtype_obj, content_obj, ), ) # decref PyObjects c.pyapi.decref(ListOffset_obj) - + c.pyapi.decref(dtype_obj) c.pyapi.decref(offsets_obj) c.pyapi.decref(content_obj) @@ -402,7 +428,7 @@ def ListOffsetType_box(typ, val, c): @numba.extending.overload_method(ListOffsetType, "_length_get", inline="always") def ListOffset_length(builder): def getter(builder): - return builder._offsets._length_pos[0] - 1 + return len(builder._offsets) - 1 return getter @@ -563,15 +589,19 @@ def end_list(builder): class IndexedOptionType(LayoutBuilderType): def __init__(self, dtype, content, parameters): super().__init__( - name=f"ak.lb.IndexedOption({dtype}, {content.numbatype()}, parameters={parameters!r})" + name=f"ak.lb.IndexedOption({dtype!r}, {content.numbatype()}, parameters={parameters!r})" ) self._dtype = dtype self._content = content self._init(parameters) + @property + def dtype(self): + return self._dtype + @property def index(self): - return ak.numba.GrowableBufferType(self._dtype) + return numba.types.ListType(self.dtype) @property def content(self): @@ -595,6 +625,14 @@ def __init__(self, dmm, fe_type): numba.extending.make_attribute_wrapper(IndexedOptionType, member, "_" + member) +@numba.extending.overload_attribute(IndexedOptionType, "dtype") +def IndexedOptionType_dtype(builder): + def getter(builder): + return builder._index._dtype + + return getter + + @numba.extending.unbox(IndexedOptionType) def IndexedOptionType_unbox(typ, obj, c): # get PyObjects @@ -619,6 +657,7 @@ def IndexedOptionType_unbox(typ, obj, c): def IndexedOptionType_box(typ, val, c): # get PyObject of the Indexed class IndexedOption_obj = c.pyapi.unserialize(c.pyapi.serialize_object(IndexedOption)) + dtype_obj = c.pyapi.object_getattr_string(IndexedOption_obj, "dtype") builder = numba.core.cgutils.create_struct_proxy(typ)( c.context, c.builder, value=val @@ -629,14 +668,14 @@ def IndexedOptionType_box(typ, val, c): out = c.pyapi.call_function_objargs( IndexedOption_obj, ( - index_obj, + dtype_obj, content_obj, ), ) # decref PyObjects c.pyapi.decref(IndexedOption_obj) - + c.pyapi.decref(dtype_obj) c.pyapi.decref(index_obj) c.pyapi.decref(content_obj) @@ -646,7 +685,7 @@ def IndexedOptionType_box(typ, val, c): @numba.extending.overload_method(IndexedOptionType, "_length_get", inline="always") def IndexedOption_length(builder): def getter(builder): - return builder._index._length_pos[0] + return len(builder._index) return getter @@ -675,7 +714,8 @@ def IndexedOption_extend_valid(builder, size): def extend_valid(builder, size): start = len(builder._content) stop = start + size - builder._index.extend(list(range(start, stop))) + for x in range(start, stop): + builder._index.append(builder._index._dtype(x)) return builder._content return extend_valid @@ -686,7 +726,7 @@ def IndexedOption_append_invalid(builder): if isinstance(builder, IndexedOptionType): def append_invalid(builder): - builder._index.append(-1) + builder._index.append(builder._index._dtype(-1)) return append_invalid @@ -694,7 +734,8 @@ def append_invalid(builder): @numba.extending.overload_method(IndexedOptionType, "extend_invalid") def IndexedOption_extend_invalid(builder, size): def extend_invalid(builder, size): - builder._index.extend([-1] * size) + for _ in range(size): + builder._index.append(builder._index._dtype(-1)) return extend_invalid @@ -717,7 +758,7 @@ def valid_when(self): @property def mask(self): - return ak.numba.GrowableBufferType(numba.types.boolean) + return numba.types.ListType(numba.types.boolean) @property def content(self): @@ -857,9 +898,13 @@ def __init__(self, dtype, content, valid_when, lsb_order, parameters): self._lsb_order = lsb_order self._init(parameters) + @property + def dtype(self): + return self._dtype + @property def mask(self): - return ak.numba.GrowableBufferType(self._dtype) + return numba.types.ListType(self.dtype) @property def valid_when(self): @@ -901,6 +946,14 @@ def __init__(self, dmm, fe_type): numba.extending.make_attribute_wrapper(BitMaskedType, member, "_" + member) +@numba.extending.overload_attribute(BitMaskedType, "dtype") +def BitMaskedType_dtype(builder): + def getter(builder): + return builder._mask._dtype + + return getter + + @numba.extending.unbox(BitMaskedType) def BitMaskedType_unbox(typ, obj, c): # get PyObjects @@ -1014,10 +1067,10 @@ def getter(builder): @numba.extending.overload_method(BitMaskedType, "_append_begin", inline="always") def BitMasked_append_begin(builder): def append_begin(builder): - if builder._current_byte_index[1] == 8: + if builder._current_byte_index[1] == np.uint8(8): builder._current_byte_index[0] = np.uint8(0) builder._mask.append(np.uint8(0)) - builder._current_byte_index[1] = 0 + builder._current_byte_index[1] = np.uint8(0) return append_begin @@ -1025,7 +1078,7 @@ def append_begin(builder): @numba.extending.overload_method(BitMaskedType, "_append_end", inline="always") def BitMasked_append_end(builder): def append_end(builder): - builder._current_byte_index[1] += 1 + builder._current_byte_index[1] += np.uint8(1) if builder._valid_when: # 0 indicates null, 1 indicates valid builder._mask._panels[-1][builder._mask._length_pos[1] - 1] = ( @@ -1033,9 +1086,7 @@ def append_end(builder): ) else: # 0 indicates valid, 1 indicates null - builder._mask._panels[-1][ - builder._mask._length_pos[1] - 1 - ] = ~builder._current_byte_index[0] + builder._mask[-1] = np.uint8(~builder._current_byte_index[0]) return append_end @@ -1403,11 +1454,11 @@ def __init__(self, tags_dtype, index_dtype, contents, parameters): @property def tags(self): - return ak.numba.GrowableBufferType(self._tags_dtype) + return numba.types.ListType(self._tags_dtype) @property def index(self): - return ak.numba.GrowableBufferType(self._index_dtype) + return numba.types.ListType(self._index_dtype) @property def contents(self): @@ -1499,14 +1550,32 @@ def getter(builder): return getter +@numba.extending.overload_method(UnionType, "_tags", inline="always") +def Union_tags(builder): + def getter(builder): + return builder._tags + + return getter + + +@numba.extending.overload_method(UnionType, "_index", inline="always") +def Union_index(builder): + def getter(builder): + return builder._index + + return getter + + @numba.extending.overload_method(UnionType, "append_content") def Union_append_content(builder, tag): if isinstance(builder, UnionType) and isinstance(tag, numba.types.Integer): def append_content(builder, tag): content = builder._contents[numba.literally(tag)] - builder._tags.append(tag) - builder._index.append(len(content)) + # FIXME: cast to avoid + # numba.core.errors.NumbaTypeSafetyWarning: unsafe cast from int64 to int8. Precision may be lost. + builder._tags.append(builder._tags._dtype(tag)) + builder._index.append(builder._index._dtype(len(content))) return content return append_content diff --git a/src/awkward/numba/__init__.py b/src/awkward/numba/__init__.py index 27ffcc45f4..a7d8336b86 100644 --- a/src/awkward/numba/__init__.py +++ b/src/awkward/numba/__init__.py @@ -214,10 +214,8 @@ def typeof_ArrayBuilder(obj, c): @numba.extending.typeof_impl.register(ak.numba.layoutbuilder.BitMasked) def typeof_layoutbuilder_BitMasked(val, c): - import numba - return ak._connect.numba.layoutbuilder.BitMaskedType( - numba.from_dtype(val._mask.dtype), + val._mask._dtype, val._content, val._valid_when, val._lsb_order, @@ -238,26 +236,20 @@ def typeof_layoutbuilder_Empty(val, c): @numba.extending.typeof_impl.register(ak.numba.layoutbuilder.IndexedOption) def typeof_layoutbuilder_IndexedOption(val, c): - import numba - return ak._connect.numba.layoutbuilder.IndexedOptionType( - numba.from_dtype(val._index.dtype), val._content, val._parameters + numba.from_dtype(val.dtype), val._content, val._parameters ) @numba.extending.typeof_impl.register(ak.numba.layoutbuilder.ListOffset) def typeof_layoutbuilder_ListOffset(val, c): - import numba - return ak._connect.numba.layoutbuilder.ListOffsetType( - numba.from_dtype(val._offsets.dtype), val._content, val._parameters + val._offsets._dtype, val._content, val._parameters ) @numba.extending.typeof_impl.register(ak.numba.layoutbuilder.Numpy) def typeof_layoutbuilder_Numpy(val, c): - import numba - return ak._connect.numba.layoutbuilder.NumpyType( - numba.from_dtype(val._data.dtype), val._parameters + val._data._dtype, val._parameters ) @numba.extending.typeof_impl.register(ak.numba.layoutbuilder.Record) @@ -278,11 +270,9 @@ def typeof_layoutbuilder_Tuple(val, c): @numba.extending.typeof_impl.register(ak.numba.layoutbuilder.Union) def typeof_layoutbuilder_Union(val, c): - import numba - return ak._connect.numba.layoutbuilder.UnionType( - numba.from_dtype(val._tags.dtype), - numba.from_dtype(val._index.dtype), + val._tags._dtype, + val._index._dtype, val._contents, val._parameters, ) diff --git a/src/awkward/numba/layoutbuilder.py b/src/awkward/numba/layoutbuilder.py index d0bf0643e3..d17b8bcbdc 100644 --- a/src/awkward/numba/layoutbuilder.py +++ b/src/awkward/numba/layoutbuilder.py @@ -2,6 +2,7 @@ from __future__ import annotations +import numba import numpy as np import awkward as ak @@ -36,12 +37,43 @@ def is_valid(self, error: str): raise AssertionError("missing implementation") +# similar to numba.from_dtype +_numba_to_dtype = { + numba.boolean: np.dtype(np.int8), + numba.int8: np.dtype(np.int8), + numba.int8: np.dtype(np.int8), + numba.uint8: np.dtype(np.uint8), + numba.int32: np.dtype(np.int32), + numba.uint32: np.dtype(np.uint32), + numba.int64: np.dtype(np.int64), + numba.float32: np.dtype(np.float32), + numba.float64: np.dtype(np.float64), +} + +_numba_dtype_to_form = { + numba.int8: "i8", + numba.uint8: "u8", + numba.int32: "i32", + numba.uint32: "u32", + numba.int64: "i64", +} + + +def _empty_typed_list_of(dtype=None): + if isinstance(dtype, numba.typed.List): + return dtype + + empty_list = numba.typed.List() + # Small hack to define its type before returning it: + empty_list.append(dtype(0)) + empty_list.pop() + return empty_list + + @final class Numpy(LayoutBuilder): - def __init__(self, dtype, *, parameters=None, initial=1024, resize=8.0): - self._data = ak.numba.GrowableBuffer( - dtype=dtype, initial=initial, resize=resize - ) + def __init__(self, dtype, *, parameters=None): + self._data = _empty_typed_list_of(dtype=dtype) self._init(parameters) @classmethod @@ -52,13 +84,13 @@ def _from_buffer(cls, data): return out def __repr__(self): - return f"ak.numba.lb.Numpy({self._data.dtype}, parameters={self._parameters})" + return f"ak.numba.lb.Numpy({self.data._dtype}, parameters={self._parameters})" def numbatype(self): import numba return ak._connect.numba.layoutbuilder.NumpyType( - numba.from_dtype(self.dtype), numba.types.StringLiteral(self._parameters) + self._data._dtype, numba.types.StringLiteral(self._parameters) ) def __len__(self): @@ -66,18 +98,25 @@ def __len__(self): @property def dtype(self): - return self._data.dtype + return _numba_to_dtype[self._data._dtype] + + @property + def data(self): + return self._data def append(self, x): - self._data.append(x) + self._data.append(self._data._dtype(x)) def extend(self, data): - self._data.extend(data) + for x in data: + self._data.append(self._data._dtype(x)) @property def form(self): return ak.forms.NumpyForm( - primitive=ak.types.numpytype.dtype_to_primitive(self._data.dtype), + primitive=ak.types.numpytype.dtype_to_primitive( + _numba_to_dtype[self._data._dtype] + ), parameters=self._parameters, ) @@ -89,7 +128,7 @@ def is_valid(self, error: str): def snapshot(self) -> ak.contents.Content: return ak.contents.NumpyArray( - self._data.snapshot(), parameters=self._parameters + np.asarray(self._data, dtype=self.dtype), parameters=self._parameters ) @@ -127,26 +166,29 @@ def snapshot(self) -> ak.contents.Content: @final class ListOffset(LayoutBuilder): - def __init__(self, dtype, content, *, parameters=None, initial=1024, resize=8.0): - self._offsets = ak.numba.GrowableBuffer( - dtype=np.dtype(dtype), initial=initial, resize=resize - ) + def __init__(self, dtype, content, *, parameters=None): + self._dtype = dtype + self._offsets = numba.typed.List() self._offsets.append(0) self._content = content self._init(parameters) def __repr__(self): - return f"ak.numba.lb.ListOffset({self._offsets.dtype}, {self._content}, parameters={self._parameters})" + return f"ak.numba.lb.ListOffset({self._offsets._dtype}, {self._content}, parameters={self._parameters})" def numbatype(self): import numba return ak._connect.numba.layoutbuilder.ListOffsetType( - numba.from_dtype(self.offsets.dtype), - self.content, + self._offsets._dtype, + self._content, numba.types.StringLiteral(self._parameters), ) + @property + def dtype(self): + return _numba_to_dtype[self._dtype] + @property def offsets(self): return self._offsets @@ -158,8 +200,8 @@ def content(self): @property def form(self): return ak.forms.ListOffsetForm( - ak.index._dtype_to_form[self.offsets.dtype], - self.content.form, + _numba_dtype_to_form[self.offsets._dtype], + self._content.form, parameters=self._parameters, ) @@ -167,28 +209,29 @@ def begin_list(self): return self._content def end_list(self): - self._offsets.append(len(self._content)) + self.offsets.append(self.offsets._dtype(len(self._content))) def clear(self): - self._offsets.clear() - self._offsets.append(0) - self._content.clear() + self.offsets.clear() + self.offsets.append(self.offsets._dtype(0)) + self.content.clear() def __len__(self): - return self._offsets._length_pos[0] - 1 + return len(self.offsets) - 1 def is_valid(self, error: str): - if len(self._content) != self._offsets.last(): - error = f"ListOffset node{self._id} has content length {len(self._content)} but last offset {self._offsets.last()}" + if len(self.content) != self.offsets[-1]: + error = f"ListOffset node{self._id} has content length {len(self.content)} but last offset {self.offsets[-1]}" return False else: return self._content.is_valid(error) def snapshot(self) -> ak.contents.Content: content = self._content.snapshot() - return ak.contents.listoffsetarray.ListOffsetArray( - ak.index.Index(self._offsets.snapshot()), + ak.index.Index( + np.asarray(self.offsets, dtype=_numba_to_dtype[self.offsets._dtype]) + ), content, parameters=self._parameters, ) @@ -260,28 +303,45 @@ def snapshot(self) -> ak.contents.Content: ) +# +# from pprint import pprint +# +# from numba import types, typed +# from numba.experimental import jitclass +# +# @jitclass([('l',types.ListType(types.float64))]) +# class Test: +# def __init__(self): +# self.l = typed.List.empty_list(types.float64) +# +# obj = Test() +# obj.l.append(5.5) + + @final class IndexedOption(LayoutBuilder): - def __init__(self, dtype, content, *, parameters=None, initial=1024, resize=8.0): + def __init__(self, dtype, content, *, parameters=None): + self._index = numba.typed.List().empty_list(numba.types.int64) self._last_valid = -1 - self._index = ak.numba.GrowableBuffer( - dtype=dtype, initial=initial, resize=resize - ) self._content = content self._init(parameters) def __repr__(self): - return f"ak.numba.lb.IndexedOption({self._index.dtype}, {self._content}, parameters={self._parameters})" + return f"ak.numba.lb.IndexedOption({self.dtype}, {self._content}, parameters={self._parameters})" def numbatype(self): import numba return ak._connect.numba.layoutbuilder.IndexedOptionType( - numba.from_dtype(self.index.dtype), - self.content, + self._index._dtype, + self._content, numba.types.StringLiteral(self._parameters), ) + @property + def dtype(self): + return _numba_to_dtype[self._index._dtype] + @property def index(self): return self._index @@ -293,28 +353,30 @@ def content(self): @property def form(self): return ak.forms.IndexedOptionForm( - ak.index._dtype_to_form[self.index.dtype], - self.content.form, + _numba_dtype_to_form[self.index._dtype], + self._content.form, parameters=self._parameters, ) def append_valid(self): self._last_valid = len(self._content) - self._index.append(self._last_valid) + self._index.append(self.index._dtype(self._last_valid)) return self._content def extend_valid(self, size): start = len(self._content) stop = start + size self._last_valid = stop - 1 - self._index.extend(list(range(start, stop))) + for x in range(start, stop): + self._index.append(self.index._dtype(x)) return self._content def append_invalid(self): - self._index.append(-1) + self._index.append(self.index._dtype(-1)) def extend_invalid(self, size): - self._index.extend([-1] * size) + for _ in range(size): + self._index.append(self.index._dtype(-1)) def clear(self): self._last_valid = -1 @@ -322,7 +384,7 @@ def clear(self): self._content.clear() def __len__(self): - return self._index._length + return len(self._index) def is_valid(self, error: str): if len(self._content) != self._last_valid + 1: @@ -333,7 +395,9 @@ def is_valid(self, error: str): def snapshot(self) -> ak.contents.Content: return ak.contents.IndexedOptionArray( - ak.index.Index64(self._index.snapshot()), + ak.index.Index( + np.asarray(self._index, dtype=_numba_to_dtype[self._index._dtype]) + ), self._content.snapshot(), parameters=self._parameters, ) @@ -347,18 +411,14 @@ def __init__( *, valid_when=True, parameters=None, - initial=1024, - resize=8.0, ): - self._mask = ak.numba.GrowableBuffer( - dtype=np.dtype(np.bool_), initial=initial, resize=resize - ) + self._mask = _empty_typed_list_of(dtype=np.int8) self._content = content self._valid_when = valid_when self._init(parameters) def __repr__(self): - return f"ak.numba.lb.ByteMasked({self._content}, valid_when={self._valid_when}, parameters={self._parameters})" + return f"ak.numba.lb.ByteMasked({self._content}, valid_when={self._valid_when}, parameters={self.parameters})" def numbatype(self): import numba @@ -391,7 +451,8 @@ def append_valid(self): return self._content def extend_valid(self, size): - self._mask.extend([self._valid_when] * size) + for _ in range(size): + self._mask.append(self._valid_when) return self._content def append_invalid(self): @@ -399,7 +460,8 @@ def append_invalid(self): return self._content def extend_invalid(self, size): - self._mask.extend([not self._valid_when] * size) + for _ in range(size): + self._mask.append(not self._valid_when) return self._content def clear(self): @@ -418,7 +480,9 @@ def is_valid(self, error: str): def snapshot(self) -> ak.contents.Content: return ak.contents.ByteMaskedArray( - ak.index.Index8(self._mask.snapshot()), + ak.index.Index( + np.asarray(self._mask, dtype=_numba_to_dtype[self._mask._dtype]) + ), self._content.snapshot(), valid_when=self._valid_when, parameters=self._parameters, @@ -435,12 +499,8 @@ def __init__( lsb_order, *, parameters=None, - initial=1024, - resize=8.0, ): - self._mask = ak.numba.GrowableBuffer( - dtype=dtype, initial=initial, resize=resize - ) + self._mask = _empty_typed_list_of(dtype=np.uint8) # FIXME self._content = content self._valid_when = valid_when self._lsb_order = lsb_order @@ -475,19 +535,23 @@ def __init__( self._init(parameters) def __repr__(self): # as constructor - return f"ak.numba.lb.BitMasked({self._mask.dtype}, {self._content}, {self._valid_when}, {self._lsb_order}, parameters={self._parameters})" + return f"ak.numba.lb.BitMasked({self._mask._dtype}, {self._content}, {self._valid_when}, {self._lsb_order}, parameters={self.parameters})" def numbatype(self): import numba return ak._connect.numba.layoutbuilder.BitMaskedType( - numba.from_dtype(self._mask.dtype), + self._mask._dtype, self.content, self.valid_when, self.lsb_order, numba.types.StringLiteral(self._parameters), ) + @property + def mask(self): + return self._mask + @property def content(self): return self._content @@ -503,7 +567,7 @@ def lsb_order(self): @property def form(self): return ak.forms.BitMaskedForm( - ak.index._dtype_to_form[self._mask.dtype], + _numba_dtype_to_form[self.mask._dtype], self.content.form, self.valid_when, self.lsb_order, @@ -514,16 +578,16 @@ def _append_begin(self): """ Private helper function. """ - if self._current_byte_index[1] == 8: + if self._current_byte_index[1] == np.uint8(8): self._current_byte_index[0] = np.uint8(0) self._mask.append(self._current_byte_index[0]) - self._current_byte_index[1] = 0 + self._current_byte_index[1] = np.uint8(0) def _append_end(self): """ Private helper function. """ - self._current_byte_index[1] += 1 + self._current_byte_index[1] += np.uint8(1) if self._valid_when: # 0 indicates null, 1 indicates valid self._mask._panels[-1][self._mask._length_pos[1] - 1] = ( @@ -531,9 +595,7 @@ def _append_end(self): ) else: # 0 indicates valid, 1 indicates null - self._mask._panels[-1][ - self._mask._length_pos[1] - 1 - ] = ~self._current_byte_index[0] + self._mask._panels[-1] = ~self._current_byte_index[0] def append_valid(self): self._append_begin() @@ -580,7 +642,9 @@ def is_valid(self, error: str): def snapshot(self) -> ak.contents.Content: return ak.contents.BitMaskedArray( - ak.index.Index(self._mask.snapshot()), + ak.index.Index( + np.asarray(self._mask, dtype=_numba_to_dtype[self._mask._dtype]) + ), self._content.snapshot(), valid_when=self._valid_when, length=len(self), @@ -787,15 +851,9 @@ def __init__( contents, *, parameters=None, - initial=1024, - resize=8.0, ): - self._tags = ak.numba.GrowableBuffer( - dtype=tags_dtype, initial=initial, resize=resize - ) - self._index = ak.numba.GrowableBuffer( - dtype=index_dtype, initial=initial, resize=resize - ) + self._tags = _empty_typed_list_of(dtype=tags_dtype) + self._index = _empty_typed_list_of(dtype=index_dtype) self._contents = tuple(contents) self._init(parameters) @@ -819,21 +877,21 @@ def contents(self): @property def form(self): return ak.forms.UnionForm( - ak.index._dtype_to_form[self.tags.dtype], - ak.index._dtype_to_form[self.index.dtype], + _numba_dtype_to_form[self.tags._dtype], + _numba_dtype_to_form[self.index._dtype], [content.form for content in self.contents], parameters=self._parameters, ) def __repr__(self): - return f"ak.numba.lb.Union({self._tags.dtype}, {self._index.dtype}, {self.contents}, parameters={self._parameters})" + return f"ak.numba.lb.Union({self.tags._dtype}, {self.index._dtype}, {self.contents}, parameters={self.parameters})" def numbatype(self): import numba return ak._connect.numba.layoutbuilder.UnionType( - numba.from_dtype(self._tags.dtype), - numba.from_dtype(self._index.dtype), + self._tags._dtype, + self._index._dtype, self.contents, numba.types.StringLiteral(self._parameters), ) @@ -841,18 +899,18 @@ def numbatype(self): def append_content(self, tag): which_content = self._contents[tag] next_index = len(which_content) - self._tags.append(tag) - self._index.append(next_index) + self.tags.append(self.tags._dtype(tag)) + self.index.append(self.index._dtype(next_index)) return which_content def clear(self): - self._tags.clear() - self._index.clear() + self.tags.clear() + self.index.clear() for content in self._contents: content.clear() def __len__(self): - return len(self._tags) + return len(self.tags) def is_valid(self, error: str): for content in self._contents: @@ -866,8 +924,12 @@ def snapshot(self) -> ak.contents.Content: contents.append(content.snapshot()) return ak.contents.UnionArray( - ak.index.Index8(self._tags.snapshot()), - ak.index.Index64(self._index.snapshot()), + ak.index.Index( + np.asarray(self.tags, dtype=_numba_to_dtype[self._tags._dtype]) + ), + ak.index.Index( + np.asarray(self.index, dtype=_numba_to_dtype[self._index._dtype]) + ), contents, parameters=self._parameters, ) diff --git a/tests/test_2408_layoutbuilder_in_numba.py b/tests/test_2408_layoutbuilder_in_numba.py index 8277f79a7b..93f0c81b5e 100644 --- a/tests/test_2408_layoutbuilder_in_numba.py +++ b/tests/test_2408_layoutbuilder_in_numba.py @@ -50,9 +50,9 @@ def test_Numpy(): def test_Numpy_char(): builder = lb.Numpy(np.uint8, parameters={"__array__": "char"}) - builder.append(97) - builder.append(98) - builder.append(99) + builder.append(np.uint8(97)) + builder.append(np.uint8(98)) + builder.append(np.uint8(99)) layout = builder.snapshot() assert str(ak.type(layout)) == "3 * char" @@ -97,7 +97,7 @@ def test_Empty(): def test_ListOffset(): - builder = lb.ListOffset(np.int32, lb.Numpy(np.float64)) + builder = lb.ListOffset(np.int64, lb.Numpy(np.float64)) # FIXME: np.int32 assert len(builder) == 0 layout = builder.snapshot() assert isinstance(layout, ak.contents.ListOffsetArray) @@ -127,7 +127,7 @@ def test_ListOffset(): assert ( str(builder.numbatype()) - == "ak.lb.ListOffset(int32, ak.lb.Numpy(float64, parameters=Literal[NoneType](None)), parameters=Literal[NoneType](None))" + == "ak.lb.ListOffset(int64, ak.lb.Numpy(float64, parameters=Literal[NoneType](None)), parameters=Literal[NoneType](None))" ) builder.clear() assert len(builder) == 0 @@ -218,7 +218,7 @@ def test_Record(): two = builder.content("two") three = builder.content("three") - three.append(0x61) #'a') + three.append(np.uint8(0x61)) #'a') one.append(1.1) one.append(3.3) @@ -226,7 +226,7 @@ def test_Record(): two.append(2) two.append(4) - three.append(0x62) #'b') + three.append(np.uint8(0x62)) #'b') layout = builder.snapshot() assert ak.to_list(layout) == [ @@ -288,24 +288,24 @@ def test_Tuple_Numpy_ListOffset(): one.append(1.1) two = builder.index(1) two_list = two.begin_list() - two_list.append(1) + two_list.append(np.int32(1)) two.end_list() assert builder.is_valid(error) is True one.append(2.2) two.begin_list() - two_list.append(1) - two_list.append(2) + two_list.append(np.int32(1)) + two_list.append(np.int32(2)) two.end_list() assert builder.is_valid(error) is True one.append(3.3) two.begin_list() - two_list.append(1) - two_list.append(2) - two_list.append(3) + two_list.append(np.int32(1)) + two_list.append(np.int32(2)) + two_list.append(np.int32(3)) two.end_list() layout = builder.snapshot() @@ -448,8 +448,8 @@ def test_Union_Numpy_ListOffset(): two = builder.append_content(1) list = two.begin_list() - list.append(1) - list.append(2) + list.append(np.int32(1)) + list.append(np.int32(2)) two.end_list() # assert builder.is_valid(error) == True @@ -591,7 +591,7 @@ def f2(x): x.append(i) return - builder = lb.Numpy(np.int64, parameters=None, initial=10, resize=2.0) + builder = lb.Numpy(np.int64, parameters=None) f2(builder) assert ak.to_list(builder.snapshot()) == list(range(10)) @@ -611,7 +611,7 @@ def f3(x): assert ak.to_list(out1.snapshot()) == [] for x in range(15): - out1.append(x) + out1.append(np.int32(x)) out2 = f3(out1) assert ak.to_list(out2.snapshot()) == list(range(15)) @@ -698,10 +698,10 @@ def test_len(): def f4(x): return len(x) - builder = lb.Numpy(np.int32, parameters=None, initial=10, resize=2.0) + builder = lb.Numpy(np.int32, parameters=None) assert f4(builder) == 0 - builder.append(123) + builder.append(np.int32(123)) assert f4(builder) == 1 builder = lb.Empty() @@ -763,42 +763,42 @@ def f4(x): assert f4(builder) == 0 -def test_Numpy_from_buffer(): - @numba.njit - def f5(debug=True): - growablebuffer = ak.numba.GrowableBuffer(np.float64) - growablebuffer.append(66.6) - growablebuffer.append(77.7) - return growablebuffer - - out = f5() - assert out.snapshot().tolist() == [66.6, 77.7] - - @numba.njit - def f6(): - growablebuffer = ak.numba.GrowableBuffer(np.float64) - growablebuffer.append(66.6) - growablebuffer.append(77.7) - - return ak._connect.numba.layoutbuilder._from_buffer(growablebuffer) - - out = f6() - assert isinstance(out, lb.Numpy) - assert out.dtype == np.dtype(np.float64) - assert len(out) == 2 - - assert ak.to_list(out.snapshot()) == [66.6, 77.7] +# def test_Numpy_from_buffer(): +# @numba.njit +# def f5(debug=True): +# growablebuffer = ak.numba.GrowableBuffer(numba.float64) +# growablebuffer.append(66.6) +# growablebuffer.append(77.7) +# return growablebuffer +# +# out = f5() +# assert out.snapshot().tolist() == [66.6, 77.7] +# +# @numba.njit +# def f6(): +# growablebuffer = ak.numba.GrowableBuffer(numba.float64) +# growablebuffer.append(66.6) +# growablebuffer.append(77.7) +# +# return ak._connect.numba.layoutbuilder._from_buffer(growablebuffer) +# +# out = f6() +# assert isinstance(out, lb.Numpy) +# assert out.dtype == np.dtype(np.float64) +# assert len(out) == 2 +# +# assert ak.to_list(out.snapshot()) == [66.6, 77.7] def test_Numpy_ctor(): - @numba.njit - def f7(): - return lb.Numpy("f4") - - out = f7() - assert isinstance(out, lb.Numpy) - assert out.dtype == np.dtype("f4") - assert len(out) == 0 + # @numba.njit + # def f7(): + # return lb.Numpy("f4") + # + # out = f7() + # assert isinstance(out, lb.Numpy) + # assert out.dtype == np.dtype("f4") + # assert len(out) == 0 @numba.njit def f8(): @@ -806,7 +806,7 @@ def f8(): out = f8() assert isinstance(out, lb.Numpy) - assert out.dtype == np.dtype(np.float32) + assert out.dtype == np.float32 assert len(out) == 0 @numba.njit @@ -823,7 +823,7 @@ def test_Numpy_append(): @numba.njit def f10(builder): for i in range(8): - builder.append(i) + builder.append(np.float32(i)) builder = lb.Numpy(np.float32) @@ -839,7 +839,7 @@ def f10(builder): def test_Numpy_extend(): @numba.njit def f11(builder): - builder.extend(np.arange(8)) + builder.extend(np.arange(8, dtype=np.float32)) builder = lb.Numpy(np.float32) @@ -861,7 +861,7 @@ def f13(builder): out = f13(builder) assert isinstance(out, lb.Numpy) - assert out.dtype == np.dtype(np.int32) + assert out.dtype == np.int32 def test_ListOffset_end_list(): @@ -915,19 +915,19 @@ def test_ListOffset_as_string(): @numba.njit def f16(builder): content = builder.begin_list() - content.append(104) # 'h' - content.append(101) # 'e' - content.append(108) # 'l' - content.append(108) # 'l' - content.append(111) # 'o' + content.append(np.uint8(104)) # 'h' + content.append(np.uint8(101)) # 'e' + content.append(np.uint8(108)) # 'l' + content.append(np.uint8(108)) # 'l' + content.append(np.uint8(111)) # 'o' builder.end_list() builder.begin_list() - content.append(119) # 'w' - content.append(111) # 'o' - content.append(114) # 'r' - content.append(108) # 'l' - content.append(100) # 'd' + content.append(np.uint8(119)) # 'w' + content.append(np.uint8(111)) # 'o' + content.append(np.uint8(114)) # 'r' + content.append(np.uint8(108)) # 'l' + content.append(np.uint8(100)) # 'd' builder.end_list() builder = lb.ListOffset( @@ -1296,7 +1296,7 @@ def fill(builder): content_two = builder.content("two") content_two.append(1) content_three = builder.content("three") - content_three.append(111) + content_three.append(np.uint8(111)) builder = lb.Record( [ @@ -1361,9 +1361,9 @@ def f39(builder): two = builder.append_content(1) list = two.begin_list() - list.append(1) - list.append(2) - list.append(3) + list.append(np.int32(1)) + list.append(np.int32(2)) + list.append(np.int32(3)) two.end_list() builder = lb.Union( @@ -1390,11 +1390,11 @@ def create(): @numba.njit def append_range(builder, start, stop): for x in range(start, stop): - builder.append(x) + builder.append(np.int32(x)) # note cast! @numba.njit def append_single(builder, x): - builder.append(x) + builder.append(np.int32(x)) # note cast! builder = create() assert ak.to_list(builder.snapshot()) == []