Skip to content

Commit

Permalink
feat: add CUDA kernels that calculate length/sum (#2992)
Browse files Browse the repository at this point in the history
* feat: add cumulative sum CUDA kernels

* feat: add CUDA kernels (need to be fixed)

* feat: add more kernels with cumulative sum

* add exclusive_scan function and new CUDA kernels

* test: remove XFAIL for awkward_ByteMaskedArray_numnull

* feat: add python kernel definition for awkward_sorting_ranges_length

* feat: use `cupy.cumsum`

* test: remove XFAIL

* fix: check all kernels for length = 0

* fix: failing tests-spec

* fix: add missing src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_numnull_unique_64.cu

* fix: awkward_IndexedArray_numnull_parents.cu

* feat: add 2 kernels that use a temp array

* fix: use cupy.min instead of atomicMin() in awkward_ListArray_min_range

* fix: lenstarts = 0 case in awkward_ListArray_min_range

* fix: awkward_ListArray_getitem_jagged_expand

* refactor: remove changes in awkward_reduce_sum

* fix: formatting
  • Loading branch information
ManasviGoyal authored Feb 7, 2024
1 parent 9096a7c commit dd4753b
Show file tree
Hide file tree
Showing 94 changed files with 3,373 additions and 919 deletions.
12 changes: 12 additions & 0 deletions dev/generate-kernel-signatures.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,16 @@


cuda_kernels_impl = [
"awkward_Index_nones_as_index",
"awkward_ListArray_min_range",
"awkward_ListArray_validity",
"awkward_BitMaskedArray_to_ByteMaskedArray",
"awkward_ListArray_compact_offsets",
"awkward_ListOffsetArray_flatten_offsets",
"awkward_IndexedArray_overlay_mask",
"awkward_ByteMaskedArray_numnull",
"awkward_IndexedArray_numnull",
"awkward_IndexedArray_numnull_parents",
"awkward_IndexedArray_numnull_unique_64",
"awkward_NumpyArray_fill",
"awkward_ListArray_fill",
Expand All @@ -43,12 +47,19 @@
"awkward_RegularArray_getitem_next_range",
"awkward_RegularArray_getitem_next_range_spreadadvanced",
"awkward_RegularArray_getitem_next_array",
"awkward_RegularArray_getitem_next_array_regularize",
"awkward_RegularArray_reduce_local_nextparents",
"awkward_RegularArray_reduce_nonlocal_preparenext",
"awkward_missing_repeat",
"awkward_RegularArray_getitem_jagged_expand",
"awkward_ListArray_getitem_jagged_expand",
"awkward_ListArray_getitem_jagged_carrylen",
"awkward_ListArray_getitem_next_array_advanced",
"awkward_ListArray_getitem_next_array",
"awkward_ListArray_getitem_next_at",
"awkward_ListArray_getitem_next_range_counts",
"awkward_ListArray_rpad_and_clip_length_axis1",
"awkward_ListOffsetArray_reduce_nonlocal_nextstarts_64",
"awkward_NumpyArray_reduce_adjust_starts_64",
"awkward_NumpyArray_reduce_adjust_starts_shifts_64",
"awkward_RegularArray_getitem_next_at",
Expand Down Expand Up @@ -86,6 +97,7 @@
"awkward_reduce_sum_bool",
"awkward_reduce_prod_bool",
"awkward_reduce_countnonzero",
"awkward_sorting_ranges_length",
]


Expand Down
54 changes: 47 additions & 7 deletions dev/generate-tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,13 @@ def __init__(self, name, typename, direction, role="default"):
self.role = role


# Templatized kernel names for which Specification.__init__ generates no
# tests: membership in this list sets `self.tests = []`, the same outcome
# as for a blacklisted kernel.
# NOTE(review): the name suggests these kernels' arguments carry no usable
# role annotations in the specification — confirm against
# kernel-specification.yml.
no_role_kernels = [
"awkward_NumpyArray_sort_asstrings_uint8",
"awkward_argsort",
"awkward_sort",
]


class Specification:
def __init__(self, templatized_kernel_name, spec, testdata, blacklisted):
self.templatized_kernel_name = templatized_kernel_name
Expand All @@ -51,6 +58,8 @@ def __init__(self, templatized_kernel_name, spec, testdata, blacklisted):
)
if blacklisted:
self.tests = []
elif templatized_kernel_name in no_role_kernels:
self.tests = []
else:
self.tests = self.gettests(testdata)

Expand Down Expand Up @@ -185,6 +194,7 @@ def gettests(self, testdata):

def readspec():
specdict = {}
specdict_unit = {}
with open(os.path.join(CURRENT_DIR, "..", "kernel-specification.yml")) as f:
loadfile = yaml.load(f, Loader=yaml.CSafeLoader)

Expand All @@ -193,6 +203,13 @@ def readspec():
data = json.load(f)["tests"]

for spec in indspec:
for childfunc in spec["specializations"]:
specdict_unit[childfunc["name"]] = Specification(
spec["name"],
childfunc,
data,
not spec["automatic-tests"],
)
if "def " in spec["definition"]:
for childfunc in spec["specializations"]:
specdict[childfunc["name"]] = Specification(
Expand All @@ -201,7 +218,7 @@ def readspec():
data,
not spec["automatic-tests"],
)
return specdict
return specdict, specdict_unit


def getdtypes(args):
Expand All @@ -215,6 +232,8 @@ def getdtypes(args):
typename = typename + "_"
if count == 1:
dtypes.append("cupy." + typename)
elif count == 2:
dtypes.append("cupy." + typename)
return dtypes


Expand All @@ -239,7 +258,12 @@ def checkintrange(test_args, error, args):
if "int" in typename or "uint" in typename:
dtype = gettypename(typename)
min_val, max_val = np.iinfo(dtype).min, np.iinfo(dtype).max
if "List" in typename:
if "List[List" in typename:
for row in val:
for data in row:
if not (min_val <= data <= max_val):
flag = False
elif "List" in typename:
for data in val:
if not (min_val <= data <= max_val):
flag = False
Expand Down Expand Up @@ -652,12 +676,16 @@ def gencpuunittests(specdict):


cuda_kernels_tests = [
"awkward_Index_nones_as_index",
"awkward_ListArray_min_range",
"awkward_ListArray_validity",
"awkward_BitMaskedArray_to_ByteMaskedArray",
"awkward_ListArray_compact_offsets",
"awkward_ListOffsetArray_flatten_offsets",
"awkward_IndexedArray_overlay_mask",
"awkward_ByteMaskedArray_numnull",
"awkward_IndexedArray_numnull",
"awkward_IndexedArray_numnull_parents",
"awkward_IndexedArray_numnull_unique_64",
"awkward_NumpyArray_fill",
"awkward_ListArray_fill",
Expand All @@ -683,12 +711,19 @@ def gencpuunittests(specdict):
"awkward_RegularArray_getitem_next_range",
"awkward_RegularArray_getitem_next_range_spreadadvanced",
"awkward_RegularArray_getitem_next_array",
"awkward_RegularArray_getitem_next_array_regularize",
"awkward_RegularArray_reduce_local_nextparents",
"awkward_RegularArray_reduce_nonlocal_preparenext",
"awkward_missing_repeat",
"awkward_RegularArray_getitem_jagged_expand",
"awkward_ListArray_getitem_jagged_expand",
"awkward_ListArray_getitem_jagged_carrylen",
"awkward_ListArray_getitem_next_array_advanced",
"awkward_ListArray_getitem_next_array",
"awkward_ListArray_getitem_next_at",
"awkward_ListArray_getitem_next_range_counts",
"awkward_ListArray_rpad_and_clip_length_axis1",
"awkward_ListOffsetArray_reduce_nonlocal_nextstarts_64",
"awkward_NumpyArray_reduce_adjust_starts_64",
"awkward_NumpyArray_reduce_adjust_starts_shifts_64",
"awkward_RegularArray_getitem_next_at",
Expand Down Expand Up @@ -726,6 +761,7 @@ def gencpuunittests(specdict):
"awkward_reduce_sum_bool",
"awkward_reduce_prod_bool",
"awkward_reduce_countnonzero",
"awkward_sorting_ranges_length",
]


Expand Down Expand Up @@ -966,8 +1002,12 @@ def gencudaunittests(specdict):
)
)
elif count == 2:
raise NotImplementedError

f.write(
" " * 4
+ "{} = cupy.array({}, dtype=cupy.{})\n".format(
arg, val, typename
)
)
cuda_string = (
"funcC = cupy_backend['"
+ spec.templatized_kernel_name
Expand Down Expand Up @@ -1068,10 +1108,10 @@ def evalkernels():
if __name__ == "__main__":
genpykernels()
evalkernels()
specdict = readspec()
specdict, specdict_unit = readspec()
genspectests(specdict)
gencpukerneltests(specdict)
gencpuunittests(specdict)
gencpuunittests(specdict_unit)
genunittests()
gencudakerneltests(specdict)
gencudaunittests(specdict)
gencudaunittests(specdict_unit)
17 changes: 11 additions & 6 deletions kernel-specification.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1960,11 +1960,12 @@ kernels:
description: null
definition: |
def awkward_ListArray_min_range(tomin, fromstarts, fromstops, lenstarts):
shorter = fromstops[0] - fromstarts[0]
for i in range(1, lenstarts):
rangeval = fromstops[i] - fromstarts[i]
shorter = shorter if shorter < rangeval else rangeval
tomin[0] = shorter
if lenstarts > 0:
shorter = fromstops[0] - fromstarts[0]
for i in range(1, lenstarts):
rangeval = fromstops[i] - fromstarts[i]
shorter = shorter if shorter < rangeval else rangeval
tomin[0] = shorter
automatic-tests: true

- name: awkward_ListArray_rpad_and_clip_length_axis1
Expand Down Expand Up @@ -5917,5 +5918,9 @@ kernels:
- {name: parentslength, type: "int64_t", dir: in, role: default}
description: null
definition: |
Insert Python definition here
def awkward_sorting_ranges_length(tolength, parents, parentslength):
    """Store in ``tolength[0]`` the value 2 plus the number of changes in ``parents``.

    A change is counted at every position ``1 <= pos < parentslength`` where
    ``parents[pos]`` differs from ``parents[pos - 1]``. When ``parentslength``
    is 0 or 1 no positions are compared and the result is 2.
    """
    changes = 0
    for pos in range(1, parentslength):
        # Compare each entry with its predecessor; every mismatch adds one.
        if parents[pos] != parents[pos - 1]:
            changes += 1
    tolength[0] = 2 + changes
automatic-tests: false
Loading

0 comments on commit dd4753b

Please sign in to comment.