Skip to content

Commit

Permalink
adding python-only implementation of gradient + test
Browse files Browse the repository at this point in the history
  • Loading branch information
ipdemes committed May 14, 2024
1 parent 8de3a95 commit 4e24ced
Show file tree
Hide file tree
Showing 6 changed files with 423 additions and 9 deletions.
4 changes: 2 additions & 2 deletions cunumeric/_sphinxext/_cunumeric_directive.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@

class CunumericDirective(SphinxDirective):
def parse(self, rst_text: str, annotation: str) -> list[nodes.Node]:
result = ViewList()
result = ViewList() # type: ignore
for line in rst_text.split("\n"):
result.append(line, annotation)
node = nodes.paragraph()
node.document = self.state.document
nested_parse_with_titles(self.state, result, node)
nested_parse_with_titles(self.state, result, node) # type: ignore
return node.children
252 changes: 251 additions & 1 deletion cunumeric/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#
from __future__ import annotations

import collections.abc
import math
import operator
import re
Expand Down Expand Up @@ -1748,7 +1749,7 @@ def check_list_depth(arr: Any, prefix: NdShape = (0,)) -> int:
"List depths are mismatched. First element was at depth "
f"{first_depth}, but there is an element at"
f" depth {other_depth}, "
f"arrays{convert_to_array_form(prefix+(idx+1,))}"
f"arrays{convert_to_array_form(prefix + (idx + 1,))}"
)

return depths[0] + 1
Expand Down Expand Up @@ -6736,6 +6737,255 @@ def convolve(a: ndarray, v: ndarray, mode: ConvolveMode = "full") -> ndarray:
return out


@add_boilerplate("f")
def gradient(
f: ndarray, *varargs: Any, axis: Any = None, edge_order: int = 1
) -> Any:
"""
Return the gradient of an N-dimensional array.
The gradient is computed using second order accurate central differences
in the interior points and either first or second order accurate one-sides
(forward or backwards) differences at the boundaries.
The returned gradient hence has the same shape as the input array.
Parameters
----------
f : array_like
An N-dimensional array containing samples of a scalar function.
varargs : list of scalar or array, optional
Spacing between f values. Default unitary spacing for all dimensions.
Spacing can be specified using:
1. single scalar to specify a sample distance for all dimensions.
2. N scalars to specify a constant sample distance for each dimension.
i.e. `dx`, `dy`, `dz`, ...
3. N arrays to specify the coordinates of the values along each
dimension of F. The length of the array must match the size of
the corresponding dimension
4. Any combination of N scalars/arrays with the meaning of 2. and 3.
If `axis` is given, the number of varargs must equal the number of
axes. Default: 1.
edge_order : {1, 2}, optional
Gradient is calculated using N-th order accurate differences
at the boundaries. Default: 1.
.. versionadded:: 1.9.1
axis : None or int or tuple of ints, optional
Gradient is calculated only along the given axis or axes
The default (axis = None) is to calculate the gradient for all the axes
of the input array. axis may be negative, in which case it counts from
the last to the first axis.
.. versionadded:: 1.11.0
Returns
-------
gradient : ndarray or list of ndarray
A list of ndarrays (or a single ndarray if there is only one dimension)
corresponding to the derivatives of f with respect to each dimension.
Each derivative has the same shape as f.
See Also
--------
numpy.gradient
Availability
--------
Multiple GPUs, Multiple CPUs
"""
N = f.ndim # number of dimensions

if axis is None:
axes = tuple(range(N))
elif isinstance(axis, collections.abc.Sequence):
axes = tuple(normalize_axis_index(a, N) for a in axis)
else:
axis = normalize_axis_index(axis, N)
axes = (axis,)

len_axes = len(axes)
if not varargs:
n = 0
else:
n = len(varargs)

if n == 0:
# no spacing argument - use 1 in all axes
dx = [asarray(1.0)] * len_axes
elif n == 1 and np.ndim(varargs[0]) == 0:
# single scalar for all axes
dx = list(asarray(varargs)) * len_axes
elif n == len_axes:
# scalar or 1d array for each axis
dx = list(asarray(v) for v in varargs)
for i, distances in enumerate(dx):
if distances.ndim == 0:
continue
elif distances.ndim != 1:
raise ValueError("distances must be either scalars or 1d")
if len(distances) != f.shape[axes[i]]:
raise ValueError(
"when 1d, distances must match "
"the length of the corresponding dimension"
)
if np.issubdtype(distances.dtype, np.integer):
# Convert numpy integer types to float64 to avoid modular
# arithmetic in np.diff(distances).
distances = distances.astype(np.float64)
diffx = diff(distances)
# if distances are constant reduce to the scalar case
# since it brings a consistent speedup
if (diffx == diffx[0]).all():
diffx = diffx[0]
dx[i] = diffx
else:
raise TypeError("invalid number of arguments")

if edge_order > 2:
raise ValueError("'edge_order' greater than 2 not supported")
if edge_order < 0:
raise ValueError(" invalid 'edge_order'")

# use central differences on interior and one-sided differences on the
# endpoints. This preserves second order-accuracy over the full domain.

outvals = []

# create slice objects --- initially all are [:, :, ..., :]
slice1 = [slice(None)] * N
slice2 = [slice(None)] * N
slice3 = [slice(None)] * N
slice4 = [slice(None)] * N

otype = f.dtype
if otype.type is np.datetime64:
raise TypeError("datetime64 is not supported by gradient in cuNumeric")
elif otype.type is np.timedelta64:
pass
elif np.issubdtype(otype, np.inexact):
pass
else:
# All other types convert to floating point.
# First check if f is a numpy integer type; if so, convert f to float64
# to avoid modular arithmetic when computing the changes in f.
if np.issubdtype(otype, np.integer):
f = f.astype(np.float64)
otype = np.dtype(np.float64)

for axis, ax_dx in zip(axes, dx):
if f.shape[axis] < edge_order + 1:
raise ValueError(
"Shape of array too small to calculate a numerical gradient, "
"at least (edge_order + 1) elements are required."
)
# result allocation
out = empty_like(f, dtype=otype)

# spacing for the current axis
uniform_spacing = np.ndim(ax_dx) == 0

# Numerical differentiation: 2nd order interior
slice1[axis] = slice(1, -1)
slice2[axis] = slice(None, -2)
slice3[axis] = slice(1, -1)
slice4[axis] = slice(2, None)

if uniform_spacing:
out[tuple(slice1)] = (f[tuple(slice4)] - f[tuple(slice2)]) / (
2.0 * ax_dx
)
else:
dx1 = ax_dx[0:-1]
dx2 = ax_dx[1:]
a = -(dx2) / (dx1 * (dx1 + dx2))
b = (dx2 - dx1) / (dx1 * dx2)
c = dx1 / (dx2 * (dx1 + dx2))
# fix the shape for broadcasting
shape = list(1 for i in range(N))
shape[axis] = -1
a = a.reshape(shape)
b = b.reshape(shape)
c = c.reshape(shape)
# 1D equivalent -- out[1:-1] = a * f[:-2] + b * f[1:-1] + c * f[2:]
out[tuple(slice1)] = (
a * f[tuple(slice2)]
+ b * f[tuple(slice3)]
+ c * f[tuple(slice4)]
)

# Numerical differentiation: 1st order edges
if edge_order == 1:
slice1[axis] = 0 # type: ignore
slice2[axis] = 1 # type: ignore
slice3[axis] = 0 # type: ignore
dx_0 = ax_dx if uniform_spacing else ax_dx[0]
# 1D equivalent -- out[0] = (f[1] - f[0]) / (x[1] - x[0])
out[tuple(slice1)] = (f[tuple(slice2)] - f[tuple(slice3)]) / dx_0

slice1[axis] = -1 # type: ignore
slice2[axis] = -1 # type: ignore
slice3[axis] = -2 # type: ignore
dx_n = ax_dx if uniform_spacing else ax_dx[-1]
# 1D equivalent -- out[-1] = (f[-1] - f[-2]) / (x[-1] - x[-2])
out[tuple(slice1)] = (f[tuple(slice2)] - f[tuple(slice3)]) / dx_n

# Numerical differentiation: 2nd order edges
else:
slice1[axis] = 0 # type: ignore
slice2[axis] = 0 # type: ignore
slice3[axis] = 1 # type: ignore
slice4[axis] = 2 # type: ignore
if uniform_spacing:
a = -1.5 / ax_dx
b = 2.0 / ax_dx
c = -0.5 / ax_dx
else:
dx1 = ax_dx[0]
dx2 = ax_dx[1]
a = -(2.0 * dx1 + dx2) / (dx1 * (dx1 + dx2))
b = (dx1 + dx2) / (dx1 * dx2)
c = -dx1 / (dx2 * (dx1 + dx2))
# 1D equivalent -- out[0] = a * f[0] + b * f[1] + c * f[2]
out[tuple(slice1)] = (
a * f[tuple(slice2)]
+ b * f[tuple(slice3)]
+ c * f[tuple(slice4)]
)

slice1[axis] = -1 # type: ignore
slice2[axis] = -3 # type: ignore
slice3[axis] = -2 # type: ignore
slice4[axis] = -1 # type: ignore
if uniform_spacing:
a = 0.5 / ax_dx
b = -2.0 / ax_dx
c = 1.5 / ax_dx
else:
dx1 = ax_dx[-2]
dx2 = ax_dx[-1]
a = (dx2) / (dx1 * (dx1 + dx2))
b = -(dx2 + dx1) / (dx1 * dx2)
c = (2.0 * dx2 + dx1) / (dx2 * (dx1 + dx2))
# 1D equivalent -- out[-1] = a * f[-3] + b * f[-2] + c * f[-1]
out[tuple(slice1)] = (
a * f[tuple(slice2)]
+ b * f[tuple(slice3)]
+ c * f[tuple(slice4)]
)

outvals.append(out)

# reset the slice object in this dimension to ":"
slice1[axis] = slice(None)
slice2[axis] = slice(None)
slice3[axis] = slice(None)
slice4[axis] = slice(None)

if len_axes == 1:
return outvals[0]
else:
return outvals


@add_boilerplate("a")
def clip(
a: ndarray,
Expand Down
1 change: 1 addition & 0 deletions docs/cunumeric/source/api/math.rst
Original file line number Diff line number Diff line change
Expand Up @@ -174,3 +174,4 @@ Miscellaneous
inner
outer
vdot
gradient
9 changes: 4 additions & 5 deletions install.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,14 +332,13 @@ def validate_path(path):

# Also use preexisting CMAKE_ARGS from conda if set
cmake_flags = cmd_env.get("CMAKE_ARGS", "").split(" ")

if debug or verbose:
cmake_flags += ["--log-level=%s" % ("DEBUG" if debug else "VERBOSE")]

build_type = (
"Debug" if debug else "RelWithDebInfo" if debug_release else "Release"
) # noqa: F841
cmake_flags += f"""\
-DCMAKE_BUILD_TYPE={(
"Debug" if debug else "RelWithDebInfo" if debug_release else "Release"
)}
-DCMAKE_BUILD_TYPE=build_type
-DBUILD_SHARED_LIBS=ON
-DCMAKE_CUDA_ARCHITECTURES={str(arch)}
-DLegion_MAX_DIM={str(maxdim)}
Expand Down
Loading

0 comments on commit 4e24ced

Please sign in to comment.