Commit

Merge branch 'branch-23.11' into mp/2023-10-05/dont-add-culibs
manopapad authored Nov 7, 2023
2 parents 9b24a55 + f8c94f0 commit ebea367
Showing 15 changed files with 610 additions and 34 deletions.
97 changes: 97 additions & 0 deletions .github/ISSUE_TEMPLATE/bug_report.yml
@@ -0,0 +1,97 @@
name: Bug report
description: Submit a bug report
title: "[BUG] "
labels: TRIAGE
body:
- type: markdown
attributes:
value: "# Bug report"
- type: markdown
attributes:
value: Thank you for reporting a bug and helping us improve cuNumeric!
- type: markdown
attributes:
value: >
Please fill out all of the required information.
- type: markdown
attributes:
value: |
---
## Environment information
- type: textarea
id: legate_issue
attributes:
label: Software versions
description: >-
Run `legate-issue` and paste the output here.
placeholder: |
Python : 3.10.11 | packaged by conda-forge | (main, May 10 2023, 18:58:44) [GCC 11.3.0]
Platform : Linux-5.14.0-1042-oem-x86_64-with-glibc2.31
Legion : v23.11.00.dev-16-g2499f878
Legate : 23.11.00.dev+17.gb7b50313
Cunumeric : (ImportError: cannot import name 'LogicalArray' from 'legate.core')
Numpy : 1.24.4
Scipy : 1.10.1
Numba : (not installed)
CTK package : cuda-version-11.8-h70ddcb2_2 (conda-forge)
GPU Driver : 515.65.01
GPU Devices :
GPU 0: Quadro RTX 8000
GPU 1: Quadro RTX 8000
validations:
required: true
- type: input
id: jupyter
attributes:
label: Jupyter notebook / Jupyter Lab version
description: >-
Please supply this if the issue you are reporting is related to Jupyter
notebook or Jupyter Lab.
validations:
required: false
- type: markdown
attributes:
value: |
## Issue details
- type: textarea
id: expected-behavior
attributes:
label: Expected behavior
description: What did you expect to happen?
validations:
required: true
- type: textarea
id: observed-behavior
attributes:
label: Observed behavior
description: What actually happened?
validations:
required: true
- type: markdown
attributes:
value: |
## Directions to reproduce
- type: textarea
id: example
attributes:
label: Example code or instructions
description: >
Please provide detailed instructions to reproduce the issue. Ideally this includes a
[complete, minimal, self-contained example](https://stackoverflow.com/help/minimal-reproducible-example)
given here or as a link to code in another repository.
render: Python
validations:
required: true
- type: markdown
attributes:
value: |
## Additional information
- type: textarea
id: traceback-console
attributes:
label: Stack traceback or browser console output
description: >
Add any error messages or logs that might be helpful in reproducing and
identifying the bug, for example a Python stack traceback.
validations:
required: false
8 changes: 4 additions & 4 deletions README.md
@@ -40,15 +40,15 @@ If you have questions, please contact us at legate(at)nvidia.com.
cuNumeric is available [on conda](https://anaconda.org/legate/cunumeric):

```
-conda install -c nvidia -c conda-forge -c legate cunumeric
+mamba install -c nvidia -c conda-forge -c legate cunumeric
```

Only linux-64 packages are available at the moment.

The default package contains GPU support, and is compatible with CUDA >= 11.8
(CUDA driver version >= r520), and Volta or later GPU architectures. There are
-also CPU-only packages available, and will be automatically selected by `conda`
-when installing on a machine without GPUs.
+also CPU-only packages available, and will be automatically selected when
+installing on a machine without GPUs.

See the build instructions at https://nv-legate.github.io/cunumeric for details
about building cuNumeric from source.
@@ -119,7 +119,7 @@ with cuNumeric going forward:
new features to cuNumeric.
* We plan to add support for sharded file I/O for loading and
storing large data sets that could never be loaded on a single node.
-Initially this will begin with native support for [h5py](https://www.h5py.org/)
+Initially this will begin with native support for hdf5 and zarr,
but will grow to accommodate other formats needed by our lighthouse
applications.
* Strong scaling: while cuNumeric is currently implemented in a way that
2 changes: 1 addition & 1 deletion cmake/versions.json
@@ -5,7 +5,7 @@
"git_url" : "https://github.com/nv-legate/legate.core.git",
"git_shallow": false,
"always_download": false,
"git_tag" : "a1b653e7836b6e0d9b41bf63b41c34de7131db25"
"git_tag" : "a4b5430ebb2c52e3f8da8f27534bc0db8826b804"
}
}
}
151 changes: 129 additions & 22 deletions cunumeric/array.py
Expand Up @@ -54,7 +54,13 @@
from .coverage import FALLBACK_WARNING, clone_class, is_implemented
from .runtime import runtime
from .types import NdShape
-from .utils import deep_apply, dot_modes, to_core_dtype
+from .utils import (
+    calculate_volume,
+    deep_apply,
+    dot_modes,
+    to_core_dtype,
+    tuple_pop,
+)

if TYPE_CHECKING:
from pathlib import Path
@@ -3079,12 +3085,40 @@ def max(
where=where,
)

def _summation_dtype(
self, dtype: Optional[np.dtype[Any]]
) -> np.dtype[Any]:
# Pick our dtype if it wasn't picked yet
if dtype is None:
if self.dtype.kind != "f" and self.dtype.kind != "c":
return np.dtype(np.float64)
else:
return self.dtype
return dtype

def _normalize_summation(
self, sum_array: Any, axis: Any, dtype: np.dtype[Any], ddof: int = 0
) -> None:
if axis is None:
divisor = reduce(lambda x, y: x * y, self.shape, 1) - ddof
else:
divisor = self.shape[axis] - ddof

# Divide by the number of things in the collapsed dimensions
# Pick the right kinds of division based on the dtype
if dtype.kind == "f" or dtype.kind == "c":
sum_array.__itruediv__(
np.array(divisor, dtype=sum_array.dtype),
)
else:
sum_array.__ifloordiv__(np.array(divisor, dtype=sum_array.dtype))

@add_boilerplate()
def mean(
self,
axis: Any = None,
-        dtype: Union[np.dtype[Any], None] = None,
-        out: Union[ndarray, None] = None,
+        dtype: Optional[np.dtype[Any]] = None,
+        out: Optional[ndarray] = None,
keepdims: bool = False,
) -> ndarray:
"""a.mean(axis=None, dtype=None, out=None, keepdims=False)
@@ -3105,14 +3139,11 @@ def mean(
if axis is not None and not isinstance(axis, int):
raise NotImplementedError(
"cunumeric.mean only supports int types for "
"'axis' currently"
"`axis` currently"
)
-        # Pick our dtype if it wasn't picked yet
-        if dtype is None:
-            if self.dtype.kind != "f" and self.dtype.kind != "c":
-                dtype = np.dtype(np.float64)
-            else:
-                dtype = self.dtype
+        dtype = self._summation_dtype(dtype)

# Do the sum
if out is not None and out.dtype == dtype:
sum_array = self.sum(
@@ -3127,18 +3158,9 @@
dtype=dtype,
keepdims=keepdims,
)
-        if axis is None:
-            divisor = reduce(lambda x, y: x * y, self.shape, 1)
-        else:
-            divisor = self.shape[axis]
-        # Divide by the number of things in the collapsed dimensions
-        # Pick the right kinds of division based on the dtype
-        if dtype.kind == "f" or dtype.kind == "c":
-            sum_array.__itruediv__(
-                np.array(divisor, dtype=sum_array.dtype),
-            )
-        else:
-            sum_array.__ifloordiv__(np.array(divisor, dtype=sum_array.dtype))
+        self._normalize_summation(sum_array, axis, dtype)

# Convert to the output if we didn't already put it there
if out is not None and sum_array is not out:
assert out.dtype != sum_array.dtype
@@ -3147,6 +3169,91 @@
else:
return sum_array

@add_boilerplate()
def var(
self,
axis: Optional[Union[int, tuple[int, ...]]] = None,
dtype: Optional[np.dtype[Any]] = None,
out: Optional[ndarray] = None,
ddof: int = 0,
keepdims: bool = False,
*,
where: Union[bool, ndarray] = True,
) -> ndarray:
"""a.var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False)
Returns the variance of the array elements along given axis.
Refer to :func:`cunumeric.var` for full documentation.
See Also
--------
cunumeric.var : equivalent function
Availability
--------
Multiple GPUs, Multiple CPUs
"""
if axis is not None and not isinstance(axis, int):
raise NotImplementedError(
"cunumeric.var only supports int types for `axis` currently"
)

        # This could be computed in a single pass through the array by
        # computing both <x^2> and <x> and then taking <x^2> - <x>^2.
        # However, that takes the difference of two large numbers and is
        # unstable, so the mean needs to be computed first and the variance
        # computed directly as <(x-mu)^2>, which requires two passes through
        # the data: one to compute the mean and one to compute the variance.
        # See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
# TODO(https://github.com/nv-legate/cunumeric/issues/590)

dtype = self._summation_dtype(dtype)
# calculate the mean, but keep the dimensions so that the
# mean can be broadcast against the original array
mu = self.mean(axis=axis, dtype=dtype, keepdims=True)

        # 1D arrays (or equivalent) should benefit from this unary reduction:
if axis is None or calculate_volume(tuple_pop(self.shape, axis)) == 1:
            # this collapses to a single-element result, so it can be
            # optimized as one pass through a scalar unary reduction
result = self._perform_unary_reduction(
UnaryRedCode.VARIANCE,
self,
axis=axis,
dtype=dtype,
out=out,
keepdims=keepdims,
where=where,
args=(mu,),
)
else:
            # TODO(https://github.com/nv-legate/cunumeric/issues/591)
            # There isn't really support for generic binary reductions
            # right now; all of the current binary reductions are boolean
            # reductions like allclose. Implementing this in a single pass
            # would require a variant of einsum/dot that produces
            # (self - mu) * (self - mu) rather than self * mu. For now, we
            # have to compute delta = self - mu in a first pass and then
            # compute delta * delta in a second pass.
delta = self - mu

result = self._perform_unary_reduction(
UnaryRedCode.SUM_SQUARES,
delta,
axis=axis,
dtype=dtype,
out=out,
keepdims=keepdims,
where=where,
)

self._normalize_summation(result, axis=axis, dtype=dtype, ddof=ddof)

return result

@add_boilerplate()
def min(
self,
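
To make the two-pass computation above concrete: the mean is computed first, then the variance as the normalized sum of squared deviations, dividing by `N - ddof` just as `_normalize_summation` does. Below is a minimal NumPy-only sketch (the name `two_pass_var` is hypothetical, not part of cuNumeric), including a case where the single-pass `<x^2> - <x>^2` formula loses precision:

```python
import numpy as np

def two_pass_var(x: np.ndarray, axis=None, ddof: int = 0):
    # Pass 1: compute the mean, keeping dims so that it broadcasts
    # against the original array (as mean(..., keepdims=True) above).
    mu = x.mean(axis=axis, keepdims=True)
    # Pass 2: sum of squared deviations, <(x - mu)^2>.
    sq = np.sum(np.square(x - mu), axis=axis)
    # Normalize by the number of collapsed elements minus ddof,
    # mirroring _normalize_summation.
    n = x.size if axis is None else x.shape[axis]
    return sq / (n - ddof)

x = np.array([1e8, 1e8 + 1, 1e8 + 2])
print(two_pass_var(x))                # 0.666..., numerically stable
print(np.mean(x**2) - np.mean(x)**2)  # single pass; can lose precision
```
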
4 changes: 4 additions & 0 deletions cunumeric/config.py
@@ -187,6 +187,8 @@ class _CunumericSharedLib:
CUNUMERIC_RED_NANSUM: int
CUNUMERIC_RED_PROD: int
CUNUMERIC_RED_SUM: int
CUNUMERIC_RED_SUM_SQUARES: int
CUNUMERIC_RED_VARIANCE: int
CUNUMERIC_REPEAT: int
CUNUMERIC_SCALAR_UNARY_RED: int
CUNUMERIC_SCAN_GLOBAL: int
@@ -452,6 +454,8 @@ class UnaryRedCode(IntEnum):
NANSUM = _cunumeric.CUNUMERIC_RED_NANSUM
PROD = _cunumeric.CUNUMERIC_RED_PROD
SUM = _cunumeric.CUNUMERIC_RED_SUM
SUM_SQUARES = _cunumeric.CUNUMERIC_RED_SUM_SQUARES
VARIANCE = _cunumeric.CUNUMERIC_RED_VARIANCE


# Match these to CuNumericBinaryOpCode in cunumeric_c.h
4 changes: 4 additions & 0 deletions cunumeric/deferred.py
@@ -159,6 +159,8 @@ def __init__(

_UNARY_RED_TO_REDUCTION_OPS: Dict[int, int] = {
UnaryRedCode.SUM: ReductionOp.ADD,
UnaryRedCode.SUM_SQUARES: ReductionOp.ADD,
UnaryRedCode.VARIANCE: ReductionOp.ADD,
UnaryRedCode.PROD: ReductionOp.MUL,
UnaryRedCode.MAX: ReductionOp.MAX,
UnaryRedCode.MIN: ReductionOp.MIN,
@@ -209,6 +211,8 @@ def min_identity(

_UNARY_RED_IDENTITIES: Dict[UnaryRedCode, Callable[[Any], Any]] = {
UnaryRedCode.SUM: lambda _: 0,
UnaryRedCode.SUM_SQUARES: lambda _: 0,
UnaryRedCode.VARIANCE: lambda _: 0,
UnaryRedCode.PROD: lambda _: 1,
UnaryRedCode.MIN: min_identity,
UnaryRedCode.MAX: max_identity,
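
Both new codes are mapped above to `ReductionOp.ADD` with identity 0. A tiny standalone sketch (illustrative only, not the actual Legion reduction machinery) of why addition starting from 0 is the right combining operator for a sum of squares:

```python
from functools import reduce

values = [1.0, 2.0, 3.0]
# Each element contributes v * v; partial results combine with "+",
# starting from the identity 0, so the reduction can be split across
# processors and the pieces recombined in any order.
sum_squares = reduce(lambda acc, v: acc + v * v, values, 0.0)
assert sum_squares == 14.0
```
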
22 changes: 21 additions & 1 deletion cunumeric/eager.py
@@ -1524,6 +1524,26 @@ def unary_reduction(
else where.array,
**kws,
)
elif op == UnaryRedCode.SUM_SQUARES:
squared = np.square(rhs.array)
np.sum(
squared,
out=self.array,
axis=orig_axis,
where=where,
keepdims=keepdims,
)
elif op == UnaryRedCode.VARIANCE:
(mu,) = args
centered = np.subtract(rhs.array, mu)
squares = np.square(centered)
np.sum(
squares,
axis=orig_axis,
where=where,
keepdims=keepdims,
out=self.array,
)
elif op == UnaryRedCode.CONTAINS:
self.array.fill(args[0] in rhs.array)
elif op == UnaryRedCode.COUNT_NONZERO:
@@ -1595,7 +1615,7 @@ def where(self, rhs1: Any, rhs2: Any, rhs3: Any) -> None:
if self.deferred is not None:
self.deferred.where(rhs1, rhs2, rhs3)
else:
-            self.array[:] = np.where(rhs1.array, rhs2.array, rhs3.array)
+            self.array[...] = np.where(rhs1.array, rhs2.array, rhs3.array)

def argwhere(self) -> NumPyThunk:
if self.deferred is not None:
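
Because the eager path defers to NumPy, the new `VARIANCE` branch can be sanity-checked directly against `numpy.var`; a small check (assuming standard NumPy) that mirrors that branch:

```python
import numpy as np

x = np.array([[1.0, 2.0], [3.0, 4.0]])
# Mirror the eager VARIANCE branch: subtract the broadcast mean,
# square, then sum along the reduced axis and normalize (ddof=0).
mu = x.mean(axis=0, keepdims=True)
var_two_pass = np.sum(np.square(x - mu), axis=0) / x.shape[0]
assert np.allclose(var_two_pass, x.var(axis=0))
```
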
(Diffs for the remaining changed files are not shown here.)
