Commit
Merge remote-tracking branch 'origin/branch-24.01' into unary_red_where
ipdemes committed Nov 21, 2023
2 parents 7a77535 + 6fda443 commit b39364b
Showing 40 changed files with 1,596 additions and 179 deletions.
97 changes: 97 additions & 0 deletions .github/ISSUE_TEMPLATE/bug_report.yml
@@ -0,0 +1,97 @@
name: Bug report
description: Submit a bug report
title: "[BUG] "
labels: TRIAGE
body:
- type: markdown
attributes:
value: "# Bug report"
- type: markdown
attributes:
value: Thank you for reporting a bug and helping us improve cuNumeric!
- type: markdown
attributes:
value: >
Please fill out all of the required information.
- type: markdown
attributes:
value: |
---
## Environment information
- type: textarea
id: legate_issue
attributes:
label: Software versions
description: >-
Run `legate-issue` and paste the output here.
placeholder: |
Python : 3.10.11 | packaged by conda-forge | (main, May 10 2023, 18:58:44) [GCC 11.3.0]
Platform : Linux-5.14.0-1042-oem-x86_64-with-glibc2.31
Legion : v23.11.00.dev-16-g2499f878
Legate : 23.11.00.dev+17.gb7b50313
Cunumeric : (ImportError: cannot import name 'LogicalArray' from 'legate.core')
Numpy : 1.24.4
Scipy : 1.10.1
Numba : (not installed)
CTK package : cuda-version-11.8-h70ddcb2_2 (conda-forge)
GPU Driver : 515.65.01
GPU Devices :
GPU 0: Quadro RTX 8000
GPU 1: Quadro RTX 8000
validations:
required: true
- type: input
id: jupyter
attributes:
label: Jupyter notebook / Jupyter Lab version
description: >-
Please supply if the issue you are reporting is related to Jupyter
notebook or Jupyter Lab.
validations:
required: false
- type: markdown
attributes:
value: |
## Issue details
- type: textarea
id: expected-behavior
attributes:
label: Expected behavior
description: What did you expect to happen?
validations:
required: true
- type: textarea
id: observed-behavior
attributes:
label: Observed behavior
description: What actually happened?
validations:
required: true
- type: markdown
attributes:
value: |
## Directions to reproduce
- type: textarea
id: example
attributes:
label: Example code or instructions
description: >
Please provide detailed instructions to reproduce the issue. Ideally this includes a
[Complete, minimal, self-contained example code](https://stackoverflow.com/help/minimal-reproducible-example)
given here or as a link to code in another repository.
render: Python
validations:
required: true
- type: markdown
attributes:
value: |
## Additional information
- type: textarea
id: traceback-console
attributes:
label: Stack traceback or browser console output
description: >
Add any error messages or logs that might be helpful in reproducing and
identifying the bug, for example a Python stack traceback.
validations:
required: false
8 changes: 4 additions & 4 deletions README.md
@@ -40,15 +40,15 @@ If you have questions, please contact us at legate(at)nvidia.com.
cuNumeric is available [on conda](https://anaconda.org/legate/cunumeric):

```
conda install -c nvidia -c conda-forge -c legate cunumeric
mamba install -c nvidia -c conda-forge -c legate cunumeric
```

Only linux-64 packages are available at the moment.

The default package contains GPU support, and is compatible with CUDA >= 11.8
(CUDA driver version >= r520), and Volta or later GPU architectures. There are
also CPU-only packages available, and will be automatically selected by `conda`
when installing on a machine without GPUs.
also CPU-only packages available, and will be automatically selected when
installing on a machine without GPUs.

See the build instructions at https://nv-legate.github.io/cunumeric for details
about building cuNumeric from source.
@@ -119,7 +119,7 @@ with cuNumeric going forward:
new features to cuNumeric.
* We plan to add support for sharded file I/O for loading and
storing large data sets that could never be loaded on a single node.
Initially this will begin with native support for [h5py](https://www.h5py.org/)
Initially this will begin with native support for hdf5 and zarr,
but will grow to accommodate other formats needed by our lighthouse
applications.
* Strong scaling: while cuNumeric is currently implemented in a way that
2 changes: 1 addition & 1 deletion cmake/versions.json
@@ -5,7 +5,7 @@
"git_url" : "https://github.com/nv-legate/legate.core.git",
"git_shallow": false,
"always_download": false,
"git_tag" : "8997f997be02936304b3ac23fe785f1de7a3424b"
"git_tag" : "6fa0acc9dcfa89be2702f1de6c045bc262f752b1"
}
}
}
6 changes: 0 additions & 6 deletions continuous_integration/scripts/build-cunumeric-all
@@ -3,12 +3,6 @@
setup_env() {
yaml_file=$(find ~/.artifacts -name "environment*.yaml" | head -n 1)

[ "${USE_CUDA:-}" = "ON" ] &&
echo " - libcublas-dev" >> "${yaml_file}" &&
echo " - libcufft-dev" >> "${yaml_file}" &&
echo " - libcurand-dev" >> "${yaml_file}" &&
echo " - libcusolver-dev" >> "${yaml_file}";

echo "YAML file..."
cat "${yaml_file}"

2 changes: 1 addition & 1 deletion cunumeric/__init__.py
@@ -28,7 +28,7 @@

import numpy as _np

from cunumeric import linalg, random, fft
from cunumeric import linalg, random, fft, ma
from cunumeric.array import maybe_convert_to_np_ndarray, ndarray
from cunumeric.bits import packbits, unpackbits
from cunumeric.module import *
179 changes: 145 additions & 34 deletions cunumeric/array.py
@@ -53,7 +53,13 @@
from .coverage import FALLBACK_WARNING, clone_class, is_implemented
from .runtime import runtime
from .types import NdShape
from .utils import deep_apply, dot_modes, to_core_dtype
from .utils import (
calculate_volume,
deep_apply,
dot_modes,
to_core_dtype,
tuple_pop,
)
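The two newly imported helpers, `calculate_volume` and `tuple_pop`, are used further down but their implementations are not part of this diff. A minimal sketch of the assumed behavior (hypothetical reimplementations, not the code in `cunumeric/utils.py`):

```python
from functools import reduce
import operator

def tuple_pop(t: tuple, index: int) -> tuple:
    # drop one entry from a shape tuple
    return t[:index] + t[index + 1:]

def calculate_volume(shape: tuple) -> int:
    # product of the extents; an empty shape has volume 1
    return reduce(operator.mul, shape, 1)

# A reduction is effectively one-dimensional when the extents left
# after removing `axis` multiply to 1:
assert calculate_volume(tuple_pop((1, 5, 1), 1)) == 1
assert calculate_volume(tuple_pop((2, 5, 3), 1)) == 6
```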

if TYPE_CHECKING:
from pathlib import Path
@@ -159,7 +165,9 @@ def maybe_convert_to_np_ndarray(obj: Any) -> Any:
"""
Converts cuNumeric arrays into NumPy arrays, otherwise has no effect.
"""
if isinstance(obj, ndarray):
from .ma import MaskedArray

if isinstance(obj, (ndarray, MaskedArray)):
return obj.__array__()
return obj

@@ -1664,8 +1672,6 @@ def __setitem__(self, key: Any, value: ndarray) -> None:
"""
check_writeable(self)
if key is None:
raise KeyError("invalid key passed to cunumeric.ndarray")
if value.dtype != self.dtype:
temp = ndarray(value.shape, dtype=self.dtype, inputs=(value,))
temp._thunk.convert(value._thunk)
@@ -3086,12 +3092,54 @@ def _count_nonzero(self, axis: Any = None) -> Union[int, ndarray]:
axis=axis,
)

def _summation_dtype(
self, dtype: Optional[np.dtype[Any]]
) -> np.dtype[Any]:
# Pick our dtype if it wasn't picked yet
if dtype is None:
if self.dtype.kind != "f" and self.dtype.kind != "c":
return np.dtype(np.float64)
else:
return self.dtype
return dtype

def _normalize_summation(
self,
sum_array: Any,
axis: Any,
dtype: np.dtype[Any],
ddof: int = 0,
keepdims: bool = False,
where: Union[ndarray, None] = None,
) -> None:
if axis is None:
if where is not None:
divisor = where._count_nonzero() - ddof
else:
divisor = reduce(lambda x, y: x * y, self.shape, 1) - ddof
else:
if where is not None:
divisor = (
where.sum(axis=axis, dtype=dtype, keepdims=keepdims) - ddof
)
else:
divisor = self.shape[axis] - ddof

# Divide by the number of things in the collapsed dimensions
# Pick the right kinds of division based on the dtype
if np.ndim(divisor) == 0:
divisor = np.array(divisor, dtype=sum_array.dtype) # type: ignore [assignment] # noqa
if dtype.kind == "f" or dtype.kind == "c":
sum_array.__itruediv__(divisor)
else:
sum_array.__ifloordiv__(divisor)
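The divisor rules that `_normalize_summation` encodes (count the `where`-selected elements when a mask is given, otherwise use the full volume or the extent of `axis`, minus `ddof`) can be sketched in plain NumPy. This is a hedged illustration under assumed names (`normalized_mean` is not a cuNumeric function), not the cuNumeric implementation:

```python
import numpy as np

def normalized_mean(a, axis=None, where=None, ddof=0):
    """Mean via sum-then-divide, mirroring the divisor logic above:
    with a `where` mask, only the selected elements count."""
    if where is None:
        s = a.sum(axis=axis, dtype=np.float64)
        n = a.size if axis is None else a.shape[axis]
    else:
        s = np.where(where, a, 0).sum(axis=axis, dtype=np.float64)
        n = np.count_nonzero(where) if axis is None else where.sum(axis=axis)
    return s / (n - ddof)

a = np.arange(6.0).reshape(2, 3)
mask = np.array([[True, False, True], [True, True, False]])
print(normalized_mean(a, where=mask))  # 2.25: mean of the 4 selected elements
```

Note how the masked, axis-wise case divides by a per-position count (`where.sum(axis=axis)`) rather than a scalar, which is why the real method broadcasts the divisor before choosing true or floor division.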

@add_boilerplate()
def mean(
self,
axis: Any = None,
dtype: Union[np.dtype[Any], None] = None,
out: Union[ndarray, None] = None,
dtype: Optional[np.dtype[Any]] = None,
out: Optional[ndarray] = None,
keepdims: bool = False,
where: Union[ndarray, None] = None,
) -> ndarray:
@@ -3113,16 +3161,12 @@ def mean(
if axis is not None and not isinstance(axis, int):
raise NotImplementedError(
"cunumeric.mean only supports int types for "
"'axis' currently"
"`axis` currently"
)
# Pick our dtype if it wasn't picked yet
if dtype is None:
if self.dtype.kind != "f" and self.dtype.kind != "c":
dtype = np.dtype(np.float64)
else:
dtype = self.dtype

dtype = self._summation_dtype(dtype)
where_array = broadcast_where(where, self.shape)

# Do the sum
sum_array = (
self.sum(
@@ -3138,28 +3182,10 @@
)
)

if axis is None:
if where_array is not None:
divisor = where_array._count_nonzero()
else:
divisor = reduce(lambda x, y: x * y, self.shape, 1)

else:
if where_array is not None:
divisor = where_array.sum(
axis=axis, dtype=dtype, keepdims=keepdims
)
else:
divisor = self.shape[axis]
self._normalize_summation(
sum_array, axis, dtype, keepdims=keepdims, where=where_array
)

# Divide by the number of things in the collapsed dimensions
# Pick the right kinds of division based on the dtype
if dtype.kind == "f" or dtype.kind == "c":
sum_array.__itruediv__(
divisor,
)
else:
sum_array.__ifloordiv__(divisor)
# Convert to the output if we didn't already put it there
if out is not None and sum_array is not out:
assert out.dtype != sum_array.dtype
@@ -3196,6 +3222,91 @@ def _nanmean(
where=nan_mask,
)

@add_boilerplate()
def var(
self,
axis: Optional[Union[int, tuple[int, ...]]] = None,
dtype: Optional[np.dtype[Any]] = None,
out: Optional[ndarray] = None,
ddof: int = 0,
keepdims: bool = False,
*,
where: Union[ndarray, None] = None,
) -> ndarray:
"""a.var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False)
Returns the variance of the array elements along given axis.
Refer to :func:`cunumeric.var` for full documentation.
See Also
--------
cunumeric.var : equivalent function
Availability
--------
Multiple GPUs, Multiple CPUs
"""
if axis is not None and not isinstance(axis, int):
raise NotImplementedError(
"cunumeric.var only supports int types for `axis` currently"
)

# This could be computed in a single pass through the array by
# computing both <x^2> and <x> and then taking <x^2> - <x>^2, but
# that takes the difference of two large numbers and is numerically
# unstable. Instead, the mean is computed first and the variance
# computed directly as <(x-mu)^2>, which requires two passes through
# the data: one to compute the mean and one to compute the variance.
# See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
# TODO(https://github.com/nv-legate/cunumeric/issues/590)

dtype = self._summation_dtype(dtype)
# calculate the mean, but keep the dimensions so that the
# mean can be broadcast against the original array
mu = self.mean(axis=axis, dtype=dtype, keepdims=True, where=where)

# 1D arrays (or equivalent) should benefit from this unary reduction:
#
if axis is None or calculate_volume(tuple_pop(self.shape, axis)) == 1:
# this is a scalar reduction and we can optimize this as a single
# pass through a scalar reduction
result = self._perform_unary_reduction(
UnaryRedCode.VARIANCE,
self,
axis=axis,
dtype=dtype,
out=out,
keepdims=keepdims,
where=where,
args=(mu,),
)
else:
# TODO(https://github.com/nv-legate/cunumeric/issues/591)
# there isn't really support for generic binary reductions
# right now all of the current binary reductions are boolean
# reductions like allclose. To implement this a single pass would
# require a variant of einsum/dot that produces
# (self-mu)*(self-mu) rather than self*mu. For now, we have to
# compute delta = self-mu in a first pass and then compute
# delta*delta in second pass
delta = self - mu

result = self._perform_unary_reduction(
UnaryRedCode.SUM_SQUARES,
delta,
axis=axis,
dtype=dtype,
out=out,
keepdims=keepdims,
where=where,
)

self._normalize_summation(result, axis=axis, dtype=dtype, ddof=ddof)

return result
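The instability that motivates the two-pass design is easy to reproduce in plain NumPy (an illustration, not cuNumeric code); float32 makes the cancellation obvious, since around 1e5 the squares are ~1e10, where float32 spacing is about 1024 and cannot resolve a true variance of 1.25:

```python
import numpy as np

x = np.array([100001.0, 100002.0, 100003.0, 100004.0], dtype=np.float32)

# single pass: <x^2> - <x>^2 suffers catastrophic cancellation
single_pass = (x * x).mean() - x.mean() ** 2

# two passes: compute the mean first, then <(x - mu)^2>
mu = x.mean()
two_pass = ((x - mu) ** 2).mean()

print(single_pass)  # badly wrong, off by orders of magnitude
print(two_pass)     # 1.25, the true variance
```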

@add_boilerplate()
def min(
self,
