Commit

Merge branch 'branch-23.11' into mp/2023-10-05/dont-add-culibs
manopapad authored Nov 7, 2023
2 parents 9b24a55 + f8c94f0 commit ebea367
Showing 15 changed files with 610 additions and 34 deletions.
97 changes: 97 additions & 0 deletions .github/ISSUE_TEMPLATE/bug_report.yml
@@ -0,0 +1,97 @@
name: Bug report
description: Submit a bug report
title: "[BUG] "
labels: TRIAGE
body:
- type: markdown
attributes:
value: "# Bug report"
- type: markdown
attributes:
value: Thank you for reporting a bug and helping us improve cuNumeric!
- type: markdown
attributes:
value: >
Please fill out all of the required information.
- type: markdown
attributes:
value: |
---
## Environment information
- type: textarea
id: legate_issue
attributes:
label: Software versions
description: >-
Run `legate-issue` and paste the output here.
placeholder: |
Python : 3.10.11 | packaged by conda-forge | (main, May 10 2023, 18:58:44) [GCC 11.3.0]
Platform : Linux-5.14.0-1042-oem-x86_64-with-glibc2.31
Legion : v23.11.00.dev-16-g2499f878
Legate : 23.11.00.dev+17.gb7b50313
Cunumeric : (ImportError: cannot import name 'LogicalArray' from 'legate.core')
Numpy : 1.24.4
Scipy : 1.10.1
Numba : (not installed)
CTK package : cuda-version-11.8-h70ddcb2_2 (conda-forge)
GPU Driver : 515.65.01
GPU Devices :
GPU 0: Quadro RTX 8000
GPU 1: Quadro RTX 8000
validations:
required: true
- type: input
id: jupyter
attributes:
label: Jupyter notebook / Jupyter Lab version
description: >-
Please supply this if the issue you are reporting is related to Jupyter
notebook or Jupyter Lab.
validations:
required: false
- type: markdown
attributes:
value: |
## Issue details
- type: textarea
id: expected-behavior
attributes:
label: Expected behavior
description: What did you expect to happen?
validations:
required: true
- type: textarea
id: observed-behavior
attributes:
label: Observed behavior
description: What actually happened?
validations:
required: true
- type: markdown
attributes:
value: |
## Directions to reproduce
- type: textarea
id: example
attributes:
label: Example code or instructions
description: >
Please provide detailed instructions to reproduce the issue. Ideally this includes a
[complete, minimal, self-contained example](https://stackoverflow.com/help/minimal-reproducible-example)
given here or as a link to code in another repository.
render: Python
validations:
required: true
- type: markdown
attributes:
value: |
## Additional information
- type: textarea
id: traceback-console
attributes:
label: Stack traceback or browser console output
description: >
Add any error messages or logs that might be helpful in reproducing and
identifying the bug, for example a Python stack traceback.
validations:
required: false
8 changes: 4 additions & 4 deletions README.md
@@ -40,15 +40,15 @@ If you have questions, please contact us at legate(at)nvidia.com.
cuNumeric is available [on conda](https://anaconda.org/legate/cunumeric):

```
-conda install -c nvidia -c conda-forge -c legate cunumeric
+mamba install -c nvidia -c conda-forge -c legate cunumeric
```

Only linux-64 packages are available at the moment.

The default package contains GPU support, and is compatible with CUDA >= 11.8
(CUDA driver version >= r520), and Volta or later GPU architectures. There are
-also CPU-only packages available, and will be automatically selected by `conda`
-when installing on a machine without GPUs.
+also CPU-only packages available, and will be automatically selected when
+installing on a machine without GPUs.

See the build instructions at https://nv-legate.github.io/cunumeric for details
about building cuNumeric from source.
@@ -119,7 +119,7 @@ with cuNumeric going forward:
new features to cuNumeric.
* We plan to add support for sharded file I/O for loading and
storing large data sets that could never be loaded on a single node.
-Initially this will begin with native support for [h5py](https://www.h5py.org/)
+Initially this will begin with native support for hdf5 and zarr,
but will grow to accommodate other formats needed by our lighthouse
applications.
* Strong scaling: while cuNumeric is currently implemented in a way that
2 changes: 1 addition & 1 deletion cmake/versions.json
@@ -5,7 +5,7 @@
"git_url" : "https://github.com/nv-legate/legate.core.git",
"git_shallow": false,
"always_download": false,
"git_tag" : "a1b653e7836b6e0d9b41bf63b41c34de7131db25"
"git_tag" : "a4b5430ebb2c52e3f8da8f27534bc0db8826b804"
}
}
}
151 changes: 129 additions & 22 deletions cunumeric/array.py
Expand Up @@ -54,7 +54,13 @@
from .coverage import FALLBACK_WARNING, clone_class, is_implemented
from .runtime import runtime
from .types import NdShape
-from .utils import deep_apply, dot_modes, to_core_dtype
+from .utils import (
+    calculate_volume,
+    deep_apply,
+    dot_modes,
+    to_core_dtype,
+    tuple_pop,
+)

if TYPE_CHECKING:
from pathlib import Path
@@ -3079,12 +3085,40 @@ def max(
where=where,
)

def _summation_dtype(
self, dtype: Optional[np.dtype[Any]]
) -> np.dtype[Any]:
# Pick our dtype if it wasn't picked yet
if dtype is None:
if self.dtype.kind != "f" and self.dtype.kind != "c":
return np.dtype(np.float64)
else:
return self.dtype
return dtype

def _normalize_summation(
self, sum_array: Any, axis: Any, dtype: np.dtype[Any], ddof: int = 0
) -> None:
if axis is None:
divisor = reduce(lambda x, y: x * y, self.shape, 1) - ddof
else:
divisor = self.shape[axis] - ddof

# Divide by the number of things in the collapsed dimensions
# Pick the right kinds of division based on the dtype
if dtype.kind == "f" or dtype.kind == "c":
sum_array.__itruediv__(
np.array(divisor, dtype=sum_array.dtype),
)
else:
sum_array.__ifloordiv__(np.array(divisor, dtype=sum_array.dtype))

@add_boilerplate()
def mean(
self,
axis: Any = None,
-        dtype: Union[np.dtype[Any], None] = None,
-        out: Union[ndarray, None] = None,
+        dtype: Optional[np.dtype[Any]] = None,
+        out: Optional[ndarray] = None,
keepdims: bool = False,
) -> ndarray:
"""a.mean(axis=None, dtype=None, out=None, keepdims=False)
@@ -3105,14 +3139,11 @@ def mean(
if axis is not None and not isinstance(axis, int):
raise NotImplementedError(
"cunumeric.mean only supports int types for "
"'axis' currently"
"`axis` currently"
)
-        # Pick our dtype if it wasn't picked yet
-        if dtype is None:
-            if self.dtype.kind != "f" and self.dtype.kind != "c":
-                dtype = np.dtype(np.float64)
-            else:
-                dtype = self.dtype
+        dtype = self._summation_dtype(dtype)

# Do the sum
if out is not None and out.dtype == dtype:
sum_array = self.sum(
@@ -3127,18 +3158,9 @@
dtype=dtype,
keepdims=keepdims,
)
-        if axis is None:
-            divisor = reduce(lambda x, y: x * y, self.shape, 1)
-        else:
-            divisor = self.shape[axis]
-        # Divide by the number of things in the collapsed dimensions
-        # Pick the right kinds of division based on the dtype
-        if dtype.kind == "f" or dtype.kind == "c":
-            sum_array.__itruediv__(
-                np.array(divisor, dtype=sum_array.dtype),
-            )
-        else:
-            sum_array.__ifloordiv__(np.array(divisor, dtype=sum_array.dtype))
+        self._normalize_summation(sum_array, axis, dtype)

# Convert to the output if we didn't already put it there
if out is not None and sum_array is not out:
assert out.dtype != sum_array.dtype
@@ -3147,6 +3169,91 @@
else:
return sum_array

@add_boilerplate()
def var(
self,
axis: Optional[Union[int, tuple[int, ...]]] = None,
dtype: Optional[np.dtype[Any]] = None,
out: Optional[ndarray] = None,
ddof: int = 0,
keepdims: bool = False,
*,
where: Union[bool, ndarray] = True,
) -> ndarray:
"""a.var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False)
Returns the variance of the array elements along given axis.
Refer to :func:`cunumeric.var` for full documentation.
See Also
--------
cunumeric.var : equivalent function
Availability
--------
Multiple GPUs, Multiple CPUs
"""
if axis is not None and not isinstance(axis, int):
raise NotImplementedError(
"cunumeric.var only supports int types for `axis` currently"
)

        # This could be computed in a single pass through the array by
        # computing both <x^2> and <x> and then taking <x^2> - <x>^2.
        # However, that takes the difference of two large numbers and is
        # unstable, so the mean needs to be computed first and the variance
        # computed directly as <(x-mu)^2>, which requires two passes through
        # the data: one to compute the mean and one to compute the variance.
        # See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
# TODO(https://github.com/nv-legate/cunumeric/issues/590)

dtype = self._summation_dtype(dtype)
# calculate the mean, but keep the dimensions so that the
# mean can be broadcast against the original array
mu = self.mean(axis=axis, dtype=dtype, keepdims=True)

        # 1D arrays (or equivalent) should benefit from this unary reduction:
if axis is None or calculate_volume(tuple_pop(self.shape, axis)) == 1:
            # this collapses to a single-element result, so it can be
            # optimized as one pass through a scalar unary reduction
result = self._perform_unary_reduction(
UnaryRedCode.VARIANCE,
self,
axis=axis,
dtype=dtype,
out=out,
keepdims=keepdims,
where=where,
args=(mu,),
)
else:
            # TODO(https://github.com/nv-legate/cunumeric/issues/591)
            # There isn't really support for generic binary reductions
            # right now; all of the current binary reductions are boolean
            # reductions like allclose. Implementing this in a single pass
            # would require a variant of einsum/dot that produces
            # (self - mu) * (self - mu) rather than self * mu. For now, we
            # have to compute delta = self - mu in a first pass and then
            # compute delta * delta in a second pass.
delta = self - mu

result = self._perform_unary_reduction(
UnaryRedCode.SUM_SQUARES,
delta,
axis=axis,
dtype=dtype,
out=out,
keepdims=keepdims,
where=where,
)

self._normalize_summation(result, axis=axis, dtype=dtype, ddof=ddof)

return result

@add_boilerplate()
def min(
self,
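
To make the two-pass computation above concrete: the mean is computed first, then the variance as the normalized sum of squared deviations, dividing by `N - ddof` just as `_normalize_summation` does. Below is a minimal NumPy-only sketch (the name `two_pass_var` is hypothetical, not part of cuNumeric), including a case where the single-pass `<x^2> - <x>^2` formula loses precision:

```python
import numpy as np

def two_pass_var(x: np.ndarray, axis=None, ddof: int = 0):
    # Pass 1: compute the mean, keeping dims so that it broadcasts
    # against the original array (as mean(..., keepdims=True) above).
    mu = x.mean(axis=axis, keepdims=True)
    # Pass 2: sum of squared deviations, <(x - mu)^2>.
    sq = np.sum(np.square(x - mu), axis=axis)
    # Normalize by the number of collapsed elements minus ddof,
    # mirroring _normalize_summation.
    n = x.size if axis is None else x.shape[axis]
    return sq / (n - ddof)

x = np.array([1e8, 1e8 + 1, 1e8 + 2])
print(two_pass_var(x))                # 0.666..., numerically stable
print(np.mean(x**2) - np.mean(x)**2)  # single pass; can lose precision
```
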
4 changes: 4 additions & 0 deletions cunumeric/config.py
@@ -187,6 +187,8 @@ class _CunumericSharedLib:
CUNUMERIC_RED_NANSUM: int
CUNUMERIC_RED_PROD: int
CUNUMERIC_RED_SUM: int
CUNUMERIC_RED_SUM_SQUARES: int
CUNUMERIC_RED_VARIANCE: int
CUNUMERIC_REPEAT: int
CUNUMERIC_SCALAR_UNARY_RED: int
CUNUMERIC_SCAN_GLOBAL: int
@@ -452,6 +454,8 @@ class UnaryRedCode(IntEnum):
NANSUM = _cunumeric.CUNUMERIC_RED_NANSUM
PROD = _cunumeric.CUNUMERIC_RED_PROD
SUM = _cunumeric.CUNUMERIC_RED_SUM
SUM_SQUARES = _cunumeric.CUNUMERIC_RED_SUM_SQUARES
VARIANCE = _cunumeric.CUNUMERIC_RED_VARIANCE


# Match these to CuNumericBinaryOpCode in cunumeric_c.h
4 changes: 4 additions & 0 deletions cunumeric/deferred.py
@@ -159,6 +159,8 @@ def __init__(

_UNARY_RED_TO_REDUCTION_OPS: Dict[int, int] = {
UnaryRedCode.SUM: ReductionOp.ADD,
UnaryRedCode.SUM_SQUARES: ReductionOp.ADD,
UnaryRedCode.VARIANCE: ReductionOp.ADD,
UnaryRedCode.PROD: ReductionOp.MUL,
UnaryRedCode.MAX: ReductionOp.MAX,
UnaryRedCode.MIN: ReductionOp.MIN,
@@ -209,6 +211,8 @@ def min_identity(

_UNARY_RED_IDENTITIES: Dict[UnaryRedCode, Callable[[Any], Any]] = {
UnaryRedCode.SUM: lambda _: 0,
UnaryRedCode.SUM_SQUARES: lambda _: 0,
UnaryRedCode.VARIANCE: lambda _: 0,
UnaryRedCode.PROD: lambda _: 1,
UnaryRedCode.MIN: min_identity,
UnaryRedCode.MAX: max_identity,
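
Both new codes are mapped above to `ReductionOp.ADD` with identity 0. A tiny standalone sketch (illustrative only, not the actual Legion reduction machinery) of why addition starting from 0 is the right combining operator for a sum of squares:

```python
from functools import reduce

values = [1.0, 2.0, 3.0]
# Each element contributes v * v; partial results combine with "+",
# starting from the identity 0, so the reduction can be split across
# processors and the pieces recombined in any order.
sum_squares = reduce(lambda acc, v: acc + v * v, values, 0.0)
assert sum_squares == 14.0
```
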
22 changes: 21 additions & 1 deletion cunumeric/eager.py
@@ -1524,6 +1524,26 @@ def unary_reduction(
else where.array,
**kws,
)
elif op == UnaryRedCode.SUM_SQUARES:
squared = np.square(rhs.array)
np.sum(
squared,
out=self.array,
axis=orig_axis,
where=where,
keepdims=keepdims,
)
elif op == UnaryRedCode.VARIANCE:
(mu,) = args
centered = np.subtract(rhs.array, mu)
squares = np.square(centered)
np.sum(
squares,
axis=orig_axis,
where=where,
keepdims=keepdims,
out=self.array,
)
elif op == UnaryRedCode.CONTAINS:
self.array.fill(args[0] in rhs.array)
elif op == UnaryRedCode.COUNT_NONZERO:
@@ -1595,7 +1615,7 @@ def where(self, rhs1: Any, rhs2: Any, rhs3: Any) -> None:
if self.deferred is not None:
self.deferred.where(rhs1, rhs2, rhs3)
else:
-            self.array[:] = np.where(rhs1.array, rhs2.array, rhs3.array)
+            self.array[...] = np.where(rhs1.array, rhs2.array, rhs3.array)

def argwhere(self) -> NumPyThunk:
if self.deferred is not None:
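
Because the eager path defers to NumPy, the new `VARIANCE` branch can be sanity-checked directly against `numpy.var`; a small check (assuming standard NumPy) that mirrors that branch:

```python
import numpy as np

x = np.array([[1.0, 2.0], [3.0, 4.0]])
# Mirror the eager VARIANCE branch: subtract the broadcast mean,
# square, then sum along the reduced axis and normalize (ddof=0).
mu = x.mean(axis=0, keepdims=True)
var_two_pass = np.sum(np.square(x - mu), axis=0) / x.shape[0]
assert np.allclose(var_two_pass, x.var(axis=0))
```
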
(Diffs for the remaining changed files are not shown here.)
