diff --git a/.github/actions/download-artifacts/action.yml b/.github/actions/download-artifacts/action.yml
index e8019b1b1..640dc143a 100644
--- a/.github/actions/download-artifacts/action.yml
+++ b/.github/actions/download-artifacts/action.yml
@@ -25,7 +25,7 @@ runs:
       repo: nv-legate/legate.core
       commit: ${{ inputs.git_sha }}
       workflow_conclusion: success
-      workflow: "ci-gh-${{ inputs.device }}-build-and-test.yml"
+      workflow: "ci-gh.yml"
       name: "legate.core-${{ inputs.device }}-[0-9a-z]{40}"
       name_is_regexp: true
diff --git a/README.md b/README.md
index 7516331ff..cec00b052 100644
--- a/README.md
+++ b/README.md
@@ -37,7 +37,14 @@ If you have questions, please contact us at legate(at)nvidia.com.
 
 ## Installation
 
-cuNumeric is available [on conda](https://anaconda.org/legate/cunumeric):
+cuNumeric is available [on conda](https://anaconda.org/legate/cunumeric).
+Create a new environment containing cuNumeric:
+
+```
+mamba create -n myenv -c nvidia -c conda-forge -c legate cunumeric
+```
+
+or install it into an existing environment:
 
 ```
 mamba install -c nvidia -c conda-forge -c legate cunumeric
@@ -48,7 +55,12 @@ Only linux-64 packages are available at the moment.
 The default package contains GPU support, and is compatible with CUDA >= 11.8
 (CUDA driver version >= r520), and Volta or later GPU architectures. There are
 also CPU-only packages available, and will be automatically selected when
-installing on a machine without GPUs.
+installing on a machine without GPUs. You can force installation of a CPU-only
+package by requesting it as follows:
+
+```
+mamba ... cunumeric=*=*_cpu
+```
 
 See the build instructions at https://nv-legate.github.io/cunumeric for details
 about building cuNumeric from source.
diff --git a/cmake/versions.json b/cmake/versions.json
index 43d60fa5e..9441e93d8 100644
--- a/cmake/versions.json
+++ b/cmake/versions.json
@@ -5,7 +5,7 @@
       "git_url" : "https://github.com/nv-legate/legate.core.git",
       "git_shallow": false,
       "always_download": false,
-      "git_tag" : "6fa0acc9dcfa89be2702f1de6c045bc262f752b1"
+      "git_tag" : "c05ebdab30dee6ac2d6e2808fb835fad0302822d"
     }
   }
 }
diff --git a/conda/conda-build/meta.yaml b/conda/conda-build/meta.yaml
index c652d931b..85ea88d74 100644
--- a/conda/conda-build/meta.yaml
+++ b/conda/conda-build/meta.yaml
@@ -116,11 +116,12 @@ requirements:
     - cuda-nvcc ={{ cuda_version }}
     - cuda-cccl ={{ cuda_version }}
     - cuda-cudart ={{ cuda_version }}
+    - cuda-cudart-static ={{ cuda_version }}
     - cuda-driver-dev ={{ cuda_version }}
     - cuda-cudart-dev ={{ cuda_version }}
     - cuda-nvtx ={{ cuda_version }}
     # - libcutensor-dev >=1.3
-    - cutensor >=1.3 =*_*
+    - cutensor >=1.3,<2.0 =*_*
     - libcublas-dev
     - libcusolver-dev
     - libcufft-dev
@@ -140,6 +141,8 @@ requirements:
     - libcublas
     - libcusolver >=11.4.1.48-0
     - libcufft
+    - libnvjitlink
+    - libcusparse
 {% endif %}
     - opt_einsum >=3.3
     - scipy
diff --git a/cunumeric/array.py b/cunumeric/array.py
index 6c226d747..3da80916c 100644
--- a/cunumeric/array.py
+++ b/cunumeric/array.py
@@ -2532,10 +2532,9 @@ def _diag_helper(
 
     def diagonal(
         self,
         offset: int = 0,
-        axis1: Any = None,
-        axis2: Any = None,
+        axis1: int = 0,
+        axis2: int = 1,
         extract: bool = True,
-        axes: Any = None,
     ) -> ndarray:
         """a.diagonal(offset=0, axis1=None, axis2=None)
@@ -2557,19 +2556,7 @@
                 raise ValueError("extract can be true only for Ndim >=2")
             axes = None
         else:
-            if isinstance(axis1, int) and isinstance(axis2, int):
-                if axes is not None:
-                    raise ValueError(
-                        "Either axis1/axis2 or axes must be supplied"
-                    )
-                axes = (axis1, axis2)
-            # default values for axes
-            elif (axis1 is None) and (axis2 is None) and (axes is None):
-                axes = (0, 1)
-            elif (axes is not None) and (
-                (axis1 is not None) or (axis2 is not None)
-            ):
-                raise ValueError("Either axis1/axis2 or axes must be supplied")
+            axes = (axis1, axis2)
         return self._diag_helper(offset=offset, axes=axes, extract=extract)
 
     @add_boilerplate("indices", "values")
diff --git a/cunumeric/module.py b/cunumeric/module.py
index d37ea4183..45d7508b1 100644
--- a/cunumeric/module.py
+++ b/cunumeric/module.py
@@ -3806,10 +3806,9 @@ def compress(
 def diagonal(
     a: ndarray,
     offset: int = 0,
-    axis1: Optional[int] = None,
-    axis2: Optional[int] = None,
+    axis1: int = 0,
+    axis2: int = 1,
     extract: bool = True,
-    axes: Optional[tuple[int, int]] = None,
 ) -> ndarray:
     """
     diagonal(a: ndarray, offset=0, axis1=None, axis2=None)
@@ -3868,9 +3867,7 @@
     --------
     Multiple GPUs, Multiple CPUs
     """
-    return a.diagonal(
-        offset=offset, axis1=axis1, axis2=axis2, extract=extract, axes=axes
-    )
+    return a.diagonal(offset=offset, axis1=axis1, axis2=axis2, extract=extract)
 
 
 @add_boilerplate("a", "indices", "values")
diff --git a/src/cunumeric/matrix/batched_cholesky_template.inl b/src/cunumeric/matrix/batched_cholesky_template.inl
index 8d266e3f0..d27f25e7a 100644
--- a/src/cunumeric/matrix/batched_cholesky_template.inl
+++ b/src/cunumeric/matrix/batched_cholesky_template.inl
@@ -57,7 +57,7 @@ struct _cholesky_supported {
 
 template <VariantKind KIND>
 struct BatchedCholeskyImpl {
-  template <Type::Code CODE, int32_t DIM>
+  template <Type::Code CODE, int32_t DIM, std::enable_if_t<(DIM > 2)>* = nullptr>
   void operator()(Array& input_array, Array& output_array) const
   {
     using VAL = legate_type_of<CODE>;
@@ -94,8 +94,8 @@
 
     if (shape.empty()) return;
 
-    int num_blocks = 1;
-    for (int i = 0; i < (DIM - 2); ++i) { num_blocks *= (shape.hi[i] - shape.lo[i] + 1); }
+    int32_t num_blocks = 1;
+    for (int32_t i = 0; i < (DIM - 2); ++i) { num_blocks *= (shape.hi[i] - shape.lo[i] + 1); }
 
     auto m = static_cast<int64_t>(shape.hi[DIM - 2] - shape.lo[DIM - 2] + 1);
     auto n = static_cast<int64_t>(shape.hi[DIM - 1] - shape.lo[DIM - 1] + 1);
@@ -103,7 +103,7 @@
 
     auto block_stride = m * n;
 
-    for (int i = 0; i < num_blocks; ++i) {
+    for (int32_t i = 0; i < num_blocks; ++i) {
       if constexpr (_cholesky_supported<CODE>::value) {
         CopyBlockImpl<KIND>()(output, input, sizeof(VAL) * block_stride);
         PotrfImplBody<KIND, CODE>()(output, m, n);
@@ -119,6 +119,12 @@
       }
     }
   }
+
+  template <Type::Code CODE, int32_t DIM, std::enable_if_t<(DIM <= 2)>* = nullptr>
+  void operator()(Array& input_array, Array& output_array) const
+  {
+    assert(false);
+  }
 };
 
 template <VariantKind KIND>
diff --git a/tests/integration/test_convolve.py b/tests/integration/test_convolve.py
index 7a318668d..997a9d6f7 100644
--- a/tests/integration/test_convolve.py
+++ b/tests/integration/test_convolve.py
@@ -13,6 +13,8 @@
 # limitations under the License.
 #
 
+import os
+
 import numpy as np
 import pytest
 import scipy.signal as sig
@@ -20,8 +22,57 @@
 
 import cunumeric as num
 
-SHAPES = [(100,), (10, 10), (10, 10, 10)]
-FILTER_SHAPES = [(5,), (3, 5), (3, 5, 3)]
+CUDA_TEST = os.environ.get("LEGATE_NEED_CUDA") == "1"
+
+SHAPES = [(100,), (10, 10), (10, 10, 10), (32, 2, 32)]
+FILTER_SHAPES = [(5,), (3, 5), (3, 5, 3), (32, 1, 32)]
+
+LARGE_SHAPES = [
+    pytest.param(
+        (128, 2, 1024),
+        (64, 2, 512),
+        marks=pytest.mark.xfail(
+            not CUDA_TEST, run=False, reason="test hangs on CPU variants"
+        ),
+    ),
+    pytest.param(
+        (1024, 2, 4096),
+        (128, 16, 64),
+        marks=pytest.mark.xfail(
+            not CUDA_TEST, run=False, reason="test hangs on CPU variants"
+        ),
+    ),
+    pytest.param(
+        (1024, 2, 1024),
+        (5, 1, 5),
+        marks=pytest.mark.xfail(
+            CUDA_TEST, run=False, reason="GPU variant hits SIGABRT"
+        ),
+    ),
+    pytest.param(
+        (1024, 2, 1024),
+        (128, 1, 128),
+        marks=pytest.mark.xfail(
+            run=False, reason="GPU variant hits SIGFPE, CPU hangs"
+        ),
+    ),
+]
+
+DTYPES = [
+    np.int8,
+    np.int16,
+    np.int32,
+    np.int64,
+    np.uint8,
+    np.uint16,
+    np.uint32,
+    np.uint64,
+    np.float16,
+    np.float32,
+    np.float64,
+    np.complex64,
+    np.complex128,
+]
 
 
 @pytest.mark.xfail
@@ -68,7 +119,9 @@ def check_convolve(a, v):
 
 
 @pytest.mark.parametrize(
-    "shape, filter_shape", zip(SHAPES, FILTER_SHAPES), ids=str
+    "shape, filter_shape",
+    list(zip(SHAPES, FILTER_SHAPES)) + LARGE_SHAPES,
+    ids=str,
 )
 def test_double(shape, filter_shape):
     a = num.random.rand(*shape)
@@ -79,7 +132,9 @@ def test_double(shape, filter_shape):
 
 
 @pytest.mark.parametrize(
-    "shape, filter_shape", zip(SHAPES, FILTER_SHAPES), ids=str
+    "shape, filter_shape",
+    list(zip(SHAPES, FILTER_SHAPES)) + LARGE_SHAPES,
+    ids=str,
 )
 def test_int(shape, filter_shape):
     a = num.random.randint(0, 5, shape)
@@ -88,10 +143,11 @@ def test_int(shape, filter_shape):
     check_convolve(a, v)
 
 
-def test_dtype():
+@pytest.mark.parametrize("dtype", DTYPES, ids=str)
+def test_dtype(dtype):
     shape = (5,) * 2
     arr1 = num.random.randint(0, 5, shape, dtype=np.int64)
-    arr2 = num.random.random(shape)
+    arr2 = num.random.random(shape).astype(dtype)
     out_num = num.convolve(arr1, arr2, mode="same")
     out_np = np.convolve(arr1, arr2, mode="same")
     assert allclose(out_num, out_np)
diff --git a/tests/integration/test_index_routines.py b/tests/integration/test_index_routines.py
index c86f62e97..4488b90b5 100644
--- a/tests/integration/test_index_routines.py
+++ b/tests/integration/test_index_routines.py
@@ -298,8 +298,8 @@ def test_diagonal():
         num_array = mk_seq_array(num, a_shape)
         for num_axes in range(3, ndim + 1):
             for axes in permutations(range(ndim), num_axes):
-                res_num = num.diagonal(
-                    num_array, offset=0, extract=True, axes=axes
+                res_num = num_array._diag_helper(
+                    offset=0, extract=True, axes=axes
                 )
                 res_ref = diagonal_reference(np_array, axes)
                 assert np.array_equal(res_num, res_ref)
@@ -424,33 +424,18 @@ def test_axes_none(self):
         with pytest.raises(TypeError):
             num.diagonal(self.a, 0, None, 0)
 
-    @pytest.mark.diff
-    def test_scalar_axes(self):
-        # NumPy does not have axes arg
-        with pytest.raises(ValueError):
-            num.diagonal(self.a, axes=(0,))
-
-    @pytest.mark.diff
-    def test_duplicate_axes(self):
-        # NumPy does not have axes arg
-        expected_exc = ValueError
-        with pytest.raises(expected_exc):
-            num.diagonal(self.a, axis1=1, axes=(0, 1))
-        with pytest.raises(expected_exc):
-            num.diagonal(self.a, axis1=1, axis2=0, axes=(0, 1))
-
     @pytest.mark.diff
     def test_extra_axes(self):
         # NumPy does not have axes arg
         axes = num.arange(self.a.ndim + 1, dtype=int)
         with pytest.raises(ValueError):
-            num.diagonal(self.a, axes=axes)
+            self.a._diag_helper(axes=axes)
 
     @pytest.mark.diff
     def test_n_axes_offset(self):
         # NumPy does not have axes arg
         with pytest.raises(ValueError):
-            num.diagonal(self.a, offset=1, axes=(2, 1, 0))
+            self.a._diag_helper(offset=1, axes=(2, 1, 0))
 
 
 @pytest.mark.parametrize(
     "k",
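
A usage note on the `diagonal` change above: the public signature now mirrors NumPy (`axis1`/`axis2` are plain ints defaulting to 0 and 1), and the cuNumeric-specific multi-axis form is reachable only through the private `_diag_helper`. A minimal sketch of the new call shapes, assuming a build that includes this branch (the array values here are illustrative, and `_diag_helper` is a private helper whose behavior may change):

```
import numpy as np
import cunumeric as num

a_np = np.arange(24).reshape(2, 3, 4)
a = num.array(a_np)

# Public API, NumPy-compatible: axis1/axis2 are ints defaulting to 0 and 1.
assert np.array_equal(
    num.diagonal(a, offset=1, axis1=0, axis2=2),
    np.diagonal(a_np, offset=1, axis1=0, axis2=2),
)

# The generalized diagonal over more than two axes (no NumPy equivalent)
# now lives only on the private helper; offset must be 0 in this form.
d = a._diag_helper(offset=0, extract=True, axes=(0, 1, 2))
```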