From a5aa130a6a053cdf640ef5e116ad73d06cabfae7 Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Wed, 22 Nov 2023 16:06:52 -0800 Subject: [PATCH 1/2] Add more details to conda install process (#1083) --- README.md | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 7516331ff..cec00b052 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,14 @@ If you have questions, please contact us at legate(at)nvidia.com. ## Installation -cuNumeric is available [on conda](https://anaconda.org/legate/cunumeric): +cuNumeric is available [on conda](https://anaconda.org/legate/cunumeric). +Create a new environment containing cuNumeric: + +``` +mamba create -n myenv -c nvidia -c conda-forge -c legate cunumeric +``` + +or install it into an existing environment: ``` mamba install -c nvidia -c conda-forge -c legate cunumeric @@ -48,7 +55,12 @@ Only linux-64 packages are available at the moment. The default package contains GPU support, and is compatible with CUDA >= 11.8 (CUDA driver version >= r520), and Volta or later GPU architectures. There are also CPU-only packages available, and will be automatically selected when -installing on a machine without GPUs. +installing on a machine without GPUs. You can force installation of a CPU-only +package by requesting it as follows: + +``` +mamba ... cunumeric=*=*_cpu +``` See the build instructions at https://nv-legate.github.io/cunumeric for details about building cuNumeric from source. From 0474c7d2d80103db13823289f2bac3173ae87726 Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Wed, 22 Nov 2023 21:48:57 -0800 Subject: [PATCH 2/2] Don't instantiate template for low DIM counts, to fix a warning (#1077) * Don't instantiate template for low DIM counts, to fix a warning * Reverse the order on the comparison --- src/cunumeric/matrix/batched_cholesky_template.inl | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/cunumeric/matrix/batched_cholesky_template.inl b/src/cunumeric/matrix/batched_cholesky_template.inl index 8d266e3f0..d27f25e7a 100644 --- a/src/cunumeric/matrix/batched_cholesky_template.inl +++ b/src/cunumeric/matrix/batched_cholesky_template.inl @@ -57,7 +57,7 @@ struct _cholesky_supported { template struct BatchedCholeskyImpl { - template + template 2)>* = nullptr> void operator()(Array& input_array, Array& output_array) const { using VAL = legate_type_of; @@ -94,8 +94,8 @@ struct BatchedCholeskyImpl { if (shape.empty()) return; - int num_blocks = 1; - for (int i = 0; i < (DIM - 2); ++i) { num_blocks *= (shape.hi[i] - shape.lo[i] + 1); } + int32_t num_blocks = 1; + for (int32_t i = 0; i < (DIM - 2); ++i) { num_blocks *= (shape.hi[i] - shape.lo[i] + 1); } auto m = static_cast(shape.hi[DIM - 2] - shape.lo[DIM - 2] + 1); auto n = static_cast(shape.hi[DIM - 1] - shape.lo[DIM - 1] + 1); @@ -103,7 +103,7 @@ struct BatchedCholeskyImpl { auto block_stride = m * n; - for (int i = 0; i < num_blocks; ++i) { + for (int32_t i = 0; i < num_blocks; ++i) { if constexpr (_cholesky_supported::value) { CopyBlockImpl()(output, input, sizeof(VAL) * block_stride); PotrfImplBody()(output, m, n); @@ -119,6 +119,12 @@ struct BatchedCholeskyImpl { } } } + + template * = nullptr> + void operator()(Array& input_array, Array& output_array) const + { + assert(false); + } }; template