From a5aa130a6a053cdf640ef5e116ad73d06cabfae7 Mon Sep 17 00:00:00 2001
From: Manolis Papadakis <manopapad@gmail.com>
Date: Wed, 22 Nov 2023 16:06:52 -0800
Subject: [PATCH 1/2] Add more details to conda install process (#1083)

---
 README.md | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index 7516331ff..cec00b052 100644
--- a/README.md
+++ b/README.md
@@ -37,7 +37,14 @@ If you have questions, please contact us at legate(at)nvidia.com.
 
 ## Installation
 
-cuNumeric is available [on conda](https://anaconda.org/legate/cunumeric):
+cuNumeric is available [on conda](https://anaconda.org/legate/cunumeric).
+Create a new environment containing cuNumeric:
+
+```
+mamba create -n myenv -c nvidia -c conda-forge -c legate cunumeric
+```
+
+or install it into an existing environment:
 
 ```
 mamba install -c nvidia -c conda-forge -c legate cunumeric
@@ -48,7 +55,12 @@ Only linux-64 packages are available at the moment.
 The default package contains GPU support, and is compatible with CUDA >= 11.8
 (CUDA driver version >= r520), and Volta or later GPU architectures. There are
 also CPU-only packages available, and will be automatically selected when
-installing on a machine without GPUs.
+installing on a machine without GPUs. You can force installation of a CPU-only
+package by requesting it as follows:
+
+```
+mamba ... cunumeric=*=*_cpu
+```
 
 See the build instructions at https://nv-legate.github.io/cunumeric for details
 about building cuNumeric from source.

From 0474c7d2d80103db13823289f2bac3173ae87726 Mon Sep 17 00:00:00 2001
From: Manolis Papadakis <manopapad@gmail.com>
Date: Wed, 22 Nov 2023 21:48:57 -0800
Subject: [PATCH 2/2] Don't instantiate template for low DIM counts, to fix a
 warning (#1077)

* Don't instantiate template for low DIM counts, to fix a warning

* Reverse the order on the comparison
---
 src/cunumeric/matrix/batched_cholesky_template.inl | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/cunumeric/matrix/batched_cholesky_template.inl b/src/cunumeric/matrix/batched_cholesky_template.inl
index 8d266e3f0..d27f25e7a 100644
--- a/src/cunumeric/matrix/batched_cholesky_template.inl
+++ b/src/cunumeric/matrix/batched_cholesky_template.inl
@@ -57,7 +57,7 @@ struct _cholesky_supported {
 
 template <VariantKind KIND>
 struct BatchedCholeskyImpl {
-  template <Type::Code CODE, int DIM>
+  template <Type::Code CODE, int32_t DIM, std::enable_if_t<(DIM > 2)>* = nullptr>
   void operator()(Array& input_array, Array& output_array) const
   {
     using VAL = legate_type_of<CODE>;
@@ -94,8 +94,8 @@ struct BatchedCholeskyImpl {
 
     if (shape.empty()) return;
 
-    int num_blocks = 1;
-    for (int i = 0; i < (DIM - 2); ++i) { num_blocks *= (shape.hi[i] - shape.lo[i] + 1); }
+    int32_t num_blocks = 1;
+    for (int32_t i = 0; i < (DIM - 2); ++i) { num_blocks *= (shape.hi[i] - shape.lo[i] + 1); }
 
     auto m = static_cast<int32_t>(shape.hi[DIM - 2] - shape.lo[DIM - 2] + 1);
     auto n = static_cast<int32_t>(shape.hi[DIM - 1] - shape.lo[DIM - 1] + 1);
@@ -103,7 +103,7 @@ struct BatchedCholeskyImpl {
 
     auto block_stride = m * n;
 
-    for (int i = 0; i < num_blocks; ++i) {
+    for (int32_t i = 0; i < num_blocks; ++i) {
       if constexpr (_cholesky_supported<CODE>::value) {
         CopyBlockImpl<KIND>()(output, input, sizeof(VAL) * block_stride);
         PotrfImplBody<KIND, CODE>()(output, m, n);
@@ -119,6 +119,12 @@ struct BatchedCholeskyImpl {
       }
     }
   }
+
+  template <Type::Code CODE, int32_t DIM, std::enable_if_t<DIM <= 2>* = nullptr>
+  void operator()(Array& input_array, Array& output_array) const
+  {
+    assert(false);  // DIM <= 2 stub: exists only so the DIM > 2 body is not instantiated
+  }
 };
 
 template <VariantKind KIND>