From 19672048f1c270ab74a038cb156699623e60e3a7 Mon Sep 17 00:00:00 2001
From: Jeremy <jwilke@nvidia.com>
Date: Fri, 18 Aug 2023 13:35:20 -0700
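Subject: [PATCH] address PR comments

Warn instead of raising NotImplementedError when a batched cholesky
submatrix dimension exceeds 32768, moving the check from
_batched_cholesky into cholesky. Correct the shape-mismatch error
message in the batched cholesky task, return early on empty shapes
only after the accessors have been created, add a test for an empty
batch, and update the copyright headers.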

---
 cunumeric/linalg/cholesky.py                  | 24 +++++++++++--------
 src/cunumeric/matrix/batched_cholesky.cc      |  2 +-
 src/cunumeric/matrix/batched_cholesky.cu      |  2 +-
 src/cunumeric/matrix/batched_cholesky_omp.cc  |  2 +-
 .../matrix/batched_cholesky_template.inl      |  8 +++----
 tests/integration/test_cholesky.py            | 11 ++++++++-
 6 files changed, 31 insertions(+), 18 deletions(-)

diff --git a/cunumeric/linalg/cholesky.py b/cunumeric/linalg/cholesky.py
index 983e771ed..5221556f4 100644
--- a/cunumeric/linalg/cholesky.py
+++ b/cunumeric/linalg/cholesky.py
@@ -1,4 +1,4 @@
-# Copyright 2021-2022 NVIDIA Corporation
+# Copyright 2023 NVIDIA Corporation
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -208,13 +208,6 @@ def _batched_cholesky(output: DeferredArray, input: DeferredArray) -> None:
     # wildly varying memory available depending on the system.
     # Just use a fixed cutoff to provide some sensible warning.
     # TODO: find a better way to inform the user dims are too big
-    size = input.base.shape[-1]
-    if size > 32768:
-        raise NotImplementedError(
-            "batched cholesky is only valid"
-            " when the square submatrices fit"
-            f" on a single proc, n > {size} is too large"
-        )
     context = output.context
     task = context.create_auto_task(CuNumericOpCode.BATCHED_CHOLESKY)
     task.add_input(input.base)
@@ -229,16 +222,27 @@ def _batched_cholesky(output: DeferredArray, input: DeferredArray) -> None:
 def cholesky(
     output: DeferredArray, input: DeferredArray, no_tril: bool
 ) -> None:
+    runtime = output.runtime
+    context = output.context
     if len(input.base.shape) > 2:
         if no_tril:
             raise NotImplementedError(
                 "batched cholesky expects to only "
                 "produce the lower triangular matrix"
             )
+        size = input.base.shape[-1]
+        # Use 32768 as the dimension cutoff for the warning:
+        # a single 32768 x 32768 float64 submatrix occupies
+        # 8 GiB, so anything larger produces a warning
+        if size > 32768:
+            runtime.warn(
+                "batched cholesky is only valid"
+                " when the square submatrices fit"
+                f" on a single proc, n > {size} may be too large",
+                category=UserWarning,
+            )
         return _batched_cholesky(output, input)
 
-    runtime = output.runtime
-    context = output.context
     if runtime.num_procs == 1:
         transpose_copy_single(context, input.base, output.base)
         potrf_single(context, output.base)
diff --git a/src/cunumeric/matrix/batched_cholesky.cc b/src/cunumeric/matrix/batched_cholesky.cc
index 947edbdea..30dbe3c53 100644
--- a/src/cunumeric/matrix/batched_cholesky.cc
+++ b/src/cunumeric/matrix/batched_cholesky.cc
@@ -1,4 +1,4 @@
-/* Copyright 2021-2022 NVIDIA Corporation
+/* Copyright 2023 NVIDIA Corporation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/src/cunumeric/matrix/batched_cholesky.cu b/src/cunumeric/matrix/batched_cholesky.cu
index e71896e3b..26fe3058f 100644
--- a/src/cunumeric/matrix/batched_cholesky.cu
+++ b/src/cunumeric/matrix/batched_cholesky.cu
@@ -1,4 +1,4 @@
-/* Copyright 2021-2022 NVIDIA Corporation
+/* Copyright 2023 NVIDIA Corporation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/src/cunumeric/matrix/batched_cholesky_omp.cc b/src/cunumeric/matrix/batched_cholesky_omp.cc
index db4fd8dfb..84b311ff2 100644
--- a/src/cunumeric/matrix/batched_cholesky_omp.cc
+++ b/src/cunumeric/matrix/batched_cholesky_omp.cc
@@ -1,4 +1,4 @@
-/* Copyright 2021-2022 NVIDIA Corporation
+/* Copyright 2023 NVIDIA Corporation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/src/cunumeric/matrix/batched_cholesky_template.inl b/src/cunumeric/matrix/batched_cholesky_template.inl
index 38d0af0a5..db16427d2 100644
--- a/src/cunumeric/matrix/batched_cholesky_template.inl
+++ b/src/cunumeric/matrix/batched_cholesky_template.inl
@@ -1,4 +1,4 @@
-/* Copyright 2021-2022 NVIDIA Corporation
+/* Copyright 2023 NVIDIA Corporation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -64,16 +64,16 @@ struct BatchedCholeskyImpl {
     auto shape = input_array.shape<DIM>();
     if (shape != output_array.shape<DIM>()) {
       throw legate::TaskException(
-        "Batched cholesky is not yet supported when input/output types differ");
+        "Batched cholesky is not supported when input/output shapes differ");
     }
 
-    if (shape.empty()) return;
-
     size_t strides[DIM];
 
     auto input  = input_array.read_accessor<VAL, DIM>(shape).ptr(shape, strides);
     auto output = output_array.write_accessor<VAL, DIM>(shape).ptr(shape, strides);
 
+    if (shape.empty()) return;
+
     // TODO: we need some sort of check here on the strides
     // This should be a dense thing.
 
diff --git a/tests/integration/test_cholesky.py b/tests/integration/test_cholesky.py
index 67ff6b1e4..8b869b553 100644
--- a/tests/integration/test_cholesky.py
+++ b/tests/integration/test_cholesky.py
@@ -1,4 +1,4 @@
-# Copyright 2021-2022 NVIDIA Corporation
+# Copyright 2023 NVIDIA Corporation
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -82,6 +82,15 @@ def test_batched_3d(n):
         assert allclose(correct, test)
 
 
+def test_batched_empty():  # cholesky on a batch containing no matrices
+    batch = 4
+    a = _get_real_symm_posdef(8)
+    a_batched = num.einsum("i,jk->ijk", np.arange(batch) + 1, a)
+    a_sliced = a_batched[0:0, :, :]  # zero-length slice along the first axis
+    empty = num.linalg.cholesky(a_sliced)
+    assert empty.shape == a_sliced.shape  # output shape must match the input
+
+
 @pytest.mark.parametrize("n", SIZES)
 def test_batched_4d(n):
     batch = 2