Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/branch-24.01' into clean_diagonal
Browse files Browse the repository at this point in the history
  • Loading branch information
ipdemes committed Nov 27, 2023
2 parents 82dbb6d + 0474c7d commit 99fc3f0
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 6 deletions.
16 changes: 14 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,14 @@ If you have questions, please contact us at legate(at)nvidia.com.

## Installation

cuNumeric is available [on conda](https://anaconda.org/legate/cunumeric):
cuNumeric is available [on conda](https://anaconda.org/legate/cunumeric).
Create a new environment containing cuNumeric:

```
mamba create -n myenv -c nvidia -c conda-forge -c legate cunumeric
```

or install it into an existing environment:

```
mamba install -c nvidia -c conda-forge -c legate cunumeric
Expand All @@ -48,7 +55,12 @@ Only linux-64 packages are available at the moment.
The default package contains GPU support, and is compatible with CUDA >= 11.8
(CUDA driver version >= r520), and Volta or later GPU architectures. There are
also CPU-only packages available, which will be automatically selected when
installing on a machine without GPUs.
installing on a machine without GPUs. You can force installation of a CPU-only
package by requesting it as follows:

```
mamba ... cunumeric=*=*_cpu
```

See the build instructions at https://nv-legate.github.io/cunumeric for details
about building cuNumeric from source.
Expand Down
14 changes: 10 additions & 4 deletions src/cunumeric/matrix/batched_cholesky_template.inl
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ struct _cholesky_supported {

template <VariantKind KIND>
struct BatchedCholeskyImpl {
template <Type::Code CODE, int DIM>
template <Type::Code CODE, int32_t DIM, std::enable_if_t<(DIM > 2)>* = nullptr>
void operator()(Array& input_array, Array& output_array) const
{
using VAL = legate_type_of<CODE>;
Expand Down Expand Up @@ -94,16 +94,16 @@ struct BatchedCholeskyImpl {

if (shape.empty()) return;

int num_blocks = 1;
for (int i = 0; i < (DIM - 2); ++i) { num_blocks *= (shape.hi[i] - shape.lo[i] + 1); }
int32_t num_blocks = 1;
for (int32_t i = 0; i < (DIM - 2); ++i) { num_blocks *= (shape.hi[i] - shape.lo[i] + 1); }

auto m = static_cast<int32_t>(shape.hi[DIM - 2] - shape.lo[DIM - 2] + 1);
auto n = static_cast<int32_t>(shape.hi[DIM - 1] - shape.lo[DIM - 1] + 1);
assert(m > 0 && n > 0);

auto block_stride = m * n;

for (int i = 0; i < num_blocks; ++i) {
for (int32_t i = 0; i < num_blocks; ++i) {
if constexpr (_cholesky_supported<CODE>::value) {
CopyBlockImpl<KIND>()(output, input, sizeof(VAL) * block_stride);
PotrfImplBody<KIND, CODE>()(output, m, n);
Expand All @@ -119,6 +119,12 @@ struct BatchedCholeskyImpl {
}
}
}

// Fallback overload, selected by the enable_if constraint when DIM <= 2.
// Its body is a single `assert(false)`: in builds with assertions enabled,
// reaching it aborts; neither array argument is read or written here.
// NOTE(review): presumably the batched path requires at least one leading
// batch dimension on top of the two matrix dimensions — confirm with callers.
template <Type::Code CODE, int32_t DIM, std::enable_if_t<DIM <= 2>* = nullptr>
void operator()(Array& input_array, Array& output_array) const
{
// Unsupported dimensionality for this implementation.
assert(false);
}
};

template <VariantKind KIND>
Expand Down

0 comments on commit 99fc3f0

Please sign in to comment.