Skip to content

Commit

Permalink
Merge branch 'branch-24.03' into numba-soa-fix-typing
Browse files Browse the repository at this point in the history
  • Loading branch information
gmarkall committed May 1, 2024
2 parents 5bbe4d6 + 8de3a95 commit 966111a
Show file tree
Hide file tree
Showing 31 changed files with 2,043 additions and 252 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/gh-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ jobs:
options: -u root
image: "${{ inputs.image }}"
env:
CUDA_VERSION: "12.0"
CUDA_VERSION: "12.2"
CUDA_VERSION_MAJOR: "12"
CUDA_VERSION_MINOR: "0"
CUDA_VERSION_MINOR: "2"
SCCACHE_REGION: "us-east-2"
SCCACHE_BUCKET: "rapids-sccache-devs"
SCCACHE_S3_KEY_PREFIX: "legate-cunumeric-dev"
Expand Down
33 changes: 19 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,33 +37,38 @@ If you have questions, please contact us at legate(at)nvidia.com.

## Installation

cuNumeric is available [on conda](https://anaconda.org/legate/cunumeric).
Create a new environment containing cuNumeric:
cuNumeric is available from [conda](https://docs.conda.io/projects/conda/en/latest/index.html)
on the [legate channel](https://anaconda.org/legate/cunumeric).
Please make sure you have at least conda version 24.1 installed, then create
a new environment containing cuNumeric:

```
mamba create -n myenv -c nvidia -c conda-forge -c legate cunumeric
conda create -n myenv -c nvidia -c conda-forge -c legate cunumeric
```

or install it into an existing environment:

```
mamba install -c nvidia -c conda-forge -c legate cunumeric
conda install -c nvidia -c conda-forge -c legate cunumeric
```

Only linux-64 packages are available at the moment.

The default package contains GPU support, and is compatible with CUDA >= 11.8
(CUDA driver version >= r520), and Volta or later GPU architectures. There are
also CPU-only packages available, and will be automatically selected when
installing on a machine without GPUs. You can force installation of a CPU-only
package by requesting it as follows:
Once installed, you can verify the installation by running one of the examples
from the cuNumeric repository, for instance:

```
mamba ... cunumeric=*=*_cpu
$ legate examples/black_scholes.py
Running black scholes on 10K options...
Elapsed Time: 129.017 ms
```

See the build instructions at https://nv-legate.github.io/cunumeric for details
about building cuNumeric from source.
Only linux-64 packages are available at the moment.

The default package contains GPU support, and is compatible with CUDA >= 11.8
(driver >= 520), and Volta or later GPU architectures. There are also CPU-only
packages available, which will be automatically selected when installing on a
machine without GPUs available. See https://nv-legate.github.io/cunumeric for
details about manually forcing different install configurations, or building
cuNumeric from source.

## Usage and Execution

Expand Down
2 changes: 1 addition & 1 deletion cmake/versions.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"git_url" : "https://github.com/nv-legate/legate.core.git",
"git_shallow": false,
"always_download": false,
"git_tag" : "08da13fc544f3db26bf1ef7ce9bdb85e72a9d9fb"
"git_tag" : "0f509a007f36609d2b0bd536d8e5c54f2391b444"
}
}
}
4 changes: 2 additions & 2 deletions continuous_integration/scripts/test-cunumeric
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ setup_env() {
}

setup_test_env() {
mamba install -y "clang-tools>=8" "clang>=8" colorama coverage mock pre-commit pytest-cov pytest-lazy-fixture pytest-mock pytest types-docutils pynvml
mamba install -y "clang-tools>=8" "clang>=8" colorama coverage mock pre-commit pytest-cov pytest-lazy-fixture pytest-mock "pytest<8" types-docutils pynvml

pip install tifffile
}
Expand Down Expand Up @@ -58,4 +58,4 @@ test-cunumeric() {
esac
}

(test-cunumeric "$@");
(test-cunumeric "$@");
2 changes: 2 additions & 0 deletions cunumeric/_ufunc/comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,15 @@
"logical_and",
BinaryOpCode.LOGICAL_AND,
relation_types_of(all_dtypes),
red_code=UnaryRedCode.ALL,
)

logical_or = create_binary_ufunc(
"Compute the truth value of x1 OR x2 element-wise.",
"logical_or",
BinaryOpCode.LOGICAL_OR,
relation_types_of(all_dtypes),
red_code=UnaryRedCode.ANY,
)

logical_xor = create_binary_ufunc(
Expand Down
11 changes: 11 additions & 0 deletions cunumeric/_ufunc/ufunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -753,6 +753,16 @@ def reduce(
f"reduction for {self} is not yet implemented"
)

if self._op_code in [
BinaryOpCode.LOGICAL_AND,
BinaryOpCode.LOGICAL_OR,
]:
res_dtype = bool
if dtype is not None:
raise TypeError("Cannot set dtype on a logical reduction")
else:
res_dtype = None

# NumPy seems to be using None as the default axis value for scalars
if array.ndim == 0 and axis == 0:
axis = None
Expand All @@ -767,6 +777,7 @@ def reduce(
keepdims=keepdims,
initial=initial,
where=where,
res_dtype=res_dtype,
)


Expand Down
14 changes: 14 additions & 0 deletions cunumeric/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -2257,6 +2257,20 @@ def clip(
Multiple GPUs, Multiple CPUs
"""
min = (
min
if min is not None
else np.iinfo(self.dtype).min
if self.dtype.kind == "i"
else -np.inf
)
max = (
max
if max is not None
else np.iinfo(self.dtype).max
if self.dtype.kind == "i"
else np.inf
)
args = (
np.array(min, dtype=self.dtype),
np.array(max, dtype=self.dtype),
Expand Down
30 changes: 18 additions & 12 deletions cunumeric/deferred.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,7 @@ def _copy_if_overlapping(self, other: DeferredArray) -> DeferredArray:
self.runtime.create_empty_thunk(
self.shape,
self.base.type,
inputs=[self],
),
)
copy.copy(self, deep=True)
Expand Down Expand Up @@ -1098,22 +1099,20 @@ def set_item(self, key: Any, rhs: Any) -> None:
# to set the result back. In cuNumeric, the object we
# return in step (1) is actually a subview to the array arr
# through which we make updates in place, so after step (2) is
# done, # the effect of inplace update is already reflected
# done, the effect of inplace update is already reflected
# to the arr. Therefore, we skip the copy to avoid redundant
# copies if we know that we hit such a scenario.
# TODO: We should make this work for the advanced indexing case
if view.base == rhs.base:
# NOTE: Neither Store nor Storage has an __eq__, so we can
# only check that the underlying RegionField/Future corresponds
# to the same Legion handle.
if (
view.base.has_storage
and rhs.base.has_storage
and view.base.storage.same_handle(rhs.base.storage)
):
return

if view.base.overlaps(rhs.base):
rhs_copy = self.runtime.create_empty_thunk(
rhs.shape,
rhs.base.type,
inputs=[rhs],
)
rhs_copy.copy(rhs, deep=False)
rhs = rhs_copy

view.copy(rhs, deep=False)

def broadcast_to(self, shape: NdShape) -> NumPyThunk:
Expand Down Expand Up @@ -1870,6 +1869,9 @@ def put(self, indices: Any, values: Any, check_bounds: bool) -> None:

assert indices.size == values.size

# Handle store overlap
values = values._copy_if_overlapping(self_tmp)

# first, we create an indirect array of PointN type with shape
# (indices.size,), which is used to copy data from values
# to the target ND array (self)
Expand Down Expand Up @@ -1910,11 +1912,12 @@ def put(self, indices: Any, values: Any, check_bounds: bool) -> None:
@auto_convert("mask", "values")
def putmask(self, mask: Any, values: Any) -> None:
assert self.shape == mask.shape

values = values._copy_if_overlapping(self)
if values.shape != self.shape:
values_new = values._broadcast(self.shape)
else:
values_new = values.base

task = self.context.create_auto_task(CuNumericOpCode.PUTMASK)
task.add_input(self.base)
task.add_input(mask.base)
Expand Down Expand Up @@ -3142,6 +3145,7 @@ def unary_op(
multiout: Optional[Any] = None,
) -> None:
lhs = self.base
src = src._copy_if_overlapping(self)
rhs = src._broadcast(lhs.shape)

with Annotation({"OpCode": op.name}):
Expand Down Expand Up @@ -3304,7 +3308,9 @@ def binary_op(
args: Any,
) -> None:
lhs = self.base
src1 = src1._copy_if_overlapping(self)
rhs1 = src1._broadcast(lhs.shape)
src2 = src2._copy_if_overlapping(self)
rhs2 = src2._broadcast(lhs.shape)

with Annotation({"OpCode": op_code.name}):
Expand Down
2 changes: 0 additions & 2 deletions cunumeric/linalg/cholesky.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ def transpose_copy_single(
task.add_input(input)
# Output has the same shape as input, but is mapped
# to a column major instance
task.add_scalar_arg(False, ty.bool_)

task.add_broadcast(output)
task.add_broadcast(input)
Expand All @@ -62,7 +61,6 @@ def transpose_copy(
task.add_input(p_input)
# Output has the same shape as input, but is mapped
# to a column major instance
task.add_scalar_arg(False, ty.bool_)

task.execute()

Expand Down
Loading

0 comments on commit 966111a

Please sign in to comment.