diff --git a/cmake/versions.json b/cmake/versions.json
index 1e3d22f08..e38876207 100644
--- a/cmake/versions.json
+++ b/cmake/versions.json
@@ -5,7 +5,7 @@
       "git_url" : "https://github.com/nv-legate/legate.core.git",
       "git_shallow": false,
       "always_download": false,
-      "git_tag" : "08da13fc544f3db26bf1ef7ce9bdb85e72a9d9fb"
+      "git_tag" : "85c2a247a6b2c8086e57568ef0056045c3e175e3"
     }
   }
 }
diff --git a/cunumeric/deferred.py b/cunumeric/deferred.py
index 9f9e46057..9d9fa963d 100644
--- a/cunumeric/deferred.py
+++ b/cunumeric/deferred.py
@@ -296,6 +296,7 @@ def _copy_if_overlapping(self, other: DeferredArray) -> DeferredArray:
             self.runtime.create_empty_thunk(
                 self.shape,
                 self.base.type,
+                inputs=[self],
             ),
         )
         copy.copy(self, deep=True)
@@ -1098,22 +1099,20 @@ def set_item(self, key: Any, rhs: Any) -> None:
             # to set the result back. In cuNumeric, the object we
             # return in step (1) is actually a subview to the array arr
             # through which we make updates in place, so after step (2) is
-            # done, # the effect of inplace update is already reflected
+            # done, the effect of inplace update is already reflected
            # to the arr. Therefore, we skip the copy to avoid redundant
             # copies if we know that we hit such a scenario.
             # TODO: We should make this work for the advanced indexing case
-            if view.base == rhs.base:
+            # NOTE: Neither Store nor Storage have an __eq__, so we can
+            # only check that the underlying RegionField/Future corresponds
+            # to the same Legion handle.
+            if (
+                view.base.has_storage
+                and rhs.base.has_storage
+                and view.base.storage.same_handle(rhs.base.storage)
+            ):
                 return
 
-            if view.base.overlaps(rhs.base):
-                rhs_copy = self.runtime.create_empty_thunk(
-                    rhs.shape,
-                    rhs.base.type,
-                    inputs=[rhs],
-                )
-                rhs_copy.copy(rhs, deep=False)
-                rhs = rhs_copy
-
             view.copy(rhs, deep=False)
 
     def broadcast_to(self, shape: NdShape) -> NumPyThunk:
@@ -1870,6 +1869,9 @@ def put(self, indices: Any, values: Any, check_bounds: bool) -> None:
 
         assert indices.size == values.size
 
+        # Handle store overlap
+        values = values._copy_if_overlapping(self_tmp)
+
         # first, we create indirect array with PointN type that
         # (indices.size,) shape and is used to copy data from values
         # to the target ND array (self)
@@ -1910,11 +1912,12 @@ def put(self, indices: Any, values: Any, check_bounds: bool) -> None:
     @auto_convert("mask", "values")
     def putmask(self, mask: Any, values: Any) -> None:
         assert self.shape == mask.shape
-
+        values = values._copy_if_overlapping(self)
         if values.shape != self.shape:
             values_new = values._broadcast(self.shape)
         else:
             values_new = values.base
+
         task = self.context.create_auto_task(CuNumericOpCode.PUTMASK)
         task.add_input(self.base)
         task.add_input(mask.base)
@@ -3142,6 +3145,7 @@ def unary_op(
         multiout: Optional[Any] = None,
     ) -> None:
         lhs = self.base
+        src = src._copy_if_overlapping(self)
         rhs = src._broadcast(lhs.shape)
 
         with Annotation({"OpCode": op.name}):
@@ -3304,7 +3308,9 @@ def binary_op(
         args: Any,
     ) -> None:
         lhs = self.base
+        src1 = src1._copy_if_overlapping(self)
         rhs1 = src1._broadcast(lhs.shape)
+        src2 = src2._copy_if_overlapping(self)
         rhs2 = src2._broadcast(lhs.shape)
 
         with Annotation({"OpCode": op_code.name}):
diff --git a/tests/integration/test_overlap.py b/tests/integration/test_overlap.py
new file mode 100644
index 000000000..ad64e4170
--- /dev/null
+++ b/tests/integration/test_overlap.py
@@ -0,0 +1,80 @@
+# Copyright 2024 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import numpy as np
+import pytest
+from utils.generators import mk_seq_array
+
+import cunumeric as num
+
+
+def setitem(lib, a, slice_lhs, slice_rhs):
+    a[slice_lhs] = a[slice_rhs]
+
+
+def dot(lib, a, slice_lhs, slice_rhs):
+    modes = "".join([chr(ord("a") + m) for m in range(len(a.shape))])
+    expr = f"{modes},{modes}->{modes}"
+    lib.einsum(expr, a[slice_lhs], a[slice_rhs], out=a[slice_lhs])
+
+
+def unary_arith(lib, a, slice_lhs, slice_rhs):
+    lib.sin(a[slice_rhs], out=a[slice_lhs])
+
+
+def binary_arith(lib, a, slice_lhs, slice_rhs):
+    a[slice_lhs] += a[slice_rhs]
+
+
+def put(lib, a, slice_lhs, slice_rhs):
+    indices = lib.flip(lib.arange(a[slice_rhs].size))
+    a[slice_lhs].put(indices, a[slice_rhs])
+
+
+def putmask(lib, a, slice_lhs, slice_rhs):
+    mask = (mk_seq_array(lib, a[slice_rhs].shape) % 2).astype(bool)
+    lib.putmask(a[slice_lhs], mask, a[slice_rhs])
+
+
+SHAPES = ((4,), (4, 5), (4, 5, 6))
+OPERATIONS = (setitem, dot, unary_arith, binary_arith, put, putmask)
+
+
+@pytest.mark.parametrize("partial", (True, False))
+@pytest.mark.parametrize("shape", SHAPES)
+@pytest.mark.parametrize("operation", OPERATIONS)
+def test_partial(partial, shape, operation):
+    if partial:
+        # e.g. for shape = (4,5) and setitem: lhs[1:,:] = rhs[:-1,:]
+        slice_lhs = (slice(1, None),) + (slice(None),) * (len(shape) - 1)
+        slice_rhs = (slice(None, -1),) + (slice(None),) * (len(shape) - 1)
+    else:
+        # e.g. for shape = (4,5) and setitem: lhs[:,:] = rhs[:,:]
+        slice_lhs = (slice(None),) * len(shape)
+        slice_rhs = (slice(None),) * len(shape)
+
+    a_np = mk_seq_array(np, shape).astype(np.float64)
+    a_num = mk_seq_array(num, shape).astype(np.float64)
+
+    operation(np, a_np, slice_lhs, slice_rhs)
+    operation(num, a_num, slice_lhs, slice_rhs)
+
+    assert np.array_equal(a_np, a_num)
+
+
+if __name__ == "__main__":
+    import sys
+
+    sys.exit(pytest.main(sys.argv))
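Not part of the diff above: a minimal sketch of the overlapping-view pattern these changes guard against, assuming cuNumeric is importable as `cunumeric` and is expected to match NumPy's copy-before-write semantics for aliased slice assignment; the helper name shift_in_place is illustrative only.

# Sketch of the aliasing scenario handled by the _copy_if_overlapping guards:
# the RHS view a[:-1] shares storage with the LHS view a[1:], so the source
# must be copied before the destination is written.
import numpy as np

import cunumeric as num


def shift_in_place(lib):
    a = lib.arange(6)
    a[1:] = a[:-1]  # overlapping source and destination views
    return a


# NumPy copies the overlapping source first, giving [0 0 1 2 3 4]; with the
# guards in this change, cuNumeric is expected to produce the same result.
assert np.array_equal(shift_in_place(np), shift_in_place(num))

Without the defensive copy, the right-hand-side store could be read while the left-hand-side store is being written, turning the shift into a sliding overwrite instead of NumPy's result.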