diff --git a/programming_examples/README.md b/programming_examples/README.md index 2203b264c..10e4d3aef 100644 --- a/programming_examples/README.md +++ b/programming_examples/README.md @@ -21,3 +21,9 @@ This example provides logic to divide in input 2D matrix into *tiles* of data, a ## [Data Transfer Transpose](data_transfer_transpose) Transposes a matrix with using either Channels or `dma_memcpy_nd`. + +## [WIP: Multi-Segment Examples](multi_segment) + +This is a collection of simple examples that illustrate how to use multiple segments. + +Warning: These examples are a work in progress. diff --git a/programming_examples/channel_examples/README.md b/programming_examples/channel_examples/README.md index 9cb599d40..7c1cd3e11 100644 --- a/programming_examples/channel_examples/README.md +++ b/programming_examples/channel_examples/README.md @@ -4,12 +4,23 @@ This example focuses on one of the key abstractions of air: *channels*. This is ## Running and Testing -#### ```herd-to-herd```: Using a channel to pass data between herds +#### ```herd-to-herd```: Using a channel to pass data between herds -This example ([herd_to_herd/herd_to_herd.py](herd_to_herd/herd_to_herd.py)) defines two `herd`s within the same `launch` + `segment`. There is a *producer herd*, which writes data to a `Herd2Herd` channel, and a *consumer herd*, which reads data form the `Herd2Herd` channel. +There are two parts to this example: two herds within one segment (single segment), and one herd per segment for two segments (multi-segment). + +The single segment example ([herd_to_herd/single_segment/herd_to_herd.py](herd_to_herd/single_segment/herd_to_herd.py)) defines two `herd`s within the same `launch` + `segment`. There is a *producer herd*, which writes data to a `Herd2Herd` channel, and a *consumer herd*, which reads data from the `Herd2Herd` channel. 
+ +```bash +cd herd_to_herd/single_segment +make clean && make +``` + +The multi-segment example ([herd_to_herd/multi_segment/herd_to_herd.py](herd_to_herd/multi_segment/herd_to_herd.py)) defines two `segment`s, each with one `herd`, within the same `launch`. There is a *producer_segment* with a *producer herd*, which writes data to a `Herd2Herd` channel, and a *consumer_segment* with a *consumer herd*, which reads data from the `Herd2Herd` channel. + +Warning: The multi-segment example is a work in progress! ```bash -cd herd_to_herd +cd herd_to_herd/multi_segment make clean && make ``` diff --git a/programming_examples/channel_examples/herd_to_herd/multi_segment/Makefile b/programming_examples/channel_examples/herd_to_herd/multi_segment/Makefile new file mode 100644 index 000000000..844c5686d --- /dev/null +++ b/programming_examples/channel_examples/herd_to_herd/multi_segment/Makefile @@ -0,0 +1,12 @@ +# Copyright (C) 2024, Advanced Micro Devices, Inc. +# SPDX-License-Identifier: MIT +srcdir := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) + +targetname := $(shell basename ${srcdir}) + +run: + mkdir -p build + cd build && ${powershell} python3 ${srcdir}/run.py -v + +clean: + rm -rf build __pycache__ diff --git a/programming_examples/channel_examples/herd_to_herd/multi_segment/herd_to_herd.py b/programming_examples/channel_examples/herd_to_herd/multi_segment/herd_to_herd.py new file mode 100644 index 000000000..fd1726a9b --- /dev/null +++ b/programming_examples/channel_examples/herd_to_herd/multi_segment/herd_to_herd.py @@ -0,0 +1,118 @@ +# Copyright (C) 2024, Advanced Micro Devices, Inc. 
+# SPDX-License-Identifier: MIT + +from air.ir import * +from air.dialects.air import * +from air.dialects.memref import AllocOp, DeallocOp, load, store +from air.dialects.func import FuncOp +from air.dialects.scf import for_, yield_ + +range_ = for_ + +IMAGE_WIDTH = 32 +IMAGE_HEIGHT = 16 +IMAGE_SIZE = [IMAGE_WIDTH, IMAGE_HEIGHT] + + +@module_builder +def build_module(): + memrefTyInOut = MemRefType.get(IMAGE_SIZE, T.i32()) + + # We want to store our data in L1 memory + mem_space_l1 = IntegerAttr.get(T.i32(), MemorySpace.L1) + + # This is the type definition of the tile + image_type_l1 = MemRefType.get( + shape=IMAGE_SIZE, + element_type=T.i32(), + memory_space=mem_space_l1, + ) + + # Create two channels which will send/receive the + # input/output data respectively + ChannelOp("ChanIn") + ChannelOp("ChanOut") + + # Create a channel we will use to pass data between workers in two herds + ChannelOp("Herd2Herd") + + # We will send an image worth of data in and out + @FuncOp.from_py_func(memrefTyInOut, memrefTyInOut) + def copy(arg0, arg1): + + # The arguments are the input and output + @launch(operands=[arg0, arg1]) + def launch_body(a, b): + + # Fetch all input data into the channel + ChannelPut("ChanIn", a) + + # Push all output data out of the channel + ChannelGet("ChanOut", b) + + @segment(name="producer_segment") + def segment_body(): + + @herd(name="producer_herd", sizes=[1, 1]) + def herd_body(tx, ty, sx, sy): + + # We must allocate a buffer of image size for the input/output + image_in = AllocOp(image_type_l1, [], []) + image_out = AllocOp(image_type_l1, [], []) + + ChannelGet("ChanIn", image_in) + + # Access every value in the image + for j in range_(IMAGE_HEIGHT): + for i in range_(IMAGE_WIDTH): + # Load the input value + val_in = load(image_in, [i, j]) + + # Calculate the output value + val_out = arith.muli(val_in, val_in) + + # Store the output value + store(val_out, image_out, [i, j]) + yield_([]) + yield_([]) + + ChannelPut("Herd2Herd", image_out) + + 
DeallocOp(image_in) + DeallocOp(image_out) + + @segment(name="consumer_segment") + def segment_body(): + + @herd(name="consumer_herd", sizes=[1, 1]) + def herd_body(tx, ty, sx, sy): + + # We must allocate a buffer of image size for the input/output + image_in = AllocOp(image_type_l1, [], []) + image_out = AllocOp(image_type_l1, [], []) + + ChannelGet("Herd2Herd", image_in) + + # Access every value in the image + for j in range_(IMAGE_HEIGHT): + for i in range_(IMAGE_WIDTH): + # Load the input value + val_in = load(image_in, [i, j]) + + # Calculate the output value + val_out = arith.addi(val_in, arith.ConstantOp(T.i32(), 1)) + + # Store the output value + store(val_out, image_out, [i, j]) + yield_([]) + yield_([]) + + ChannelPut("ChanOut", image_out) + + DeallocOp(image_in) + DeallocOp(image_out) + + +if __name__ == "__main__": + module = build_module() + print(module) diff --git a/programming_examples/channel_examples/herd_to_herd/multi_segment/run.py b/programming_examples/channel_examples/herd_to_herd/multi_segment/run.py new file mode 100644 index 000000000..03b84d1dc --- /dev/null +++ b/programming_examples/channel_examples/herd_to_herd/multi_segment/run.py @@ -0,0 +1,91 @@ +# run.py -*- Python -*- +# +# Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved. 
+# SPDX-License-Identifier: MIT +import argparse +import numpy as np +import air.backend.xrt as xrt_backend +import filelock + +from herd_to_herd import * + +INOUT_DATATYPE = np.uint32 +INOUT_ELEM_SIZE = np.dtype(INOUT_DATATYPE).itemsize +INOUT_SIZE = IMAGE_SIZE[0] * IMAGE_SIZE[1] +INOUT_SIZE_BYTES = INOUT_SIZE * INOUT_ELEM_SIZE + + +def print_matrix(matrix_array): + for i in range(IMAGE_HEIGHT): + row = matrix_array[i * IMAGE_WIDTH : (i + 1) * IMAGE_WIDTH] + for val in row: + val = val & 0xFFFF + print(f"{val:04x}", end=" ") + print("") + + +def test_main(build_module, verbose=False): + mlir_module = build_module() + + input_a = np.arange(1, INOUT_SIZE + 1, dtype=INOUT_DATATYPE) + input_b = np.arange(1, INOUT_SIZE + 1, dtype=INOUT_DATATYPE) + for i in range(INOUT_SIZE): + input_a[i] = 0x2 + input_b[i] = 0x00C0FFEE + + backend = xrt_backend.XRTBackend( + verbose=verbose, experimental_passes=True, omit_while_true_loop=True + ) + + if verbose: + print_matrix(input_b) + + # run the module + with filelock.FileLock("/tmp/npu.lock"): + addone = backend.compile_and_load(mlir_module) + (_, output_b) = addone(input_a, input_b) + + backend.unload() + + if verbose: + print_matrix(output_b) + + # check output, should have all values incremented + errors = 0 + for i in range(INOUT_SIZE): + rb = output_b[i] + + row = i // IMAGE_WIDTH + col = i % IMAGE_WIDTH + + # value should have been updated + expected_value = 0x2 * 0x2 + 1 + if not (rb == expected_value): + """ + print( + f"IM {i} [{col}, {row}] should be 0x{expected_value:x}, is 0x{rb:x}\n" + ) + """ + errors += 1 + + if errors == 0: + print("PASS!") + exit(0) + else: + print("failed. 
errors=", errors) + exit(-1) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + prog="run.py", + description="Builds, runs, and tests the herd-to-herd multi-segment example", + ) + + parser.add_argument( + "-v", + "--verbose", + action="store_true", + ) + args = parser.parse_args() + test_main(build_module, verbose=args.verbose) diff --git a/programming_examples/channel_examples/herd_to_herd/multi_segment/run_makefile.lit b/programming_examples/channel_examples/herd_to_herd/multi_segment/run_makefile.lit new file mode 100644 index 000000000..12342da6c --- /dev/null +++ b/programming_examples/channel_examples/herd_to_herd/multi_segment/run_makefile.lit @@ -0,0 +1,9 @@ +// (c) Copyright 2024 Advanced Micro Devices, Inc. + // SPDX-License-Identifier: MIT + // + // REQUIRES: ryzen_ai + // + // RUN: make -f %S/Makefile clean + // RUN: make -f %S/Makefile run | FileCheck %s + // CHECK: PASS! + // XFAIL: * \ No newline at end of file diff --git a/programming_examples/channel_examples/herd_to_herd/Makefile b/programming_examples/channel_examples/herd_to_herd/single_segment/Makefile similarity index 100% rename from programming_examples/channel_examples/herd_to_herd/Makefile rename to programming_examples/channel_examples/herd_to_herd/single_segment/Makefile diff --git a/programming_examples/channel_examples/herd_to_herd/herd_to_herd.py b/programming_examples/channel_examples/herd_to_herd/single_segment/herd_to_herd.py similarity index 100% rename from programming_examples/channel_examples/herd_to_herd/herd_to_herd.py rename to programming_examples/channel_examples/herd_to_herd/single_segment/herd_to_herd.py diff --git a/programming_examples/channel_examples/herd_to_herd/run.py b/programming_examples/channel_examples/herd_to_herd/single_segment/run.py similarity index 96% rename from programming_examples/channel_examples/herd_to_herd/run.py rename to programming_examples/channel_examples/herd_to_herd/single_segment/run.py index 5d28899a8..df5468a0f 100644 
--- a/programming_examples/channel_examples/herd_to_herd/run.py +++ b/programming_examples/channel_examples/herd_to_herd/single_segment/run.py @@ -77,7 +77,7 @@ def test_main(build_module, verbose=False): if __name__ == "__main__": parser = argparse.ArgumentParser( prog="run.py", - description="Builds, runs, and tests the channel_examples/herd_to_herd example", + description="Builds, runs, and tests the herd-to-herd single-segment example", ) parser.add_argument( diff --git a/programming_examples/channel_examples/herd_to_herd/run_makefile.lit b/programming_examples/channel_examples/herd_to_herd/single_segment/run_makefile.lit similarity index 100% rename from programming_examples/channel_examples/herd_to_herd/run_makefile.lit rename to programming_examples/channel_examples/herd_to_herd/single_segment/run_makefile.lit diff --git a/programming_examples/multi_segment/common.py b/programming_examples/multi_segment/common.py new file mode 100644 index 000000000..2d62308cb --- /dev/null +++ b/programming_examples/multi_segment/common.py @@ -0,0 +1,82 @@ +# common.py -*- Python -*- +# +# Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved. 
+# SPDX-License-Identifier: MIT +import numpy as np +import air.backend.xrt as xrt_backend +import filelock + +VECTOR_LEN = 32 +VECTOR_SIZE = [VECTOR_LEN, 1] + +INOUT_DATATYPE = np.uint32 +INOUT_ELEM_SIZE = np.dtype(INOUT_DATATYPE).itemsize +INOUT_SIZE = VECTOR_SIZE[0] * VECTOR_SIZE[1] +INOUT_SIZE_BYTES = INOUT_SIZE * INOUT_ELEM_SIZE + + +def test_main(build_module, verbose=False): + mlir_module = build_module() + + input_a = np.arange(1, INOUT_SIZE + 1, dtype=INOUT_DATATYPE) + input_b = np.arange(1, INOUT_SIZE + 1, dtype=INOUT_DATATYPE) + input_c = np.arange(1, INOUT_SIZE + 1, dtype=INOUT_DATATYPE) + input_d = np.arange(1, INOUT_SIZE + 1, dtype=INOUT_DATATYPE) + for i in range(INOUT_SIZE): + input_a[i] = 0x2 + input_b[i] = 0x3 + for i in range(INOUT_SIZE): + input_c[i] = 0x00C0FFEE + input_d[i] = 0x0000CAFE + + backend = xrt_backend.XRTBackend( + verbose=verbose, experimental_passes=True, omit_while_true_loop=True + ) + + if verbose: + print(input_a) + print(input_b) + + # run the module + with filelock.FileLock("/tmp/npu.lock"): + addone = backend.compile_and_load(mlir_module) + (_, _, output_c, output_d) = addone(input_a, input_b, input_c, input_d) + + backend.unload() + + if verbose: + print(output_c) + print(output_d) + + # check output, should have all values incremented + errors = 0 + for i in range(INOUT_SIZE): + rb = output_c[i] + + # value should have been updated + if not (rb == 12): + """ + print( + f"C - IM {i} should be 0x{expected_value:x}, is 0x{rb:x}\n" + ) + """ + errors += 1 + + for i in range(INOUT_SIZE): + rb = output_d[i] + + # value should have been updated + if not (rb == 13): + """ + print( + f"D - IM {i} should be 0x{expected_value:x}, is 0x{rb:x}\n" + ) + """ + errors += 1 + + if errors == 0: + print("PASS!") + exit(0) + else: + print("failed. 
errors=", errors) + exit(-1) diff --git a/programming_examples/multi_segment/multi_segment_channel/Makefile b/programming_examples/multi_segment/multi_segment_channel/Makefile new file mode 100644 index 000000000..844c5686d --- /dev/null +++ b/programming_examples/multi_segment/multi_segment_channel/Makefile @@ -0,0 +1,12 @@ +# Copyright (C) 2024, Advanced Micro Devices, Inc. +# SPDX-License-Identifier: MIT +srcdir := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) + +targetname := $(shell basename ${srcdir}) + +run: + mkdir -p build + cd build && ${powershell} python3 ${srcdir}/run.py -v + +clean: + rm -rf build __pycache__ diff --git a/programming_examples/multi_segment/multi_segment_channel/multi_segment.py b/programming_examples/multi_segment/multi_segment_channel/multi_segment.py new file mode 100644 index 000000000..6773c4875 --- /dev/null +++ b/programming_examples/multi_segment/multi_segment_channel/multi_segment.py @@ -0,0 +1,109 @@ +# Copyright (C) 2024, Advanced Micro Devices, Inc. +# SPDX-License-Identifier: MIT +import sys +from pathlib import Path # if you haven't already done so + +# Python paths are a bit complex. 
Taking solution from : https://stackoverflow.com/questions/16981921/relative-imports-in-python-3 +file = Path(__file__).resolve() +parent, root = file.parent, file.parents[1] +sys.path.append(str(root)) + +# Additionally remove the current file's directory from sys.path +try: + sys.path.remove(str(parent)) +except ValueError: # Already removed + pass + +from air.ir import * +from air.dialects.air import * +from air.dialects.memref import AllocOp, DeallocOp, load, store +from air.dialects.func import FuncOp +from air.dialects.scf import for_, yield_ + +range_ = for_ + +from common import * + + +@module_builder +def build_module(): + memrefTyInOut = MemRefType.get(VECTOR_SIZE, T.i32()) + + # We want to store our data in L1 memory + mem_space_l1 = IntegerAttr.get(T.i32(), MemorySpace.L1) + + # This is the type definition of the tile + image_type_l1 = MemRefType.get( + shape=VECTOR_SIZE, + element_type=T.i32(), + memory_space=mem_space_l1, + ) + + ChannelOp("ChanInA") + ChannelOp("ChanInB") + ChannelOp("ChanOutC") + ChannelOp("ChanOutD") + + # We will send an image worth of data in and out + @FuncOp.from_py_func(memrefTyInOut, memrefTyInOut, memrefTyInOut, memrefTyInOut) + def copy(arg0, arg1, arg2, arg3): + + # The arguments are the input and output + @launch(operands=[arg0, arg1, arg2, arg3]) + def launch_body(a, b, c, d): + ChannelPut("ChanInA", a) + ChannelPut("ChanInB", b) + ChannelGet("ChanOutC", c) + ChannelGet("ChanOutD", d) + + @segment(name="seg1") + def segment_body(): + + @herd(name="addherd1", sizes=[1, 1]) + def herd_body(tx, ty, sx, sy): + + image_in_a = AllocOp(image_type_l1, [], []) + image_out_a = AllocOp(image_type_l1, [], []) + + ChannelGet("ChanInA", image_in_a) + + # Access every value in the tile + c0 = arith.ConstantOp.create_index(0) + for j in range_(VECTOR_LEN): + val_a = load(image_in_a, [c0, j]) + val_outa = arith.addi(val_a, arith.constant(T.i32(), 10)) + store(val_outa, image_out_a, [c0, j]) + yield_([]) + + ChannelPut("ChanOutC", 
image_out_a) + DeallocOp(image_in_a) + DeallocOp(image_out_a) + + @segment(name="seg2") + def segment_body(): + + @herd(name="addherd2", sizes=[1, 1]) + def herd_body(tx, ty, sx, sy): + + image_in_b = AllocOp(image_type_l1, [], []) + image_out_b = AllocOp(image_type_l1, [], []) + + ChannelGet("ChanInB", image_in_b) + + # Access every value in the tile + c0 = arith.ConstantOp.create_index(0) + for j in range_(VECTOR_LEN): + val_b = load(image_in_b, [c0, j]) + val_outb = arith.addi(arith.constant(T.i32(), 10), val_b) + store(val_outb, image_out_b, [c0, j]) + yield_([]) + + ChannelPut("ChanOutD", image_out_b) + + DeallocOp(image_in_b) + DeallocOp(image_out_b) + + +if __name__ == "__main__": + module = build_module() + print(module) diff --git a/programming_examples/multi_segment/multi_segment_channel/run.py b/programming_examples/multi_segment/multi_segment_channel/run.py new file mode 100644 index 000000000..37fd0fc81 --- /dev/null +++ b/programming_examples/multi_segment/multi_segment_channel/run.py @@ -0,0 +1,35 @@ +# run.py -*- Python -*- +# +# Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved. +# SPDX-License-Identifier: MIT +import argparse +import sys +from pathlib import Path # if you haven't already done so + +# Python paths are a bit complex. 
Taking solution from : https://stackoverflow.com/questions/16981921/relative-imports-in-python-3 +file = Path(__file__).resolve() +parent, root = file.parent, file.parents[1] +sys.path.append(str(root)) + +# Additionally remove the current file's directory from sys.path +try: + sys.path.remove(str(parent)) +except ValueError: # Already removed + pass + +from multi_segment_channel.multi_segment import build_module +from common import test_main + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + prog="run.py", + description="Builds, runs, and tests the multi-segment channel example", + ) + + parser.add_argument( + "-v", + "--verbose", + action="store_true", + ) + args = parser.parse_args() + test_main(build_module, verbose=args.verbose) diff --git a/programming_examples/multi_segment/multi_segment_channel/run_makefile.lit b/programming_examples/multi_segment/multi_segment_channel/run_makefile.lit new file mode 100644 index 000000000..12342da6c --- /dev/null +++ b/programming_examples/multi_segment/multi_segment_channel/run_makefile.lit @@ -0,0 +1,9 @@ +// (c) Copyright 2024 Advanced Micro Devices, Inc. + // SPDX-License-Identifier: MIT + // + // REQUIRES: ryzen_ai + // + // RUN: make -f %S/Makefile clean + // RUN: make -f %S/Makefile run | FileCheck %s + // CHECK: PASS! + // XFAIL: * \ No newline at end of file diff --git a/programming_examples/multi_segment/multi_segment_dma/Makefile b/programming_examples/multi_segment/multi_segment_dma/Makefile new file mode 100644 index 000000000..844c5686d --- /dev/null +++ b/programming_examples/multi_segment/multi_segment_dma/Makefile @@ -0,0 +1,12 @@ +# Copyright (C) 2024, Advanced Micro Devices, Inc. 
+# SPDX-License-Identifier: MIT +srcdir := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) + +targetname := $(shell basename ${srcdir}) + +run: + mkdir -p build + cd build && ${powershell} python3 ${srcdir}/run.py -v + +clean: + rm -rf build __pycache__ diff --git a/programming_examples/multi_segment/multi_segment_dma/multi_segment.py b/programming_examples/multi_segment/multi_segment_dma/multi_segment.py new file mode 100644 index 000000000..fee34f23f --- /dev/null +++ b/programming_examples/multi_segment/multi_segment_dma/multi_segment.py @@ -0,0 +1,100 @@ +# Copyright (C) 2024, Advanced Micro Devices, Inc. +# SPDX-License-Identifier: MIT +import sys +from pathlib import Path # if you haven't already done so + +# Python paths are a bit complex. Taking solution from : https://stackoverflow.com/questions/16981921/relative-imports-in-python-3 +file = Path(__file__).resolve() +parent, root = file.parent, file.parents[1] +sys.path.append(str(root)) + +# Additionally remove the current file's directory from sys.path +try: + sys.path.remove(str(parent)) +except ValueError: # Already removed + pass + +from air.ir import * +from air.dialects.air import * +from air.dialects.memref import AllocOp, DeallocOp, load, store +from air.dialects.func import FuncOp +from air.dialects.scf import for_, yield_ + +range_ = for_ + +from common import * + + +@module_builder +def build_module(): + memrefTyInOut = MemRefType.get(VECTOR_SIZE, T.i32()) + + # We want to store our data in L1 memory + mem_space_l1 = IntegerAttr.get(T.i32(), MemorySpace.L1) + + # This is the type definition of the tile + image_type_l1 = MemRefType.get( + shape=VECTOR_SIZE, + element_type=T.i32(), + memory_space=mem_space_l1, + ) + + # We will send an image worth of data in and out + @FuncOp.from_py_func(memrefTyInOut, memrefTyInOut, memrefTyInOut, memrefTyInOut) + def copy(arg0, arg1, arg2, arg3): + + # The arguments are the input and output + @launch(operands=[arg0, arg1, arg2, arg3]) + def 
launch_body(a, b, c, d): + + @segment(name="seg1", operands=[a, c]) + def segment_body(arg0, arg2): + + @herd(name="addherd1", sizes=[1, 1], operands=[arg0, arg2]) + def herd_body(tx, ty, sx, sy, a, c): + + image_in_a = AllocOp(image_type_l1, [], []) + image_out_a = AllocOp(image_type_l1, [], []) + + dma_memcpy_nd(image_in_a, a) + + # Access every value in the tile + c0 = arith.ConstantOp.create_index(0) + for j in range_(VECTOR_LEN): + val_a = load(image_in_a, [c0, j]) + val_outa = arith.addi(val_a, arith.constant(T.i32(), 10)) + store(val_outa, image_out_a, [c0, j]) + yield_([]) + + dma_memcpy_nd(c, image_out_a) + DeallocOp(image_in_a) + DeallocOp(image_out_a) + + @segment(name="seg2", operands=[b, d]) + def segment_body(arg1, arg3): + + @herd(name="addherd2", sizes=[1, 1], operands=[arg1, arg3]) + def herd_body(tx, ty, sx, sy, b, d): + + image_in_b = AllocOp(image_type_l1, [], []) + image_out_b = AllocOp(image_type_l1, [], []) + + dma_memcpy_nd(image_in_b, b) + + # Access every value in the tile + c0 = arith.ConstantOp.create_index(0) + for j in range_(VECTOR_LEN): + val_b = load(image_in_b, [c0, j]) + val_outb = arith.addi(arith.constant(T.i32(), 10), val_b) + store(val_outb, image_out_b, [c0, j]) + yield_([]) + + dma_memcpy_nd(d, image_out_b) + + DeallocOp(image_in_b) + DeallocOp(image_out_b) + + +if __name__ == "__main__": + module = build_module() + print(module) diff --git a/programming_examples/multi_segment/multi_segment_dma/run.py b/programming_examples/multi_segment/multi_segment_dma/run.py new file mode 100644 index 000000000..a514eb5a3 --- /dev/null +++ b/programming_examples/multi_segment/multi_segment_dma/run.py @@ -0,0 +1,35 @@ +# run.py -*- Python -*- +# +# Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved. +# SPDX-License-Identifier: MIT +import argparse +import sys +from pathlib import Path # if you haven't already done so + +# Python paths are a bit complex. 
Taking solution from : https://stackoverflow.com/questions/16981921/relative-imports-in-python-3 +file = Path(__file__).resolve() +parent, root = file.parent, file.parents[1] +sys.path.append(str(root)) + +# Additionally remove the current file's directory from sys.path +try: + sys.path.remove(str(parent)) +except ValueError: # Already removed + pass + +from multi_segment_dma.multi_segment import build_module +from common import test_main + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + prog="run.py", + description="Builds, runs, and tests the multi-segment DMA example", + ) + + parser.add_argument( + "-v", + "--verbose", + action="store_true", + ) + args = parser.parse_args() + test_main(build_module, verbose=args.verbose) diff --git a/programming_examples/multi_segment/multi_segment_dma/run_makefile.lit b/programming_examples/multi_segment/multi_segment_dma/run_makefile.lit new file mode 100644 index 000000000..12342da6c --- /dev/null +++ b/programming_examples/multi_segment/multi_segment_dma/run_makefile.lit @@ -0,0 +1,9 @@ +// (c) Copyright 2024 Advanced Micro Devices, Inc. + // SPDX-License-Identifier: MIT + // + // REQUIRES: ryzen_ai + // + // RUN: make -f %S/Makefile clean + // RUN: make -f %S/Makefile run | FileCheck %s + // CHECK: PASS! + // XFAIL: * \ No newline at end of file