Skip to content

Commit

Permalink
Examples using Multiple Segments (Xilinx#663)
Browse files Browse the repository at this point in the history
* Add multi-segment examples
  • Loading branch information
hunhoffe authored Jul 15, 2024
1 parent 1befe53 commit 8545fe4
Show file tree
Hide file tree
Showing 19 changed files with 654 additions and 4 deletions.
6 changes: 6 additions & 0 deletions programming_examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,9 @@ This example provides logic to divide in input 2D matrix into *tiles* of data, a
## [Data Transfer Transpose](data_transfer_transpose)

Transposes a matrix using either channels or `dma_memcpy_nd`.

## [WIP: Multi-Segment Examples](multi_segment)

This is a collection of simple examples that illustrate how to use multiple segments.

Warning: This example is a work-in-progress.
17 changes: 14 additions & 3 deletions programming_examples/channel_examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,23 @@ This example focuses on one of the key abstractions of air: *channels*. This is

## Running and Testing

#### ```herd-to-herd```: Using a channel to pass data between herds
#### ```herd-to-herd```: Using a channel to pass data between herds

This example ([herd_to_herd/herd_to_herd.py](herd_to_herd/herd_to_herd.py)) defines two `herd`s within the same `launch` + `segment`. There is a *producer herd*, which writes data to a `Herd2Herd` channel, and a *consumer herd*, which reads data form the `Herd2Herd` channel.
There are two parts to this example: two herds within one segment (single segment), and one herd per segment across two segments (multi-segment).

The single-segment example ([herd_to_herd/single_segment/herd_to_herd.py](herd_to_herd/single_segment/herd_to_herd.py)) defines two `herd`s within the same `launch` + `segment`. There is a *producer herd*, which writes data to a `Herd2Herd` channel, and a *consumer herd*, which reads data from the `Herd2Herd` channel.

```bash
cd herd_to_herd/single_segment
make clean && make
```

The multi-segment example ([herd_to_herd/multi_segment/herd_to_herd.py](herd_to_herd/multi_segment/herd_to_herd.py)) defines two `segment`s, each with one `herd`, within the same `launch`. There is a *producer_segment* with a *producer herd*, which writes data to a `Herd2Herd` channel, and a *consumer_segment* with a *consumer herd*, which reads data from the `Herd2Herd` channel.

Warning: The multi-segment example is a work in progress!

```bash
cd herd_to_herd
cd herd_to_herd/multi_segment
make clean && make
```

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Copyright (C) 2024, Advanced Micro Devices, Inc.
# SPDX-License-Identifier: MIT
# Absolute directory containing this Makefile, so run.py is located correctly
# even when make is invoked from another directory with -f.
srcdir := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))

targetname := $(shell basename ${srcdir})

# run and clean name actions, not files. Without .PHONY, a file or directory
# called "run" or "clean" in the working directory would turn them into no-ops.
.PHONY: run clean

# Execute the example from a scratch build/ directory under the current
# working directory. ${powershell} expands to nothing when unset; presumably
# it is a command prefix supplied by the environment -- confirm.
run:
	mkdir -p build
	cd build && ${powershell} python3 ${srcdir}/run.py -v

# Remove generated artifacts.
clean:
	rm -rf build __pycache__
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
# Copyright (C) 2024, Advanced Micro Devices, Inc.
# SPDX-License-Identifier: MIT

from air.ir import *
from air.dialects.air import *
from air.dialects.memref import AllocOp, DeallocOp, load, store
from air.dialects.func import FuncOp
from air.dialects.scf import for_, yield_

# Alias the scf dialect's for_ builder so the loops below read like range().
range_ = for_

# Image dimensions in elements; IMAGE_SIZE is the shape given to the memref
# types created in build_module().
IMAGE_WIDTH = 32
IMAGE_HEIGHT = 16
IMAGE_SIZE = [IMAGE_WIDTH, IMAGE_HEIGHT]


@module_builder
def build_module():
    """Construct the MLIR module for the multi-segment herd-to-herd example.

    The module defines a ``copy`` function whose launch puts its first
    argument into the ``ChanIn`` channel and fills its second argument from
    the ``ChanOut`` channel. Two segments sit in between: the producer herd
    squares every element and forwards the result over ``Herd2Herd``; the
    consumer herd adds one to every element and puts the result on
    ``ChanOut``.
    """
    # Type of the function-level input and output buffers
    io_memref_type = MemRefType.get(IMAGE_SIZE, T.i32())

    # The buffers the herds operate on are placed in L1 memory
    l1_memory_space = IntegerAttr.get(T.i32(), MemorySpace.L1)

    # An image-sized i32 buffer type tagged with the L1 memory space
    l1_image_type = MemRefType.get(
        shape=IMAGE_SIZE,
        element_type=T.i32(),
        memory_space=l1_memory_space,
    )

    # Channels carrying the input data in and the output data out
    ChannelOp("ChanIn")
    ChannelOp("ChanOut")

    # Channel used to pass data between the workers of the two herds
    ChannelOp("Herd2Herd")

    # One image worth of data goes in and one comes out
    @FuncOp.from_py_func(io_memref_type, io_memref_type)
    def copy(arg0, arg1):

        # The launch receives the input and output buffers
        @launch(operands=[arg0, arg1])
        def launch_body(a, b):

            # Feed the whole input into its channel
            ChannelPut("ChanIn", a)

            # Drain the whole output from its channel
            ChannelGet("ChanOut", b)

            @segment(name="producer_segment")
            def segment_body():

                @herd(name="producer_herd", sizes=[1, 1])
                def herd_body(tx, ty, sx, sy):

                    # Image-sized scratch buffers for the input and the result
                    src_image = AllocOp(l1_image_type, [], [])
                    dst_image = AllocOp(l1_image_type, [], [])

                    ChannelGet("ChanIn", src_image)

                    # Visit every element of the image
                    for j in range_(IMAGE_HEIGHT):
                        for i in range_(IMAGE_WIDTH):
                            # Read the input element
                            pixel = load(src_image, [i, j])

                            # Square it
                            squared = arith.muli(pixel, pixel)

                            # Write the result element
                            store(squared, dst_image, [i, j])
                            yield_([])
                        yield_([])

                    ChannelPut("Herd2Herd", dst_image)

                    DeallocOp(src_image)
                    DeallocOp(dst_image)

            @segment(name="consumer_segment")
            def segment_body():

                @herd(name="consumer_herd", sizes=[1, 1])
                def herd_body(tx, ty, sx, sy):

                    # Image-sized scratch buffers for the input and the result
                    src_image = AllocOp(l1_image_type, [], [])
                    dst_image = AllocOp(l1_image_type, [], [])

                    ChannelGet("Herd2Herd", src_image)

                    # Visit every element of the image
                    for j in range_(IMAGE_HEIGHT):
                        for i in range_(IMAGE_WIDTH):
                            # Read the input element
                            pixel = load(src_image, [i, j])

                            # Add one to it; the constant is created after
                            # the load so ops are emitted in the same order
                            one = arith.ConstantOp(T.i32(), 1)
                            incremented = arith.addi(pixel, one)

                            # Write the result element
                            store(incremented, dst_image, [i, j])
                            yield_([])
                        yield_([])

                    ChannelPut("ChanOut", dst_image)

                    DeallocOp(src_image)
                    DeallocOp(dst_image)


if __name__ == "__main__":
    # Running this file as a script prints the generated MLIR module.
    print(build_module())
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# run.py -*- Python -*-
#
# Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved.
# SPDX-License-Identifier: MIT
import argparse
import numpy as np
import air.backend.xrt as xrt_backend
import filelock

from herd_to_herd import *

# Element type of the host-side buffers handed to the compiled module.
INOUT_DATATYPE = np.uint32
# Size in bytes of one INOUT_DATATYPE element.
INOUT_ELEM_SIZE = np.dtype(INOUT_DATATYPE).itemsize
# Number of elements in one buffer: the product of the two IMAGE_SIZE extents.
INOUT_SIZE = IMAGE_SIZE[0] * IMAGE_SIZE[1]
# Size in bytes of one whole buffer.
INOUT_SIZE_BYTES = INOUT_SIZE * INOUT_ELEM_SIZE


def print_matrix(matrix_array):
    """Print a flat buffer as IMAGE_HEIGHT rows of IMAGE_WIDTH hex values.

    Only the low 16 bits of each element are shown, formatted as four
    zero-padded hex digits followed by a single space.
    """
    for row_idx in range(IMAGE_HEIGHT):
        start = row_idx * IMAGE_WIDTH
        row_values = matrix_array[start : start + IMAGE_WIDTH]
        # One write per row; the trailing space after each value is kept.
        print("".join(f"{val & 0xFFFF:04x} " for val in row_values))


def test_main(build_module, verbose=False):
    """Build, run, and check the herd-to-herd multi-segment example.

    Args:
        build_module: Zero-argument callable returning the MLIR module to
            compile and run.
        verbose: Forwarded to the XRT backend; when true the second input
            buffer and the output buffer are also printed with
            ``print_matrix``.

    Raises:
        SystemExit: Always. Status 0 (after printing ``PASS!``) when every
            output element equals the expected value, status -1 otherwise.
    """
    mlir_module = build_module()

    # First buffer: the data to process. Second buffer: receives the result;
    # it is seeded with a value different from the expected output, so an
    # unwritten element fails the check below.
    input_a = np.full(INOUT_SIZE, 0x2, dtype=INOUT_DATATYPE)
    input_b = np.full(INOUT_SIZE, 0x00C0FFEE, dtype=INOUT_DATATYPE)

    backend = xrt_backend.XRTBackend(
        verbose=verbose, experimental_passes=True, omit_while_true_loop=True
    )

    if verbose:
        print_matrix(input_b)

    # Compile and run while holding the file lock (presumably guarding a
    # shared NPU device -- confirm). The backend is unloaded after the lock
    # is released, and also when compilation or execution raises.
    try:
        with filelock.FileLock("/tmp/npu.lock"):
            run_kernel = backend.compile_and_load(mlir_module)
            (_, output_b) = run_kernel(input_a, input_b)
    finally:
        backend.unload()

    if verbose:
        print_matrix(output_b)

    # Input value squared, plus one; presumably mirrors the producer (square)
    # and consumer (add one) herds defined in herd_to_herd.py.
    expected_value = 0x2 * 0x2 + 1
    errors = sum(1 for i in range(INOUT_SIZE) if output_b[i] != expected_value)

    if errors == 0:
        print("PASS!")
        raise SystemExit(0)

    print("failed. errors=", errors)
    raise SystemExit(-1)


if __name__ == "__main__":
    # Command-line entry point: parse the optional verbosity flag, then run
    # the build-and-check flow.
    arg_parser = argparse.ArgumentParser(
        description="Builds, runs, and tests the herd-to-herd multi-segment example",
        prog="run.py",
    )
    arg_parser.add_argument("-v", "--verbose", action="store_true")
    cli_args = arg_parser.parse_args()
    test_main(build_module, verbose=cli_args.verbose)
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// (c) Copyright 2024 Advanced Micro Devices, Inc.
// SPDX-License-Identifier: MIT
//
// REQUIRES: ryzen_ai
//
// RUN: make -f %S/Makefile clean
// RUN: make -f %S/Makefile run | FileCheck %s
// CHECK: PASS!
// XFAIL: *
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def test_main(build_module, verbose=False):
if __name__ == "__main__":
parser = argparse.ArgumentParser(
prog="run.py",
description="Builds, runs, and tests the channel_examples/herd_to_herd example",
description="Builds, runs, and tests the herd-to-herd multi-segment example",
)

parser.add_argument(
Expand Down
82 changes: 82 additions & 0 deletions programming_examples/multi_segment/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# common.py -*- Python -*-
#
# Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved.
# SPDX-License-Identifier: MIT
import numpy as np
import air.backend.xrt as xrt_backend
import filelock

# Number of elements in each vector; VECTOR_SIZE is the same data expressed
# as a two-extent shape with a trailing extent of 1.
VECTOR_LEN = 32
VECTOR_SIZE = [VECTOR_LEN, 1]

# Element type of the host-side buffers handed to the compiled module.
INOUT_DATATYPE = np.uint32
# Size in bytes of one INOUT_DATATYPE element.
INOUT_ELEM_SIZE = np.dtype(INOUT_DATATYPE).itemsize
# Number of elements in one buffer: the product of the two VECTOR_SIZE extents.
INOUT_SIZE = VECTOR_SIZE[0] * VECTOR_SIZE[1]
# Size in bytes of one whole buffer.
INOUT_SIZE_BYTES = INOUT_SIZE * INOUT_ELEM_SIZE


def test_main(build_module, verbose=False):
    """Build, run, and check a multi-segment example with four buffers.

    Args:
        build_module: Zero-argument callable returning the MLIR module to
            compile and run.
        verbose: Forwarded to the XRT backend; when true the first two input
            buffers and the last two output buffers are also printed.

    Raises:
        SystemExit: Always. Status 0 (after printing ``PASS!``) when every
            element of the third output equals 12 and every element of the
            fourth equals 13; status -1 otherwise.
    """
    mlir_module = build_module()

    # a and b carry the data to process. c and d receive the results and are
    # seeded with values different from the expected outputs, so an unwritten
    # element fails the checks below.
    input_a = np.full(INOUT_SIZE, 0x2, dtype=INOUT_DATATYPE)
    input_b = np.full(INOUT_SIZE, 0x3, dtype=INOUT_DATATYPE)
    input_c = np.full(INOUT_SIZE, 0x00C0FFEE, dtype=INOUT_DATATYPE)
    input_d = np.full(INOUT_SIZE, 0x0000CAFE, dtype=INOUT_DATATYPE)

    backend = xrt_backend.XRTBackend(
        verbose=verbose, experimental_passes=True, omit_while_true_loop=True
    )

    if verbose:
        print(input_a)
        print(input_b)

    # Compile and run while holding the file lock (presumably guarding a
    # shared NPU device -- confirm). The backend is unloaded after the lock
    # is released, and also when compilation or execution raises.
    try:
        with filelock.FileLock("/tmp/npu.lock"):
            run_kernel = backend.compile_and_load(mlir_module)
            (_, _, output_c, output_d) = run_kernel(
                input_a, input_b, input_c, input_d
            )
    finally:
        backend.unload()

    if verbose:
        print(output_c)
        print(output_d)

    # Expected constants for the two outputs. NOTE(review): these depend on
    # the kernel supplied through build_module, which is not visible here --
    # confirm they match each example that uses this helper.
    expected_c = 12
    expected_d = 13

    errors = sum(1 for i in range(INOUT_SIZE) if output_c[i] != expected_c)
    errors += sum(1 for i in range(INOUT_SIZE) if output_d[i] != expected_d)

    if errors == 0:
        print("PASS!")
        raise SystemExit(0)

    print("failed. errors=", errors)
    raise SystemExit(-1)
12 changes: 12 additions & 0 deletions programming_examples/multi_segment/multi_segment_channel/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Copyright (C) 2024, Advanced Micro Devices, Inc.
# SPDX-License-Identifier: MIT
# Absolute directory containing this Makefile, so run.py is located correctly
# even when make is invoked from another directory with -f.
srcdir := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))

targetname := $(shell basename ${srcdir})

# run and clean name actions, not files. Without .PHONY, a file or directory
# called "run" or "clean" in the working directory would turn them into no-ops.
.PHONY: run clean

# Execute the example from a scratch build/ directory under the current
# working directory. ${powershell} expands to nothing when unset; presumably
# it is a command prefix supplied by the environment -- confirm.
run:
	mkdir -p build
	cd build && ${powershell} python3 ${srcdir}/run.py -v

# Remove generated artifacts.
clean:
	rm -rf build __pycache__
Loading

0 comments on commit 8545fe4

Please sign in to comment.