From 46296df89ef7a7b283d6c78c0e4c3783fe1873ae Mon Sep 17 00:00:00 2001 From: Hanno Becker Date: Sat, 16 Mar 2024 06:23:14 +0000 Subject: [PATCH] WIP: Add suggestion to discuss macros and aliases --- .../naive/aarch64/aarch64_simple0_macros.s | 50 +++++++++++++++++++ tutorial/README.md | 5 +- tutorial/tutorial-3-macros.py | 23 +++++++++ 3 files changed, 77 insertions(+), 1 deletion(-) create mode 100644 examples/naive/aarch64/aarch64_simple0_macros.s create mode 100644 tutorial/tutorial-3-macros.py diff --git a/examples/naive/aarch64/aarch64_simple0_macros.s b/examples/naive/aarch64/aarch64_simple0_macros.s new file mode 100644 index 00000000..52a86748 --- /dev/null +++ b/examples/naive/aarch64/aarch64_simple0_macros.s @@ -0,0 +1,50 @@ +qdata0 .req q8 +qdata1 .req q9 +qdata2 .req q10 +qdata3 .req q11 + +qtwiddle .req q0 + +data0 .req v8 +data1 .req v9 +data2 .req v10 +data3 .req v11 + +twiddle .req v0 +modulus .req v1 + +tmp .req v12 + +data_ptr .req x0 +twiddle_ptr .req x1 + +.macro barmul out, in, twiddle, modulus + mul \out.8h, \in.8h, \twiddle.h[0] + sqrdmulh \in.8h, \in.8h, \twiddle.h[1] + mls \out.8h, \in.8h, \modulus.h[0] +.endm + +.macro butterfly data0, data1, tmp, twiddle, modulus + barmul \tmp, \data1, \twiddle, \modulus + sub \data1.8h, \data0.8h, \tmp.8h + add \data0.8h, \data0.8h, \tmp.8h +.endm + +start: + + ldr qtwiddle, [twiddle_ptr, #0] + + ldr qdata0, [data_ptr, #0*16] + ldr qdata1, [data_ptr, #1*16] + ldr qdata2, [data_ptr, #2*16] + ldr qdata3, [data_ptr, #3*16] + + butterfly data0, data1, tmp, twiddle, modulus + butterfly data2, data3, tmp, twiddle, modulus + + str qdata0, [data_ptr], #4*16 + str qdata1, [data_ptr, #-3*16] + str qdata2, [data_ptr, #-2*16] + str qdata3, [data_ptr, #-1*16] + +end: diff --git a/tutorial/README.md b/tutorial/README.md index ecca4dce..765ff57a 100644 --- a/tutorial/README.md +++ b/tutorial/README.md @@ -244,7 +244,10 @@ To make use of these issue slots one would have to mix in scalar instructions (o Also note the 
registers used: In the original code `v24` was as a temporary register in both computation streams preventing to effectively interleave them. SLOTHY renamed those registers to be able to interleave both computations. Other registers have also been arbitrarily renamed, but without any specific reason. -Even with this small Neon example, you can see that understanding the input code is much easier than the output code which is the reason why we believe SLOTHY can help with writing auditable code. +Even with this small Neon example, you can see that understanding the input code is much easier than the output code, +which is why we believe SLOTHY can help with writing auditable code. + +**TODO: Add section on taking readability one step further by making extensive use of aliases and macros? See `aarch64_simple0_macros.s`** ## 3. Writing your own calling code diff --git a/tutorial/tutorial-3-macros.py b/tutorial/tutorial-3-macros.py new file mode 100644 index 00000000..1b1cac46 --- /dev/null +++ b/tutorial/tutorial-3-macros.py @@ -0,0 +1,23 @@ +import logging +import sys + +sys.path.append("../") +from slothy import Slothy + +import slothy.targets.aarch64.aarch64_neon as AArch64_Neon +import slothy.targets.aarch64.cortex_a55 as Target_CortexA55 + +logging.basicConfig(stream=sys.stdout, level=logging.INFO) + +arch = AArch64_Neon +target = Target_CortexA55 + +slothy = Slothy(arch, target) + +# example +slothy.load_source_from_file("../examples/naive/aarch64/aarch64_simple0_macros.s") +slothy.config.variable_size=True +slothy.config.constraints.stalls_first_attempt=32 + +slothy.optimize(start="start", end="end") +slothy.write_source_to_file("../examples/opt/aarch64/aarch64_simple0_macros_a55.s")