Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into armv7m
Browse files Browse the repository at this point in the history
  • Loading branch information
mkannwischer committed Dec 14, 2024
2 parents d6bb231 + 097fde4 commit 59e3c1b
Show file tree
Hide file tree
Showing 7 changed files with 210 additions and 92 deletions.
4 changes: 4 additions & 0 deletions example.py
Original file line number Diff line number Diff line change
Expand Up @@ -2873,6 +2873,10 @@ def main():
AArch64IfElse(),

AArch64Split0(),
<<<<<<< HEAD
=======

>>>>>>> origin/main
# Armv7m examples
Armv7mExample0(),
Armv7mExample0Func(),
Expand Down
35 changes: 29 additions & 6 deletions examples/naive/armv7m/armv7m_simple0.s
Original file line number Diff line number Diff line change
@@ -1,9 +1,32 @@

start:
ldr r1, [r0, #4]
add r1, r2, r1
eor.w r1, r1, r3
smlabt r3, r2, r2, r1
asrs r3, r3, #1
str r3, [r0, #4]
ldr r1, [r0, #4]
add r1, r2,r1
eor.w r1,r1, r3
smlabt r3,r2, r2, r1
asrs r3, r3,#1
str r3, [r0,#4]

ldm r0, {r1-r2,r14}
add r1, r2,r1
eor.w r1,r1, r14
smlabt r3,r2, r2, r1
asrs r3, r3,#1
str r3, [r0,#4]


ldm r0, {r1-r3}
add r1, r2,r1
eor.w r1,r1, r3
smlabt r3,r2, r2, r1
asrs r3, r3,#1
str r3, [r0,#4]

ldm r0, {r1,r2,r3}
add r1, r2,r1
eor.w r1,r1, r3
smlabt r3,r2, r2, r1
asrs r3, r3,#1
str r3, [r0,#4]

end:
92 changes: 64 additions & 28 deletions examples/opt/armv7m/armv7m_simple0_opt_m7.s
Original file line number Diff line number Diff line change
@@ -1,33 +1,69 @@

start:
// Instructions: 6
// Expected cycles: 5
// Expected IPC: 1.20
//
// Cycle bound: 5.0
// IPC bound: 1.20
//
// Wall time: 0.02s
// User time: 0.02s
//
// ----- cycle (expected) ------>
// 0 25
// |------------------------|----
ldr r6, [r0, #4] // *.............................
add r6, r2, r6 // .*............................
eor.w r3, r6, r3 // ..*...........................
smlabt r12, r2, r2, r3 // ..*...........................
asrs r3, r12, #1 // ....*.........................
str r3, [r0, #4] // ....*.........................
// Instructions: 24
// Expected cycles: 14
// Expected IPC: 1.71
//
// Cycle bound: 14.0
// IPC bound: 1.71
//
// Wall time: 0.40s
// User time: 0.40s
//
// ----- cycle (expected) ------>
// 0 25
// |------------------------|----
ldr r10, [r0, #4] // *.............................
ldm r0, {r1,r7,r14} // .*............................
add r10, r2, r10 // .*............................
eor.w r10, r10, r3 // ..*...........................
smlabt r2, r2, r2, r10 // ..*...........................
ldm r0, {r10,r11,r12} // ....*.........................
add r1, r7, r1 // ....*.........................
eor.w r1, r1, r14 // .....*........................
smlabt r6, r7, r7, r1 // .....*........................
asrs r1, r2, #1 // ......*.......................
str r1, [r0, #4] // ......*.......................
add r4, r11, r10 // .......*......................
ldm r0, {r1,r2,r14} // .......*......................
asrs r10, r6, #1 // ........*.....................
eor.w r6, r4, r12 // ........*.....................
smlabt r11, r11, r11, r6 // .........*....................
add r1, r2, r1 // ..........*...................
str r10, [r0, #4] // ..........*...................
eor.w r1, r1, r14 // ...........*..................
smlabt r7, r2, r2, r1 // ...........*..................
asrs r1, r11, #1 // ............*.................
str r1, [r0, #4] // ............*.................
asrs r3, r7, #1 // .............*................
str r3, [r0, #4] // .............*................

// ------ cycle (expected) ------>
// 0 25
// |------------------------|-----
// ldr r1, [r0, #4] // *..............................
// add r1, r2, r1 // .*.............................
// eor.w r1, r1, r3 // ..*............................
// smlabt r3, r2, r2, r1 // ..*............................
// asrs r3, r3, #1 // ....*..........................
// str r3, [r0, #4] // ....*..........................
// ------ cycle (expected) ------>
// 0 25
// |------------------------|-----
// ldr r1, [r0, #4] // *..............................
// add r1, r2,r1 // .*.............................
// eor.w r1,r1, r3 // ..*............................
// smlabt r3,r2, r2, r1 // ..*............................
// asrs r3, r3,#1 // ......*........................
// str r3, [r0,#4] // ......*........................
// ldm r0, {r1-r2,r14} // ....*..........................
// add r1, r2,r1 // .......*.......................
// eor.w r1,r1, r14 // ........*......................
// smlabt r3,r2, r2, r1 // .........*.....................
// asrs r3, r3,#1 // ............*..................
// str r3, [r0,#4] // ............*..................
// ldm r0, {r1-r3} // .*.............................
// add r1, r2,r1 // ....*..........................
// eor.w r1,r1, r3 // .....*.........................
// smlabt r3,r2, r2, r1 // .....*.........................
// asrs r3, r3,#1 // ........*......................
// str r3, [r0,#4] // ..........*....................
// ldm r0, {r1,r2,r3} // .......*.......................
// add r1, r2,r1 // ..........*....................
// eor.w r1,r1, r3 // ...........*...................
// smlabt r3,r2, r2, r1 // ...........*...................
// asrs r3, r3,#1 // .............*.................
// str r3, [r0,#4] // .............*.................

end:
20 changes: 18 additions & 2 deletions slothy/core/slothy.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,7 +412,17 @@ def _fusion_core(self, pre, body, post, logger, ssa=True):
return body

def fusion_region(self, start, end, **kwargs):
"""Run fusion callbacks on straightline code"""
""" Run fusion callbacks on straightline code replacing certain
instruction (sequences) with an alternative. These replacements are
defined in the architectural model by setting an instruction class'
global_fusion_cb.
Args:
start: The label marking the beginning of the part of the code to
apply fusion to.
end: The label marking the end of the part of the code to apply
fusion to.
"""
logger = self.logger.getChild(f"ssa_{start}_{end}")
pre, body, post = AsmHelper.extract(self.source, start, end)

Expand All @@ -423,7 +433,13 @@ def fusion_region(self, start, end, **kwargs):
assert SourceLine.is_source(self.source)

def fusion_loop(self, loop_lbl, **kwargs):
"""Run fusion callbacks on loop body"""
"""Run fusion callbacks on loop body replacing certain instruction
(sequences) with an alternative. These replacements are defined in the
architectural model by setting an instruction class' global_fusion_cb.
Args:
loop_lbl: Label of the loop to which the fusions are applied.
"""
logger = self.logger.getChild(f"ssa_loop_{loop_lbl}")

pre , body, post, _, other_data, loop = \
Expand Down
18 changes: 14 additions & 4 deletions slothy/targets/aarch64/aarch64_neon.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ class which generates instruction parsers and writers from instruction templates
from unicorn.arm64_const import *

from slothy.targets.common import *
from slothy.helper import Loop, LLVM_Mc
from slothy.helper import Loop, LLVM_Mc, SourceLine

arch_name = "Arm_AArch64"

Expand Down Expand Up @@ -3219,6 +3219,12 @@ def core(inst,t, log=None):
q_ld2_lane_post_inc.global_parsing_cb = q_ld2_lane_post_inc_parsing_cb()

def eor3_fusion_cb():
"""
Example of a fusion callback. Allows merging two eor instructions with
two inputs into one eor with three inputs. Such a technique can help perform
transformations in case of differences between uArchs.
Note: This is not used in any real (crypto) example. This is merely a PoC.
"""
def core(inst,t,log=None):
succ = None

Expand Down Expand Up @@ -3275,10 +3281,13 @@ def core(inst,t,log=None):

return core

# TODO: Test only...
# veor.global_fusion_cb = eor3_fusion_cb()

def eor3_splitting_cb():
"""
Example of a splitting callback. Allows splitting one eor instruction with
three inputs into two eors with two inputs. Such a technique can help perform
transformations in case of differences between uArchs.
Note: This is not used in any real (crypto) example. This is merely a PoC.
"""
def core(inst,t,log=None):

d = inst.args_out[0]
Expand Down Expand Up @@ -3318,6 +3327,7 @@ def core(inst,t,log=None):

return core

# Can alternatively set veor3.global_fusion_cb to eor3_fusion_cb() here
veor3.global_fusion_cb = eor3_splitting_cb()

def iter_aarch64_instructions():
Expand Down
Loading

0 comments on commit 59e3c1b

Please sign in to comment.