Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Armv7-M: Allow register overlap in ldm + ldrd #153

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
1 change: 1 addition & 0 deletions example.py
Original file line number Diff line number Diff line change
Expand Up @@ -670,6 +670,7 @@ def core(self,slothy):
slothy.config.variable_size=True
slothy.config.inputs_are_outputs = True
slothy.optimize(start="start", end="end")
slothy.fusion_region("start", "end", ssa=False)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You need to call fusion_region before optimize; otherwise it does not help SLOTHY find a better solution.


class Armv7mExample0Func(Example):
def __init__(self, var="", arch=Arch_Armv7M, target=Target_CortexM7):
Expand Down
3 changes: 3 additions & 0 deletions examples/naive/armv7m/armv7m_simple0.s
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,7 @@ smlabt r3,r2, r2, r1
asrs r3, r3,#1
str r3, [r0,#4] // @slothy:writes=a

ldrd r1, r3, [r1, #8]
str r1,[r0]
ldm r0 ,{r0-r3}
end:
106 changes: 39 additions & 67 deletions examples/opt/armv7m/armv7m_simple0_opt_m7.s
Original file line number Diff line number Diff line change
@@ -1,69 +1,41 @@

start:
// Instructions: 24
// Expected cycles: 26
// Expected IPC: 0.92
//
// Cycle bound: 26.0
// IPC bound: 0.92
//
// Wall time: 0.20s
// User time: 0.20s
//
// ----- cycle (expected) ------>
// 0 25
// |------------------------|----
ldr r6, [r0, #4] // *............................. // @slothy:reads=a
add r10, r2, r6 // .*............................
eor.w r1, r10, r3 // ..*...........................
smlabt r7, r2, r2, r1 // ..*...........................
asrs r5, r7, #1 // ....*.........................
str r5, [r0, #4] // ....*......................... // @slothy:writes=a
ldm r0, {r7,r9,r11} // .....*........................ // @slothy:reads=a
add r8, r9, r7 // ........*.....................
eor.w r2, r8, r11 // .........*....................
smlabt r12, r9, r9, r2 // .........*....................
asrs r11, r12, #1 // ...........*..................
str r11, [r0, #4] // ...........*.................. // @slothy:writes=a
ldm r0, {r7,r8,r10} // ............*................. // @slothy:reads=a
add r6, r8, r7 // ...............*..............
eor.w r5, r6, r10 // ................*.............
smlabt r12, r8, r8, r5 // ................*.............
asrs r9, r12, #1 // ..................*...........
str r9, [r0, #4] // ..................*........... // @slothy:writes=a
ldm r0, {r1,r2,r8} // ...................*.......... // @slothy:reads=a
add r14, r2, r1 // ......................*.......
eor.w r5, r14, r8 // .......................*......
smlabt r10, r2, r2, r5 // .......................*......
asrs r3, r10, #1 // .........................*....
str r3, [r0, #4] // .........................*.... // @slothy:writes=a
start:
ldr r4, [r0, #4] // *............................. // @slothy:reads=a
add r4, r2, r4 // .*............................
eor.w r4, r4, r3 // ..*...........................
smlabt r2, r2, r2, r4 // ..*...........................
asrs r2, r2, #1 // ....*.........................
str r2, [r0, #4] // ....*......................... // @slothy:writes=a
ldr r11, [r0, #0]// .....*........................ // @slothy:reads=a
ldr r12, [r0, #4]// .....*........................ // @slothy:reads=a
ldr r14, [r0, #8]// .....*........................ // @slothy:reads=a
add r2, r12, r11 // ........*.....................
eor.w r2, r2, r14 // .........*....................
smlabt r2, r12, r12, r2 // .........*....................
asrs r2, r2, #1 // ...........*..................
str r2, [r0, #4] // ...........*.................. // @slothy:writes=a
ldr r11, [r0, #0]// ............*................. // @slothy:reads=a
ldr r12, [r0, #4]// ............*................. // @slothy:reads=a
ldr r14, [r0, #8]// ............*................. // @slothy:reads=a
add r2, r12, r11 // ...............*..............
eor.w r2, r2, r14 // ................*.............
smlabt r2, r12, r12, r2 // ................*.............
asrs r2, r2, #1 // ..................*...........
str r2, [r0, #4] // ..................*........... // @slothy:writes=a
ldr r11, [r0, #0]// ...................*.......... // @slothy:reads=a
ldr r12, [r0, #4]// ...................*.......... // @slothy:reads=a
ldr r14, [r0, #8]// ...................*.......... // @slothy:reads=a
add r2, r12, r11 // ......................*.......
eor.w r14, r2, r14 // .......................*......
smlabt r6, r12, r12, r14 // .......................*......
ldr r4, [r14, #8]// ........................*.....
ldr r2, [r14, #12]// ........................*.....
str r14, [r0] // ........................*.....
asrs r2, r6, #1 // .........................*....
str r2, [r0, #4] // .........................*.... // @slothy:writes=a
ldr r1, [r0, #4]// ..........................*...
ldr r2, [r0, #8]// ..........................*...
ldr r3, [r0, #12]// ..........................*...
ldr r0, [r0, #0]// ..........................*...
end:

// ------ cycle (expected) ------>
// 0 25
// |------------------------|-----
// ldr r1, [r0, #4] // *..............................
// add r1, r2,r1 // .*.............................
// eor.w r1,r1, r3 // ..*............................
// smlabt r3,r2, r2, r1 // ..*............................
// asrs r3, r3,#1 // ....*..........................
// str r3, [r0,#4] // ....*..........................
// ldm r0, {r1-r2,r14} // .....*.........................
// add r1, r2,r1 // ........*......................
// eor.w r1,r1, r14 // .........*.....................
// smlabt r3,r2, r2, r1 // .........*.....................
// asrs r3, r3,#1 // ...........*...................
// str r3, [r0,#4] // ...........*...................
// ldm r0, {r1-r3} // ............*..................
// add r1, r2,r1 // ...............*...............
// eor.w r1,r1, r3 // ................*..............
// smlabt r3,r2, r2, r1 // ................*..............
// asrs r3, r3,#1 // ..................*............
// str r3, [r0,#4] // ..................*............
// ldm r0, {r1,r2,r3} // ...................*...........
// add r1, r2,r1 // ......................*........
// eor.w r1,r1, r3 // .......................*.......
// smlabt r3,r2, r2, r1 // .......................*.......
// asrs r3, r3,#1 // .........................*.....
// str r3, [r0,#4] // .........................*.....

end:
34 changes: 30 additions & 4 deletions slothy/targets/arm_v7m/arch_v7m.py
Original file line number Diff line number Diff line change
Expand Up @@ -1486,7 +1486,7 @@ def make(cls, src):
obj.increment = None
obj.pre_index = 0
obj.addr = obj.args_in[0]
obj.args_in_out_different = [(0,0)] # Can't have Rd==Ra
#obj.args_in_out_different = [(0,0)] # Can't have Rd==Ra
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please remove those lines rather than commenting them out.
Also, we need to test whether this affects any other examples in SLOTHY.

For that please make sure you have a clean copy of SLOTHY, and then run

python3 example.py --timeout 60 --only-target=slothy.targets.arm_v7m.cortex_m7

This is going to run for a few hours. Then zip up the output files in examples/opt/armv7m and attach them to this PR.

return obj

def write(self):
Expand All @@ -1505,7 +1505,7 @@ def make(cls, src):
obj.increment = None
obj.pre_index = obj.immediate
obj.addr = obj.args_in[0]
obj.args_in_out_different = [(0,0)] # Can't have Rd==Ra
#obj.args_in_out_different = [(0,0)] # Can't have Rd==Ra
return obj

def write(self):
Expand All @@ -1528,7 +1528,7 @@ def make(cls, src):
obj = Armv7mInstruction.build(cls, src)
obj.increment = None
obj.pre_index = obj.immediate
obj.args_in_out_different = [(0,0)] # Can't have Rd==Ra
#obj.args_in_out_different = [(0,0)] # Can't have Rd==Ra
obj.addr = obj.args_in[0]
return obj

Expand All @@ -1545,7 +1545,7 @@ def make(cls, src):
obj = Armv7mInstruction.build(cls, src)
obj.increment = None
obj.pre_index = obj.immediate
obj.args_in_out_different = [(0,0)] # Can't have Rd==Ra
#obj.args_in_out_different = [(0,0)] # Can't have Rd==Ra
obj.addr = obj.args_in[0]
return obj

Expand Down Expand Up @@ -1940,6 +1940,19 @@ def core(inst,t,log=None):
add_comments(inst.source_line.comments)
ldr.source_line = ldr_src

# If the address register also appears in the register list,
# the ldr that overwrites the address register must come last,
# because the other ldrs still read the address from it.
ldrs_reordered = []
mkannwischer marked this conversation as resolved.
Show resolved Hide resolved
for ldr, reg in zip(ldrs, regs):
if reg != ptr:
ldrs_reordered.append(ldr)

for ldr, reg in zip(ldrs, regs):
if reg == ptr:
ldrs_reordered.append(ldr)
ldrs = ldrs_reordered

if log is not None:
log(f"ldm splitting: {t.inst}; {[ldr for ldr in ldrs]}")

Expand Down Expand Up @@ -2128,6 +2141,19 @@ def core(inst,t,log=None):
add_comments(inst.source_line.comments)
ldr.source_line = ldr_src

# If the address register is also one of the destination registers,
# the ldr that overwrites the address register must come last,
# because the other ldr still reads the address from it.
ldrs_reordered = []
mkannwischer marked this conversation as resolved.
Show resolved Hide resolved
for ldr, reg in zip(ldrs, regs):
if reg != ptr:
ldrs_reordered.append(ldr)

for ldr, reg in zip(ldrs, regs):
if reg == ptr:
ldrs_reordered.append(ldr)
ldrs = ldrs_reordered

if log is not None:
log(f"ldrd splitting: {t.inst}; {[ldr for ldr in ldrs]}")

Expand Down