Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Armv7-M: Add BranchLoop type #129

Merged
merged 2 commits into from
Dec 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions example.py
Original file line number Diff line number Diff line change
Expand Up @@ -703,7 +703,7 @@ def __init__(self, var="", arch=Arch_Armv7M, target=Target_CortexM7):

def core(self,slothy):
slothy.config.variable_size=True
slothy.optimize_loop("start")
slothy.optimize_loop("start", forced_loop_type=Arch_Armv7M.SubsLoop)

class Armv7mLoopCmp(Example):
def __init__(self, var="", arch=Arch_Armv7M, target=Target_CortexM7):
Expand All @@ -720,7 +720,7 @@ def __init__(self, var="", arch=Arch_Armv7M, target=Target_CortexM7):
def core(self,slothy):
slothy.config.variable_size=True
slothy.config.outputs = ["r6"]
slothy.optimize_loop("start")
slothy.optimize_loop("start", forced_loop_type=Arch_Armv7M.CmpLoop)

class Armv7mLoopVmovCmp(Example):
def __init__(self, var="", arch=Arch_Armv7M, target=Target_CortexM7):
Expand Down
13 changes: 11 additions & 2 deletions slothy/core/slothy.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,7 +450,12 @@ def fusion_loop(self, loop_lbl, forced_loop_type=None, **kwargs):

pre , body, post, _, other_data, loop = \
self.arch.Loop.extract(self.source, loop_lbl, forced_loop_type=forced_loop_type)
loop_cnt = other_data['cnt']

# `other_data` may lack a 'cnt' entry; in that case there is no counter to
# pass on, so fall back to None instead of propagating the KeyError.
try:
    loop_cnt = other_data['cnt']
except KeyError:
    loop_cnt = None

indentation = AsmHelper.find_indentation(body)

body_ssa = SourceLine.read_multiline(loop.start(loop_cnt)) + \
Expand All @@ -472,7 +477,11 @@ def optimize_loop(self, loop_lbl, postamble_label=None, forced_loop_type=None):

early, body, late, _, other_data, loop = \
self.arch.Loop.extract(self.source, loop_lbl, forced_loop_type=forced_loop_type)
loop_cnt = other_data['cnt']

try:
mkannwischer marked this conversation as resolved.
Show resolved Hide resolved
loop_cnt = other_data['cnt']
except KeyError:
loop_cnt = None

# Check if the body has a dominant indentation
indentation = AsmHelper.find_indentation(body)
Expand Down
99 changes: 99 additions & 0 deletions slothy/targets/arm_v7m/arch_v7m.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,105 @@ def end(self, other, indentation=0):
yield f'{indent}cmp {other["cnt"]}, {other["end"]}'
yield f'{indent}bne {lbl_start}'


class BranchLoop(Loop):
    """
    More general loop type that just considers the branch instruction as part of the boundary.
    This can help to improve performance as the instructions that belong to handling the loop
    can be considered by SLOTHY as well.

    Note: This loop type is still rather experimental. It has a lot of logic inside as it needs
    to be able to "understand" a variety of different ways to express loops, e.g., how counters
    get incremented, how registers marking the end of the loop need to be modified in case of
    software pipelining etc. Currently, this type covers the three other types we offer above,
    namely `SubsLoop`, `CmpLoop`, and `VmovCmpLoop`.

    For examples, we refer to the classes `SubsLoop`, `CmpLoop`, and `VmovCmpLoop`.
    """
    def __init__(self, lbl="lbl", lbl_start="1", lbl_end="2", loop_init="lr") -> None:
        """Store the loop label and the patterns matching the loop label and closing branch."""
        super().__init__(lbl_start=lbl_start, lbl_end=lbl_end, loop_init=loop_init)
        self.lbl = lbl
        # A label definition, optionally followed by further text on the same line.
        self.lbl_regex = r"^\s*(?P<label>\w+)\s*:(?P<remainder>.*)$"
        # The loop ends at a (optionally wide) cbnz/cbz/bne that targets `lbl`.
        self.end_regex = (rf"^\s*(cbnz|cbz|bne)(?:\.w)?\s+{lbl}",)

    def start(self, loop_cnt, indentation=0, fixup=0, unroll=1, jump_if_empty=None, preamble_code=None, body_code=None, postamble_code=None, register_aliases=None):
        """Emit starting instruction(s) and jump label for loop

        The counter and end registers are derived from `body_code`: the first
        `cmp` (first operand = counter, second = end marker) or the first
        `subs` with immediate (counter and end marker are the same register).

        :param loop_cnt: only used for the `cbz` emitted when `jump_if_empty` is set.
        :param indentation: number of spaces put in front of emitted instructions.
        :param fixup: if non-zero, the end register is lowered by
            `fixup` times the per-iteration increment of the counter.
        :param unroll: if greater than 1, the end register is shifted right by
            log2(unroll); must be a power of two up to 32.
        :param jump_if_empty: if not None, label targeted by a `cbz` on `loop_cnt`.
        :param body_code: source lines of the loop body; if None, only the
            label is emitted.
        :param preamble_code: unused by this loop type.
        :param postamble_code: unused by this loop type.
        :param register_aliases: unused by this loop type.
        :raises ValueError: if the counter register is needed but the body
            contains neither a `cmp` nor a `subs` with immediate.
        """
        indent = ' ' * indentation
        if body_code is None:
            logging.debug("No body code in loop start: Just printing label.")
            yield f"{self.lbl}:"
            return

        # Identify the register that is used as a loop counter
        body_code = [line for line in body_code if line.text != ""]
        loop_cnt_reg = None
        loop_end_reg = None
        for line in body_code:
            inst = Instruction.parser(line)
            # Flags are set through cmp
            # LIMITATION: By convention, we require the first argument to be the
            # "counter" and the second the one marking the iteration end.
            if isinstance(inst[0], cmp):
                # Assume this mapping
                loop_cnt_reg = inst[0].args_in[0]
                loop_end_reg = inst[0].args_in[1]
                logging.debug(f"Assuming {loop_cnt_reg} as counter register and {loop_end_reg} as end register.")
                break
            # Flags are set through subs
            elif isinstance(inst[0], subs_imm_short):
                loop_cnt_reg = inst[0].args_in_out[0]
                loop_end_reg = inst[0].args_in_out[0]
                break

        # Without a cmp/subs there is no counter to work with. Everything below
        # that touches the registers would otherwise fail with an opaque
        # UnboundLocalError, so report the actual problem instead.
        if loop_cnt_reg is None and (body_code or unroll > 1 or fixup != 0):
            raise ValueError(
                f"BranchLoop '{self.lbl}': loop body contains neither a cmp nor "
                "a subs with immediate; cannot determine the loop counter register")

        # Find FPR that is used to stash the loop end in case it's a vmov loop
        loop_end_reg_fpr = None
        for line in body_code:
            inst = Instruction.parser(line)
            # The end marker is restored from an FPR through vmov
            if isinstance(inst[0], vmov_gpr):
                if loop_end_reg in inst[0].args_out:
                    logging.debug(f"Copying from {inst[0].args_in} to {loop_end_reg}")
                    loop_end_reg_fpr = inst[0].args_in[0]

            # The last vmov occurrence before the cmp that writes to the register
            # we compare to will be the right one. The same GPR could be written
            # previously due to renaming, before it becomes the value used in
            # the cmp.
            if isinstance(inst[0], cmp):
                break

        if unroll > 1:
            assert unroll in [1, 2, 4, 8, 16, 32]
            yield f"{indent}lsr {loop_end_reg}, {loop_end_reg}, #{int(math.log2(unroll))}"

        # Accumulate by how much the counter advances in one iteration
        inc_per_iter = 0
        for line in body_code:
            inst = Instruction.parser(line)
            # Increment happens through pointer modification
            if loop_cnt_reg.lower() == inst[0].addr and inst[0].increment is not None:
                inc_per_iter = inc_per_iter + simplify(inst[0].increment)
            # Increment through explicit modification
            elif loop_cnt_reg.lower() in (inst[0].args_out + inst[0].args_in_out) and inst[0].immediate is not None:
                # TODO: subtract if we have a subtraction
                inc_per_iter = inc_per_iter + simplify(inst[0].immediate)
        logging.debug(f"Loop counter {loop_cnt_reg} is incremented by {inc_per_iter} per iteration.")

        if fixup != 0:
            # If the end marker lives in an FPR, it has to be moved to the GPR
            # for the adjustment and written back afterwards; the GPR's own
            # value is preserved on the stack meanwhile.
            end_is_stashed = loop_end_reg_fpr is not None
            if end_is_stashed:
                yield f"{indent}push {{{loop_end_reg}}}"
                yield f"{indent}vmov {loop_end_reg}, {loop_end_reg_fpr}"

            yield f"{indent}sub {loop_end_reg}, {loop_end_reg}, #{fixup*inc_per_iter}"

            if end_is_stashed:
                yield f"{indent}vmov {loop_end_reg_fpr}, {loop_end_reg}"
                yield f"{indent}pop {{{loop_end_reg}}}"

        if jump_if_empty is not None:
            yield f"cbz {loop_cnt}, {jump_if_empty}"
        yield f"{self.lbl}:"

    def end(self, other, indentation=0):
        """Emit compare-and-branch at the end of the loop

        Only the branch is emitted; `other` is not used here.
        """
        # NOTE(review): `end_regex` also accepts cbz/cbnz, but a `bne` is
        # always emitted here -- confirm this is intended.
        indent = ' ' * indentation
        lbl_start = self.lbl
        # A purely numeric label gets a "b" suffix in the branch target.
        if lbl_start.isdigit():
            lbl_start += "b"

        yield f'{indent}bne {lbl_start}'

class CmpLoop(Loop):
"""
Loop ending in a compare and a branch.
Expand Down
Loading