Skip to content

Commit

Permalink
Armv7-M: Default to .w for better alignment
Browse files Browse the repository at this point in the history
Currently, we keep the instruction-width modifiers as they
appear in the input code (with a few exceptions).
However, this can negatively impact performance, as SLOTHY
may break code alignment.
This commit changes the Armv7-M instruction writer to output
.w for all instructions, resulting in the best performance
(modulo the size of the instruction cache).
Unfortunately, LLVM (in the selftest) stumbles over some of these
.w modifiers in places where they do not have any effect.
To work around that, we remove the modifiers for the selftest.
  • Loading branch information
mkannwischer committed Jan 13, 2025
1 parent 09a9d3b commit 4db1d82
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 6 deletions.
20 changes: 18 additions & 2 deletions slothy/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -1212,12 +1212,28 @@ def assemble(source, arch, attr, log, symbol=None, preprocessor=None, include_pa
include=include_paths)
except subprocess.CalledProcessError as exc:
log.error("CPreprocessor failed on the following input")
log.error(SouceLine.write_multiline(source))
log.error(SourceLine.write_multiline(source))
raise LLVM_Mc_Error from exc

if platform.system() == "Darwin":
source = list(filter(lambda s: s.text.strip().startswith(".type") is False, source))


# Remove all width information - LLVM cannot handle .w for
# some instructions that only have a 32-bit encoding,
# e.g., uadd16.w works in gcc, but not LLVM.
# Unfortunately, for some instructions this depends
# on the registers used and, hence, adjusting the input to
# SLOTHY is not sufficient.
# As we currently don't have a model of the instruction encodings,
# there is no principled way to reason about it.
if thumb:
for line in source:
instruction = line.text
instruction = instruction.replace(".w ", " ")
instruction = instruction.replace(".n ", " ")
line.set_text(instruction)

code = SourceLine.write_multiline(source)

log.debug(f"Calling LLVM MC assmelber on the following code")
Expand Down Expand Up @@ -1585,7 +1601,7 @@ def extract(source, lbl, forced_loop_type=None):
"""
Find a loop with start label `lbl` in `source` and return it together
with its type.
Args:
source: list of SourceLine objects
lbl: label of the loop to extract
Expand Down
12 changes: 8 additions & 4 deletions slothy/targets/arm_v7m/arch_v7m.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ def start(self, loop_cnt, indentation=0, fixup=0, unroll=1, jump_if_empty=None,
# if new_fixup != 0:
# yield f"{indent}sub {self.additional_data['end']}, {self.additional_data['end']}, #{new_fixup}"
if fixup != 0:
yield f"{indent}sub {self.additional_data['end']}, {self.additional_data['end']}, #{fixup*inc_per_iter}"
yield f"{indent}sub.w {self.additional_data['end']}, {self.additional_data['end']}, #{fixup*inc_per_iter}"
#if new_fixup != 0 or fixup != 0:
if fixup != 0:
yield f"{indent}vmov {self.additional_data['endf']}, {self.additional_data['end']}"
Expand Down Expand Up @@ -383,7 +383,7 @@ def start(self, loop_cnt, indentation=0, fixup=0, unroll=1, jump_if_empty=None,
yield f"{indent}vmov {loop_end_reg}, {loop_end_reg_fpr}"

if fixup != 0:
yield f"{indent}sub {loop_end_reg}, {loop_end_reg}, #{fixup*inc_per_iter}"
yield f"{indent}sub.w {loop_end_reg}, {loop_end_reg}, #{fixup*inc_per_iter}"

if fixup != 0 and loop_end_reg_fpr is not None:
yield f"{indent}vmov {loop_end_reg_fpr}, {loop_end_reg}"
Expand Down Expand Up @@ -457,7 +457,7 @@ def start(self, loop_cnt, indentation=0, fixup=0, unroll=1, jump_if_empty=None,
# yield f"{indent}sub {self.additional_data['end']}, {self.additional_data['end']}, #{new_fixup}"

if fixup != 0:
yield f"{indent}sub {self.additional_data['end']}, {self.additional_data['end']}, #{fixup*inc_per_iter}"
yield f"{indent}sub.w {self.additional_data['end']}, {self.additional_data['end']}, #{fixup*inc_per_iter}"

if jump_if_empty is not None:
yield f"cbz {loop_cnt}, {jump_if_empty}"
Expand Down Expand Up @@ -499,7 +499,7 @@ def start(self, loop_cnt, indentation=0, fixup=0, unroll=1, jump_if_empty=None,
assert unroll in [1,2,4,8,16,32]
yield f"{indent}lsr {loop_cnt}, {loop_cnt}, #{int(math.log2(unroll))}"
if fixup != 0:
yield f"{indent}sub {loop_cnt}, {loop_cnt}, #{fixup}"
yield f"{indent}sub.w {loop_cnt}, {loop_cnt}, #{fixup}"
if jump_if_empty is not None:
yield f"cbz {loop_cnt}, {jump_if_empty}"
yield f"{self.lbl_start}:"
Expand Down Expand Up @@ -1079,6 +1079,10 @@ def make(cls, src):
return Armv7mInstruction.build(cls, src)

def write(self):
# Default to .w for all instructions for better performance
# TODO: find a more principled way to do this
self.width = ".w"

out = self.pattern
l = list(zip(self.args_in, self.pattern_inputs)) + \
list(zip(self.args_out, self.pattern_outputs)) + \
Expand Down

0 comments on commit 4db1d82

Please sign in to comment.