diff --git a/examples/naive/armv7m/armv7m_simple0.s b/examples/naive/armv7m/armv7m_simple0.s index f250650f..937c22f2 100644 --- a/examples/naive/armv7m/armv7m_simple0.s +++ b/examples/naive/armv7m/armv7m_simple0.s @@ -6,4 +6,27 @@ eor.w r1,r1, r3 smlabt r3,r2, r2, r1 asrs r3, r3,#1 str r3, [r0,#4] + +ldm r0, {r1-r2,r14} +add r1, r2,r1 +eor.w r1,r1, r14 +smlabt r3,r2, r2, r1 +asrs r3, r3,#1 +str r3, [r0,#4] + + +ldm r0, {r1-r3} +add r1, r2,r1 +eor.w r1,r1, r3 +smlabt r3,r2, r2, r1 +asrs r3, r3,#1 +str r3, [r0,#4] + +ldm r0, {r1,r2,r3} +add r1, r2,r1 +eor.w r1,r1, r3 +smlabt r3,r2, r2, r1 +asrs r3, r3,#1 +str r3, [r0,#4] + end: \ No newline at end of file diff --git a/examples/opt/armv7m/armv7m_simple0_opt_m7.s b/examples/opt/armv7m/armv7m_simple0_opt_m7.s index d9e1be33..72754c35 100644 --- a/examples/opt/armv7m/armv7m_simple0_opt_m7.s +++ b/examples/opt/armv7m/armv7m_simple0_opt_m7.s @@ -1,24 +1,42 @@ start: - // Instructions: 6 - // Expected cycles: 5 - // Expected IPC: 1.20 - // - // Cycle bound: 5.0 - // IPC bound: 1.20 - // - // Wall time: 0.01s - // User time: 0.01s - // - // ----- cycle (expected) ------> - // 0 25 - // |------------------------|---- - ldr r12, [r0, #4] // *............................. - add r12,r2,r12 // .*............................ - eor.w r10,r12,r3 // ..*........................... - smlabt r10,r2,r2,r10 // ..*........................... - asrs r3,r10,#1 // ....*......................... - str r3,[r0,#4] // ....*......................... + // Instructions: 24 + // Expected cycles: 14 + // Expected IPC: 1.71 + // + // Cycle bound: 14.0 + // IPC bound: 1.71 + // + // Wall time: 0.39s + // User time: 0.39s + // + // ----- cycle (expected) ------> + // 0 25 + // |------------------------|---- + ldr r11,[r0,#4] // *............................. + ldm r0,{r7,r9,r14} // .*............................ + add r8,r2,r11 // .*............................ + eor.w r11,r8,r3 // ..*........................... 
+ smlabt r3,r2,r2,r11 // ..*........................... + add r11,r9,r7 // ....*......................... + ldm r0,{r1,r2,r7} // ....*......................... + eor.w r14,r11,r14 // .....*........................ + smlabt r8,r9,r9,r14 // .....*........................ + asrs r4,r3,#1 // ......*....................... + str r4,[r0,#4] // ......*....................... + add r3,r2,r1 // .......*...................... + ldm r0,{r11,r12,r14} // .......*...................... + eor.w r3,r3,r7 // ........*..................... + smlabt r7,r2,r2,r3 // ........*..................... + asrs r9,r8,#1 // .........*.................... + str r9,[r0,#4] // .........*.................... + asrs r3,r7,#1 // ..........*................... + add r7,r12,r11 // ..........*................... + eor.w r14,r7,r14 // ...........*.................. + smlabt r14,r12,r12,r14 // ...........*.................. + str r3,[r0,#4] // ............*................. + asrs r14,r14,#1 // .............*................ + str r14,[r0,#4] // .............*................ // ------ cycle (expected) ------> // 0 25 @@ -27,7 +45,25 @@ // add r1, r2,r1 // .*............................. // eor.w r1,r1, r3 // ..*............................ // smlabt r3,r2, r2, r1 // ..*............................ - // asrs r3, r3,#1 // ....*.......................... - // str r3, [r0,#4] // ....*.......................... + // asrs r3, r3,#1 // ......*........................ + // str r3, [r0,#4] // ......*........................ + // ldm r0, {r1-r2,r14} // .*............................. + // add r1, r2,r1 // ....*.......................... + // eor.w r1,r1, r14 // .....*......................... + // smlabt r3,r2, r2, r1 // .....*......................... + // asrs r3, r3,#1 // .........*..................... + // str r3, [r0,#4] // .........*..................... + // ldm r0, {r1-r3} // .......*....................... + // add r1, r2,r1 // ..........*.................... 
+ // eor.w r1,r1, r3 // ...........*................... + // smlabt r3,r2, r2, r1 // ...........*................... + // asrs r3, r3,#1 // .............*................. + // str r3, [r0,#4] // .............*................. + // ldm r0, {r1,r2,r3} // ....*.......................... + // add r1, r2,r1 // .......*....................... + // eor.w r1,r1, r3 // ........*...................... + // smlabt r3,r2, r2, r1 // ........*...................... + // asrs r3, r3,#1 // ..........*.................... + // str r3, [r0,#4] // ............*.................. end: diff --git a/slothy/targets/arm_v7m/arch_v7m.py b/slothy/targets/arm_v7m/arch_v7m.py index deefdee7..be18e01b 100644 --- a/slothy/targets/arm_v7m/arch_v7m.py +++ b/slothy/targets/arm_v7m/arch_v7m.py @@ -2,6 +2,7 @@ import inspect import re import math +import itertools from enum import Enum from functools import cache @@ -125,12 +126,12 @@ def unconditional(lbl): class VmovCmpLoop(Loop): """ Loop ending in a vmov, a compare, and a branch. - + The modification to the value we compare against happens inside the loop body. The value that is being compared to is stashed to a floating point register before the loop starts and therefore needs to be recovered before - the comparison. - + the comparison. + WARNING: This type of loop is experimental as slothy has no knowledge about what happens inside the loop boundary! Especially, a register is written inside the boundary which may be used for renaming by slothy. Use with @@ -218,7 +219,7 @@ class CmpLoop(Loop): """ Loop ending in a compare and a branch. The modification to the value we compare against happens inside the loop body. - WARNING: This type of loop is experimental as slothy has no knowledge about + WARNING: This type of loop is experimental as slothy has no knowledge about what happens inside the loop boundary! Use with caution. 
Example: @@ -397,7 +398,7 @@ def __init__(self, *, mnemonic, self.flag = None self.width = None self.barrel = None - self.range = None + self.reg_list = None def extract_read_writes(self): """Extracts 'reads'/'writes' clauses from the source line of the instruction""" @@ -651,7 +652,11 @@ def pattern_i(i): index_pattern = "[0-9]+" width_pattern = "(?:\.w|\.n|)" barrel_pattern = "(?:lsl|ror|lsr|asr)\\\\s*" - range_pattern = "\{(?P[rs])(?P\\\\d+)-[rs](?P\\\\d+)\}" + + # reg_list is (,)* + # range is [rs]NN(-rsMM)? + range_pat = "([rs]\\\\d+)(-[rs](\\\\d+))?" + reg_list_pattern = "\{"+ range_pat + "(," + range_pat + ")*" +"\}" src = re.sub(" ", "\\\\s+", src) src = re.sub(",", "\\\\s*,\\\\s*", src) @@ -662,7 +667,7 @@ def pattern_i(i): src = replace_placeholders(src, "flag", flag_pattern, "flag") # TODO: Are any changes required for IT syntax? src = replace_placeholders(src, "width", width_pattern, "width") src = replace_placeholders(src, "barrel", barrel_pattern, "barrel") - src = replace_placeholders(src, "range", range_pattern, "range") + src = replace_placeholders(src, "reg_list", reg_list_pattern, "reg_list") src = r"\s*" + src + r"\s*(//.*)?\Z" return src @@ -789,6 +794,30 @@ def _instantiate_pattern(s, ty, arg, out): raise FatalParsingException(f"Failed to replace <{s}> by {rep} in {out}!") return res + @staticmethod + def _expand_reg_list(reg_list): + """Expanding list of registers that may contain ranges + Examples: + r1,r2,r3 + s1-s7 + r1-r3,r14 + """ + reg_list = reg_list.replace("{", "") + reg_list = reg_list.replace("}", "") + + reg_list_type = reg_list[0] + regs = [] + for reg_range in reg_list.split(","): + if "-" in reg_range: + start = reg_range.split("-")[0] + end = reg_range.split("-")[1] + start = int(start.replace(reg_list_type, "")) + end = int(end.replace(reg_list_type, "")) + regs += [f"{reg_list_type}{i}" for i in range(start, end+1)] + else: # not a range, just a register + regs += [reg_range] + return reg_list_type, regs + @staticmethod def 
build_core(obj, res): @@ -815,10 +844,7 @@ def group_name_i(i): group_to_attribute('flag', 'flag') group_to_attribute('width', 'width') group_to_attribute('barrel', 'barrel') - group_to_attribute('range', 'range') - group_to_attribute('range_start', 'range_start', int) - group_to_attribute('range_end', 'range_end', int) - group_to_attribute('range_type', 'range_type') + group_to_attribute('reg_list', 'reg_list') for s, ty in obj.pattern_inputs: if ty == RegisterType.FLAGS: @@ -891,7 +917,7 @@ def t_default(x): out = replace_pattern(out, "index", "index", str) out = replace_pattern(out, "width", "width", lambda x: x.lower()) out = replace_pattern(out, "barrel", "barrel", lambda x: x.lower()) - out = replace_pattern(out, "range", "range", lambda x: x.lower()) + out = replace_pattern(out, "reg_list", "reg_list", lambda x: x.lower()) out = out.replace("\\[", "[") out = out.replace("\\]", "]") @@ -1417,53 +1443,53 @@ def make(cls, src): return obj class ldm_interval(Armv7mLoadInstruction): # pylint: disable=missing-docstring,invalid-name - pattern = "ldm ," + pattern = "ldm ," inputs = ["Ra"] outputs = [] def write(self): - reg_from = self.args_out[0] - reg_to = self.args_out[-1] - self.range = f"{{{reg_from}-{reg_to}}}" + regs = ",".join(self.args_out) + self.reg_list = f"{{{regs}}}" return super().write() @classmethod def make(cls, src): obj = Armv7mLoadInstruction.build(cls, src) - reg_type = Armv7mInstruction._infer_register_type(obj.range_type) - num_regs = len(RegisterType.list_registers(reg_type)) - obj.increment = (obj.range_end-obj.range_start+1) * 4 # word sized loads - obj.args_out = [f"{obj.range_type}{i}" for i in range(obj.range_start, obj.range_end+1)] + reg_list_type, reg_list = Armv7mInstruction._expand_reg_list(obj.reg_list) + + obj.args_out = reg_list obj.num_out = len(obj.args_out) obj.arg_types_out = [RegisterType.GPR] * obj.num_out - obj.args_out_restrictions = [[ f"r{i+j}" for j in range(0, num_regs-obj.num_out)] for i in range(0, obj.num_out) ] - 
obj.args_out_combinations = [ ( list(range(0, obj.num_out)), [ [ f"r{i+j}" for i in range(0, obj.num_out)] for j in range(0, num_regs-obj.num_out) ] )] + available_regs = RegisterType.list_registers(RegisterType.GPR) + obj.args_out_combinations = [ (list(range(0, obj.num_out)), [list(a) for a in itertools.combinations(available_regs, obj.num_out)])] + obj.args_out_restrictions = [ None for _ in range(obj.num_out) ] return obj class ldm_interval_inc_writeback(Armv7mLoadInstruction): # pylint: disable=missing-docstring,invalid-name - pattern = "ldm !," + pattern = "ldm !," in_outs = ["Ra"] outputs = [] def write(self): - reg_from = self.args_out[0] - reg_to = self.args_out[-1] - self.range = f"{{{reg_from}-{reg_to}}}" + regs = ",".join(self.args_out) + self.reg_list = f"{{{regs}}}" return super().write() @classmethod def make(cls, src): obj = Armv7mLoadInstruction.build(cls, src) - reg_type = Armv7mInstruction._infer_register_type(obj.range_type) - num_regs = len(RegisterType.list_registers(reg_type)) - obj.increment = (obj.range_end-obj.range_start+1) * 4 # word sized loads - obj.args_out = [f"{obj.range_type}{i}" for i in range(obj.range_start, obj.range_end+1)] + reg_list_type, reg_list = Armv7mInstruction._expand_reg_list(obj.reg_list) + + obj.args_out = reg_list obj.num_out = len(obj.args_out) obj.arg_types_out = [RegisterType.GPR] * obj.num_out - obj.args_out_restrictions = [[ f"r{i+j}" for j in range(0, num_regs-obj.num_out)] for i in range(0, obj.num_out) ] - obj.args_out_combinations = [ ( list(range(0, obj.num_out)), [ [ f"r{i+j}" for i in range(0, obj.num_out)] for j in range(0, num_regs-obj.num_out) ] )] + obj.increment = obj.num_out * 4 + + available_regs = RegisterType.list_registers(RegisterType.GPR) + obj.args_out_combinations = [ (list(range(0, obj.num_out)), [list(a) for a in itertools.combinations(available_regs, obj.num_out)])] + obj.args_out_restrictions = [ None for _ in range(obj.num_out) ] return obj class vldr_with_imm(Armv7mLoadInstruction): 
# pylint: disable=missing-docstring,invalid-name @@ -1496,27 +1522,28 @@ def make(cls, src): return obj class vldm_interval_inc_writeback(Armv7mLoadInstruction): # pylint: disable=missing-docstring,invalid-name - pattern = "vldm !," + pattern = "vldm !," in_outs = ["Ra"] outputs = [] def write(self): - reg_from = self.args_out[0] - reg_to = self.args_out[-1] - self.range = f"{{{reg_from}-{reg_to}}}" + regs = ",".join(self.args_out) + self.reg_list = f"{{{regs}}}" return super().write() @classmethod def make(cls, src): obj = Armv7mLoadInstruction.build(cls, src) - reg_type = Armv7mInstruction._infer_register_type(obj.range_type) - num_regs = len(RegisterType.list_registers(reg_type)) - obj.increment = (obj.range_end-obj.range_start+1) * 4 # word sized loads - obj.args_out = [f"{obj.range_type}{i}" for i in range(obj.range_start, obj.range_end+1)] + reg_list_type, reg_list = Armv7mInstruction._expand_reg_list(obj.reg_list) + + obj.args_out = reg_list obj.num_out = len(obj.args_out) obj.arg_types_out = [RegisterType.FPR] * obj.num_out - obj.args_out_restrictions = [[ f"s{i+j}" for j in range(0, num_regs-obj.num_out)] for i in range(0, obj.num_out) ] - obj.args_out_combinations = [ ( list(range(0, obj.num_out)), [ [ f"s{i+j}" for i in range(0, obj.num_out)] for j in range(0, num_regs-obj.num_out) ] )] + obj.increment = obj.num_out * 4 + + available_regs = RegisterType.list_registers(RegisterType.FPR) + obj.args_out_combinations = [ (list(range(0, obj.num_out)), [list(a) for a in itertools.combinations(available_regs, obj.num_out)])] + obj.args_out_restrictions = [ None for _ in range(obj.num_out) ] return obj # Store @@ -1611,27 +1638,29 @@ def make(cls, src): return obj class stm_interval_inc_writeback(Armv7mLoadInstruction): # pylint: disable=missing-docstring,invalid-name - pattern = "stm !," + pattern = "stm !," in_outs = ["Ra"] outputs = [] def write(self): - reg_from = self.args_in[0] - reg_to = self.args_in[-1] - self.range = f"{{{reg_from}-{reg_to}}}" + regs =
",".join(self.args_in) + self.reg_list = f"{{{regs}}}" return super().write() @classmethod def make(cls, src): obj = Armv7mLoadInstruction.build(cls, src) - reg_type = Armv7mInstruction._infer_register_type(obj.range_type) - num_regs = len(RegisterType.list_registers(reg_type)) - obj.increment = (obj.range_end-obj.range_start+1) * 4 # word sized loads - obj.args_in = [f"{obj.range_type}{i}" for i in range(obj.range_start, obj.range_end+1)] + + reg_list_type, reg_list = Armv7mInstruction._expand_reg_list(obj.reg_list) + + obj.args_in = reg_list obj.num_in = len(obj.args_in) obj.arg_types_in = [RegisterType.GPR] * obj.num_in - obj.args_in_restrictions = [[ f"r{i+j}" for j in range(0, num_regs-obj.num_in)] for i in range(0, obj.num_in) ] - obj.args_in_combinations = [ ( list(range(0, obj.num_in)), [ [ f"r{i+j}" for i in range(0, obj.num_in)] for j in range(0, num_regs-obj.num_in) ] )] + obj.increment = obj.num_in * 4 + + available_regs = RegisterType.list_registers(RegisterType.GPR) + obj.args_in_combinations = [ (list(range(0, obj.num_in)), [list(a) for a in itertools.combinations(available_regs, obj.num_in)])] + obj.args_in_restrictions = [ None for _ in range(obj.num_in) ] return obj # Other class cmp(Armv7mBasicArithmetic): # pylint: disable=missing-docstring,invalid-name diff --git a/slothy/targets/arm_v7m/cortex_m7.py b/slothy/targets/arm_v7m/cortex_m7.py index 33c9d5f3..57cc8f11 100644 --- a/slothy/targets/arm_v7m/cortex_m7.py +++ b/slothy/targets/arm_v7m/cortex_m7.py @@ -313,7 +313,7 @@ def get_latency(src, out_idx, dst): # Load and store multiples take a long time to complete if instclass_src in [ldm_interval, ldm_interval_inc_writeback, stm_interval_inc_writeback, vldm_interval_inc_writeback]: - latency = (src.range_end - src.range_start) + 1 + latency = src.num_in if instclass_src == stm_interval_inc_writeback else src.num_out # Can always store result in the same cycle # TODO: double-check this @@ -352,6 +352,6 @@ def evaluate_immediate(string_expr): def get_inverse_throughput(src): itp =
lookup_multidict(inverse_throughput, src) if find_class(src) in [ldm_interval, ldm_interval_inc_writeback, stm_interval_inc_writeback, vldm_interval_inc_writeback]: - itp = (src.range_end - src.range_start) + 1 + itp = src.num_in if find_class(src) == stm_interval_inc_writeback else src.num_out return itp