slothy-optimizer · hanno-becker · Dec 17, 2024 · Dec 15, 2024 · Dec 16, 2024 · Dec 16, 2024
diff --git a/slothy/core/config.py b/slothy/core/config.py
@@ -135,15 +135,15 @@ def selftest_iterations(self):
         return self._selftest_iterations
 
     @property
-    def selftest_address_gprs(self):
+    def selftest_address_registers(self):
         """Dictionary of (reg, sz) items indicating which registers are assumed to be
         pointers to memory, and if so, of what size."""
-        return self._selftest_address_gprs
+        return self._selftest_address_registers
 
     @property
     def selftest_default_memory_size(self):
         """Default buffer size to use for registers which are automatically inferred to be
-        used as pointers and for which no memory size has been configured via `address_gprs`."""
+        used as pointers and for which no memory size has been configured via `selftest_address_registers`."""
         return self._selftest_default_memory_size
 
     @property
@@ -1183,7 +1183,7 @@ def __init__(self, Arch, Target):
 
         self._selftest = True
         self._selftest_iterations = 10
-        self._selftest_address_gprs = None
+        self._selftest_address_registers = None
         self._selftest_default_memory_size = 1024
         self._selfcheck = True
         self._selfcheck_failure_logfile = None
@@ -1307,9 +1307,9 @@ def selftest(self,val):
     @selftest_iterations.setter
     def selftest_iterations(self,val):
         self._selftest_iterations = val
-    @selftest_address_gprs.setter
-    def selftest_address_gprs(self,val):
-        self._selftest_address_gprs = val
+    @selftest_address_registers.setter
+    def selftest_address_registers(self,val):
+        self._selftest_address_registers = val
     @selftest_default_memory_size.setter
     def selftest_default_memory_size(self,val):
         self._selftest_default_memory_size = val

diff --git a/slothy/core/core.py b/slothy/core/core.py
@@ -846,30 +846,45 @@ def selftest(self, log):
 
         log.info(f"Running selftest ({self._config.selftest_iterations} iterations)...")
 
-        address_gprs = self._config.selftest_address_gprs
-        if address_gprs is None:
+        address_registers = self._config.selftest_address_registers
+        if address_registers is None:
             # Try to infer which registes need to be pointers
             # Look for load/store instructions and remember addresses
             addresses = set()
             for t in tree.nodes:
                 addr = getattr(t.inst, "addr", None)
                 if addr is None:
                     continue
-                addresses.add(addr)
+                addresses = addresses.union(tree.find_all_predecessors_input_registers(t, addr))
 
             # For now, we don't look into increments and immedate offsets
             # to gauge the amount of memory we actually need. Instaed, we
             # just allocate a buffer of a configurable default size.
             log.info(f"Inferred that the following registers seem to act as pointers: {addresses}")
             log.info(f"Using default buffer size of {self._config.selftest_default_memory_size} bytes. "
-                     "If you want different buffer sizes, set selftest_address_gprs manually.")
-            address_gprs = { a: self._config.selftest_default_memory_size for a in addresses }
+                     "If you want different buffer sizes, set selftest_address_registers manually.")
+            address_registers = { a: self._config.selftest_default_memory_size for a in addresses }
 
         # This produces _unrolled_ code, the same that is checked in the selfcheck.
         # The selftest should instead use the rolled form of the loop.
         iterations = 7
         if self.config.sw_pipelining.enabled is True:
             old_source, new_source = self.get_fully_unrolled_loop(iterations)
+
+            dfgc_preamble = DFGConfig(self.config, outputs=self.kernel_input_output)
+            dfgc_preamble.inputs_are_outputs = False
+            preamble_dfg = DFG(self.preamble, log, dfgc_preamble)
+
+            if preamble_dfg.has_symbolic_registers():
+                log.info("Skipping selftest as preamble contains symbolic registers.")
+                return
+
+            dfgc_postamble = DFGConfig(self.config, outputs=self.orig_outputs)
+            postamble_dfg = DFG(self.postamble, log.getChild("new_postamble"), dfgc_postamble)
+
+            if postamble_dfg.has_symbolic_registers():
+                log.info("Skipping selftest as postamble contains symbolic registers.")
+                return
         else:
             old_source, new_source = self.orig_code, self.code
 
@@ -879,7 +894,7 @@ def selftest(self, log):
         regs_expected = set(filter(lambda t: t.startswith("t") is False and
                                          t != "sp" and t != "flags", regs_expected))
 
-        SelfTest.run(self.config, log, old_source, new_source, address_gprs, regs_expected,
+        SelfTest.run(self.config, log, old_source, new_source, address_registers, regs_expected,
                      self.config.selftest_iterations)
 
     def selfcheck_with_fixup(self, log):

diff --git a/slothy/core/dataflow.py b/slothy/core/dataflow.py
@@ -775,6 +775,30 @@ def has_symbolic_registers(self):
                     return True
         return False
 
+    def find_all_predecessors_input_registers(self, consumer, register_name):
+        """Recursively find the set of input registers that the value of `register_name` at node `consumer` depends on."""
+        # The stack pointer is ignored: it never contributes any input registers.
+        if register_name == "sp":
+            return set()
+
+        producer = consumer.reg_state[register_name].src
+        # If the producer is a VirtualInputInstruction, its outputs are actual inputs;
+        # otherwise the value is computed from other registers, which are traced in turn.
+        if isinstance(producer.inst, VirtualInputInstruction):
+            return set(producer.inst.args_out)
+        else:
+            # Go through all input and input/output arguments of the producer and recurse on each.
+            # Only registers produced by a VirtualInputInstruction end up in the result.
+            regs = []
+            if hasattr(producer.inst, "args_in"):
+                regs += producer.inst.args_in
+            if hasattr(producer.inst, "args_in_out"):
+                regs += producer.inst.args_in_out
+            predecessors = set()
+            for reg in regs:
+                predecessors = predecessors.union(self.find_all_predecessors_input_registers(producer, reg))
+            return set(predecessors)
+
     def ssa(self, filter_func=None):
         """Transform data flow graph into single static assignment (SSA) form."""
         # Go through non-virtual instruction nodes and assign unique names to

diff --git a/slothy/core/slothy.py b/slothy/core/slothy.py
@@ -172,7 +172,7 @@ def _dump(name, s, logger, err=False):
         for l in s:
             fun(f"> {l}")
 
-    def global_selftest(self, funcname, address_gprs, iterations=5):
+    def global_selftest(self, funcname, address_registers, iterations=5):
         """Conduct a function-level selftest
 
         - funcname: Name of function to be called. Must be exposed as a symbol
@@ -198,7 +198,7 @@ def global_selftest(self, funcname, address_gprs, iterations=5):
         old_source = self.original_source
         new_source = self.source
 
-        SelfTest.run(self.config, log, old_source, new_source, address_gprs,
+        SelfTest.run(self.config, log, old_source, new_source, address_registers,
                      self.config.arch.RegisterType.callee_saved_registers(), 5,
                      fnsym=funcname)
 

diff --git a/slothy/helper.py b/slothy/helper.py
@@ -1282,7 +1282,7 @@ class SelfTestException(Exception):
 class SelfTest():
 
     @staticmethod
-    def run(config, log, codeA, codeB, address_gprs, output_registers, iterations, fnsym=None):
+    def run(config, log, codeA, codeB, address_registers, output_registers, iterations, fnsym=None):
         CODE_BASE = 0x010000
         CODE_SZ = 0x010000
         CODE_END = CODE_BASE + CODE_SZ
@@ -1314,8 +1314,8 @@ def run_code(code, txt=None):
                 # If we expect a function return, put a valid address in the LR
                 # that serves as the marker to terminate emulation
                 mu.reg_write(config.arch.RegisterType.unicorn_link_register(), CODE_END)
-            # Setup stack
-            mu.reg_write(config.arch.RegisterType.unicorn_stack_pointer(), STACK_TOP)
+            # Setup stack and allocate initial stack memory
+            mu.reg_write(config.arch.RegisterType.unicorn_stack_pointer(), STACK_TOP - config.selftest_default_memory_size)
             # Copy code into emulator
             mu.mem_map(CODE_BASE, CODE_SZ)
             mu.mem_write(CODE_BASE, objcode)
@@ -1334,10 +1334,11 @@ def run_code(code, txt=None):
                     mu.emu_start(CODE_BASE + offset, CODE_BASE + len(objcode))
                 else:
                     mu.emu_start(CODE_BASE + offset, CODE_END)
-            except:
+            except UcError as e:
                 log.error("Failed to emulate code using unicorn engine")
                 log.error("Code")
                 log.error(SourceLine.write_multiline(code))
+                raise SelfTestException(f"Selftest failed: Unicorn failed to emulate code: {str(e)}") from e
 
             final_register_contents = {}
             for r in regs:
@@ -1358,9 +1359,11 @@ def run_code(code, txt=None):
             initial_register_contents = {}
             for r in regs:
                 initial_register_contents[r] = int.from_bytes(os.urandom(16))
-            for (reg, sz) in address_gprs.items():
-                initial_register_contents[reg] = cur_ram
-                cur_ram += sz
+            for (reg, sz) in address_registers.items():
+                # allocate 2*sz and place pointer in the middle
+                # this makes sure that memory can be accessed at negative offsets
+                initial_register_contents[reg] = cur_ram + sz
+                cur_ram += 2*sz
 
             final_regs_old, final_mem_old = run_code(codeA, txt="old")
             final_regs_new, final_mem_new = run_code(codeB, txt="new")