Skip to content

Commit

Permalink
Implement global self test
Browse files Browse the repository at this point in the history
This commit prototypes an implementation of a 'global' selftest,
building and emulating the entire source code loaded into SLOTHY
and checking that input and output behave equivalently.

To use, call `slothy.global_selftest(...)` at any point during
stateful SLOTHY operations. Then, the global selftest will compare
the current state of the code with the original state.

See ntt_kyber_123_4567 for an example.

This is so far only implemented for AArch64.
  • Loading branch information
hanno-becker committed Dec 9, 2024
1 parent f6c6404 commit 71a037b
Show file tree
Hide file tree
Showing 7 changed files with 271 additions and 52 deletions.
3 changes: 3 additions & 0 deletions example.py
Original file line number Diff line number Diff line change
Expand Up @@ -744,6 +744,9 @@ def core(self, slothy):
slothy.config.constraints.stalls_first_attempt = 64
slothy.optimize_loop("layer123_start")
slothy.optimize_loop("layer4567_start")
# Build + emulate entire function to test that behaviour has not changed
slothy.global_selftest("ntt_kyber_123_4567",
{"x0": 1024, "x1": 1024, "x3": 1024, "x4": 1024, "x5": 1024})

class intt_kyber_123_4567(Example):
def __init__(self, var="", arch=AArch64_Neon, target=Target_CortexA55, timeout=None):
Expand Down
25 changes: 20 additions & 5 deletions examples/naive/aarch64/ntt_kyber_123_4567.s
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,13 @@
/// SOFTWARE.
///

// Commented out for simple standalone emulation not
// requiring correct constant data
//
// Should be uncommented when used.
//
// Needed to provide ASM_LOAD directive
#include <hal_env.h>
// #include <hal_env.h>

.macro mulmodq dst, src, const, idx0, idx1
sqrdmulh t2.8h, \src\().8h, \const\().h[\idx1]
Expand Down Expand Up @@ -154,7 +159,12 @@
.data
.p2align 4
roots:
#include "ntt_kyber_123_45_67_twiddles.s"
// Commented out for simple standalone emulation not
// requiring correct constant data
//
// Should be uncommented when used.
//
// #include "ntt_kyber_123_45_67_twiddles.s"

in .req x0
inp .req x1
Expand Down Expand Up @@ -223,9 +233,14 @@ ntt_kyber_123_4567:
_ntt_kyber_123_4567:
push_stack

ASM_LOAD(r_ptr0, roots)
ASM_LOAD(r_ptr1, roots_l56)
ASM_LOAD(xtmp, const_addr)
// Commented out for simple standalone emulation not
// requiring correct constant data.
//
// Should be uncommented when used.
//
// ASM_LOAD(r_ptr0, roots)
// ASM_LOAD(r_ptr1, roots_l56)
// ASM_LOAD(xtmp, const_addr)

ld1 {consts.8h}, [xtmp]

Expand Down
27 changes: 2 additions & 25 deletions slothy/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,8 @@ def selftest(self):
equivalence-check the loop-form (including the compare+branch instructions
at the loop boundary) rather than the unrolled code.
DEPENDENCY: To run this, you need `llvm-mc` the binary in your path or configured
as via `llvm_mc_binary`, and `unicorn-engine` Python bindings setup.
DEPENDENCY: To run this, you need `llvm-nm`, `llvm-readobj`, `llvm-mc`
in your PATH. Those are part of a standard LLVM setup.
NOTE: This is so far implemented as a repeated randomized test -- nothing clever.
"""
Expand Down Expand Up @@ -469,21 +469,6 @@ def compiler_include_paths(self):
or `with_llvm_mca_after` are set."""
return self._compiler_include_paths

@property
def llvm_mca_binary(self):
"""The llvm-mca binary to be used for estimated performance annotations
This is only relevant if `with_llvm_mca_before` or `with_llvm_mca_after`
is set."""
return self._llvm_mca_binary

@property
def llvm_mc_binary(self):
"""The llvm-mc binary to be used for assembling output data
This is only relevant if `selftest` is set."""
return self._llvm_mc_binary

@property
def timeout(self):
"""The timeout in seconds after which the underlying constraint solver stops
Expand Down Expand Up @@ -1228,8 +1213,6 @@ def __init__(self, Arch, Target):

self._compiler_binary = "gcc"
self._compiler_include_paths = None
self._llvm_mca_binary = "llvm-mca"
self._llvm_mc_binary = "llvm-mc"

self.keep_tags = True
self.inherit_macro_comments = False
Expand Down Expand Up @@ -1377,12 +1360,6 @@ def compiler_binary(self, val):
@compiler_include_paths.setter
def compiler_include_paths(self, val):
self._compiler_include_paths = val
@llvm_mca_binary.setter
def llvm_mca_binary(self, val):
self._llvm_mca_binary = val
@llvm_mc_binary.setter
def llvm_mc_binary(self, val):
self._llvm_mc_binary = val
@timeout.setter
def timeout(self, val):
self._timeout = val
Expand Down
5 changes: 2 additions & 3 deletions slothy/core/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -877,11 +877,10 @@ def selftest(self, log):
self._config.arch.RegisterType.list_registers(ty)]

def run_code(code, txt=None):
objcode = LLVM_Mc.assemble(code, self._config.llvm_mc_binary,
objcode, offset = LLVM_Mc.assemble(code,
self._config.arch.llvm_mc_arch,
self._config.arch.llvm_mc_attr,
log)

# Setup emulator
mu = Uc(self.config.arch.unicorn_arch, self.config.arch.unicorn_mode)
# Copy initial register contents into emulator
Expand Down Expand Up @@ -937,7 +936,7 @@ def run_code(code, txt=None):
if final_regs_old[r] != final_regs_new[r]:
raise SlothySelfTestException(f"Selftest failed: Register mismatch for {r}: {hex(final_regs_old[r])} != {hex(final_regs_new[r])}")

log.info("Selftest: OK")
log.info("Local selftest: OK")

def selfcheck_with_fixup(self, log):
"""Do selfcheck, and consider preamble/postamble fixup in case of SW pipelining
Expand Down
140 changes: 139 additions & 1 deletion slothy/core/slothy.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
This module provides the Slothy class, which is a stateful interface to both
one-shot and heuristic optimizations using SLOTHY."""

import os
import logging
from types import SimpleNamespace

Expand All @@ -54,7 +55,16 @@
from slothy.core.heuristics import Heuristics
from slothy.helper import CPreprocessor, SourceLine
from slothy.helper import AsmAllocation, AsmMacro, AsmHelper, AsmIfElse
from slothy.helper import CPreprocessor, LLVM_Mca, LLVM_Mca_Error
from slothy.helper import CPreprocessor, LLVM_Mca, LLVM_Mc, LLVM_Mca_Error

try:
from unicorn import *
from unicorn.arm64_const import *
except ImportError:
Uc = None

class SlothyGlobalSelfTestException(Exception):
    """Raised when the global (function-level) selftest fails or cannot be run"""

class Slothy:
"""SLOTHY optimizer
Expand Down Expand Up @@ -87,6 +97,7 @@ def __init__(self, arch, target, logger=None):

# The source, once loaded, is represented as a list of strings
self._source = None
self._original_source = None
self.results = None

self.last_result = None
Expand All @@ -99,21 +110,40 @@ def source(self):
If you want the current source code as a multiline string, use get_source_as_string()."""
return self._source

@property
def original_source(self):
    """Returns the original source code as an array of SourceLine objects

    If you want the original source code as a multiline string, use
    get_original_source_as_string()."""
    return self._original_source

@source.setter
def source(self, val):
    # Only values accepted by SourceLine.is_source() may become the current source
    assert SourceLine.is_source(val)
    self._source = val

@original_source.setter
def original_source(self, val):
    # Only values accepted by SourceLine.is_source() may become the original source
    assert SourceLine.is_source(val)
    self._original_source = val

def get_source_as_string(self, comments=True, indentation=True, tags=True):
    """Render the current source code as one multi-line string

    The `comments`, `indentation` and `tags` flags are passed through
    unchanged to SourceLine.write_multiline()."""
    render_options = {"comments": comments, "indentation": indentation, "tags": tags}
    return SourceLine.write_multiline(self.source, **render_options)

def get_original_source_as_string(self, comments=True, indentation=True, tags=True):
    """Render the original source code as one multi-line string

    The `comments`, `indentation` and `tags` flags are passed through
    unchanged to SourceLine.write_multiline()."""
    render_options = {"comments": comments, "indentation": indentation, "tags": tags}
    return SourceLine.write_multiline(self.original_source, **render_options)

def set_source_as_string(self, s):
    """Provide input source code as multi-line string

    The string is parsed with SourceLine.read_multiline() and stored as the
    current source. If no original source has been recorded yet, the parsed
    source is also stored as the original source."""
    assert isinstance(s, str)
    self.source = SourceLine.read_multiline(s, reduce=not self.config.ignore_tags)
    if self.original_source is not None:
        return
    self.original_source = self.source

def load_source_raw(self, source):
"""Load source code from multi-line string"""
Expand Down Expand Up @@ -145,6 +175,114 @@ def _dump(name, s, logger, err=False):
for l in s:
fun(f"> {l}")

def global_selftest(self, funcname, address_gprs, iterations=5):
    """Conduct a function-level selftest

    Assembles both the original source and the current source, emulates a call
    to `funcname` in each of them on identical randomized register, memory and
    stack contents, and checks that the final RAM contents and the callee-saved
    registers agree.

    - funcname: Name of function to be called. Must be exposed as a symbol
    - address_gprs: Dictionary indicating which GPRs are pointers to buffers of which size.
         For example, `{ "x0": 1024, "x4": 1024 }` would indicate that both x0 and x4
         point to buffers of size 1024 bytes. The global selftest needs to know this to
         setup valid calls to the assembly routine.
    - iterations: Number of randomized test runs to perform.

    Raises SlothyGlobalSelfTestException if unicorn-engine is unavailable, if no
    source has been loaded, if the requested buffers do not fit into the emulated
    RAM, or if old and new code behave differently. If the architecture does not
    provide unicorn/llvm-mc settings, a warning is logged and nothing is tested.

    DEPENDENCY: To run this, you need `llvm-nm`, `llvm-readobj`, `llvm-mc`
                in your PATH. Those are part of a standard LLVM setup.
    """

    log = self.logger.getChild(f"global_selftest_{funcname}")

    if Uc is None:
        raise SlothyGlobalSelfTestException("Cannot run selftest -- unicorn-engine is not available.")

    arch = self.config.arch
    if arch.unicorn_arch is None or arch.llvm_mc_arch is None:
        log.warning("Selftest not supported on target architecture")
        return

    old_source = self.original_source
    new_source = self.source
    if old_source is None or new_source is None:
        raise SlothyGlobalSelfTestException("Cannot run selftest -- no source code has been loaded.")

    # Memory layout of the emulated machine
    CODE_BASE = 0x010000
    CODE_SZ = 0x010000
    CODE_END = CODE_BASE + CODE_SZ
    RAM_BASE = 0x030000
    RAM_SZ = 0x010000
    STACK_BASE = 0x040000
    STACK_SZ = 0x010000
    STACK_TOP = STACK_BASE + STACK_SZ

    # Buffers are carved out of RAM back to back. Refuse layouts that would spill
    # past the RAM region, which is the only region compared after emulation.
    total_buffer_sz = sum(address_gprs.values())
    if total_buffer_sz > RAM_SZ:
        raise SlothyGlobalSelfTestException(
            f"Cannot run selftest -- buffers need {total_buffer_sz} bytes, "
            f"but only {RAM_SZ} bytes of emulated RAM are available.")

    regs = [r for ty in arch.RegisterType
            for r in arch.RegisterType.list_registers(ty)]

    def run_code(code, initial_register_contents, initial_memory, initial_stack):
        """Assemble `code`, emulate `funcname` on the given initial state, and
        return the final register contents and the final RAM contents."""
        objcode, offset = LLVM_Mc.assemble(code,
                                           arch.llvm_mc_arch,
                                           arch.llvm_mc_attr,
                                           log, symbol=funcname,
                                           preprocessor=self.config.compiler_binary,
                                           include_paths=self.config.compiler_include_paths)
        # Setup emulator
        mu = Uc(arch.unicorn_arch, arch.unicorn_mode)
        # Copy initial register contents into emulator
        for r, v in initial_register_contents.items():
            ur = arch.RegisterType.unicorn_reg_by_name(r)
            if ur is None:
                continue
            mu.reg_write(ur, v)
        # Put a valid address in the LR that serves as the marker to terminate emulation
        mu.reg_write(arch.RegisterType.unicorn_link_register(), CODE_END)
        # Setup stack
        mu.reg_write(arch.RegisterType.unicorn_stack_pointer(), STACK_TOP)
        # Copy code into emulator
        mu.mem_map(CODE_BASE, CODE_SZ)
        mu.mem_write(CODE_BASE, objcode)

        # Copy initial memory contents into emulator
        mu.mem_map(RAM_BASE, RAM_SZ)
        mu.mem_write(RAM_BASE, initial_memory)
        # Setup stack
        mu.mem_map(STACK_BASE, STACK_SZ)
        mu.mem_write(STACK_BASE, initial_stack)
        # Run emulator from the function's entry point until it returns to the LR marker
        mu.emu_start(CODE_BASE + offset, CODE_END)

        final_register_contents = {}
        for r in regs:
            ur = arch.RegisterType.unicorn_reg_by_name(r)
            if ur is None:
                continue
            final_register_contents[r] = mu.reg_read(ur)
        final_memory_contents = mu.mem_read(RAM_BASE, RAM_SZ)

        return final_register_contents, final_memory_contents

    for _ in range(iterations):
        initial_memory = os.urandom(RAM_SZ)
        initial_stack = os.urandom(STACK_SZ)
        # Set initial register contents arbitrarily, except for registers
        # which must hold valid memory addresses.
        # NOTE: byteorder is passed explicitly since it only has a default
        # value from Python 3.11 onwards.
        initial_register_contents = {
            r: int.from_bytes(os.urandom(16), byteorder="big") for r in regs
        }
        cur_ram = RAM_BASE
        for reg, sz in address_gprs.items():
            initial_register_contents[reg] = cur_ram
            cur_ram += sz

        final_regs_old, final_mem_old = run_code(
            old_source, initial_register_contents, initial_memory, initial_stack)
        final_regs_new, final_mem_new = run_code(
            new_source, initial_register_contents, initial_memory, initial_stack)

        # Check if memory contents are the same
        if final_mem_old != final_mem_new:
            raise SlothyGlobalSelfTestException("Selftest failed: Memory mismatch")

        # Check that callee-saved registers are the same
        regs_expected = arch.RegisterType.callee_saved_registers()
        for r in regs_expected:
            if final_regs_old[r] != final_regs_new[r]:
                raise SlothyGlobalSelfTestException(f"Selftest failed: Register mismatch for {r}: {hex(final_regs_old[r])} != {hex(final_regs_new[r])}")

    log.info(f"Global selftest for {funcname}: OK")

#
# Stateful wrappers around heuristics
#
Expand Down
Loading

0 comments on commit 71a037b

Please sign in to comment.