Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add equivalence tester #108

Merged
merged 2 commits into from
Dec 9, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Implement global self test
This commit prototypes an implementation of a 'global' selftest,
building and emulating the entire source code loaded into SLOTHY
and checking that input and output behave equivalently.

To use, call `slothy.global_selftest(...)` at any point during
stateful SLOTHY operations. Then, the global selftest will compare
the current state of the code with the original state.

See ntt_kyber_123_4567 for an example.
  • Loading branch information
hanno-becker committed Dec 9, 2024
commit 0e04f488534a84bdc5e2a73e742e5fdba075566f
34 changes: 29 additions & 5 deletions example.py
Original file line number Diff line number Diff line change
@@ -656,6 +656,24 @@ def core(self,slothy):
slothy.config.inputs_are_outputs = True
slothy.optimize(start="start", end="end")

class Armv7mExample0Func(Example):
def __init__(self, var="", arch=Arch_Armv7M, target=Target_CortexM7):
name = "armv7m_simple0_func"
infile = name

if var != "":
name += f"_{var}"
infile += f"_{var}"
name += f"_{target_label_dict[target]}"

super().__init__(infile, name, rename=True, arch=arch, target=target)

def core(self,slothy):
slothy.config.variable_size=True
slothy.config.inputs_are_outputs = True
slothy.optimize(start="start", end="end")
slothy.global_selftest("my_func", {"r0": 1024 })

class Armv7mLoopSubs(Example):
def __init__(self, var="", arch=Arch_Armv7M, target=Target_CortexM7):
name = "loop_subs"
@@ -688,7 +706,7 @@ def core(self,slothy):
slothy.config.variable_size=True
slothy.config.outputs = ["r6"]
slothy.optimize_loop("start")

class Armv7mLoopVmovCmp(Example):
def __init__(self, var="", arch=Arch_Armv7M, target=Target_CortexM7):
name = "loop_vmov_cmp"
@@ -720,12 +738,13 @@ def __init__(self, var="", arch=AArch64_Neon, target=Target_CortexA55):

def core(self,slothy):
slothy.optimize()

class ntt_kyber_123_4567(Example):
def __init__(self, var="", arch=AArch64_Neon, target=Target_CortexA55, timeout=None):
name = "ntt_kyber_123_4567"
infile = name

self.var = var
if var != "":
name += f"_{var}"
infile += f"_{var}"
@@ -744,6 +763,10 @@ def core(self, slothy):
slothy.config.constraints.stalls_first_attempt = 64
slothy.optimize_loop("layer123_start")
slothy.optimize_loop("layer4567_start")
# Build + emulate entire function to test that behaviour has not changed
if self.var == "":
slothy.global_selftest("ntt_kyber_123_4567",
{"x0": 1024, "x1": 1024, "x3": 1024, "x4": 1024, "x5": 1024})

class intt_kyber_123_4567(Example):
def __init__(self, var="", arch=AArch64_Neon, target=Target_CortexA55, timeout=None):
@@ -1226,7 +1249,7 @@ def core(self, slothy):
slothy.config.constraints.stalls_first_attempt = 110
slothy.optimize_loop("layer123_start")




class ntt_dilithium_123(Example):
@@ -1349,7 +1372,7 @@ def core(self, slothy):
slothy.optimize_loop("layer5678_start")

slothy.config = conf.copy()

if self.timeout is not None:
slothy.config.timeout = self.timeout // 12

@@ -1366,7 +1389,7 @@ def core(self, slothy):
slothy.config.split_heuristic_stepsize = 0.1
slothy.config.constraints.stalls_first_attempt = 14
slothy.optimize_loop("layer1234_start")


class ntt_dilithium_1234(Example):
def __init__(self, var="", arch=AArch64_Neon, target=Target_CortexA72):
@@ -1513,6 +1536,7 @@ def main():

# Armv7m examples
Armv7mExample0(),
Armv7mExample0Func(),

# Loop examples
AArch64LoopSubs(),
25 changes: 20 additions & 5 deletions examples/naive/aarch64/ntt_kyber_123_4567.s
Original file line number Diff line number Diff line change
@@ -23,8 +23,13 @@
/// SOFTWARE.
///

// Commented out for simple standalone emulation not
// requiring correct constant data
//
// Should be commented when used.
//
// Needed to provide ASM_LOAD directive
#include <hal_env.h>
// #include <hal_envh>

.macro mulmodq dst, src, const, idx0, idx1
sqrdmulh t2.8h, \src\().8h, \const\().h[\idx1]
@@ -154,7 +159,12 @@
.data
.p2align 4
roots:
#include "ntt_kyber_123_45_67_twiddles.s"
// Commented out for simple standalone emulation not
// requiring correct constant data
//
// Should be commented when used.
//
// #include "ntt_kyber_123_45_67_twiddles.s"

in .req x0
inp .req x1
@@ -223,9 +233,14 @@ ntt_kyber_123_4567:
_ntt_kyber_123_4567:
push_stack

ASM_LOAD(r_ptr0, roots)
ASM_LOAD(r_ptr1, roots_l56)
ASM_LOAD(xtmp, const_addr)
// Commented out for simple standalone emulation not
// requiring correct constant data.
//
// Should be commented when used.
//
// ASM_LOAD(r_ptr0, roots)
// ASM_LOAD(r_ptr1, roots_l56)
// ASM_LOAD(xtmp, const_addr)

ld1 {consts.8h}, [xtmp]

20 changes: 20 additions & 0 deletions examples/naive/armv7m/armv7m_simple0_func.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
.syntax unified
//.cpu cortex-m4 // llvm-mc does not like this...
//.thumb // unicorn seems to get confused by this...

.align 2
.global my_func
// .type my_func, %function // llvm-mc does not like this...
my_func:
push {r4-r11, lr}

start:
ldr r8, [r0, #4]
add r8, r2, r8
eor.w r8, r8, r3
smlabt r3, r2, r2, r8
asrs r3, r3, #1
str r3, [r0, #4]
end:

pop {r4-r11, pc}
39 changes: 8 additions & 31 deletions slothy/core/config.py
Original file line number Diff line number Diff line change
@@ -122,8 +122,8 @@ def selftest(self):
equivalence-check the loop-form (including the compare+branch instructions
at the loop boundary) rather than the unrolled code.

DEPENDENCY: To run this, you need `llvm-mc` the binary in your path or configured
as via `llvm_mc_binary`, and `unicorn-engine` Python bindings setup.
DEPENDENCY: To run this, you need `llvm-nm`, `llvm-readobj`, `llvm-mc`
in your PATH. Those are part of a standard LLVM setup.

NOTE: This is so far implemented as a repeated randomized test -- nothing clever.
"""
@@ -416,8 +416,8 @@ def with_llvm_mca(self):
"""Indicates whether LLVM MCA should be run prior and after optimization
to obtain approximate performance data based on LLVM's scheduling models.

If this is set, both Config.llvm_mca_binary and Config.compiler_binary
need to be set.
If this is set, Config.compiler_binary need to be set, and llcm-mca in
your PATH.
"""
return self._with_llvm_mca_before and self._with_llvm_mca_after

@@ -438,8 +438,8 @@ def with_llvm_mca_before(self):
"""Indicates whether LLVM MCA should be run prior to optimization
to obtain approximate performance data based on LLVM's scheduling models.

If this is set, both Config.llvm_mca_binary and Config.compiler_binary
need to be set.
If this is set, Config.compiler_binary need to be set, and llcm-mca in
your PATH.
"""
return self._with_llvm_mca_before

@@ -448,8 +448,8 @@ def with_llvm_mca_after(self):
"""Indicates whether LLVM MCA should be run after optimization
to obtain approximate performance data based on LLVM's scheduling models.

If this is set, both Config.llvm_mca_binary and Config.compiler_binary
need to be set.
If this is set, Config.compiler_binary need to be set, and llcm-mca in
your PATH.
"""
return self._with_llvm_mca_after

@@ -469,21 +469,6 @@ def compiler_include_paths(self):
or `with_llvm_mca_after` are set."""
return self._compiler_include_paths

@property
def llvm_mca_binary(self):
"""The llvm-mca binary to be used for estimated performance annotations

This is only relevant if `with_llvm_mca_before` or `with_llvm_mca_after`
is set."""
return self._llvm_mca_binary

@property
def llvm_mc_binary(self):
"""The llvm-mc binary to be used for assembling output data

This is only relevant if `selftest` is set."""
return self._llvm_mc_binary

@property
def timeout(self):
"""The timeout in seconds after which the underlying constraint solver stops
@@ -1228,8 +1213,6 @@ def __init__(self, Arch, Target):

self._compiler_binary = "gcc"
self._compiler_include_paths = None
self._llvm_mca_binary = "llvm-mca"
self._llvm_mc_binary = "llvm-mc"

self.keep_tags = True
self.inherit_macro_comments = False
@@ -1377,12 +1360,6 @@ def compiler_binary(self, val):
@compiler_include_paths.setter
def compiler_include_paths(self, val):
self._compiler_include_paths = val
@llvm_mca_binary.setter
def llvm_mca_binary(self, val):
self._llvm_mca_binary = val
@llvm_mc_binary.setter
def llvm_mc_binary(self, val):
self._llvm_mc_binary = val
@timeout.setter
def timeout(self, val):
self._timeout = val
10 changes: 5 additions & 5 deletions slothy/core/core.py
Original file line number Diff line number Diff line change
@@ -42,7 +42,8 @@
from slothy.helper import LockAttributes, Permutation, DeferHandler, SourceLine, LLVM_Mc

try:
from unicorn import Uc
from unicorn import *
from unicorn.arm_const import *
except ImportError:
Uc = None

@@ -877,11 +878,10 @@ def selftest(self, log):
self._config.arch.RegisterType.list_registers(ty)]

def run_code(code, txt=None):
objcode = LLVM_Mc.assemble(code, self._config.llvm_mc_binary,
objcode, offset = LLVM_Mc.assemble(code,
self._config.arch.llvm_mc_arch,
self._config.arch.llvm_mc_attr,
log)

# Setup emulator
mu = Uc(self.config.arch.unicorn_arch, self.config.arch.unicorn_mode)
# Copy initial register contents into emulator
@@ -897,7 +897,7 @@ def run_code(code, txt=None):
mu.mem_map(RAM_BASE, RAM_SZ)
mu.mem_write(RAM_BASE, initial_memory)
# Run emulator
mu.emu_start(CODE_BASE, CODE_BASE + len(objcode))
mu.emu_start(CODE_BASE + offset, CODE_BASE + len(objcode))

final_register_contents = {}
for r in regs:
@@ -937,7 +937,7 @@ def run_code(code, txt=None):
if final_regs_old[r] != final_regs_new[r]:
raise SlothySelfTestException(f"Selftest failed: Register mismatch for {r}: {hex(final_regs_old[r])} != {hex(final_regs_new[r])}")

log.info("Selftest: OK")
log.info("Local selftest: OK")

def selfcheck_with_fixup(self, log):
"""Do selfcheck, and consider preamble/postamble fixup in case of SW pipelining
Loading
Loading