Skip to content

Commit

Permalink
Merge pull request #108 from slothy-optimizer/equiv_test
Browse files Browse the repository at this point in the history
Add equivalence tester
  • Loading branch information
hanno-becker authored Dec 9, 2024
2 parents 2ebb278 + 0e04f48 commit c31b6b3
Show file tree
Hide file tree
Showing 13 changed files with 781 additions and 83 deletions.
33 changes: 33 additions & 0 deletions .github/actions/setup-ubuntu/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Composite action shared by the CI jobs: installs the base apt packages,
# any additional packages requested by the caller, and a Python virtual
# environment populated from requirements.txt.
name: Setup ubuntu
description: Setup ubuntu

inputs:
  packages:
    description: Space-separated list of additional packages to install
    required: false
    default: 'llvm llvm-runtime'

runs:
  using: composite
  steps:
    - name: Update package repository
      shell: bash
      run: |
        sudo apt-get update
    - name: Install base packages
      shell: bash
      run: |
        sudo apt-get install python3-venv python3-pip make -y
    - name: Install additional packages
      if: ${{ inputs.packages != '' }}
      shell: bash
      env:
        # Hand the input over via the environment instead of expanding it
        # into the script text, so its content is never parsed as shell code.
        PACKAGES: ${{ inputs.packages }}
      run: |
        # Split the space-separated list into one argument per package.
        read -r -a pkgs <<< "$PACKAGES"
        sudo apt-get install -y "${pkgs[@]}"
    - name: Setup Python venv
      shell: bash
      run: |
        python3 -m venv venv
        source venv/bin/activate
        python3 -m pip install -r requirements.txt
        deactivate
        # Prepend the venv's bin directory to PATH for all later steps of the
        # calling job, so a plain `python3` resolves to the venv interpreter.
        echo "$(pwd)/venv/bin/" >> "$GITHUB_PATH"
46 changes: 9 additions & 37 deletions .github/workflows/test_basic.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,12 @@ jobs:
github.event.pull_request.user.login == 'mkannwischer'
}}
runs-on: ubuntu-latest
strategy:
matrix:
strategy:
matrix:
target: [slothy.targets.arm_v7m.cortex_m7,slothy.targets.arm_v81m.cortex_m55r1, slothy.targets.arm_v81m.cortex_m85r1, slothy.targets.aarch64.cortex_a55, slothy.targets.aarch64.cortex_a72_frontend, slothy.targets.aarch64.apple_m1_firestorm_experimental, slothy.targets.aarch64.apple_m1_icestorm_experimental]
steps:
- uses: actions/checkout@v3
- name: Install python dependencies
run: |
python3 -m venv venv
./venv/bin/python3 -m pip install -r requirements.txt
echo BASH_ENV="./venv/bin/activate" >> $GITHUB_ENV
- uses: ./.github/actions/setup-ubuntu
- name: Run examples
run: |
python3 example.py --dry-run --only-target=${{ matrix.target }}
Expand All @@ -34,11 +30,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Install python dependencies
run: |
python3 -m venv venv
./venv/bin/python3 -m pip install -r requirements.txt
echo BASH_ENV="./venv/bin/activate" >> $GITHUB_ENV
- uses: ./.github/actions/setup-ubuntu
- name: Run tutorial
run: |
(cd tutorial && ./tutorial_all.sh)
Expand All @@ -51,11 +43,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Install python dependencies
run: |
python3 -m venv venv
./venv/bin/python3 -m pip install -r requirements.txt
echo BASH_ENV="./venv/bin/activate" >> $GITHUB_ENV
- uses: ./.github/actions/setup-ubuntu
- name: Run examples
run: |
python3 example.py --examples simple0,simple1,simple0_loop,simple1_loop
Expand All @@ -68,11 +56,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Install python dependencies
run: |
python3 -m venv venv
./venv/bin/python3 -m pip install -r requirements.txt
echo BASH_ENV="./venv/bin/activate" >> $GITHUB_ENV
- uses: ./.github/actions/setup-ubuntu
- name: Run examples
run: |
python3 example.py --examples ntt_kyber_1_23_45_67_m55,ntt_dilithium_12_34_56_78_m55 --timeout=300
Expand All @@ -85,11 +69,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Install python dependencies
run: |
python3 -m venv venv
./venv/bin/python3 -m pip install -r requirements.txt
echo BASH_ENV="./venv/bin/activate" >> $GITHUB_ENV
- uses: ./.github/actions/setup-ubuntu
- name: Run examples
run: |
python3 example.py --examples ntt_kyber_123_4567_a55,ntt_dilithium_123_45678_a55 --timeout=300
Expand All @@ -102,11 +82,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Install python dependencies
run: |
python3 -m venv venv
./venv/bin/python3 -m pip install -r requirements.txt
echo BASH_ENV="./venv/bin/activate" >> $GITHUB_ENV
- uses: ./.github/actions/setup-ubuntu
- name: Run examples
run: |
(cd paper/scripts && NO_LOG=Y ./slothy_sqmag.sh)
Expand All @@ -119,11 +95,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Install python dependencies
run: |
python3 -m venv venv
./venv/bin/python3 -m pip install -r requirements.txt
echo BASH_ENV="./venv/bin/activate" >> $GITHUB_ENV
- uses: ./.github/actions/setup-ubuntu
- name: Run examples
run: |
(cd paper/scripts && NO_LOG=Y ./slothy_fft.sh)
34 changes: 29 additions & 5 deletions example.py
Original file line number Diff line number Diff line change
Expand Up @@ -656,6 +656,24 @@ def core(self,slothy):
slothy.config.inputs_are_outputs = True
slothy.optimize(start="start", end="end")

class Armv7mExample0Func(Example):
    """Armv7-M example built from the `armv7m_simple0_func` input file.

    The region between the `start` and `end` labels is optimized, after
    which the function `my_func` is handed to SLOTHY's global selftest.
    """

    def __init__(self, var="", arch=Arch_Armv7M, target=Target_CortexM7):
        base = "armv7m_simple0_func"
        # An optional variant tag is appended to both the input file name and
        # the example name; the target label only becomes part of the name.
        suffix = f"_{var}" if var != "" else ""
        infile = base + suffix
        name = f"{infile}_{target_label_dict[target]}"

        super().__init__(infile, name, rename=True, arch=arch, target=target)

    def core(self, slothy):
        slothy.config.variable_size = True
        slothy.config.inputs_are_outputs = True
        slothy.optimize(start="start", end="end")
        # Check the complete function after optimization.
        # NOTE(review): the dict presumably maps pointer registers to the
        # size of the buffer they should reference -- confirm against
        # global_selftest.
        slothy.global_selftest("my_func", {"r0": 1024})

class Armv7mLoopSubs(Example):
def __init__(self, var="", arch=Arch_Armv7M, target=Target_CortexM7):
name = "loop_subs"
Expand Down Expand Up @@ -688,7 +706,7 @@ def core(self,slothy):
slothy.config.variable_size=True
slothy.config.outputs = ["r6"]
slothy.optimize_loop("start")

class Armv7mLoopVmovCmp(Example):
def __init__(self, var="", arch=Arch_Armv7M, target=Target_CortexM7):
name = "loop_vmov_cmp"
Expand Down Expand Up @@ -720,12 +738,13 @@ def __init__(self, var="", arch=AArch64_Neon, target=Target_CortexA55):

def core(self,slothy):
slothy.optimize()

class ntt_kyber_123_4567(Example):
def __init__(self, var="", arch=AArch64_Neon, target=Target_CortexA55, timeout=None):
name = "ntt_kyber_123_4567"
infile = name

self.var = var
if var != "":
name += f"_{var}"
infile += f"_{var}"
Expand All @@ -744,6 +763,10 @@ def core(self, slothy):
slothy.config.constraints.stalls_first_attempt = 64
slothy.optimize_loop("layer123_start")
slothy.optimize_loop("layer4567_start")
# Build + emulate entire function to test that behaviour has not changed
if self.var == "":
slothy.global_selftest("ntt_kyber_123_4567",
{"x0": 1024, "x1": 1024, "x3": 1024, "x4": 1024, "x5": 1024})

class intt_kyber_123_4567(Example):
def __init__(self, var="", arch=AArch64_Neon, target=Target_CortexA55, timeout=None):
Expand Down Expand Up @@ -1226,7 +1249,7 @@ def core(self, slothy):
slothy.config.constraints.stalls_first_attempt = 110
slothy.optimize_loop("layer123_start")




class ntt_dilithium_123(Example):
Expand Down Expand Up @@ -1349,7 +1372,7 @@ def core(self, slothy):
slothy.optimize_loop("layer5678_start")

slothy.config = conf.copy()

if self.timeout is not None:
slothy.config.timeout = self.timeout // 12

Expand All @@ -1366,7 +1389,7 @@ def core(self, slothy):
slothy.config.split_heuristic_stepsize = 0.1
slothy.config.constraints.stalls_first_attempt = 14
slothy.optimize_loop("layer1234_start")


class ntt_dilithium_1234(Example):
def __init__(self, var="", arch=AArch64_Neon, target=Target_CortexA72):
Expand Down Expand Up @@ -1513,6 +1536,7 @@ def main():

# Armv7m examples
Armv7mExample0(),
Armv7mExample0Func(),

# Loop examples
AArch64LoopSubs(),
Expand Down
25 changes: 20 additions & 5 deletions examples/naive/aarch64/ntt_kyber_123_4567.s
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,13 @@
/// SOFTWARE.
///

// Commented out for simple standalone emulation not
// requiring correct constant data
//
// Should be uncommented when used.
//
// Needed to provide ASM_LOAD directive
#include <hal_env.h>
// #include <hal_env.h>

.macro mulmodq dst, src, const, idx0, idx1
sqrdmulh t2.8h, \src\().8h, \const\().h[\idx1]
Expand Down Expand Up @@ -154,7 +159,12 @@
.data
.p2align 4
roots:
#include "ntt_kyber_123_45_67_twiddles.s"
// Commented out for simple standalone emulation not
// requiring correct constant data
//
// Should be uncommented when used.
//
// #include "ntt_kyber_123_45_67_twiddles.s"

in .req x0
inp .req x1
Expand Down Expand Up @@ -223,9 +233,14 @@ ntt_kyber_123_4567:
_ntt_kyber_123_4567:
push_stack

ASM_LOAD(r_ptr0, roots)
ASM_LOAD(r_ptr1, roots_l56)
ASM_LOAD(xtmp, const_addr)
// Commented out for simple standalone emulation not
// requiring correct constant data.
//
// Should be uncommented when used.
//
// ASM_LOAD(r_ptr0, roots)
// ASM_LOAD(r_ptr1, roots_l56)
// ASM_LOAD(xtmp, const_addr)

ld1 {consts.8h}, [xtmp]

Expand Down
20 changes: 20 additions & 0 deletions examples/naive/armv7m/armv7m_simple0_func.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Minimal Armv7-M example function `my_func`.
//
// Reads the word at [r0, #4], combines it with r2 and r3, and writes the
// result back to the same location. r0 is dereferenced, so it must hold a
// valid address with bytes 4..7 accessible. r3 and r8 are overwritten.
.syntax unified
//.cpu cortex-m4 // llvm-mc does not like this...
//.thumb // unicorn seems to get confused by this...

.align 2
.global my_func
// .type my_func, %function // llvm-mc does not like this...
my_func:
// Save r4-r11 and the return address on the stack.
push {r4-r11, lr}

// `start` / `end` delimit the instruction sequence that example.py passes
// to slothy.optimize(start="start", end="end").
start:
// r8 = word at [r0 + 4]
ldr r8, [r0, #4]
// r8 = r2 + r8
add r8, r2, r8
// r8 = r8 ^ r3
eor.w r8, r8, r3
// r3 = (signed low halfword of r2) * (signed high halfword of r2) + r8
smlabt r3, r2, r2, r8
// r3 = r3 >> 1 (arithmetic shift); the `s` suffix also updates the flags
asrs r3, r3, #1
// Store the result back to [r0 + 4]
str r3, [r0, #4]
end:

// Restore r4-r11 and return by popping the saved lr into pc.
pop {r4-r11, pc}
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ numpy==1.26.4
ortools==9.7.2996
pandas==2.1.1
sympy==1.12
unicorn==2.1.1
Loading

0 comments on commit c31b6b3

Please sign in to comment.