Skip to content

Commit

Permalink
Add Kyber NTT 123-4567 variant with lazy transpose
Browse files: browse the repository at this point in the history
  • Loading branch information
hanno-becker committed Mar 20, 2024
1 parent 2ce4fa5 commit bef1def
Show file tree
Hide file tree
Showing 6 changed files with 1,048 additions and 15 deletions.
31 changes: 26 additions & 5 deletions example.py
Original file line number Diff line number Diff line change
Expand Up @@ -590,14 +590,31 @@ def core(self, slothy):
slothy.config.sw_pipelining.enabled = True
slothy.config.inputs_are_outputs = True
slothy.config.sw_pipelining.minimize_overlapping = False
slothy.config.sw_pipelining.optimize_preamble = False
slothy.config.sw_pipelining.allow_post = True
slothy.config.variable_size = True
slothy.config.reserved_regs = [
f"x{i}" for i in range(0, 7)] + ["x30", "sp"]
slothy.config.reserved_regs += self.target_reserved
slothy.config.constraints.stalls_first_attempt = 64
slothy.config.constraints.stalls_first_attempt = 32
slothy.config.inputs_are_outputs = True
slothy.optimize_loop("layer123_start")
slothy.optimize_loop("layer4567_start")

slothy.config.outputs = slothy.last_result.kernel_input_output + [f"x{i}" for i in range(0,6)]
slothy.config.locked_registers = [f"x{i}" for i in range(0,6)]
slothy.config.sw_pipelining.enabled = False
slothy.config.inputs_are_outputs = False
slothy.optimize(start="ntt_kyber_123_4567_preamble", end="layer123_start")

slothy.config.outputs = []
slothy.config.sw_pipelining.enabled = True
slothy.config.inputs_are_outputs = True
slothy.config.sw_pipelining.optimize_preamble = True
slothy.config.sw_pipelining.optimize_postamble = True
slothy.optimize_loop("layer4567_start", postamble_label="ntt_kyber_123_4567_postamble")

slothy.config.outputs = [f"v{i}" for i in range(8,16)]
slothy.config.locked_registers = [f"x{i}" for i in range(0,6)]
slothy.config.sw_pipelining.enabled = False
slothy.config.inputs_are_outputs = False
slothy.optimize(start="ntt_kyber_123_4567_postamble", end="ntt_kyber_123_4567_end")

class ntt_kyber_123(Example):
def __init__(self, var="", arch=AArch64_Neon, target=Target_CortexA55):
Expand Down Expand Up @@ -1286,20 +1303,23 @@ def main():
ntt_kyber_12_345_67(True, target=Target_CortexM85r1),
# Cortex-A55
ntt_kyber_123_4567(),
ntt_kyber_123_4567(var="lazy_trn"),
ntt_kyber_123_4567(var="scalar_load"),
ntt_kyber_123_4567(var="scalar_store"),
ntt_kyber_123_4567(var="scalar_load_store"),
ntt_kyber_123_4567(var="manual_st4"),
ntt_kyber_1234_567(),
# Cortex-A72
ntt_kyber_123_4567(target=Target_CortexA72),
ntt_kyber_123_4567(var="lazy_trn", target=Target_CortexA72),
ntt_kyber_123_4567(var="scalar_load", target=Target_CortexA72),
ntt_kyber_123_4567(var="scalar_store", target=Target_CortexA72),
ntt_kyber_123_4567(var="scalar_load_store", target=Target_CortexA72),
ntt_kyber_123_4567(var="manual_st4", target=Target_CortexA72),
ntt_kyber_1234_567(target=Target_CortexA72),
# Apple M1 Firestorm
ntt_kyber_123_4567(target=Target_AppleM1_firestorm, timeout=3600),
ntt_kyber_123_4567(var="lazy_trn", target=Target_AppleM1_firestorm, timeout=3600),
ntt_kyber_123_4567(var="scalar_load", target=Target_AppleM1_firestorm, timeout=3600),
ntt_kyber_123_4567(var="scalar_store", target=Target_AppleM1_firestorm, timeout=3600),
ntt_kyber_123_4567(var="scalar_load_store", target=Target_AppleM1_firestorm, timeout=3600),
Expand All @@ -1308,6 +1328,7 @@ def main():
ntt_kyber_1234_567(var="manual_st4", target=Target_AppleM1_firestorm, timeout=300),
# Apple M1 Icestorm
ntt_kyber_123_4567(target=Target_AppleM1_icestorm, timeout=3600),
ntt_kyber_123_4567(var="lazy_trn", target=Target_AppleM1_icestorm, timeout=3600),
ntt_kyber_123_4567(var="scalar_load", target=Target_AppleM1_icestorm, timeout=3600),
ntt_kyber_123_4567(var="scalar_store", target=Target_AppleM1_icestorm, timeout=3600),
ntt_kyber_123_4567(var="scalar_load_store", target=Target_AppleM1_icestorm, timeout=3600),
Expand Down
Loading

0 comments on commit bef1def

Please sign in to comment.