Skip to content

Commit

Permalink
allow src=dst in mulmod macro
Browse files: browse the repository at this point in the history
  • Loading branch information
dop-amin committed Apr 7, 2024
1 parent ee53b4b commit 8cb796e
Show file tree
Hide file tree
Showing 6 changed files with 92 additions and 116 deletions.
54 changes: 24 additions & 30 deletions examples/naive/aarch64/intt_dilithium_1234_5678.s
Original file line number Diff line number Diff line change
Expand Up @@ -85,15 +85,15 @@
.endm

// mulmodq dst, src, const, idx0, idx1
//
// Modular multiplication of \src by a constant selected from \const by lane
// index; the result is left in \dst. \idx0 is forwarded to vmulq and \idx1 to
// vqrdmulhq (presumably the constant and its precomputed "twisted" companion
// -- confirm against the twiddle-table layout). vqrdmulhq, vmulq and vmls are
// helper macros defined elsewhere in this file; `modulus` and `t2` are
// register aliases defined elsewhere.
//
// The vqrdmulhq result is parked in the scratch register t2 before \dst is
// written, and \src is never written, so \dst may be the same register as
// \src. Exactly one vmls correction step is applied.
//
// Clobbers: t2 (in addition to \dst).
.macro mulmodq dst, src, const, idx0, idx1
    vqrdmulhq t2, \src, \const, \idx1      // must read \src before \dst is overwritten
    vmulq     \dst, \src, \const, \idx0
    vmls      \dst, t2, modulus
.endm

// mulmod dst, src, const, const_twisted
//
// Modular multiplication of the vector \src by the vector constant \const,
// using \const_twisted as its companion operand for vqrdmulh (presumably the
// precomputed "twisted" constant -- confirm against the table generator).
// The result is left in \dst. vqrdmulh and vmls are helper macros defined
// elsewhere in this file; `modulus` and `t2` are register aliases defined
// elsewhere.
//
// The vqrdmulh result goes to the scratch register t2 before \dst is
// written, and \src is never written, so \dst may be the same register as
// \src.
//
// Clobbers: t2 (in addition to \dst).
.macro mulmod dst, src, const, const_twisted
    vqrdmulh t2, \src, \const_twisted            // must read \src before \dst is overwritten
    mul      \dst\().4s, \src\().4s, \const\().4s // lane-wise 32-bit multiply (low halves)
    vmls     \dst, t2, modulus
.endm

.macro montg_reduce a
Expand All @@ -114,12 +114,6 @@
mulmodq \b, tmp, \root, \idx0, \idx1
.endm

// mulmod_v dst, src, const, const_twisted
//
// Writes \dst from \src and \const via vmul, then overwrites \src with the
// vqrdmulh of \src and \const_twisted and passes it to vmls together with
// `modulus`. vmul, vqrdmulh and vmls are helper macros defined elsewhere.
//
// Side effect: \src is clobbered. Because \src is read again after \dst has
// been written, \dst must not be the same register as \src.
//
// NOTE(review): the surrounding hunk header (-114,12 +114,6) suggests the
// commit this listing comes from removes this macro -- confirm before use.
.macro mulmod_v dst, src, const, const_twisted
vmul \dst, \src, \const
// \src is overwritten here; its original value is lost from this point on.
vqrdmulh \src, \src, \const_twisted
vmls \dst, \src, modulus
.endm

.macro gs_butterfly_v a, b, root, root_twisted
vsub tmp, \a, \b
vadd \a, \a, \b
Expand Down Expand Up @@ -486,25 +480,25 @@ layer1234_start:
str_vo data14, in, (14*(512/8))
str_vo data15, in, (15*(512/8))

mul_ninv data8, data9, data10, data11, data12, data13, data14, data15, data0, data1, data2, data3, data4, data5, data6, data7

canonical_reduce data8, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data9, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data10, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data11, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data12, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data13, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data14, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data15, modulus_half, neg_modulus_half, t2, t3

str_vi data8, in, (16)
str_vo data9, in, (-16 + 1*(512/8))
str_vo data10, in, (-16 + 2*(512/8))
str_vo data11, in, (-16 + 3*(512/8))
str_vo data12, in, (-16 + 4*(512/8))
str_vo data13, in, (-16 + 5*(512/8))
str_vo data14, in, (-16 + 6*(512/8))
str_vo data15, in, (-16 + 7*(512/8))
mul_ninv data0, data1, data2, data3, data4, data5, data6, data7, data0, data1, data2, data3, data4, data5, data6, data7

canonical_reduce data0, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data1, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data2, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data3, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data4, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data5, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data6, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data7, modulus_half, neg_modulus_half, t2, t3

str_vi data0, in, (16)
str_vo data1, in, (-16 + 1*(512/8))
str_vo data2, in, (-16 + 2*(512/8))
str_vo data3, in, (-16 + 3*(512/8))
str_vo data4, in, (-16 + 4*(512/8))
str_vo data5, in, (-16 + 5*(512/8))
str_vo data6, in, (-16 + 6*(512/8))
str_vo data7, in, (-16 + 7*(512/8))

// layer1234_end:
subs count, count, #1
Expand Down
54 changes: 24 additions & 30 deletions examples/naive/aarch64/intt_dilithium_1234_5678_manual_ld4.s
Original file line number Diff line number Diff line change
Expand Up @@ -85,15 +85,15 @@
.endm

// mulmodq dst, src, const, idx0, idx1
//
// Modular multiplication of \src by a constant selected from \const by lane
// index; the result is left in \dst. \idx0 is forwarded to vmulq and \idx1 to
// vqrdmulhq (presumably the constant and its precomputed "twisted" companion
// -- confirm against the twiddle-table layout). vqrdmulhq, vmulq and vmls are
// helper macros defined elsewhere in this file; `modulus` and `t2` are
// register aliases defined elsewhere.
//
// The vqrdmulhq result is parked in the scratch register t2 before \dst is
// written, and \src is never written, so \dst may be the same register as
// \src. Exactly one vmls correction step is applied.
//
// Clobbers: t2 (in addition to \dst).
.macro mulmodq dst, src, const, idx0, idx1
    vqrdmulhq t2, \src, \const, \idx1      // must read \src before \dst is overwritten
    vmulq     \dst, \src, \const, \idx0
    vmls      \dst, t2, modulus
.endm

// mulmod dst, src, const, const_twisted
//
// Modular multiplication of the vector \src by the vector constant \const,
// using \const_twisted as its companion operand for vqrdmulh (presumably the
// precomputed "twisted" constant -- confirm against the table generator).
// The result is left in \dst. vqrdmulh and vmls are helper macros defined
// elsewhere in this file; `modulus` and `t2` are register aliases defined
// elsewhere.
//
// The vqrdmulh result goes to the scratch register t2 before \dst is
// written, and \src is never written, so \dst may be the same register as
// \src.
//
// Clobbers: t2 (in addition to \dst).
.macro mulmod dst, src, const, const_twisted
    vqrdmulh t2, \src, \const_twisted            // must read \src before \dst is overwritten
    mul      \dst\().4s, \src\().4s, \const\().4s // lane-wise 32-bit multiply (low halves)
    vmls     \dst, t2, modulus
.endm

.macro montg_reduce a
Expand All @@ -114,12 +114,6 @@
mulmodq \b, tmp, \root, \idx0, \idx1
.endm

// mulmod_v dst, src, const, const_twisted
//
// Writes \dst from \src and \const via vmul, then overwrites \src with the
// vqrdmulh of \src and \const_twisted and passes it to vmls together with
// `modulus`. vmul, vqrdmulh and vmls are helper macros defined elsewhere.
//
// Side effect: \src is clobbered. Because \src is read again after \dst has
// been written, \dst must not be the same register as \src.
//
// NOTE(review): the surrounding hunk header (-114,12 +114,6) suggests the
// commit this listing comes from removes this macro -- confirm before use.
.macro mulmod_v dst, src, const, const_twisted
vmul \dst, \src, \const
// \src is overwritten here; its original value is lost from this point on.
vqrdmulh \src, \src, \const_twisted
vmls \dst, \src, modulus
.endm

.macro gs_butterfly_v a, b, root, root_twisted
vsub tmp, \a, \b
vadd \a, \a, \b
Expand Down Expand Up @@ -484,25 +478,25 @@ layer1234_start:
str_vo data14, in, (14*(512/8))
str_vo data15, in, (15*(512/8))

mul_ninv data8, data9, data10, data11, data12, data13, data14, data15, data0, data1, data2, data3, data4, data5, data6, data7

canonical_reduce data8, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data9, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data10, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data11, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data12, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data13, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data14, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data15, modulus_half, neg_modulus_half, t2, t3

str_vi data8, in, (16)
str_vo data9, in, (-16 + 1*(512/8))
str_vo data10, in, (-16 + 2*(512/8))
str_vo data11, in, (-16 + 3*(512/8))
str_vo data12, in, (-16 + 4*(512/8))
str_vo data13, in, (-16 + 5*(512/8))
str_vo data14, in, (-16 + 6*(512/8))
str_vo data15, in, (-16 + 7*(512/8))
mul_ninv data0, data1, data2, data3, data4, data5, data6, data7, data0, data1, data2, data3, data4, data5, data6, data7

canonical_reduce data0, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data1, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data2, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data3, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data4, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data5, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data6, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data7, modulus_half, neg_modulus_half, t2, t3

str_vi data0, in, (16)
str_vo data1, in, (-16 + 1*(512/8))
str_vo data2, in, (-16 + 2*(512/8))
str_vo data3, in, (-16 + 3*(512/8))
str_vo data4, in, (-16 + 4*(512/8))
str_vo data5, in, (-16 + 5*(512/8))
str_vo data6, in, (-16 + 6*(512/8))
str_vo data7, in, (-16 + 7*(512/8))

// layer1234_end:
subs count, count, #1
Expand Down
26 changes: 13 additions & 13 deletions examples/naive/aarch64/intt_dilithium_123_45678.s
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,15 @@ xtmp1 .req x11
.endm

// mulmodq dst, src, const, idx0, idx1
//
// Modular multiplication of \src by a constant selected from \const by lane
// index; the result is left in \dst. \idx0 is forwarded to vmulq and \idx1 to
// vqrdmulhq (presumably the constant and its precomputed "twisted" companion
// -- confirm against the twiddle-table layout). The correction step uses
// lane 0 of `consts` (presumably the modulus -- confirm where `consts` is
// loaded). vqrdmulhq, vmulq and vmlsq are helper macros defined elsewhere.
//
// The vqrdmulhq result is parked in the scratch register t2 before \dst is
// written, and \src is never written, so \dst may be the same register as
// \src. Exactly one vmlsq correction step is applied.
//
// Clobbers: t2 (in addition to \dst).
.macro mulmodq dst, src, const, idx0, idx1
    vqrdmulhq t2, \src, \const, \idx1      // must read \src before \dst is overwritten
    vmulq     \dst, \src, \const, \idx0
    vmlsq     \dst, t2, consts, 0
.endm

// mulmod dst, src, const, const_twisted
//
// Modular multiplication of the vector \src by the vector constant \const,
// using \const_twisted as its companion operand for vqrdmulh (presumably the
// precomputed "twisted" constant -- confirm against the table generator).
// The result is left in \dst. The correction step uses lane 0 of `consts`
// (presumably the modulus -- confirm where `consts` is loaded). vqrdmulh and
// vmlsq are helper macros defined elsewhere in this file.
//
// The vqrdmulh result goes to the scratch register t2 before \dst is
// written, and \src is never written, so \dst may be the same register as
// \src. Exactly one vmlsq correction step is applied.
//
// Clobbers: t2 (in addition to \dst).
.macro mulmod dst, src, const, const_twisted
    vqrdmulh t2, \src, \const_twisted            // must read \src before \dst is overwritten
    mul      \dst\().4s, \src\().4s, \const\().4s // lane-wise 32-bit multiply (low halves)
    vmlsq    \dst, t2, consts, 0
.endm

.macro montg_reduce a
Expand Down Expand Up @@ -514,17 +514,17 @@ layer123_start:
str_vo data6, in, (6*(1024/8))
str_vo data7, in, (7*(1024/8))

mul_ninv data4, data5, data6, data7, data0, data1, data2, data3
mul_ninv data0, data1, data2, data3, data0, data1, data2, data3

canonical_reduce data4, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data5, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data6, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data7, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data0, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data1, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data2, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data3, modulus_half, neg_modulus_half, t2, t3

str_vi data4, in, (16)
str_vo data5, in, (-16 + 1*(1024/8))
str_vo data6, in, (-16 + 2*(1024/8))
str_vo data7, in, (-16 + 3*(1024/8))
str_vi data0, in, (16)
str_vo data1, in, (-16 + 1*(1024/8))
str_vo data2, in, (-16 + 2*(1024/8))
str_vo data3, in, (-16 + 3*(1024/8))

subs count, count, #1
cbnz count, layer123_start
Expand Down
26 changes: 13 additions & 13 deletions examples/naive/aarch64/intt_dilithium_123_45678_manual_ld4.s
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,15 @@ xtmp1 .req x11
.endm

// mulmodq dst, src, const, idx0, idx1
//
// Modular multiplication of \src by a constant selected from \const by lane
// index; the result is left in \dst. \idx0 is forwarded to vmulq and \idx1 to
// vqrdmulhq (presumably the constant and its precomputed "twisted" companion
// -- confirm against the twiddle-table layout). The correction step uses
// lane 0 of `consts` (presumably the modulus -- confirm where `consts` is
// loaded). vqrdmulhq, vmulq and vmlsq are helper macros defined elsewhere.
//
// The vqrdmulhq result is parked in the scratch register t2 before \dst is
// written, and \src is never written, so \dst may be the same register as
// \src. Exactly one vmlsq correction step is applied.
//
// Clobbers: t2 (in addition to \dst).
.macro mulmodq dst, src, const, idx0, idx1
    vqrdmulhq t2, \src, \const, \idx1      // must read \src before \dst is overwritten
    vmulq     \dst, \src, \const, \idx0
    vmlsq     \dst, t2, consts, 0
.endm

// mulmod dst, src, const, const_twisted
//
// Modular multiplication of the vector \src by the vector constant \const,
// using \const_twisted as its companion operand for vqrdmulh (presumably the
// precomputed "twisted" constant -- confirm against the table generator).
// The result is left in \dst. The correction step uses lane 0 of `consts`
// (presumably the modulus -- confirm where `consts` is loaded). vqrdmulh and
// vmlsq are helper macros defined elsewhere in this file.
//
// The vqrdmulh result goes to the scratch register t2 before \dst is
// written, and \src is never written, so \dst may be the same register as
// \src. Exactly one vmlsq correction step is applied.
//
// Clobbers: t2 (in addition to \dst).
.macro mulmod dst, src, const, const_twisted
    vqrdmulh t2, \src, \const_twisted            // must read \src before \dst is overwritten
    mul      \dst\().4s, \src\().4s, \const\().4s // lane-wise 32-bit multiply (low halves)
    vmlsq    \dst, t2, consts, 0
.endm

.macro montg_reduce a
Expand Down Expand Up @@ -523,17 +523,17 @@ layer123_start:
str_vo data6, in, (6*(1024/8))
str_vo data7, in, (7*(1024/8))

mul_ninv data4, data5, data6, data7, data0, data1, data2, data3
mul_ninv data0, data1, data2, data3, data0, data1, data2, data3

canonical_reduce data4, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data5, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data6, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data7, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data0, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data1, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data2, modulus_half, neg_modulus_half, t2, t3
canonical_reduce data3, modulus_half, neg_modulus_half, t2, t3

str_vi data4, in, (16)
str_vo data5, in, (-16 + 1*(1024/8))
str_vo data6, in, (-16 + 2*(1024/8))
str_vo data7, in, (-16 + 3*(1024/8))
str_vi data0, in, (16)
str_vo data1, in, (-16 + 1*(1024/8))
str_vo data2, in, (-16 + 2*(1024/8))
str_vo data3, in, (-16 + 3*(1024/8))

subs count, count, #1
cbnz count, layer123_start
Expand Down
24 changes: 9 additions & 15 deletions examples/naive/aarch64/intt_kyber_123_4567.s
Original file line number Diff line number Diff line change
Expand Up @@ -67,15 +67,15 @@
.endm

// mulmodq dst, src, const, idx0, idx1
//
// Modular multiplication of \src by a constant selected from \const by lane
// index; the result is left in \dst. \idx0 is forwarded to vmulq and \idx1 to
// vqrdmulhq (presumably the constant and its precomputed "twisted" companion
// -- confirm against the twiddle-table layout). The correction step uses
// lane 0 of `consts` (presumably the modulus -- confirm where `consts` is
// loaded). vqrdmulhq, vmulq and vmlsq are helper macros defined elsewhere.
//
// The vqrdmulhq result is parked in the scratch register t2 before \dst is
// written, and \src is never written, so \dst may be the same register as
// \src. Exactly one vmlsq correction step is applied.
//
// Clobbers: t2 (in addition to \dst).
.macro mulmodq dst, src, const, idx0, idx1
    vqrdmulhq t2, \src, \const, \idx1      // must read \src before \dst is overwritten
    vmulq     \dst, \src, \const, \idx0
    vmlsq     \dst, t2, consts, 0
.endm

// mulmod dst, src, const, const_twisted
//
// Modular multiplication of the vector \src by the vector constant \const,
// using \const_twisted as its companion operand for vqrdmulh (presumably the
// precomputed "twisted" constant -- confirm against the table generator).
// The result is left in \dst. The correction step uses lane 0 of `consts`
// (presumably the modulus -- confirm where `consts` is loaded). vqrdmulh and
// vmlsq are helper macros defined elsewhere in this file.
//
// The vqrdmulh result goes to the scratch register t2 before \dst is
// written, and \src is never written, so \dst may be the same register as
// \src. Exactly one vmlsq correction step is applied.
//
// Clobbers: t2 (in addition to \dst).
.macro mulmod dst, src, const, const_twisted
    vqrdmulh t2, \src, \const_twisted            // must read \src before \dst is overwritten
    mul      \dst\().8h, \src\().8h, \const\().8h // lane-wise 16-bit multiply (low halves)
    vmlsq    \dst, t2, consts, 0
.endm

.macro gs_butterfly a, b, root, idx0, idx1
Expand All @@ -84,12 +84,6 @@
mulmodq \b, tmp, \root, \idx0, \idx1
.endm

// mulmod_v dst, src, const, const_twisted
//
// Writes \dst with the lane-wise 16-bit product of \src and \const, then
// overwrites \src with the vqrdmulh of \src and \const_twisted and passes it
// to vmlsq together with lane 0 of `consts`. vqrdmulh and vmlsq are helper
// macros defined elsewhere in this file.
//
// Side effect: \src is clobbered. Because \src is read again after \dst has
// been written, \dst must not be the same register as \src.
//
// NOTE(review): the surrounding hunk header (-84,12 +84,6) suggests the
// commit this listing comes from removes this macro -- confirm before use.
.macro mulmod_v dst, src, const, const_twisted
mul \dst\().8h, \src\().8h, \const\().8h
// \src is overwritten here; its original value is lost from this point on.
vqrdmulh \src, \src, \const_twisted
vmlsq \dst, \src, consts, 0
.endm

.macro gs_butterfly_v a, b, root, root_twisted
sub tmp.8h, \a\().8h, \b\().8h
add \a\().8h, \a\().8h, \b\().8h
Expand Down Expand Up @@ -443,12 +437,12 @@ layer123_start:
str_vo data6, in, (6*(512/8))
str_vo data7, in, (7*(512/8))

mul_ninv data4, data5, data6, data7, data0, data1, data2, data3
mul_ninv data0, data1, data2, data3, data0, data1, data2, data3

str_vi data4, in, (16)
str_vo data5, in, (-16 + 1*(512/8))
str_vo data6, in, (-16 + 2*(512/8))
str_vo data7, in, (-16 + 3*(512/8))
str_vi data0, in, (16)
str_vo data1, in, (-16 + 1*(512/8))
str_vo data2, in, (-16 + 2*(512/8))
str_vo data3, in, (-16 + 3*(512/8))


subs count, count, #1
Expand Down
24 changes: 9 additions & 15 deletions examples/naive/aarch64/intt_kyber_123_4567_manual_ld4.s
Original file line number Diff line number Diff line change
Expand Up @@ -67,15 +67,15 @@
.endm

// mulmodq dst, src, const, idx0, idx1
//
// Modular multiplication of \src by a constant selected from \const by lane
// index; the result is left in \dst. \idx0 is forwarded to vmulq and \idx1 to
// vqrdmulhq (presumably the constant and its precomputed "twisted" companion
// -- confirm against the twiddle-table layout). The correction step uses
// lane 0 of `consts` (presumably the modulus -- confirm where `consts` is
// loaded). vqrdmulhq, vmulq and vmlsq are helper macros defined elsewhere.
//
// The vqrdmulhq result is parked in the scratch register t2 before \dst is
// written, and \src is never written, so \dst may be the same register as
// \src. Exactly one vmlsq correction step is applied.
//
// Clobbers: t2 (in addition to \dst).
.macro mulmodq dst, src, const, idx0, idx1
    vqrdmulhq t2, \src, \const, \idx1      // must read \src before \dst is overwritten
    vmulq     \dst, \src, \const, \idx0
    vmlsq     \dst, t2, consts, 0
.endm

// mulmod dst, src, const, const_twisted
//
// Modular multiplication of the vector \src by the vector constant \const,
// using \const_twisted as its companion operand for vqrdmulh (presumably the
// precomputed "twisted" constant -- confirm against the table generator).
// The result is left in \dst. The correction step uses lane 0 of `consts`
// (presumably the modulus -- confirm where `consts` is loaded). vqrdmulh and
// vmlsq are helper macros defined elsewhere in this file.
//
// The vqrdmulh result goes to the scratch register t2 before \dst is
// written, and \src is never written, so \dst may be the same register as
// \src. Exactly one vmlsq correction step is applied.
//
// Clobbers: t2 (in addition to \dst).
.macro mulmod dst, src, const, const_twisted
    vqrdmulh t2, \src, \const_twisted            // must read \src before \dst is overwritten
    mul      \dst\().8h, \src\().8h, \const\().8h // lane-wise 16-bit multiply (low halves)
    vmlsq    \dst, t2, consts, 0
.endm

.macro gs_butterfly a, b, root, idx0, idx1
Expand All @@ -84,12 +84,6 @@
mulmodq \b, tmp, \root, \idx0, \idx1
.endm

// mulmod_v dst, src, const, const_twisted
//
// Writes \dst with the lane-wise 16-bit product of \src and \const, then
// overwrites \src with the vqrdmulh of \src and \const_twisted and passes it
// to vmlsq together with lane 0 of `consts`. vqrdmulh and vmlsq are helper
// macros defined elsewhere in this file.
//
// Side effect: \src is clobbered. Because \src is read again after \dst has
// been written, \dst must not be the same register as \src.
//
// NOTE(review): the surrounding hunk header (-84,12 +84,6) suggests the
// commit this listing comes from removes this macro -- confirm before use.
.macro mulmod_v dst, src, const, const_twisted
mul \dst\().8h, \src\().8h, \const\().8h
// \src is overwritten here; its original value is lost from this point on.
vqrdmulh \src, \src, \const_twisted
vmlsq \dst, \src, consts, 0
.endm

.macro gs_butterfly_v a, b, root, root_twisted
sub tmp.8h, \a\().8h, \b\().8h
add \a\().8h, \a\().8h, \b\().8h
Expand Down Expand Up @@ -438,12 +432,12 @@ layer123_start:
str_vo data6, in, (6*(512/8))
str_vo data7, in, (7*(512/8))

mul_ninv data4, data5, data6, data7, data0, data1, data2, data3
mul_ninv data0, data1, data2, data3, data0, data1, data2, data3

str_vi data4, in, (16)
str_vo data5, in, (-16 + 1*(512/8))
str_vo data6, in, (-16 + 2*(512/8))
str_vo data7, in, (-16 + 3*(512/8))
str_vi data0, in, (16)
str_vo data1, in, (-16 + 1*(512/8))
str_vo data2, in, (-16 + 2*(512/8))
str_vo data3, in, (-16 + 3*(512/8))


subs count, count, #1
Expand Down

0 comments on commit 8cb796e

Please sign in to comment.