From 4af8c4bbf368dc1a9304e162ca3a483d1921cf65 Mon Sep 17 00:00:00 2001 From: Bas Westerbaan Date: Tue, 10 Sep 2024 13:16:14 +0200 Subject: [PATCH 1/2] Revert "PQ: fix timing sidechannels and add IPDWing" For TLS, early adopters prefer X25519MLKEM768. Remove IPDWing in preparation for adding X25519MLKEM768. https://datatracker.ietf.org/doc/draft-kwiatkowski-tls-ecdhe-mlkem/ This reverts commit 4725a930d5fd1118517cc929fc520b2de3e6eff1. --- boring-sys/patches/boring-pq.patch | 5858 +++++++++++++--------------- boring/src/lib.rs | 4 - boring/src/ssl/mod.rs | 5 - 3 files changed, 2668 insertions(+), 3199 deletions(-) diff --git a/boring-sys/patches/boring-pq.patch b/boring-sys/patches/boring-pq.patch index d4294dc5..2ffeee6c 100644 --- a/boring-sys/patches/boring-pq.patch +++ b/boring-sys/patches/boring-pq.patch @@ -1,4 +1,4 @@ -From 836d390deaf8b50fed0cafd55b17a63e80454d7f Mon Sep 17 00:00:00 2001 +From 4cba2164726c8d2647e38548a266a70c4942d567 Mon Sep 17 00:00:00 2001 From: Bas Westerbaan Date: Fri, 22 Jul 2022 16:43:48 +0200 Subject: [PATCH] Add temporary post-quantum key agreements @@ -20,55 +20,43 @@ This patch adds: key agreement should only be used for testing: to see if the smaller keyshare makes a difference. -4. Supportfor IPDWing under codepoint 0xfe41. This key agreement - is a preliminary version of X-Wing using the initial public draft - of ML-KEM. It should not be used. - The patch also replaces Google's implementation of Kyber, by the portable reference implementation, so as to support Kyber512. 
Cf RTG-2076 RTG-2051 RTG-2508 RTG-2707 RTG-2607 --- - BUILD.generated.bzl | 7 +- + BUILD.generated.bzl | 5 +- BUILD.generated_tests.bzl | 4 - - CMakeLists.txt | 6 +- - sources.json | 11 +- - src/crypto/CMakeLists.txt | 7 +- - src/crypto/kyber/fips202.c | 699 +++++++ - src/crypto/kyber/fips202.h | 29 + + CMakeLists.txt | 4 +- + sources.json | 9 +- + src/crypto/CMakeLists.txt | 5 +- src/crypto/kyber/internal.h | 91 - - src/crypto/kyber/ipdwing.c | 110 ++ src/crypto/kyber/keccak.c | 204 -- - src/crypto/kyber/kyber.c | 2319 +++++++++++++++------- - src/crypto/kyber/kyber.h | 29 + + src/crypto/kyber/kyber.c | 2865 ++++++++++++++++++++------- src/crypto/kyber/kyber512.c | 5 + src/crypto/kyber/kyber768.c | 4 + src/crypto/kyber/kyber_test.cc | 229 --- - src/crypto/obj/obj_dat.h | 17 +- - src/crypto/obj/obj_mac.num | 4 + - src/crypto/obj/objects.txt | 6 +- - src/include/openssl/kyber.h | 252 ++- - src/include/openssl/nid.h | 12 + - src/include/openssl/ssl.h | 4 + + src/crypto/obj/obj_dat.h | 14 +- + src/crypto/obj/obj_mac.num | 3 + + src/crypto/obj/objects.txt | 5 +- + src/include/openssl/kyber.h | 199 +- + src/include/openssl/nid.h | 9 + + src/include/openssl/ssl.h | 3 + src/sources.cmake | 2 - - src/ssl/extensions.cc | 4 + - src/ssl/ssl_key_share.cc | 493 ++++- + src/ssl/extensions.cc | 3 + + src/ssl/ssl_key_share.cc | 412 +++- src/ssl/ssl_lib.cc | 2 +- - src/ssl/ssl_test.cc | 29 +- + src/ssl/ssl_test.cc | 25 +- src/tool/speed.cc | 162 +- - 30 files changed, 3276 insertions(+), 5445 deletions(-) - create mode 100644 src/crypto/kyber/fips202.c - create mode 100644 src/crypto/kyber/fips202.h + 26 files changed, 2797 insertions(+), 5447 deletions(-) delete mode 100644 src/crypto/kyber/internal.h - create mode 100644 src/crypto/kyber/ipdwing.c delete mode 100644 src/crypto/kyber/keccak.c - create mode 100644 src/crypto/kyber/kyber.h create mode 100644 src/crypto/kyber/kyber512.c create mode 100644 src/crypto/kyber/kyber768.c delete mode 100644 src/crypto/kyber/kyber_test.cc 
diff --git a/BUILD.generated.bzl b/BUILD.generated.bzl -index 738e1055f..d1d232399 100644 +index 738e1055f..9466757a2 100644 --- a/BUILD.generated.bzl +++ b/BUILD.generated.bzl @@ -253,7 +253,6 @@ crypto_internal_headers = [ @@ -79,16 +67,14 @@ index 738e1055f..d1d232399 100644 "src/crypto/lhash/internal.h", "src/crypto/obj/obj_dat.h", "src/crypto/pkcs7/internal.h", -@@ -382,8 +381,10 @@ crypto_sources = [ +@@ -382,8 +381,8 @@ crypto_sources = [ "src/crypto/fipsmodule/fips_shared_support.c", "src/crypto/hpke/hpke.c", "src/crypto/hrss/hrss.c", - "src/crypto/kyber/keccak.c", - "src/crypto/kyber/kyber.c", -+ "src/crypto/kyber/fips202.c", + "src/crypto/kyber/kyber512.c", + "src/crypto/kyber/kyber768.c", -+ "src/crypto/kyber/ipdwing.c", "src/crypto/lhash/lhash.c", "src/crypto/mem.c", "src/crypto/obj/obj.c", @@ -122,40 +108,36 @@ index 92dec1e01..8f70dedc0 100644 "src/crypto/pkcs8/test/no_encryption.p12", "src/crypto/pkcs8/test/nss.p12", diff --git a/CMakeLists.txt b/CMakeLists.txt -index faed2befa..678a0167a 100644 +index faed2befa..931c0e3a8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt -@@ -375,8 +375,10 @@ add_library( +@@ -375,8 +375,8 @@ add_library( src/crypto/fipsmodule/fips_shared_support.c src/crypto/hpke/hpke.c src/crypto/hrss/hrss.c - src/crypto/kyber/keccak.c - src/crypto/kyber/kyber.c -+ src/crypto/kyber/fips202.c + src/crypto/kyber/kyber512.c + src/crypto/kyber/kyber768.c -+ src/crypto/kyber/ipdwing.c src/crypto/lhash/lhash.c src/crypto/mem.c src/crypto/obj/obj.c diff --git a/sources.json b/sources.json -index 4c0048e1d..d021a14b1 100644 +index 4c0048e1d..f6ea5c40f 100644 --- a/sources.json +++ b/sources.json -@@ -111,8 +111,10 @@ +@@ -111,8 +111,8 @@ "src/crypto/fipsmodule/fips_shared_support.c", "src/crypto/hpke/hpke.c", "src/crypto/hrss/hrss.c", - "src/crypto/kyber/keccak.c", - "src/crypto/kyber/kyber.c", -+ "src/crypto/kyber/fips202.c", + "src/crypto/kyber/kyber512.c", + "src/crypto/kyber/kyber768.c", -+ "src/crypto/kyber/ipdwing.c", 
"src/crypto/lhash/lhash.c", "src/crypto/mem.c", "src/crypto/obj/obj.c", -@@ -549,7 +551,6 @@ +@@ -549,7 +549,6 @@ "src/crypto/hpke/hpke_test.cc", "src/crypto/hrss/hrss_test.cc", "src/crypto/impl_dispatch_test.cc", @@ -163,7 +145,7 @@ index 4c0048e1d..d021a14b1 100644 "src/crypto/lhash/lhash_test.cc", "src/crypto/obj/obj_test.cc", "src/crypto/pem/pem_test.cc", -@@ -634,8 +635,6 @@ +@@ -634,8 +633,6 @@ "src/crypto/fipsmodule/rand/ctrdrbg_vectors.txt", "src/crypto/hmac_extra/hmac_tests.txt", "src/crypto/hpke/hpke_test_vectors.txt", @@ -172,7 +154,7 @@ index 4c0048e1d..d021a14b1 100644 "src/crypto/pkcs8/test/empty_password.p12", "src/crypto/pkcs8/test/no_encryption.p12", "src/crypto/pkcs8/test/nss.p12", -@@ -1060,4 +1059,4 @@ +@@ -1060,4 +1057,4 @@ "urandom_test": [ "src/crypto/fipsmodule/rand/urandom_test.cc" ] @@ -180,23 +162,21 @@ index 4c0048e1d..d021a14b1 100644 \ No newline at end of file +} diff --git a/src/crypto/CMakeLists.txt b/src/crypto/CMakeLists.txt -index cdb5ddca1..9dcb7a566 100644 +index cdb5ddca1..2052fa791 100644 --- a/src/crypto/CMakeLists.txt +++ b/src/crypto/CMakeLists.txt -@@ -170,8 +170,10 @@ add_library( +@@ -170,8 +170,8 @@ add_library( ex_data.c hpke/hpke.c hrss/hrss.c - kyber/keccak.c - kyber/kyber.c -+ kyber/fips202.c + kyber/kyber512.c + kyber/kyber768.c -+ kyber/ipdwing.c lhash/lhash.c mem.c obj/obj.c -@@ -400,7 +402,6 @@ add_executable( +@@ -400,7 +400,6 @@ add_executable( hmac_extra/hmac_test.cc hrss/hrss_test.cc impl_dispatch_test.cc @@ -204,746 +184,6 @@ index cdb5ddca1..9dcb7a566 100644 lhash/lhash_test.cc obj/obj_test.cc pem/pem_test.cc -diff --git a/src/crypto/kyber/fips202.c b/src/crypto/kyber/fips202.c -new file mode 100644 -index 000000000..9713a4f7e ---- /dev/null -+++ b/src/crypto/kyber/fips202.c -@@ -0,0 +1,699 @@ -+/* Based on the public domain implementation in crypto_hash/keccakc512/simple/ from -+ * http://bench.cr.yp.to/supercop.html by Ronny Van Keer and the public domain "TweetFips202" -+ * implementation from 
https://twitter.com/tweetfips202 by Gilles Van Assche, Daniel J. Bernstein, -+ * and Peter Schwabe */ -+ -+#include "fips202.h" -+ -+#define NROUNDS 24 -+#define ROL(a, offset) ((a << offset) ^ (a >> (64-offset))) -+ -+/************************************************* -+* Name: load64 -+* -+* Description: Load 8 bytes into uint64_t in little-endian order -+* -+* Arguments: - const uint8_t *x: pointer to input byte array -+* -+* Returns the loaded 64-bit unsigned integer -+**************************************************/ -+static uint64_t load64(const uint8_t x[8]) { -+ unsigned int i; -+ uint64_t r = 0; -+ -+ for(i=0;i<8;i++) -+ r |= (uint64_t)x[i] << 8*i; -+ -+ return r; -+} -+ -+/************************************************* -+* Name: store64 -+* -+* Description: Store a 64-bit integer to array of 8 bytes in little-endian order -+* -+* Arguments: - uint8_t *x: pointer to the output byte array (allocated) -+* - uint64_t u: input 64-bit unsigned integer -+**************************************************/ -+static void store64(uint8_t x[8], uint64_t u) { -+ unsigned int i; -+ -+ for(i=0;i<8;i++) -+ x[i] = u >> 8*i; -+} -+ -+/* Keccak round constants */ -+static const uint64_t KeccakF_RoundConstants[NROUNDS] = { -+ (uint64_t)0x0000000000000001ULL, -+ (uint64_t)0x0000000000008082ULL, -+ (uint64_t)0x800000000000808aULL, -+ (uint64_t)0x8000000080008000ULL, -+ (uint64_t)0x000000000000808bULL, -+ (uint64_t)0x0000000080000001ULL, -+ (uint64_t)0x8000000080008081ULL, -+ (uint64_t)0x8000000000008009ULL, -+ (uint64_t)0x000000000000008aULL, -+ (uint64_t)0x0000000000000088ULL, -+ (uint64_t)0x0000000080008009ULL, -+ (uint64_t)0x000000008000000aULL, -+ (uint64_t)0x000000008000808bULL, -+ (uint64_t)0x800000000000008bULL, -+ (uint64_t)0x8000000000008089ULL, -+ (uint64_t)0x8000000000008003ULL, -+ (uint64_t)0x8000000000008002ULL, -+ (uint64_t)0x8000000000000080ULL, -+ (uint64_t)0x000000000000800aULL, -+ (uint64_t)0x800000008000000aULL, -+ (uint64_t)0x8000000080008081ULL, -+ 
(uint64_t)0x8000000000008080ULL, -+ (uint64_t)0x0000000080000001ULL, -+ (uint64_t)0x8000000080008008ULL -+}; -+ -+/************************************************* -+* Name: KeccakF1600_StatePermute -+* -+* Description: The Keccak F1600 Permutation -+* -+* Arguments: - uint64_t *state: pointer to input/output Keccak state -+**************************************************/ -+static void KeccakF1600_StatePermute(uint64_t state[25]) -+{ -+ int round; -+ -+ uint64_t Aba, Abe, Abi, Abo, Abu; -+ uint64_t Aga, Age, Agi, Ago, Agu; -+ uint64_t Aka, Ake, Aki, Ako, Aku; -+ uint64_t Ama, Ame, Ami, Amo, Amu; -+ uint64_t Asa, Ase, Asi, Aso, Asu; -+ uint64_t BCa, BCe, BCi, BCo, BCu; -+ uint64_t Da, De, Di, Do, Du; -+ uint64_t Eba, Ebe, Ebi, Ebo, Ebu; -+ uint64_t Ega, Ege, Egi, Ego, Egu; -+ uint64_t Eka, Eke, Eki, Eko, Eku; -+ uint64_t Ema, Eme, Emi, Emo, Emu; -+ uint64_t Esa, Ese, Esi, Eso, Esu; -+ -+ //copyFromState(A, state) -+ Aba = state[ 0]; -+ Abe = state[ 1]; -+ Abi = state[ 2]; -+ Abo = state[ 3]; -+ Abu = state[ 4]; -+ Aga = state[ 5]; -+ Age = state[ 6]; -+ Agi = state[ 7]; -+ Ago = state[ 8]; -+ Agu = state[ 9]; -+ Aka = state[10]; -+ Ake = state[11]; -+ Aki = state[12]; -+ Ako = state[13]; -+ Aku = state[14]; -+ Ama = state[15]; -+ Ame = state[16]; -+ Ami = state[17]; -+ Amo = state[18]; -+ Amu = state[19]; -+ Asa = state[20]; -+ Ase = state[21]; -+ Asi = state[22]; -+ Aso = state[23]; -+ Asu = state[24]; -+ -+ for(round = 0; round < NROUNDS; round += 2) { -+ // prepareTheta -+ BCa = Aba^Aga^Aka^Ama^Asa; -+ BCe = Abe^Age^Ake^Ame^Ase; -+ BCi = Abi^Agi^Aki^Ami^Asi; -+ BCo = Abo^Ago^Ako^Amo^Aso; -+ BCu = Abu^Agu^Aku^Amu^Asu; -+ -+ //thetaRhoPiChiIotaPrepareTheta(round, A, E) -+ Da = BCu^ROL(BCe, 1); -+ De = BCa^ROL(BCi, 1); -+ Di = BCe^ROL(BCo, 1); -+ Do = BCi^ROL(BCu, 1); -+ Du = BCo^ROL(BCa, 1); -+ -+ Aba ^= Da; -+ BCa = Aba; -+ Age ^= De; -+ BCe = ROL(Age, 44); -+ Aki ^= Di; -+ BCi = ROL(Aki, 43); -+ Amo ^= Do; -+ BCo = ROL(Amo, 21); -+ Asu ^= Du; -+ BCu = 
ROL(Asu, 14); -+ Eba = BCa ^((~BCe)& BCi ); -+ Eba ^= (uint64_t)KeccakF_RoundConstants[round]; -+ Ebe = BCe ^((~BCi)& BCo ); -+ Ebi = BCi ^((~BCo)& BCu ); -+ Ebo = BCo ^((~BCu)& BCa ); -+ Ebu = BCu ^((~BCa)& BCe ); -+ -+ Abo ^= Do; -+ BCa = ROL(Abo, 28); -+ Agu ^= Du; -+ BCe = ROL(Agu, 20); -+ Aka ^= Da; -+ BCi = ROL(Aka, 3); -+ Ame ^= De; -+ BCo = ROL(Ame, 45); -+ Asi ^= Di; -+ BCu = ROL(Asi, 61); -+ Ega = BCa ^((~BCe)& BCi ); -+ Ege = BCe ^((~BCi)& BCo ); -+ Egi = BCi ^((~BCo)& BCu ); -+ Ego = BCo ^((~BCu)& BCa ); -+ Egu = BCu ^((~BCa)& BCe ); -+ -+ Abe ^= De; -+ BCa = ROL(Abe, 1); -+ Agi ^= Di; -+ BCe = ROL(Agi, 6); -+ Ako ^= Do; -+ BCi = ROL(Ako, 25); -+ Amu ^= Du; -+ BCo = ROL(Amu, 8); -+ Asa ^= Da; -+ BCu = ROL(Asa, 18); -+ Eka = BCa ^((~BCe)& BCi ); -+ Eke = BCe ^((~BCi)& BCo ); -+ Eki = BCi ^((~BCo)& BCu ); -+ Eko = BCo ^((~BCu)& BCa ); -+ Eku = BCu ^((~BCa)& BCe ); -+ -+ Abu ^= Du; -+ BCa = ROL(Abu, 27); -+ Aga ^= Da; -+ BCe = ROL(Aga, 36); -+ Ake ^= De; -+ BCi = ROL(Ake, 10); -+ Ami ^= Di; -+ BCo = ROL(Ami, 15); -+ Aso ^= Do; -+ BCu = ROL(Aso, 56); -+ Ema = BCa ^((~BCe)& BCi ); -+ Eme = BCe ^((~BCi)& BCo ); -+ Emi = BCi ^((~BCo)& BCu ); -+ Emo = BCo ^((~BCu)& BCa ); -+ Emu = BCu ^((~BCa)& BCe ); -+ -+ Abi ^= Di; -+ BCa = ROL(Abi, 62); -+ Ago ^= Do; -+ BCe = ROL(Ago, 55); -+ Aku ^= Du; -+ BCi = ROL(Aku, 39); -+ Ama ^= Da; -+ BCo = ROL(Ama, 41); -+ Ase ^= De; -+ BCu = ROL(Ase, 2); -+ Esa = BCa ^((~BCe)& BCi ); -+ Ese = BCe ^((~BCi)& BCo ); -+ Esi = BCi ^((~BCo)& BCu ); -+ Eso = BCo ^((~BCu)& BCa ); -+ Esu = BCu ^((~BCa)& BCe ); -+ -+ // prepareTheta -+ BCa = Eba^Ega^Eka^Ema^Esa; -+ BCe = Ebe^Ege^Eke^Eme^Ese; -+ BCi = Ebi^Egi^Eki^Emi^Esi; -+ BCo = Ebo^Ego^Eko^Emo^Eso; -+ BCu = Ebu^Egu^Eku^Emu^Esu; -+ -+ //thetaRhoPiChiIotaPrepareTheta(round+1, E, A) -+ Da = BCu^ROL(BCe, 1); -+ De = BCa^ROL(BCi, 1); -+ Di = BCe^ROL(BCo, 1); -+ Do = BCi^ROL(BCu, 1); -+ Du = BCo^ROL(BCa, 1); -+ -+ Eba ^= Da; -+ BCa = Eba; -+ Ege ^= De; -+ BCe = ROL(Ege, 44); -+ Eki ^= Di; -+ 
BCi = ROL(Eki, 43); -+ Emo ^= Do; -+ BCo = ROL(Emo, 21); -+ Esu ^= Du; -+ BCu = ROL(Esu, 14); -+ Aba = BCa ^((~BCe)& BCi ); -+ Aba ^= (uint64_t)KeccakF_RoundConstants[round+1]; -+ Abe = BCe ^((~BCi)& BCo ); -+ Abi = BCi ^((~BCo)& BCu ); -+ Abo = BCo ^((~BCu)& BCa ); -+ Abu = BCu ^((~BCa)& BCe ); -+ -+ Ebo ^= Do; -+ BCa = ROL(Ebo, 28); -+ Egu ^= Du; -+ BCe = ROL(Egu, 20); -+ Eka ^= Da; -+ BCi = ROL(Eka, 3); -+ Eme ^= De; -+ BCo = ROL(Eme, 45); -+ Esi ^= Di; -+ BCu = ROL(Esi, 61); -+ Aga = BCa ^((~BCe)& BCi ); -+ Age = BCe ^((~BCi)& BCo ); -+ Agi = BCi ^((~BCo)& BCu ); -+ Ago = BCo ^((~BCu)& BCa ); -+ Agu = BCu ^((~BCa)& BCe ); -+ -+ Ebe ^= De; -+ BCa = ROL(Ebe, 1); -+ Egi ^= Di; -+ BCe = ROL(Egi, 6); -+ Eko ^= Do; -+ BCi = ROL(Eko, 25); -+ Emu ^= Du; -+ BCo = ROL(Emu, 8); -+ Esa ^= Da; -+ BCu = ROL(Esa, 18); -+ Aka = BCa ^((~BCe)& BCi ); -+ Ake = BCe ^((~BCi)& BCo ); -+ Aki = BCi ^((~BCo)& BCu ); -+ Ako = BCo ^((~BCu)& BCa ); -+ Aku = BCu ^((~BCa)& BCe ); -+ -+ Ebu ^= Du; -+ BCa = ROL(Ebu, 27); -+ Ega ^= Da; -+ BCe = ROL(Ega, 36); -+ Eke ^= De; -+ BCi = ROL(Eke, 10); -+ Emi ^= Di; -+ BCo = ROL(Emi, 15); -+ Eso ^= Do; -+ BCu = ROL(Eso, 56); -+ Ama = BCa ^((~BCe)& BCi ); -+ Ame = BCe ^((~BCi)& BCo ); -+ Ami = BCi ^((~BCo)& BCu ); -+ Amo = BCo ^((~BCu)& BCa ); -+ Amu = BCu ^((~BCa)& BCe ); -+ -+ Ebi ^= Di; -+ BCa = ROL(Ebi, 62); -+ Ego ^= Do; -+ BCe = ROL(Ego, 55); -+ Eku ^= Du; -+ BCi = ROL(Eku, 39); -+ Ema ^= Da; -+ BCo = ROL(Ema, 41); -+ Ese ^= De; -+ BCu = ROL(Ese, 2); -+ Asa = BCa ^((~BCe)& BCi ); -+ Ase = BCe ^((~BCi)& BCo ); -+ Asi = BCi ^((~BCo)& BCu ); -+ Aso = BCo ^((~BCu)& BCa ); -+ Asu = BCu ^((~BCa)& BCe ); -+ } -+ -+ //copyToState(state, A) -+ state[ 0] = Aba; -+ state[ 1] = Abe; -+ state[ 2] = Abi; -+ state[ 3] = Abo; -+ state[ 4] = Abu; -+ state[ 5] = Aga; -+ state[ 6] = Age; -+ state[ 7] = Agi; -+ state[ 8] = Ago; -+ state[ 9] = Agu; -+ state[10] = Aka; -+ state[11] = Ake; -+ state[12] = Aki; -+ state[13] = Ako; -+ state[14] = Aku; -+ state[15] = Ama; 
-+ state[16] = Ame; -+ state[17] = Ami; -+ state[18] = Amo; -+ state[19] = Amu; -+ state[20] = Asa; -+ state[21] = Ase; -+ state[22] = Asi; -+ state[23] = Aso; -+ state[24] = Asu; -+} -+ -+ -+/************************************************* -+* Name: keccak_squeeze -+* -+* Description: Squeeze step of Keccak. Squeezes arbitratrily many bytes. -+* Modifies the state. Can be called multiple times to keep -+* squeezing, i.e., is incremental. -+* -+* Arguments: - uint8_t *out: pointer to output -+* - size_t outlen: number of bytes to be squeezed (written to out) -+* - uint64_t *s: pointer to input/output Keccak state -+* - unsigned int pos: number of bytes in current block already squeezed -+* - unsigned int r: rate in bytes (e.g., 168 for SHAKE128) -+* -+* Returns new position pos in current block -+**************************************************/ -+static unsigned int keccak_squeeze(uint8_t *out, -+ size_t outlen, -+ uint64_t s[25], -+ unsigned int pos, -+ unsigned int r) -+{ -+ unsigned int i; -+ -+ while(outlen) { -+ if(pos == r) { -+ KeccakF1600_StatePermute(s); -+ pos = 0; -+ } -+ for(i=pos;i < r && i < pos+outlen; i++) -+ *out++ = s[i/8] >> 8*(i%8); -+ outlen -= i-pos; -+ pos = i; -+ } -+ -+ return pos; -+} -+ -+/************************************************* -+* Name: keccak_init -+* -+* Description: Initializes the Keccak state. -+* -+* Arguments: - uint64_t *s: pointer to Keccak state -+**************************************************/ -+static void keccak_init(uint64_t s[25]) -+{ -+ unsigned int i; -+ for(i=0;i<25;i++) -+ s[i] = 0; -+} -+ -+ -+/************************************************* -+* Name: keccak_absorb -+* -+* Description: Absorb step of Keccak; incremental. 
-+* -+* Arguments: - uint64_t *s: pointer to Keccak state -+* - unsigned int pos: position in current block to be absorbed -+* - unsigned int r: rate in bytes (e.g., 168 for SHAKE128) -+* - const uint8_t *in: pointer to input to be absorbed into s -+* - size_t inlen: length of input in bytes -+* -+* Returns new position pos in current block -+**************************************************/ -+static unsigned int keccak_absorb(uint64_t s[25], -+ unsigned int pos, -+ unsigned int r, -+ const uint8_t *in, -+ size_t inlen) -+{ -+ unsigned int i; -+ -+ while(pos+inlen >= r) { -+ for(i=pos;i= r) { -+ for(i=0;is, SHAKE128_RATE, in, inlen, 0x1F); -+ state->pos = SHAKE128_RATE; -+} -+ -+/************************************************* -+* Name: shake128_squeezeblocks -+* -+* Description: Squeeze step of SHAKE128 XOF. Squeezes full blocks of -+* SHAKE128_RATE bytes each. Can be called multiple times -+* to keep squeezing. Assumes new block has not yet been -+* started (state->pos = SHAKE128_RATE). -+* -+* Arguments: - uint8_t *out: pointer to output blocks -+* - size_t nblocks: number of blocks to be squeezed (written to output) -+* - keccak_state *s: pointer to input/output Keccak state -+**************************************************/ -+void shake128_squeezeblocks(uint8_t *out, size_t nblocks, keccak_state *state) -+{ -+ keccak_squeezeblocks(out, nblocks, state->s, SHAKE128_RATE); -+} -+ -+/************************************************* -+* Name: shake256_squeeze -+* -+* Description: Squeeze step of SHAKE256 XOF. Squeezes arbitraily many -+* bytes. Can be called multiple times to keep squeezing. 
-+* -+* Arguments: - uint8_t *out: pointer to output blocks -+* - size_t outlen : number of bytes to be squeezed (written to output) -+* - keccak_state *s: pointer to input/output Keccak state -+**************************************************/ -+void shake256_squeeze(uint8_t *out, size_t outlen, keccak_state *state) -+{ -+ state->pos = keccak_squeeze(out, outlen, state->s, state->pos, SHAKE256_RATE); -+} -+ -+/************************************************* -+* Name: shake256_init -+* -+* Description: Initilizes Keccak state for use as SHAKE256 XOF -+* -+* Arguments: - keccak_state *state: pointer to (uninitialized) Keccak state -+**************************************************/ -+void shake256_init(keccak_state *state) -+{ -+ keccak_init(state->s); -+ state->pos = 0; -+} -+ -+/************************************************* -+* Name: shake256_absorb -+* -+* Description: Absorb step of the SHAKE256 XOF; incremental. -+* -+* Arguments: - keccak_state *state: pointer to (initialized) output Keccak state -+* - const uint8_t *in: pointer to input to be absorbed into s -+* - size_t inlen: length of input in bytes -+**************************************************/ -+void shake256_absorb(keccak_state *state, const uint8_t *in, size_t inlen) -+{ -+ state->pos = keccak_absorb(state->s, state->pos, SHAKE256_RATE, in, inlen); -+} -+ -+/************************************************* -+* Name: shake256_finalize -+* -+* Description: Finalize absorb step of the SHAKE256 XOF. -+* -+* Arguments: - keccak_state *state: pointer to Keccak state -+**************************************************/ -+void shake256_finalize(keccak_state *state) -+{ -+ keccak_finalize(state->s, state->pos, SHAKE256_RATE, 0x1F); -+ state->pos = SHAKE256_RATE; -+} -+ -+/************************************************* -+* Name: shake256_absorb_once -+* -+* Description: Initialize, absorb into and finalize SHAKE256 XOF; non-incremental. 
-+* -+* Arguments: - keccak_state *state: pointer to (uninitialized) output Keccak state -+* - const uint8_t *in: pointer to input to be absorbed into s -+* - size_t inlen: length of input in bytes -+**************************************************/ -+void shake256_absorb_once(keccak_state *state, const uint8_t *in, size_t inlen) -+{ -+ keccak_absorb_once(state->s, SHAKE256_RATE, in, inlen, 0x1F); -+ state->pos = SHAKE256_RATE; -+} -+ -+/************************************************* -+* Name: shake256_squeezeblocks -+* -+* Description: Squeeze step of SHAKE256 XOF. Squeezes full blocks of -+* SHAKE256_RATE bytes each. Can be called multiple times -+* to keep squeezing. Assumes next block has not yet been -+* started (state->pos = SHAKE256_RATE). -+* -+* Arguments: - uint8_t *out: pointer to output blocks -+* - size_t nblocks: number of blocks to be squeezed (written to output) -+* - keccak_state *s: pointer to input/output Keccak state -+**************************************************/ -+void shake256_squeezeblocks(uint8_t *out, size_t nblocks, keccak_state *state) -+{ -+ keccak_squeezeblocks(out, nblocks, state->s, SHAKE256_RATE); -+} -+ -+/************************************************* -+* Name: shake256 -+* -+* Description: SHAKE256 XOF with non-incremental API -+* -+* Arguments: - uint8_t *out: pointer to output -+* - size_t outlen: requested output length in bytes -+* - const uint8_t *in: pointer to input -+* - size_t inlen: length of input in bytes -+**************************************************/ -+void shake256(uint8_t *out, size_t outlen, const uint8_t *in, size_t inlen) -+{ -+ size_t nblocks; -+ keccak_state state; -+ -+ shake256_absorb_once(&state, in, inlen); -+ nblocks = outlen/SHAKE256_RATE; -+ shake256_squeezeblocks(out, nblocks, &state); -+ outlen -= nblocks*SHAKE256_RATE; -+ out += nblocks*SHAKE256_RATE; -+ shake256_squeeze(out, outlen, &state); -+} -+ -+/************************************************* -+* Name: sha3_256 -+* -+* 
Description: SHA3-256 with non-incremental API -+* -+* Arguments: - uint8_t *h: pointer to output (32 bytes) -+* - const uint8_t *in: pointer to input -+* - size_t inlen: length of input in bytes -+**************************************************/ -+void sha3_256(uint8_t h[32], const uint8_t *in, size_t inlen) -+{ -+ unsigned int i; -+ uint64_t s[25]; -+ -+ keccak_absorb_once(s, SHA3_256_RATE, in, inlen, 0x06); -+ KeccakF1600_StatePermute(s); -+ for(i=0;i<4;i++) -+ store64(h+8*i,s[i]); -+} -+ -+/************************************************* -+* Name: sha3_512 -+* -+* Description: SHA3-512 with non-incremental API -+* -+* Arguments: - uint8_t *h: pointer to output (64 bytes) -+* - const uint8_t *in: pointer to input -+* - size_t inlen: length of input in bytes -+**************************************************/ -+void sha3_512(uint8_t h[64], const uint8_t *in, size_t inlen) -+{ -+ unsigned int i; -+ uint64_t s[25]; -+ -+ keccak_absorb_once(s, SHA3_512_RATE, in, inlen, 0x06); -+ KeccakF1600_StatePermute(s); -+ for(i=0;i<8;i++) -+ store64(h+8*i,s[i]); -+} -+ -+ -diff --git a/src/crypto/kyber/fips202.h b/src/crypto/kyber/fips202.h -new file mode 100644 -index 000000000..7c37570bc ---- /dev/null -+++ b/src/crypto/kyber/fips202.h -@@ -0,0 +1,29 @@ -+#ifndef OPENSSL_HEADER_KYBER_FIPS202_H -+#define OPENSSL_HEADER_KYBER_FIPS202_H -+ -+#include -+#include -+ -+#define SHAKE128_RATE 168 -+#define SHAKE256_RATE 136 -+#define SHA3_256_RATE 136 -+#define SHA3_512_RATE 72 -+ -+typedef struct { -+ uint64_t s[25]; -+ unsigned int pos; -+} keccak_state; -+ -+void shake128_absorb_once(keccak_state *state, const uint8_t *in, size_t inlen); -+void shake128_squeezeblocks(uint8_t *out, size_t nblocks, keccak_state *state); -+void shake256_squeeze(uint8_t *out, size_t outlen, keccak_state *state); -+void shake256_init(keccak_state *state); -+void shake256_absorb(keccak_state *state, const uint8_t *in, size_t inlen); -+void shake256_finalize(keccak_state *state); -+void 
shake256_absorb_once(keccak_state *state, const uint8_t *in, size_t inlen); -+void shake256_squeezeblocks(uint8_t *out, size_t nblocks, keccak_state *state); -+void shake256(uint8_t *out, size_t outlen, const uint8_t *in, size_t inlen); -+void sha3_512(uint8_t h[64], const uint8_t *in, size_t inlen); -+void sha3_256(uint8_t h[32], const uint8_t *in, size_t inlen); -+ -+#endif diff --git a/src/crypto/kyber/internal.h b/src/crypto/kyber/internal.h deleted file mode 100644 index b3bfa86b8..000000000 @@ -1025,138 +265,22 @@ index b3bfa86b8..000000000 - struct KYBER_private_key *out_private_key, - const uint8_t entropy[KYBER_GENERATE_KEY_ENTROPY]); - --// KYBER_encap_external_entropy is a deterministic function to encapsulate --// |out_shared_secret_len| bytes of |out_shared_secret| to |ciphertext|, using --// |KYBER_ENCAP_ENTROPY| bytes of |entropy| for randomization. The --// decapsulating side will be able to recover |entropy| in full. This --// function is should only be used for tests, regular callers should use the --// non-deterministic |KYBER_encap| directly. 
--OPENSSL_EXPORT void KYBER_encap_external_entropy( -- uint8_t out_ciphertext[KYBER_CIPHERTEXT_BYTES], uint8_t *out_shared_secret, -- size_t out_shared_secret_len, const struct KYBER_public_key *public_key, -- const uint8_t entropy[KYBER_ENCAP_ENTROPY]); -- --#if defined(__cplusplus) --} --#endif -- --#endif // OPENSSL_HEADER_CRYPTO_KYBER_INTERNAL_H -diff --git a/src/crypto/kyber/ipdwing.c b/src/crypto/kyber/ipdwing.c -new file mode 100644 -index 000000000..d55cfefc9 ---- /dev/null -+++ b/src/crypto/kyber/ipdwing.c -@@ -0,0 +1,110 @@ -+#include -+#include -+#include -+ -+#include -+#include -+#include -+ -+#include "fips202.h" -+#include "kyber.h" -+ -+static const char *label = "\\.//^\\"; -+ -+static void combiner( -+ uint8_t out[32], -+ const uint8_t ss_m[32], -+ const uint8_t ss_x[32], -+ const uint8_t ct_x[32], -+ const uint8_t pk_x[32]) { -+ uint8_t buf[6+32*4]; -+ memcpy(buf, label, 6); -+ memcpy(buf+6, ss_m, 32); -+ memcpy(buf+6+32, ss_x, 32); -+ memcpy(buf+6+32*2, ct_x, 32); -+ memcpy(buf+6+32*3, pk_x, 32); -+ sha3_256(out, buf, 6+32*4); -+} -+ -+void IPDWING_generate_key( -+ struct IPDWING_public_key *out_pub, -+ struct IPDWING_private_key *out_priv, -+ const uint8_t seed[IPDWING_GENERATE_KEY_BYTES]) { -+ KYBER768_generate_key( -+ &out_pub->m, -+ &out_priv->m, -+ seed); -+ memcpy(out_priv->x, seed+64, 32); -+ X25519_public_from_private(out_pub->x, out_priv->x); -+ memcpy(out_priv->xpub, out_pub->x, 32); -+} -+ -+void IPDWING_encap( -+ uint8_t out_ciphertext[IPDWING_CIPHERTEXT_BYTES], -+ uint8_t ss[IPDWING_KEY_BYTES], -+ const struct IPDWING_public_key *in_pub, -+ const uint8_t seed[IPDWING_ENCAP_BYTES]) { -+ -+ uint8_t ss_m[32]; -+ uint8_t ss_x[32]; -+ uint8_t *ct_x = out_ciphertext + KYBER768_CIPHERTEXT_BYTES; -+ const uint8_t *ek_x = seed + 32; -+ X25519_public_from_private(ct_x, ek_x); -+ -+ X25519( -+ ss_x, -+ ek_x, -+ in_pub->x -+ ); -+ -+ KYBER768_encap2( -+ out_ciphertext, -+ ss_m, -+ &in_pub->m, -+ seed, -+ 1 -+ ); -+ -+ combiner(ss, ss_m, ss_x, 
ct_x, in_pub->x); -+} -+ -+void IPDWING_decap( -+ uint8_t out_shared_key[IPDWING_KEY_BYTES], -+ const struct IPDWING_private_key *in_priv, -+ const uint8_t *ct) { -+ -+ uint8_t ss_m[32]; -+ uint8_t ss_x[32]; -+ const uint8_t *ct_x = ct + KYBER768_CIPHERTEXT_BYTES; -+ -+ KYBER768_decap2( -+ ss_m, -+ &in_priv->m, -+ ct, -+ KYBER768_CIPHERTEXT_BYTES, -+ 1 -+ ); -+ -+ X25519( -+ ss_x, -+ in_priv->x, -+ ct_x -+ ); -+ -+ combiner(out_shared_key, ss_m, ss_x, ct_x, in_priv->xpub); -+} -+ -+void IPDWING_marshal_public_key( -+ uint8_t out[IPDWING_PUBLIC_KEY_BYTES], -+ const struct IPDWING_public_key *in) { -+ KYBER768_marshal_public_key(out, &in->m); -+ memcpy(out + KYBER768_PUBLIC_KEY_BYTES, in->x, 32); -+} -+ -+void IPDWING_parse_public_key( -+ struct IPDWING_public_key *out, -+ const uint8_t in[IPDWING_PUBLIC_KEY_BYTES]) { -+ KYBER768_parse_public_key(&out->m, in); -+ memcpy(out->x, in + KYBER768_PUBLIC_KEY_BYTES, 32); -+} -+ +-// KYBER_encap_external_entropy is a deterministic function to encapsulate +-// |out_shared_secret_len| bytes of |out_shared_secret| to |ciphertext|, using +-// |KYBER_ENCAP_ENTROPY| bytes of |entropy| for randomization. The +-// decapsulating side will be able to recover |entropy| in full. This +-// function is should only be used for tests, regular callers should use the +-// non-deterministic |KYBER_encap| directly. 
+-OPENSSL_EXPORT void KYBER_encap_external_entropy( +- uint8_t out_ciphertext[KYBER_CIPHERTEXT_BYTES], uint8_t *out_shared_secret, +- size_t out_shared_secret_len, const struct KYBER_public_key *public_key, +- const uint8_t entropy[KYBER_ENCAP_ENTROPY]); +- +-#if defined(__cplusplus) +-} +-#endif +- +-#endif // OPENSSL_HEADER_CRYPTO_KYBER_INTERNAL_H diff --git a/src/crypto/kyber/keccak.c b/src/crypto/kyber/keccak.c deleted file mode 100644 index f1c012d11..000000000 @@ -1368,10 +492,10 @@ index f1c012d11..000000000 - } -} diff --git a/src/crypto/kyber/kyber.c b/src/crypto/kyber/kyber.c -index 776c085f9..5acd45cd9 100644 +index 776c085f9..346d4daec 100644 --- a/src/crypto/kyber/kyber.c +++ b/src/crypto/kyber/kyber.c -@@ -1,833 +1,1706 @@ +@@ -1,833 +1,2252 @@ -/* Copyright (c) 2023, Google Inc. - * - * Permission to use, copy, modify, and/or distribute this software for any @@ -1390,1966 +514,2818 @@ index 776c085f9..5acd45cd9 100644 +// https://github.com/pq-crystals/kyber +// 8e00ec73035147d18b27d06048dff322f8de1f29 +// -+// with some small modifications: ++// with some small modifications: ++// ++// - Merged into one file. ++// - Removed 90s version. ++// - Seeds are passed as paramters. ++// - Changed the API to be more BoringSSL-like ++// ++// TODO ++// ++// - Optimizations ++// ++// The majority of Kyber's time is spent in keccak: generating the matrix ++// A, hashing the public key, et cetera. This can be sped up dramatically ++// by using a multiway keccak implementation such as f1600x4 on AVX2. ++// ++// Also the NTT and other operations can be sped up with SIMD. This is ++// more complex and the gains are more modest. See the avx2 reference ++// implementation or https://github.com/cloudflare/circl/tree/main/pke/kyber ++// ++// - Option to keep A stored in private key. 
++ ++#ifndef KYBER_K ++#error "Don't compile this file direcly" ++#endif + + #include ++#include + +-#include +-#include +- +-#include +-#include +- +-#include "../internal.h" +-#include "./internal.h" +- +- +-// See +-// https://pq-crystals.org/kyber/data/kyber-specification-round3-20210804.pdf +- +-#define DEGREE 256 +-#define RANK 3 +- +-static const size_t kBarrettMultiplier = 5039; +-static const unsigned kBarrettShift = 24; +-static const uint16_t kPrime = 3329; +-static const int kLog2Prime = 12; +-static const uint16_t kHalfPrime = (/*kPrime=*/3329 - 1) / 2; +-static const int kDU = 10; +-static const int kDV = 4; +-// kInverseDegree is 128^-1 mod 3329; 128 because kPrime does not have a 512th +-// root of unity. +-static const uint16_t kInverseDegree = 3303; +-static const size_t kEncodedVectorSize = +- (/*kLog2Prime=*/12 * DEGREE / 8) * RANK; +-static const size_t kCompressedVectorSize = /*kDU=*/10 * RANK * DEGREE / 8; +- +-typedef struct scalar { +- // On every function entry and exit, 0 <= c < kPrime. 
+- uint16_t c[DEGREE]; +-} scalar; +- +-typedef struct vector { +- scalar v[RANK]; +-} vector; +- +-typedef struct matrix { +- scalar v[RANK][RANK]; +-} matrix; +- +-// This bit of Python will be referenced in some of the following comments: +-// +-// p = 3329 +-// +-// def bitreverse(i): +-// ret = 0 +-// for n in range(7): +-// bit = i & 1 +-// ret <<= 1 +-// ret |= bit +-// i >>= 1 +-// return ret +- +-// kNTTRoots = [pow(17, bitreverse(i), p) for i in range(128)] +-static const uint16_t kNTTRoots[128] = { +- 1, 1729, 2580, 3289, 2642, 630, 1897, 848, 1062, 1919, 193, 797, +- 2786, 3260, 569, 1746, 296, 2447, 1339, 1476, 3046, 56, 2240, 1333, +- 1426, 2094, 535, 2882, 2393, 2879, 1974, 821, 289, 331, 3253, 1756, +- 1197, 2304, 2277, 2055, 650, 1977, 2513, 632, 2865, 33, 1320, 1915, +- 2319, 1435, 807, 452, 1438, 2868, 1534, 2402, 2647, 2617, 1481, 648, +- 2474, 3110, 1227, 910, 17, 2761, 583, 2649, 1637, 723, 2288, 1100, +- 1409, 2662, 3281, 233, 756, 2156, 3015, 3050, 1703, 1651, 2789, 1789, +- 1847, 952, 1461, 2687, 939, 2308, 2437, 2388, 733, 2337, 268, 641, +- 1584, 2298, 2037, 3220, 375, 2549, 2090, 1645, 1063, 319, 2773, 757, +- 2099, 561, 2466, 2594, 2804, 1092, 403, 1026, 1143, 2150, 2775, 886, +- 1722, 1212, 1874, 1029, 2110, 2935, 885, 2154, +-}; ++#include ++#include ++#include + +-// kInverseNTTRoots = [pow(17, -bitreverse(i), p) for i in range(128)] +-static const uint16_t kInverseNTTRoots[128] = { +- 1, 1600, 40, 749, 2481, 1432, 2699, 687, 1583, 2760, 69, 543, +- 2532, 3136, 1410, 2267, 2508, 1355, 450, 936, 447, 2794, 1235, 1903, +- 1996, 1089, 3273, 283, 1853, 1990, 882, 3033, 2419, 2102, 219, 855, +- 2681, 1848, 712, 682, 927, 1795, 461, 1891, 2877, 2522, 1894, 1010, +- 1414, 2009, 3296, 464, 2697, 816, 1352, 2679, 1274, 1052, 1025, 2132, +- 1573, 76, 2998, 3040, 1175, 2444, 394, 1219, 2300, 1455, 2117, 1607, +- 2443, 554, 1179, 2186, 2303, 2926, 2237, 525, 735, 863, 2768, 1230, +- 2572, 556, 3010, 2266, 1684, 1239, 780, 2954, 109, 1292, 1031, 
1745, +- 2688, 3061, 992, 2596, 941, 892, 1021, 2390, 642, 1868, 2377, 1482, +- 1540, 540, 1678, 1626, 279, 314, 1173, 2573, 3096, 48, 667, 1920, +- 2229, 1041, 2606, 1692, 680, 2746, 568, 3312, +-}; ++#if (KYBER_K == 2) ++#define KYBER_NAMESPACE(s) KYBER512_##s ++#elif (KYBER_K == 3) ++#define KYBER_NAMESPACE(s) KYBER768_##s ++#elif (KYBER_K == 4) ++#define KYBER_NAMESPACE(s) KYBER1024_##s ++#else ++#error "KYBER_K must be in {2,3,4}" ++#endif + +-// kModRoots = [pow(17, 2*bitreverse(i) + 1, p) for i in range(128)] +-static const uint16_t kModRoots[128] = { +- 17, 3312, 2761, 568, 583, 2746, 2649, 680, 1637, 1692, 723, 2606, +- 2288, 1041, 1100, 2229, 1409, 1920, 2662, 667, 3281, 48, 233, 3096, +- 756, 2573, 2156, 1173, 3015, 314, 3050, 279, 1703, 1626, 1651, 1678, +- 2789, 540, 1789, 1540, 1847, 1482, 952, 2377, 1461, 1868, 2687, 642, +- 939, 2390, 2308, 1021, 2437, 892, 2388, 941, 733, 2596, 2337, 992, +- 268, 3061, 641, 2688, 1584, 1745, 2298, 1031, 2037, 1292, 3220, 109, +- 375, 2954, 2549, 780, 2090, 1239, 1645, 1684, 1063, 2266, 319, 3010, +- 2773, 556, 757, 2572, 2099, 1230, 561, 2768, 2466, 863, 2594, 735, +- 2804, 525, 1092, 2237, 403, 2926, 1026, 2303, 1143, 2186, 2150, 1179, +- 2775, 554, 886, 2443, 1722, 1607, 1212, 2117, 1874, 1455, 1029, 2300, +- 2110, 1219, 2935, 394, 885, 2444, 2154, 1175, +-}; ++#define public_key KYBER_NAMESPACE(public_key) ++#define private_key KYBER_NAMESPACE(private_key) + +-// reduce_once reduces 0 <= x < 2*kPrime, mod kPrime. +-static uint16_t reduce_once(uint16_t x) { +- assert(x < 2 * kPrime); +- const uint16_t subtracted = x - kPrime; +- uint16_t mask = 0u - (subtracted >> 15); +- // On Aarch64, omitting a |value_barrier_u16| results in a 2x speedup of Kyber +- // overall and Clang still produces constant-time code using `csel`. On other +- // platforms & compilers on godbolt that we care about, this code also +- // produces constant-time output. 
+- return (mask & x) | (~mask & subtracted); +-} +- +-// constant time reduce x mod kPrime using Barrett reduction. x must be less +-// than kPrime + 2×kPrime². +-static uint16_t reduce(uint32_t x) { +- assert(x < kPrime + 2u * kPrime * kPrime); +- uint64_t product = (uint64_t)x * kBarrettMultiplier; +- uint32_t quotient = product >> kBarrettShift; +- uint32_t remainder = x - quotient * kPrime; +- return reduce_once(remainder); +-} +- +-static void scalar_zero(scalar *out) { OPENSSL_memset(out, 0, sizeof(*out)); } +- +-static void vector_zero(vector *out) { OPENSSL_memset(out, 0, sizeof(*out)); } +- +-// In place number theoretic transform of a given scalar. +-// Note that Kyber's kPrime 3329 does not have a 512th root of unity, so this +-// transform leaves off the last iteration of the usual FFT code, with the 128 +-// relevant roots of unity being stored in |kNTTRoots|. This means the output +-// should be seen as 128 elements in GF(3329^2), with the coefficients of the +-// elements being consecutive entries in |s->c|. +-static void scalar_ntt(scalar *s) { +- int offset = DEGREE; +- // `int` is used here because using `size_t` throughout caused a ~5% slowdown +- // with Clang 14 on Aarch64. 
+- for (int step = 1; step < DEGREE / 2; step <<= 1) { +- offset >>= 1; +- int k = 0; +- for (int i = 0; i < step; i++) { +- const uint32_t step_root = kNTTRoots[i + step]; +- for (int j = k; j < k + offset; j++) { +- uint16_t odd = reduce(step_root * s->c[j + offset]); +- uint16_t even = s->c[j]; +- s->c[j] = reduce_once(odd + even); +- s->c[j + offset] = reduce_once(even - odd + kPrime); +- } +- k += 2 * offset; ++#define generate_key KYBER_NAMESPACE(generate_key) ++#define encap KYBER_NAMESPACE(encap) ++#define decap KYBER_NAMESPACE(decap) ++#define marshal_public_key KYBER_NAMESPACE(marshal_public_key) ++#define parse_public_key KYBER_NAMESPACE(parse_public_key) ++ ++ ++// ++// params.h ++// ++#define KYBER_N 256 ++#define KYBER_Q 3329 ++ ++#define KYBER_SYMBYTES 32 /* size in bytes of hashes, and seeds */ ++#define KYBER_SSBYTES 32 /* size in bytes of shared key */ ++ ++#define KYBER_POLYBYTES 384 ++#define KYBER_POLYVECBYTES (KYBER_K * KYBER_POLYBYTES) ++ ++#if KYBER_K == 2 ++#define KYBER_ETA1 3 ++#define KYBER_POLYCOMPRESSEDBYTES 128 ++#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) ++#elif KYBER_K == 3 ++#define KYBER_ETA1 2 ++#define KYBER_POLYCOMPRESSEDBYTES 128 ++#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) ++#elif KYBER_K == 4 ++#define KYBER_ETA1 2 ++#define KYBER_POLYCOMPRESSEDBYTES 160 ++#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 352) ++#endif ++ ++#define KYBER_ETA2 2 ++ ++#define KYBER_INDCPA_MSGBYTES (KYBER_SYMBYTES) ++#define KYBER_INDCPA_PUBLICKEYBYTES (KYBER_POLYVECBYTES + KYBER_SYMBYTES) ++#define KYBER_INDCPA_SECRETKEYBYTES (KYBER_POLYVECBYTES) ++#define KYBER_INDCPA_BYTES (KYBER_POLYVECCOMPRESSEDBYTES + KYBER_POLYCOMPRESSEDBYTES) ++ ++#define KYBER_PUBLICKEYBYTES (KYBER_INDCPA_PUBLICKEYBYTES) ++/* 32 bytes of additional space to save H(pk) */ ++#define KYBER_SECRETKEYBYTES (KYBER_INDCPA_SECRETKEYBYTES + KYBER_INDCPA_PUBLICKEYBYTES + 2*KYBER_SYMBYTES) ++#define KYBER_CIPHERTEXTBYTES (KYBER_INDCPA_BYTES) ++ ++// ++// 
verify.h ++// ++static int verify(const uint8_t *a, const uint8_t *b, size_t len); ++static void cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b); ++ ++// ++// reduce.h +// -+// - Merged into one file. -+// - Removed 90s version. -+// - Seeds are passed as paramters. -+// - Changed the API to be more BoringSSL-like ++#define MONT -1044 // 2^16 mod q ++#define QINV -3327 // q^-1 mod 2^16 ++ ++static int16_t montgomery_reduce(int32_t a); ++static int16_t barrett_reduce(int16_t a); ++ +// -+// TODO ++// ntt.h +// -+// - Optimizations ++static void ntt(int16_t poly[256]); ++static void invntt(int16_t poly[256]); ++static void basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta); ++ +// -+// The majority of Kyber's time is spent in keccak: generating the matrix -+// A, hashing the public key, et cetera. This can be sped up dramatically -+// by using a multiway keccak implementation such as f1600x4 on AVX2. ++// poly.h +// -+// Also the NTT and other operations can be sped up with SIMD. This is -+// more complex and the gains are more modest. See the avx2 reference -+// implementation or https://github.com/cloudflare/circl/tree/main/pke/kyber ++ ++/* ++ * Elements of R_q = Z_q[X]/(X^n + 1). Represents polynomial ++ * coeffs[0] + X*coeffs[1] + X^2*xoeffs[2] + ... 
+ X^{n-1}*coeffs[n-1] ++ */ ++typedef struct{ ++ int16_t coeffs[KYBER_N]; ++} poly; ++ ++static void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a); ++static void poly_decompress(poly *r, const uint8_t a[KYBER_POLYCOMPRESSEDBYTES]); ++ ++static void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a); ++static void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES]); ++ ++static void poly_frommsg(poly *r, const uint8_t msg[KYBER_INDCPA_MSGBYTES]); ++static void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly *r); ++ ++static void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce); ++static void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce); ++ ++static void poly_ntt(poly *r); ++static void poly_invntt_tomont(poly *r); ++static void poly_basemul_montgomery(poly *r, const poly *a, const poly *b); ++static void poly_tomont(poly *r); ++ ++static void poly_reduce(poly *r); ++ ++static void poly_add(poly *r, const poly *a, const poly *b); ++static void poly_sub(poly *r, const poly *a, const poly *b); ++ +// -+// - Option to keep A stored in private key. 
- --#include -+#ifndef KYBER_K -+#error "Don't compile this file direcly" ++// cbd.h ++// ++static void poly_cbd_eta1(poly *r, const uint8_t buf[KYBER_ETA1*KYBER_N/4]); ++static void poly_cbd_eta2(poly *r, const uint8_t buf[KYBER_ETA2*KYBER_N/4]); ++ ++// ++// polyvec.h ++// ++ ++typedef struct{ ++ poly vec[KYBER_K]; ++} polyvec; ++ ++static void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a); ++static void polyvec_decompress(polyvec *r, const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES]); ++ ++static void polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a); ++static void polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES]); ++ ++static void polyvec_ntt(polyvec *r); ++static void polyvec_invntt_tomont(polyvec *r); ++ ++static void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b); ++ ++static void polyvec_reduce(polyvec *r); ++ ++static void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b); ++ ++// ++// indcpa.h ++// ++ ++static void gen_matrix(polyvec *a, const uint8_t seed[KYBER_SYMBYTES], int transposed); ++static void indcpa_keypair(uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES], ++ uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES], ++ const uint8_t seed[KYBER_SYMBYTES]); ++ ++static void indcpa_enc(uint8_t c[KYBER_INDCPA_BYTES], ++ const uint8_t m[KYBER_INDCPA_MSGBYTES], ++ const uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES], ++ const uint8_t coins[KYBER_SYMBYTES]); ++ ++static void indcpa_dec(uint8_t m[KYBER_INDCPA_MSGBYTES], ++ const uint8_t c[KYBER_INDCPA_BYTES], ++ const uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES]); ++ ++// ++// fips202.h ++// ++ ++#define SHAKE128_RATE 168 ++#define SHAKE256_RATE 136 ++#define SHA3_256_RATE 136 ++#define SHA3_512_RATE 72 ++ ++typedef struct { ++ uint64_t s[25]; ++ unsigned int pos; ++} keccak_state; ++ ++static void shake128_absorb_once(keccak_state *state, const uint8_t *in, size_t inlen); ++static void shake128_squeezeblocks(uint8_t *out, size_t nblocks, 
keccak_state *state); ++ ++static void shake256_squeeze(uint8_t *out, size_t outlen, keccak_state *state); ++static void shake256_absorb_once(keccak_state *state, const uint8_t *in, size_t inlen); ++static void shake256_squeezeblocks(uint8_t *out, size_t nblocks, keccak_state *state); ++ ++static void shake256(uint8_t *out, size_t outlen, const uint8_t *in, size_t inlen); ++static void sha3_256(uint8_t h[32], const uint8_t *in, size_t inlen); ++static void sha3_512(uint8_t h[64], const uint8_t *in, size_t inlen); ++ ++// ++// symmetric.h ++// ++ ++typedef keccak_state xof_state; ++ ++static void kyber_shake128_absorb(keccak_state *s, ++ const uint8_t seed[KYBER_SYMBYTES], ++ uint8_t x, ++ uint8_t y); ++ ++static void kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYMBYTES], uint8_t nonce); ++ ++#define XOF_BLOCKBYTES SHAKE128_RATE ++ ++#define hash_h(OUT, IN, INBYTES) sha3_256(OUT, IN, INBYTES) ++#define hash_g(OUT, IN, INBYTES) sha3_512(OUT, IN, INBYTES) ++#define xof_absorb(STATE, SEED, X, Y) kyber_shake128_absorb(STATE, SEED, X, Y) ++#define xof_squeezeblocks(OUT, OUTBLOCKS, STATE) shake128_squeezeblocks(OUT, OUTBLOCKS, STATE) ++#define prf(OUT, OUTBYTES, KEY, NONCE) kyber_shake256_prf(OUT, OUTBYTES, KEY, NONCE) ++#define kdf(OUT, IN, INBYTES) shake256(OUT, KYBER_SSBYTES, IN, INBYTES) ++ ++ ++// ++// verify.c ++// ++ ++/************************************************* ++* Name: verify ++* ++* Description: Compare two arrays for equality in constant time. 
++* ++* Arguments: const uint8_t *a: pointer to first byte array ++* const uint8_t *b: pointer to second byte array ++* size_t len: length of the byte arrays ++* ++* Returns 0 if the byte arrays are equal, 1 otherwise ++**************************************************/ ++static int verify(const uint8_t *a, const uint8_t *b, size_t len) ++{ ++ size_t i; ++ uint8_t r = 0; ++ ++ for(i=0;i> 63; ++} ++ ++/************************************************* ++* Name: cmov ++* ++* Description: Copy len bytes from x to r if b is 1; ++* don't modify x if b is 0. Requires b to be in {0,1}; ++* assumes two's complement representation of negative integers. ++* Runs in constant time. ++* ++* Arguments: uint8_t *r: pointer to output byte array ++* const uint8_t *x: pointer to input byte array ++* size_t len: Amount of bytes to be copied ++* uint8_t b: Condition bit; has to be in {0,1} ++**************************************************/ ++static void cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b) ++{ ++ size_t i; ++ ++ b = -b; ++ for(i=0;i> 16; ++ return t; ++} ++ ++/************************************************* ++* Name: barrett_reduce ++* ++* Description: Barrett reduction; given a 16-bit integer a, computes ++* centered representative congruent to a mod q in {-(q-1)/2,...,(q-1)/2} ++* ++* Arguments: - int16_t a: input integer to be reduced ++* ++* Returns: integer in {-(q-1)/2,...,(q-1)/2} congruent to a modulo q. 
++**************************************************/ ++static int16_t barrett_reduce(int16_t a) { ++ int16_t t; ++ const int16_t v = ((1<<26) + KYBER_Q/2)/KYBER_Q; ++ ++ t = ((int32_t)v*a + (1<<25)) >> 26; ++ t *= KYBER_Q; ++ return a - t; ++} ++ ++// ++// cbd.c ++// ++ ++/************************************************* ++* Name: load32_littleendian ++* ++* Description: load 4 bytes into a 32-bit integer ++* in little-endian order ++* ++* Arguments: - const uint8_t *x: pointer to input byte array ++* ++* Returns 32-bit unsigned integer loaded from x ++**************************************************/ ++static uint32_t load32_littleendian(const uint8_t x[4]) ++{ ++ uint32_t r; ++ r = (uint32_t)x[0]; ++ r |= (uint32_t)x[1] << 8; ++ r |= (uint32_t)x[2] << 16; ++ r |= (uint32_t)x[3] << 24; ++ return r; ++} ++ ++/************************************************* ++* Name: load24_littleendian ++* ++* Description: load 3 bytes into a 32-bit integer ++* in little-endian order. ++* This function is only needed for Kyber-512 ++* ++* Arguments: - const uint8_t *x: pointer to input byte array ++* ++* Returns 32-bit unsigned integer loaded from x (most significant byte is zero) ++**************************************************/ ++#if KYBER_ETA1 == 3 ++static uint32_t load24_littleendian(const uint8_t x[3]) ++{ ++ uint32_t r; ++ r = (uint32_t)x[0]; ++ r |= (uint32_t)x[1] << 8; ++ r |= (uint32_t)x[2] << 16; ++ return r; ++} +#endif ++ ++ ++/************************************************* ++* Name: cbd2 ++* ++* Description: Given an array of uniformly random bytes, compute ++* polynomial with coefficients distributed according to ++* a centered binomial distribution with parameter eta=2 ++* ++* Arguments: - poly *r: pointer to output polynomial ++* - const uint8_t *buf: pointer to input byte array ++**************************************************/ ++static void cbd2(poly *r, const uint8_t buf[2*KYBER_N/4]) ++{ ++ unsigned int i,j; ++ uint32_t t,d; ++ int16_t a,b; ++ 
++ for(i=0;i>1) & 0x55555555; ++ ++ for(j=0;j<8;j++) { ++ a = (d >> (4*j+0)) & 0x3; ++ b = (d >> (4*j+2)) & 0x3; ++ r->coeffs[8*i+j] = a - b; + } + } + } --#include --#include -+#include -+#include - --#include --#include -+#include -+#include -+#include - -+#include "fips202.h" -+#include "kyber.h" - #include "../internal.h" --#include "./internal.h" -- -- --// See --// https://pq-crystals.org/kyber/data/kyber-specification-round3-20210804.pdf -- --#define DEGREE 256 --#define RANK 3 -- --static const size_t kBarrettMultiplier = 5039; --static const unsigned kBarrettShift = 24; --static const uint16_t kPrime = 3329; --static const int kLog2Prime = 12; --static const uint16_t kHalfPrime = (/*kPrime=*/3329 - 1) / 2; --static const int kDU = 10; --static const int kDV = 4; --// kInverseDegree is 128^-1 mod 3329; 128 because kPrime does not have a 512th --// root of unity. --static const uint16_t kInverseDegree = 3303; --static const size_t kEncodedVectorSize = -- (/*kLog2Prime=*/12 * DEGREE / 8) * RANK; --static const size_t kCompressedVectorSize = /*kDU=*/10 * RANK * DEGREE / 8; -- --typedef struct scalar { -- // On every function entry and exit, 0 <= c < kPrime. 
-- uint16_t c[DEGREE]; --} scalar; -- --typedef struct vector { -- scalar v[RANK]; --} vector; -- --typedef struct matrix { -- scalar v[RANK][RANK]; --} matrix; -- --// This bit of Python will be referenced in some of the following comments: --// --// p = 3329 --// --// def bitreverse(i): --// ret = 0 --// for n in range(7): --// bit = i & 1 --// ret <<= 1 --// ret |= bit --// i >>= 1 --// return ret -- --// kNTTRoots = [pow(17, bitreverse(i), p) for i in range(128)] --static const uint16_t kNTTRoots[128] = { -- 1, 1729, 2580, 3289, 2642, 630, 1897, 848, 1062, 1919, 193, 797, -- 2786, 3260, 569, 1746, 296, 2447, 1339, 1476, 3046, 56, 2240, 1333, -- 1426, 2094, 535, 2882, 2393, 2879, 1974, 821, 289, 331, 3253, 1756, -- 1197, 2304, 2277, 2055, 650, 1977, 2513, 632, 2865, 33, 1320, 1915, -- 2319, 1435, 807, 452, 1438, 2868, 1534, 2402, 2647, 2617, 1481, 648, -- 2474, 3110, 1227, 910, 17, 2761, 583, 2649, 1637, 723, 2288, 1100, -- 1409, 2662, 3281, 233, 756, 2156, 3015, 3050, 1703, 1651, 2789, 1789, -- 1847, 952, 1461, 2687, 939, 2308, 2437, 2388, 733, 2337, 268, 641, -- 1584, 2298, 2037, 3220, 375, 2549, 2090, 1645, 1063, 319, 2773, 757, -- 2099, 561, 2466, 2594, 2804, 1092, 403, 1026, 1143, 2150, 2775, 886, -- 1722, 1212, 1874, 1029, 2110, 2935, 885, 2154, --}; - --// kInverseNTTRoots = [pow(17, -bitreverse(i), p) for i in range(128)] --static const uint16_t kInverseNTTRoots[128] = { -- 1, 1600, 40, 749, 2481, 1432, 2699, 687, 1583, 2760, 69, 543, -- 2532, 3136, 1410, 2267, 2508, 1355, 450, 936, 447, 2794, 1235, 1903, -- 1996, 1089, 3273, 283, 1853, 1990, 882, 3033, 2419, 2102, 219, 855, -- 2681, 1848, 712, 682, 927, 1795, 461, 1891, 2877, 2522, 1894, 1010, -- 1414, 2009, 3296, 464, 2697, 816, 1352, 2679, 1274, 1052, 1025, 2132, -- 1573, 76, 2998, 3040, 1175, 2444, 394, 1219, 2300, 1455, 2117, 1607, -- 2443, 554, 1179, 2186, 2303, 2926, 2237, 525, 735, 863, 2768, 1230, -- 2572, 556, 3010, 2266, 1684, 1239, 780, 2954, 109, 1292, 1031, 1745, -- 2688, 3061, 992, 2596, 
941, 892, 1021, 2390, 642, 1868, 2377, 1482, -- 1540, 540, 1678, 1626, 279, 314, 1173, 2573, 3096, 48, 667, 1920, -- 2229, 1041, 2606, 1692, 680, 2746, 568, 3312, --}; -+#if (KYBER_K == 2) -+#define KYBER_NAMESPACE(s) KYBER512_##s -+#elif (KYBER_K == 3) -+#define KYBER_NAMESPACE(s) KYBER768_##s -+#elif (KYBER_K == 4) -+#define KYBER_NAMESPACE(s) KYBER1024_##s +-static void vector_ntt(vector *a) { +- for (int i = 0; i < RANK; i++) { +- scalar_ntt(&a->v[i]); ++/************************************************* ++* Name: cbd3 ++* ++* Description: Given an array of uniformly random bytes, compute ++* polynomial with coefficients distributed according to ++* a centered binomial distribution with parameter eta=3. ++* This function is only needed for Kyber-512 ++* ++* Arguments: - poly *r: pointer to output polynomial ++* - const uint8_t *buf: pointer to input byte array ++**************************************************/ ++#if KYBER_ETA1 == 3 ++static void cbd3(poly *r, const uint8_t buf[3*KYBER_N/4]) ++{ ++ unsigned int i,j; ++ uint32_t t,d; ++ int16_t a,b; ++ ++ for(i=0;i>1) & 0x00249249; ++ d += (t>>2) & 0x00249249; ++ ++ for(j=0;j<4;j++) { ++ a = (d >> (6*j+0)) & 0x7; ++ b = (d >> (6*j+3)) & 0x7; ++ r->coeffs[4*i+j] = a - b; ++ } + } + } ++#endif ++ ++static void poly_cbd_eta1(poly *r, const uint8_t buf[KYBER_ETA1*KYBER_N/4]) ++{ ++#if KYBER_ETA1 == 2 ++ cbd2(r, buf); ++#elif KYBER_ETA1 == 3 ++ cbd3(r, buf); +#else -+#error "KYBER_K must be in {2,3,4}" ++#error "This implementation requires eta1 in {2,3}" +#endif ++} --// kModRoots = [pow(17, 2*bitreverse(i) + 1, p) for i in range(128)] --static const uint16_t kModRoots[128] = { -- 17, 3312, 2761, 568, 583, 2746, 2649, 680, 1637, 1692, 723, 2606, -- 2288, 1041, 1100, 2229, 1409, 1920, 2662, 667, 3281, 48, 233, 3096, -- 756, 2573, 2156, 1173, 3015, 314, 3050, 279, 1703, 1626, 1651, 1678, -- 2789, 540, 1789, 1540, 1847, 1482, 952, 2377, 1461, 1868, 2687, 642, -- 939, 2390, 2308, 1021, 2437, 892, 2388, 941, 733, 2596, 
2337, 992, -- 268, 3061, 641, 2688, 1584, 1745, 2298, 1031, 2037, 1292, 3220, 109, -- 375, 2954, 2549, 780, 2090, 1239, 1645, 1684, 1063, 2266, 319, 3010, -- 2773, 556, 757, 2572, 2099, 1230, 561, 2768, 2466, 863, 2594, 735, -- 2804, 525, 1092, 2237, 403, 2926, 1026, 2303, 1143, 2186, 2150, 1179, -- 2775, 554, 886, 2443, 1722, 1607, 1212, 2117, 1874, 1455, 1029, 2300, -- 2110, 1219, 2935, 394, 885, 2444, 2154, 1175, --}; -+#define public_key KYBER_NAMESPACE(public_key) -+#define private_key KYBER_NAMESPACE(private_key) - --// reduce_once reduces 0 <= x < 2*kPrime, mod kPrime. --static uint16_t reduce_once(uint16_t x) { -- assert(x < 2 * kPrime); -- const uint16_t subtracted = x - kPrime; -- uint16_t mask = 0u - (subtracted >> 15); -- // On Aarch64, omitting a |value_barrier_u16| results in a 2x speedup of Kyber -- // overall and Clang still produces constant-time code using `csel`. On other -- // platforms & compilers on godbolt that we care about, this code also -- // produces constant-time output. -- return (mask & x) | (~mask & subtracted); --} -- --// constant time reduce x mod kPrime using Barrett reduction. x must be less --// than kPrime + 2×kPrime². --static uint16_t reduce(uint32_t x) { -- assert(x < kPrime + 2u * kPrime * kPrime); -- uint64_t product = (uint64_t)x * kBarrettMultiplier; -- uint32_t quotient = product >> kBarrettShift; -- uint32_t remainder = x - quotient * kPrime; -- return reduce_once(remainder); --} -- --static void scalar_zero(scalar *out) { OPENSSL_memset(out, 0, sizeof(*out)); } -- --static void vector_zero(vector *out) { OPENSSL_memset(out, 0, sizeof(*out)); } -- --// In place number theoretic transform of a given scalar. --// Note that Kyber's kPrime 3329 does not have a 512th root of unity, so this --// transform leaves off the last iteration of the usual FFT code, with the 128 --// relevant roots of unity being stored in |kNTTRoots|. 
This means the output --// should be seen as 128 elements in GF(3329^2), with the coefficients of the --// elements being consecutive entries in |s->c|. --static void scalar_ntt(scalar *s) { -- int offset = DEGREE; +-// In place inverse number theoretic transform of a given scalar, with pairs of +-// entries of s->v being interpreted as elements of GF(3329^2). Just as with the +-// number theoretic transform, this leaves off the first step of the normal iFFT +-// to account for the fact that 3329 does not have a 512th root of unity, using +-// the precomputed 128 roots of unity stored in |kInverseNTTRoots|. +-static void scalar_inverse_ntt(scalar *s) { +- int step = DEGREE / 2; - // `int` is used here because using `size_t` throughout caused a ~5% slowdown - // with Clang 14 on Aarch64. -- for (int step = 1; step < DEGREE / 2; step <<= 1) { -- offset >>= 1; +- for (int offset = 2; offset < DEGREE; offset <<= 1) { +- step >>= 1; - int k = 0; - for (int i = 0; i < step; i++) { -- const uint32_t step_root = kNTTRoots[i + step]; +- uint32_t step_root = kInverseNTTRoots[i + step]; - for (int j = k; j < k + offset; j++) { -- uint16_t odd = reduce(step_root * s->c[j + offset]); +- uint16_t odd = s->c[j + offset]; - uint16_t even = s->c[j]; - s->c[j] = reduce_once(odd + even); -- s->c[j + offset] = reduce_once(even - odd + kPrime); -- } -- k += 2 * offset; -+#define generate_key KYBER_NAMESPACE(generate_key) -+#define encap KYBER_NAMESPACE(encap) -+#define decap KYBER_NAMESPACE(decap) -+#define marshal_public_key KYBER_NAMESPACE(marshal_public_key) -+#define parse_public_key KYBER_NAMESPACE(parse_public_key) -+ -+#define decap2 KYBER_NAMESPACE(decap2) -+#define encap2 KYBER_NAMESPACE(encap2) -+ -+ -+// -+// params.h -+// -+#define KYBER_N 256 -+#define KYBER_Q 3329 -+ -+#define KYBER_SYMBYTES 32 /* size in bytes of hashes, and seeds */ -+#define KYBER_SSBYTES 32 /* size in bytes of shared key */ -+ -+#define KYBER_POLYBYTES 384 -+#define KYBER_POLYVECBYTES (KYBER_K * 
KYBER_POLYBYTES) -+ -+#if KYBER_K == 2 -+#define KYBER_ETA1 3 -+#define KYBER_POLYCOMPRESSEDBYTES 128 -+#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) -+#elif KYBER_K == 3 -+#define KYBER_ETA1 2 -+#define KYBER_POLYCOMPRESSEDBYTES 128 -+#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) -+#elif KYBER_K == 4 -+#define KYBER_ETA1 2 -+#define KYBER_POLYCOMPRESSEDBYTES 160 -+#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 352) +- s->c[j + offset] = reduce(step_root * (even - odd + kPrime)); ++static void poly_cbd_eta2(poly *r, const uint8_t buf[KYBER_ETA2*KYBER_N/4]) ++{ ++#if KYBER_ETA2 == 2 ++ cbd2(r, buf); ++#else ++#error "This implementation requires eta2 = 2" +#endif -+ -+#define KYBER_ETA2 2 -+ -+#define KYBER_INDCPA_MSGBYTES (KYBER_SYMBYTES) -+#define KYBER_INDCPA_PUBLICKEYBYTES (KYBER_POLYVECBYTES + KYBER_SYMBYTES) -+#define KYBER_INDCPA_SECRETKEYBYTES (KYBER_POLYVECBYTES) -+#define KYBER_INDCPA_BYTES (KYBER_POLYVECCOMPRESSEDBYTES + KYBER_POLYCOMPRESSEDBYTES) -+ -+#define KYBER_PUBLICKEYBYTES (KYBER_INDCPA_PUBLICKEYBYTES) -+/* 32 bytes of additional space to save H(pk) */ -+#define KYBER_SECRETKEYBYTES (KYBER_INDCPA_SECRETKEYBYTES + KYBER_INDCPA_PUBLICKEYBYTES + 2*KYBER_SYMBYTES) -+#define KYBER_CIPHERTEXTBYTES (KYBER_INDCPA_BYTES) -+ -+// -+// verify.h -+// -+static int verify(const uint8_t *a, const uint8_t *b, size_t len); -+static void cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b); -+ -+// -+// reduce.h -+// -+#define MONT -1044 // 2^16 mod q -+#define QINV -3327 // q^-1 mod 2^16 -+ -+static int16_t montgomery_reduce(int32_t a); -+static int16_t barrett_reduce(int16_t a); -+ -+// -+// ntt.h -+// -+static void ntt(int16_t poly[256]); -+static void invntt(int16_t poly[256]); -+static void basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta); -+ -+// -+// poly.h -+// -+ -+/* -+ * Elements of R_q = Z_q[X]/(X^n + 1). Represents polynomial -+ * coeffs[0] + X*coeffs[1] + X^2*xoeffs[2] + ... 
+ X^{n-1}*coeffs[n-1] -+ */ -+typedef struct{ -+ int16_t coeffs[KYBER_N]; -+} poly; -+ -+static void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a); -+static void poly_decompress(poly *r, const uint8_t a[KYBER_POLYCOMPRESSEDBYTES]); -+ -+static void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a); -+static void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES]); -+ -+static void poly_frommsg(poly *r, const uint8_t msg[KYBER_INDCPA_MSGBYTES]); -+static void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly *r); -+ -+static void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce); -+static void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce); -+ -+static void poly_ntt(poly *r); -+static void poly_invntt_tomont(poly *r); -+static void poly_basemul_montgomery(poly *r, const poly *a, const poly *b); -+static void poly_tomont(poly *r); -+ -+static void poly_reduce(poly *r); -+ -+static void poly_add(poly *r, const poly *a, const poly *b); -+static void poly_sub(poly *r, const poly *a, const poly *b); ++} + +// -+// cbd.h ++// ntt.c +// -+static void poly_cbd_eta1(poly *r, const uint8_t buf[KYBER_ETA1*KYBER_N/4]); -+static void poly_cbd_eta2(poly *r, const uint8_t buf[KYBER_ETA2*KYBER_N/4]); + -+// -+// polyvec.h -+// ++/* Code to generate zetas and zetas_inv used in the number-theoretic transform: + -+typedef struct{ -+ poly vec[KYBER_K]; -+} polyvec; ++#define KYBER_ROOT_OF_UNITY 17 + -+static void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a); -+static void polyvec_decompress(polyvec *r, const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES]); ++static const uint8_t tree[128] = { ++ 0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120, ++ 4, 68, 36, 100, 20, 84, 52, 116, 12, 76, 44, 108, 28, 92, 60, 124, ++ 2, 66, 34, 98, 18, 82, 50, 114, 10, 74, 42, 106, 26, 90, 58, 122, ++ 6, 70, 38, 102, 22, 86, 54, 118, 14, 78, 46, 110, 30, 94, 62, 
126, ++ 1, 65, 33, 97, 17, 81, 49, 113, 9, 73, 41, 105, 25, 89, 57, 121, ++ 5, 69, 37, 101, 21, 85, 53, 117, 13, 77, 45, 109, 29, 93, 61, 125, ++ 3, 67, 35, 99, 19, 83, 51, 115, 11, 75, 43, 107, 27, 91, 59, 123, ++ 7, 71, 39, 103, 23, 87, 55, 119, 15, 79, 47, 111, 31, 95, 63, 127 ++}; + -+static void polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a); -+static void polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES]); ++void init_ntt() { ++ unsigned int i; ++ int16_t tmp[128]; + -+static void polyvec_ntt(polyvec *r); -+static void polyvec_invntt_tomont(polyvec *r); ++ tmp[0] = MONT; ++ for(i=1;i<128;i++) ++ tmp[i] = fqmul(tmp[i-1],MONT*KYBER_ROOT_OF_UNITY % KYBER_Q); + -+static void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b); ++ for(i=0;i<128;i++) { ++ zetas[i] = tmp[tree[i]]; ++ if(zetas[i] > KYBER_Q/2) ++ zetas[i] -= KYBER_Q; ++ if(zetas[i] < -KYBER_Q/2) ++ zetas[i] += KYBER_Q; ++ } ++} ++*/ + -+static void polyvec_reduce(polyvec *r); ++static const int16_t zetas[128] = { ++ -1044, -758, -359, -1517, 1493, 1422, 287, 202, ++ -171, 622, 1577, 182, 962, -1202, -1474, 1468, ++ 573, -1325, 264, 383, -829, 1458, -1602, -130, ++ -681, 1017, 732, 608, -1542, 411, -205, -1571, ++ 1223, 652, -552, 1015, -1293, 1491, -282, -1544, ++ 516, -8, -320, -666, -1618, -1162, 126, 1469, ++ -853, -90, -271, 830, 107, -1421, -247, -951, ++ -398, 961, -1508, -725, 448, -1065, 677, -1275, ++ -1103, 430, 555, 843, -1251, 871, 1550, 105, ++ 422, 587, 177, -235, -291, -460, 1574, 1653, ++ -246, 778, 1159, -147, -777, 1483, -602, 1119, ++ -1590, 644, -872, 349, 418, 329, -156, -75, ++ 817, 1097, 603, 610, 1322, -1285, -1465, 384, ++ -1215, -136, 1218, -1335, -874, 220, -1187, -1659, ++ -1185, -1530, -1278, 794, -1510, -854, -870, 478, ++ -108, -308, 996, 991, 958, -1460, 1522, 1628 ++}; + -+static void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b); ++/************************************************* ++* Name: fqmul 
++* ++* Description: Multiplication followed by Montgomery reduction ++* ++* Arguments: - int16_t a: first factor ++* - int16_t b: second factor ++* ++* Returns 16-bit integer congruent to a*b*R^{-1} mod q ++**************************************************/ ++static int16_t fqmul(int16_t a, int16_t b) { ++ return montgomery_reduce((int32_t)a*b); ++} + -+// -+// indcpa.h -+// ++/************************************************* ++* Name: ntt ++* ++* Description: Inplace number-theoretic transform (NTT) in Rq. ++* input is in standard order, output is in bitreversed order ++* ++* Arguments: - int16_t r[256]: pointer to input/output vector of elements of Zq ++**************************************************/ ++static void ntt(int16_t r[256]) { ++ unsigned int len, start, j, k; ++ int16_t t, zeta; + -+static void gen_matrix(polyvec *a, const uint8_t seed[KYBER_SYMBYTES], int transposed); -+static void indcpa_keypair(uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES], -+ uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES], -+ const uint8_t seed[KYBER_SYMBYTES]); ++ k = 1; ++ for(len = 128; len >= 2; len >>= 1) { ++ for(start = 0; start < 256; start = j + len) { ++ zeta = zetas[k++]; ++ for(j = start; j < start + len; j++) { ++ t = fqmul(zeta, r[j + len]); ++ r[j + len] = r[j] - t; ++ r[j] = r[j] + t; + } +- k += 2 * offset; + } + } +- for (int i = 0; i < DEGREE; i++) { +- s->c[i] = reduce(s->c[i] * kInverseDegree); +- } + } + +-static void vector_inverse_ntt(vector *a) { +- for (int i = 0; i < RANK; i++) { +- scalar_inverse_ntt(&a->v[i]); ++/************************************************* ++* Name: invntt_tomont ++* ++* Description: Inplace inverse number-theoretic transform in Rq and ++* multiplication by Montgomery factor 2^16. 
++* Input is in bitreversed order, output is in standard order ++* ++* Arguments: - int16_t r[256]: pointer to input/output vector of elements of Zq ++**************************************************/ ++static void invntt(int16_t r[256]) { ++ unsigned int start, len, j, k; ++ int16_t t, zeta; ++ const int16_t f = 1441; // mont^2/128 + -+static void indcpa_enc(uint8_t c[KYBER_INDCPA_BYTES], -+ const uint8_t m[KYBER_INDCPA_MSGBYTES], -+ const uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES], -+ const uint8_t coins[KYBER_SYMBYTES]); ++ k = 127; ++ for(len = 2; len <= 128; len <<= 1) { ++ for(start = 0; start < 256; start = j + len) { ++ zeta = zetas[k--]; ++ for(j = start; j < start + len; j++) { ++ t = r[j]; ++ r[j] = barrett_reduce(t + r[j + len]); ++ r[j + len] = r[j + len] - t; ++ r[j + len] = fqmul(zeta, r[j + len]); ++ } ++ } + } + -+static void indcpa_dec(uint8_t m[KYBER_INDCPA_MSGBYTES], -+ const uint8_t c[KYBER_INDCPA_BYTES], -+ const uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES]); ++ for(j = 0; j < 256; j++) ++ r[j] = fqmul(r[j], f); ++} + ++/************************************************* ++* Name: basemul ++* ++* Description: Multiplication of polynomials in Zq[X]/(X^2-zeta) ++* used for multiplication of elements in Rq in NTT domain ++* ++* Arguments: - int16_t r[2]: pointer to the output polynomial ++* - const int16_t a[2]: pointer to the first factor ++* - const int16_t b[2]: pointer to the second factor ++* - int16_t zeta: integer defining the reduction polynomial ++**************************************************/ ++static void basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta) ++{ ++ r[0] = fqmul(a[1], b[1]); ++ r[0] = fqmul(r[0], zeta); ++ r[0] += fqmul(a[0], b[0]); ++ r[1] = fqmul(a[0], b[1]); ++ r[1] += fqmul(a[1], b[0]); + } + +-static void scalar_add(scalar *lhs, const scalar *rhs) { +- for (int i = 0; i < DEGREE; i++) { +- lhs->c[i] = reduce_once(lhs->c[i] + rhs->c[i]); +// -+// fips202.h ++// poly.c +// + 
++/************************************************* ++* Name: poly_compress ++* ++* Description: Compression and subsequent serialization of a polynomial ++* ++* Arguments: - uint8_t *r: pointer to output byte array ++* (of length KYBER_POLYCOMPRESSEDBYTES) ++* - const poly *a: pointer to input polynomial ++**************************************************/ ++static void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a) ++{ ++ unsigned int i,j; ++ int16_t u; ++ uint8_t t[8]; + -+typedef keccak_state xof_state; ++#if (KYBER_POLYCOMPRESSEDBYTES == 128) ++ for(i=0;icoeffs[8*i+j]; ++ u += (u >> 15) & KYBER_Q; ++ t[j] = ((((uint16_t)u << 4) + KYBER_Q/2)/KYBER_Q) & 15; ++ } + -+static void kyber_shake128_absorb(keccak_state *s, -+ const uint8_t seed[KYBER_SYMBYTES], -+ uint8_t x, -+ uint8_t y); ++ r[0] = t[0] | (t[1] << 4); ++ r[1] = t[2] | (t[3] << 4); ++ r[2] = t[4] | (t[5] << 4); ++ r[3] = t[6] | (t[7] << 4); ++ r += 4; + } ++#elif (KYBER_POLYCOMPRESSEDBYTES == 160) ++ for(i=0;icoeffs[8*i+j]; ++ u += (u >> 15) & KYBER_Q; ++ t[j] = ((((uint32_t)u << 5) + KYBER_Q/2)/KYBER_Q) & 31; ++ } + -+static void kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYMBYTES], uint8_t nonce); ++ r[0] = (t[0] >> 0) | (t[1] << 5); ++ r[1] = (t[1] >> 3) | (t[2] << 2) | (t[3] << 7); ++ r[2] = (t[3] >> 1) | (t[4] << 4); ++ r[3] = (t[4] >> 4) | (t[5] << 1) | (t[6] << 6); ++ r[4] = (t[6] >> 2) | (t[7] << 3); ++ r += 5; ++ } ++#else ++#error "KYBER_POLYCOMPRESSEDBYTES needs to be in {128, 160}" ++#endif + } + +-static void scalar_sub(scalar *lhs, const scalar *rhs) { +- for (int i = 0; i < DEGREE; i++) { +- lhs->c[i] = reduce_once(lhs->c[i] - rhs->c[i] + kPrime); ++/************************************************* ++* Name: poly_decompress ++* ++* Description: De-serialization and subsequent decompression of a polynomial; ++* approximate inverse of poly_compress ++* ++* Arguments: - poly *r: pointer to output polynomial ++* - const uint8_t *a: pointer 
to input byte array ++* (of length KYBER_POLYCOMPRESSEDBYTES bytes) ++**************************************************/ ++static void poly_decompress(poly *r, const uint8_t a[KYBER_POLYCOMPRESSEDBYTES]) ++{ ++ unsigned int i; + ++#if (KYBER_POLYCOMPRESSEDBYTES == 128) ++ for(i=0;icoeffs[2*i+0] = (((uint16_t)(a[0] & 15)*KYBER_Q) + 8) >> 4; ++ r->coeffs[2*i+1] = (((uint16_t)(a[0] >> 4)*KYBER_Q) + 8) >> 4; ++ a += 1; ++ } ++#elif (KYBER_POLYCOMPRESSEDBYTES == 160) ++ unsigned int j; ++ uint8_t t[8]; ++ for(i=0;i> 0); ++ t[1] = (a[0] >> 5) | (a[1] << 3); ++ t[2] = (a[1] >> 2); ++ t[3] = (a[1] >> 7) | (a[2] << 1); ++ t[4] = (a[2] >> 4) | (a[3] << 4); ++ t[5] = (a[3] >> 1); ++ t[6] = (a[3] >> 6) | (a[4] << 2); ++ t[7] = (a[4] >> 3); ++ a += 5; + -+#define XOF_BLOCKBYTES SHAKE128_RATE ++ for(j=0;j<8;j++) ++ r->coeffs[8*i+j] = ((uint32_t)(t[j] & 31)*KYBER_Q + 16) >> 5; + } ++#else ++#error "KYBER_POLYCOMPRESSEDBYTES needs to be in {128, 160}" ++#endif + } + +-// Multiplying two scalars in the number theoretically transformed state. Since +-// 3329 does not have a 512th root of unity, this means we have to interpret +-// the 2*ith and (2*i+1)th entries of the scalar as elements of GF(3329)[X]/(X^2 +-// - 17^(2*bitreverse(i)+1)) The value of 17^(2*bitreverse(i)+1) mod 3329 is +-// stored in the precomputed |kModRoots| table. Note that our Barrett transform +-// only allows us to multipy two reduced numbers together, so we need some +-// intermediate reduction steps, even if an uint64_t could hold 3 multiplied +-// numbers. 
+-static void scalar_mult(scalar *out, const scalar *lhs, const scalar *rhs) { +- for (int i = 0; i < DEGREE / 2; i++) { +- uint32_t real_real = (uint32_t)lhs->c[2 * i] * rhs->c[2 * i]; +- uint32_t img_img = (uint32_t)lhs->c[2 * i + 1] * rhs->c[2 * i + 1]; +- uint32_t real_img = (uint32_t)lhs->c[2 * i] * rhs->c[2 * i + 1]; +- uint32_t img_real = (uint32_t)lhs->c[2 * i + 1] * rhs->c[2 * i]; +- out->c[2 * i] = +- reduce(real_real + (uint32_t)reduce(img_img) * kModRoots[i]); +- out->c[2 * i + 1] = reduce(img_real + real_img); ++/************************************************* ++* Name: poly_tobytes ++* ++* Description: Serialization of a polynomial ++* ++* Arguments: - uint8_t *r: pointer to output byte array ++* (needs space for KYBER_POLYBYTES bytes) ++* - const poly *a: pointer to input polynomial ++**************************************************/ ++static void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a) ++{ ++ unsigned int i; ++ uint16_t t0, t1; + -+#define hash_h(OUT, IN, INBYTES) sha3_256(OUT, IN, INBYTES) -+#define hash_g(OUT, IN, INBYTES) sha3_512(OUT, IN, INBYTES) -+#define xof_absorb(STATE, SEED, X, Y) kyber_shake128_absorb(STATE, SEED, X, Y) -+#define xof_squeezeblocks(OUT, OUTBLOCKS, STATE) shake128_squeezeblocks(OUT, OUTBLOCKS, STATE) -+#define prf(OUT, OUTBYTES, KEY, NONCE) kyber_shake256_prf(OUT, OUTBYTES, KEY, NONCE) -+#define kdf(OUT, IN, INBYTES) shake256(OUT, KYBER_SSBYTES, IN, INBYTES) ++ for(i=0;icoeffs[2*i]; ++ t0 += ((int16_t)t0 >> 15) & KYBER_Q; ++ t1 = a->coeffs[2*i+1]; ++ t1 += ((int16_t)t1 >> 15) & KYBER_Q; ++ r[3*i+0] = (t0 >> 0); ++ r[3*i+1] = (t0 >> 8) | (t1 << 4); ++ r[3*i+2] = (t1 >> 4); + } + } + +-static void vector_add(vector *lhs, const vector *rhs) { +- for (int i = 0; i < RANK; i++) { +- scalar_add(&lhs->v[i], &rhs->v[i]); ++/************************************************* ++* Name: poly_frombytes ++* ++* Description: De-serialization of a polynomial; ++* inverse of poly_tobytes ++* ++* Arguments: - poly *r: 
pointer to output polynomial ++* - const uint8_t *a: pointer to input byte array ++* (of KYBER_POLYBYTES bytes) ++**************************************************/ ++static void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES]) ++{ ++ unsigned int i; ++ for(i=0;icoeffs[2*i] = ((a[3*i+0] >> 0) | ((uint16_t)a[3*i+1] << 8)) & 0xFFF; ++ r->coeffs[2*i+1] = ((a[3*i+1] >> 4) | ((uint16_t)a[3*i+2] << 4)) & 0xFFF; + } + } + +-static void matrix_mult(vector *out, const matrix *m, const vector *a) { +- vector_zero(out); +- for (int i = 0; i < RANK; i++) { +- for (int j = 0; j < RANK; j++) { +- scalar product; +- scalar_mult(&product, &m->v[i][j], &a->v[j]); +- scalar_add(&out->v[i], &product); ++/************************************************* ++* Name: poly_frommsg ++* ++* Description: Convert 32-byte message to polynomial ++* ++* Arguments: - poly *r: pointer to output polynomial ++* - const uint8_t *msg: pointer to input message ++**************************************************/ ++static void poly_frommsg(poly *r, const uint8_t msg[KYBER_INDCPA_MSGBYTES]) ++{ ++ unsigned int i,j; ++ int16_t mask; + ++#if (KYBER_INDCPA_MSGBYTES != KYBER_N/8) ++#error "KYBER_INDCPA_MSGBYTES must be equal to KYBER_N/8 bytes!" 
++#endif + -+// -+// verify.c -+// ++ for(i=0;i> j)&1); ++ r->coeffs[8*i+j] = mask & ((KYBER_Q+1)/2); + } + } + } + +-static void matrix_mult_transpose(vector *out, const matrix *m, +- const vector *a) { +- vector_zero(out); +- for (int i = 0; i < RANK; i++) { +- for (int j = 0; j < RANK; j++) { +- scalar product; +- scalar_mult(&product, &m->v[j][i], &a->v[j]); +- scalar_add(&out->v[i], &product); ++/************************************************* ++* Name: poly_tomsg ++* ++* Description: Convert polynomial to 32-byte message ++* ++* Arguments: - uint8_t *msg: pointer to output message ++* - const poly *a: pointer to input polynomial ++**************************************************/ ++static void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly *a) ++{ ++ unsigned int i,j; ++ uint16_t t; + ++ for(i=0;icoeffs[8*i+j]; ++ t += ((int16_t)t >> 15) & KYBER_Q; ++ t = (((t << 1) + KYBER_Q/2)/KYBER_Q) & 1; ++ msg[i] |= t << j; + } + } + } + +-static void scalar_inner_product(scalar *out, const vector *lhs, +- const vector *rhs) { +- scalar_zero(out); +- for (int i = 0; i < RANK; i++) { +- scalar product; +- scalar_mult(&product, &lhs->v[i], &rhs->v[i]); +- scalar_add(out, &product); +- } +/************************************************* -+* Name: verify -+* -+* Description: Compare two arrays for equality in constant time. 
++* Name: poly_getnoise_eta1 +* -+* Arguments: const uint8_t *a: pointer to first byte array -+* const uint8_t *b: pointer to second byte array -+* size_t len: length of the byte arrays ++* Description: Sample a polynomial deterministically from a seed and a nonce, ++* with output polynomial close to centered binomial distribution ++* with parameter KYBER_ETA1 +* -+* Returns 0 if the byte arrays are equal, 1 otherwise ++* Arguments: - poly *r: pointer to output polynomial ++* - const uint8_t *seed: pointer to input seed ++* (of length KYBER_SYMBYTES bytes) ++* - uint8_t nonce: one-byte input nonce +**************************************************/ -+static int verify(const uint8_t *a, const uint8_t *b, size_t len) ++static void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce) +{ -+ size_t i; -+ uint8_t r = 0; -+ -+ for(i=0;i> 63; -+} -+ ++ uint8_t buf[KYBER_ETA1*KYBER_N/4]; ++ prf(buf, sizeof(buf), seed, nonce); ++ poly_cbd_eta1(r, buf); + } + +-// Algorithm 1 of the Kyber spec. Rejection samples a Keccak stream to get +-// uniformly distributed elements. This is used for matrix expansion and only +-// operates on public inputs. 
+-static void scalar_from_keccak_vartime(scalar *out, +- struct BORINGSSL_keccak_st *keccak_ctx) { +- assert(keccak_ctx->offset == 0); +- assert(keccak_ctx->rate_bytes == 168); +- static_assert(168 % 3 == 0, "block and coefficient boundaries do not align"); +- +- int done = 0; +- while (done < DEGREE) { +- uint8_t block[168]; +- BORINGSSL_keccak_squeeze(keccak_ctx, block, sizeof(block)); +- for (size_t i = 0; i < sizeof(block) && done < DEGREE; i += 3) { +- uint16_t d1 = block[i] + 256 * (block[i + 1] % 16); +- uint16_t d2 = block[i + 1] / 16 + 16 * block[i + 2]; +- if (d1 < kPrime) { +- out->c[done++] = d1; +- } +- if (d2 < kPrime && done < DEGREE) { +- out->c[done++] = d2; +- } +- } +- } +/************************************************* -+* Name: cmov ++* Name: poly_getnoise_eta2 +* -+* Description: Copy len bytes from x to r if b is 1; -+* don't modify x if b is 0. Requires b to be in {0,1}; -+* assumes two's complement representation of negative integers. -+* Runs in constant time. 
++* Description: Sample a polynomial deterministically from a seed and a nonce, ++* with output polynomial close to centered binomial distribution ++* with parameter KYBER_ETA2 +* -+* Arguments: uint8_t *r: pointer to output byte array -+* const uint8_t *x: pointer to input byte array -+* size_t len: Amount of bytes to be copied -+* uint8_t b: Condition bit; has to be in {0,1} ++* Arguments: - poly *r: pointer to output polynomial ++* - const uint8_t *seed: pointer to input seed ++* (of length KYBER_SYMBYTES bytes) ++* - uint8_t nonce: one-byte input nonce +**************************************************/ -+static void cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b) ++static void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce) +{ -+ size_t i; -+ -+ b = -b; -+ for(i=0;i> 1) & 1); +- value -= ((byte >> 2) & 1) + ((byte >> 3) & 1); +- out->c[i] = reduce_once(value); +- +- byte >>= 4; +- value = kPrime; +- value += (byte & 1) + ((byte >> 1) & 1); +- value -= ((byte >> 2) & 1) + ((byte >> 3) & 1); +- out->c[i + 1] = reduce_once(value); +- } + +/************************************************* -+* Name: montgomery_reduce ++* Name: poly_ntt +* -+* Description: Montgomery reduction; given a 32-bit integer a, computes -+* 16-bit integer congruent to a * R^-1 mod q, where R=2^16 ++* Description: Computes negacyclic number-theoretic transform (NTT) of ++* a polynomial in place; ++* inputs assumed to be in normal order, output in bitreversed order +* -+* Arguments: - int32_t a: input integer to be reduced; -+* has to be in {-q2^15,...,q2^15-1} ++* Arguments: - uint16_t *r: pointer to in/output polynomial ++**************************************************/ ++static void poly_ntt(poly *r) ++{ ++ ntt(r->coeffs); ++ poly_reduce(r); + } + +-// Generates a secret vector by using +-// |scalar_centered_binomial_distribution_eta_2_with_prf|, using the given seed +-// appending and incrementing |counter| for entry of the vector. 
+-static void vector_generate_secret_eta_2(vector *out, uint8_t *counter, +- const uint8_t seed[32]) { +- uint8_t input[33]; +- OPENSSL_memcpy(input, seed, 32); +- for (int i = 0; i < RANK; i++) { +- input[32] = (*counter)++; +- scalar_centered_binomial_distribution_eta_2_with_prf(&out->v[i], input); +- } ++/************************************************* ++* Name: poly_invntt_tomont +* -+* Returns: integer in {-q+1,...,q-1} congruent to a * R^-1 modulo q. ++* Description: Computes inverse of negacyclic number-theoretic transform (NTT) ++* of a polynomial in place; ++* inputs assumed to be in bitreversed order, output in normal order ++* ++* Arguments: - uint16_t *a: pointer to in/output polynomial +**************************************************/ -+static int16_t montgomery_reduce(int32_t a) ++static void poly_invntt_tomont(poly *r) +{ -+ int16_t t; -+ -+ t = (int16_t)a*QINV; -+ t = (a - (int32_t)t*KYBER_Q) >> 16; -+ return t; -+} -+ ++ invntt(r->coeffs); + } + +-// Expands the matrix of a seed for key generation and for encaps-CPA. 
+-static void matrix_expand(matrix *out, const uint8_t rho[32]) { +- uint8_t input[34]; +- OPENSSL_memcpy(input, rho, 32); +- for (int i = 0; i < RANK; i++) { +- for (int j = 0; j < RANK; j++) { +- input[32] = i; +- input[33] = j; +- struct BORINGSSL_keccak_st keccak_ctx; +- BORINGSSL_keccak_init(&keccak_ctx, input, sizeof(input), +- boringssl_shake128); +- scalar_from_keccak_vartime(&out->v[i][j], &keccak_ctx); +- } +/************************************************* -+* Name: barrett_reduce ++* Name: poly_basemul_montgomery +* -+* Description: Barrett reduction; given a 16-bit integer a, computes -+* centered representative congruent to a mod q in {-(q-1)/2,...,(q-1)/2} ++* Description: Multiplication of two polynomials in NTT domain +* -+* Arguments: - int16_t a: input integer to be reduced ++* Arguments: - poly *r: pointer to output polynomial ++* - const poly *a: pointer to first input polynomial ++* - const poly *b: pointer to second input polynomial ++**************************************************/ ++static void poly_basemul_montgomery(poly *r, const poly *a, const poly *b) ++{ ++ unsigned int i; ++ for(i=0;icoeffs[4*i], &a->coeffs[4*i], &b->coeffs[4*i], zetas[64+i]); ++ basemul(&r->coeffs[4*i+2], &a->coeffs[4*i+2], &b->coeffs[4*i+2], -zetas[64+i]); + } + } + +-static const uint8_t kMasks[8] = {0x01, 0x03, 0x07, 0x0f, +- 0x1f, 0x3f, 0x7f, 0xff}; +- +-static void scalar_encode(uint8_t *out, const scalar *s, int bits) { +- assert(bits <= (int)sizeof(*s->c) * 8 && bits != 1); +- +- uint8_t out_byte = 0; +- int out_byte_bits = 0; +- +- for (int i = 0; i < DEGREE; i++) { +- uint16_t element = s->c[i]; +- int element_bits_done = 0; +- +- while (element_bits_done < bits) { +- int chunk_bits = bits - element_bits_done; +- int out_bits_remaining = 8 - out_byte_bits; +- if (chunk_bits >= out_bits_remaining) { +- chunk_bits = out_bits_remaining; +- out_byte |= (element & kMasks[chunk_bits - 1]) << out_byte_bits; +- *out = out_byte; +- out++; +- out_byte_bits = 0; 
+- out_byte = 0; +- } else { +- out_byte |= (element & kMasks[chunk_bits - 1]) << out_byte_bits; +- out_byte_bits += chunk_bits; ++/************************************************* ++* Name: poly_tomont +* -+* Returns: integer in {-(q-1)/2,...,(q-1)/2} congruent to a modulo q. ++* Description: Inplace conversion of all coefficients of a polynomial ++* from normal domain to Montgomery domain ++* ++* Arguments: - poly *r: pointer to input/output polynomial +**************************************************/ -+static int16_t barrett_reduce(int16_t a) { -+ int16_t t; -+ const int16_t v = ((1<<26) + KYBER_Q/2)/KYBER_Q; -+ -+ t = ((int32_t)v*a + (1<<25)) >> 26; -+ t *= KYBER_Q; -+ return a - t; ++static void poly_tomont(poly *r) ++{ ++ unsigned int i; ++ const int16_t f = (1ULL << 32) % KYBER_Q; ++ for(i=0;icoeffs[i] = montgomery_reduce((int32_t)r->coeffs[i]*f); +} + -+// -+// cbd.c -+// -+ +/************************************************* -+* Name: load32_littleendian -+* -+* Description: load 4 bytes into a 32-bit integer -+* in little-endian order ++* Name: poly_reduce +* -+* Arguments: - const uint8_t *x: pointer to input byte array ++* Description: Applies Barrett reduction to all coefficients of a polynomial ++* for details of the Barrett reduction see comments in reduce.c +* -+* Returns 32-bit unsigned integer loaded from x ++* Arguments: - poly *r: pointer to input/output polynomial +**************************************************/ -+static uint32_t load32_littleendian(const uint8_t x[4]) ++static void poly_reduce(poly *r) +{ -+ uint32_t r; -+ r = (uint32_t)x[0]; -+ r |= (uint32_t)x[1] << 8; -+ r |= (uint32_t)x[2] << 16; -+ r |= (uint32_t)x[3] << 24; -+ return r; ++ unsigned int i; ++ for(i=0;icoeffs[i] = barrett_reduce(r->coeffs[i]); +} + +/************************************************* -+* Name: load24_littleendian -+* -+* Description: load 3 bytes into a 32-bit integer -+* in little-endian order. 
-+* This function is only needed for Kyber-512 ++* Name: poly_add +* -+* Arguments: - const uint8_t *x: pointer to input byte array ++* Description: Add two polynomials; no modular reduction is performed +* -+* Returns 32-bit unsigned integer loaded from x (most significant byte is zero) ++* Arguments: - poly *r: pointer to output polynomial ++* - const poly *a: pointer to first input polynomial ++* - const poly *b: pointer to second input polynomial +**************************************************/ -+#if KYBER_ETA1 == 3 -+static uint32_t load24_littleendian(const uint8_t x[3]) ++static void poly_add(poly *r, const poly *a, const poly *b) +{ -+ uint32_t r; -+ r = (uint32_t)x[0]; -+ r |= (uint32_t)x[1] << 8; -+ r |= (uint32_t)x[2] << 16; -+ return r; ++ unsigned int i; ++ for(i=0;icoeffs[i] = a->coeffs[i] + b->coeffs[i]; +} -+#endif -+ + +/************************************************* -+* Name: cbd2 ++* Name: poly_sub +* -+* Description: Given an array of uniformly random bytes, compute -+* polynomial with coefficients distributed according to -+* a centered binomial distribution with parameter eta=2 ++* Description: Subtract two polynomials; no modular reduction is performed +* -+* Arguments: - poly *r: pointer to output polynomial -+* - const uint8_t *buf: pointer to input byte array ++* Arguments: - poly *r: pointer to output polynomial ++* - const poly *a: pointer to first input polynomial ++* - const poly *b: pointer to second input polynomial +**************************************************/ -+static void cbd2(poly *r, const uint8_t buf[2*KYBER_N/4]) ++static void poly_sub(poly *r, const poly *a, const poly *b) +{ -+ unsigned int i,j; -+ uint32_t t,d; -+ int16_t a,b; ++ unsigned int i; ++ for(i=0;icoeffs[i] = a->coeffs[i] - b->coeffs[i]; ++} + -+ for(i=0;i>1) & 0x55555555; ++// ++// polyvec.c ++// + -+ for(j=0;j<8;j++) { -+ a = (d >> (4*j+0)) & 0x3; -+ b = (d >> (4*j+2)) & 0x3; -+ r->coeffs[8*i+j] = a - b; - } - } - } - --static void 
vector_ntt(vector *a) { -- for (int i = 0; i < RANK; i++) { -- scalar_ntt(&a->v[i]); +/************************************************* -+* Name: cbd3 ++* Name: polyvec_compress +* -+* Description: Given an array of uniformly random bytes, compute -+* polynomial with coefficients distributed according to -+* a centered binomial distribution with parameter eta=3. -+* This function is only needed for Kyber-512 ++* Description: Compress and serialize vector of polynomials +* -+* Arguments: - poly *r: pointer to output polynomial -+* - const uint8_t *buf: pointer to input byte array ++* Arguments: - uint8_t *r: pointer to output byte array ++* (needs space for KYBER_POLYVECCOMPRESSEDBYTES) ++* - const polyvec *a: pointer to input vector of polynomials +**************************************************/ -+#if KYBER_ETA1 == 3 -+static void cbd3(poly *r, const uint8_t buf[3*KYBER_N/4]) ++static void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a) +{ -+ unsigned int i,j; -+ uint32_t t,d; -+ int16_t a,b; -+ -+ for(i=0;i>1) & 0x00249249; -+ d += (t>>2) & 0x00249249; ++ unsigned int i,j,k; + -+ for(j=0;j<4;j++) { -+ a = (d >> (6*j+0)) & 0x7; -+ b = (d >> (6*j+3)) & 0x7; -+ r->coeffs[4*i+j] = a - b; ++#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) ++ uint16_t t[8]; ++ for(i=0;ivec[i].coeffs[8*j+k]; ++ t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; ++ t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q/2)/KYBER_Q) & 0x7ff; + } + +- element_bits_done += chunk_bits; +- element >>= chunk_bits; ++ r[ 0] = (t[0] >> 0); ++ r[ 1] = (t[0] >> 8) | (t[1] << 3); ++ r[ 2] = (t[1] >> 5) | (t[2] << 6); ++ r[ 3] = (t[2] >> 2); ++ r[ 4] = (t[2] >> 10) | (t[3] << 1); ++ r[ 5] = (t[3] >> 7) | (t[4] << 4); ++ r[ 6] = (t[4] >> 4) | (t[5] << 7); ++ r[ 7] = (t[5] >> 1); ++ r[ 8] = (t[5] >> 9) | (t[6] << 2); ++ r[ 9] = (t[6] >> 6) | (t[7] << 5); ++ r[10] = (t[7] >> 3); ++ r += 11; + } + } ++#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) ++ uint16_t t[4]; ++ 
for(i=0;ivec[i].coeffs[4*j+k]; ++ t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; ++ t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q/2)/ KYBER_Q) & 0x3ff; ++ } + +- if (out_byte_bits > 0) { +- *out = out_byte; ++ r[0] = (t[0] >> 0); ++ r[1] = (t[0] >> 8) | (t[1] << 2); ++ r[2] = (t[1] >> 6) | (t[2] << 4); ++ r[3] = (t[2] >> 4) | (t[3] << 6); ++ r[4] = (t[3] >> 2); ++ r += 5; + } } - } -+#endif -+ -+static void poly_cbd_eta1(poly *r, const uint8_t buf[KYBER_ETA1*KYBER_N/4]) -+{ -+#if KYBER_ETA1 == 2 -+ cbd2(r, buf); -+#elif KYBER_ETA1 == 3 -+ cbd3(r, buf); -+#else -+#error "This implementation requires eta1 in {2,3}" -+#endif -+} -+ -+static void poly_cbd_eta2(poly *r, const uint8_t buf[KYBER_ETA2*KYBER_N/4]) -+{ -+#if KYBER_ETA2 == 2 -+ cbd2(r, buf); +#else -+#error "This implementation requires eta2 = 2" ++#error "KYBER_POLYVECCOMPRESSEDBYTES needs to be in {320*KYBER_K, 352*KYBER_K}" +#endif -+} + } --// In place inverse number theoretic transform of a given scalar, with pairs of --// entries of s->v being interpreted as elements of GF(3329^2). Just as with the --// number theoretic transform, this leaves off the first step of the normal iFFT --// to account for the fact that 3329 does not have a 512th root of unity, using --// the precomputed 128 roots of unity stored in |kInverseNTTRoots|. --static void scalar_inverse_ntt(scalar *s) { -- int step = DEGREE / 2; -- // `int` is used here because using `size_t` throughout caused a ~5% slowdown -- // with Clang 14 on Aarch64. 
-- for (int offset = 2; offset < DEGREE; offset <<= 1) { -- step >>= 1; -- int k = 0; -- for (int i = 0; i < step; i++) { -- uint32_t step_root = kInverseNTTRoots[i + step]; -- for (int j = k; j < k + offset; j++) { -- uint16_t odd = s->c[j + offset]; -- uint16_t even = s->c[j]; -- s->c[j] = reduce_once(odd + even); -- s->c[j + offset] = reduce(step_root * (even - odd + kPrime)); -+// -+// ntt.c -+// -+ -+/* Code to generate zetas and zetas_inv used in the number-theoretic transform: -+ -+#define KYBER_ROOT_OF_UNITY 17 -+ -+static const uint8_t tree[128] = { -+ 0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120, -+ 4, 68, 36, 100, 20, 84, 52, 116, 12, 76, 44, 108, 28, 92, 60, 124, -+ 2, 66, 34, 98, 18, 82, 50, 114, 10, 74, 42, 106, 26, 90, 58, 122, -+ 6, 70, 38, 102, 22, 86, 54, 118, 14, 78, 46, 110, 30, 94, 62, 126, -+ 1, 65, 33, 97, 17, 81, 49, 113, 9, 73, 41, 105, 25, 89, 57, 121, -+ 5, 69, 37, 101, 21, 85, 53, 117, 13, 77, 45, 109, 29, 93, 61, 125, -+ 3, 67, 35, 99, 19, 83, 51, 115, 11, 75, 43, 107, 27, 91, 59, 123, -+ 7, 71, 39, 103, 23, 87, 55, 119, 15, 79, 47, 111, 31, 95, 63, 127 -+}; -+ -+void init_ntt() { -+ unsigned int i; -+ int16_t tmp[128]; +-// scalar_encode_1 is |scalar_encode| specialised for |bits| == 1. 
+-static void scalar_encode_1(uint8_t out[32], const scalar *s) { +- for (int i = 0; i < DEGREE; i += 8) { +- uint8_t out_byte = 0; +- for (int j = 0; j < 8; j++) { +- out_byte |= (s->c[i + j] & 1) << j; ++/************************************************* ++* Name: polyvec_decompress ++* ++* Description: De-serialize and decompress vector of polynomials; ++* approximate inverse of polyvec_compress ++* ++* Arguments: - polyvec *r: pointer to output vector of polynomials ++* - const uint8_t *a: pointer to input byte array ++* (of length KYBER_POLYVECCOMPRESSEDBYTES) ++**************************************************/ ++static void polyvec_decompress(polyvec *r, const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES]) ++{ ++ unsigned int i,j,k; + -+ tmp[0] = MONT; -+ for(i=1;i<128;i++) -+ tmp[i] = fqmul(tmp[i-1],MONT*KYBER_ROOT_OF_UNITY % KYBER_Q); ++#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) ++ uint16_t t[8]; ++ for(i=0;i> 0) | ((uint16_t)a[ 1] << 8); ++ t[1] = (a[1] >> 3) | ((uint16_t)a[ 2] << 5); ++ t[2] = (a[2] >> 6) | ((uint16_t)a[ 3] << 2) | ((uint16_t)a[4] << 10); ++ t[3] = (a[4] >> 1) | ((uint16_t)a[ 5] << 7); ++ t[4] = (a[5] >> 4) | ((uint16_t)a[ 6] << 4); ++ t[5] = (a[6] >> 7) | ((uint16_t)a[ 7] << 1) | ((uint16_t)a[8] << 9); ++ t[6] = (a[8] >> 2) | ((uint16_t)a[ 9] << 6); ++ t[7] = (a[9] >> 5) | ((uint16_t)a[10] << 3); ++ a += 11; + -+ for(i=0;i<128;i++) { -+ zetas[i] = tmp[tree[i]]; -+ if(zetas[i] > KYBER_Q/2) -+ zetas[i] -= KYBER_Q; -+ if(zetas[i] < -KYBER_Q/2) -+ zetas[i] += KYBER_Q; ++ for(k=0;k<8;k++) ++ r->vec[i].coeffs[8*j+k] = ((uint32_t)(t[k] & 0x7FF)*KYBER_Q + 1024) >> 11; ++ } + } -+} -+*/ ++#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) ++ uint16_t t[4]; ++ for(i=0;i> 0) | ((uint16_t)a[1] << 8); ++ t[1] = (a[1] >> 2) | ((uint16_t)a[2] << 6); ++ t[2] = (a[2] >> 4) | ((uint16_t)a[3] << 4); ++ t[3] = (a[3] >> 6) | ((uint16_t)a[4] << 2); ++ a += 5; + -+static const int16_t zetas[128] = { -+ -1044, -758, -359, -1517, 1493, 1422, 287, 
202, -+ -171, 622, 1577, 182, 962, -1202, -1474, 1468, -+ 573, -1325, 264, 383, -829, 1458, -1602, -130, -+ -681, 1017, 732, 608, -1542, 411, -205, -1571, -+ 1223, 652, -552, 1015, -1293, 1491, -282, -1544, -+ 516, -8, -320, -666, -1618, -1162, 126, 1469, -+ -853, -90, -271, 830, 107, -1421, -247, -951, -+ -398, 961, -1508, -725, 448, -1065, 677, -1275, -+ -1103, 430, 555, 843, -1251, 871, 1550, 105, -+ 422, 587, 177, -235, -291, -460, 1574, 1653, -+ -246, 778, 1159, -147, -777, 1483, -602, 1119, -+ -1590, 644, -872, 349, 418, 329, -156, -75, -+ 817, 1097, 603, 610, 1322, -1285, -1465, 384, -+ -1215, -136, 1218, -1335, -874, 220, -1187, -1659, -+ -1185, -1530, -1278, 794, -1510, -854, -870, 478, -+ -108, -308, 996, 991, 958, -1460, 1522, 1628 -+}; ++ for(k=0;k<4;k++) ++ r->vec[i].coeffs[4*j+k] = ((uint32_t)(t[k] & 0x3FF)*KYBER_Q + 512) >> 10; + } +- *out = out_byte; +- out++; + } ++#else ++#error "KYBER_POLYVECCOMPRESSEDBYTES needs to be in {320*KYBER_K, 352*KYBER_K}" ++#endif ++} + +/************************************************* -+* Name: fqmul -+* -+* Description: Multiplication followed by Montgomery reduction ++* Name: polyvec_tobytes +* -+* Arguments: - int16_t a: first factor -+* - int16_t b: second factor ++* Description: Serialize vector of polynomials +* -+* Returns 16-bit integer congruent to a*b*R^{-1} mod q ++* Arguments: - uint8_t *r: pointer to output byte array ++* (needs space for KYBER_POLYVECBYTES) ++* - const polyvec *a: pointer to input vector of polynomials +**************************************************/ -+static int16_t fqmul(int16_t a, int16_t b) { -+ return montgomery_reduce((int32_t)a*b); ++static void polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a) ++{ ++ unsigned int i; ++ for(i=0;ivec[i]); +} + +/************************************************* -+* Name: ntt ++* Name: polyvec_frombytes +* -+* Description: Inplace number-theoretic transform (NTT) in Rq. 
-+* input is in standard order, output is in bitreversed order ++* Description: De-serialize vector of polynomials; ++* inverse of polyvec_tobytes +* -+* Arguments: - int16_t r[256]: pointer to input/output vector of elements of Zq ++* Arguments: - uint8_t *r: pointer to output byte array ++* - const polyvec *a: pointer to input vector of polynomials ++* (of length KYBER_POLYVECBYTES) +**************************************************/ -+static void ntt(int16_t r[256]) { -+ unsigned int len, start, j, k; -+ int16_t t, zeta; -+ -+ k = 1; -+ for(len = 128; len >= 2; len >>= 1) { -+ for(start = 0; start < 256; start = j + len) { -+ zeta = zetas[k++]; -+ for(j = start; j < start + len; j++) { -+ t = fqmul(zeta, r[j + len]); -+ r[j + len] = r[j] - t; -+ r[j] = r[j] + t; - } -- k += 2 * offset; - } - } -- for (int i = 0; i < DEGREE; i++) { -- s->c[i] = reduce(s->c[i] * kInverseDegree); -- } ++static void polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES]) ++{ ++ unsigned int i; ++ for(i=0;ivec[i], a+i*KYBER_POLYBYTES); } --static void vector_inverse_ntt(vector *a) { +-// Encodes an entire vector into 32*|RANK|*|bits| bytes. Note that since 256 +-// (DEGREE) is divisible by 8, the individual vector entries will always fill a +-// whole number of bytes, so we do not need to worry about bit packing here. +-static void vector_encode(uint8_t *out, const vector *a, int bits) { - for (int i = 0; i < RANK; i++) { -- scalar_inverse_ntt(&a->v[i]); +- scalar_encode(out + i * bits * DEGREE / 8, &a->v[i], bits); +/************************************************* -+* Name: invntt_tomont ++* Name: polyvec_ntt +* -+* Description: Inplace inverse number-theoretic transform in Rq and -+* multiplication by Montgomery factor 2^16. 
-+* Input is in bitreversed order, output is in standard order ++* Description: Apply forward NTT to all elements of a vector of polynomials +* -+* Arguments: - int16_t r[256]: pointer to input/output vector of elements of Zq ++* Arguments: - polyvec *r: pointer to in/output vector of polynomials +**************************************************/ -+static void invntt(int16_t r[256]) { -+ unsigned int start, len, j, k; -+ int16_t t, zeta; -+ const int16_t f = 1441; // mont^2/128 -+ -+ k = 127; -+ for(len = 2; len <= 128; len <<= 1) { -+ for(start = 0; start < 256; start = j + len) { -+ zeta = zetas[k--]; -+ for(j = start; j < start + len; j++) { -+ t = r[j]; -+ r[j] = barrett_reduce(t + r[j + len]); -+ r[j + len] = r[j + len] - t; -+ r[j + len] = fqmul(zeta, r[j + len]); -+ } -+ } - } ++static void polyvec_ntt(polyvec *r) ++{ ++ unsigned int i; ++ for(i=0;ivec[i]); ++} + -+ for(j = 0; j < 256; j++) -+ r[j] = fqmul(r[j], f); - } - --static void scalar_add(scalar *lhs, const scalar *rhs) { -- for (int i = 0; i < DEGREE; i++) { -- lhs->c[i] = reduce_once(lhs->c[i] + rhs->c[i]); +/************************************************* -+* Name: basemul ++* Name: polyvec_invntt_tomont +* -+* Description: Multiplication of polynomials in Zq[X]/(X^2-zeta) -+* used for multiplication of elements in Rq in NTT domain ++* Description: Apply inverse NTT to all elements of a vector of polynomials ++* and multiply by Montgomery factor 2^16 +* -+* Arguments: - int16_t r[2]: pointer to the output polynomial -+* - const int16_t a[2]: pointer to the first factor -+* - const int16_t b[2]: pointer to the second factor -+* - int16_t zeta: integer defining the reduction polynomial ++* Arguments: - polyvec *r: pointer to in/output vector of polynomials +**************************************************/ -+static void basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta) ++static void polyvec_invntt_tomont(polyvec *r) +{ -+ r[0] = fqmul(a[1], b[1]); -+ r[0] = 
fqmul(r[0], zeta); -+ r[0] += fqmul(a[0], b[0]); -+ r[1] = fqmul(a[0], b[1]); -+ r[1] += fqmul(a[1], b[0]); ++ unsigned int i; ++ for(i=0;ivec[i]); +} + -+// -+// poly.c -+// -+ +/************************************************* -+* Name: poly_compress ++* Name: polyvec_basemul_acc_montgomery +* -+* Description: Compression and subsequent serialization of a polynomial ++* Description: Multiply elements of a and b in NTT domain, accumulate into r, ++* and multiply by 2^-16. +* -+* Arguments: - uint8_t *r: pointer to output byte array -+* (of length KYBER_POLYCOMPRESSEDBYTES) -+* - const poly *a: pointer to input polynomial ++* Arguments: - poly *r: pointer to output polynomial ++* - const polyvec *a: pointer to first input vector of polynomials ++* - const polyvec *b: pointer to second input vector of polynomials +**************************************************/ -+static void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a) ++static void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b) +{ -+ unsigned int i,j; -+ int16_t u; -+ uint32_t d0; -+ uint8_t t[8]; -+ -+#if (KYBER_POLYCOMPRESSEDBYTES == 128) -+ for(i=0;icoeffs[8*i+j]; -+ u += (u >> 15) & KYBER_Q; -+ d0 = u << 4; -+ d0 += 1665; -+ d0 *= 80635; -+ d0 >>= 28; -+ t[j] = d0 & 0xf; -+ } -+ -+ r[0] = t[0] | (t[1] << 4); -+ r[1] = t[2] | (t[3] << 4); -+ r[2] = t[4] | (t[5] << 4); -+ r[3] = t[6] | (t[7] << 4); -+ r += 4; -+ } -+#elif (KYBER_POLYCOMPRESSEDBYTES == 160) -+ for(i=0;icoeffs[8*i+j]; -+ u += (u >> 15) & KYBER_Q; -+ d0 = u << 5; -+ d0 += 1664; -+ d0 *= 40318; -+ d0 >>= 27; -+ t[j] = d0 & 0x1f; -+ } ++ unsigned int i; ++ poly t; + -+ r[0] = (t[0] >> 0) | (t[1] << 5); -+ r[1] = (t[1] >> 3) | (t[2] << 2) | (t[3] << 7); -+ r[2] = (t[3] >> 1) | (t[4] << 4); -+ r[3] = (t[4] >> 4) | (t[5] << 1) | (t[6] << 6); -+ r[4] = (t[6] >> 2) | (t[7] << 3); -+ r += 5; ++ poly_basemul_montgomery(r, &a->vec[0], &b->vec[0]); ++ for(i=1;ivec[i], &b->vec[i]); ++ poly_add(r, r, &t); } 
-+#else -+#error "KYBER_POLYCOMPRESSEDBYTES needs to be in {128, 160}" -+#endif ++ ++ poly_reduce(r); } --static void scalar_sub(scalar *lhs, const scalar *rhs) { +-// scalar_decode parses |DEGREE * bits| bits from |in| into |DEGREE| values in +-// |out|. It returns one on success and zero if any parsed value is >= +-// |kPrime|. +-static int scalar_decode(scalar *out, const uint8_t *in, int bits) { +- assert(bits <= (int)sizeof(*out->c) * 8 && bits != 1); ++/************************************************* ++* Name: polyvec_reduce ++* ++* Description: Applies Barrett reduction to each coefficient ++* of each element of a vector of polynomials; ++* for details of the Barrett reduction see comments in reduce.c ++* ++* Arguments: - polyvec *r: pointer to input/output polynomial ++**************************************************/ ++static void polyvec_reduce(polyvec *r) ++{ ++ unsigned int i; ++ for(i=0;ivec[i]); ++} + +- uint8_t in_byte = 0; +- int in_byte_bits_left = 0; ++/************************************************* ++* Name: polyvec_add ++* ++* Description: Add vectors of polynomials ++* ++* Arguments: - polyvec *r: pointer to output vector of polynomials ++* - const polyvec *a: pointer to first input vector of polynomials ++* - const polyvec *b: pointer to second input vector of polynomials ++**************************************************/ ++static void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b) ++{ ++ unsigned int i; ++ for(i=0;ivec[i], &a->vec[i], &b->vec[i]); ++} + - for (int i = 0; i < DEGREE; i++) { -- lhs->c[i] = reduce_once(lhs->c[i] - rhs->c[i] + kPrime); +- uint16_t element = 0; +- int element_bits_done = 0; ++// ++// indcpa.c ++// + +- while (element_bits_done < bits) { +- if (in_byte_bits_left == 0) { +- in_byte = *in; +- in++; +- in_byte_bits_left = 8; +- } +/************************************************* -+* Name: poly_decompress ++* Name: pack_pk +* -+* Description: De-serialization and subsequent decompression of a 
polynomial; -+* approximate inverse of poly_compress ++* Description: Serialize the public key as concatenation of the ++* serialized vector of polynomials pk ++* and the public seed used to generate the matrix A. +* -+* Arguments: - poly *r: pointer to output polynomial -+* - const uint8_t *a: pointer to input byte array -+* (of length KYBER_POLYCOMPRESSEDBYTES bytes) ++* Arguments: uint8_t *r: pointer to the output serialized public key ++* polyvec *pk: pointer to the input public-key polyvec ++* const uint8_t *seed: pointer to the input public seed +**************************************************/ -+static void poly_decompress(poly *r, const uint8_t a[KYBER_POLYCOMPRESSEDBYTES]) ++static void pack_pk(uint8_t r[KYBER_INDCPA_PUBLICKEYBYTES], ++ polyvec *pk, ++ const uint8_t seed[KYBER_SYMBYTES]) +{ -+ unsigned int i; -+ -+#if (KYBER_POLYCOMPRESSEDBYTES == 128) -+ for(i=0;icoeffs[2*i+0] = (((uint16_t)(a[0] & 15)*KYBER_Q) + 8) >> 4; -+ r->coeffs[2*i+1] = (((uint16_t)(a[0] >> 4)*KYBER_Q) + 8) >> 4; -+ a += 1; -+ } -+#elif (KYBER_POLYCOMPRESSEDBYTES == 160) -+ unsigned int j; -+ uint8_t t[8]; -+ for(i=0;i> 0); -+ t[1] = (a[0] >> 5) | (a[1] << 3); -+ t[2] = (a[1] >> 2); -+ t[3] = (a[1] >> 7) | (a[2] << 1); -+ t[4] = (a[2] >> 4) | (a[3] << 4); -+ t[5] = (a[3] >> 1); -+ t[6] = (a[3] >> 6) | (a[4] << 2); -+ t[7] = (a[4] >> 3); -+ a += 5; -+ -+ for(j=0;j<8;j++) -+ r->coeffs[8*i+j] = ((uint32_t)(t[j] & 31)*KYBER_Q + 16) >> 5; - } -+#else -+#error "KYBER_POLYCOMPRESSEDBYTES needs to be in {128, 160}" -+#endif - } ++ size_t i; ++ polyvec_tobytes(r, pk); ++ for(i=0;ic[2 * i] * rhs->c[2 * i]; -- uint32_t img_img = (uint32_t)lhs->c[2 * i + 1] * rhs->c[2 * i + 1]; -- uint32_t real_img = (uint32_t)lhs->c[2 * i] * rhs->c[2 * i + 1]; -- uint32_t img_real = (uint32_t)lhs->c[2 * i + 1] * rhs->c[2 * i]; -- out->c[2 * i] = -- reduce(real_real + (uint32_t)reduce(img_img) * kModRoots[i]); -- out->c[2 * i + 1] = reduce(img_real + real_img); +- int chunk_bits = bits - element_bits_done; 
+- if (chunk_bits > in_byte_bits_left) { +- chunk_bits = in_byte_bits_left; +- } +/************************************************* -+* Name: poly_tobytes ++* Name: unpack_pk +* -+* Description: Serialization of a polynomial ++* Description: De-serialize public key from a byte array; ++* approximate inverse of pack_pk +* -+* Arguments: - uint8_t *r: pointer to output byte array -+* (needs space for KYBER_POLYBYTES bytes) -+* - const poly *a: pointer to input polynomial ++* Arguments: - polyvec *pk: pointer to output public-key polynomial vector ++* - uint8_t *seed: pointer to output seed to generate matrix A ++* - const uint8_t *packedpk: pointer to input serialized public key +**************************************************/ -+static void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a) ++static void unpack_pk(polyvec *pk, ++ uint8_t seed[KYBER_SYMBYTES], ++ const uint8_t packedpk[KYBER_INDCPA_PUBLICKEYBYTES]) +{ -+ unsigned int i; -+ uint16_t t0, t1; -+ -+ for(i=0;icoeffs[2*i]; -+ t0 += ((int16_t)t0 >> 15) & KYBER_Q; -+ t1 = a->coeffs[2*i+1]; -+ t1 += ((int16_t)t1 >> 15) & KYBER_Q; -+ r[3*i+0] = (t0 >> 0); -+ r[3*i+1] = (t0 >> 8) | (t1 << 4); -+ r[3*i+2] = (t1 >> 4); - } - } ++ size_t i; ++ polyvec_frombytes(pk, packedpk); ++ for(i=0;iv[i], &rhs->v[i]); +- element |= (in_byte & kMasks[chunk_bits - 1]) << element_bits_done; +- in_byte_bits_left -= chunk_bits; +- in_byte >>= chunk_bits; +/************************************************* -+* Name: poly_frombytes ++* Name: pack_sk +* -+* Description: De-serialization of a polynomial; -+* inverse of poly_tobytes ++* Description: Serialize the secret key +* -+* Arguments: - poly *r: pointer to output polynomial -+* - const uint8_t *a: pointer to input byte array -+* (of KYBER_POLYBYTES bytes) ++* Arguments: - uint8_t *r: pointer to output serialized secret key ++* - polyvec *sk: pointer to input vector of polynomials (secret key) +**************************************************/ -+static void 
poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES]) ++static void pack_sk(uint8_t r[KYBER_INDCPA_SECRETKEYBYTES], polyvec *sk) +{ -+ unsigned int i; -+ for(i=0;icoeffs[2*i] = ((a[3*i+0] >> 0) | ((uint16_t)a[3*i+1] << 8)) & 0xFFF; -+ r->coeffs[2*i+1] = ((a[3*i+1] >> 4) | ((uint16_t)a[3*i+2] << 4)) & 0xFFF; - } - } ++ polyvec_tobytes(r, sk); ++} --static void matrix_mult(vector *out, const matrix *m, const vector *a) { -- vector_zero(out); -- for (int i = 0; i < RANK; i++) { -- for (int j = 0; j < RANK; j++) { -- scalar product; -- scalar_mult(&product, &m->v[i][j], &a->v[j]); -- scalar_add(&out->v[i], &product); +- element_bits_done += chunk_bits; +- } +/************************************************* -+* Name: poly_frommsg ++* Name: unpack_sk +* -+* Description: Convert 32-byte message to polynomial ++* Description: De-serialize the secret key; inverse of pack_sk +* -+* Arguments: - poly *r: pointer to output polynomial -+* - const uint8_t *msg: pointer to input message ++* Arguments: - polyvec *sk: pointer to output vector of polynomials (secret key) ++* - const uint8_t *packedsk: pointer to input serialized secret key +**************************************************/ -+static void poly_frommsg(poly *r, const uint8_t msg[KYBER_INDCPA_MSGBYTES]) ++static void unpack_sk(polyvec *sk, const uint8_t packedsk[KYBER_INDCPA_SECRETKEYBYTES]) +{ -+ unsigned int i,j; -+ int16_t mask; -+ -+#if (KYBER_INDCPA_MSGBYTES != KYBER_N/8) -+#error "KYBER_INDCPA_MSGBYTES must be equal to KYBER_N/8 bytes!" 
-+#endif -+ -+ for(i=0;i> j)&1); -+ r->coeffs[8*i+j] = mask & ((KYBER_Q+1)/2); - } - } - } ++ polyvec_frombytes(sk, packedsk); ++} --static void matrix_mult_transpose(vector *out, const matrix *m, -- const vector *a) { -- vector_zero(out); -- for (int i = 0; i < RANK; i++) { -- for (int j = 0; j < RANK; j++) { -- scalar product; -- scalar_mult(&product, &m->v[j][i], &a->v[j]); -- scalar_add(&out->v[i], &product); +- if (element >= kPrime) { +- return 0; +- } +- out->c[i] = element; +- } +/************************************************* -+* Name: poly_tomsg ++* Name: pack_ciphertext +* -+* Description: Convert polynomial to 32-byte message ++* Description: Serialize the ciphertext as concatenation of the ++* compressed and serialized vector of polynomials b ++* and the compressed and serialized polynomial v +* -+* Arguments: - uint8_t *msg: pointer to output message -+* - const poly *a: pointer to input polynomial ++* Arguments: uint8_t *r: pointer to the output serialized ciphertext ++* poly *pk: pointer to the input vector of polynomials b ++* poly *v: pointer to the input polynomial v +**************************************************/ -+static void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly *a) ++static void pack_ciphertext(uint8_t r[KYBER_INDCPA_BYTES], polyvec *b, poly *v) +{ -+ unsigned int i,j; -+ uint32_t t; -+ -+ for(i=0;icoeffs[8*i+j]; -+ t <<= 1; -+ t += 1665; -+ t *= 80635; -+ t >>= 28; -+ t &= 1; -+ msg[i] |= t << j; - } - } ++ polyvec_compress(r, b); ++ poly_compress(r+KYBER_POLYVECCOMPRESSEDBYTES, v); ++} + +- return 1; ++/************************************************* ++* Name: unpack_ciphertext ++* ++* Description: De-serialize and decompress ciphertext from a byte array; ++* approximate inverse of pack_ciphertext ++* ++* Arguments: - polyvec *b: pointer to the output vector of polynomials b ++* - poly *v: pointer to the output polynomial v ++* - const uint8_t *c: pointer to the input serialized ciphertext 
++**************************************************/ ++static void unpack_ciphertext(polyvec *b, poly *v, const uint8_t c[KYBER_INDCPA_BYTES]) ++{ ++ polyvec_decompress(b, c); ++ poly_decompress(v, c+KYBER_POLYVECCOMPRESSEDBYTES); } --static void scalar_inner_product(scalar *out, const vector *lhs, -- const vector *rhs) { -- scalar_zero(out); -- for (int i = 0; i < RANK; i++) { -- scalar product; -- scalar_mult(&product, &lhs->v[i], &rhs->v[i]); -- scalar_add(out, &product); -- } +-// scalar_decode_1 is |scalar_decode| specialised for |bits| == 1. +-static void scalar_decode_1(scalar *out, const uint8_t in[32]) { +- for (int i = 0; i < DEGREE; i += 8) { +- uint8_t in_byte = *in; +- in++; +- for (int j = 0; j < 8; j++) { +- out->c[i + j] = in_byte & 1; +- in_byte >>= 1; +- } +/************************************************* -+* Name: poly_getnoise_eta1 ++* Name: rej_uniform +* -+* Description: Sample a polynomial deterministically from a seed and a nonce, -+* with output polynomial close to centered binomial distribution -+* with parameter KYBER_ETA1 ++* Description: Run rejection sampling on uniform random bytes to generate ++* uniform random integers mod q +* -+* Arguments: - poly *r: pointer to output polynomial -+* - const uint8_t *seed: pointer to input seed -+* (of length KYBER_SYMBYTES bytes) -+* - uint8_t nonce: one-byte input nonce ++* Arguments: - int16_t *r: pointer to output buffer ++* - unsigned int len: requested number of 16-bit integers (uniform mod q) ++* - const uint8_t *buf: pointer to input buffer (assumed to be uniformly random bytes) ++* - unsigned int buflen: length of input buffer in bytes ++* ++* Returns number of sampled 16-bit integers (at most len) +**************************************************/ -+static void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce) ++static unsigned int rej_uniform(int16_t *r, ++ unsigned int len, ++ const uint8_t *buf, ++ unsigned int buflen) +{ -+ uint8_t 
buf[KYBER_ETA1*KYBER_N/4]; -+ prf(buf, sizeof(buf), seed, nonce); -+ poly_cbd_eta1(r, buf); ++ unsigned int ctr, pos; ++ uint16_t val0, val1; ++ ++ ctr = pos = 0; ++ while(ctr < len && pos + 3 <= buflen) { ++ val0 = ((buf[pos+0] >> 0) | ((uint16_t)buf[pos+1] << 8)) & 0xFFF; ++ val1 = ((buf[pos+1] >> 4) | ((uint16_t)buf[pos+2] << 4)) & 0xFFF; ++ pos += 3; ++ ++ if(val0 < KYBER_Q) ++ r[ctr++] = val0; ++ if(ctr < len && val1 < KYBER_Q) ++ r[ctr++] = val1; + } ++ ++ return ctr; } --// Algorithm 1 of the Kyber spec. Rejection samples a Keccak stream to get --// uniformly distributed elements. This is used for matrix expansion and only --// operates on public inputs. --static void scalar_from_keccak_vartime(scalar *out, -- struct BORINGSSL_keccak_st *keccak_ctx) { -- assert(keccak_ctx->offset == 0); -- assert(keccak_ctx->rate_bytes == 168); -- static_assert(168 % 3 == 0, "block and coefficient boundaries do not align"); -- -- int done = 0; -- while (done < DEGREE) { -- uint8_t block[168]; -- BORINGSSL_keccak_squeeze(keccak_ctx, block, sizeof(block)); -- for (size_t i = 0; i < sizeof(block) && done < DEGREE; i += 3) { -- uint16_t d1 = block[i] + 256 * (block[i + 1] % 16); -- uint16_t d2 = block[i + 1] / 16 + 16 * block[i + 2]; -- if (d1 < kPrime) { -- out->c[done++] = d1; -- } -- if (d2 < kPrime && done < DEGREE) { -- out->c[done++] = d2; -- } -- } -- } +-// Decodes 32*|RANK|*|bits| bytes from |in| into |out|. It returns one on +-// success or zero if any parsed value is >= |kPrime|. 
+-static int vector_decode(vector *out, const uint8_t *in, int bits) { +- for (int i = 0; i < RANK; i++) { +- if (!scalar_decode(&out->v[i], in + i * bits * DEGREE / 8, bits)) { +- return 0; ++#define gen_a(A,B) gen_matrix(A,B,0) ++#define gen_at(A,B) gen_matrix(A,B,1) ++ +/************************************************* -+* Name: poly_getnoise_eta2 ++* Name: gen_matrix +* -+* Description: Sample a polynomial deterministically from a seed and a nonce, -+* with output polynomial close to centered binomial distribution -+* with parameter KYBER_ETA2 ++* Description: Deterministically generate matrix A (or the transpose of A) ++* from a seed. Entries of the matrix are polynomials that look ++* uniformly random. Performs rejection sampling on output of ++* a XOF +* -+* Arguments: - poly *r: pointer to output polynomial ++* Arguments: - polyvec *a: pointer to ouptput matrix A +* - const uint8_t *seed: pointer to input seed -+* (of length KYBER_SYMBYTES bytes) -+* - uint8_t nonce: one-byte input nonce ++* - int transposed: boolean deciding whether A or A^T is generated +**************************************************/ -+static void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce) ++#define GEN_MATRIX_NBLOCKS ((12*KYBER_N/8*(1 << 12)/KYBER_Q + XOF_BLOCKBYTES)/XOF_BLOCKBYTES) ++// Not static for benchmarking ++static void gen_matrix(polyvec *a, const uint8_t seed[KYBER_SYMBYTES], int transposed) +{ -+ uint8_t buf[KYBER_ETA2*KYBER_N/4]; -+ prf(buf, sizeof(buf), seed, nonce); -+ poly_cbd_eta2(r, buf); - } - --// Algorithm 2 of the Kyber spec, with eta fixed to two and the PRF call --// included. Creates binominally distributed elements by sampling 2*|eta| bits, --// and setting the coefficient to the count of the first bits minus the count of --// the second bits, resulting in a centered binomial distribution. Since eta is --// two this gives -2/2 with a probability of 1/16, -1/1 with probability 1/4, --// and 0 with probability 3/8. 
--static void scalar_centered_binomial_distribution_eta_2_with_prf( -- scalar *out, const uint8_t input[33]) { -- uint8_t entropy[128]; -- static_assert(sizeof(entropy) == 2 * /*kEta=*/2 * DEGREE / 8, ""); -- BORINGSSL_keccak(entropy, sizeof(entropy), input, 33, boringssl_shake256); ++ unsigned int ctr, i, j, k; ++ unsigned int buflen, off; ++ uint8_t buf[GEN_MATRIX_NBLOCKS*XOF_BLOCKBYTES+2]; ++ xof_state state; ++ ++ for(i=0;i> kBarrettShift; +- uint32_t remainder = product - quotient * kPrime; - -- uint16_t value = kPrime; -- value += (byte & 1) + ((byte >> 1) & 1); -- value -= ((byte >> 2) & 1) + ((byte >> 3) & 1); -- out->c[i] = reduce_once(value); +- // Adjust the quotient to round correctly: +- // 0 <= remainder <= kHalfPrime round to 0 +- // kHalfPrime < remainder <= kPrime + kHalfPrime round to 1 +- // kPrime + kHalfPrime < remainder < 2 * kPrime round to 2 +- assert(remainder < 2u * kPrime); +- quotient += 1 & constant_time_lt_w(kHalfPrime, remainder); +- quotient += 1 & constant_time_lt_w(kPrime + kHalfPrime, remainder); +- return quotient & ((1 << bits) - 1); +-} - -- byte >>= 4; -- value = kPrime; -- value += (byte & 1) + ((byte >> 1) & 1); -- value -= ((byte >> 2) & 1) + ((byte >> 3) & 1); -- out->c[i + 1] = reduce_once(value); +-// Decompresses |x| by using an equi-distant representative. The formula is +-// round(kPrime/2^|bits|*x). Note that 2^|bits| being the divisor allows us to +-// implement this logic using only bit operations. +-static uint16_t decompress(uint16_t x, int bits) { +- uint32_t product = (uint32_t)x * kPrime; +- uint32_t power = 1 << bits; +- // This is |product| % power, since |power| is a power of 2. +- uint32_t remainder = product & (power - 1); +- // This is |product| / power, since |power| is a power of 2. +- uint32_t lower = product >> bits; +- // The rounding logic works since the first half of numbers mod |power| have a +- // 0 as first bit, and the second half has a 1 as first bit, since |power| is +- // a power of 2. 
As a 12 bit number, |remainder| is always positive, so we +- // will shift in 0s for a right shift. +- return lower + (remainder >> (bits - 1)); +-} +- +-static void scalar_compress(scalar *s, int bits) { +- for (int i = 0; i < DEGREE; i++) { +- s->c[i] = compress(s->c[i], bits); - } -+ + } + +-static void scalar_decompress(scalar *s, int bits) { +- for (int i = 0; i < DEGREE; i++) { +- s->c[i] = decompress(s->c[i], bits); +/************************************************* -+* Name: poly_ntt ++* Name: indcpa_keypair +* -+* Description: Computes negacyclic number-theoretic transform (NTT) of -+* a polynomial in place; -+* inputs assumed to be in normal order, output in bitreversed order ++* Description: Generates public and private key for the CPA-secure ++* public-key encryption scheme underlying Kyber +* -+* Arguments: - uint16_t *r: pointer to in/output polynomial ++* Arguments: - uint8_t *pk: pointer to output public key ++* (of length KYBER_INDCPA_PUBLICKEYBYTES bytes) ++* - uint8_t *sk: pointer to output private key ++ (of length KYBER_INDCPA_SECRETKEYBYTES bytes) +**************************************************/ -+static void poly_ntt(poly *r) ++static void indcpa_keypair(uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES], ++ uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES], ++ const uint8_t seed[KYBER_SYMBYTES]) +{ -+ ntt(r->coeffs); -+ poly_reduce(r); ++ unsigned int i; ++ uint8_t buf[2*KYBER_SYMBYTES]; ++ const uint8_t *publicseed = buf; ++ const uint8_t *noiseseed = buf+KYBER_SYMBYTES; ++ uint8_t nonce = 0; ++ polyvec a[KYBER_K], e, pkpv, skpv; ++ ++ memcpy(buf, seed, KYBER_SYMBYTES); ++ hash_g(buf, buf, KYBER_SYMBYTES); ++ ++ gen_a(a, publicseed); ++ ++ for(i=0;iv[i], input); +- scalar_compress(&a->v[i], bits); - } +/************************************************* -+* Name: poly_invntt_tomont ++* Name: indcpa_enc +* -+* Description: Computes inverse of negacyclic number-theoretic transform (NTT) -+* of a polynomial in place; -+* inputs assumed to be in bitreversed 
order, output in normal order ++* Description: Encryption function of the CPA-secure ++* public-key encryption scheme underlying Kyber. +* -+* Arguments: - uint16_t *a: pointer to in/output polynomial ++* Arguments: - uint8_t *c: pointer to output ciphertext ++* (of length KYBER_INDCPA_BYTES bytes) ++* - const uint8_t *m: pointer to input message ++* (of length KYBER_INDCPA_MSGBYTES bytes) ++* - const uint8_t *pk: pointer to input public key ++* (of length KYBER_INDCPA_PUBLICKEYBYTES) ++* - const uint8_t *coins: pointer to input random coins used as seed ++* (of length KYBER_SYMBYTES) to deterministically ++* generate all randomness +**************************************************/ -+static void poly_invntt_tomont(poly *r) ++static void indcpa_enc(uint8_t c[KYBER_INDCPA_BYTES], ++ const uint8_t m[KYBER_INDCPA_MSGBYTES], ++ const uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES], ++ const uint8_t coins[KYBER_SYMBYTES]) +{ -+ invntt(r->coeffs); ++ unsigned int i; ++ uint8_t seed[KYBER_SYMBYTES]; ++ uint8_t nonce = 0; ++ polyvec sp, pkpv, ep, at[KYBER_K], b; ++ poly v, k, epp; ++ ++ unpack_pk(&pkpv, seed, pk); ++ poly_frommsg(&k, m); ++ gen_at(at, seed); ++ ++ for(i=0;iv[i][j], &keccak_ctx); -- } +- scalar_decompress(&a->v[i], bits); +- } +/************************************************* -+* Name: poly_basemul_montgomery ++* Name: indcpa_dec +* -+* Description: Multiplication of two polynomials in NTT domain ++* Description: Decryption function of the CPA-secure ++* public-key encryption scheme underlying Kyber. 
+* -+* Arguments: - poly *r: pointer to output polynomial -+* - const poly *a: pointer to first input polynomial -+* - const poly *b: pointer to second input polynomial ++* Arguments: - uint8_t *m: pointer to output decrypted message ++* (of length KYBER_INDCPA_MSGBYTES) ++* - const uint8_t *c: pointer to input ciphertext ++* (of length KYBER_INDCPA_BYTES) ++* - const uint8_t *sk: pointer to input secret key ++* (of length KYBER_INDCPA_SECRETKEYBYTES) +**************************************************/ -+static void poly_basemul_montgomery(poly *r, const poly *a, const poly *b) ++static void indcpa_dec(uint8_t m[KYBER_INDCPA_MSGBYTES], ++ const uint8_t c[KYBER_INDCPA_BYTES], ++ const uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES]) +{ -+ unsigned int i; -+ for(i=0;icoeffs[4*i], &a->coeffs[4*i], &b->coeffs[4*i], zetas[64+i]); -+ basemul(&r->coeffs[4*i+2], &a->coeffs[4*i+2], &b->coeffs[4*i+2], -zetas[64+i]); - } ++ polyvec b, skpv; ++ poly v, mp; ++ ++ unpack_ciphertext(&b, &v, c); ++ unpack_sk(&skpv, sk); ++ ++ polyvec_ntt(&b); ++ polyvec_basemul_acc_montgomery(&mp, &skpv, &b); ++ poly_invntt_tomont(&mp); ++ ++ poly_sub(&mp, &v, &mp); ++ poly_reduce(&mp); ++ ++ poly_tomsg(m, &mp); } --static const uint8_t kMasks[8] = {0x01, 0x03, 0x07, 0x0f, -- 0x1f, 0x3f, 0x7f, 0xff}; -- --static void scalar_encode(uint8_t *out, const scalar *s, int bits) { -- assert(bits <= (int)sizeof(*s->c) * 8 && bits != 1); -- -- uint8_t out_byte = 0; -- int out_byte_bits = 0; -- -- for (int i = 0; i < DEGREE; i++) { -- uint16_t element = s->c[i]; -- int element_bits_done = 0; -- -- while (element_bits_done < bits) { -- int chunk_bits = bits - element_bits_done; -- int out_bits_remaining = 8 - out_byte_bits; -- if (chunk_bits >= out_bits_remaining) { -- chunk_bits = out_bits_remaining; -- out_byte |= (element & kMasks[chunk_bits - 1]) << out_byte_bits; -- *out = out_byte; -- out++; -- out_byte_bits = 0; -- out_byte = 0; -- } else { -- out_byte |= (element & kMasks[chunk_bits - 1]) << out_byte_bits; 
-- out_byte_bits += chunk_bits; +-struct public_key { +- vector t; +- uint8_t rho[32]; +- uint8_t public_key_hash[32]; +- matrix m; +-}; ++// ++// fips202.c ++// ++ ++/* Based on the public domain implementation in crypto_hash/keccakc512/simple/ from ++ * http://bench.cr.yp.to/supercop.html by Ronny Van Keer and the public domain "TweetFips202" ++ * implementation from https://twitter.com/tweetfips202 by Gilles Van Assche, Daniel J. Bernstein, ++ * and Peter Schwabe */ ++ ++#define NROUNDS 24 ++#define ROL(a, offset) ((a << offset) ^ (a >> (64-offset))) ++ +/************************************************* -+* Name: poly_tomont ++* Name: load64 +* -+* Description: Inplace conversion of all coefficients of a polynomial -+* from normal domain to Montgomery domain ++* Description: Load 8 bytes into uint64_t in little-endian order +* -+* Arguments: - poly *r: pointer to input/output polynomial ++* Arguments: - const uint8_t *x: pointer to input byte array ++* ++* Returns the loaded 64-bit unsigned integer +**************************************************/ -+static void poly_tomont(poly *r) -+{ ++static uint64_t load64(const uint8_t x[8]) { + unsigned int i; -+ const int16_t f = (1ULL << 32) % KYBER_Q; -+ for(i=0;icoeffs[i] = montgomery_reduce((int32_t)r->coeffs[i]*f); -+} ++ uint64_t r = 0; ++ ++ for(i=0;i<8;i++) ++ r |= (uint64_t)x[i] << 8*i; + ++ return r; ++} + +-static struct public_key *public_key_from_external( +- const struct KYBER_public_key *external) { +- static_assert(sizeof(struct KYBER_public_key) >= sizeof(struct public_key), +- "Kyber public key is too small"); +- static_assert(alignof(struct KYBER_public_key) >= alignof(struct public_key), +- "Kyber public key align incorrect"); +- return (struct public_key *)external; +/************************************************* -+* Name: poly_reduce ++* Name: store64 +* -+* Description: Applies Barrett reduction to all coefficients of a polynomial -+* for details of the Barrett reduction see comments in 
reduce.c ++* Description: Store a 64-bit integer to array of 8 bytes in little-endian order +* -+* Arguments: - poly *r: pointer to input/output polynomial ++* Arguments: - uint8_t *x: pointer to the output byte array (allocated) ++* - uint64_t u: input 64-bit unsigned integer +**************************************************/ -+static void poly_reduce(poly *r) -+{ ++static void store64(uint8_t x[8], uint64_t u) { + unsigned int i; -+ for(i=0;icoeffs[i] = barrett_reduce(r->coeffs[i]); -+} + ++ for(i=0;i<8;i++) ++ x[i] = u >> 8*i; + } + +-struct private_key { +- struct public_key pub; +- vector s; +- uint8_t fo_failure_secret[32]; ++/* Keccak round constants */ ++static const uint64_t KeccakF_RoundConstants[NROUNDS] = { ++ (uint64_t)0x0000000000000001ULL, ++ (uint64_t)0x0000000000008082ULL, ++ (uint64_t)0x800000000000808aULL, ++ (uint64_t)0x8000000080008000ULL, ++ (uint64_t)0x000000000000808bULL, ++ (uint64_t)0x0000000080000001ULL, ++ (uint64_t)0x8000000080008081ULL, ++ (uint64_t)0x8000000000008009ULL, ++ (uint64_t)0x000000000000008aULL, ++ (uint64_t)0x0000000000000088ULL, ++ (uint64_t)0x0000000080008009ULL, ++ (uint64_t)0x000000008000000aULL, ++ (uint64_t)0x000000008000808bULL, ++ (uint64_t)0x800000000000008bULL, ++ (uint64_t)0x8000000000008089ULL, ++ (uint64_t)0x8000000000008003ULL, ++ (uint64_t)0x8000000000008002ULL, ++ (uint64_t)0x8000000000000080ULL, ++ (uint64_t)0x000000000000800aULL, ++ (uint64_t)0x800000008000000aULL, ++ (uint64_t)0x8000000080008081ULL, ++ (uint64_t)0x8000000000008080ULL, ++ (uint64_t)0x0000000080000001ULL, ++ (uint64_t)0x8000000080008008ULL + }; + +-static struct private_key *private_key_from_external( +- const struct KYBER_private_key *external) { +- static_assert(sizeof(struct KYBER_private_key) >= sizeof(struct private_key), +- "Kyber private key too small"); +- static_assert( +- alignof(struct KYBER_private_key) >= alignof(struct private_key), +- "Kyber private key align incorrect"); +- return (struct private_key *)external; +-} +- 
+-// Calls |KYBER_generate_key_external_entropy| with random bytes from +-// |RAND_bytes|. +-void KYBER_generate_key(uint8_t out_encoded_public_key[KYBER_PUBLIC_KEY_BYTES], +- struct KYBER_private_key *out_private_key) { +- uint8_t entropy[KYBER_GENERATE_KEY_ENTROPY]; +- RAND_bytes(entropy, sizeof(entropy)); +- KYBER_generate_key_external_entropy(out_encoded_public_key, out_private_key, +- entropy); +-} +- +-static int kyber_marshal_public_key(CBB *out, const struct public_key *pub) { +- uint8_t *vector_output; +- if (!CBB_add_space(out, &vector_output, kEncodedVectorSize)) { +- return 0; +- } +- vector_encode(vector_output, &pub->t, kLog2Prime); +- if (!CBB_add_bytes(out, pub->rho, sizeof(pub->rho))) { +- return 0; +/************************************************* -+* Name: poly_add ++* Name: KeccakF1600_StatePermute +* -+* Description: Add two polynomials; no modular reduction is performed ++* Description: The Keccak F1600 Permutation +* -+* Arguments: - poly *r: pointer to output polynomial -+* - const poly *a: pointer to first input polynomial -+* - const poly *b: pointer to second input polynomial ++* Arguments: - uint64_t *state: pointer to input/output Keccak state +**************************************************/ -+static void poly_add(poly *r, const poly *a, const poly *b) ++static void KeccakF1600_StatePermute(uint64_t state[25]) +{ -+ unsigned int i; -+ for(i=0;icoeffs[i] = a->coeffs[i] + b->coeffs[i]; -+} ++ int round; + -+/************************************************* -+* Name: poly_sub -+* -+* Description: Subtract two polynomials; no modular reduction is performed -+* -+* Arguments: - poly *r: pointer to output polynomial -+* - const poly *a: pointer to first input polynomial -+* - const poly *b: pointer to second input polynomial -+**************************************************/ -+static void poly_sub(poly *r, const poly *a, const poly *b) -+{ -+ unsigned int i; -+ for(i=0;icoeffs[i] = a->coeffs[i] - b->coeffs[i]; -+} ++ uint64_t Aba, 
Abe, Abi, Abo, Abu; ++ uint64_t Aga, Age, Agi, Ago, Agu; ++ uint64_t Aka, Ake, Aki, Ako, Aku; ++ uint64_t Ama, Ame, Ami, Amo, Amu; ++ uint64_t Asa, Ase, Asi, Aso, Asu; ++ uint64_t BCa, BCe, BCi, BCo, BCu; ++ uint64_t Da, De, Di, Do, Du; ++ uint64_t Eba, Ebe, Ebi, Ebo, Ebu; ++ uint64_t Ega, Ege, Egi, Ego, Egu; ++ uint64_t Eka, Eke, Eki, Eko, Eku; ++ uint64_t Ema, Eme, Emi, Emo, Emu; ++ uint64_t Esa, Ese, Esi, Eso, Esu; + -+// -+// polyvec.c -+// ++ //copyFromState(A, state) ++ Aba = state[ 0]; ++ Abe = state[ 1]; ++ Abi = state[ 2]; ++ Abo = state[ 3]; ++ Abu = state[ 4]; ++ Aga = state[ 5]; ++ Age = state[ 6]; ++ Agi = state[ 7]; ++ Ago = state[ 8]; ++ Agu = state[ 9]; ++ Aka = state[10]; ++ Ake = state[11]; ++ Aki = state[12]; ++ Ako = state[13]; ++ Aku = state[14]; ++ Ama = state[15]; ++ Ame = state[16]; ++ Ami = state[17]; ++ Amo = state[18]; ++ Amu = state[19]; ++ Asa = state[20]; ++ Ase = state[21]; ++ Asi = state[22]; ++ Aso = state[23]; ++ Asu = state[24]; + -+/************************************************* -+* Name: polyvec_compress -+* -+* Description: Compress and serialize vector of polynomials -+* -+* Arguments: - uint8_t *r: pointer to output byte array -+* (needs space for KYBER_POLYVECCOMPRESSEDBYTES) -+* - const polyvec *a: pointer to input vector of polynomials -+**************************************************/ -+static void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a) -+{ -+ unsigned int i,j,k; -+ uint64_t d0; ++ for(round = 0; round < NROUNDS; round += 2) { ++ // prepareTheta ++ BCa = Aba^Aga^Aka^Ama^Asa; ++ BCe = Abe^Age^Ake^Ame^Ase; ++ BCi = Abi^Agi^Aki^Ami^Asi; ++ BCo = Abo^Ago^Ako^Amo^Aso; ++ BCu = Abu^Agu^Aku^Amu^Asu; + -+#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) -+ uint16_t t[8]; -+ for(i=0;ivec[i].coeffs[8*j+k]; -+ t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; -+ d0 = t[k]; -+ d0 <<= 11; -+ d0 += 1664; -+ d0 *= 645084; -+ d0 >>= 31; -+ t[k] = d0 & 0x7ff; - } - -- element_bits_done += chunk_bits; 
-- element >>= chunk_bits; -+ r[ 0] = (t[0] >> 0); -+ r[ 1] = (t[0] >> 8) | (t[1] << 3); -+ r[ 2] = (t[1] >> 5) | (t[2] << 6); -+ r[ 3] = (t[2] >> 2); -+ r[ 4] = (t[2] >> 10) | (t[3] << 1); -+ r[ 5] = (t[3] >> 7) | (t[4] << 4); -+ r[ 6] = (t[4] >> 4) | (t[5] << 7); -+ r[ 7] = (t[5] >> 1); -+ r[ 8] = (t[5] >> 9) | (t[6] << 2); -+ r[ 9] = (t[6] >> 6) | (t[7] << 5); -+ r[10] = (t[7] >> 3); -+ r += 11; - } - } -+#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) -+ uint16_t t[4]; -+ for(i=0;ivec[i].coeffs[4*j+k]; -+ t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; -+ d0 = t[k]; -+ d0 <<= 10; -+ d0 += 1665; -+ d0 *= 1290167; -+ d0 >>= 32; -+ t[k] = d0 & 0x3ff; -+ } - -- if (out_byte_bits > 0) { -- *out = out_byte; -+ r[0] = (t[0] >> 0); -+ r[1] = (t[0] >> 8) | (t[1] << 2); -+ r[2] = (t[1] >> 6) | (t[2] << 4); -+ r[3] = (t[2] >> 4) | (t[3] << 6); -+ r[4] = (t[3] >> 2); -+ r += 5; -+ } - } -+#else -+#error "KYBER_POLYVECCOMPRESSEDBYTES needs to be in {320*KYBER_K, 352*KYBER_K}" -+#endif - } - --// scalar_encode_1 is |scalar_encode| specialised for |bits| == 1. 
--static void scalar_encode_1(uint8_t out[32], const scalar *s) { -- for (int i = 0; i < DEGREE; i += 8) { -- uint8_t out_byte = 0; -- for (int j = 0; j < 8; j++) { -- out_byte |= (s->c[i + j] & 1) << j; -+/************************************************* -+* Name: polyvec_decompress -+* -+* Description: De-serialize and decompress vector of polynomials; -+* approximate inverse of polyvec_compress -+* -+* Arguments: - polyvec *r: pointer to output vector of polynomials -+* - const uint8_t *a: pointer to input byte array -+* (of length KYBER_POLYVECCOMPRESSEDBYTES) -+**************************************************/ -+static void polyvec_decompress(polyvec *r, const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES]) -+{ -+ unsigned int i,j,k; ++ //thetaRhoPiChiIotaPrepareTheta(round, A, E) ++ Da = BCu^ROL(BCe, 1); ++ De = BCa^ROL(BCi, 1); ++ Di = BCe^ROL(BCo, 1); ++ Do = BCi^ROL(BCu, 1); ++ Du = BCo^ROL(BCa, 1); + -+#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) -+ uint16_t t[8]; -+ for(i=0;i> 0) | ((uint16_t)a[ 1] << 8); -+ t[1] = (a[1] >> 3) | ((uint16_t)a[ 2] << 5); -+ t[2] = (a[2] >> 6) | ((uint16_t)a[ 3] << 2) | ((uint16_t)a[4] << 10); -+ t[3] = (a[4] >> 1) | ((uint16_t)a[ 5] << 7); -+ t[4] = (a[5] >> 4) | ((uint16_t)a[ 6] << 4); -+ t[5] = (a[6] >> 7) | ((uint16_t)a[ 7] << 1) | ((uint16_t)a[8] << 9); -+ t[6] = (a[8] >> 2) | ((uint16_t)a[ 9] << 6); -+ t[7] = (a[9] >> 5) | ((uint16_t)a[10] << 3); -+ a += 11; ++ Aba ^= Da; ++ BCa = Aba; ++ Age ^= De; ++ BCe = ROL(Age, 44); ++ Aki ^= Di; ++ BCi = ROL(Aki, 43); ++ Amo ^= Do; ++ BCo = ROL(Amo, 21); ++ Asu ^= Du; ++ BCu = ROL(Asu, 14); ++ Eba = BCa ^((~BCe)& BCi ); ++ Eba ^= (uint64_t)KeccakF_RoundConstants[round]; ++ Ebe = BCe ^((~BCi)& BCo ); ++ Ebi = BCi ^((~BCo)& BCu ); ++ Ebo = BCo ^((~BCu)& BCa ); ++ Ebu = BCu ^((~BCa)& BCe ); + -+ for(k=0;k<8;k++) -+ r->vec[i].coeffs[8*j+k] = ((uint32_t)(t[k] & 0x7FF)*KYBER_Q + 1024) >> 11; -+ } -+ } -+#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) -+ uint16_t t[4]; -+ 
for(i=0;i> 0) | ((uint16_t)a[1] << 8); -+ t[1] = (a[1] >> 2) | ((uint16_t)a[2] << 6); -+ t[2] = (a[2] >> 4) | ((uint16_t)a[3] << 4); -+ t[3] = (a[3] >> 6) | ((uint16_t)a[4] << 2); -+ a += 5; ++ Abo ^= Do; ++ BCa = ROL(Abo, 28); ++ Agu ^= Du; ++ BCe = ROL(Agu, 20); ++ Aka ^= Da; ++ BCi = ROL(Aka, 3); ++ Ame ^= De; ++ BCo = ROL(Ame, 45); ++ Asi ^= Di; ++ BCu = ROL(Asi, 61); ++ Ega = BCa ^((~BCe)& BCi ); ++ Ege = BCe ^((~BCi)& BCo ); ++ Egi = BCi ^((~BCo)& BCu ); ++ Ego = BCo ^((~BCu)& BCa ); ++ Egu = BCu ^((~BCa)& BCe ); ++ ++ Abe ^= De; ++ BCa = ROL(Abe, 1); ++ Agi ^= Di; ++ BCe = ROL(Agi, 6); ++ Ako ^= Do; ++ BCi = ROL(Ako, 25); ++ Amu ^= Du; ++ BCo = ROL(Amu, 8); ++ Asa ^= Da; ++ BCu = ROL(Asa, 18); ++ Eka = BCa ^((~BCe)& BCi ); ++ Eke = BCe ^((~BCi)& BCo ); ++ Eki = BCi ^((~BCo)& BCu ); ++ Eko = BCo ^((~BCu)& BCa ); ++ Eku = BCu ^((~BCa)& BCe ); ++ ++ Abu ^= Du; ++ BCa = ROL(Abu, 27); ++ Aga ^= Da; ++ BCe = ROL(Aga, 36); ++ Ake ^= De; ++ BCi = ROL(Ake, 10); ++ Ami ^= Di; ++ BCo = ROL(Ami, 15); ++ Aso ^= Do; ++ BCu = ROL(Aso, 56); ++ Ema = BCa ^((~BCe)& BCi ); ++ Eme = BCe ^((~BCi)& BCo ); ++ Emi = BCi ^((~BCo)& BCu ); ++ Emo = BCo ^((~BCu)& BCa ); ++ Emu = BCu ^((~BCa)& BCe ); + -+ for(k=0;k<4;k++) -+ r->vec[i].coeffs[4*j+k] = ((uint32_t)(t[k] & 0x3FF)*KYBER_Q + 512) >> 10; - } -- *out = out_byte; -- out++; - } -+#else -+#error "KYBER_POLYVECCOMPRESSEDBYTES needs to be in {320*KYBER_K, 352*KYBER_K}" -+#endif -+} ++ Abi ^= Di; ++ BCa = ROL(Abi, 62); ++ Ago ^= Do; ++ BCe = ROL(Ago, 55); ++ Aku ^= Du; ++ BCi = ROL(Aku, 39); ++ Ama ^= Da; ++ BCo = ROL(Ama, 41); ++ Ase ^= De; ++ BCu = ROL(Ase, 2); ++ Esa = BCa ^((~BCe)& BCi ); ++ Ese = BCe ^((~BCi)& BCo ); ++ Esi = BCi ^((~BCo)& BCu ); ++ Eso = BCo ^((~BCu)& BCa ); ++ Esu = BCu ^((~BCa)& BCe ); + -+/************************************************* -+* Name: polyvec_tobytes -+* -+* Description: Serialize vector of polynomials -+* -+* Arguments: - uint8_t *r: pointer to output byte array -+* (needs space for 
KYBER_POLYVECBYTES) -+* - const polyvec *a: pointer to input vector of polynomials -+**************************************************/ -+static void polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a) -+{ -+ unsigned int i; -+ for(i=0;ivec[i]); - } - --// Encodes an entire vector into 32*|RANK|*|bits| bytes. Note that since 256 --// (DEGREE) is divisible by 8, the individual vector entries will always fill a --// whole number of bytes, so we do not need to worry about bit packing here. --static void vector_encode(uint8_t *out, const vector *a, int bits) { -- for (int i = 0; i < RANK; i++) { -- scalar_encode(out + i * bits * DEGREE / 8, &a->v[i], bits); -+/************************************************* -+* Name: polyvec_frombytes -+* -+* Description: De-serialize vector of polynomials; -+* inverse of polyvec_tobytes -+* -+* Arguments: - uint8_t *r: pointer to output byte array -+* - const polyvec *a: pointer to input vector of polynomials -+* (of length KYBER_POLYVECBYTES) -+**************************************************/ -+static void polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES]) -+{ -+ unsigned int i; -+ for(i=0;ivec[i], a+i*KYBER_POLYBYTES); -+} ++ // prepareTheta ++ BCa = Eba^Ega^Eka^Ema^Esa; ++ BCe = Ebe^Ege^Eke^Eme^Ese; ++ BCi = Ebi^Egi^Eki^Emi^Esi; ++ BCo = Ebo^Ego^Eko^Emo^Eso; ++ BCu = Ebu^Egu^Eku^Emu^Esu; + -+/************************************************* -+* Name: polyvec_ntt -+* -+* Description: Apply forward NTT to all elements of a vector of polynomials -+* -+* Arguments: - polyvec *r: pointer to in/output vector of polynomials -+**************************************************/ -+static void polyvec_ntt(polyvec *r) -+{ -+ unsigned int i; -+ for(i=0;ivec[i]); -+} ++ //thetaRhoPiChiIotaPrepareTheta(round+1, E, A) ++ Da = BCu^ROL(BCe, 1); ++ De = BCa^ROL(BCi, 1); ++ Di = BCe^ROL(BCo, 1); ++ Do = BCi^ROL(BCu, 1); ++ Du = BCo^ROL(BCa, 1); + -+/************************************************* -+* Name: 
polyvec_invntt_tomont -+* -+* Description: Apply inverse NTT to all elements of a vector of polynomials -+* and multiply by Montgomery factor 2^16 -+* -+* Arguments: - polyvec *r: pointer to in/output vector of polynomials -+**************************************************/ -+static void polyvec_invntt_tomont(polyvec *r) -+{ -+ unsigned int i; -+ for(i=0;ivec[i]); -+} ++ Eba ^= Da; ++ BCa = Eba; ++ Ege ^= De; ++ BCe = ROL(Ege, 44); ++ Eki ^= Di; ++ BCi = ROL(Eki, 43); ++ Emo ^= Do; ++ BCo = ROL(Emo, 21); ++ Esu ^= Du; ++ BCu = ROL(Esu, 14); ++ Aba = BCa ^((~BCe)& BCi ); ++ Aba ^= (uint64_t)KeccakF_RoundConstants[round+1]; ++ Abe = BCe ^((~BCi)& BCo ); ++ Abi = BCi ^((~BCo)& BCu ); ++ Abo = BCo ^((~BCu)& BCa ); ++ Abu = BCu ^((~BCa)& BCe ); + -+/************************************************* -+* Name: polyvec_basemul_acc_montgomery -+* -+* Description: Multiply elements of a and b in NTT domain, accumulate into r, -+* and multiply by 2^-16. -+* -+* Arguments: - poly *r: pointer to output polynomial -+* - const polyvec *a: pointer to first input vector of polynomials -+* - const polyvec *b: pointer to second input vector of polynomials -+**************************************************/ -+static void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b) -+{ -+ unsigned int i; -+ poly t; ++ Ebo ^= Do; ++ BCa = ROL(Ebo, 28); ++ Egu ^= Du; ++ BCe = ROL(Egu, 20); ++ Eka ^= Da; ++ BCi = ROL(Eka, 3); ++ Eme ^= De; ++ BCo = ROL(Eme, 45); ++ Esi ^= Di; ++ BCu = ROL(Esi, 61); ++ Aga = BCa ^((~BCe)& BCi ); ++ Age = BCe ^((~BCi)& BCo ); ++ Agi = BCi ^((~BCo)& BCu ); ++ Ago = BCo ^((~BCu)& BCa ); ++ Agu = BCu ^((~BCa)& BCe ); + -+ poly_basemul_montgomery(r, &a->vec[0], &b->vec[0]); -+ for(i=1;ivec[i], &b->vec[i]); -+ poly_add(r, r, &t); - } ++ Ebe ^= De; ++ BCa = ROL(Ebe, 1); ++ Egi ^= Di; ++ BCe = ROL(Egi, 6); ++ Eko ^= Do; ++ BCi = ROL(Eko, 25); ++ Emu ^= Du; ++ BCo = ROL(Emu, 8); ++ Esa ^= Da; ++ BCu = ROL(Esa, 18); ++ Aka = BCa ^((~BCe)& BCi ); 
++ Ake = BCe ^((~BCi)& BCo ); ++ Aki = BCi ^((~BCo)& BCu ); ++ Ako = BCo ^((~BCu)& BCa ); ++ Aku = BCu ^((~BCa)& BCe ); + -+ poly_reduce(r); - } - --// scalar_decode parses |DEGREE * bits| bits from |in| into |DEGREE| values in --// |out|. It returns one on success and zero if any parsed value is >= --// |kPrime|. --static int scalar_decode(scalar *out, const uint8_t *in, int bits) { -- assert(bits <= (int)sizeof(*out->c) * 8 && bits != 1); -+/************************************************* -+* Name: polyvec_reduce -+* -+* Description: Applies Barrett reduction to each coefficient -+* of each element of a vector of polynomials; -+* for details of the Barrett reduction see comments in reduce.c -+* -+* Arguments: - polyvec *r: pointer to input/output polynomial -+**************************************************/ -+static void polyvec_reduce(polyvec *r) -+{ -+ unsigned int i; -+ for(i=0;ivec[i]); -+} - -- uint8_t in_byte = 0; -- int in_byte_bits_left = 0; -+/************************************************* -+* Name: polyvec_add -+* -+* Description: Add vectors of polynomials -+* -+* Arguments: - polyvec *r: pointer to output vector of polynomials -+* - const polyvec *a: pointer to first input vector of polynomials -+* - const polyvec *b: pointer to second input vector of polynomials -+**************************************************/ -+static void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b) -+{ -+ unsigned int i; -+ for(i=0;ivec[i], &a->vec[i], &b->vec[i]); -+} - -- for (int i = 0; i < DEGREE; i++) { -- uint16_t element = 0; -- int element_bits_done = 0; -+// -+// indcpa.c -+// - -- while (element_bits_done < bits) { -- if (in_byte_bits_left == 0) { -- in_byte = *in; -- in++; -- in_byte_bits_left = 8; -- } -+/************************************************* -+* Name: pack_pk -+* -+* Description: Serialize the public key as concatenation of the -+* serialized vector of polynomials pk -+* and the public seed used to generate the matrix A. 
-+* -+* Arguments: uint8_t *r: pointer to the output serialized public key -+* polyvec *pk: pointer to the input public-key polyvec -+* const uint8_t *seed: pointer to the input public seed -+**************************************************/ -+static void pack_pk(uint8_t r[KYBER_INDCPA_PUBLICKEYBYTES], -+ polyvec *pk, -+ const uint8_t seed[KYBER_SYMBYTES]) -+{ -+ size_t i; -+ polyvec_tobytes(r, pk); -+ for(i=0;i in_byte_bits_left) { -- chunk_bits = in_byte_bits_left; -- } ++ ++ +/************************************************* -+* Name: unpack_pk -+* -+* Description: De-serialize public key from a byte array; -+* approximate inverse of pack_pk ++* Name: keccak_squeeze +* -+* Arguments: - polyvec *pk: pointer to output public-key polynomial vector -+* - uint8_t *seed: pointer to output seed to generate matrix A -+* - const uint8_t *packedpk: pointer to input serialized public key -+**************************************************/ -+static void unpack_pk(polyvec *pk, -+ uint8_t seed[KYBER_SYMBYTES], -+ const uint8_t packedpk[KYBER_INDCPA_PUBLICKEYBYTES]) -+{ -+ size_t i; -+ polyvec_frombytes(pk, packedpk); -+ for(i=0;i>= chunk_bits; -+/************************************************* -+* Name: pack_sk ++* Description: Squeeze step of Keccak. Squeezes arbitratrily many bytes. ++* Modifies the state. Can be called multiple times to keep ++* squeezing, i.e., is incremental. 
+* -+* Description: Serialize the secret key ++* Arguments: - uint8_t *out: pointer to output ++* - size_t outlen: number of bytes to be squeezed (written to out) ++* - uint64_t *s: pointer to input/output Keccak state ++* - unsigned int pos: number of bytes in current block already squeezed ++* - unsigned int r: rate in bytes (e.g., 168 for SHAKE128) +* -+* Arguments: - uint8_t *r: pointer to output serialized secret key -+* - polyvec *sk: pointer to input vector of polynomials (secret key) ++* Returns new position pos in current block +**************************************************/ -+static void pack_sk(uint8_t r[KYBER_INDCPA_SECRETKEYBYTES], polyvec *sk) ++static unsigned int keccak_squeeze(uint8_t *out, ++ size_t outlen, ++ uint64_t s[25], ++ unsigned int pos, ++ unsigned int r) +{ -+ polyvec_tobytes(r, sk); ++ unsigned int i; ++ ++ while(outlen) { ++ if(pos == r) { ++ KeccakF1600_StatePermute(s); ++ pos = 0; ++ } ++ for(i=pos;i < r && i < pos+outlen; i++) ++ *out++ = s[i/8] >> 8*(i%8); ++ outlen -= i-pos; ++ pos = i; + } +- return 1; +-} +- +-// Algorithms 4 and 7 of the Kyber spec. Algorithms are combined since key +-// generation is not part of the FO transform, and the spec uses Algorithm 7 to +-// specify the actual key format. 
+-void KYBER_generate_key_external_entropy( +- uint8_t out_encoded_public_key[KYBER_PUBLIC_KEY_BYTES], +- struct KYBER_private_key *out_private_key, +- const uint8_t entropy[KYBER_GENERATE_KEY_ENTROPY]) { +- struct private_key *priv = private_key_from_external(out_private_key); +- uint8_t hashed[64]; +- BORINGSSL_keccak(hashed, sizeof(hashed), entropy, 32, boringssl_sha3_512); +- const uint8_t *const rho = hashed; +- const uint8_t *const sigma = hashed + 32; +- OPENSSL_memcpy(priv->pub.rho, hashed, sizeof(priv->pub.rho)); +- matrix_expand(&priv->pub.m, rho); +- uint8_t counter = 0; +- vector_generate_secret_eta_2(&priv->s, &counter, sigma); +- vector_ntt(&priv->s); +- vector error; +- vector_generate_secret_eta_2(&error, &counter, sigma); +- vector_ntt(&error); +- matrix_mult_transpose(&priv->pub.t, &priv->pub.m, &priv->s); +- vector_add(&priv->pub.t, &error); +- +- CBB cbb; +- CBB_init_fixed(&cbb, out_encoded_public_key, KYBER_PUBLIC_KEY_BYTES); +- if (!kyber_marshal_public_key(&cbb, &priv->pub)) { +- abort(); ++ ++ return pos; +} - -- element_bits_done += chunk_bits; -- } ++ ++ +/************************************************* -+* Name: unpack_sk ++* Name: keccak_absorb_once +* -+* Description: De-serialize the secret key; inverse of pack_sk ++* Description: Absorb step of Keccak; ++* non-incremental, starts by zeroeing the state. 
+* -+* Arguments: - polyvec *sk: pointer to output vector of polynomials (secret key) -+* - const uint8_t *packedsk: pointer to input serialized secret key ++* Arguments: - uint64_t *s: pointer to (uninitialized) output Keccak state ++* - unsigned int r: rate in bytes (e.g., 168 for SHAKE128) ++* - const uint8_t *in: pointer to input to be absorbed into s ++* - size_t inlen: length of input in bytes ++* - uint8_t p: domain-separation byte for different Keccak-derived functions +**************************************************/ -+static void unpack_sk(polyvec *sk, const uint8_t packedsk[KYBER_INDCPA_SECRETKEYBYTES]) ++static void keccak_absorb_once(uint64_t s[25], ++ unsigned int r, ++ const uint8_t *in, ++ size_t inlen, ++ uint8_t p) +{ -+ polyvec_frombytes(sk, packedsk); -+} ++ unsigned int i; ++ ++ for(i=0;i<25;i++) ++ s[i] = 0; ++ ++ while(inlen >= r) { ++ for(i=0;i= kPrime) { -- return 0; -- } -- out->c[i] = element; -- } -+/************************************************* -+* Name: pack_ciphertext -+* -+* Description: Serialize the ciphertext as concatenation of the -+* compressed and serialized vector of polynomials b -+* and the compressed and serialized polynomial v -+* -+* Arguments: uint8_t *r: pointer to the output serialized ciphertext -+* poly *pk: pointer to the input vector of polynomials b -+* poly *v: pointer to the input polynomial v -+**************************************************/ -+static void pack_ciphertext(uint8_t r[KYBER_INDCPA_BYTES], polyvec *b, poly *v) -+{ -+ polyvec_compress(r, b); -+ poly_compress(r+KYBER_POLYVECCOMPRESSEDBYTES, v); +- BORINGSSL_keccak(priv->pub.public_key_hash, sizeof(priv->pub.public_key_hash), +- out_encoded_public_key, KYBER_PUBLIC_KEY_BYTES, +- boringssl_sha3_256); +- OPENSSL_memcpy(priv->fo_failure_secret, entropy + 32, 32); +-} +- +-void KYBER_public_from_private(struct KYBER_public_key *out_public_key, +- const struct KYBER_private_key *private_key) { +- struct public_key *const pub = 
public_key_from_external(out_public_key); +- const struct private_key *const priv = private_key_from_external(private_key); +- *pub = priv->pub; +-} +- +-// Algorithm 5 of the Kyber spec. Encrypts a message with given randomness to +-// the ciphertext in |out|. Without applying the Fujisaki-Okamoto transform this +-// would not result in a CCA secure scheme, since lattice schemes are vulnerable +-// to decryption failure oracles. +-static void encrypt_cpa(uint8_t out[KYBER_CIPHERTEXT_BYTES], +- const struct public_key *pub, const uint8_t message[32], +- const uint8_t randomness[32]) { +- uint8_t counter = 0; +- vector secret; +- vector_generate_secret_eta_2(&secret, &counter, randomness); +- vector_ntt(&secret); +- vector error; +- vector_generate_secret_eta_2(&error, &counter, randomness); +- uint8_t input[33]; +- OPENSSL_memcpy(input, randomness, 32); +- input[32] = counter; +- scalar scalar_error; +- scalar_centered_binomial_distribution_eta_2_with_prf(&scalar_error, input); +- vector u; +- matrix_mult(&u, &pub->m, &secret); +- vector_inverse_ntt(&u); +- vector_add(&u, &error); +- scalar v; +- scalar_inner_product(&v, &pub->t, &secret); +- scalar_inverse_ntt(&v); +- scalar_add(&v, &scalar_error); +- scalar expanded_message; +- scalar_decode_1(&expanded_message, message); +- scalar_decompress(&expanded_message, 1); +- scalar_add(&v, &expanded_message); +- vector_compress(&u, kDU); +- vector_encode(out, &u, kDU); +- scalar_compress(&v, kDV); +- scalar_encode(out + kCompressedVectorSize, &v, kDV); +-} +- +-// Calls KYBER_encap_external_entropy| with random bytes from |RAND_bytes| +-void KYBER_encap(uint8_t out_ciphertext[KYBER_CIPHERTEXT_BYTES], +- uint8_t *out_shared_secret, size_t out_shared_secret_len, +- const struct KYBER_public_key *public_key) { +- uint8_t entropy[KYBER_ENCAP_ENTROPY]; +- RAND_bytes(entropy, KYBER_ENCAP_ENTROPY); +- KYBER_encap_external_entropy(out_ciphertext, out_shared_secret, +- out_shared_secret_len, public_key, entropy); +-} +- +-// 
Algorithm 8 of the Kyber spec, safe for line 2 of the spec. The spec there +-// hashes the output of the system's random number generator, since the FO +-// transform will reveal it to the decrypting party. There is no reason to do +-// this when a secure random number generator is used. When an insecure random +-// number generator is used, the caller should switch to a secure one before +-// calling this method. +-void KYBER_encap_external_entropy( +- uint8_t out_ciphertext[KYBER_CIPHERTEXT_BYTES], uint8_t *out_shared_secret, +- size_t out_shared_secret_len, const struct KYBER_public_key *public_key, +- const uint8_t entropy[KYBER_ENCAP_ENTROPY]) { +- const struct public_key *pub = public_key_from_external(public_key); +- uint8_t input[64]; +- OPENSSL_memcpy(input, entropy, KYBER_ENCAP_ENTROPY); +- OPENSSL_memcpy(input + KYBER_ENCAP_ENTROPY, pub->public_key_hash, +- sizeof(input) - KYBER_ENCAP_ENTROPY); +- uint8_t prekey_and_randomness[64]; +- BORINGSSL_keccak(prekey_and_randomness, sizeof(prekey_and_randomness), input, +- sizeof(input), boringssl_sha3_512); +- encrypt_cpa(out_ciphertext, pub, entropy, prekey_and_randomness + 32); +- BORINGSSL_keccak(prekey_and_randomness + 32, 32, out_ciphertext, +- KYBER_CIPHERTEXT_BYTES, boringssl_sha3_256); +- BORINGSSL_keccak(out_shared_secret, out_shared_secret_len, +- prekey_and_randomness, sizeof(prekey_and_randomness), +- boringssl_shake256); +-} +- +-// Algorithm 6 of the Kyber spec. 
+-static void decrypt_cpa(uint8_t out[32], const struct private_key *priv, +- const uint8_t ciphertext[KYBER_CIPHERTEXT_BYTES]) { +- vector u; +- vector_decode(&u, ciphertext, kDU); +- vector_decompress(&u, kDU); +- vector_ntt(&u); +- scalar v; +- scalar_decode(&v, ciphertext + kCompressedVectorSize, kDV); +- scalar_decompress(&v, kDV); +- scalar mask; +- scalar_inner_product(&mask, &priv->s, &u); +- scalar_inverse_ntt(&mask); +- scalar_sub(&v, &mask); +- scalar_compress(&v, 1); +- scalar_encode_1(out, &v); +-} +- +-// Algorithm 9 of the Kyber spec, performing the FO transform by running +-// encrypt_cpa on the decrypted message. The spec does not allow the decryption +-// failure to be passed on to the caller, and instead returns a result that is +-// deterministic but unpredictable to anyone without knowledge of the private +-// key. +-void KYBER_decap(uint8_t *out_shared_secret, size_t out_shared_secret_len, +- const uint8_t ciphertext[KYBER_CIPHERTEXT_BYTES], +- const struct KYBER_private_key *private_key) { +- const struct private_key *priv = private_key_from_external(private_key); +- uint8_t decrypted[64]; +- decrypt_cpa(decrypted, priv, ciphertext); +- OPENSSL_memcpy(decrypted + 32, priv->pub.public_key_hash, +- sizeof(decrypted) - 32); +- uint8_t prekey_and_randomness[64]; +- BORINGSSL_keccak(prekey_and_randomness, sizeof(prekey_and_randomness), +- decrypted, sizeof(decrypted), boringssl_sha3_512); +- uint8_t expected_ciphertext[KYBER_CIPHERTEXT_BYTES]; +- encrypt_cpa(expected_ciphertext, &priv->pub, decrypted, +- prekey_and_randomness + 32); +- uint8_t mask = +- constant_time_eq_int_8(CRYPTO_memcmp(ciphertext, expected_ciphertext, +- sizeof(expected_ciphertext)), +- 0); +- uint8_t input[64]; +- for (int i = 0; i < 32; i++) { +- input[i] = constant_time_select_8(mask, prekey_and_randomness[i], +- priv->fo_failure_secret[i]); ++ for(i=0;ic[i + j] = in_byte & 1; -- in_byte >>= 1; -- } ++ +/************************************************* -+* Name: 
rej_uniform -+* -+* Description: Run rejection sampling on uniform random bytes to generate -+* uniform random integers mod q ++* Name: keccak_squeezeblocks +* -+* Arguments: - int16_t *r: pointer to output buffer -+* - unsigned int len: requested number of 16-bit integers (uniform mod q) -+* - const uint8_t *buf: pointer to input buffer (assumed to be uniformly random bytes) -+* - unsigned int buflen: length of input buffer in bytes ++* Description: Squeeze step of Keccak. Squeezes full blocks of r bytes each. ++* Modifies the state. Can be called multiple times to keep ++* squeezing, i.e., is incremental. Assumes zero bytes of current ++* block have already been squeezed. +* -+* Returns number of sampled 16-bit integers (at most len) ++* Arguments: - uint8_t *out: pointer to output blocks ++* - size_t nblocks: number of blocks to be squeezed (written to out) ++* - uint64_t *s: pointer to input/output Keccak state ++* - unsigned int r: rate in bytes (e.g., 168 for SHAKE128) +**************************************************/ -+static unsigned int rej_uniform(int16_t *r, -+ unsigned int len, -+ const uint8_t *buf, -+ unsigned int buflen) ++static void keccak_squeezeblocks(uint8_t *out, ++ size_t nblocks, ++ uint64_t s[25], ++ unsigned int r) +{ -+ unsigned int ctr, pos; -+ uint16_t val0, val1; -+ -+ ctr = pos = 0; -+ while(ctr < len && pos + 3 <= buflen) { -+ val0 = ((buf[pos+0] >> 0) | ((uint16_t)buf[pos+1] << 8)) & 0xFFF; -+ val1 = ((buf[pos+1] >> 4) | ((uint16_t)buf[pos+2] << 4)) & 0xFFF; -+ pos += 3; ++ unsigned int i; + -+ if(val0 < KYBER_Q) -+ r[ctr++] = val0; -+ if(ctr < len && val1 < KYBER_Q) -+ r[ctr++] = val1; ++ while(nblocks) { ++ KeccakF1600_StatePermute(s); ++ for(i=0;i= |kPrime|. 
--static int vector_decode(vector *out, const uint8_t *in, int bits) { -- for (int i = 0; i < RANK; i++) { -- if (!scalar_decode(&out->v[i], in + i * bits * DEGREE / 8, bits)) { -- return 0; -+#define gen_a(A,B) gen_matrix(A,B,0) -+#define gen_at(A,B) gen_matrix(A,B,1) +-int KYBER_marshal_public_key(CBB *out, +- const struct KYBER_public_key *public_key) { +- return kyber_marshal_public_key(out, public_key_from_external(public_key)); + +/************************************************* -+* Name: gen_matrix ++* Name: shake128_absorb_once +* -+* Description: Deterministically generate matrix A (or the transpose of A) -+* from a seed. Entries of the matrix are polynomials that look -+* uniformly random. Performs rejection sampling on output of -+* a XOF ++* Description: Initialize, absorb into and finalize SHAKE128 XOF; non-incremental. +* -+* Arguments: - polyvec *a: pointer to ouptput matrix A -+* - const uint8_t *seed: pointer to input seed -+* - int transposed: boolean deciding whether A or A^T is generated ++* Arguments: - keccak_state *state: pointer to (uninitialized) output Keccak state ++* - const uint8_t *in: pointer to input to be absorbed into s ++* - size_t inlen: length of input in bytes +**************************************************/ -+#define GEN_MATRIX_NBLOCKS ((12*KYBER_N/8*(1 << 12)/KYBER_Q + XOF_BLOCKBYTES)/XOF_BLOCKBYTES) -+// Not static for benchmarking -+static void gen_matrix(polyvec *a, const uint8_t seed[KYBER_SYMBYTES], int transposed) ++static void shake128_absorb_once(keccak_state *state, const uint8_t *in, size_t inlen) +{ -+ unsigned int ctr, i, j, k; -+ unsigned int buflen, off; -+ uint8_t buf[GEN_MATRIX_NBLOCKS*XOF_BLOCKBYTES+2]; -+ xof_state state; -+ -+ for(i=0;i> kBarrettShift; -- uint32_t remainder = product - quotient * kPrime; -- -- // Adjust the quotient to round correctly: -- // 0 <= remainder <= kHalfPrime round to 0 -- // kHalfPrime < remainder <= kPrime + kHalfPrime round to 1 -- // kPrime + kHalfPrime < remainder < 2 
* kPrime round to 2 -- assert(remainder < 2u * kPrime); -- quotient += 1 & constant_time_lt_w(kHalfPrime, remainder); -- quotient += 1 & constant_time_lt_w(kPrime + kHalfPrime, remainder); -- return quotient & ((1 << bits) - 1); --} -- --// Decompresses |x| by using an equi-distant representative. The formula is --// round(kPrime/2^|bits|*x). Note that 2^|bits| being the divisor allows us to --// implement this logic using only bit operations. --static uint16_t decompress(uint16_t x, int bits) { -- uint32_t product = (uint32_t)x * kPrime; -- uint32_t power = 1 << bits; -- // This is |product| % power, since |power| is a power of 2. -- uint32_t remainder = product & (power - 1); -- // This is |product| / power, since |power| is a power of 2. -- uint32_t lower = product >> bits; -- // The rounding logic works since the first half of numbers mod |power| have a -- // 0 as first bit, and the second half has a 1 as first bit, since |power| is -- // a power of 2. As a 12 bit number, |remainder| is always positive, so we -- // will shift in 0s for a right shift. -- return lower + (remainder >> (bits - 1)); --} -- --static void scalar_compress(scalar *s, int bits) { -- for (int i = 0; i < DEGREE; i++) { -- s->c[i] = compress(s->c[i], bits); ++ keccak_absorb_once(state->s, SHAKE128_RATE, in, inlen, 0x1F); ++ state->pos = SHAKE128_RATE; + } + +-// kyber_parse_public_key_no_hash parses |in| into |pub| but doesn't calculate +-// the value of |pub->public_key_hash|. +-static int kyber_parse_public_key_no_hash(struct public_key *pub, CBS *in) { +- CBS t_bytes; +- if (!CBS_get_bytes(in, &t_bytes, kEncodedVectorSize) || +- !vector_decode(&pub->t, CBS_data(&t_bytes), kLog2Prime) || +- !CBS_copy_bytes(in, pub->rho, sizeof(pub->rho))) { +- return 0; - } +- matrix_expand(&pub->m, pub->rho); +- return 1; ++/************************************************* ++* Name: shake128_squeezeblocks ++* ++* Description: Squeeze step of SHAKE128 XOF. 
Squeezes full blocks of ++* SHAKE128_RATE bytes each. Can be called multiple times ++* to keep squeezing. Assumes new block has not yet been ++* started (state->pos = SHAKE128_RATE). ++* ++* Arguments: - uint8_t *out: pointer to output blocks ++* - size_t nblocks: number of blocks to be squeezed (written to output) ++* - keccak_state *s: pointer to input/output Keccak state ++**************************************************/ ++static void shake128_squeezeblocks(uint8_t *out, size_t nblocks, keccak_state *state) ++{ ++ keccak_squeezeblocks(out, nblocks, state->s, SHAKE128_RATE); } --static void scalar_decompress(scalar *s, int bits) { -- for (int i = 0; i < DEGREE; i++) { -- s->c[i] = decompress(s->c[i], bits); +-int KYBER_parse_public_key(struct KYBER_public_key *public_key, CBS *in) { +- struct public_key *pub = public_key_from_external(public_key); +- CBS orig_in = *in; +- if (!kyber_parse_public_key_no_hash(pub, in) || // +- CBS_len(in) != 0) { +- return 0; +- } +- BORINGSSL_keccak(pub->public_key_hash, sizeof(pub->public_key_hash), +- CBS_data(&orig_in), CBS_len(&orig_in), boringssl_sha3_256); +- return 1; +/************************************************* -+* Name: indcpa_keypair ++* Name: shake256_squeeze +* -+* Description: Generates public and private key for the CPA-secure -+* public-key encryption scheme underlying Kyber ++* Description: Squeeze step of SHAKE256 XOF. Squeezes arbitraily many ++* bytes. Can be called multiple times to keep squeezing. 
+* -+* Arguments: - uint8_t *pk: pointer to output public key -+* (of length KYBER_INDCPA_PUBLICKEYBYTES bytes) -+* - uint8_t *sk: pointer to output private key -+ (of length KYBER_INDCPA_SECRETKEYBYTES bytes) ++* Arguments: - uint8_t *out: pointer to output blocks ++* - size_t outlen : number of bytes to be squeezed (written to output) ++* - keccak_state *s: pointer to input/output Keccak state +**************************************************/ -+static void indcpa_keypair(uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES], -+ uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES], -+ const uint8_t seed[KYBER_SYMBYTES]) ++static void shake256_squeeze(uint8_t *out, size_t outlen, keccak_state *state) +{ -+ unsigned int i; -+ uint8_t buf[2*KYBER_SYMBYTES]; -+ const uint8_t *publicseed = buf; -+ const uint8_t *noiseseed = buf+KYBER_SYMBYTES; -+ uint8_t nonce = 0; -+ polyvec a[KYBER_K], e, pkpv, skpv; -+ -+ memcpy(buf, seed, KYBER_SYMBYTES); -+ hash_g(buf, buf, KYBER_SYMBYTES); -+ -+ gen_a(a, publicseed); -+ -+ for(i=0;ipos = keccak_squeeze(out, outlen, state->s, state->pos, SHAKE256_RATE); } --static void vector_compress(vector *a, int bits) { -- for (int i = 0; i < RANK; i++) { -- scalar_compress(&a->v[i], bits); +-int KYBER_marshal_private_key(CBB *out, +- const struct KYBER_private_key *private_key) { +- const struct private_key *const priv = private_key_from_external(private_key); +- uint8_t *s_output; +- if (!CBB_add_space(out, &s_output, kEncodedVectorSize)) { +- return 0; +- } +- vector_encode(s_output, &priv->s, kLog2Prime); +- if (!kyber_marshal_public_key(out, &priv->pub) || +- !CBB_add_bytes(out, priv->pub.public_key_hash, +- sizeof(priv->pub.public_key_hash)) || +- !CBB_add_bytes(out, priv->fo_failure_secret, +- sizeof(priv->fo_failure_secret))) { +- return 0; - } +- return 1; +-} +- +-int KYBER_parse_private_key(struct KYBER_private_key *out_private_key, +- CBS *in) { +- struct private_key *const priv = private_key_from_external(out_private_key); +- +- CBS s_bytes; +- if 
(!CBS_get_bytes(in, &s_bytes, kEncodedVectorSize) || +- !vector_decode(&priv->s, CBS_data(&s_bytes), kLog2Prime) || +- !kyber_parse_public_key_no_hash(&priv->pub, in) || +- !CBS_copy_bytes(in, priv->pub.public_key_hash, +- sizeof(priv->pub.public_key_hash)) || +- !CBS_copy_bytes(in, priv->fo_failure_secret, +- sizeof(priv->fo_failure_secret)) || +- CBS_len(in) != 0) { +- return 0; +/************************************************* -+* Name: indcpa_enc ++* Name: shake256_absorb_once +* -+* Description: Encryption function of the CPA-secure -+* public-key encryption scheme underlying Kyber. ++* Description: Initialize, absorb into and finalize SHAKE256 XOF; non-incremental. +* -+* Arguments: - uint8_t *c: pointer to output ciphertext -+* (of length KYBER_INDCPA_BYTES bytes) -+* - const uint8_t *m: pointer to input message -+* (of length KYBER_INDCPA_MSGBYTES bytes) -+* - const uint8_t *pk: pointer to input public key -+* (of length KYBER_INDCPA_PUBLICKEYBYTES) -+* - const uint8_t *coins: pointer to input random coins used as seed -+* (of length KYBER_SYMBYTES) to deterministically -+* generate all randomness ++* Arguments: - keccak_state *state: pointer to (uninitialized) output Keccak state ++* - const uint8_t *in: pointer to input to be absorbed into s ++* - size_t inlen: length of input in bytes +**************************************************/ -+static void indcpa_enc(uint8_t c[KYBER_INDCPA_BYTES], -+ const uint8_t m[KYBER_INDCPA_MSGBYTES], -+ const uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES], -+ const uint8_t coins[KYBER_SYMBYTES]) ++static void shake256_absorb_once(keccak_state *state, const uint8_t *in, size_t inlen) +{ -+ unsigned int i; -+ uint8_t seed[KYBER_SYMBYTES]; -+ uint8_t nonce = 0; -+ polyvec sp, pkpv, ep, at[KYBER_K], b; -+ poly v, k, epp; -+ -+ unpack_pk(&pkpv, seed, pk); -+ poly_frommsg(&k, m); -+ gen_at(at, seed); -+ -+ for(i=0;is, SHAKE256_RATE, in, inlen, 0x1F); ++ state->pos = SHAKE256_RATE; ++} + -+ polyvec_basemul_acc_montgomery(&v, &pkpv, 
&sp); ++/************************************************* ++* Name: shake256_squeezeblocks ++* ++* Description: Squeeze step of SHAKE256 XOF. Squeezes full blocks of ++* SHAKE256_RATE bytes each. Can be called multiple times ++* to keep squeezing. Assumes next block has not yet been ++* started (state->pos = SHAKE256_RATE). ++* ++* Arguments: - uint8_t *out: pointer to output blocks ++* - size_t nblocks: number of blocks to be squeezed (written to output) ++* - keccak_state *s: pointer to input/output Keccak state ++**************************************************/ ++static void shake256_squeezeblocks(uint8_t *out, size_t nblocks, keccak_state *state) ++{ ++ keccak_squeezeblocks(out, nblocks, state->s, SHAKE256_RATE); ++} + -+ polyvec_invntt_tomont(&b); -+ poly_invntt_tomont(&v); ++/************************************************* ++* Name: shake256 ++* ++* Description: SHAKE256 XOF with non-incremental API ++* ++* Arguments: - uint8_t *out: pointer to output ++* - size_t outlen: requested output length in bytes ++* - const uint8_t *in: pointer to input ++* - size_t inlen: length of input in bytes ++**************************************************/ ++static void shake256(uint8_t *out, size_t outlen, const uint8_t *in, size_t inlen) ++{ ++ size_t nblocks; ++ keccak_state state; + -+ polyvec_add(&b, &b, &ep); -+ poly_add(&v, &v, &epp); -+ poly_add(&v, &v, &k); -+ polyvec_reduce(&b); -+ poly_reduce(&v); ++ shake256_absorb_once(&state, in, inlen); ++ nblocks = outlen/SHAKE256_RATE; ++ shake256_squeezeblocks(out, nblocks, &state); ++ outlen -= nblocks*SHAKE256_RATE; ++ out += nblocks*SHAKE256_RATE; ++ shake256_squeeze(out, outlen, &state); ++} + -+ pack_ciphertext(c, &b, &v); - } - --static void vector_decompress(vector *a, int bits) { -- for (int i = 0; i < RANK; i++) { -- scalar_decompress(&a->v[i], bits); -- } +/************************************************* -+* Name: indcpa_dec ++* Name: sha3_256 +* -+* Description: Decryption function of the CPA-secure -+* 
public-key encryption scheme underlying Kyber. ++* Description: SHA3-256 with non-incremental API +* -+* Arguments: - uint8_t *m: pointer to output decrypted message -+* (of length KYBER_INDCPA_MSGBYTES) -+* - const uint8_t *c: pointer to input ciphertext -+* (of length KYBER_INDCPA_BYTES) -+* - const uint8_t *sk: pointer to input secret key -+* (of length KYBER_INDCPA_SECRETKEYBYTES) ++* Arguments: - uint8_t *h: pointer to output (32 bytes) ++* - const uint8_t *in: pointer to input ++* - size_t inlen: length of input in bytes +**************************************************/ -+static void indcpa_dec(uint8_t m[KYBER_INDCPA_MSGBYTES], -+ const uint8_t c[KYBER_INDCPA_BYTES], -+ const uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES]) ++static void sha3_256(uint8_t h[32], const uint8_t *in, size_t inlen) +{ -+ polyvec b, skpv; -+ poly v, mp; ++ unsigned int i; ++ uint64_t s[25]; + -+ unpack_ciphertext(&b, &v, c); -+ unpack_sk(&skpv, sk); ++ keccak_absorb_once(s, SHA3_256_RATE, in, inlen, 0x06); ++ KeccakF1600_StatePermute(s); ++ for(i=0;i<4;i++) ++ store64(h+8*i,s[i]); ++} + -+ polyvec_ntt(&b); -+ polyvec_basemul_acc_montgomery(&mp, &skpv, &b); -+ poly_invntt_tomont(&mp); ++/************************************************* ++* Name: sha3_512 ++* ++* Description: SHA3-512 with non-incremental API ++* ++* Arguments: - uint8_t *h: pointer to output (64 bytes) ++* - const uint8_t *in: pointer to input ++* - size_t inlen: length of input in bytes ++**************************************************/ ++static void sha3_512(uint8_t h[64], const uint8_t *in, size_t inlen) ++{ ++ unsigned int i; ++ uint64_t s[25]; + -+ poly_sub(&mp, &v, &mp); -+ poly_reduce(&mp); ++ keccak_absorb_once(s, SHA3_512_RATE, in, inlen, 0x06); ++ KeccakF1600_StatePermute(s); ++ for(i=0;i<8;i++) ++ store64(h+8*i,s[i]); ++} ++ ++// ++// symmetric-shake.c ++// + -+ poly_tomsg(m, &mp); - } - --struct public_key { -- vector t; -- uint8_t rho[32]; -- uint8_t public_key_hash[32]; -- matrix m; --}; 
+/************************************************* +* Name: kyber_shake128_absorb +* @@ -3360,7 +3336,7 @@ index 776c085f9..5acd45cd9 100644 +* - uint8_t i: additional byte of input +* - uint8_t j: additional byte of input +**************************************************/ -+void kyber_shake128_absorb(keccak_state *state, ++static void kyber_shake128_absorb(keccak_state *state, + const uint8_t seed[KYBER_SYMBYTES], + uint8_t x, + uint8_t y) @@ -3373,14 +3349,7 @@ index 776c085f9..5acd45cd9 100644 + + shake128_absorb_once(state, extseed, sizeof(extseed)); +} - --static struct public_key *public_key_from_external( -- const struct KYBER_public_key *external) { -- static_assert(sizeof(struct KYBER_public_key) >= sizeof(struct public_key), -- "Kyber public key is too small"); -- static_assert(alignof(struct KYBER_public_key) >= alignof(struct public_key), -- "Kyber public key align incorrect"); -- return (struct public_key *)external; ++ +/************************************************* +* Name: kyber_shake256_prf +* @@ -3400,76 +3369,12 @@ index 776c085f9..5acd45cd9 100644 + extkey[KYBER_SYMBYTES] = nonce; + + shake256(out, outlen, extkey, sizeof(extkey)); - } - --struct private_key { -- struct public_key pub; -- vector s; -- uint8_t fo_failure_secret[32]; --}; ++} ++ +// +// kem.c +// - --static struct private_key *private_key_from_external( -- const struct KYBER_private_key *external) { -- static_assert(sizeof(struct KYBER_private_key) >= sizeof(struct private_key), -- "Kyber private key too small"); -- static_assert( -- alignof(struct KYBER_private_key) >= alignof(struct private_key), -- "Kyber private key align incorrect"); -- return (struct private_key *)external; --} -- --// Calls |KYBER_generate_key_external_entropy| with random bytes from --// |RAND_bytes|. 
--void KYBER_generate_key(uint8_t out_encoded_public_key[KYBER_PUBLIC_KEY_BYTES], -- struct KYBER_private_key *out_private_key) { -- uint8_t entropy[KYBER_GENERATE_KEY_ENTROPY]; -- RAND_bytes(entropy, sizeof(entropy)); -- KYBER_generate_key_external_entropy(out_encoded_public_key, out_private_key, -- entropy); --} -- --static int kyber_marshal_public_key(CBB *out, const struct public_key *pub) { -- uint8_t *vector_output; -- if (!CBB_add_space(out, &vector_output, kEncodedVectorSize)) { -- return 0; -- } -- vector_encode(vector_output, &pub->t, kLog2Prime); -- if (!CBB_add_bytes(out, pub->rho, sizeof(pub->rho))) { -- return 0; -- } -- return 1; --} -- --// Algorithms 4 and 7 of the Kyber spec. Algorithms are combined since key --// generation is not part of the FO transform, and the spec uses Algorithm 7 to --// specify the actual key format. --void KYBER_generate_key_external_entropy( -- uint8_t out_encoded_public_key[KYBER_PUBLIC_KEY_BYTES], -- struct KYBER_private_key *out_private_key, -- const uint8_t entropy[KYBER_GENERATE_KEY_ENTROPY]) { -- struct private_key *priv = private_key_from_external(out_private_key); -- uint8_t hashed[64]; -- BORINGSSL_keccak(hashed, sizeof(hashed), entropy, 32, boringssl_sha3_512); -- const uint8_t *const rho = hashed; -- const uint8_t *const sigma = hashed + 32; -- OPENSSL_memcpy(priv->pub.rho, hashed, sizeof(priv->pub.rho)); -- matrix_expand(&priv->pub.m, rho); -- uint8_t counter = 0; -- vector_generate_secret_eta_2(&priv->s, &counter, sigma); -- vector_ntt(&priv->s); -- vector error; -- vector_generate_secret_eta_2(&error, &counter, sigma); -- vector_ntt(&error); -- matrix_mult_transpose(&priv->pub.t, &priv->pub.m, &priv->s); -- vector_add(&priv->pub.t, &error); -- -- CBB cbb; -- CBB_init_fixed(&cbb, out_encoded_public_key, KYBER_PUBLIC_KEY_BYTES); -- if (!kyber_marshal_public_key(&cbb, &priv->pub)) { -- abort(); ++ +// Modified crypto_kem_keypair to BoringSSL style API +void generate_key(struct public_key *out_pub, struct 
private_key *out_priv, + const uint8_t seed[KYBER_GENERATE_KEY_BYTES]) @@ -3486,13 +3391,11 @@ index 776c085f9..5acd45cd9 100644 + memcpy(sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES, seed+KYBER_SYMBYTES, KYBER_SYMBYTES); +} + -+// Internal version that allows us to select between initial public draft -+// (when ipd=1) and round3 kyber (ipd=0). -+void encap2(uint8_t out_ciphertext[KYBER_CIPHERTEXTBYTES], ++// Modified crypto_kem_enc to BoringSSL style API ++void encap(uint8_t out_ciphertext[KYBER_CIPHERTEXTBYTES], + uint8_t ss[KYBER_KEY_BYTES], + const struct public_key *in_pub, -+ const uint8_t seed[KYBER_ENCAP_BYTES], -+ int ipd) ++ const uint8_t seed[KYBER_ENCAP_BYTES]) +{ + const uint8_t *pk = &in_pub->opaque[0]; + uint8_t *ct = out_ciphertext; @@ -3502,142 +3405,9 @@ index 776c085f9..5acd45cd9 100644 + uint8_t kr[2*KYBER_SYMBYTES]; + + memcpy(buf, seed, KYBER_SYMBYTES); ++ /* Don't release system RNG output */ ++ hash_h(buf, buf, KYBER_SYMBYTES); + -+ if (ipd == 0) { -+ /* Don't release system RNG output */ -+ hash_h(buf, buf, KYBER_SYMBYTES); - } - -- BORINGSSL_keccak(priv->pub.public_key_hash, sizeof(priv->pub.public_key_hash), -- out_encoded_public_key, KYBER_PUBLIC_KEY_BYTES, -- boringssl_sha3_256); -- OPENSSL_memcpy(priv->fo_failure_secret, entropy + 32, 32); --} -- --void KYBER_public_from_private(struct KYBER_public_key *out_public_key, -- const struct KYBER_private_key *private_key) { -- struct public_key *const pub = public_key_from_external(out_public_key); -- const struct private_key *const priv = private_key_from_external(private_key); -- *pub = priv->pub; --} -- --// Algorithm 5 of the Kyber spec. Encrypts a message with given randomness to --// the ciphertext in |out|. Without applying the Fujisaki-Okamoto transform this --// would not result in a CCA secure scheme, since lattice schemes are vulnerable --// to decryption failure oracles. 
--static void encrypt_cpa(uint8_t out[KYBER_CIPHERTEXT_BYTES], -- const struct public_key *pub, const uint8_t message[32], -- const uint8_t randomness[32]) { -- uint8_t counter = 0; -- vector secret; -- vector_generate_secret_eta_2(&secret, &counter, randomness); -- vector_ntt(&secret); -- vector error; -- vector_generate_secret_eta_2(&error, &counter, randomness); -- uint8_t input[33]; -- OPENSSL_memcpy(input, randomness, 32); -- input[32] = counter; -- scalar scalar_error; -- scalar_centered_binomial_distribution_eta_2_with_prf(&scalar_error, input); -- vector u; -- matrix_mult(&u, &pub->m, &secret); -- vector_inverse_ntt(&u); -- vector_add(&u, &error); -- scalar v; -- scalar_inner_product(&v, &pub->t, &secret); -- scalar_inverse_ntt(&v); -- scalar_add(&v, &scalar_error); -- scalar expanded_message; -- scalar_decode_1(&expanded_message, message); -- scalar_decompress(&expanded_message, 1); -- scalar_add(&v, &expanded_message); -- vector_compress(&u, kDU); -- vector_encode(out, &u, kDU); -- scalar_compress(&v, kDV); -- scalar_encode(out + kCompressedVectorSize, &v, kDV); --} -- --// Calls KYBER_encap_external_entropy| with random bytes from |RAND_bytes| --void KYBER_encap(uint8_t out_ciphertext[KYBER_CIPHERTEXT_BYTES], -- uint8_t *out_shared_secret, size_t out_shared_secret_len, -- const struct KYBER_public_key *public_key) { -- uint8_t entropy[KYBER_ENCAP_ENTROPY]; -- RAND_bytes(entropy, KYBER_ENCAP_ENTROPY); -- KYBER_encap_external_entropy(out_ciphertext, out_shared_secret, -- out_shared_secret_len, public_key, entropy); --} -- --// Algorithm 8 of the Kyber spec, safe for line 2 of the spec. The spec there --// hashes the output of the system's random number generator, since the FO --// transform will reveal it to the decrypting party. There is no reason to do --// this when a secure random number generator is used. When an insecure random --// number generator is used, the caller should switch to a secure one before --// calling this method. 
--void KYBER_encap_external_entropy( -- uint8_t out_ciphertext[KYBER_CIPHERTEXT_BYTES], uint8_t *out_shared_secret, -- size_t out_shared_secret_len, const struct KYBER_public_key *public_key, -- const uint8_t entropy[KYBER_ENCAP_ENTROPY]) { -- const struct public_key *pub = public_key_from_external(public_key); -- uint8_t input[64]; -- OPENSSL_memcpy(input, entropy, KYBER_ENCAP_ENTROPY); -- OPENSSL_memcpy(input + KYBER_ENCAP_ENTROPY, pub->public_key_hash, -- sizeof(input) - KYBER_ENCAP_ENTROPY); -- uint8_t prekey_and_randomness[64]; -- BORINGSSL_keccak(prekey_and_randomness, sizeof(prekey_and_randomness), input, -- sizeof(input), boringssl_sha3_512); -- encrypt_cpa(out_ciphertext, pub, entropy, prekey_and_randomness + 32); -- BORINGSSL_keccak(prekey_and_randomness + 32, 32, out_ciphertext, -- KYBER_CIPHERTEXT_BYTES, boringssl_sha3_256); -- BORINGSSL_keccak(out_shared_secret, out_shared_secret_len, -- prekey_and_randomness, sizeof(prekey_and_randomness), -- boringssl_shake256); --} -- --// Algorithm 6 of the Kyber spec. --static void decrypt_cpa(uint8_t out[32], const struct private_key *priv, -- const uint8_t ciphertext[KYBER_CIPHERTEXT_BYTES]) { -- vector u; -- vector_decode(&u, ciphertext, kDU); -- vector_decompress(&u, kDU); -- vector_ntt(&u); -- scalar v; -- scalar_decode(&v, ciphertext + kCompressedVectorSize, kDV); -- scalar_decompress(&v, kDV); -- scalar mask; -- scalar_inner_product(&mask, &priv->s, &u); -- scalar_inverse_ntt(&mask); -- scalar_sub(&v, &mask); -- scalar_compress(&v, 1); -- scalar_encode_1(out, &v); --} -- --// Algorithm 9 of the Kyber spec, performing the FO transform by running --// encrypt_cpa on the decrypted message. The spec does not allow the decryption --// failure to be passed on to the caller, and instead returns a result that is --// deterministic but unpredictable to anyone without knowledge of the private --// key. 
--void KYBER_decap(uint8_t *out_shared_secret, size_t out_shared_secret_len, -- const uint8_t ciphertext[KYBER_CIPHERTEXT_BYTES], -- const struct KYBER_private_key *private_key) { -- const struct private_key *priv = private_key_from_external(private_key); -- uint8_t decrypted[64]; -- decrypt_cpa(decrypted, priv, ciphertext); -- OPENSSL_memcpy(decrypted + 32, priv->pub.public_key_hash, -- sizeof(decrypted) - 32); -- uint8_t prekey_and_randomness[64]; -- BORINGSSL_keccak(prekey_and_randomness, sizeof(prekey_and_randomness), -- decrypted, sizeof(decrypted), boringssl_sha3_512); -- uint8_t expected_ciphertext[KYBER_CIPHERTEXT_BYTES]; -- encrypt_cpa(expected_ciphertext, &priv->pub, decrypted, -- prekey_and_randomness + 32); -- uint8_t mask = -- constant_time_eq_int_8(CRYPTO_memcmp(ciphertext, expected_ciphertext, -- sizeof(expected_ciphertext)), -- 0); -- uint8_t input[64]; -- for (int i = 0; i < 32; i++) { -- input[i] = constant_time_select_8(mask, prekey_and_randomness[i], -- priv->fo_failure_secret[i]); + /* Multitarget countermeasure for coins + contributory KEM */ + hash_h(buf+KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); + hash_g(kr, buf, 2*KYBER_SYMBYTES); @@ -3645,44 +3415,16 @@ index 776c085f9..5acd45cd9 100644 + /* coins are in kr+KYBER_SYMBYTES */ + indcpa_enc(ct, buf, pk, kr+KYBER_SYMBYTES); + -+ if (ipd == 1) { -+ memcpy(ss, kr, KYBER_SYMBYTES); -+ } else { -+ /* overwrite coins in kr with H(c) */ -+ hash_h(kr+KYBER_SYMBYTES, ct, KYBER_CIPHERTEXTBYTES); -+ /* hash concatenation of pre-k and H(c) to k */ -+ kdf(ss, kr, 2*KYBER_SYMBYTES); - } -- BORINGSSL_keccak(input + 32, 32, ciphertext, KYBER_CIPHERTEXT_BYTES, -- boringssl_sha3_256); -- BORINGSSL_keccak(out_shared_secret, out_shared_secret_len, input, -- sizeof(input), boringssl_shake256); - } - --int KYBER_marshal_public_key(CBB *out, -- const struct KYBER_public_key *public_key) { -- return kyber_marshal_public_key(out, public_key_from_external(public_key)); -+// Modified crypto_kem_enc to BoringSSL style 
API. -+void encap(uint8_t out_ciphertext[KYBER_CIPHERTEXTBYTES], -+ uint8_t ss[KYBER_KEY_BYTES], -+ const struct public_key *in_pub, -+ const uint8_t seed[KYBER_ENCAP_BYTES]) { -+ encap2(out_ciphertext, ss, in_pub, seed, 0); - } - --// kyber_parse_public_key_no_hash parses |in| into |pub| but doesn't calculate --// the value of |pub->public_key_hash|. --static int kyber_parse_public_key_no_hash(struct public_key *pub, CBS *in) { -- CBS t_bytes; -- if (!CBS_get_bytes(in, &t_bytes, kEncodedVectorSize) || -- !vector_decode(&pub->t, CBS_data(&t_bytes), kLog2Prime) || -- !CBS_copy_bytes(in, pub->rho, sizeof(pub->rho))) { -- return 0; -+// Internal version that allows us to select between initial public draft -+// (when ipd=1) and round3 kyber (ipd=0). -+void decap2(uint8_t out_shared_key[KYBER_SSBYTES], ++ /* overwrite coins in kr with H(c) */ ++ hash_h(kr+KYBER_SYMBYTES, ct, KYBER_CIPHERTEXTBYTES); ++ /* hash concatenation of pre-k and H(c) to k */ ++ kdf(ss, kr, 2*KYBER_SYMBYTES); ++} ++ ++// Modified crypto_kem_decap to BoringSSL style API ++void decap(uint8_t out_shared_key[KYBER_SSBYTES], + const struct private_key *in_priv, -+ const uint8_t *ct, size_t ciphertext_len, int ipd) ++ const uint8_t *ct, size_t ciphertext_len) +{ + uint8_t *ss = out_shared_key; + const uint8_t *sk = &in_priv->opaque[0]; @@ -3708,89 +3450,17 @@ index 776c085f9..5acd45cd9 100644 + + fail = verify(ct, cmp, KYBER_CIPHERTEXTBYTES); } -- matrix_expand(&pub->m, pub->rho); -- return 1; --} - --int KYBER_parse_public_key(struct KYBER_public_key *public_key, CBS *in) { -- struct public_key *pub = public_key_from_external(public_key); -- CBS orig_in = *in; -- if (!kyber_parse_public_key_no_hash(pub, in) || // -- CBS_len(in) != 0) { -- return 0; -+ if (ipd == 1) { -+ /* Compute shared secret in case of rejection: ss2 = PRF(z || c). 
*/ -+ uint8_t ss2[KYBER_SYMBYTES]; -+ keccak_state ks; -+ shake256_init(&ks); -+ shake256_absorb( -+ &ks, -+ sk + KYBER_SECRETKEYBYTES - KYBER_SYMBYTES, -+ KYBER_SYMBYTES -+ ); -+ shake256_absorb(&ks, ct, ciphertext_len); -+ shake256_finalize(&ks); -+ shake256_squeeze(ss2, KYBER_SYMBYTES, &ks); -+ -+ /* Set ss2 to the real shared secret if c = c' */ -+ cmov(ss2, kr, KYBER_SYMBYTES, 1-fail); -+ memcpy(ss, ss2, KYBER_SYMBYTES); -+ } else { -+ /* overwrite coins in kr with H(c) */ -+ hash_h(kr+KYBER_SYMBYTES, ct, ciphertext_len); -+ -+ /* Overwrite pre-k with z on re-encryption failure */ -+ cmov(kr, sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES, KYBER_SYMBYTES, fail); -+ -+ /* hash concatenation of pre-k and H(c) to k */ -+ kdf(ss, kr, 2*KYBER_SYMBYTES); - } -- BORINGSSL_keccak(pub->public_key_hash, sizeof(pub->public_key_hash), -- CBS_data(&orig_in), CBS_len(&orig_in), boringssl_sha3_256); -- return 1; - } - --int KYBER_marshal_private_key(CBB *out, -- const struct KYBER_private_key *private_key) { -- const struct private_key *const priv = private_key_from_external(private_key); -- uint8_t *s_output; -- if (!CBB_add_space(out, &s_output, kEncodedVectorSize)) { -- return 0; -- } -- vector_encode(s_output, &priv->s, kLog2Prime); -- if (!kyber_marshal_public_key(out, &priv->pub) || -- !CBB_add_bytes(out, priv->pub.public_key_hash, -- sizeof(priv->pub.public_key_hash)) || -- !CBB_add_bytes(out, priv->fo_failure_secret, -- sizeof(priv->fo_failure_secret))) { -- return 0; -- } -- return 1; --} -- --int KYBER_parse_private_key(struct KYBER_private_key *out_private_key, -- CBS *in) { -- struct private_key *const priv = private_key_from_external(out_private_key); -- -- CBS s_bytes; -- if (!CBS_get_bytes(in, &s_bytes, kEncodedVectorSize) || -- !vector_decode(&priv->s, CBS_data(&s_bytes), kLog2Prime) || -- !kyber_parse_public_key_no_hash(&priv->pub, in) || -- !CBS_copy_bytes(in, priv->pub.public_key_hash, -- sizeof(priv->pub.public_key_hash)) || -- !CBS_copy_bytes(in, 
priv->fo_failure_secret, -- sizeof(priv->fo_failure_secret)) || -- CBS_len(in) != 0) { -- return 0; -- } - return 1; -+// Modified crypto_kem_decap to BoringSSL style API -+void decap(uint8_t out_shared_key[KYBER_SSBYTES], -+ const struct private_key *in_priv, -+ const uint8_t *ct, size_t ciphertext_len) { -+ decap2(out_shared_key, in_priv, ct, ciphertext_len, 0); -+} + ++ /* overwrite coins in kr with H(c) */ ++ hash_h(kr+KYBER_SYMBYTES, ct, ciphertext_len); ++ ++ /* Overwrite pre-k with z on re-encryption failure */ ++ cmov(kr, sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES, KYBER_SYMBYTES, fail); ++ ++ /* hash concatenation of pre-k and H(c) to k */ ++ kdf(ss, kr, 2*KYBER_SYMBYTES); ++} + +void marshal_public_key(uint8_t out[KYBER_PUBLICKEYBYTES], + const struct public_key *in_pub) { @@ -3801,41 +3471,6 @@ index 776c085f9..5acd45cd9 100644 + const uint8_t in[KYBER_PUBLICKEYBYTES]) { + memcpy(&out->opaque, in, KYBER_PUBLICKEYBYTES); } -diff --git a/src/crypto/kyber/kyber.h b/src/crypto/kyber/kyber.h -new file mode 100644 -index 000000000..16e47d582 ---- /dev/null -+++ b/src/crypto/kyber/kyber.h -@@ -0,0 +1,29 @@ -+#ifndef OPENSSL_HEADER_KYBER_KYBER_H -+#define OPENSSL_HEADER_KYBER_KYBER_H -+ -+#include -+ -+#include -+#include -+ -+void KYBER512_encap2(uint8_t out_ciphertext[KYBER512_CIPHERTEXT_BYTES], -+ uint8_t ss[KYBER_KEY_BYTES], -+ const struct KYBER512_public_key *in_pub, -+ const uint8_t seed[KYBER_ENCAP_BYTES], -+ int ipd); -+ -+void KYBER512_decap2(uint8_t out_shared_key[KYBER_KEY_BYTES], -+ const struct KYBER512_private_key *in_priv, -+ const uint8_t *ct, size_t ciphertext_len, int ipd); -+ -+void KYBER768_encap2(uint8_t out_ciphertext[KYBER768_CIPHERTEXT_BYTES], -+ uint8_t ss[KYBER_KEY_BYTES], -+ const struct KYBER768_public_key *in_pub, -+ const uint8_t seed[KYBER_ENCAP_BYTES], -+ int ipd); -+ -+void KYBER768_decap2(uint8_t out_shared_key[KYBER_KEY_BYTES], -+ const struct KYBER768_private_key *in_priv, -+ const uint8_t *ct, size_t ciphertext_len, int ipd); -+ 
-+#endif diff --git a/src/crypto/kyber/kyber512.c b/src/crypto/kyber/kyber512.c new file mode 100644 index 000000000..21eed11a2 @@ -4093,7 +3728,7 @@ index eb76b5bd7..000000000 - FileTestGTest("crypto/kyber/kyber_tests.txt", KyberFileTest); -} diff --git a/src/crypto/obj/obj_dat.h b/src/crypto/obj/obj_dat.h -index 654b3c08e..0d3d9f24f 100644 +index 654b3c08e..06f80f971 100644 --- a/src/crypto/obj/obj_dat.h +++ b/src/crypto/obj/obj_dat.h @@ -57,7 +57,7 @@ @@ -4101,11 +3736,11 @@ index 654b3c08e..0d3d9f24f 100644 -#define NUM_NID 965 -+#define NUM_NID 969 ++#define NUM_NID 968 static const uint8_t kObjectData[] = { /* NID_rsadsi */ -@@ -8784,6 +8784,13 @@ static const ASN1_OBJECT kObjects[NUM_NID] = { +@@ -8784,6 +8784,12 @@ static const ASN1_OBJECT kObjects[NUM_NID] = { {"HKDF", "hkdf", NID_hkdf, 0, NULL, 0}, {"X25519Kyber768Draft00", "X25519Kyber768Draft00", NID_X25519Kyber768Draft00, 0, NULL, 0}, @@ -4115,19 +3750,10 @@ index 654b3c08e..0d3d9f24f 100644 + NULL, 0}, + {"X25519Kyber768Draft00Old", "X25519Kyber768Draft00Old", + NID_X25519Kyber768Draft00Old, 0, NULL, 0}, -+ {"IPDWing", "IPDWing", NID_IPDWing, 0, NULL, 0}, }; static const uint16_t kNIDsInShortNameOrder[] = { -@@ -8889,6 +8896,7 @@ static const uint16_t kNIDsInShortNameOrder[] = { - 35 /* IDEA-CFB */, - 36 /* IDEA-ECB */, - 46 /* IDEA-OFB */, -+ 968 /* IPDWing */, - 181 /* ISO */, - 183 /* ISO-US */, - 645 /* ITU-T */, -@@ -8916,6 +8924,7 @@ static const uint16_t kNIDsInShortNameOrder[] = { +@@ -8916,6 +8922,7 @@ static const uint16_t kNIDsInShortNameOrder[] = { 18 /* OU */, 749 /* Oakley-EC2N-3 */, 750 /* Oakley-EC2N-4 */, @@ -4135,7 +3761,7 @@ index 654b3c08e..0d3d9f24f 100644 9 /* PBE-MD2-DES */, 168 /* PBE-MD2-RC2-64 */, 10 /* PBE-MD5-DES */, -@@ -8982,7 +8991,9 @@ static const uint16_t kNIDsInShortNameOrder[] = { +@@ -8982,7 +8989,9 @@ static const uint16_t kNIDsInShortNameOrder[] = { 458 /* UID */, 0 /* UNDEF */, 948 /* X25519 */, @@ -4145,15 +3771,7 @@ index 654b3c08e..0d3d9f24f 100644 961 /* 
X448 */, 11 /* X500 */, 378 /* X500algorithms */, -@@ -9787,6 +9798,7 @@ static const uint16_t kNIDsInLongNameOrder[] = { - 431 /* Hold Instruction None */, - 433 /* Hold Instruction Reject */, - 634 /* ICC or token signature */, -+ 968 /* IPDWing */, - 294 /* IPSec End System */, - 295 /* IPSec Tunnel */, - 296 /* IPSec User */, -@@ -9829,6 +9841,7 @@ static const uint16_t kNIDsInLongNameOrder[] = { +@@ -9829,6 +9838,7 @@ static const uint16_t kNIDsInLongNameOrder[] = { 366 /* OCSP Nonce */, 371 /* OCSP Service Locator */, 180 /* OCSP Signing */, @@ -4161,7 +3779,7 @@ index 654b3c08e..0d3d9f24f 100644 161 /* PBES2 */, 69 /* PBKDF2 */, 162 /* PBMAC1 */, -@@ -9853,7 +9866,9 @@ static const uint16_t kNIDsInLongNameOrder[] = { +@@ -9853,7 +9863,9 @@ static const uint16_t kNIDsInLongNameOrder[] = { 133 /* Time Stamping */, 375 /* Trust Root */, 948 /* X25519 */, @@ -4172,22 +3790,21 @@ index 654b3c08e..0d3d9f24f 100644 12 /* X509 */, 402 /* X509v3 AC Targeting */, diff --git a/src/crypto/obj/obj_mac.num b/src/crypto/obj/obj_mac.num -index a0519acee..019770f16 100644 +index a0519acee..caeb5eaed 100644 --- a/src/crypto/obj/obj_mac.num +++ b/src/crypto/obj/obj_mac.num -@@ -952,3 +952,7 @@ X448 961 +@@ -952,3 +952,6 @@ X448 961 sha512_256 962 hkdf 963 X25519Kyber768Draft00 964 +X25519Kyber512Draft00 965 +P256Kyber768Draft00 966 +X25519Kyber768Draft00Old 967 -+IPDWing 968 diff --git a/src/crypto/obj/objects.txt b/src/crypto/obj/objects.txt -index 3ad32ea3d..475d278df 100644 +index 3ad32ea3d..aa1404d83 100644 --- a/src/crypto/obj/objects.txt +++ b/src/crypto/obj/objects.txt -@@ -1332,8 +1332,12 @@ secg-scheme 14 3 : dhSinglePass-cofactorDH-sha512kdf-scheme +@@ -1332,8 +1332,11 @@ secg-scheme 14 3 : dhSinglePass-cofactorDH-sha512kdf-scheme : dh-std-kdf : dh-cofactor-kdf @@ -4197,12 +3814,11 @@ index 3ad32ea3d..475d278df 100644 : X25519Kyber768Draft00 + : P256Kyber768Draft00 + : X25519Kyber768Draft00Old -+ : IPDWing # See RFC 8410. 
1 3 101 110 : X25519 diff --git a/src/include/openssl/kyber.h b/src/include/openssl/kyber.h -index cafae9d17..1c889a075 100644 +index cafae9d17..074ac5906 100644 --- a/src/include/openssl/kyber.h +++ b/src/include/openssl/kyber.h @@ -1,17 +1,3 @@ @@ -4223,23 +3839,20 @@ index cafae9d17..1c889a075 100644 #ifndef OPENSSL_HEADER_KYBER_H #define OPENSSL_HEADER_KYBER_H -@@ -21,105 +7,157 @@ +@@ -21,105 +7,100 @@ extern "C" { #endif -- --// Kyber768. +#define KYBER512_PUBLIC_KEY_BYTES 800 +#define KYBER512_CIPHERTEXT_BYTES 768 +#define KYBER512_PRIVATE_KEY_BYTES 1632 +#define KYBER768_PUBLIC_KEY_BYTES 1184 +#define KYBER768_CIPHERTEXT_BYTES 1088 +#define KYBER768_PRIVATE_KEY_BYTES 2400 -+#define IPDWING_PUBLIC_KEY_BYTES 1216 -+#define IPDWING_CIPHERTEXT_BYTES 1120 -+#define IPDWING_PRIVATE_KEY_BYTES 2464 - +-// Kyber768. +- +- -// KYBER_public_key contains a Kyber768 public key. The contents of this -// object should never leave the address space since the format is unstable. -struct KYBER_public_key { @@ -4261,10 +3874,11 @@ index cafae9d17..1c889a075 100644 +struct KYBER768_private_key { + uint8_t opaque[KYBER768_PRIVATE_KEY_BYTES]; +}; -+struct IPDWING_private_key { -+ struct KYBER768_private_key m; -+ uint8_t x[32]; -+ uint8_t xpub[32]; ++struct KYBER512_public_key { ++ uint8_t opaque[KYBER512_PUBLIC_KEY_BYTES]; ++}; ++struct KYBER768_public_key { ++ uint8_t opaque[KYBER768_PUBLIC_KEY_BYTES]; }; -// KYBER_PUBLIC_KEY_BYTES is the number of bytes in an encoded Kyber768 public @@ -4343,39 +3957,18 @@ index cafae9d17..1c889a075 100644 -// there are trailing bytes in |in|. 
-OPENSSL_EXPORT int KYBER_parse_private_key( - struct KYBER_private_key *out_private_key, CBS *in); -+struct KYBER512_public_key { -+ uint8_t opaque[KYBER512_PUBLIC_KEY_BYTES]; -+}; -+struct KYBER768_public_key { -+ uint8_t opaque[KYBER768_PUBLIC_KEY_BYTES]; -+}; -+struct IPDWING_public_key { -+ struct KYBER768_public_key m; -+ uint8_t x[32]; -+}; - +- +// KYBER_GENERATE_KEY_BYTES is the number of bytes of entropy needed to +// generate a keypair. +#define KYBER_GENERATE_KEY_BYTES 64 + -+// IPDWING_GENERATE_KEY_BYTES is the number of bytes of entropy needed to -+// generate a keypair. -+#define IPDWING_GENERATE_KEY_BYTES 96 -+ +// KYBER_ENCAP_BYTES is the number of bytes of entropy needed to encapsulate a +// session key. +#define KYBER_ENCAP_BYTES 32 + -+// IPDWING_ENCAP_BYTES is the number of bytes of entropy needed to encapsulate a -+// session key. -+#define IPDWING_ENCAP_BYTES 64 -+ +// KYBER_KEY_BYTES is the number of bytes in a shared key. +#define KYBER_KEY_BYTES 32 + -+// IPDWING_KEY_BYTES is the number of bytes in a shared key. -+#define IPDWING_KEY_BYTES 32 -+ +// KYBER512_generate_key is a deterministic function that outputs a public and +// private key based on the given entropy. +OPENSSL_EXPORT void KYBER512_generate_key( @@ -4388,12 +3981,6 @@ index cafae9d17..1c889a075 100644 + struct KYBER768_public_key *out_pub, struct KYBER768_private_key *out_priv, + const uint8_t input[KYBER_GENERATE_KEY_BYTES]); + -+// IPDWING_generate_key is a deterministic function that outputs a public and -+// private key based on the given entropy. -+OPENSSL_EXPORT void IPDWING_generate_key( -+ struct IPDWING_public_key *out_pub, struct IPDWING_private_key *out_priv, -+ const uint8_t input[IPDWING_GENERATE_KEY_BYTES]); -+ +// KYBER512_encap is a deterministic function the generates and encrypts a random +// session key from the given entropy, writing those values to |out_shared_key| +// and |out_ciphertext|, respectively. 
@@ -4410,14 +3997,6 @@ index cafae9d17..1c889a075 100644 + const struct KYBER768_public_key *in_pub, + const uint8_t in[KYBER_ENCAP_BYTES]); + -+// IPDWING_encap is a deterministic function the generates and encrypts a random -+// session key from the given entropy, writing those values to |out_shared_key| -+// and |out_ciphertext|, respectively. -+OPENSSL_EXPORT void IPDWING_encap(uint8_t out_ciphertext[IPDWING_CIPHERTEXT_BYTES], -+ uint8_t out_shared_key[IPDWING_KEY_BYTES], -+ const struct IPDWING_public_key *in_pub, -+ const uint8_t in[IPDWING_ENCAP_BYTES]); -+ +// KYBER_decap decrypts a session key from |ciphertext_len| bytes of +// |ciphertext|. If the ciphertext is valid, the decrypted key is written to +// |out_shared_key|. Otherwise a key dervied from |ciphertext| and a secret key (kept @@ -4428,7 +4007,7 @@ index cafae9d17..1c889a075 100644 + const struct KYBER512_private_key *in_priv, + const uint8_t *ciphertext, size_t ciphertext_len); + -+// KYBER768_decap decrypts a session key from |ciphertext_len| bytes of ++// KYBER_decap decrypts a session key from |ciphertext_len| bytes of +// |ciphertext|. If the ciphertext is valid, the decrypted key is written to +// |out_shared_key|. Otherwise a key dervied from |ciphertext| and a secret key (kept +// in |in_priv|) is written. If the ciphertext is the wrong length then it will @@ -4438,16 +4017,6 @@ index cafae9d17..1c889a075 100644 + const struct KYBER768_private_key *in_priv, + const uint8_t *ciphertext, size_t ciphertext_len); + -+// IPDWING_decap decrypts a session key from IPDWING_CIPHERTEXT_BYTES bytes of -+// |ciphertext|. If the ciphertext is valid, the decrypted key is written to -+// |out_shared_key|. Otherwise a key dervied from |ciphertext| and a secret key (kept -+// in |in_priv|) is written. If the ciphertext is the wrong length then it will -+// leak which was done via side-channels. Otherwise it should perform either -+// action in constant-time. 
-+OPENSSL_EXPORT void IPDWING_decap(uint8_t out_shared_key[KYBER_KEY_BYTES], -+ const struct IPDWING_private_key *in_priv, -+ const uint8_t *ciphertext); -+ +// KYBER512_marshal_public_key serialises |in_pub| to |out|. +OPENSSL_EXPORT void KYBER512_marshal_public_key( + uint8_t out[KYBER512_PUBLIC_KEY_BYTES], const struct KYBER512_public_key *in_pub); @@ -4456,10 +4025,6 @@ index cafae9d17..1c889a075 100644 +OPENSSL_EXPORT void KYBER768_marshal_public_key( + uint8_t out[KYBER768_PUBLIC_KEY_BYTES], const struct KYBER768_public_key *in_pub); + -+// IPDWING_marshal_public_key serialises |in_pub| to |out|. -+OPENSSL_EXPORT void IPDWING_marshal_public_key( -+ uint8_t out[IPDWING_PUBLIC_KEY_BYTES], const struct IPDWING_public_key *in_pub); -+ +// KYBER512_parse_public_key sets |*out| to the public-key encoded in |in|. +OPENSSL_EXPORT void KYBER512_parse_public_key( + struct KYBER512_public_key *out, const uint8_t in[KYBER512_PUBLIC_KEY_BYTES]); @@ -4467,18 +4032,14 @@ index cafae9d17..1c889a075 100644 +// KYBER768_parse_public_key sets |*out| to the public-key encoded in |in|. +OPENSSL_EXPORT void KYBER768_parse_public_key( + struct KYBER768_public_key *out, const uint8_t in[KYBER768_PUBLIC_KEY_BYTES]); -+ -+// IPDWING_parse_public_key sets |*out| to the public-key encoded in |in|. 
-+OPENSSL_EXPORT void IPDWING_parse_public_key( -+ struct IPDWING_public_key *out, const uint8_t in[IPDWING_PUBLIC_KEY_BYTES]); #if defined(__cplusplus) } // extern C diff --git a/src/include/openssl/nid.h b/src/include/openssl/nid.h -index 4dd8841b1..09912d8bb 100644 +index 4dd8841b1..8237efb74 100644 --- a/src/include/openssl/nid.h +++ b/src/include/openssl/nid.h -@@ -4255,6 +4255,18 @@ extern "C" { +@@ -4255,6 +4255,15 @@ extern "C" { #define SN_X25519Kyber768Draft00 "X25519Kyber768Draft00" #define NID_X25519Kyber768Draft00 964 @@ -4490,25 +4051,21 @@ index 4dd8841b1..09912d8bb 100644 + +#define SN_X25519Kyber768Draft00Old "X25519Kyber768Draft00Old" +#define NID_X25519Kyber768Draft00Old 967 -+ -+#define SN_IPDWing "IPDWing" -+#define NID_IPDWing 968 + #if defined(__cplusplus) } /* extern C */ diff --git a/src/include/openssl/ssl.h b/src/include/openssl/ssl.h -index 53aa9b453..3091c6849 100644 +index 53aa9b453..8233ad210 100644 --- a/src/include/openssl/ssl.h +++ b/src/include/openssl/ssl.h -@@ -2378,6 +2378,10 @@ OPENSSL_EXPORT int SSL_set1_curves_list(SSL *ssl, const char *curves); +@@ -2378,6 +2378,9 @@ OPENSSL_EXPORT int SSL_set1_curves_list(SSL *ssl, const char *curves); #define SSL_CURVE_SECP521R1 25 #define SSL_CURVE_X25519 29 #define SSL_CURVE_X25519_KYBER768_DRAFT00 0x6399 +#define SSL_CURVE_X25519_KYBER512_DRAFT00 0xfe30 +#define SSL_CURVE_X25519_KYBER768_DRAFT00_OLD 0xfe31 +#define SSL_CURVE_P256_KYBER768_DRAFT00 0xfe32 -+#define SSL_CURVE_IPDWING 0xfe41 // SSL_get_curve_id returns the ID of the curve used by |ssl|'s most recently // completed handshake or 0 if not applicable. 
@@ -4526,36 +4083,40 @@ index 5c7e881bf..3c0770cf3 100644 crypto/pkcs8/test/no_encryption.p12 crypto/pkcs8/test/nss.p12 diff --git a/src/ssl/extensions.cc b/src/ssl/extensions.cc -index 5ee280221..cf165df1f 100644 +index 5ee280221..0a706c411 100644 --- a/src/ssl/extensions.cc +++ b/src/ssl/extensions.cc -@@ -207,6 +207,10 @@ static bool tls1_check_duplicate_extensions(const CBS *cbs) { +@@ -207,6 +207,9 @@ static bool tls1_check_duplicate_extensions(const CBS *cbs) { static bool is_post_quantum_group(uint16_t id) { switch (id) { case SSL_CURVE_X25519_KYBER768_DRAFT00: + case SSL_CURVE_X25519_KYBER768_DRAFT00_OLD: + case SSL_CURVE_X25519_KYBER512_DRAFT00: + case SSL_CURVE_P256_KYBER768_DRAFT00: -+ case SSL_CURVE_IPDWING: return true; default: return false; diff --git a/src/ssl/ssl_key_share.cc b/src/ssl/ssl_key_share.cc -index 09a9ad380..d9d3b9032 100644 +index 09a9ad380..f7d2226e3 100644 --- a/src/ssl/ssl_key_share.cc +++ b/src/ssl/ssl_key_share.cc -@@ -193,63 +193,384 @@ class X25519KeyShare : public SSLKeyShare { +@@ -26,6 +26,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -193,63 +194,384 @@ class X25519KeyShare : public SSLKeyShare { uint8_t private_key_[32]; }; -class X25519Kyber768KeyShare : public SSLKeyShare { +class P256Kyber768Draft00KeyShare : public SSLKeyShare { - public: -- X25519Kyber768KeyShare() {} ++ public: + P256Kyber768Draft00KeyShare() {} - -- uint16_t GroupID() const override { -- return SSL_CURVE_X25519_KYBER768_DRAFT00; ++ + uint16_t GroupID() const override { return SSL_CURVE_P256_KYBER768_DRAFT00; } + + bool Generate(CBB *out) override { @@ -4605,8 +4166,8 @@ index 09a9ad380..d9d3b9032 100644 + } + + return true; - } - ++ } ++ + bool Encap(CBB *out_public_key, Array *out_secret, + uint8_t *out_alert, Span peer_key) override { + assert(!p256_private_key_); @@ -4769,7 +4330,8 @@ index 09a9ad380..d9d3b9032 100644 +}; + +class X25519Kyber768Draft00KeyShare : public SSLKeyShare { -+ public: + public: +- 
X25519Kyber768KeyShare() {} + X25519Kyber768Draft00KeyShare(uint16_t group_id) : group_id_(group_id) { + assert(group_id == SSL_CURVE_X25519_KYBER768_DRAFT00 + || group_id == SSL_CURVE_X25519_KYBER768_DRAFT00_OLD); @@ -4777,12 +4339,10 @@ index 09a9ad380..d9d3b9032 100644 + + uint16_t GroupID() const override { return group_id_; } + - bool Generate(CBB *out) override { - uint8_t x25519_public_key[32]; - X25519_keypair(x25519_public_key, x25519_private_key_); - -- uint8_t kyber_public_key[KYBER_PUBLIC_KEY_BYTES]; -- KYBER_generate_key(kyber_public_key, &kyber_private_key_); ++ bool Generate(CBB *out) override { ++ uint8_t x25519_public_key[32]; ++ X25519_keypair(x25519_public_key, x25519_private_key_); ++ + uint8_t kyber_entropy[KYBER_GENERATE_KEY_BYTES]; + KYBER768_public_key kyber_public_key; + RAND_bytes(kyber_entropy, sizeof(kyber_entropy)); @@ -4790,19 +4350,16 @@ index 09a9ad380..d9d3b9032 100644 + + uint8_t kyber_public_key_bytes[KYBER768_PUBLIC_KEY_BYTES]; + KYBER768_marshal_public_key(kyber_public_key_bytes, &kyber_public_key); - - if (!CBB_add_bytes(out, x25519_public_key, sizeof(x25519_public_key)) || -- !CBB_add_bytes(out, kyber_public_key, sizeof(kyber_public_key))) { ++ ++ if (!CBB_add_bytes(out, x25519_public_key, sizeof(x25519_public_key)) || + !CBB_add_bytes(out, kyber_public_key_bytes, + sizeof(kyber_public_key_bytes))) { - return false; - } - - return true; - } - -- bool Encap(CBB *out_ciphertext, Array *out_secret, -- uint8_t *out_alert, Span peer_key) override { ++ return false; ++ } ++ ++ return true; ++ } ++ + bool Encap(CBB *out_public_key, Array *out_secret, + uint8_t *out_alert, Span peer_key) override { + Array secret; @@ -4863,11 +4420,13 @@ index 09a9ad380..d9d3b9032 100644 + + KYBER768_decap(secret.data() + 32, &kyber_private_key_, + peer_key.data() + 32, peer_key.size() - 32); -+ + +- uint16_t GroupID() const override { +- return SSL_CURVE_X25519_KYBER768_DRAFT00; + *out_secret = std::move(secret); + return true; -+ } -+ + } + + 
private: + uint8_t x25519_private_key_[32]; + KYBER768_private_key kyber_private_key_; @@ -4880,10 +4439,12 @@ index 09a9ad380..d9d3b9032 100644 + + uint16_t GroupID() const override { return SSL_CURVE_X25519_KYBER512_DRAFT00; } + -+ bool Generate(CBB *out) override { -+ uint8_t x25519_public_key[32]; -+ X25519_keypair(x25519_public_key, x25519_private_key_); -+ + bool Generate(CBB *out) override { + uint8_t x25519_public_key[32]; + X25519_keypair(x25519_public_key, x25519_private_key_); + +- uint8_t kyber_public_key[KYBER_PUBLIC_KEY_BYTES]; +- KYBER_generate_key(kyber_public_key, &kyber_private_key_); + uint8_t kyber_entropy[KYBER_GENERATE_KEY_BYTES]; + KYBER512_public_key kyber_public_key; + RAND_bytes(kyber_entropy, sizeof(kyber_entropy)); @@ -4891,16 +4452,19 @@ index 09a9ad380..d9d3b9032 100644 + + uint8_t kyber_public_key_bytes[KYBER512_PUBLIC_KEY_BYTES]; + KYBER512_marshal_public_key(kyber_public_key_bytes, &kyber_public_key); -+ -+ if (!CBB_add_bytes(out, x25519_public_key, sizeof(x25519_public_key)) || + + if (!CBB_add_bytes(out, x25519_public_key, sizeof(x25519_public_key)) || +- !CBB_add_bytes(out, kyber_public_key, sizeof(kyber_public_key))) { + !CBB_add_bytes(out, kyber_public_key_bytes, + sizeof(kyber_public_key_bytes))) { -+ return false; -+ } -+ -+ return true; -+ } -+ + return false; + } + + return true; + } + +- bool Encap(CBB *out_ciphertext, Array *out_secret, +- uint8_t *out_alert, Span peer_key) override { + bool Encap(CBB *out_public_key, Array *out_secret, + uint8_t *out_alert, Span peer_key) override { Array secret; @@ -4957,7 +4521,7 @@ index 09a9ad380..d9d3b9032 100644 return false; } -@@ -258,30 +579,111 @@ class X25519Kyber768KeyShare : public SSLKeyShare { +@@ -258,30 +580,32 @@ class X25519Kyber768KeyShare : public SSLKeyShare { } bool Decap(Array *out_secret, uint8_t *out_alert, @@ -4994,89 +4558,10 @@ index 09a9ad380..d9d3b9032 100644 uint8_t x25519_private_key_[32]; - KYBER_private_key kyber_private_key_; + KYBER512_private_key 
kyber_private_key_; -+}; -+ -+class IPDWingKeyShare : public SSLKeyShare { -+ public: -+ IPDWingKeyShare() {} -+ -+ uint16_t GroupID() const override { return SSL_CURVE_IPDWING; } -+ -+ bool Generate(CBB *out) override { -+ uint8_t entropy[IPDWING_GENERATE_KEY_BYTES]; -+ IPDWING_public_key public_key; -+ RAND_bytes(entropy, sizeof(entropy)); -+ IPDWING_generate_key(&public_key, &private_key_, entropy); -+ -+ uint8_t public_key_bytes[IPDWING_PUBLIC_KEY_BYTES]; -+ IPDWING_marshal_public_key(public_key_bytes, &public_key); -+ -+ if(!CBB_add_bytes(out, public_key_bytes, sizeof(public_key_bytes))) { -+ return false; -+ } -+ -+ return true; -+ } -+ -+ bool Encap(CBB *out_public_key, Array *out_secret, -+ uint8_t *out_alert, Span peer_key) override { -+ Array secret; -+ *out_alert = SSL_AD_INTERNAL_ERROR; -+ if (!secret.Init(IPDWING_KEY_BYTES)) { -+ OPENSSL_PUT_ERROR(SSL, ERR_R_MALLOC_FAILURE); -+ return false; -+ } -+ -+ IPDWING_public_key peer_public_key; -+ if (peer_key.size() != IPDWING_PUBLIC_KEY_BYTES) { -+ *out_alert = SSL_AD_DECODE_ERROR; -+ OPENSSL_PUT_ERROR(SSL, SSL_R_BAD_ECPOINT); -+ return false; -+ } -+ -+ IPDWING_parse_public_key(&peer_public_key, peer_key.data()); -+ -+ uint8_t ciphertext[IPDWING_CIPHERTEXT_BYTES]; -+ uint8_t entropy[IPDWING_ENCAP_BYTES]; -+ RAND_bytes(entropy, sizeof(entropy)); -+ -+ IPDWING_encap(ciphertext, secret.data(), &peer_public_key, entropy); -+ if(!CBB_add_bytes(out_public_key, ciphertext, sizeof(ciphertext))) { -+ return false; -+ } -+ -+ *out_secret = std::move(secret); -+ return true; -+ } -+ -+ bool Decap(Array *out_secret, uint8_t *out_alert, -+ Span peer_key) override { -+ *out_alert = SSL_AD_INTERNAL_ERROR; -+ -+ Array secret; -+ if (!secret.Init(IPDWING_KEY_BYTES)) { -+ OPENSSL_PUT_ERROR(SSL, ERR_R_MALLOC_FAILURE); -+ return false; -+ } -+ -+ if (peer_key.size() != IPDWING_CIPHERTEXT_BYTES) { -+ *out_alert = SSL_AD_DECODE_ERROR; -+ OPENSSL_PUT_ERROR(SSL, SSL_R_BAD_ECPOINT); -+ return false; -+ } -+ -+ 
IPDWING_decap(secret.data(), &private_key_, peer_key.data()); -+ -+ *out_secret = std::move(secret); -+ return true; -+ } -+ -+ private: -+ IPDWING_private_key private_key_; }; constexpr NamedGroup kNamedGroups[] = { -@@ -290,8 +692,15 @@ constexpr NamedGroup kNamedGroups[] = { +@@ -290,8 +614,14 @@ constexpr NamedGroup kNamedGroups[] = { {NID_secp384r1, SSL_CURVE_SECP384R1, "P-384", "secp384r1"}, {NID_secp521r1, SSL_CURVE_SECP521R1, "P-521", "secp521r1"}, {NID_X25519, SSL_CURVE_X25519, "X25519", "x25519"}, @@ -5088,12 +4573,11 @@ index 09a9ad380..d9d3b9032 100644 + {NID_X25519Kyber768Draft00Old, SSL_CURVE_X25519_KYBER768_DRAFT00_OLD, + "X25519Kyber768Draft00Old", "Xyber768D00Old"}, + {NID_P256Kyber768Draft00, SSL_CURVE_P256_KYBER768_DRAFT00, -+ "P256Kyber768Draft00", "P256Kyber768D00"}, -+ {NID_IPDWing, SSL_CURVE_IPDWING, "IPDWing", ""} ++ "P256Kyber768Draft00", "P256Kyber768D00"} }; } // namespace -@@ -312,8 +721,18 @@ UniquePtr SSLKeyShare::Create(uint16_t group_id) { +@@ -312,8 +642,16 @@ UniquePtr SSLKeyShare::Create(uint16_t group_id) { return MakeUnique(NID_secp521r1, SSL_CURVE_SECP521R1); case SSL_CURVE_X25519: return MakeUnique(); @@ -5108,8 +4592,6 @@ index 09a9ad380..d9d3b9032 100644 + group_id)); + case SSL_CURVE_P256_KYBER768_DRAFT00: + return UniquePtr(New()); -+ case SSL_CURVE_IPDWING: -+ return UniquePtr(New()); default: return nullptr; } @@ -5127,10 +4609,10 @@ index 838761af5..9eb201d37 100644 static const uint16_t kSigAlgs[] = { SSL_SIGN_RSA_PKCS1_SHA256, diff --git a/src/ssl/ssl_test.cc b/src/ssl/ssl_test.cc -index ef43a9e98..bb79509ea 100644 +index ef43a9e98..9756fd2a0 100644 --- a/src/ssl/ssl_test.cc +++ b/src/ssl/ssl_test.cc -@@ -409,7 +409,34 @@ static const CurveTest kCurveTests[] = { +@@ -409,7 +409,30 @@ static const CurveTest kCurveTests[] = { "P-256:X25519Kyber768Draft00", { SSL_CURVE_SECP256R1, SSL_CURVE_X25519_KYBER768_DRAFT00 }, }, @@ -5144,10 +4626,6 @@ index ef43a9e98..bb79509ea 100644 + { SSL_CURVE_X25519_KYBER768_DRAFT00 }, + }, 
+ { -+ "IPDWing", -+ { SSL_CURVE_IPDWING }, -+ }, -+ { + "Xyber768D00:Xyber768D00Old", + { SSL_CURVE_X25519_KYBER768_DRAFT00, SSL_CURVE_X25519_KYBER768_DRAFT00_OLD }, + }, @@ -5354,5 +4832,5 @@ index 5b0205953..831875514 100644 !SpeedTrustToken("TrustToken-Exp1-Batch1", TRUST_TOKEN_experiment_v1(), 1, selected) || -- -2.45.2 +2.41.0 diff --git a/boring/src/lib.rs b/boring/src/lib.rs index 4d477a72..6b3ade0f 100644 --- a/boring/src/lib.rs +++ b/boring/src/lib.rs @@ -83,10 +83,6 @@ //! boxes. //! - `P256Kyber768Draft00`. Similar again to `X25519Kyber768Draft00`, but uses P256 as classical //! part. It uses a non-standard codepoint. Not recommended. -//! - `IPDWing`. A preliminary version of -//! [X-Wing](https://datatracker.ietf.org/doc/draft-connolly-cfrg-xwing-kem/02/). -//! Similar to `X25519Kyber768Draft00Old`, but uses a newer (but not yet final) version of Kyber -//! called ML-KEM-ipd. Not recommended. //! //! Presently all these key agreements are deployed by Cloudflare, but we do not guarantee continued //! support for them. 
diff --git a/boring/src/ssl/mod.rs b/boring/src/ssl/mod.rs index 04e0ee43..283c90b1 100644 --- a/boring/src/ssl/mod.rs +++ b/boring/src/ssl/mod.rs @@ -723,9 +723,6 @@ impl SslCurve { #[cfg(feature = "pq-experimental")] pub const P256_KYBER768_DRAFT00: SslCurve = SslCurve(ffi::SSL_CURVE_P256_KYBER768_DRAFT00 as _); - #[cfg(feature = "pq-experimental")] - pub const IPD_WING: SslCurve = SslCurve(ffi::SSL_CURVE_IPDWING); - /// Returns the curve name /// /// This corresponds to [`SSL_get_curve_name`] @@ -768,8 +765,6 @@ impl SslCurve { ffi::SSL_CURVE_X25519_KYBER512_DRAFT00 => Some(ffi::NID_X25519Kyber512Draft00), #[cfg(feature = "pq-experimental")] ffi::SSL_CURVE_P256_KYBER768_DRAFT00 => Some(ffi::NID_P256Kyber768Draft00), - #[cfg(feature = "pq-experimental")] - ffi::SSL_CURVE_IPDWING => Some(ffi::NID_IPDWing), _ => None, } } From 211aa242a1abaceda76e91b1b827c4d92db256e8 Mon Sep 17 00:00:00 2001 From: Bas Westerbaan Date: Tue, 17 Sep 2024 16:07:57 +0200 Subject: [PATCH 2/2] Add post-quantum key agreement X25519MLKEM768 This is the successor of X25519Kyber768Draft00. Spec: https://datatracker.ietf.org/doc/draft-kwiatkowski-tls-ecdhe-mlkem/02/ IANA has assigned the codepoint. https://www.iana.org/assignments/tls-parameters/tls-parameters.xhtml#tls-parameters-8 Upstream BoringSSL support landed in. https://github.com/google/boringssl/commit/7fb4d3da5082225c7180267e9daad291887ce982 The version of BoringSSL we patch does not include it, so we add it manually. Chrome and Firefox are planning to enable in October. This PR is based on the IPD-Wing patch reviewed here: https://github.com/cloudflare/boring/pull/243 There are two changes. First we simplify the patch a bit as we do not need IPD-Wing. Secondly, we perform the encapsulation key check, which was a last minute addition of NIST. We perform this check also for Kyber. 
--- boring-sys/patches/boring-pq.patch | 1203 ++++++++++++++++++---------- boring/src/lib.rs | 4 +- boring/src/ssl/mod.rs | 12 +- 3 files changed, 775 insertions(+), 444 deletions(-) diff --git a/boring-sys/patches/boring-pq.patch b/boring-sys/patches/boring-pq.patch index 2ffeee6c..e6601d91 100644 --- a/boring-sys/patches/boring-pq.patch +++ b/boring-sys/patches/boring-pq.patch @@ -1,59 +1,71 @@ -From 4cba2164726c8d2647e38548a266a70c4942d567 Mon Sep 17 00:00:00 2001 +From b98d803dbecc9d6848d8cbffa62b5c943fb75f70 Mon Sep 17 00:00:00 2001 From: Bas Westerbaan Date: Fri, 22 Jul 2022 16:43:48 +0200 -Subject: [PATCH] Add temporary post-quantum key agreements +Subject: [PATCH] Add additional post-quantum key agreements -BoringSSL upstream support X25519Kyber768Draft00 already under -codepoint 0x6399, which is the recommended post-quantum key -agreement to use +BoringSSL upstream has supported the temporary post-quantum +key agreement X25519Kyber768Draft00 (0x6399) for a while. +At the time of writing X25519Kyber768Draft00 is widely deployed by browsers. + +Recent BoringSSL adds support for X25519MLKEM768 (0x11ec), +which will be the long term post-quantum key agreement of choice, +and many browsers are expected to switch to it before the end of 2024. This patch adds: -1. Supports for P256Kyber768Draft00 under 0xfe32, which we temporarily +1. Support for X25519MLKEM768 under the codepoint 0x11ec. The version + of BoringSSL we patch against did not support it yet. + +2. Support for P256Kyber768Draft00 under 0xfe32, which we temporarily need for compliance reasons. (Note that this is not the codepoint allocated for that exchange in the IANA table.) It also enables it in FIPS mode. -2. Support for X25519Kyber768Draft00 under the old codepoint 0xfe31. +3. Support for X25519Kyber768Draft00 under the old codepoint 0xfe31. -3. Support for X25519Kyber512Draft00 under the codepoint 0xfe30. This +4. Support for X25519Kyber512Draft00 under the codepoint 0xfe30.
This key agreement should only be used for testing: to see if the smaller keyshare makes a difference. The patch also replaces Google's implementation of Kyber, by the portable reference implementation, so as to support Kyber512. -Cf RTG-2076 RTG-2051 RTG-2508 RTG-2707 RTG-2607 +Cf RTG-2076 RTG-2051 RTG-2508 RTG-2707 RTG-2607 RTG-3239 --- BUILD.generated.bzl | 5 +- BUILD.generated_tests.bzl | 4 - CMakeLists.txt | 4 +- + crypto_test_data.cc | 4 - sources.json | 9 +- src/crypto/CMakeLists.txt | 5 +- src/crypto/kyber/internal.h | 91 - src/crypto/kyber/keccak.c | 204 -- - src/crypto/kyber/kyber.c | 2865 ++++++++++++++++++++------- + src/crypto/kyber/keccak_tests.txt | 3071 ----------------------------- + src/crypto/kyber/kyber.c | 3011 +++++++++++++++++++++------- src/crypto/kyber/kyber512.c | 5 + src/crypto/kyber/kyber768.c | 4 + src/crypto/kyber/kyber_test.cc | 229 --- - src/crypto/obj/obj_dat.h | 14 +- - src/crypto/obj/obj_mac.num | 3 + - src/crypto/obj/objects.txt | 5 +- - src/include/openssl/kyber.h | 199 +- - src/include/openssl/nid.h | 9 + - src/include/openssl/ssl.h | 3 + + src/crypto/kyber/kyber_tests.txt | 905 --------- + src/crypto/obj/obj_dat.h | 17 +- + src/crypto/obj/obj_mac.num | 4 + + src/crypto/obj/objects.txt | 6 +- + src/include/openssl/kyber.h | 203 +- + src/include/openssl/nid.h | 12 + + src/include/openssl/ssl.h | 4 + src/sources.cmake | 2 - - src/ssl/extensions.cc | 3 + - src/ssl/ssl_key_share.cc | 412 +++- + src/ssl/extensions.cc | 4 + + src/ssl/ssl_key_share.cc | 525 ++++- src/ssl/ssl_lib.cc | 2 +- - src/ssl/ssl_test.cc | 25 +- + src/ssl/ssl_test.cc | 29 +- src/tool/speed.cc | 162 +- - 26 files changed, 2797 insertions(+), 5447 deletions(-) + 26 files changed, 3088 insertions(+), 5433 deletions(-) delete mode 100644 src/crypto/kyber/internal.h delete mode 100644 src/crypto/kyber/keccak.c + delete mode 100644 src/crypto/kyber/keccak_tests.txt create mode 100644 src/crypto/kyber/kyber512.c create mode 100644 src/crypto/kyber/kyber768.c delete 
mode 100644 src/crypto/kyber/kyber_test.cc + delete mode 100644 src/crypto/kyber/kyber_tests.txt diff --git a/BUILD.generated.bzl b/BUILD.generated.bzl index 738e1055f..9466757a2 100644 @@ -122,6 +134,28 @@ index faed2befa..931c0e3a8 100644 src/crypto/lhash/lhash.c src/crypto/mem.c src/crypto/obj/obj.c +diff --git a/crypto_test_data.cc b/crypto_test_data.cc +index 2268533f8..19b344af1 100644 +--- a/crypto_test_data.cc ++++ b/crypto_test_data.cc +@@ -74,7 +74,6 @@ + * crypto/fipsmodule/rand/ctrdrbg_vectors.txt \ + * crypto/hmac_extra/hmac_tests.txt \ + * crypto/hpke/hpke_test_vectors.txt \ +- * crypto/kyber/keccak_tests.txt \ + * crypto/kyber/kyber_tests.txt \ + * crypto/pkcs8/test/empty_password.p12 \ + * crypto/pkcs8/test/no_encryption.p12 \ +@@ -5269,9 +5268,6 @@ std::string GetTestData(const char *path) { + if (strcmp(path, "crypto/hpke/hpke_test_vectors.txt") == 0) { + return AssembleString(kData59, kLen59); + } +- if (strcmp(path, "crypto/kyber/keccak_tests.txt") == 0) { +- return AssembleString(kData60, kLen60); +- } + if (strcmp(path, "crypto/kyber/kyber_tests.txt") == 0) { + return AssembleString(kData61, kLen61); + } diff --git a/sources.json b/sources.json index 4c0048e1d..f6ea5c40f 100644 --- a/sources.json @@ -492,10 +526,10 @@ index f1c012d11..000000000 - } -} diff --git a/src/crypto/kyber/kyber.c b/src/crypto/kyber/kyber.c -index 776c085f9..346d4daec 100644 +index 776c085f9..ccb5b3d9b 100644 --- a/src/crypto/kyber/kyber.c +++ b/src/crypto/kyber/kyber.c -@@ -1,833 +1,2252 @@ +@@ -1,833 +1,2426 @@ -/* Copyright (c) 2023, Google Inc. - * - * Permission to use, copy, modify, and/or distribute this software for any @@ -520,6 +554,8 @@ index 776c085f9..346d4daec 100644 +// - Removed 90s version. +// - Seeds are passed as paramters. +// - Changed the API to be more BoringSSL-like ++// - Mitigated timing sidechannels (Kyberslash 1 and 2). ++// (Note that these do not affect ephemeral usage as in TLS.) 
+// +// TODO +// @@ -534,21 +570,24 @@ index 776c085f9..346d4daec 100644 +// implementation or https://github.com/cloudflare/circl/tree/main/pke/kyber +// +// - Option to keep A stored in private key. -+ + +-#include +#ifndef KYBER_K +#error "Don't compile this file direcly" +#endif - #include -+#include - -#include -#include -- ++#include ++#include + -#include -#include -- --#include "../internal.h" ++#include ++#include ++#include + + #include "../internal.h" -#include "./internal.h" - - @@ -612,9 +651,6 @@ index 776c085f9..346d4daec 100644 - 2099, 561, 2466, 2594, 2804, 1092, 403, 1026, 1143, 2150, 2775, 886, - 1722, 1212, 1874, 1029, 2110, 2935, 885, 2154, -}; -+#include -+#include -+#include -// kInverseNTTRoots = [pow(17, -bitreverse(i), p) for i in range(128)] -static const uint16_t kInverseNTTRoots[128] = { @@ -844,7 +880,7 @@ index 776c085f9..346d4daec 100644 + uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES], + const uint8_t seed[KYBER_SYMBYTES]); + -+static void indcpa_enc(uint8_t c[KYBER_INDCPA_BYTES], ++static int indcpa_enc(uint8_t c[KYBER_INDCPA_BYTES], + const uint8_t m[KYBER_INDCPA_MSGBYTES], + const uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES], + const uint8_t coins[KYBER_SYMBYTES]); @@ -873,6 +909,9 @@ index 776c085f9..346d4daec 100644 +static void shake256_squeeze(uint8_t *out, size_t outlen, keccak_state *state); +static void shake256_absorb_once(keccak_state *state, const uint8_t *in, size_t inlen); +static void shake256_squeezeblocks(uint8_t *out, size_t nblocks, keccak_state *state); ++static void shake256_absorb(keccak_state *state, const uint8_t *in, size_t inlen); ++static void shake256_finalize(keccak_state *state); ++static void shake256_init(keccak_state *state); + +static void shake256(uint8_t *out, size_t outlen, const uint8_t *in, size_t inlen); +static void sha3_256(uint8_t h[32], const uint8_t *in, size_t inlen); @@ -1064,13 +1103,10 @@ index 776c085f9..346d4daec 100644 + a = (d >> (4*j+0)) & 0x3; + b = (d >> (4*j+2)) & 0x3; + r->coeffs[8*i+j] 
= a - b; - } - } - } - --static void vector_ntt(vector *a) { -- for (int i = 0; i < RANK; i++) { -- scalar_ntt(&a->v[i]); ++ } ++ } ++} ++ +/************************************************* +* Name: cbd3 +* @@ -1099,7 +1135,7 @@ index 776c085f9..346d4daec 100644 + a = (d >> (6*j+0)) & 0x7; + b = (d >> (6*j+3)) & 0x7; + r->coeffs[4*i+j] = a - b; -+ } + } } } +#endif @@ -1115,25 +1151,9 @@ index 776c085f9..346d4daec 100644 +#endif +} --// In place inverse number theoretic transform of a given scalar, with pairs of --// entries of s->v being interpreted as elements of GF(3329^2). Just as with the --// number theoretic transform, this leaves off the first step of the normal iFFT --// to account for the fact that 3329 does not have a 512th root of unity, using --// the precomputed 128 roots of unity stored in |kInverseNTTRoots|. --static void scalar_inverse_ntt(scalar *s) { -- int step = DEGREE / 2; -- // `int` is used here because using `size_t` throughout caused a ~5% slowdown -- // with Clang 14 on Aarch64. 
-- for (int offset = 2; offset < DEGREE; offset <<= 1) { -- step >>= 1; -- int k = 0; -- for (int i = 0; i < step; i++) { -- uint32_t step_root = kInverseNTTRoots[i + step]; -- for (int j = k; j < k + offset; j++) { -- uint16_t odd = s->c[j + offset]; -- uint16_t even = s->c[j]; -- s->c[j] = reduce_once(odd + even); -- s->c[j + offset] = reduce(step_root * (even - odd + kPrime)); +-static void vector_ntt(vector *a) { +- for (int i = 0; i < RANK; i++) { +- scalar_ntt(&a->v[i]); +static void poly_cbd_eta2(poly *r, const uint8_t buf[KYBER_ETA2*KYBER_N/4]) +{ +#if KYBER_ETA2 == 2 @@ -1176,8 +1196,8 @@ index 776c085f9..346d4daec 100644 + zetas[i] -= KYBER_Q; + if(zetas[i] < -KYBER_Q/2) + zetas[i] += KYBER_Q; -+ } -+} + } + } +*/ + +static const int16_t zetas[128] = { @@ -1212,7 +1232,26 @@ index 776c085f9..346d4daec 100644 +static int16_t fqmul(int16_t a, int16_t b) { + return montgomery_reduce((int32_t)a*b); +} -+ + +-// In place inverse number theoretic transform of a given scalar, with pairs of +-// entries of s->v being interpreted as elements of GF(3329^2). Just as with the +-// number theoretic transform, this leaves off the first step of the normal iFFT +-// to account for the fact that 3329 does not have a 512th root of unity, using +-// the precomputed 128 roots of unity stored in |kInverseNTTRoots|. +-static void scalar_inverse_ntt(scalar *s) { +- int step = DEGREE / 2; +- // `int` is used here because using `size_t` throughout caused a ~5% slowdown +- // with Clang 14 on Aarch64. 
+- for (int offset = 2; offset < DEGREE; offset <<= 1) { +- step >>= 1; +- int k = 0; +- for (int i = 0; i < step; i++) { +- uint32_t step_root = kInverseNTTRoots[i + step]; +- for (int j = k; j < k + offset; j++) { +- uint16_t odd = s->c[j + offset]; +- uint16_t even = s->c[j]; +- s->c[j] = reduce_once(odd + even); +- s->c[j + offset] = reduce(step_root * (even - odd + kPrime)); +/************************************************* +* Name: ntt +* @@ -1316,6 +1355,7 @@ index 776c085f9..346d4daec 100644 +{ + unsigned int i,j; + int16_t u; ++ uint32_t d0; + uint8_t t[8]; + +#if (KYBER_POLYCOMPRESSEDBYTES == 128) @@ -1324,7 +1364,11 @@ index 776c085f9..346d4daec 100644 + // map to positive standard representatives + u = a->coeffs[8*i+j]; + u += (u >> 15) & KYBER_Q; -+ t[j] = ((((uint16_t)u << 4) + KYBER_Q/2)/KYBER_Q) & 15; ++ d0 = u << 4; ++ d0 += 1665; ++ d0 *= 80635; ++ d0 >>= 28; ++ t[j] = d0 & 0xf; + } + + r[0] = t[0] | (t[1] << 4); @@ -1339,7 +1383,11 @@ index 776c085f9..346d4daec 100644 + // map to positive standard representatives + u = a->coeffs[8*i+j]; + u += (u >> 15) & KYBER_Q; -+ t[j] = ((((uint32_t)u << 5) + KYBER_Q/2)/KYBER_Q) & 31; ++ d0 = u << 5; ++ d0 += 1664; ++ d0 *= 40318; ++ d0 >>= 27; ++ t[j] = d0 & 0x1f; + } + + r[0] = (t[0] >> 0) | (t[1] << 5); @@ -1490,7 +1538,7 @@ index 776c085f9..346d4daec 100644 + + for(i=0;i> j)&1); ++ mask = -(int16_t)value_barrier_u32((msg[i] >> j)&1); + r->coeffs[8*i+j] = mask & ((KYBER_Q+1)/2); } } @@ -1515,14 +1563,17 @@ index 776c085f9..346d4daec 100644 +static void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly *a) +{ + unsigned int i,j; -+ uint16_t t; ++ uint32_t t; + + for(i=0;icoeffs[8*i+j]; -+ t += ((int16_t)t >> 15) & KYBER_Q; -+ t = (((t << 1) + KYBER_Q/2)/KYBER_Q) & 1; ++ t <<= 1; ++ t += 1665; ++ t *= 80635; ++ t >>= 28; ++ t &= 1; + msg[i] |= t << j; } } @@ -1801,6 +1852,7 @@ index 776c085f9..346d4daec 100644 +static void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a) 
+{ + unsigned int i,j,k; ++ uint64_t d0; + +#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) + uint16_t t[8]; @@ -1809,7 +1861,12 @@ index 776c085f9..346d4daec 100644 + for(k=0;k<8;k++) { + t[k] = a->vec[i].coeffs[8*j+k]; + t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; -+ t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q/2)/KYBER_Q) & 0x7ff; ++ d0 = t[k]; ++ d0 <<= 11; ++ d0 += 1664; ++ d0 *= 645084; ++ d0 >>= 31; ++ t[k] = d0 & 0x7ff; } - element_bits_done += chunk_bits; @@ -1835,7 +1892,12 @@ index 776c085f9..346d4daec 100644 + for(k=0;k<4;k++) { + t[k] = a->vec[i].coeffs[4*j+k]; + t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; -+ t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q/2)/ KYBER_Q) & 0x3ff; ++ d0 = t[k]; ++ d0 <<= 10; ++ d0 += 1665; ++ d0 *= 1290167; ++ d0 >>= 32; ++ t[k] = d0 & 0x3ff; + } - if (out_byte_bits > 0) { @@ -1910,8 +1972,15 @@ index 776c085f9..346d4daec 100644 +#else +#error "KYBER_POLYVECCOMPRESSEDBYTES needs to be in {320*KYBER_K, 352*KYBER_K}" +#endif -+} -+ + } + +-// Encodes an entire vector into 32*|RANK|*|bits| bytes. Note that since 256 +-// (DEGREE) is divisible by 8, the individual vector entries will always fill a +-// whole number of bytes, so we do not need to worry about bit packing here. +-static void vector_encode(uint8_t *out, const vector *a, int bits) { +- for (int i = 0; i < RANK; i++) { +- scalar_encode(out + i * bits * DEGREE / 8, &a->v[i], bits); +- } +/************************************************* +* Name: polyvec_tobytes +* @@ -1926,8 +1995,13 @@ index 776c085f9..346d4daec 100644 + unsigned int i; + for(i=0;ivec[i]); -+} -+ + } + +-// scalar_decode parses |DEGREE * bits| bits from |in| into |DEGREE| values in +-// |out|. It returns one on success and zero if any parsed value is >= +-// |kPrime|. 
+-static int scalar_decode(scalar *out, const uint8_t *in, int bits) { +- assert(bits <= (int)sizeof(*out->c) * 8 && bits != 1); +/************************************************* +* Name: polyvec_frombytes +* @@ -1943,14 +2017,10 @@ index 776c085f9..346d4daec 100644 + unsigned int i; + for(i=0;ivec[i], a+i*KYBER_POLYBYTES); - } ++} --// Encodes an entire vector into 32*|RANK|*|bits| bytes. Note that since 256 --// (DEGREE) is divisible by 8, the individual vector entries will always fill a --// whole number of bytes, so we do not need to worry about bit packing here. --static void vector_encode(uint8_t *out, const vector *a, int bits) { -- for (int i = 0; i < RANK; i++) { -- scalar_encode(out + i * bits * DEGREE / 8, &a->v[i], bits); +- uint8_t in_byte = 0; +- int in_byte_bits_left = 0; +/************************************************* +* Name: polyvec_ntt +* @@ -1964,7 +2034,10 @@ index 776c085f9..346d4daec 100644 + for(i=0;ivec[i]); +} -+ + +- for (int i = 0; i < DEGREE; i++) { +- uint16_t element = 0; +- int element_bits_done = 0; +/************************************************* +* Name: polyvec_invntt_tomont +* @@ -1979,7 +2052,13 @@ index 776c085f9..346d4daec 100644 + for(i=0;ivec[i]); +} -+ + +- while (element_bits_done < bits) { +- if (in_byte_bits_left == 0) { +- in_byte = *in; +- in++; +- in_byte_bits_left = 8; +- } +/************************************************* +* Name: polyvec_basemul_acc_montgomery +* @@ -1999,16 +2078,18 @@ index 776c085f9..346d4daec 100644 + for(i=1;ivec[i], &b->vec[i]); + poly_add(r, r, &t); - } -+ ++ } + +- int chunk_bits = bits - element_bits_done; +- if (chunk_bits > in_byte_bits_left) { +- chunk_bits = in_byte_bits_left; +- } + poly_reduce(r); - } ++} --// scalar_decode parses |DEGREE * bits| bits from |in| into |DEGREE| values in --// |out|. It returns one on success and zero if any parsed value is >= --// |kPrime|. 
--static int scalar_decode(scalar *out, const uint8_t *in, int bits) { -- assert(bits <= (int)sizeof(*out->c) * 8 && bits != 1); +- element |= (in_byte & kMasks[chunk_bits - 1]) << element_bits_done; +- in_byte_bits_left -= chunk_bits; +- in_byte >>= chunk_bits; +/************************************************* +* Name: polyvec_reduce +* @@ -2025,8 +2106,8 @@ index 776c085f9..346d4daec 100644 + poly_reduce(&r->vec[i]); +} -- uint8_t in_byte = 0; -- int in_byte_bits_left = 0; +- element_bits_done += chunk_bits; +- } +/************************************************* +* Name: polyvec_add +* @@ -2043,19 +2124,15 @@ index 776c085f9..346d4daec 100644 + poly_add(&r->vec[i], &a->vec[i], &b->vec[i]); +} -- for (int i = 0; i < DEGREE; i++) { -- uint16_t element = 0; -- int element_bits_done = 0; +- if (element >= kPrime) { +- return 0; +- } +- out->c[i] = element; +- } +// +// indcpa.c +// - -- while (element_bits_done < bits) { -- if (in_byte_bits_left == 0) { -- in_byte = *in; -- in++; -- in_byte_bits_left = 8; -- } ++ +/************************************************* +* Name: pack_pk +* @@ -2076,11 +2153,7 @@ index 776c085f9..346d4daec 100644 + for(i=0;i in_byte_bits_left) { -- chunk_bits = in_byte_bits_left; -- } ++ +/************************************************* +* Name: unpack_pk +* @@ -2091,19 +2164,34 @@ index 776c085f9..346d4daec 100644 +* - uint8_t *seed: pointer to output seed to generate matrix A +* - const uint8_t *packedpk: pointer to input serialized public key +**************************************************/ -+static void unpack_pk(polyvec *pk, ++static int unpack_pk(polyvec *pk, + uint8_t seed[KYBER_SYMBYTES], + const uint8_t packedpk[KYBER_INDCPA_PUBLICKEYBYTES]) +{ + size_t i; + polyvec_frombytes(pk, packedpk); ++ ++ // FIPS 203 encapsulation key check. We'll perform it even for Kyber. 
++ uint8_t repacked[KYBER_POLYVECBYTES]; ++ polyvec_tobytes(repacked, pk); ++ ++ if(verify(repacked, packedpk, KYBER_POLYVECBYTES) != 0) ++ return 0; + + for(i=0;i>= chunk_bits; +-// scalar_decode_1 is |scalar_decode| specialised for |bits| == 1. +-static void scalar_decode_1(scalar *out, const uint8_t in[32]) { +- for (int i = 0; i < DEGREE; i += 8) { +- uint8_t in_byte = *in; +- in++; +- for (int j = 0; j < 8; j++) { +- out->c[i + j] = in_byte & 1; +- in_byte >>= 1; +- } +/************************************************* +* Name: pack_sk +* @@ -2116,9 +2204,7 @@ index 776c085f9..346d4daec 100644 +{ + polyvec_tobytes(r, sk); +} - -- element_bits_done += chunk_bits; -- } ++ +/************************************************* +* Name: unpack_sk +* @@ -2131,12 +2217,7 @@ index 776c085f9..346d4daec 100644 +{ + polyvec_frombytes(sk, packedsk); +} - -- if (element >= kPrime) { -- return 0; -- } -- out->c[i] = element; -- } ++ +/************************************************* +* Name: pack_ciphertext +* @@ -2153,8 +2234,7 @@ index 776c085f9..346d4daec 100644 + polyvec_compress(r, b); + poly_compress(r+KYBER_POLYVECCOMPRESSEDBYTES, v); +} - -- return 1; ++ +/************************************************* +* Name: unpack_ciphertext +* @@ -2169,17 +2249,8 @@ index 776c085f9..346d4daec 100644 +{ + polyvec_decompress(b, c); + poly_decompress(v, c+KYBER_POLYVECCOMPRESSEDBYTES); - } - --// scalar_decode_1 is |scalar_decode| specialised for |bits| == 1. --static void scalar_decode_1(scalar *out, const uint8_t in[32]) { -- for (int i = 0; i < DEGREE; i += 8) { -- uint8_t in_byte = *in; -- in++; -- for (int j = 0; j < 8; j++) { -- out->c[i + j] = in_byte & 1; -- in_byte >>= 1; -- } ++} ++ +/************************************************* +* Name: rej_uniform +* @@ -2268,8 +2339,8 @@ index 776c085f9..346d4daec 100644 } } - return 1; --} -- + } + -// Compresses (lossily) an input |x| mod 3329 into |bits| many bits by grouping -// numbers close to each other together. 
The formula used is -// round(2^|bits|/kPrime*x) mod 2^|bits|. @@ -2311,12 +2382,6 @@ index 776c085f9..346d4daec 100644 -static void scalar_compress(scalar *s, int bits) { - for (int i = 0; i < DEGREE; i++) { - s->c[i] = compress(s->c[i], bits); -- } - } - --static void scalar_decompress(scalar *s, int bits) { -- for (int i = 0; i < DEGREE; i++) { -- s->c[i] = decompress(s->c[i], bits); +/************************************************* +* Name: indcpa_keypair +* @@ -2365,9 +2430,9 @@ index 776c085f9..346d4daec 100644 + pack_pk(pk, &pkpv, publicseed); } --static void vector_compress(vector *a, int bits) { -- for (int i = 0; i < RANK; i++) { -- scalar_compress(&a->v[i], bits); +-static void scalar_decompress(scalar *s, int bits) { +- for (int i = 0; i < DEGREE; i++) { +- s->c[i] = decompress(s->c[i], bits); - } +/************************************************* +* Name: indcpa_enc @@ -2385,7 +2450,7 @@ index 776c085f9..346d4daec 100644 +* (of length KYBER_SYMBYTES) to deterministically +* generate all randomness +**************************************************/ -+static void indcpa_enc(uint8_t c[KYBER_INDCPA_BYTES], ++static int indcpa_enc(uint8_t c[KYBER_INDCPA_BYTES], + const uint8_t m[KYBER_INDCPA_MSGBYTES], + const uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES], + const uint8_t coins[KYBER_SYMBYTES]) @@ -2396,7 +2461,9 @@ index 776c085f9..346d4daec 100644 + polyvec sp, pkpv, ep, at[KYBER_K], b; + poly v, k, epp; + -+ unpack_pk(&pkpv, seed, pk); ++ if (!unpack_pk(&pkpv, seed, pk)) ++ return 0; ++ + poly_frommsg(&k, m); + gen_at(at, seed); + @@ -2424,11 +2491,12 @@ index 776c085f9..346d4daec 100644 + poly_reduce(&v); + + pack_ciphertext(c, &b, &v); ++ return 1; } --static void vector_decompress(vector *a, int bits) { +-static void vector_compress(vector *a, int bits) { - for (int i = 0; i < RANK; i++) { -- scalar_decompress(&a->v[i], bits); +- scalar_compress(&a->v[i], bits); - } +/************************************************* +* Name: indcpa_dec @@ -2463,12 
+2531,10 @@ index 776c085f9..346d4daec 100644 + poly_tomsg(m, &mp); } --struct public_key { -- vector t; -- uint8_t rho[32]; -- uint8_t public_key_hash[32]; -- matrix m; --}; +-static void vector_decompress(vector *a, int bits) { +- for (int i = 0; i < RANK; i++) { +- scalar_decompress(&a->v[i], bits); +- } +// +// fips202.c +// @@ -2498,16 +2564,14 @@ index 776c085f9..346d4daec 100644 + r |= (uint64_t)x[i] << 8*i; + + return r; -+} + } --static struct public_key *public_key_from_external( -- const struct KYBER_public_key *external) { -- static_assert(sizeof(struct KYBER_public_key) >= sizeof(struct public_key), -- "Kyber public key is too small"); -- static_assert(alignof(struct KYBER_public_key) >= alignof(struct public_key), -- "Kyber public key align incorrect"); -- return (struct public_key *)external; -+/************************************************* +-struct public_key { +- vector t; +- uint8_t rho[32]; +- uint8_t public_key_hash[32]; +- matrix m; ++/************************************************* +* Name: store64 +* +* Description: Store a 64-bit integer to array of 8 bytes in little-endian order @@ -2520,12 +2584,8 @@ index 776c085f9..346d4daec 100644 + + for(i=0;i<8;i++) + x[i] = u >> 8*i; - } - --struct private_key { -- struct public_key pub; -- vector s; -- uint8_t fo_failure_secret[32]; ++} ++ +/* Keccak round constants */ +static const uint64_t KeccakF_RoundConstants[NROUNDS] = { + (uint64_t)0x0000000000000001ULL, @@ -2554,34 +2614,13 @@ index 776c085f9..346d4daec 100644 + (uint64_t)0x8000000080008008ULL }; --static struct private_key *private_key_from_external( -- const struct KYBER_private_key *external) { -- static_assert(sizeof(struct KYBER_private_key) >= sizeof(struct private_key), -- "Kyber private key too small"); -- static_assert( -- alignof(struct KYBER_private_key) >= alignof(struct private_key), -- "Kyber private key align incorrect"); -- return (struct private_key *)external; --} -- --// Calls |KYBER_generate_key_external_entropy| 
with random bytes from --// |RAND_bytes|. --void KYBER_generate_key(uint8_t out_encoded_public_key[KYBER_PUBLIC_KEY_BYTES], -- struct KYBER_private_key *out_private_key) { -- uint8_t entropy[KYBER_GENERATE_KEY_ENTROPY]; -- RAND_bytes(entropy, sizeof(entropy)); -- KYBER_generate_key_external_entropy(out_encoded_public_key, out_private_key, -- entropy); --} -- --static int kyber_marshal_public_key(CBB *out, const struct public_key *pub) { -- uint8_t *vector_output; -- if (!CBB_add_space(out, &vector_output, kEncodedVectorSize)) { -- return 0; -- } -- vector_encode(vector_output, &pub->t, kLog2Prime); -- if (!CBB_add_bytes(out, pub->rho, sizeof(pub->rho))) { -- return 0; +-static struct public_key *public_key_from_external( +- const struct KYBER_public_key *external) { +- static_assert(sizeof(struct KYBER_public_key) >= sizeof(struct public_key), +- "Kyber public key is too small"); +- static_assert(alignof(struct KYBER_public_key) >= alignof(struct public_key), +- "Kyber public key align incorrect"); +- return (struct public_key *)external; +/************************************************* +* Name: KeccakF1600_StatePermute +* @@ -2851,9 +2890,38 @@ index 776c085f9..346d4daec 100644 + state[22] = Asi; + state[23] = Aso; + state[24] = Asu; -+} -+ -+ + } + +-struct private_key { +- struct public_key pub; +- vector s; +- uint8_t fo_failure_secret[32]; +-}; + +-static struct private_key *private_key_from_external( +- const struct KYBER_private_key *external) { +- static_assert(sizeof(struct KYBER_private_key) >= sizeof(struct private_key), +- "Kyber private key too small"); +- static_assert( +- alignof(struct KYBER_private_key) >= alignof(struct private_key), +- "Kyber private key align incorrect"); +- return (struct private_key *)external; +-} +- +-// Calls |KYBER_generate_key_external_entropy| with random bytes from +-// |RAND_bytes|. 
+-void KYBER_generate_key(uint8_t out_encoded_public_key[KYBER_PUBLIC_KEY_BYTES], +- struct KYBER_private_key *out_private_key) { +- uint8_t entropy[KYBER_GENERATE_KEY_ENTROPY]; +- RAND_bytes(entropy, sizeof(entropy)); +- KYBER_generate_key_external_entropy(out_encoded_public_key, out_private_key, +- entropy); +-} +- +-static int kyber_marshal_public_key(CBB *out, const struct public_key *pub) { +- uint8_t *vector_output; +- if (!CBB_add_space(out, &vector_output, kEncodedVectorSize)) { +- return 0; +/************************************************* +* Name: keccak_squeeze +* @@ -2887,9 +2955,65 @@ index 776c085f9..346d4daec 100644 + outlen -= i-pos; + pos = i; } +- vector_encode(vector_output, &pub->t, kLog2Prime); +- if (!CBB_add_bytes(out, pub->rho, sizeof(pub->rho))) { +- return 0; ++ ++ return pos; ++} ++ ++/************************************************* ++* Name: keccak_absorb ++* ++* Description: Absorb step of Keccak; incremental. ++* ++* Arguments: - uint64_t *s: pointer to Keccak state ++* - unsigned int pos: position in current block to be absorbed ++* - unsigned int r: rate in bytes (e.g., 168 for SHAKE128) ++* - const uint8_t *in: pointer to input to be absorbed into s ++* - size_t inlen: length of input in bytes ++* ++* Returns new position pos in current block ++**************************************************/ ++static unsigned int keccak_absorb(uint64_t s[25], ++ unsigned int pos, ++ unsigned int r, ++ const uint8_t *in, ++ size_t inlen) ++{ ++ unsigned int i; ++ ++ while(pos+inlen >= r) { ++ for(i=pos;ipub)) { - abort(); + -+ return pos; -+} -+ -+ +/************************************************* +* Name: keccak_absorb_once +* @@ -3168,18 +3288,8 @@ index 776c085f9..346d4daec 100644 +static void shake128_squeezeblocks(uint8_t *out, size_t nblocks, keccak_state *state) +{ + keccak_squeezeblocks(out, nblocks, state->s, SHAKE128_RATE); - } - --int KYBER_parse_public_key(struct KYBER_public_key *public_key, CBS *in) { -- struct public_key *pub 
= public_key_from_external(public_key); -- CBS orig_in = *in; -- if (!kyber_parse_public_key_no_hash(pub, in) || // -- CBS_len(in) != 0) { -- return 0; -- } -- BORINGSSL_keccak(pub->public_key_hash, sizeof(pub->public_key_hash), -- CBS_data(&orig_in), CBS_len(&orig_in), boringssl_sha3_256); -- return 1; ++} ++ +/************************************************* +* Name: shake256_squeeze +* @@ -3193,40 +3303,8 @@ index 776c085f9..346d4daec 100644 +static void shake256_squeeze(uint8_t *out, size_t outlen, keccak_state *state) +{ + state->pos = keccak_squeeze(out, outlen, state->s, state->pos, SHAKE256_RATE); - } - --int KYBER_marshal_private_key(CBB *out, -- const struct KYBER_private_key *private_key) { -- const struct private_key *const priv = private_key_from_external(private_key); -- uint8_t *s_output; -- if (!CBB_add_space(out, &s_output, kEncodedVectorSize)) { -- return 0; -- } -- vector_encode(s_output, &priv->s, kLog2Prime); -- if (!kyber_marshal_public_key(out, &priv->pub) || -- !CBB_add_bytes(out, priv->pub.public_key_hash, -- sizeof(priv->pub.public_key_hash)) || -- !CBB_add_bytes(out, priv->fo_failure_secret, -- sizeof(priv->fo_failure_secret))) { -- return 0; -- } -- return 1; --} -- --int KYBER_parse_private_key(struct KYBER_private_key *out_private_key, -- CBS *in) { -- struct private_key *const priv = private_key_from_external(out_private_key); -- -- CBS s_bytes; -- if (!CBS_get_bytes(in, &s_bytes, kEncodedVectorSize) || -- !vector_decode(&priv->s, CBS_data(&s_bytes), kLog2Prime) || -- !kyber_parse_public_key_no_hash(&priv->pub, in) || -- !CBS_copy_bytes(in, priv->pub.public_key_hash, -- sizeof(priv->pub.public_key_hash)) || -- !CBS_copy_bytes(in, priv->fo_failure_secret, -- sizeof(priv->fo_failure_secret)) || -- CBS_len(in) != 0) { -- return 0; ++} ++ +/************************************************* +* Name: shake256_absorb_once +* @@ -3260,6 +3338,61 @@ index 776c085f9..346d4daec 100644 +} + +/************************************************* ++* 
Name: shake256_absorb ++* ++* Description: Absorb step of the SHAKE256 XOF; incremental. ++* ++* Arguments: - keccak_state *state: pointer to (initialized) output Keccak state ++* - const uint8_t *in: pointer to input to be absorbed into s ++* - size_t inlen: length of input in bytes ++**************************************************/ ++static void shake256_absorb(keccak_state *state, const uint8_t *in, size_t inlen) ++{ ++ state->pos = keccak_absorb(state->s, state->pos, SHAKE256_RATE, in, inlen); ++} ++ ++/************************************************* ++* Name: shake256_finalize ++* ++* Description: Finalize absorb step of the SHAKE256 XOF. ++* ++* Arguments: - keccak_state *state: pointer to Keccak state ++**************************************************/ ++static void shake256_finalize(keccak_state *state) ++{ ++ keccak_finalize(state->s, state->pos, SHAKE256_RATE, 0x1F); ++ state->pos = SHAKE256_RATE; ++} ++ ++/************************************************* ++* Name: keccak_init ++* ++* Description: Initializes the Keccak state. 
++* ++* Arguments: - uint64_t *s: pointer to Keccak state ++**************************************************/ ++static void keccak_init(uint64_t s[25]) ++{ ++ unsigned int i; ++ for(i=0;i<25;i++) ++ s[i] = 0; ++} ++ ++/************************************************* ++* Name: shake256_init ++* ++* Description: Initilizes Keccak state for use as SHAKE256 XOF ++* ++* Arguments: - keccak_state *state: pointer to (uninitialized) Keccak state ++**************************************************/ ++static void shake256_init(keccak_state *state) ++{ ++ keccak_init(state->s); ++ state->pos = 0; ++} ++ ++ ++/************************************************* +* Name: shake256 +* +* Description: SHAKE256 XOF with non-incremental API @@ -3348,8 +3481,13 @@ index 776c085f9..346d4daec 100644 + extseed[KYBER_SYMBYTES+1] = y; + + shake128_absorb_once(state, extseed, sizeof(extseed)); -+} -+ + } + +-int KYBER_parse_public_key(struct KYBER_public_key *public_key, CBS *in) { +- struct public_key *pub = public_key_from_external(public_key); +- CBS orig_in = *in; +- if (!kyber_parse_public_key_no_hash(pub, in) || // +- CBS_len(in) != 0) { +/************************************************* +* Name: kyber_shake256_prf +* @@ -3392,10 +3530,10 @@ index 776c085f9..346d4daec 100644 +} + +// Modified crypto_kem_enc to BoringSSL style API -+void encap(uint8_t out_ciphertext[KYBER_CIPHERTEXTBYTES], ++int encap(uint8_t out_ciphertext[KYBER_CIPHERTEXTBYTES], + uint8_t ss[KYBER_KEY_BYTES], + const struct public_key *in_pub, -+ const uint8_t seed[KYBER_ENCAP_BYTES]) ++ const uint8_t seed[KYBER_ENCAP_BYTES], int mlkem) +{ + const uint8_t *pk = &in_pub->opaque[0]; + uint8_t *ct = out_ciphertext; @@ -3405,6 +3543,7 @@ index 776c085f9..346d4daec 100644 + uint8_t kr[2*KYBER_SYMBYTES]; + + memcpy(buf, seed, KYBER_SYMBYTES); ++ + /* Don't release system RNG output */ + hash_h(buf, buf, KYBER_SYMBYTES); + @@ -3413,18 +3552,32 @@ index 776c085f9..346d4daec 100644 + hash_g(kr, buf, 2*KYBER_SYMBYTES); + 
+ /* coins are in kr+KYBER_SYMBYTES */ -+ indcpa_enc(ct, buf, pk, kr+KYBER_SYMBYTES); -+ -+ /* overwrite coins in kr with H(c) */ -+ hash_h(kr+KYBER_SYMBYTES, ct, KYBER_CIPHERTEXTBYTES); -+ /* hash concatenation of pre-k and H(c) to k */ -+ kdf(ss, kr, 2*KYBER_SYMBYTES); -+} -+ ++ if(!indcpa_enc(ct, buf, pk, kr+KYBER_SYMBYTES)) + return 0; ++ ++ if (mlkem == 1) { ++ memcpy(ss, kr, KYBER_SYMBYTES); ++ } else { ++ /* overwrite coins in kr with H(c) */ ++ hash_h(kr+KYBER_SYMBYTES, ct, KYBER_CIPHERTEXTBYTES); ++ /* hash concatenation of pre-k and H(c) to k */ ++ kdf(ss, kr, 2*KYBER_SYMBYTES); + } +- BORINGSSL_keccak(pub->public_key_hash, sizeof(pub->public_key_hash), +- CBS_data(&orig_in), CBS_len(&orig_in), boringssl_sha3_256); + return 1; + } + +-int KYBER_marshal_private_key(CBB *out, +- const struct KYBER_private_key *private_key) { +- const struct private_key *const priv = private_key_from_external(private_key); +- uint8_t *s_output; +- if (!CBB_add_space(out, &s_output, kEncodedVectorSize)) { +- return 0; +// Modified crypto_kem_decap to BoringSSL style API +void decap(uint8_t out_shared_key[KYBER_SSBYTES], + const struct private_key *in_priv, -+ const uint8_t *ct, size_t ciphertext_len) ++ const uint8_t *ct, size_t ciphertext_len, int mlkem) +{ + uint8_t *ss = out_shared_key; + const uint8_t *sk = &in_priv->opaque[0]; @@ -3450,27 +3603,68 @@ index 776c085f9..346d4daec 100644 + + fail = verify(ct, cmp, KYBER_CIPHERTEXTBYTES); } -- return 1; -+ -+ /* overwrite coins in kr with H(c) */ -+ hash_h(kr+KYBER_SYMBYTES, ct, ciphertext_len); -+ -+ /* Overwrite pre-k with z on re-encryption failure */ -+ cmov(kr, sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES, KYBER_SYMBYTES, fail); -+ -+ /* hash concatenation of pre-k and H(c) to k */ -+ kdf(ss, kr, 2*KYBER_SYMBYTES); -+} +- vector_encode(s_output, &priv->s, kLog2Prime); +- if (!kyber_marshal_public_key(out, &priv->pub) || +- !CBB_add_bytes(out, priv->pub.public_key_hash, +- sizeof(priv->pub.public_key_hash)) || +- 
!CBB_add_bytes(out, priv->fo_failure_secret, +- sizeof(priv->fo_failure_secret))) { +- return 0; + ++ if (mlkem == 1) { ++ /* Compute shared secret in case of rejection: ss2 = PRF(z || c). */ ++ uint8_t ss2[KYBER_SYMBYTES]; ++ keccak_state ks; ++ shake256_init(&ks); ++ shake256_absorb( ++ &ks, ++ sk + KYBER_SECRETKEYBYTES - KYBER_SYMBYTES, ++ KYBER_SYMBYTES ++ ); ++ shake256_absorb(&ks, ct, ciphertext_len); ++ shake256_finalize(&ks); ++ shake256_squeeze(ss2, KYBER_SYMBYTES, &ks); ++ ++ /* Set ss2 to the real shared secret if c = c' */ ++ cmov(ss2, kr, KYBER_SYMBYTES, 1-fail); ++ memcpy(ss, ss2, KYBER_SYMBYTES); ++ } else { ++ /* overwrite coins in kr with H(c) */ ++ hash_h(kr+KYBER_SYMBYTES, ct, ciphertext_len); ++ ++ /* Overwrite pre-k with z on re-encryption failure */ ++ cmov(kr, sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES, KYBER_SYMBYTES, fail); ++ ++ /* hash concatenation of pre-k and H(c) to k */ ++ kdf(ss, kr, 2*KYBER_SYMBYTES); + } +- return 1; + } + +-int KYBER_parse_private_key(struct KYBER_private_key *out_private_key, +- CBS *in) { +- struct private_key *const priv = private_key_from_external(out_private_key); +void marshal_public_key(uint8_t out[KYBER_PUBLICKEYBYTES], + const struct public_key *in_pub) { + memcpy(out, &in_pub->opaque, KYBER_PUBLICKEYBYTES); +} -+ -+void parse_public_key(struct public_key *out, -+ const uint8_t in[KYBER_PUBLICKEYBYTES]) { -+ memcpy(&out->opaque, in, KYBER_PUBLICKEYBYTES); - } + +- CBS s_bytes; +- if (!CBS_get_bytes(in, &s_bytes, kEncodedVectorSize) || +- !vector_decode(&priv->s, CBS_data(&s_bytes), kLog2Prime) || +- !kyber_parse_public_key_no_hash(&priv->pub, in) || +- !CBS_copy_bytes(in, priv->pub.public_key_hash, +- sizeof(priv->pub.public_key_hash)) || +- !CBS_copy_bytes(in, priv->fo_failure_secret, +- sizeof(priv->fo_failure_secret)) || +- CBS_len(in) != 0) { +- return 0; +- } +- return 1; ++void parse_public_key(struct public_key *out, ++ const uint8_t in[KYBER_PUBLICKEYBYTES]) { ++ memcpy(&out->opaque, in, 
KYBER_PUBLICKEYBYTES); + } diff --git a/src/crypto/kyber/kyber512.c b/src/crypto/kyber/kyber512.c new file mode 100644 index 000000000..21eed11a2 @@ -3728,7 +3922,7 @@ index eb76b5bd7..000000000 - FileTestGTest("crypto/kyber/kyber_tests.txt", KyberFileTest); -} diff --git a/src/crypto/obj/obj_dat.h b/src/crypto/obj/obj_dat.h -index 654b3c08e..06f80f971 100644 +index 654b3c08e..6cef2c079 100644 --- a/src/crypto/obj/obj_dat.h +++ b/src/crypto/obj/obj_dat.h @@ -57,7 +57,7 @@ @@ -3736,11 +3930,11 @@ index 654b3c08e..06f80f971 100644 -#define NUM_NID 965 -+#define NUM_NID 968 ++#define NUM_NID 969 static const uint8_t kObjectData[] = { /* NID_rsadsi */ -@@ -8784,6 +8784,12 @@ static const ASN1_OBJECT kObjects[NUM_NID] = { +@@ -8784,6 +8784,13 @@ static const ASN1_OBJECT kObjects[NUM_NID] = { {"HKDF", "hkdf", NID_hkdf, 0, NULL, 0}, {"X25519Kyber768Draft00", "X25519Kyber768Draft00", NID_X25519Kyber768Draft00, 0, NULL, 0}, @@ -3750,10 +3944,11 @@ index 654b3c08e..06f80f971 100644 + NULL, 0}, + {"X25519Kyber768Draft00Old", "X25519Kyber768Draft00Old", + NID_X25519Kyber768Draft00Old, 0, NULL, 0}, ++ {"X25519MLKEM768", "X25519MLKEM768", NID_X25519MLKEM768, 0, NULL, 0}, }; static const uint16_t kNIDsInShortNameOrder[] = { -@@ -8916,6 +8922,7 @@ static const uint16_t kNIDsInShortNameOrder[] = { +@@ -8916,6 +8923,7 @@ static const uint16_t kNIDsInShortNameOrder[] = { 18 /* OU */, 749 /* Oakley-EC2N-3 */, 750 /* Oakley-EC2N-4 */, @@ -3761,17 +3956,18 @@ index 654b3c08e..06f80f971 100644 9 /* PBE-MD2-DES */, 168 /* PBE-MD2-RC2-64 */, 10 /* PBE-MD5-DES */, -@@ -8982,7 +8989,9 @@ static const uint16_t kNIDsInShortNameOrder[] = { +@@ -8982,7 +8990,10 @@ static const uint16_t kNIDsInShortNameOrder[] = { 458 /* UID */, 0 /* UNDEF */, 948 /* X25519 */, + 965 /* X25519Kyber512Draft00 */, 964 /* X25519Kyber768Draft00 */, + 967 /* X25519Kyber768Draft00Old */, ++ 968 /* X25519MLKEM768 */, 961 /* X448 */, 11 /* X500 */, 378 /* X500algorithms */, -@@ -9829,6 +9838,7 @@ static const uint16_t 
kNIDsInLongNameOrder[] = { +@@ -9829,6 +9840,7 @@ static const uint16_t kNIDsInLongNameOrder[] = { 366 /* OCSP Nonce */, 371 /* OCSP Service Locator */, 180 /* OCSP Signing */, @@ -3779,32 +3975,34 @@ index 654b3c08e..06f80f971 100644 161 /* PBES2 */, 69 /* PBKDF2 */, 162 /* PBMAC1 */, -@@ -9853,7 +9863,9 @@ static const uint16_t kNIDsInLongNameOrder[] = { +@@ -9853,7 +9865,10 @@ static const uint16_t kNIDsInLongNameOrder[] = { 133 /* Time Stamping */, 375 /* Trust Root */, 948 /* X25519 */, + 965 /* X25519Kyber512Draft00 */, 964 /* X25519Kyber768Draft00 */, + 967 /* X25519Kyber768Draft00Old */, ++ 968 /* X25519MLKEM768 */, 961 /* X448 */, 12 /* X509 */, 402 /* X509v3 AC Targeting */, diff --git a/src/crypto/obj/obj_mac.num b/src/crypto/obj/obj_mac.num -index a0519acee..caeb5eaed 100644 +index a0519acee..2a46adfe8 100644 --- a/src/crypto/obj/obj_mac.num +++ b/src/crypto/obj/obj_mac.num -@@ -952,3 +952,6 @@ X448 961 +@@ -952,3 +952,7 @@ X448 961 sha512_256 962 hkdf 963 X25519Kyber768Draft00 964 +X25519Kyber512Draft00 965 +P256Kyber768Draft00 966 +X25519Kyber768Draft00Old 967 ++X25519MLKEM768 968 diff --git a/src/crypto/obj/objects.txt b/src/crypto/obj/objects.txt -index 3ad32ea3d..aa1404d83 100644 +index 3ad32ea3d..347fc556a 100644 --- a/src/crypto/obj/objects.txt +++ b/src/crypto/obj/objects.txt -@@ -1332,8 +1332,11 @@ secg-scheme 14 3 : dhSinglePass-cofactorDH-sha512kdf-scheme +@@ -1332,8 +1332,12 @@ secg-scheme 14 3 : dhSinglePass-cofactorDH-sha512kdf-scheme : dh-std-kdf : dh-cofactor-kdf @@ -3814,11 +4012,12 @@ index 3ad32ea3d..aa1404d83 100644 : X25519Kyber768Draft00 + : P256Kyber768Draft00 + : X25519Kyber768Draft00Old ++ : X25519MLKEM768 # See RFC 8410. 
1 3 101 110 : X25519 diff --git a/src/include/openssl/kyber.h b/src/include/openssl/kyber.h -index cafae9d17..074ac5906 100644 +index cafae9d17..a05eb8957 100644 --- a/src/include/openssl/kyber.h +++ b/src/include/openssl/kyber.h @@ -1,17 +1,3 @@ @@ -3839,7 +4038,7 @@ index cafae9d17..074ac5906 100644 #ifndef OPENSSL_HEADER_KYBER_H #define OPENSSL_HEADER_KYBER_H -@@ -21,105 +7,100 @@ +@@ -21,105 +7,104 @@ extern "C" { #endif @@ -3983,39 +4182,43 @@ index cafae9d17..074ac5906 100644 + +// KYBER512_encap is a deterministic function the generates and encrypts a random +// session key from the given entropy, writing those values to |out_shared_key| -+// and |out_ciphertext|, respectively. -+OPENSSL_EXPORT void KYBER512_encap(uint8_t out_ciphertext[KYBER512_CIPHERTEXT_BYTES], ++// and |out_ciphertext|, respectively. If |mlkem| is 1, will use ML-KEM-512. ++OPENSSL_EXPORT int KYBER512_encap(uint8_t out_ciphertext[KYBER512_CIPHERTEXT_BYTES], + uint8_t out_shared_key[KYBER_KEY_BYTES], + const struct KYBER512_public_key *in_pub, -+ const uint8_t in[KYBER_ENCAP_BYTES]); ++ const uint8_t in[KYBER_ENCAP_BYTES], ++ int mlkem); + +// KYBER768_encap is a deterministic function the generates and encrypts a random +// session key from the given entropy, writing those values to |out_shared_key| -+// and |out_ciphertext|, respectively. -+OPENSSL_EXPORT void KYBER768_encap(uint8_t out_ciphertext[KYBER768_CIPHERTEXT_BYTES], ++// and |out_ciphertext|, respectively. If |mlkem| is 1, will use ML-KEM-768. ++OPENSSL_EXPORT int KYBER768_encap(uint8_t out_ciphertext[KYBER768_CIPHERTEXT_BYTES], + uint8_t out_shared_key[KYBER_KEY_BYTES], + const struct KYBER768_public_key *in_pub, -+ const uint8_t in[KYBER_ENCAP_BYTES]); ++ const uint8_t in[KYBER_ENCAP_BYTES], ++ int mlkem); + +// KYBER_decap decrypts a session key from |ciphertext_len| bytes of +// |ciphertext|. If the ciphertext is valid, the decrypted key is written to +// |out_shared_key|. 
Otherwise a key dervied from |ciphertext| and a secret key (kept +// in |in_priv|) is written. If the ciphertext is the wrong length then it will +// leak which was done via side-channels. Otherwise it should perform either -+// action in constant-time. ++// action in constant-time. If |mlkem| is 1, will use ML-KEM-512. +OPENSSL_EXPORT void KYBER512_decap(uint8_t out_shared_key[KYBER_KEY_BYTES], + const struct KYBER512_private_key *in_priv, -+ const uint8_t *ciphertext, size_t ciphertext_len); ++ const uint8_t *ciphertext, size_t ciphertext_len, ++ int mlkem); + +// KYBER_decap decrypts a session key from |ciphertext_len| bytes of +// |ciphertext|. If the ciphertext is valid, the decrypted key is written to +// |out_shared_key|. Otherwise a key dervied from |ciphertext| and a secret key (kept +// in |in_priv|) is written. If the ciphertext is the wrong length then it will +// leak which was done via side-channels. Otherwise it should perform either -+// action in constant-time. ++// action in constant-time. If |mlkem| is 1, will use ML-KEM-768. +OPENSSL_EXPORT void KYBER768_decap(uint8_t out_shared_key[KYBER_KEY_BYTES], + const struct KYBER768_private_key *in_priv, -+ const uint8_t *ciphertext, size_t ciphertext_len); ++ const uint8_t *ciphertext, size_t ciphertext_len, ++ int mlkem); + +// KYBER512_marshal_public_key serialises |in_pub| to |out|. 
+OPENSSL_EXPORT void KYBER512_marshal_public_key( @@ -4036,10 +4239,10 @@ index cafae9d17..074ac5906 100644 #if defined(__cplusplus) } // extern C diff --git a/src/include/openssl/nid.h b/src/include/openssl/nid.h -index 4dd8841b1..8237efb74 100644 +index 4dd8841b1..5b102c610 100644 --- a/src/include/openssl/nid.h +++ b/src/include/openssl/nid.h -@@ -4255,6 +4255,15 @@ extern "C" { +@@ -4255,6 +4255,18 @@ extern "C" { #define SN_X25519Kyber768Draft00 "X25519Kyber768Draft00" #define NID_X25519Kyber768Draft00 964 @@ -4051,21 +4254,25 @@ index 4dd8841b1..8237efb74 100644 + +#define SN_X25519Kyber768Draft00Old "X25519Kyber768Draft00Old" +#define NID_X25519Kyber768Draft00Old 967 ++ ++#define SN_X25519MLKEM768 "X25519MLKEM768" ++#define NID_X25519MLKEM768 968 + #if defined(__cplusplus) } /* extern C */ diff --git a/src/include/openssl/ssl.h b/src/include/openssl/ssl.h -index 53aa9b453..8233ad210 100644 +index 53aa9b453..f9683f4cf 100644 --- a/src/include/openssl/ssl.h +++ b/src/include/openssl/ssl.h -@@ -2378,6 +2378,9 @@ OPENSSL_EXPORT int SSL_set1_curves_list(SSL *ssl, const char *curves); +@@ -2378,6 +2378,10 @@ OPENSSL_EXPORT int SSL_set1_curves_list(SSL *ssl, const char *curves); #define SSL_CURVE_SECP521R1 25 #define SSL_CURVE_X25519 29 #define SSL_CURVE_X25519_KYBER768_DRAFT00 0x6399 +#define SSL_CURVE_X25519_KYBER512_DRAFT00 0xfe30 +#define SSL_CURVE_X25519_KYBER768_DRAFT00_OLD 0xfe31 +#define SSL_CURVE_P256_KYBER768_DRAFT00 0xfe32 ++#define SSL_CURVE_X25519_MLKEM768 0x11ec // SSL_get_curve_id returns the ID of the curve used by |ssl|'s most recently // completed handshake or 0 if not applicable. 
@@ -4083,21 +4290,22 @@ index 5c7e881bf..3c0770cf3 100644 crypto/pkcs8/test/no_encryption.p12 crypto/pkcs8/test/nss.p12 diff --git a/src/ssl/extensions.cc b/src/ssl/extensions.cc -index 5ee280221..0a706c411 100644 +index 5ee280221..aae3e6a7f 100644 --- a/src/ssl/extensions.cc +++ b/src/ssl/extensions.cc -@@ -207,6 +207,9 @@ static bool tls1_check_duplicate_extensions(const CBS *cbs) { +@@ -207,6 +207,10 @@ static bool tls1_check_duplicate_extensions(const CBS *cbs) { static bool is_post_quantum_group(uint16_t id) { switch (id) { case SSL_CURVE_X25519_KYBER768_DRAFT00: + case SSL_CURVE_X25519_KYBER768_DRAFT00_OLD: + case SSL_CURVE_X25519_KYBER512_DRAFT00: + case SSL_CURVE_P256_KYBER768_DRAFT00: ++ case SSL_CURVE_X25519_MLKEM768: return true; default: return false; diff --git a/src/ssl/ssl_key_share.cc b/src/ssl/ssl_key_share.cc -index 09a9ad380..f7d2226e3 100644 +index 09a9ad380..d7a8f0a80 100644 --- a/src/ssl/ssl_key_share.cc +++ b/src/ssl/ssl_key_share.cc @@ -26,6 +26,7 @@ @@ -4108,13 +4316,14 @@ index 09a9ad380..f7d2226e3 100644 #include #include #include -@@ -193,63 +194,384 @@ class X25519KeyShare : public SSLKeyShare { +@@ -193,63 +194,292 @@ class X25519KeyShare : public SSLKeyShare { uint8_t private_key_[32]; }; -class X25519Kyber768KeyShare : public SSLKeyShare { +class P256Kyber768Draft00KeyShare : public SSLKeyShare { -+ public: + public: +- X25519Kyber768KeyShare() {} + P256Kyber768Draft00KeyShare() {} + + uint16_t GroupID() const override { return SSL_CURVE_P256_KYBER768_DRAFT00; } @@ -4159,15 +4368,17 @@ index 09a9ad380..f7d2226e3 100644 + + uint8_t kyber_public_key_bytes[KYBER768_PUBLIC_KEY_BYTES]; + KYBER768_marshal_public_key(kyber_public_key_bytes, &kyber_public_key); -+ + +- uint16_t GroupID() const override { +- return SSL_CURVE_X25519_KYBER768_DRAFT00; + if (!CBB_add_bytes(out, kyber_public_key_bytes, + sizeof(kyber_public_key_bytes))) { + return false; + } + + return true; -+ } -+ + } + + bool Encap(CBB *out_public_key, Array *out_secret, + 
uint8_t *out_alert, Span peer_key) override { + assert(!p256_private_key_); @@ -4247,7 +4458,10 @@ index 09a9ad380..f7d2226e3 100644 + uint8_t entropy[KYBER_ENCAP_BYTES]; + RAND_bytes(entropy, sizeof(entropy)); + -+ KYBER768_encap(ciphertext, secret.data() + 32, &peer_public_key, entropy); ++ if(!KYBER768_encap(ciphertext, secret.data() + 32, &peer_public_key, entropy, 0)) { ++ *out_alert = SSL_AD_ILLEGAL_PARAMETER; ++ return false; ++ } + if(!CBB_add_bytes(out_public_key, ciphertext, sizeof(ciphertext))) { + return false; + } @@ -4318,7 +4532,7 @@ index 09a9ad380..f7d2226e3 100644 + } + + KYBER768_decap(secret.data() + 32, &kyber_private_key_, -+ peer_key.data() + 65, peer_key.size() - 65); ++ peer_key.data() + 65, peer_key.size() - 65, 0); + + *out_secret = std::move(secret); + return true; @@ -4330,14 +4544,138 @@ index 09a9ad380..f7d2226e3 100644 +}; + +class X25519Kyber768Draft00KeyShare : public SSLKeyShare { - public: -- X25519Kyber768KeyShare() {} ++ public: + X25519Kyber768Draft00KeyShare(uint16_t group_id) : group_id_(group_id) { + assert(group_id == SSL_CURVE_X25519_KYBER768_DRAFT00 + || group_id == SSL_CURVE_X25519_KYBER768_DRAFT00_OLD); + } + + uint16_t GroupID() const override { return group_id_; } ++ + bool Generate(CBB *out) override { + uint8_t x25519_public_key[32]; + X25519_keypair(x25519_public_key, x25519_private_key_); + +- uint8_t kyber_public_key[KYBER_PUBLIC_KEY_BYTES]; +- KYBER_generate_key(kyber_public_key, &kyber_private_key_); ++ uint8_t kyber_entropy[KYBER_GENERATE_KEY_BYTES]; ++ KYBER768_public_key kyber_public_key; ++ RAND_bytes(kyber_entropy, sizeof(kyber_entropy)); ++ KYBER768_generate_key(&kyber_public_key, &kyber_private_key_, kyber_entropy); ++ ++ uint8_t kyber_public_key_bytes[KYBER768_PUBLIC_KEY_BYTES]; ++ KYBER768_marshal_public_key(kyber_public_key_bytes, &kyber_public_key); + + if (!CBB_add_bytes(out, x25519_public_key, sizeof(x25519_public_key)) || +- !CBB_add_bytes(out, kyber_public_key, sizeof(kyber_public_key))) { ++ 
!CBB_add_bytes(out, kyber_public_key_bytes, ++ sizeof(kyber_public_key_bytes))) { + return false; + } + + return true; + } + +- bool Encap(CBB *out_ciphertext, Array *out_secret, +- uint8_t *out_alert, Span peer_key) override { ++ bool Encap(CBB *out_public_key, Array *out_secret, ++ uint8_t *out_alert, Span peer_key) override { + Array secret; +- if (!secret.Init(32 + 32)) { ++ if (!secret.Init(32 + KYBER_KEY_BYTES)) { ++ OPENSSL_PUT_ERROR(SSL, ERR_R_MALLOC_FAILURE); + return false; + } + + uint8_t x25519_public_key[32]; + X25519_keypair(x25519_public_key, x25519_private_key_); +- KYBER_public_key peer_kyber_pub; +- CBS peer_key_cbs; +- CBS peer_x25519_cbs; +- CBS peer_kyber_cbs; +- CBS_init(&peer_key_cbs, peer_key.data(), peer_key.size()); +- if (!CBS_get_bytes(&peer_key_cbs, &peer_x25519_cbs, 32) || +- !CBS_get_bytes(&peer_key_cbs, &peer_kyber_cbs, +- KYBER_PUBLIC_KEY_BYTES) || +- CBS_len(&peer_key_cbs) != 0 || +- !X25519(secret.data(), x25519_private_key_, +- CBS_data(&peer_x25519_cbs)) || +- !KYBER_parse_public_key(&peer_kyber_pub, &peer_kyber_cbs)) { ++ ++ KYBER768_public_key peer_public_key; ++ if (peer_key.size() != 32 + KYBER768_PUBLIC_KEY_BYTES) { ++ *out_alert = SSL_AD_DECODE_ERROR; ++ OPENSSL_PUT_ERROR(SSL, SSL_R_BAD_ECPOINT); ++ return false; ++ } ++ ++ KYBER768_parse_public_key(&peer_public_key, peer_key.data() + 32); ++ ++ if (!X25519(secret.data(), x25519_private_key_, peer_key.data())) { + *out_alert = SSL_AD_DECODE_ERROR; + OPENSSL_PUT_ERROR(SSL, SSL_R_BAD_ECPOINT); + return false; + } + +- uint8_t kyber_ciphertext[KYBER_CIPHERTEXT_BYTES]; +- KYBER_encap(kyber_ciphertext, secret.data() + 32, secret.size() - 32, +- &peer_kyber_pub); ++ uint8_t ciphertext[KYBER768_CIPHERTEXT_BYTES]; ++ uint8_t entropy[KYBER_ENCAP_BYTES]; ++ RAND_bytes(entropy, sizeof(entropy)); + +- if (!CBB_add_bytes(out_ciphertext, x25519_public_key, ++ if(!KYBER768_encap(ciphertext, secret.data() + 32, &peer_public_key, entropy, 0)) { ++ *out_alert = SSL_AD_ILLEGAL_PARAMETER; ++ 
return false; ++ } ++ if(!CBB_add_bytes(out_public_key, x25519_public_key, + sizeof(x25519_public_key)) || +- !CBB_add_bytes(out_ciphertext, kyber_ciphertext, +- sizeof(kyber_ciphertext))) { ++ !CBB_add_bytes(out_public_key, ciphertext, sizeof(ciphertext))) { + return false; + } + +@@ -258,30 +488,233 @@ class X25519Kyber768KeyShare : public SSLKeyShare { + } + + bool Decap(Array *out_secret, uint8_t *out_alert, +- Span ciphertext) override { ++ Span peer_key) override { ++ *out_alert = SSL_AD_INTERNAL_ERROR; ++ ++ Array secret; ++ if (!secret.Init(32 + KYBER_KEY_BYTES)) { ++ OPENSSL_PUT_ERROR(SSL, ERR_R_MALLOC_FAILURE); ++ return false; ++ } ++ ++ if (peer_key.size() != 32 + KYBER768_CIPHERTEXT_BYTES || ++ !X25519(secret.data(), x25519_private_key_, peer_key.data())) { ++ *out_alert = SSL_AD_DECODE_ERROR; ++ OPENSSL_PUT_ERROR(SSL, SSL_R_BAD_ECPOINT); ++ return false; ++ } ++ ++ KYBER768_decap(secret.data() + 32, &kyber_private_key_, ++ peer_key.data() + 32, peer_key.size() - 32, 0); ++ ++ *out_secret = std::move(secret); ++ return true; ++ } ++ ++ private: ++ uint8_t x25519_private_key_[32]; ++ KYBER768_private_key kyber_private_key_; ++ uint16_t group_id_; ++}; ++ ++class X25519MLKEM768KeyShare : public SSLKeyShare { ++ public: ++ X25519MLKEM768KeyShare() {} ++ ++ uint16_t GroupID() const override { return SSL_CURVE_X25519_MLKEM768; } + + bool Generate(CBB *out) override { + uint8_t x25519_public_key[32]; @@ -4351,9 +4689,8 @@ index 09a9ad380..f7d2226e3 100644 + uint8_t kyber_public_key_bytes[KYBER768_PUBLIC_KEY_BYTES]; + KYBER768_marshal_public_key(kyber_public_key_bytes, &kyber_public_key); + -+ if (!CBB_add_bytes(out, x25519_public_key, sizeof(x25519_public_key)) || -+ !CBB_add_bytes(out, kyber_public_key_bytes, -+ sizeof(kyber_public_key_bytes))) { ++ if (!CBB_add_bytes(out, kyber_public_key_bytes, sizeof(kyber_public_key_bytes)) || ++ !CBB_add_bytes(out, x25519_public_key, sizeof(x25519_public_key))) { + return false; + } + @@ -4372,15 +4709,16 @@ index 
09a9ad380..f7d2226e3 100644 + X25519_keypair(x25519_public_key, x25519_private_key_); + + KYBER768_public_key peer_public_key; -+ if (peer_key.size() != 32 + KYBER768_PUBLIC_KEY_BYTES) { ++ if (peer_key.size() != KYBER768_PUBLIC_KEY_BYTES + 32) { + *out_alert = SSL_AD_DECODE_ERROR; + OPENSSL_PUT_ERROR(SSL, SSL_R_BAD_ECPOINT); + return false; + } + -+ KYBER768_parse_public_key(&peer_public_key, peer_key.data() + 32); ++ KYBER768_parse_public_key(&peer_public_key, peer_key.data()); + -+ if (!X25519(secret.data(), x25519_private_key_, peer_key.data())) { ++ if (!X25519(secret.data() + 32, x25519_private_key_, ++ peer_key.data() + KYBER768_PUBLIC_KEY_BYTES)) { + *out_alert = SSL_AD_DECODE_ERROR; + OPENSSL_PUT_ERROR(SSL, SSL_R_BAD_ECPOINT); + return false; @@ -4390,10 +4728,12 @@ index 09a9ad380..f7d2226e3 100644 + uint8_t entropy[KYBER_ENCAP_BYTES]; + RAND_bytes(entropy, sizeof(entropy)); + -+ KYBER768_encap(ciphertext, secret.data() + 32, &peer_public_key, entropy); -+ if(!CBB_add_bytes(out_public_key, x25519_public_key, -+ sizeof(x25519_public_key)) || -+ !CBB_add_bytes(out_public_key, ciphertext, sizeof(ciphertext))) { ++ if(!KYBER768_encap(ciphertext, secret.data(), &peer_public_key, entropy, 1)) { ++ *out_alert = SSL_AD_ILLEGAL_PARAMETER; ++ return false; ++ } ++ if(!CBB_add_bytes(out_public_key, ciphertext, sizeof(ciphertext)) || ++ !CBB_add_bytes(out_public_key, x25519_public_key, sizeof(x25519_public_key))) { + return false; + } + @@ -4403,34 +4743,37 @@ index 09a9ad380..f7d2226e3 100644 + + bool Decap(Array *out_secret, uint8_t *out_alert, + Span peer_key) override { -+ *out_alert = SSL_AD_INTERNAL_ERROR; -+ -+ Array secret; + *out_alert = SSL_AD_INTERNAL_ERROR; + + Array secret; +- if (!secret.Init(32 + 32)) { + if (!secret.Init(32 + KYBER_KEY_BYTES)) { + OPENSSL_PUT_ERROR(SSL, ERR_R_MALLOC_FAILURE); -+ return false; -+ } -+ -+ if (peer_key.size() != 32 + KYBER768_CIPHERTEXT_BYTES || -+ !X25519(secret.data(), x25519_private_key_, peer_key.data())) { -+ 
*out_alert = SSL_AD_DECODE_ERROR; -+ OPENSSL_PUT_ERROR(SSL, SSL_R_BAD_ECPOINT); -+ return false; -+ } -+ -+ KYBER768_decap(secret.data() + 32, &kyber_private_key_, -+ peer_key.data() + 32, peer_key.size() - 32); + return false; + } -- uint16_t GroupID() const override { -- return SSL_CURVE_X25519_KYBER768_DRAFT00; +- if (ciphertext.size() != 32 + KYBER_CIPHERTEXT_BYTES || +- !X25519(secret.data(), x25519_private_key_, ciphertext.data())) { ++ if (peer_key.size() != KYBER768_CIPHERTEXT_BYTES + 32 || ++ !X25519(secret.data() + 32, x25519_private_key_, ++ peer_key.data() + KYBER768_CIPHERTEXT_BYTES )) { + *out_alert = SSL_AD_DECODE_ERROR; + OPENSSL_PUT_ERROR(SSL, SSL_R_BAD_ECPOINT); + return false; + } + +- KYBER_decap(secret.data() + 32, secret.size() - 32, ciphertext.data() + 32, +- &kyber_private_key_); ++ KYBER768_decap(secret.data(), &kyber_private_key_, ++ peer_key.data(), peer_key.size() - 32, 1); ++ + *out_secret = std::move(secret); + return true; - } - ++ } ++ + private: + uint8_t x25519_private_key_[32]; + KYBER768_private_key kyber_private_key_; -+ uint16_t group_id_; +}; + +class X25519Kyber512Draft00KeyShare : public SSLKeyShare { @@ -4439,12 +4782,10 @@ index 09a9ad380..f7d2226e3 100644 + + uint16_t GroupID() const override { return SSL_CURVE_X25519_KYBER512_DRAFT00; } + - bool Generate(CBB *out) override { - uint8_t x25519_public_key[32]; - X25519_keypair(x25519_public_key, x25519_private_key_); - -- uint8_t kyber_public_key[KYBER_PUBLIC_KEY_BYTES]; -- KYBER_generate_key(kyber_public_key, &kyber_private_key_); ++ bool Generate(CBB *out) override { ++ uint8_t x25519_public_key[32]; ++ X25519_keypair(x25519_public_key, x25519_private_key_); ++ + uint8_t kyber_entropy[KYBER_GENERATE_KEY_BYTES]; + KYBER512_public_key kyber_public_key; + RAND_bytes(kyber_entropy, sizeof(kyber_entropy)); @@ -4452,42 +4793,26 @@ index 09a9ad380..f7d2226e3 100644 + + uint8_t kyber_public_key_bytes[KYBER512_PUBLIC_KEY_BYTES]; + 
KYBER512_marshal_public_key(kyber_public_key_bytes, &kyber_public_key); - - if (!CBB_add_bytes(out, x25519_public_key, sizeof(x25519_public_key)) || -- !CBB_add_bytes(out, kyber_public_key, sizeof(kyber_public_key))) { ++ ++ if (!CBB_add_bytes(out, x25519_public_key, sizeof(x25519_public_key)) || + !CBB_add_bytes(out, kyber_public_key_bytes, + sizeof(kyber_public_key_bytes))) { - return false; - } - - return true; - } - -- bool Encap(CBB *out_ciphertext, Array *out_secret, -- uint8_t *out_alert, Span peer_key) override { ++ return false; ++ } ++ ++ return true; ++ } ++ + bool Encap(CBB *out_public_key, Array *out_secret, + uint8_t *out_alert, Span peer_key) override { - Array secret; -- if (!secret.Init(32 + 32)) { ++ Array secret; + if (!secret.Init(32 + KYBER_KEY_BYTES)) { + OPENSSL_PUT_ERROR(SSL, ERR_R_MALLOC_FAILURE); - return false; - } - - uint8_t x25519_public_key[32]; - X25519_keypair(x25519_public_key, x25519_private_key_); -- KYBER_public_key peer_kyber_pub; -- CBS peer_key_cbs; -- CBS peer_x25519_cbs; -- CBS peer_kyber_cbs; -- CBS_init(&peer_key_cbs, peer_key.data(), peer_key.size()); -- if (!CBS_get_bytes(&peer_key_cbs, &peer_x25519_cbs, 32) || -- !CBS_get_bytes(&peer_key_cbs, &peer_kyber_cbs, -- KYBER_PUBLIC_KEY_BYTES) || -- CBS_len(&peer_key_cbs) != 0 || -- !X25519(secret.data(), x25519_private_key_, -- CBS_data(&peer_x25519_cbs)) || -- !KYBER_parse_public_key(&peer_kyber_pub, &peer_kyber_cbs)) { ++ return false; ++ } ++ ++ uint8_t x25519_public_key[32]; ++ X25519_keypair(x25519_public_key, x25519_private_key_); + + KYBER512_public_key peer_public_key; + if (peer_key.size() != 32 + KYBER512_PUBLIC_KEY_BYTES) { @@ -4499,56 +4824,48 @@ index 09a9ad380..f7d2226e3 100644 + KYBER512_parse_public_key(&peer_public_key, peer_key.data() + 32); + + if (!X25519(secret.data(), x25519_private_key_, peer_key.data())) { - *out_alert = SSL_AD_DECODE_ERROR; - OPENSSL_PUT_ERROR(SSL, SSL_R_BAD_ECPOINT); - return false; - } - -- uint8_t 
kyber_ciphertext[KYBER_CIPHERTEXT_BYTES]; -- KYBER_encap(kyber_ciphertext, secret.data() + 32, secret.size() - 32, -- &peer_kyber_pub); ++ *out_alert = SSL_AD_DECODE_ERROR; ++ OPENSSL_PUT_ERROR(SSL, SSL_R_BAD_ECPOINT); ++ return false; ++ } ++ + uint8_t ciphertext[KYBER512_CIPHERTEXT_BYTES]; + uint8_t entropy[KYBER_ENCAP_BYTES]; + RAND_bytes(entropy, sizeof(entropy)); - -- if (!CBB_add_bytes(out_ciphertext, x25519_public_key, -+ KYBER512_encap(ciphertext, secret.data() + 32, &peer_public_key, entropy); ++ ++ if(!KYBER512_encap(ciphertext, secret.data() + 32, &peer_public_key, entropy, 0)) { ++ *out_alert = SSL_AD_ILLEGAL_PARAMETER; ++ return false; ++ } + if(!CBB_add_bytes(out_public_key, x25519_public_key, - sizeof(x25519_public_key)) || -- !CBB_add_bytes(out_ciphertext, kyber_ciphertext, -- sizeof(kyber_ciphertext))) { ++ sizeof(x25519_public_key)) || + !CBB_add_bytes(out_public_key, ciphertext, sizeof(ciphertext))) { - return false; - } - -@@ -258,30 +580,32 @@ class X25519Kyber768KeyShare : public SSLKeyShare { - } - - bool Decap(Array *out_secret, uint8_t *out_alert, -- Span ciphertext) override { ++ return false; ++ } ++ ++ *out_secret = std::move(secret); ++ return true; ++ } ++ ++ bool Decap(Array *out_secret, uint8_t *out_alert, + Span peer_key) override { - *out_alert = SSL_AD_INTERNAL_ERROR; - - Array secret; -- if (!secret.Init(32 + 32)) { ++ *out_alert = SSL_AD_INTERNAL_ERROR; ++ ++ Array secret; + if (!secret.Init(32 + KYBER_KEY_BYTES)) { + OPENSSL_PUT_ERROR(SSL, ERR_R_MALLOC_FAILURE); - return false; - } - -- if (ciphertext.size() != 32 + KYBER_CIPHERTEXT_BYTES || -- !X25519(secret.data(), x25519_private_key_, ciphertext.data())) { ++ return false; ++ } ++ + if (peer_key.size() != 32 + KYBER512_CIPHERTEXT_BYTES || + !X25519(secret.data(), x25519_private_key_, peer_key.data())) { - *out_alert = SSL_AD_DECODE_ERROR; - OPENSSL_PUT_ERROR(SSL, SSL_R_BAD_ECPOINT); - return false; - } - -- KYBER_decap(secret.data() + 32, secret.size() - 32, 
ciphertext.data() + 32, -- &kyber_private_key_); ++ *out_alert = SSL_AD_DECODE_ERROR; ++ OPENSSL_PUT_ERROR(SSL, SSL_R_BAD_ECPOINT); ++ return false; ++ } ++ + KYBER512_decap(secret.data() + 32, &kyber_private_key_, -+ peer_key.data() + 32, peer_key.size() - 32); ++ peer_key.data() + 32, peer_key.size() - 32, 0); + *out_secret = std::move(secret); return true; @@ -4561,7 +4878,7 @@ index 09a9ad380..f7d2226e3 100644 }; constexpr NamedGroup kNamedGroups[] = { -@@ -290,8 +614,14 @@ constexpr NamedGroup kNamedGroups[] = { +@@ -290,8 +723,16 @@ constexpr NamedGroup kNamedGroups[] = { {NID_secp384r1, SSL_CURVE_SECP384R1, "P-384", "secp384r1"}, {NID_secp521r1, SSL_CURVE_SECP521R1, "P-521", "secp521r1"}, {NID_X25519, SSL_CURVE_X25519, "X25519", "x25519"}, @@ -4573,11 +4890,13 @@ index 09a9ad380..f7d2226e3 100644 + {NID_X25519Kyber768Draft00Old, SSL_CURVE_X25519_KYBER768_DRAFT00_OLD, + "X25519Kyber768Draft00Old", "Xyber768D00Old"}, + {NID_P256Kyber768Draft00, SSL_CURVE_P256_KYBER768_DRAFT00, -+ "P256Kyber768Draft00", "P256Kyber768D00"} ++ "P256Kyber768Draft00", "P256Kyber768D00"}, ++ {NID_X25519MLKEM768, SSL_CURVE_X25519_MLKEM768, ++ "X25519MLKEM768", "X25519MLKEM768"} }; } // namespace -@@ -312,8 +642,16 @@ UniquePtr SSLKeyShare::Create(uint16_t group_id) { +@@ -312,8 +753,18 @@ UniquePtr SSLKeyShare::Create(uint16_t group_id) { return MakeUnique(NID_secp521r1, SSL_CURVE_SECP521R1); case SSL_CURVE_X25519: return MakeUnique(); @@ -4592,6 +4911,8 @@ index 09a9ad380..f7d2226e3 100644 + group_id)); + case SSL_CURVE_P256_KYBER768_DRAFT00: + return UniquePtr(New()); ++ case SSL_CURVE_X25519_MLKEM768: ++ return UniquePtr(New()); default: return nullptr; } @@ -4609,10 +4930,10 @@ index 838761af5..9eb201d37 100644 static const uint16_t kSigAlgs[] = { SSL_SIGN_RSA_PKCS1_SHA256, diff --git a/src/ssl/ssl_test.cc b/src/ssl/ssl_test.cc -index ef43a9e98..9756fd2a0 100644 +index ef43a9e98..22178b5f6 100644 --- a/src/ssl/ssl_test.cc +++ b/src/ssl/ssl_test.cc -@@ -409,7 +409,30 @@ static 
const CurveTest kCurveTests[] = { +@@ -409,7 +409,34 @@ static const CurveTest kCurveTests[] = { "P-256:X25519Kyber768Draft00", { SSL_CURVE_SECP256R1, SSL_CURVE_X25519_KYBER768_DRAFT00 }, }, @@ -4638,6 +4959,10 @@ index ef43a9e98..9756fd2a0 100644 + { SSL_CURVE_P256_KYBER768_DRAFT00 }, + }, + { ++ "X25519MLKEM768", ++ { SSL_CURVE_X25519_MLKEM768 }, ++ }, ++ { + "P-256:P256Kyber768D00", + { SSL_CURVE_SECP256R1, SSL_CURVE_P256_KYBER768_DRAFT00 }, + }, @@ -4645,7 +4970,7 @@ index ef43a9e98..9756fd2a0 100644 "P-256:P-384:P-521:X25519", { diff --git a/src/tool/speed.cc b/src/tool/speed.cc -index 5b0205953..831875514 100644 +index 5b0205953..6b3c67dab 100644 --- a/src/tool/speed.cc +++ b/src/tool/speed.cc @@ -904,6 +904,116 @@ static bool SpeedScrypt(const std::string &selected) { @@ -4684,7 +5009,7 @@ index 5b0205953..831875514 100644 + uint8_t entropy[KYBER_ENCAP_BYTES]; + uint8_t shared_key[KYBER_KEY_BYTES]; + RAND_bytes(entropy, sizeof(entropy)); -+ KYBER768_encap(ciphertext, shared_key, &pub, entropy); ++ KYBER768_encap(ciphertext, shared_key, &pub, entropy, 0); + return true; + })) { + fprintf(stderr, "Failed to time KYBER768_encap.\n"); @@ -4695,7 +5020,7 @@ index 5b0205953..831875514 100644 + + if (!TimeFunction(&results, [&priv, &ciphertext]() -> bool { + uint8_t shared_key[KYBER_KEY_BYTES]; -+ KYBER768_decap(shared_key, &priv, ciphertext, sizeof(ciphertext)); ++ KYBER768_decap(shared_key, &priv, ciphertext, sizeof(ciphertext), 0); + return true; + })) { + fprintf(stderr, "Failed to time KYBER768_decap.\n"); @@ -4739,7 +5064,7 @@ index 5b0205953..831875514 100644 + uint8_t entropy[KYBER_ENCAP_BYTES]; + uint8_t shared_key[KYBER_KEY_BYTES]; + RAND_bytes(entropy, sizeof(entropy)); -+ KYBER512_encap(ciphertext, shared_key, &pub, entropy); ++ KYBER512_encap(ciphertext, shared_key, &pub, entropy, 0); + return true; + })) { + fprintf(stderr, "Failed to time KYBER512_encap.\n"); @@ -4750,7 +5075,7 @@ index 5b0205953..831875514 100644 + + if (!TimeFunction(&results, 
[&priv, &ciphertext]() -> bool { + uint8_t shared_key[KYBER_KEY_BYTES]; -+ KYBER512_decap(shared_key, &priv, ciphertext, sizeof(ciphertext)); ++ KYBER512_decap(shared_key, &priv, ciphertext, sizeof(ciphertext), 0); + return true; + })) { + fprintf(stderr, "Failed to time KYBER512_decap.\n"); @@ -4832,5 +5157,5 @@ index 5b0205953..831875514 100644 !SpeedTrustToken("TrustToken-Exp1-Batch1", TRUST_TOKEN_experiment_v1(), 1, selected) || -- -2.41.0 +2.46.0 diff --git a/boring/src/lib.rs b/boring/src/lib.rs index 6b3ade0f..6779586a 100644 --- a/boring/src/lib.rs +++ b/boring/src/lib.rs @@ -74,9 +74,11 @@ //! support by turning on `post-quantum` compilation feature. //! //! Upstream BoringSSL support the post-quantum hybrid key agreement `X25519Kyber768Draft00`. Most -//! users should stick to that one. Enabling this feature, adds a few other post-quantum key +//! users should stick to that one for now. Enabling this feature, adds a few other post-quantum key //! agreements: //! +//! - `X25519MLKEM768` is the successor of `X25519Kyber768Draft00`. We expect servers to switch +//! before the end of 2024. //! - `X25519Kyber768Draft00Old` is the same as `X25519Kyber768Draft00`, but under its old codepoint. //! - `X25519Kyber512Draft00`. Similar to `X25519Kyber768Draft00`, but uses level 1 parameter set for //! Kyber. Not recommended. 
It's useful to test whether the shorter ClientHello upsets fewer middle diff --git a/boring/src/ssl/mod.rs b/boring/src/ssl/mod.rs index 283c90b1..52d07c3c 100644 --- a/boring/src/ssl/mod.rs +++ b/boring/src/ssl/mod.rs @@ -765,6 +765,8 @@ impl SslCurve { ffi::SSL_CURVE_X25519_KYBER512_DRAFT00 => Some(ffi::NID_X25519Kyber512Draft00), #[cfg(feature = "pq-experimental")] ffi::SSL_CURVE_P256_KYBER768_DRAFT00 => Some(ffi::NID_P256Kyber768Draft00), + #[cfg(feature = "pq-experimental")] + ffi::SSL_CURVE_X25519_MLKEM768 => Some(ffi::NID_X25519MLKEM768), _ => None, } } @@ -2602,13 +2604,13 @@ impl SslRef { if cfg!(feature = "kx-client-nist-required") { "P256Kyber768Draft00:P-256:P-384:P-521" } else { - "X25519Kyber768Draft00:X25519:P256Kyber768Draft00:P-256:P-384:P-521" + "X25519Kyber768Draft00:X25519MLKEM768:X25519:P256Kyber768Draft00:P-256:P-384:P-521" } } else if cfg!(feature = "kx-client-pq-supported") { if cfg!(feature = "kx-client-nist-required") { "P-256:P-384:P-521:P256Kyber768Draft00" } else { - "X25519:P-256:P-384:P-521:X25519Kyber768Draft00:P256Kyber768Draft00" + "X25519:P-256:P-384:P-521:X25519MLKEM768:X25519Kyber768Draft00:P256Kyber768Draft00" } } else { if cfg!(feature = "kx-client-nist-required") { @@ -2624,8 +2626,10 @@ impl SslRef { #[cfg(feature = "kx-safe-default")] fn server_set_default_curves_list(&mut self) { - self.set_curves_list("X25519Kyber768Draft00:P256Kyber768Draft00:X25519:P-256:P-384") - .expect("invalid default server curves list"); + self.set_curves_list( + "X25519Kyber768Draft00:X25519MLKEM768:P256Kyber768Draft00:X25519:P-256:P-384", + ) + .expect("invalid default server curves list"); } /// Returns the [`SslCurve`] used for this `SslRef`.