diff --git a/.github/workflows/c.yml b/.github/workflows/c.yml index c3535185b..5345cad14 100644 --- a/.github/workflows/c.yml +++ b/.github/workflows/c.yml @@ -118,7 +118,7 @@ jobs: - name: 🔨 Build run: | - cmake -B build + LIBCRUX_BENCHMARKS=1 cmake -B build cmake --build build - name: 🏃🏻‍♀️ Test @@ -132,7 +132,7 @@ jobs: - name: 🔨 Build Release run: | rm -rf build - cmake -B build -DCMAKE_BUILD_TYPE=Release + LIBCRUX_BENCHMARKS=1 cmake -B build -DCMAKE_BUILD_TYPE=Release cmake --build build --config Release if: ${{ matrix.os != 'windows-latest' }} @@ -159,13 +159,6 @@ jobs: cmake -B build cmake --build build # FIXME: Benchmark build for cg on Windows CI is not working right now. - if: ${{ matrix.os != 'windows-latest' }} - - # FIXME: Benchmark build for cg on Windows CI are not working right now. - # - name: 🏃🏻‍♀️ Test (cg) - # working-directory: libcrux-ml-kem/cg - # run: ./build/Debug/ml_kem_test - # if: ${{ matrix.os == 'windows-latest' }} - name: 🏃🏻‍♀️ Test run: ./build/ml_kem_test diff --git a/.github/workflows/s390x.yml b/.github/workflows/s390x.yml new file mode 100644 index 000000000..e76c37b62 --- /dev/null +++ b/.github/workflows/s390x.yml @@ -0,0 +1,44 @@ +name: s390x - Build & Test + +on: + push: + pull_request: + branches: ["main", "dev"] + workflow_dispatch: + merge_group: + +env: + CARGO_TERM_COLOR: always + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + s390x: + runs-on: ubuntu-latest + name: Build on ubuntu-22.04 s390x + steps: + - uses: actions/checkout@v4 + - uses: uraimo/run-on-arch-action@v2 + name: Run + id: runcmd + with: + arch: s390x + distro: ubuntu22.04 + + # Speed up builds by storing container images in + # a GitHub package registry. 
+ githubToken: ${{ github.token }} + + run: | + apt-get -y update + apt-get install -y curl gcc g++ make cmake ninja-build git + cd libcrux-ml-kem/c + cmake -B build -G"Ninja Multi-Config" + cmake --build build + ./build/Debug/ml_kem_test + cd ../cg + cmake -B build -G"Ninja Multi-Config" + cmake --build build + ./build/Debug/ml_kem_test diff --git a/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Hash_functions.Neon.fsti b/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Hash_functions.Neon.fsti index 6805e0d00..9ad6829f1 100644 --- a/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Hash_functions.Neon.fsti +++ b/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Hash_functions.Neon.fsti @@ -24,19 +24,19 @@ val shake256_x4 Prims.l_True (fun _ -> Prims.l_True) -val squeeze_first_block_x4 (x: t_Shake256x4) +val squeeze_first_block_x4 (state: t_Shake256x4) : Prims.Pure (t_Shake256x4 & (t_Array u8 (sz 136) & t_Array u8 (sz 136) & t_Array u8 (sz 136) & t_Array u8 (sz 136))) Prims.l_True (fun _ -> Prims.l_True) -val squeeze_first_five_blocks (x: t_Shake128x4) (out0 out1 out2 out3: t_Array u8 (sz 840)) +val squeeze_first_five_blocks (state: t_Shake128x4) (out0 out1 out2 out3: t_Array u8 (sz 840)) : Prims.Pure (t_Shake128x4 & t_Array u8 (sz 840) & t_Array u8 (sz 840) & t_Array u8 (sz 840) & t_Array u8 (sz 840)) Prims.l_True (fun _ -> Prims.l_True) -val squeeze_next_block (x: t_Shake128x4) +val squeeze_next_block (state: t_Shake128x4) : Prims.Pure (t_Shake128x4 & (t_Array u8 (sz 168) & t_Array u8 (sz 168) & t_Array u8 (sz 168) & t_Array u8 (sz 168))) @@ -140,7 +140,7 @@ let impl: Libcrux_ml_dsa.Hash_functions.Shake128.t_XofX4 t_Shake128x4 = (t_Array u8 (sz 168) & t_Array u8 (sz 168) & t_Array u8 (sz 168) & t_Array u8 (sz 168))) } -val squeeze_next_block_x4 (x: t_Shake256x4) +val squeeze_next_block_x4 (state: t_Shake256x4) : Prims.Pure (t_Shake256x4 & (t_Array u8 (sz 136) & t_Array u8 (sz 136) & t_Array u8 (sz 136) & t_Array u8 (sz 136))) diff --git 
a/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Hash_functions.Portable.fsti b/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Hash_functions.Portable.fsti index 2d75db5dd..19bf6bae1 100644 --- a/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Hash_functions.Portable.fsti +++ b/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Hash_functions.Portable.fsti @@ -21,6 +21,9 @@ val t_Shake256Absorb:Type0 val t_Shake256Squeeze:Type0 +val init_absorb__init_absorb (input: t_Slice u8) + : Prims.Pure Libcrux_sha3.Portable.t_KeccakState Prims.l_True (fun _ -> Prims.l_True) + val init_absorb (input0 input1 input2 input3: t_Slice u8) : Prims.Pure t_Shake128X4 Prims.l_True (fun _ -> Prims.l_True) @@ -69,22 +72,22 @@ val shake256_init: Prims.unit -> Prims.Pure t_Shake256Absorb Prims.l_True (fun _ val shake256_squeeze (st: t_Shake256Squeeze) (out: t_Slice u8) : Prims.Pure (t_Shake256Squeeze & t_Slice u8) Prims.l_True (fun _ -> Prims.l_True) -val squeeze_first_block_shake256 (x: t_Shake256) +val squeeze_first_block_shake256 (state: t_Shake256) : Prims.Pure (t_Shake256 & t_Array u8 (sz 136)) Prims.l_True (fun _ -> Prims.l_True) -val squeeze_first_block_x4 (x: t_Shake256X4) +val squeeze_first_block_x4 (state: t_Shake256X4) : Prims.Pure (t_Shake256X4 & (t_Array u8 (sz 136) & t_Array u8 (sz 136) & t_Array u8 (sz 136) & t_Array u8 (sz 136))) Prims.l_True (fun _ -> Prims.l_True) -val squeeze_first_five_blocks (x: t_Shake128X4) (out0 out1 out2 out3: t_Array u8 (sz 840)) +val squeeze_first_five_blocks (state: t_Shake128X4) (out0 out1 out2 out3: t_Array u8 (sz 840)) : Prims.Pure (t_Shake128X4 & t_Array u8 (sz 840) & t_Array u8 (sz 840) & t_Array u8 (sz 840) & t_Array u8 (sz 840)) Prims.l_True (fun _ -> Prims.l_True) -val squeeze_next_block (x: t_Shake128X4) +val squeeze_next_block (state: t_Shake128X4) : Prims.Pure (t_Shake128X4 & (t_Array u8 (sz 168) & t_Array u8 (sz 168) & t_Array u8 (sz 168) & t_Array u8 (sz 168))) @@ -188,7 +191,7 @@ let impl: 
Libcrux_ml_dsa.Hash_functions.Shake128.t_XofX4 t_Shake128X4 = (t_Array u8 (sz 168) & t_Array u8 (sz 168) & t_Array u8 (sz 168) & t_Array u8 (sz 168))) } -val squeeze_next_block_shake256 (x: t_Shake256) +val squeeze_next_block_shake256 (state: t_Shake256) : Prims.Pure (t_Shake256 & t_Array u8 (sz 136)) Prims.l_True (fun _ -> Prims.l_True) [@@ FStar.Tactics.Typeclasses.tcinstance] @@ -238,7 +241,7 @@ let impl_2: Libcrux_ml_dsa.Hash_functions.Shake256.t_Xof t_Shake256 = self, hax_temp_output <: (t_Shake256 & t_Array u8 (sz 136)) } -val squeeze_next_block_x4 (x: t_Shake256X4) +val squeeze_next_block_x4 (state: t_Shake256X4) : Prims.Pure (t_Shake256X4 & (t_Array u8 (sz 136) & t_Array u8 (sz 136) & t_Array u8 (sz 136) & t_Array u8 (sz 136))) diff --git a/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Hash_functions.Simd256.fsti b/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Hash_functions.Simd256.fsti index 3ff04ac43..a9b24b26a 100644 --- a/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Hash_functions.Simd256.fsti +++ b/libcrux-ml-dsa/proofs/fstar/extraction/Libcrux_ml_dsa.Hash_functions.Simd256.fsti @@ -27,19 +27,19 @@ val shake256_x4 Prims.l_True (fun _ -> Prims.l_True) -val squeeze_first_block_x4 (x: t_Shake256x4) +val squeeze_first_block_x4 (state: t_Shake256x4) : Prims.Pure (t_Shake256x4 & (t_Array u8 (sz 136) & t_Array u8 (sz 136) & t_Array u8 (sz 136) & t_Array u8 (sz 136))) Prims.l_True (fun _ -> Prims.l_True) -val squeeze_first_five_blocks (x: t_Shake128x4) (out0 out1 out2 out3: t_Array u8 (sz 840)) +val squeeze_first_five_blocks (state: t_Shake128x4) (out0 out1 out2 out3: t_Array u8 (sz 840)) : Prims.Pure (t_Shake128x4 & t_Array u8 (sz 840) & t_Array u8 (sz 840) & t_Array u8 (sz 840) & t_Array u8 (sz 840)) Prims.l_True (fun _ -> Prims.l_True) -val squeeze_next_block (x: t_Shake128x4) +val squeeze_next_block (state: t_Shake128x4) : Prims.Pure (t_Shake128x4 & (t_Array u8 (sz 168) & t_Array u8 (sz 168) & t_Array u8 (sz 168) & t_Array u8 (sz 
168))) @@ -143,7 +143,7 @@ let impl: Libcrux_ml_dsa.Hash_functions.Shake128.t_XofX4 t_Shake128x4 = (t_Array u8 (sz 168) & t_Array u8 (sz 168) & t_Array u8 (sz 168) & t_Array u8 (sz 168))) } -val squeeze_next_block_x4 (x: t_Shake256x4) +val squeeze_next_block_x4 (state: t_Shake256x4) : Prims.Pure (t_Shake256x4 & (t_Array u8 (sz 136) & t_Array u8 (sz 136) & t_Array u8 (sz 136) & t_Array u8 (sz 136))) diff --git a/libcrux-ml-dsa/src/hash_functions.rs b/libcrux-ml-dsa/src/hash_functions.rs index 028b1906c..11b2461c1 100644 --- a/libcrux-ml-dsa/src/hash_functions.rs +++ b/libcrux-ml-dsa/src/hash_functions.rs @@ -76,19 +76,19 @@ pub(crate) mod shake128 { /// A portable implementation of [`shake128::Xof`] and [`shake256::Xof`]. pub(crate) mod portable { - use libcrux_sha3::portable::incremental; - use super::{shake128, shake256}; + use libcrux_sha3::portable::incremental; + use libcrux_sha3::portable::KeccakState; /// Portable SHAKE 128 x4 state. /// /// We're using a portable implementation so this is actually sequential. 
#[cfg_attr(hax, hax_lib::opaque_type)] pub(crate) struct Shake128X4 { - state0: libcrux_sha3::portable::KeccakState, - state1: libcrux_sha3::portable::KeccakState, - state2: libcrux_sha3::portable::KeccakState, - state3: libcrux_sha3::portable::KeccakState, + state0: KeccakState, + state1: KeccakState, + state2: KeccakState, + state3: KeccakState, } fn init_absorb(input0: &[u8], input1: &[u8], input2: &[u8], input3: &[u8]) -> Shake128X4 { @@ -113,20 +113,20 @@ pub(crate) mod portable { } fn squeeze_first_five_blocks( - x: &mut Shake128X4, + state: &mut Shake128X4, out0: &mut [u8; shake128::FIVE_BLOCKS_SIZE], out1: &mut [u8; shake128::FIVE_BLOCKS_SIZE], out2: &mut [u8; shake128::FIVE_BLOCKS_SIZE], out3: &mut [u8; shake128::FIVE_BLOCKS_SIZE], ) { - incremental::shake128_squeeze_first_five_blocks(&mut x.state0, out0); - incremental::shake128_squeeze_first_five_blocks(&mut x.state1, out1); - incremental::shake128_squeeze_first_five_blocks(&mut x.state2, out2); - incremental::shake128_squeeze_first_five_blocks(&mut x.state3, out3); + incremental::shake128_squeeze_first_five_blocks(&mut state.state0, out0); + incremental::shake128_squeeze_first_five_blocks(&mut state.state1, out1); + incremental::shake128_squeeze_first_five_blocks(&mut state.state2, out2); + incremental::shake128_squeeze_first_five_blocks(&mut state.state3, out3); } fn squeeze_next_block( - x: &mut Shake128X4, + state: &mut Shake128X4, ) -> ( [u8; shake128::BLOCK_SIZE], [u8; shake128::BLOCK_SIZE], @@ -134,13 +134,13 @@ pub(crate) mod portable { [u8; shake128::BLOCK_SIZE], ) { let mut out0 = [0u8; shake128::BLOCK_SIZE]; - incremental::shake128_squeeze_next_block(&mut x.state0, &mut out0); + incremental::shake128_squeeze_next_block(&mut state.state0, &mut out0); let mut out1 = [0u8; shake128::BLOCK_SIZE]; - incremental::shake128_squeeze_next_block(&mut x.state1, &mut out1); + incremental::shake128_squeeze_next_block(&mut state.state1, &mut out1); let mut out2 = [0u8; shake128::BLOCK_SIZE]; - 
incremental::shake128_squeeze_next_block(&mut x.state2, &mut out2); + incremental::shake128_squeeze_next_block(&mut state.state2, &mut out2); let mut out3 = [0u8; shake128::BLOCK_SIZE]; - incremental::shake128_squeeze_next_block(&mut x.state3, &mut out3); + incremental::shake128_squeeze_next_block(&mut state.state3, &mut out3); (out0, out1, out2, out3) } @@ -187,7 +187,7 @@ pub(crate) mod portable { /// Portable SHAKE 256 state #[cfg_attr(hax, hax_lib::opaque_type)] pub(crate) struct Shake256 { - state: libcrux_sha3::portable::KeccakState, + state: KeccakState, } fn shake256(input: &[u8], out: &mut [u8; OUTPUT_LENGTH]) { @@ -200,15 +200,15 @@ pub(crate) mod portable { Shake256 { state } } - fn squeeze_first_block_shake256(x: &mut Shake256) -> [u8; shake256::BLOCK_SIZE] { + fn squeeze_first_block_shake256(state: &mut Shake256) -> [u8; shake256::BLOCK_SIZE] { let mut out = [0u8; shake256::BLOCK_SIZE]; - incremental::shake256_squeeze_first_block(&mut x.state, &mut out); + incremental::shake256_squeeze_first_block(&mut state.state, &mut out); out } - fn squeeze_next_block_shake256(x: &mut Shake256) -> [u8; shake256::BLOCK_SIZE] { + fn squeeze_next_block_shake256(state: &mut Shake256) -> [u8; shake256::BLOCK_SIZE] { let mut out = [0u8; shake256::BLOCK_SIZE]; - incremental::shake256_squeeze_next_block(&mut x.state, &mut out); + incremental::shake256_squeeze_next_block(&mut state.state, &mut out); out } @@ -262,7 +262,7 @@ pub(crate) mod portable { } fn squeeze_first_block_x4( - x: &mut Shake256X4, + state: &mut Shake256X4, ) -> ( [u8; shake256::BLOCK_SIZE], [u8; shake256::BLOCK_SIZE], @@ -270,19 +270,19 @@ pub(crate) mod portable { [u8; shake256::BLOCK_SIZE], ) { let mut out0 = [0u8; shake256::BLOCK_SIZE]; - incremental::shake256_squeeze_first_block(&mut x.state0, &mut out0); + incremental::shake256_squeeze_first_block(&mut state.state0, &mut out0); let mut out1 = [0u8; shake256::BLOCK_SIZE]; - incremental::shake256_squeeze_first_block(&mut x.state1, &mut out1); + 
incremental::shake256_squeeze_first_block(&mut state.state1, &mut out1); let mut out2 = [0u8; shake256::BLOCK_SIZE]; - incremental::shake256_squeeze_first_block(&mut x.state2, &mut out2); + incremental::shake256_squeeze_first_block(&mut state.state2, &mut out2); let mut out3 = [0u8; shake256::BLOCK_SIZE]; - incremental::shake256_squeeze_first_block(&mut x.state3, &mut out3); + incremental::shake256_squeeze_first_block(&mut state.state3, &mut out3); (out0, out1, out2, out3) } fn squeeze_next_block_x4( - x: &mut Shake256X4, + state: &mut Shake256X4, ) -> ( [u8; shake256::BLOCK_SIZE], [u8; shake256::BLOCK_SIZE], @@ -290,13 +290,13 @@ pub(crate) mod portable { [u8; shake256::BLOCK_SIZE], ) { let mut out0 = [0u8; shake256::BLOCK_SIZE]; - incremental::shake256_squeeze_next_block(&mut x.state0, &mut out0); + incremental::shake256_squeeze_next_block(&mut state.state0, &mut out0); let mut out1 = [0u8; shake256::BLOCK_SIZE]; - incremental::shake256_squeeze_next_block(&mut x.state1, &mut out1); + incremental::shake256_squeeze_next_block(&mut state.state1, &mut out1); let mut out2 = [0u8; shake256::BLOCK_SIZE]; - incremental::shake256_squeeze_next_block(&mut x.state2, &mut out2); + incremental::shake256_squeeze_next_block(&mut state.state2, &mut out2); let mut out3 = [0u8; shake256::BLOCK_SIZE]; - incremental::shake256_squeeze_next_block(&mut x.state3, &mut out3); + incremental::shake256_squeeze_next_block(&mut state.state3, &mut out3); (out0, out1, out2, out3) } @@ -399,17 +399,23 @@ pub(crate) mod simd256 { } fn squeeze_first_five_blocks( - x: &mut Shake128x4, + state: &mut Shake128x4, out0: &mut [u8; shake128::FIVE_BLOCKS_SIZE], out1: &mut [u8; shake128::FIVE_BLOCKS_SIZE], out2: &mut [u8; shake128::FIVE_BLOCKS_SIZE], out3: &mut [u8; shake128::FIVE_BLOCKS_SIZE], ) { - x4::incremental::shake128_squeeze_first_five_blocks(&mut x.state, out0, out1, out2, out3); + x4::incremental::shake128_squeeze_first_five_blocks( + &mut state.state, + out0, + out1, + out2, + out3, + ); } fn 
squeeze_next_block( - x: &mut Shake128x4, + state: &mut Shake128x4, ) -> ( [u8; shake128::BLOCK_SIZE], [u8; shake128::BLOCK_SIZE], @@ -421,7 +427,7 @@ pub(crate) mod simd256 { let mut out2 = [0u8; shake128::BLOCK_SIZE]; let mut out3 = [0u8; shake128::BLOCK_SIZE]; x4::incremental::shake128_squeeze_next_block( - &mut x.state, + &mut state.state, &mut out0, &mut out1, &mut out2, @@ -514,7 +520,7 @@ pub(crate) mod simd256 { } fn squeeze_first_block_x4( - x: &mut Shake256x4, + state: &mut Shake256x4, ) -> ( [u8; shake256::BLOCK_SIZE], [u8; shake256::BLOCK_SIZE], @@ -526,7 +532,7 @@ pub(crate) mod simd256 { let mut out2 = [0u8; shake256::BLOCK_SIZE]; let mut out3 = [0u8; shake256::BLOCK_SIZE]; x4::incremental::shake256_squeeze_first_block( - &mut x.state, + &mut state.state, &mut out0, &mut out1, &mut out2, @@ -537,7 +543,7 @@ pub(crate) mod simd256 { } fn squeeze_next_block_x4( - x: &mut Shake256x4, + state: &mut Shake256x4, ) -> ( [u8; shake256::BLOCK_SIZE], [u8; shake256::BLOCK_SIZE], @@ -549,7 +555,7 @@ pub(crate) mod simd256 { let mut out2 = [0u8; shake256::BLOCK_SIZE]; let mut out3 = [0u8; shake256::BLOCK_SIZE]; x4::incremental::shake256_squeeze_next_block( - &mut x.state, + &mut state.state, &mut out0, &mut out1, &mut out2, @@ -641,18 +647,18 @@ pub(crate) mod neon { } fn squeeze_first_five_blocks( - x: &mut Shake128x4, + state: &mut Shake128x4, out0: &mut [u8; shake128::FIVE_BLOCKS_SIZE], out1: &mut [u8; shake128::FIVE_BLOCKS_SIZE], out2: &mut [u8; shake128::FIVE_BLOCKS_SIZE], out3: &mut [u8; shake128::FIVE_BLOCKS_SIZE], ) { - x2::incremental::shake128_squeeze_first_five_blocks(&mut x.state[0], out0, out1); - x2::incremental::shake128_squeeze_first_five_blocks(&mut x.state[1], out2, out3); + x2::incremental::shake128_squeeze_first_five_blocks(&mut state.state[0], out0, out1); + x2::incremental::shake128_squeeze_first_five_blocks(&mut state.state[1], out2, out3); } fn squeeze_next_block( - x: &mut Shake128x4, + state: &mut Shake128x4, ) -> ( [u8; 
shake128::BLOCK_SIZE], [u8; shake128::BLOCK_SIZE], @@ -663,8 +669,8 @@ pub(crate) mod neon { let mut out1 = [0u8; shake128::BLOCK_SIZE]; let mut out2 = [0u8; shake128::BLOCK_SIZE]; let mut out3 = [0u8; shake128::BLOCK_SIZE]; - x2::incremental::shake128_squeeze_next_block(&mut x.state[0], &mut out0, &mut out1); - x2::incremental::shake128_squeeze_next_block(&mut x.state[1], &mut out2, &mut out3); + x2::incremental::shake128_squeeze_next_block(&mut state.state[0], &mut out0, &mut out1); + x2::incremental::shake128_squeeze_next_block(&mut state.state[1], &mut out2, &mut out3); (out0, out1, out2, out3) } @@ -711,7 +717,7 @@ pub(crate) mod neon { } fn squeeze_first_block_x4( - x: &mut Shake256x4, + state: &mut Shake256x4, ) -> ( [u8; shake256::BLOCK_SIZE], [u8; shake256::BLOCK_SIZE], @@ -722,14 +728,14 @@ pub(crate) mod neon { let mut out1 = [0u8; shake256::BLOCK_SIZE]; let mut out2 = [0u8; shake256::BLOCK_SIZE]; let mut out3 = [0u8; shake256::BLOCK_SIZE]; - x2::incremental::shake256_squeeze_first_block(&mut x.state[0], &mut out0, &mut out1); - x2::incremental::shake256_squeeze_first_block(&mut x.state[1], &mut out2, &mut out3); + x2::incremental::shake256_squeeze_first_block(&mut state.state[0], &mut out0, &mut out1); + x2::incremental::shake256_squeeze_first_block(&mut state.state[1], &mut out2, &mut out3); (out0, out1, out2, out3) } fn squeeze_next_block_x4( - x: &mut Shake256x4, + state: &mut Shake256x4, ) -> ( [u8; shake256::BLOCK_SIZE], [u8; shake256::BLOCK_SIZE], @@ -740,8 +746,8 @@ pub(crate) mod neon { let mut out1 = [0u8; shake256::BLOCK_SIZE]; let mut out2 = [0u8; shake256::BLOCK_SIZE]; let mut out3 = [0u8; shake256::BLOCK_SIZE]; - x2::incremental::shake256_squeeze_next_block(&mut x.state[0], &mut out0, &mut out1); - x2::incremental::shake256_squeeze_next_block(&mut x.state[1], &mut out2, &mut out3); + x2::incremental::shake256_squeeze_next_block(&mut state.state[0], &mut out0, &mut out1); + x2::incremental::shake256_squeeze_next_block(&mut state.state[1], 
&mut out2, &mut out3); (out0, out1, out2, out3) } diff --git a/libcrux-ml-kem/c/CMakeLists.txt b/libcrux-ml-kem/c/CMakeLists.txt index 121558310..7eb5cd5ca 100644 --- a/libcrux-ml-kem/c/CMakeLists.txt +++ b/libcrux-ml-kem/c/CMakeLists.txt @@ -17,6 +17,7 @@ if(NOT MSVC) # TODO: Clean up add_compile_options( -Wall + # -Wextra # -pedantic # -Wconversion @@ -29,6 +30,7 @@ if(NOT MSVC) endif(NOT MSVC) set(CMAKE_COLOR_DIAGNOSTICS "ON") + # For LSP-based editors set(CMAKE_EXPORT_COMPILE_COMMANDS 1) include_directories( @@ -101,12 +103,10 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64|arm64v8" AND DEFINED ENV{LIBCRU endif() # --- Tests - if(DEFINED ENV{LIBCRUX_UNPACKED}) add_compile_definitions(LIBCRUX_UNPACKED) endif(DEFINED ENV{LIBCRUX_UNPACKED}) - # Get gtests include(FetchContent) FetchContent_Declare(googletest @@ -144,52 +144,54 @@ target_link_libraries(sha3_test PRIVATE ) # --- Benchmarks -FetchContent_Declare(benchmark - GIT_REPOSITORY https://github.com/google/benchmark.git - GIT_TAG v1.8.4 -) -FetchContent_MakeAvailable(benchmark) +if(DEFINED ENV{LIBCRUX_BENCHMARKS}) + FetchContent_Declare(benchmark + GIT_REPOSITORY https://github.com/google/benchmark.git + GIT_TAG v1.8.4 + ) + FetchContent_MakeAvailable(benchmark) -add_executable(ml_kem_bench - ${PROJECT_SOURCE_DIR}/benches/mlkem768.cc -) -target_link_libraries(ml_kem_bench PRIVATE - ml_kem_static - benchmark::benchmark -) + add_executable(ml_kem_bench + ${PROJECT_SOURCE_DIR}/benches/mlkem768.cc + ) + target_link_libraries(ml_kem_bench PRIVATE + ml_kem_static + benchmark::benchmark + ) -if(DEFINED ENV{SYMCRYPT_PATH}) - message("Symcrypt path: $ENV{SYMCRYPT_PATH}") - add_compile_definitions(LIBCRUX_SYMCRYPT) - target_include_directories(ml_kem_bench PRIVATE $ENV{SYMCRYPT_PATH}) - target_link_directories(ml_kem_bench PRIVATE $ENV{SYMCRYPT_PATH}/bin/lib) - target_link_libraries(ml_kem_bench PRIVATE symcrypt) -endif(DEFINED ENV{SYMCRYPT_PATH}) + if(DEFINED ENV{SYMCRYPT_PATH}) + message("Symcrypt path: 
$ENV{SYMCRYPT_PATH}") + add_compile_definitions(LIBCRUX_SYMCRYPT) + target_include_directories(ml_kem_bench PRIVATE $ENV{SYMCRYPT_PATH}) + target_link_directories(ml_kem_bench PRIVATE $ENV{SYMCRYPT_PATH}/bin/lib) + target_link_libraries(ml_kem_bench PRIVATE symcrypt) + endif(DEFINED ENV{SYMCRYPT_PATH}) -add_executable(ml_kem_keygen - ${PROJECT_SOURCE_DIR}/benches/mlkem768_keygen.cc -) -target_link_libraries(ml_kem_keygen PRIVATE - ml_kem_static - benchmark::benchmark -) - -add_executable(ml_kem_encaps - ${PROJECT_SOURCE_DIR}/benches/mlkem768_encaps.cc -) -target_link_libraries(ml_kem_encaps PRIVATE - ml_kem_static - benchmark::benchmark -) + add_executable(ml_kem_keygen + ${PROJECT_SOURCE_DIR}/benches/mlkem768_keygen.cc + ) + target_link_libraries(ml_kem_keygen PRIVATE + ml_kem_static + benchmark::benchmark + ) -if(NOT MSVC) - # We benchmark internal functions here that are inlined and thus not available - # in MSVC. - add_executable(sha3_bench - ${PROJECT_SOURCE_DIR}/benches/sha3.cc + add_executable(ml_kem_encaps + ${PROJECT_SOURCE_DIR}/benches/mlkem768_encaps.cc ) - target_link_libraries(sha3_bench PRIVATE + target_link_libraries(ml_kem_encaps PRIVATE ml_kem_static benchmark::benchmark ) -endif(NOT MSVC) + + if(NOT MSVC) + # We benchmark internal functions here that are inlined and thus not available + # in MSVC. 
+ add_executable(sha3_bench + ${PROJECT_SOURCE_DIR}/benches/sha3.cc + ) + target_link_libraries(sha3_bench PRIVATE + ml_kem_static + benchmark::benchmark + ) + endif(NOT MSVC) +endif(DEFINED ENV{LIBCRUX_BENCHMARKS}) diff --git a/libcrux-ml-kem/cg/CMakeLists.txt b/libcrux-ml-kem/cg/CMakeLists.txt index ce8ed53c2..e18520d55 100644 --- a/libcrux-ml-kem/cg/CMakeLists.txt +++ b/libcrux-ml-kem/cg/CMakeLists.txt @@ -26,10 +26,10 @@ if(NOT MSVC) endif(NOT MSVC) if((CMAKE_C_COMPILER_ID STREQUAL "Clang" AND - CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL "13.0.0") OR - (CMAKE_C_COMPILER_ID STREQUAL "AppleClang" AND - CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL "13.1.6")) - add_compile_options(-Werror -Wframe-larger-than=25344) + CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL "13.0.0") OR + (CMAKE_C_COMPILER_ID STREQUAL "AppleClang" AND + CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL "13.1.6")) + add_compile_options(-Werror -Wframe-larger-than=25344) endif() set(CMAKE_COLOR_DIAGNOSTICS "ON") @@ -95,48 +95,50 @@ target_link_libraries(sha3_test PRIVATE ) # --- Benchmarks -FetchContent_Declare(benchmark - GIT_REPOSITORY https://github.com/google/benchmark.git - GIT_TAG v1.8.4 -) -FetchContent_MakeAvailable(benchmark) - -add_executable(ml_kem_bench - ${PROJECT_SOURCE_DIR}/benches/mlkem768.cc -) -target_link_libraries(ml_kem_bench PRIVATE - benchmark::benchmark -) +if(DEFINED ENV{LIBCRUX_BENCHMARKS}) + FetchContent_Declare(benchmark + GIT_REPOSITORY https://github.com/google/benchmark.git + GIT_TAG v1.8.4 + ) + FetchContent_MakeAvailable(benchmark) -if(DEFINED ENV{SYMCRYPT_PATH}) - message("Symcrypt path: $ENV{SYMCRYPT_PATH}") - add_compile_definitions(LIBCRUX_SYMCRYPT) - target_include_directories(ml_kem_bench PRIVATE $ENV{SYMCRYPT_PATH}) - target_link_directories(ml_kem_bench PRIVATE $ENV{SYMCRYPT_PATH}/bin/lib) - target_link_libraries(ml_kem_bench PRIVATE symcrypt) -endif(DEFINED ENV{SYMCRYPT_PATH}) + add_executable(ml_kem_bench + ${PROJECT_SOURCE_DIR}/benches/mlkem768.cc 
+ ) + target_link_libraries(ml_kem_bench PRIVATE + benchmark::benchmark + ) -add_executable(ml_kem_keygen - ${PROJECT_SOURCE_DIR}/benches/mlkem768_keygen.cc -) -target_link_libraries(ml_kem_keygen PRIVATE - benchmark::benchmark -) + if(DEFINED ENV{SYMCRYPT_PATH}) + message("Symcrypt path: $ENV{SYMCRYPT_PATH}") + add_compile_definitions(LIBCRUX_SYMCRYPT) + target_include_directories(ml_kem_bench PRIVATE $ENV{SYMCRYPT_PATH}) + target_link_directories(ml_kem_bench PRIVATE $ENV{SYMCRYPT_PATH}/bin/lib) + target_link_libraries(ml_kem_bench PRIVATE symcrypt) + endif(DEFINED ENV{SYMCRYPT_PATH}) -add_executable(ml_kem_encaps - ${PROJECT_SOURCE_DIR}/benches/mlkem768_encaps.cc -) -target_link_libraries(ml_kem_encaps PRIVATE - benchmark::benchmark -) + add_executable(ml_kem_keygen + ${PROJECT_SOURCE_DIR}/benches/mlkem768_keygen.cc + ) + target_link_libraries(ml_kem_keygen PRIVATE + benchmark::benchmark + ) -if(NOT MSVC) - # We benchmark internal functions here that are inlined and thus not available - # in MSVC. - add_executable(sha3_bench - ${PROJECT_SOURCE_DIR}/benches/sha3.cc + add_executable(ml_kem_encaps + ${PROJECT_SOURCE_DIR}/benches/mlkem768_encaps.cc ) - target_link_libraries(sha3_bench PRIVATE + target_link_libraries(ml_kem_encaps PRIVATE benchmark::benchmark ) -endif(NOT MSVC) + + if(NOT MSVC) + # We benchmark internal functions here that are inlined and thus not available + # in MSVC. + add_executable(sha3_bench + ${PROJECT_SOURCE_DIR}/benches/sha3.cc + ) + target_link_libraries(sha3_bench PRIVATE + benchmark::benchmark + ) + endif(NOT MSVC) +endif(DEFINED ENV{LIBCRUX_BENCHMARKS}) diff --git a/libcrux-ml-kem/cg/eurydice_glue.h b/libcrux-ml-kem/cg/eurydice_glue.h index cdd27af77..3f9b35cc2 100644 --- a/libcrux-ml-kem/cg/eurydice_glue.h +++ b/libcrux-ml-kem/cg/eurydice_glue.h @@ -17,6 +17,7 @@ extern "C" { #include #include +#include "karamel/endianness.h" #include "karamel/target.h" // SLICES, ARRAYS, ETC. 
@@ -88,7 +89,7 @@ typedef struct { #define Eurydice_slice_copy(dst, src, t) \ memcpy(dst.ptr, src.ptr, dst.len * sizeof(t)) #define core_array___Array_T__N__23__as_slice(len_, ptr_, t, _ret_t) \ - ((Eurydice_slice){.ptr = ptr_, .len = len_}) + (CLITERAL(Eurydice_slice){.ptr = ptr_, .len = len_}) #define core_array___core__clone__Clone_for__Array_T__N___20__clone( \ len, src, dst, elem_type, _ret_t) \ @@ -130,18 +131,14 @@ static inline void Eurydice_slice_to_array3(uint8_t *dst_tag, char *dst_ok, // CORE STUFF (conversions, endianness, ...) static inline void core_num__u64_9__to_le_bytes(uint64_t v, uint8_t buf[8]) { - memcpy(buf, &v, sizeof(v)); + store64_le(buf, v); } static inline uint64_t core_num__u64_9__from_le_bytes(uint8_t buf[8]) { - uint64_t v; - memcpy(&v, buf, sizeof(v)); - return v; + return load64_le(buf); } static inline uint32_t core_num__u32_8__from_le_bytes(uint8_t buf[4]) { - uint32_t v; - memcpy(&v, buf, sizeof(v)); - return v; + return load32_le(buf); } static inline uint32_t core_num__u8_6__count_ones(uint8_t x0) { diff --git a/libcrux-ml-kem/cg/karamel/endianness.h b/libcrux-ml-kem/cg/karamel/endianness.h new file mode 100644 index 000000000..d59d9854d --- /dev/null +++ b/libcrux-ml-kem/cg/karamel/endianness.h @@ -0,0 +1,228 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 and MIT Licenses. */ + +#ifndef __LOWSTAR_ENDIANNESS_H +#define __LOWSTAR_ENDIANNESS_H + +#include +#include + +/******************************************************************************/ +/* Implementing C.fst (part 2: endian-ness macros) */ +/******************************************************************************/ + +/* ... for Linux */ +#if defined(__linux__) || defined(__CYGWIN__) || \ + defined(__USE_SYSTEM_ENDIAN_H__) || defined(__GLIBC__) +#include + +/* ... 
for OSX */ +#elif defined(__APPLE__) +#include +#define htole64(x) OSSwapHostToLittleInt64(x) +#define le64toh(x) OSSwapLittleToHostInt64(x) +#define htobe64(x) OSSwapHostToBigInt64(x) +#define be64toh(x) OSSwapBigToHostInt64(x) + +#define htole16(x) OSSwapHostToLittleInt16(x) +#define le16toh(x) OSSwapLittleToHostInt16(x) +#define htobe16(x) OSSwapHostToBigInt16(x) +#define be16toh(x) OSSwapBigToHostInt16(x) + +#define htole32(x) OSSwapHostToLittleInt32(x) +#define le32toh(x) OSSwapLittleToHostInt32(x) +#define htobe32(x) OSSwapHostToBigInt32(x) +#define be32toh(x) OSSwapBigToHostInt32(x) + +/* ... for Solaris */ +#elif defined(__sun__) +#include +#define htole64(x) LE_64(x) +#define le64toh(x) LE_64(x) +#define htobe64(x) BE_64(x) +#define be64toh(x) BE_64(x) + +#define htole16(x) LE_16(x) +#define le16toh(x) LE_16(x) +#define htobe16(x) BE_16(x) +#define be16toh(x) BE_16(x) + +#define htole32(x) LE_32(x) +#define le32toh(x) LE_32(x) +#define htobe32(x) BE_32(x) +#define be32toh(x) BE_32(x) + +/* ... for the BSDs */ +#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__) +#include +#elif defined(__OpenBSD__) +#include + +/* ... for Windows (MSVC)... not targeting XBOX 360! */ +#elif defined(_MSC_VER) + +#include +#define htobe16(x) _byteswap_ushort(x) +#define htole16(x) (x) +#define be16toh(x) _byteswap_ushort(x) +#define le16toh(x) (x) + +#define htobe32(x) _byteswap_ulong(x) +#define htole32(x) (x) +#define be32toh(x) _byteswap_ulong(x) +#define le32toh(x) (x) + +#define htobe64(x) _byteswap_uint64(x) +#define htole64(x) (x) +#define be64toh(x) _byteswap_uint64(x) +#define le64toh(x) (x) + +/* ... for Windows (GCC-like, e.g. 
mingw or clang) */ +#elif (defined(_WIN32) || defined(_WIN64) || defined(__EMSCRIPTEN__)) && \ + (defined(__GNUC__) || defined(__clang__)) + +#define htobe16(x) __builtin_bswap16(x) +#define htole16(x) (x) +#define be16toh(x) __builtin_bswap16(x) +#define le16toh(x) (x) + +#define htobe32(x) __builtin_bswap32(x) +#define htole32(x) (x) +#define be32toh(x) __builtin_bswap32(x) +#define le32toh(x) (x) + +#define htobe64(x) __builtin_bswap64(x) +#define htole64(x) (x) +#define be64toh(x) __builtin_bswap64(x) +#define le64toh(x) (x) + +/* ... generic big-endian fallback code */ +/* ... AIX doesn't have __BYTE_ORDER__ (with XLC compiler) & is always + * big-endian */ +#elif (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) || \ + defined(_AIX) + +/* byte swapping code inspired by: + * https://github.com/rweather/arduinolibs/blob/master/libraries/Crypto/utility/EndianUtil.h + * htole64 swaps each half with htole32 (htobe32 is a no-op here). */ + +#define htobe32(x) (x) +#define be32toh(x) (x) +#define htole32(x) \ + (__extension__({ \ + uint32_t _temp = (x); \ + ((_temp >> 24) & 0x000000FF) | ((_temp >> 8) & 0x0000FF00) | \ + ((_temp << 8) & 0x00FF0000) | ((_temp << 24) & 0xFF000000); \ + })) +#define le32toh(x) (htole32((x))) + +#define htobe64(x) (x) +#define be64toh(x) (x) +#define htole64(x) \ + (__extension__({ \ + uint64_t __temp = (x); \ + uint32_t __low = htole32((uint32_t)__temp); \ + uint32_t __high = htole32((uint32_t)(__temp >> 32)); \ + (((uint64_t)__low) << 32) | __high; \ + })) +#define le64toh(x) (htole64((x))) + +/* ... 
generic little-endian fallback code */ +#elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + +#define htole32(x) (x) +#define le32toh(x) (x) +#define htobe32(x) \ + (__extension__({ \ + uint32_t _temp = (x); \ + ((_temp >> 24) & 0x000000FF) | ((_temp >> 8) & 0x0000FF00) | \ + ((_temp << 8) & 0x00FF0000) | ((_temp << 24) & 0xFF000000); \ + })) +#define be32toh(x) (htobe32((x))) + +#define htole64(x) (x) +#define le64toh(x) (x) +#define htobe64(x) \ + (__extension__({ \ + uint64_t __temp = (x); \ + uint32_t __low = htobe32((uint32_t)__temp); \ + uint32_t __high = htobe32((uint32_t)(__temp >> 32)); \ + (((uint64_t)__low) << 32) | __high; \ + })) +#define be64toh(x) (htobe64((x))) + +/* ... couldn't determine endian-ness of the target platform */ +#else +#error "Please define __BYTE_ORDER__!" + +#endif /* defined(__linux__) || ... */ + +/* Loads and stores. These avoid undefined behavior due to unaligned memory + * accesses, via memcpy. */ + +inline static uint16_t load16(uint8_t *b) { + uint16_t x; + memcpy(&x, b, 2); + return x; +} + +inline static uint32_t load32(uint8_t *b) { + uint32_t x; + memcpy(&x, b, 4); + return x; +} + +inline static uint64_t load64(uint8_t *b) { + uint64_t x; + memcpy(&x, b, 8); + return x; +} + +inline static void store16(uint8_t *b, uint16_t i) { memcpy(b, &i, 2); } + +inline static void store32(uint8_t *b, uint32_t i) { memcpy(b, &i, 4); } + +inline static void store64(uint8_t *b, uint64_t i) { memcpy(b, &i, 8); } + +/* Legacy accessors so that this header can serve as an implementation of + * C.Endianness */ +#define load16_le(b) (le16toh(load16(b))) +#define store16_le(b, i) (store16(b, htole16(i))) +#define load16_be(b) (be16toh(load16(b))) +#define store16_be(b, i) (store16(b, htobe16(i))) + +#define load32_le(b) (le32toh(load32(b))) +#define store32_le(b, i) (store32(b, htole32(i))) +#define load32_be(b) (be32toh(load32(b))) +#define store32_be(b, i) (store32(b, htobe32(i))) + +#define load64_le(b) 
(le64toh(load64(b))) +#define store64_le(b, i) (store64(b, htole64(i))) +#define load64_be(b) (be64toh(load64(b))) +#define store64_be(b, i) (store64(b, htobe64(i))) + +/* Co-existence of LowStar.Endianness and FStar.Endianness generates name + * conflicts, because of course both insist on having no prefixes. Until a + * prefix is added, or until we truly retire FStar.Endianness, solve this issue + * in an elegant way. */ +#define load16_le0 load16_le +#define store16_le0 store16_le +#define load16_be0 load16_be +#define store16_be0 store16_be + +#define load32_le0 load32_le +#define store32_le0 store32_le +#define load32_be0 load32_be +#define store32_be0 store32_be + +#define load64_le0 load64_le +#define store64_le0 store64_le +#define load64_be0 load64_be +#define store64_be0 store64_be + +#define load128_le0 load128_le +#define store128_le0 store128_le +#define load128_be0 load128_be +#define store128_be0 store128_be + +#endif