diff --git a/curve25519-dalek/src/backend/vector/neon/field.rs b/curve25519-dalek/src/backend/vector/neon/field.rs index 29a74342..dddd24c9 100644 --- a/curve25519-dalek/src/backend/vector/neon/field.rs +++ b/curve25519-dalek/src/backend/vector/neon/field.rs @@ -66,7 +66,7 @@ use core::arch::aarch64::vget_low_u32; macro_rules! shuffle { ($vec:expr , $index:expr) => { unsafe { - let v_n: [u32;8] = [ + let v_n: [u32; 8] = [ $vec.extract::<0>(), $vec.extract::<1>(), $vec.extract::<2>(), @@ -74,13 +74,21 @@ macro_rules! shuffle { $vec.extract::<4>(), $vec.extract::<5>(), $vec.extract::<6>(), - $vec.extract::<7>() - ]; + $vec.extract::<7>(), + ]; u32x4x2::new( - core::mem::transmute::<[u32; 4], u32x4>( - [v_n[$index[0]], v_n[$index[1]], v_n[$index[2]], v_n[$index[3]]]), - core::mem::transmute::<[u32; 4], u32x4>( - [v_n[$index[4]], v_n[$index[5]], v_n[$index[6]], v_n[$index[7]]]) + core::mem::transmute::<[u32; 4], u32x4>([ + v_n[$index[0]], + v_n[$index[1]], + v_n[$index[2]], + v_n[$index[3]], + ]), + core::mem::transmute::<[u32; 4], u32x4>([ + v_n[$index[4]], + v_n[$index[5]], + v_n[$index[6]], + v_n[$index[7]], + ]), ) } }; @@ -90,7 +98,7 @@ macro_rules! shuffle { macro_rules! blend { ($vec0: expr, $vec1: expr, $index:expr) => { unsafe { - let v_n: [u32;8] = [ + let v_n: [u32; 8] = [ $vec0.extract::<0>(), $vec0.extract::<1>(), $vec0.extract::<2>(), @@ -98,10 +106,14 @@ macro_rules! blend { $vec1.extract::<0>(), $vec1.extract::<1>(), $vec1.extract::<2>(), - $vec1.extract::<3>() - ]; - core::mem::transmute::<[u32; 4], u32x4>( - [v_n[$index[0]], v_n[$index[1]], v_n[$index[2]], v_n[$index[3]]]) + $vec1.extract::<3>(), + ]; + core::mem::transmute::<[u32; 4], u32x4>([ + v_n[$index[0]], + v_n[$index[1]], + v_n[$index[2]], + v_n[$index[3]], + ]) } }; } @@ -118,10 +130,10 @@ fn unpack_pair(src: u32x4x2) -> (u32x2x2, u32x2x2) { let b0: u32x2; let b1: u32x2; unsafe { - a0 = vget_low_u32(src.0.0).into(); - a1 = vget_low_u32(src.0.1).into(); - b0 = vget_high_u32(src.0.0).into(); - b1 = vget_high_u32(src.0.1).into(); + a0 = vget_low_u32(src.0 .0).into(); + a1 = vget_low_u32(src.0 .1).into(); + b0 = vget_high_u32(src.0 .0).into(); + b1 = vget_high_u32(src.0 .1).into(); } return (u32x2x2::new(a0, a1), u32x2x2::new(b0, b1)); } @@ -193,7 +205,7 @@ impl ConditionallySelectable for FieldElement2625x4 { a.0[1] ^ (mask_vec & (a.0[1] ^ b.0[1])), a.0[2] ^ (mask_vec & (a.0[2] ^ b.0[2])), a.0[3] ^ (mask_vec & (a.0[3] ^ b.0[3])), - a.0[4] ^ (mask_vec & (a.0[4] ^ b.0[4])) + a.0[4] ^ (mask_vec & (a.0[4] ^ b.0[4])), ]) } @@ -266,7 +278,6 @@ impl FieldElement2625x4 { self.shuffle(Shuffle::BACD) } - // Can probably be sped up using multiple vset/vget instead of table #[inline] pub fn blend(&self, other: FieldElement2625x4, control: Lanes) -> FieldElement2625x4 { @@ -326,7 +337,7 @@ impl FieldElement2625x4 { buf[i] = u32x4x2::new( u32x4::new(a_2i, b_2i, a_2i_1, b_2i_1), - u32x4::new(c_2i, d_2i, c_2i_1, d_2i_1) + u32x4::new(c_2i, d_2i, c_2i_1, d_2i_1), ); } return FieldElement2625x4(buf).reduce(); @@ -368,20 +379,12 @@ impl FieldElement2625x4 { use core::arch::aarch64::vqshlq_u32; let c: u32x4x2 = u32x4x2::new( - vqshlq_u32(v.0.0, shifts.0.into()).into(), - vqshlq_u32(v.0.1, shifts.1.into()).into(), + vqshlq_u32(v.0 .0, shifts.0.into()).into(), + vqshlq_u32(v.0 .1, shifts.1.into()).into(), ); u32x4x2::new( - vcombine_u32( - vget_high_u32(c.0.0), - vget_low_u32(c.0.0), - ) - .into(), - vcombine_u32( - vget_high_u32(c.0.1), - vget_low_u32(c.0.1), - ) - .into(), + vcombine_u32(vget_high_u32(c.0 .0), vget_low_u32(c.0 .0)).into(), + vcombine_u32(vget_high_u32(c.0 .1), vget_low_u32(c.0 .1)).into(), ) } }; @@ -390,16 +393,8 @@ impl FieldElement2625x4 { unsafe { use core::arch::aarch64::vcombine_u32; u32x4x2::new( - vcombine_u32( - vget_low_u32(v_lo.0.0), - vget_high_u32(v_hi.0.0), - ) - .into(), - vcombine_u32( - vget_low_u32(v_lo.0.1), - vget_high_u32(v_hi.0.1), - ) - .into(), + vcombine_u32(vget_low_u32(v_lo.0 .0), vget_high_u32(v_hi.0 .0)).into(), + vcombine_u32(vget_low_u32(v_lo.0 .1), vget_high_u32(v_hi.0 .1)).into(), ) } }; @@ -874,5 +869,3 @@ mod test { assert_eq!(x3, splits[3]); } } - - diff --git a/curve25519-dalek/src/backend/vector/neon/packed_simd.rs b/curve25519-dalek/src/backend/vector/neon/packed_simd.rs index a8f5b40f..51aa7b67 100644 --- a/curve25519-dalek/src/backend/vector/neon/packed_simd.rs +++ b/curve25519-dalek/src/backend/vector/neon/packed_simd.rs @@ -55,7 +55,6 @@ macro_rules! impl_shared { impl Eq for $ty {} - impl Add for $ty { type Output = Self; @@ -71,7 +70,7 @@ macro_rules! impl_shared { *self = *self + rhs } } - + impl Sub for $ty { type Output = Self; @@ -128,23 +127,22 @@ macro_rules! impl_shared { pub fn shr(self) -> Self { unsafe { core::arch::aarch64::$shr_intrinsic(self.0, N).into() } } - } - } + }; } impl_shared!( - u32x4, - u32, - uint32x4_t, - vceqq_u32, - vaddq_u32, - vsubq_u32, - vandq_u32, - veorq_u32, - vshlq_n_u32, - vshrq_n_u32, - vgetq_lane_u32 + u32x4, + u32, + uint32x4_t, + vceqq_u32, + vaddq_u32, + vsubq_u32, + vandq_u32, + veorq_u32, + vshlq_n_u32, + vshrq_n_u32, + vgetq_lane_u32 ); impl u32x4 { @@ -171,7 +169,7 @@ impl u32x4 { impl From for core::arch::aarch64::uint32x4_t { #[inline] - fn from(value: u64x2) -> core::arch::aarch64::uint32x4_t { + fn from(value: u64x2) -> core::arch::aarch64::uint32x4_t { unsafe { core::arch::aarch64::vreinterpretq_u32_u64(value.into()) } } } @@ -184,7 +182,7 @@ impl From for u32x4 { } impl From for u32x4 { - #[inline] + #[inline] fn from(value: u64x2) -> u32x4 { Into::::into(value).into() } @@ -217,17 +215,17 @@ impl u32x2 { } impl_shared!( - u64x2, - u64, - uint64x2_t, - vceqq_u64, - vaddq_u64, - vsubq_u64, - vandq_u64, - veorq_u64, - vshlq_n_u64, - vshrq_n_u64, - vgetq_lane_u64 + u64x2, + u64, + uint64x2_t, + vceqq_u64, + vaddq_u64, + vsubq_u64, + vandq_u64, + veorq_u64, + vshlq_n_u64, + vshrq_n_u64, + vgetq_lane_u64 ); impl u64x2 { @@ -256,15 +254,15 @@ pub struct u32x4x2(pub core::arch::aarch64::uint32x4x2_t); impl PartialEq for u32x4x2 { fn eq(&self, other: &Self) -> bool { - u32x4::from(self.0.0) == u32x4::from(other.0.0) - && u32x4::from(self.0.1) == u32x4::from(other.0.1) + u32x4::from(self.0 .0) == u32x4::from(other.0 .0) + && u32x4::from(self.0 .1) == u32x4::from(other.0 .1) } } impl From for core::arch::aarch64::uint32x4x2_t { #[inline] fn from(value: u32x4x2) -> core::arch::aarch64::uint32x4x2_t { - value.0 + value.0 } } @@ -280,8 +278,9 @@ impl BitXor for u32x4x2 { fn bitxor(self, rhs: Self) -> Self::Output { Self::new( - u32x4::from(self.0.0) ^ u32x4::from(rhs.0.0), - u32x4::from(self.0.1) ^ u32x4::from(rhs.0.1)) + u32x4::from(self.0 .0) ^ u32x4::from(rhs.0 .0), + u32x4::from(self.0 .1) ^ u32x4::from(rhs.0 .1), + ) } } @@ -296,9 +295,9 @@ impl BitAnd for u32x4x2 { fn bitand(self, rhs: Self) -> Self::Output { Self::new( - u32x4::from(self.0.0) & u32x4::from(rhs.0.0), - u32x4::from(self.0.1) & u32x4::from(rhs.0.1)) - + u32x4::from(self.0 .0) & u32x4::from(rhs.0 .0), + u32x4::from(self.0 .1) & u32x4::from(rhs.0 .1), + ) } } @@ -306,9 +305,10 @@ impl Add for u32x4x2 { type Output = Self; fn add(self, rhs: Self) -> Self::Output { - Self::new( - u32x4::from(self.0.0) + u32x4::from(rhs.0.0), - u32x4::from(self.0.1) + u32x4::from(rhs.0.1)) + Self::new( + u32x4::from(self.0 .0) + u32x4::from(rhs.0 .0), + u32x4::from(self.0 .1) + u32x4::from(rhs.0 .1), + ) } } @@ -317,8 +317,9 @@ impl Sub for u32x4x2 { fn sub(self, rhs: Self) -> Self::Output { Self::new( - u32x4::from(self.0.0) - u32x4::from(rhs.0.0), - u32x4::from(self.0.1) - u32x4::from(rhs.0.1)) + u32x4::from(self.0 .0) - u32x4::from(rhs.0 .0), + u32x4::from(self.0 .1) - u32x4::from(rhs.0 .1), + ) } } @@ -330,21 +331,24 @@ impl u32x4x2 { #[inline] pub const fn splat(x: u32) -> Self { - Self(core::arch::aarch64::uint32x4x2_t(u32x4::const_splat(x).0, u32x4::const_splat(x).0)) + Self(core::arch::aarch64::uint32x4x2_t( + u32x4::const_splat(x).0, + u32x4::const_splat(x).0, + )) } #[inline] pub fn extract(self) -> u32 { match N { - 0 => u32x4::from(self.0.0).extract::<0>(), - 1 => u32x4::from(self.0.0).extract::<1>(), - 2 => u32x4::from(self.0.0).extract::<2>(), - 3 => u32x4::from(self.0.0).extract::<3>(), - 4 => u32x4::from(self.0.1).extract::<0>(), - 5 => u32x4::from(self.0.1).extract::<1>(), - 6 => u32x4::from(self.0.1).extract::<2>(), - 7 => u32x4::from(self.0.1).extract::<3>(), - _ => unreachable!() + 0 => u32x4::from(self.0 .0).extract::<0>(), + 1 => u32x4::from(self.0 .0).extract::<1>(), + 2 => u32x4::from(self.0 .0).extract::<2>(), + 3 => u32x4::from(self.0 .0).extract::<3>(), + 4 => u32x4::from(self.0 .1).extract::<0>(), + 5 => u32x4::from(self.0 .1).extract::<1>(), + 6 => u32x4::from(self.0 .1).extract::<2>(), + 7 => u32x4::from(self.0 .1).extract::<3>(), + _ => unreachable!(), } } } @@ -356,8 +360,8 @@ pub struct u32x2x2(pub core::arch::aarch64::uint32x2x2_t); impl PartialEq for u32x2x2 { fn eq(&self, other: &Self) -> bool { - u32x2::from(self.0.0) == u32x2::from(other.0.0) - && u32x2::from(self.0.1) == u32x2::from(other.0.1) + u32x2::from(self.0 .0) == u32x2::from(other.0 .0) + && u32x2::from(self.0 .1) == u32x2::from(other.0 .1) } } @@ -366,8 +370,8 @@ impl Add for u32x2x2 { fn add(self, rhs: Self) -> Self::Output { Self::new( - u32x2::from(self.0.0) + u32x2::from(rhs.0.0), - u32x2::from(self.0.1) + u32x2::from(rhs.0.1) + u32x2::from(self.0 .0) + u32x2::from(rhs.0 .0), + u32x2::from(self.0 .1) + u32x2::from(rhs.0 .1), ) } } @@ -381,24 +385,23 @@ impl u32x2x2 { #[inline] pub fn shl(self) -> Self { Self(core::arch::aarch64::uint32x2x2_t( - u32x2::from(self.0.0).shl::().0, - u32x2::from(self.0.1).shl::().0 + u32x2::from(self.0 .0).shl::().0, + u32x2::from(self.0 .1).shl::().0, )) } #[inline] pub fn extract(self) -> u32 { match N { - 0 => u32x2::from(self.0.0).extract::<0>(), - 1 => u32x2::from(self.0.0).extract::<1>(), - 2 => u32x2::from(self.0.1).extract::<0>(), - 3 => u32x2::from(self.0.1).extract::<1>(), - _ => unreachable!() + 0 => u32x2::from(self.0 .0).extract::<0>(), + 1 => u32x2::from(self.0 .0).extract::<1>(), + 2 => u32x2::from(self.0 .1).extract::<0>(), + 3 => u32x2::from(self.0 .1).extract::<1>(), + _ => unreachable!(), } } } - #[allow(non_camel_case_types)] #[derive(Copy, Clone, Debug)] #[repr(transparent)] @@ -436,8 +439,9 @@ impl Add for u64x2x2 { #[inline] fn add(self, rhs: Self) -> Self { Self(core::arch::aarch64::uint64x2x2_t( - (u64x2::from(self.0.0) + u64x2::from(rhs.0.0)).into(), - (u64x2::from(self.0.1) + u64x2::from(rhs.0.1)).into())) + (u64x2::from(self.0 .0) + u64x2::from(rhs.0 .0)).into(), + (u64x2::from(self.0 .1) + u64x2::from(rhs.0 .1)).into(), + )) } } @@ -446,8 +450,9 @@ impl BitAnd for u64x2x2 { fn bitand(self, rhs: Self) -> Self::Output { Self(core::arch::aarch64::uint64x2x2_t( - (u64x2::from(self.0.0) & u64x2::from(rhs.0.0)).into(), - (u64x2::from(self.0.1) & u64x2::from(rhs.0.1)).into())) + (u64x2::from(self.0 .0) & u64x2::from(rhs.0 .0)).into(), + (u64x2::from(self.0 .1) & u64x2::from(rhs.0 .1)).into(), + )) } } @@ -465,27 +470,27 @@ impl u64x2x2 { #[inline] pub fn extract(self) -> u64 { match N { - 0 => u64x2::from(self.0.0).extract::<0>(), - 1 => u64x2::from(self.0.0).extract::<1>(), - 2 => u64x2::from(self.0.1).extract::<0>(), - 3 => u64x2::from(self.0.1).extract::<1>(), - _ => unreachable!() - } + 0 => u64x2::from(self.0 .0).extract::<0>(), + 1 => u64x2::from(self.0 .0).extract::<1>(), + 2 => u64x2::from(self.0 .1).extract::<0>(), + 3 => u64x2::from(self.0 .1).extract::<1>(), + _ => unreachable!(), + } } #[inline] pub fn shl(self) -> Self { Self(core::arch::aarch64::uint64x2x2_t( - u64x2::from(self.0.0).shl::().into(), - u64x2::from(self.0.1).shl::().into())) + u64x2::from(self.0 .0).shl::().into(), + u64x2::from(self.0 .1).shl::().into(), + )) } #[inline] pub fn shr(self) -> Self { Self(core::arch::aarch64::uint64x2x2_t( - u64x2::from(self.0.0).shr::().into(), - u64x2::from(self.0.1).shr::().into())) + u64x2::from(self.0 .0).shr::().into(), + u64x2::from(self.0 .1).shr::().into(), + )) } - } - diff --git a/curve25519-dalek/src/backend/vector/scalar_mul/pippenger.rs b/curve25519-dalek/src/backend/vector/scalar_mul/pippenger.rs index f439ee78..5a643b8b 100644 --- a/curve25519-dalek/src/backend/vector/scalar_mul/pippenger.rs +++ b/curve25519-dalek/src/backend/vector/scalar_mul/pippenger.rs @@ -10,9 +10,9 @@ #![allow(non_snake_case)] #[curve25519_dalek_derive::unsafe_target_feature_specialize( - conditional("avx2", target_arch="x86_64"), - conditional("avx512ifma,avx512vl", all(nightly, target_arch="x86_64")), - conditional("neon", all(nightly, target_arch="aarch64")) + conditional("avx2", target_arch = "x86_64"), + conditional("avx512ifma,avx512vl", all(nightly, target_arch = "x86_64")), + conditional("neon", all(nightly, target_arch = "aarch64")) )] pub mod spec { diff --git a/curve25519-dalek/src/backend/vector/scalar_mul/precomputed_straus.rs b/curve25519-dalek/src/backend/vector/scalar_mul/precomputed_straus.rs index eb7954e6..500510b3 100644 --- a/curve25519-dalek/src/backend/vector/scalar_mul/precomputed_straus.rs +++ b/curve25519-dalek/src/backend/vector/scalar_mul/precomputed_straus.rs @@ -12,9 +12,9 @@ #![allow(non_snake_case)] #[curve25519_dalek_derive::unsafe_target_feature_specialize( - conditional("avx2", target_arch="x86_64"), - conditional("avx512ifma,avx512vl", all(nightly, target_arch="x86_64")), - conditional("neon", all(nightly, target_arch="aarch64")) + conditional("avx2", target_arch = "x86_64"), + conditional("avx512ifma,avx512vl", all(nightly, target_arch = "x86_64")), + conditional("neon", all(nightly, target_arch = "aarch64")) )] pub mod spec { diff --git a/curve25519-dalek/src/backend/vector/scalar_mul/straus.rs b/curve25519-dalek/src/backend/vector/scalar_mul/straus.rs index 9dafd6ba..40bf0d9f 100644 --- a/curve25519-dalek/src/backend/vector/scalar_mul/straus.rs +++ b/curve25519-dalek/src/backend/vector/scalar_mul/straus.rs @@ -12,9 +12,9 @@ #![allow(non_snake_case)] #[curve25519_dalek_derive::unsafe_target_feature_specialize( - conditional("avx2", target_arch="x86_64"), - conditional("avx512ifma,avx512vl", all(nightly, target_arch="x86_64")), - conditional("neon", all(nightly, target_arch="aarch64")) + conditional("avx2", target_arch = "x86_64"), + conditional("avx512ifma,avx512vl", all(nightly, target_arch = "x86_64")), + conditional("neon", all(nightly, target_arch = "aarch64")) )] pub mod spec { diff --git a/curve25519-dalek/src/backend/vector/scalar_mul/variable_base.rs b/curve25519-dalek/src/backend/vector/scalar_mul/variable_base.rs index d716e199..d12cf1e9 100644 --- a/curve25519-dalek/src/backend/vector/scalar_mul/variable_base.rs +++ b/curve25519-dalek/src/backend/vector/scalar_mul/variable_base.rs @@ -1,9 +1,9 @@ #![allow(non_snake_case)] #[curve25519_dalek_derive::unsafe_target_feature_specialize( - conditional("avx2", target_arch="x86_64"), - conditional("avx512ifma,avx512vl", all(nightly, target_arch="x86_64")), - conditional("neon", all(nightly, target_arch="aarch64")) + conditional("avx2", target_arch = "x86_64"), + conditional("avx512ifma,avx512vl", all(nightly, target_arch = "x86_64")), + conditional("neon", all(nightly, target_arch = "aarch64")) )] pub mod spec { diff --git a/curve25519-dalek/src/backend/vector/scalar_mul/vartime_double_base.rs b/curve25519-dalek/src/backend/vector/scalar_mul/vartime_double_base.rs index bc6eeef5..83fcdcfe 100644 --- a/curve25519-dalek/src/backend/vector/scalar_mul/vartime_double_base.rs +++ b/curve25519-dalek/src/backend/vector/scalar_mul/vartime_double_base.rs @@ -12,9 +12,9 @@ #![allow(non_snake_case)] #[curve25519_dalek_derive::unsafe_target_feature_specialize( - conditional("avx2", target_arch="x86_64"), - conditional("avx512ifma,avx512vl", all(nightly, target_arch="x86_64")), - conditional("neon", all(nightly, target_arch="aarch64")) + conditional("avx2", target_arch = "x86_64"), + conditional("avx512ifma,avx512vl", all(nightly, target_arch = "x86_64")), + conditional("neon", all(nightly, target_arch = "aarch64")) )] pub mod spec { diff --git a/curve25519-dalek/src/lib.rs b/curve25519-dalek/src/lib.rs index 4ca7ef29..bded841f 100644 --- a/curve25519-dalek/src/lib.rs +++ b/curve25519-dalek/src/lib.rs @@ -22,7 +22,7 @@ all(curve25519_dalek_backend = "simd", nightly), feature(avx512_target_feature) )] -#![cfg_attr(all(nightly, target_arch="aarch64"), feature(portable_simd))] +#![cfg_attr(all(nightly, target_arch = "aarch64"), feature(portable_simd))] #![cfg_attr(docsrs, feature(doc_auto_cfg, doc_cfg, doc_cfg_hide))] #![cfg_attr(docsrs, doc(cfg_hide(docsrs)))] //------------------------------------------------------------------------