diff --git a/ipa-core/src/protocol/hybrid/mod.rs b/ipa-core/src/protocol/hybrid/mod.rs index 482f6e939..a777b7448 100644 --- a/ipa-core/src/protocol/hybrid/mod.rs +++ b/ipa-core/src/protocol/hybrid/mod.rs @@ -1,5 +1,7 @@ pub(crate) mod step; +use step::HybridStep as Step; + use crate::{ error::Error, ff::{ @@ -9,12 +11,14 @@ use crate::{ helpers::query::DpMechanism, protocol::{ context::{ShardedContext, UpgradableContext}, - ipa_prf::{oprf_padding::PaddingParameters, shuffle::Shuffle}, + ipa_prf::{ + oprf_padding::{apply_dp_padding, PaddingParameters}, + shuffle::Shuffle, + }, }, report::hybrid::IndistinguishableHybridReport, secret_sharing::replicated::semi_honest::AdditiveShare as Replicated, }; - // In theory, we could support (runtime-configured breakdown count) ≤ (compile-time breakdown count) // ≤ 2^|bk|, with all three values distinct, but at present, there is no runtime configuration and // the latter two must be equal. The implementation of `move_single_value_to_bucket` does support a @@ -61,10 +65,10 @@ impl BreakdownKey<256> for BA8 {} /// # Panics /// Propagates errors from config issues or while running the protocol pub async fn hybrid_protocol<'ctx, C, BK, V, HV, const SS_BITS: usize, const B: usize>( - _ctx: C, + ctx: C, input_rows: Vec>, _dp_params: DpMechanism, - _dp_padding_params: PaddingParameters, + dp_padding_params: PaddingParameters, ) -> Result>, Error> where C: UpgradableContext + 'ctx + Shuffle + ShardedContext, @@ -75,5 +79,14 @@ where if input_rows.is_empty() { return Ok(vec![Replicated::ZERO; B]); } + + // Apply DP padding for OPRF + let _padded_input_rows = apply_dp_padding::<_, IndistinguishableHybridReport, B>( + ctx.narrow(&Step::PaddingDp), + input_rows, + &dp_padding_params, + ) + .await?; + unimplemented!("protocol::hybrid::hybrid_protocol is not fully implemented") } diff --git a/ipa-core/src/protocol/hybrid/step.rs b/ipa-core/src/protocol/hybrid/step.rs index 5de0051be..aed51ad1e 100644 --- 
a/ipa-core/src/protocol/hybrid/step.rs +++ b/ipa-core/src/protocol/hybrid/step.rs @@ -3,4 +3,6 @@ use ipa_step_derive::CompactStep; #[derive(CompactStep)] pub(crate) enum HybridStep { ReshardByTag, + #[step(child = crate::protocol::ipa_prf::oprf_padding::step::PaddingDpStep, name="padding_dp")] + PaddingDp, } diff --git a/ipa-core/src/protocol/ipa_prf/oprf_padding/mod.rs b/ipa-core/src/protocol/ipa_prf/oprf_padding/mod.rs index 207dd2a43..0d74ad6a7 100644 --- a/ipa-core/src/protocol/ipa_prf/oprf_padding/mod.rs +++ b/ipa-core/src/protocol/ipa_prf/oprf_padding/mod.rs @@ -2,6 +2,8 @@ pub(crate) mod distributions; pub mod insecure; pub mod step; +use std::iter::{repeat, repeat_with}; + #[cfg(any(test, feature = "test-fixture", feature = "cli"))] pub use insecure::DiscreteDp as InsecureDiscreteDp; use rand::Rng; @@ -28,6 +30,7 @@ use crate::{ }, RecordId, }, + report::hybrid::IndistinguishableHybridReport, secret_sharing::{ replicated::{semi_honest::AdditiveShare, ReplicatedSecretSharing}, SharedValue, @@ -130,6 +133,72 @@ pub trait Paddable { Self: Sized; } +impl Paddable for IndistinguishableHybridReport +where + BK: BooleanArray + U128Conversions, + V: BooleanArray, +{ + /// Given an extendable collection of `IndistinguishableHybridReport`s, + /// this function will pad the collection with dummy reports. The reports + /// have a random `match_key` and zeros for `breakdown_key` and `value`. + /// Dummies need to be added at every possible cardinality of `match_key`s, + /// e.g., we add sets of dummies with the same `match_key` at each possible cardinality. + /// The number of sets at each cardinality is random, and determined by `padding_params`. 
+ fn add_padding_items, const B: usize>( + direction_to_excluded_helper: Direction, + padding_input_rows: &mut VC, + padding_params: &PaddingParameters, + rng: &mut InstrumentedSequentialSharedRandomness, + ) -> Result { + let mut total_number_of_fake_rows = 0; + match padding_params.oprf_padding { + OPRFPadding::NoOPRFPadding => {} + OPRFPadding::Parameters { + oprf_epsilon, + oprf_delta, + matchkey_cardinality_cap, + oprf_padding_sensitivity, + } => { + let oprf_padding = + OPRFPaddingDp::new(oprf_epsilon, oprf_delta, oprf_padding_sensitivity)?; + for cardinality in 1..=matchkey_cardinality_cap { + let sample = oprf_padding.sample(rng); + total_number_of_fake_rows += sample * cardinality; + + padding_input_rows.extend( + repeat_with(|| { + let dummy_mk: BA64 = rng.gen(); + repeat(IndistinguishableHybridReport::from( + AdditiveShare::new_excluding_direction( + dummy_mk, + direction_to_excluded_helper, + ), + )) + .take(cardinality as usize) + }) + // this means there will be `sample` many unique + // matchkeys to add each with cardinality = `cardinality` + .take(sample as usize) + .flatten(), + ); + } + } + } + Ok(total_number_of_fake_rows) + } + + /// Given an extendable collection of `IndistinguishableHybridReport`s, + /// this function adds `total_number_of_fake_rows` of Reports with zeros in all fields. 
+ fn add_zero_shares>( + padding_input_rows: &mut VC, + total_number_of_fake_rows: u32, + ) { + padding_input_rows.extend( + repeat(IndistinguishableHybridReport::ZERO).take(total_number_of_fake_rows as usize), + ); + } +} + impl Paddable for OPRFIPAInputRow where BK: BooleanArray + U128Conversions, @@ -426,6 +495,7 @@ mod tests { }, RecordId, }, + report::hybrid::IndistinguishableHybridReport, secret_sharing::replicated::semi_honest::AdditiveShare, test_fixture::{Reconstruct, Runner, TestWorld}, }; @@ -451,6 +521,31 @@ mod tests { Ok(input) } + pub async fn set_up_apply_dp_padding_pass_for_indistinguishable_reports< + C, + BK, + V, + const B: usize, + >( + ctx: C, + padding_params: PaddingParameters, + ) -> Result>, Error> + where + C: Context, + BK: BooleanArray + U128Conversions, + V: BooleanArray, + { + let mut input: Vec> = Vec::new(); + input = apply_dp_padding_pass::, B>( + ctx, + input, + Role::H3, + &padding_params, + ) + .await?; + Ok(input) + } + #[tokio::test] pub async fn oprf_noise_in_dp_padding_pass() { type BK = BA8; @@ -525,6 +620,83 @@ mod tests { } } + #[tokio::test] + pub async fn indistinguishable_report_noise_in_dp_padding_pass() { + // Note: This is a close copy of the test `oprf_noise_in_dp_padding_pass` + // Which will make this easier to delete the former test + // when we remove the oprf protocol. 
+ type BK = BA8; + type V = BA3; + const B: usize = 256; + let world = TestWorld::default(); + let oprf_epsilon = 1.0; + let oprf_delta = 1e-6; + let matchkey_cardinality_cap = 10; + let oprf_padding_sensitivity = 2; + + let result = world + .semi_honest((), |ctx, ()| async move { + let padding_params = PaddingParameters { + oprf_padding: OPRFPadding::Parameters { + oprf_epsilon, + oprf_delta, + matchkey_cardinality_cap, + oprf_padding_sensitivity, + }, + aggregation_padding: AggregationPadding::NoAggPadding, + }; + set_up_apply_dp_padding_pass_for_indistinguishable_reports::<_, BK, V, B>( + ctx, + padding_params, + ) + .await + }) + .await + .map(Result::unwrap); + // check that all three helpers added the same number of dummy shares + assert!(result[0].len() == result[1].len() && result[0].len() == result[2].len()); + + let result_reconstructed = result.reconstruct(); + // check that all fields besides the matchkey are zero and matchkey is not zero + let mut match_key_counts: HashMap = HashMap::new(); + for row in result_reconstructed { + assert!(row.value == 0); + assert!(row.breakdown_key == 0); // since we set AggregationPadding::NoAggPadding + assert!(row.match_key != 0); + + let count = match_key_counts.entry(row.match_key).or_insert(0); + *count += 1; + } + // Now look at how many times a match_key occurred + let mut sample_per_cardinality: BTreeMap = BTreeMap::new(); + for cardinality in match_key_counts.values() { + let count = sample_per_cardinality.entry(*cardinality).or_insert(0); + *count += 1; + } + let mut distribution_of_samples: BTreeMap = BTreeMap::new(); + + for (cardinality, sample) in sample_per_cardinality { + println!("{sample} user IDs occurred {cardinality} time(s)"); + let count = distribution_of_samples.entry(sample).or_insert(0); + *count += 1; + } + + let oprf_padding = + OPRFPaddingDp::new(oprf_epsilon, oprf_delta, oprf_padding_sensitivity).unwrap(); + + let (mean, std_bound) = oprf_padding.mean_and_std_bound(); + let tolerance_bound = 
12.0; + assert!(std_bound > 1.0); // bound on the std only holds if this is true. + println!("mean = {mean}, std_bound = {std_bound}"); + for (sample, count) in &distribution_of_samples { + println!("An OPRFPadding sample value equal to {sample} occurred {count} time(s)",); + assert!( + (f64::from(*sample) - mean).abs() < tolerance_bound * std_bound, + "aggregation noise sample was not within {tolerance_bound} times the standard deviation bound from what was expected." + ); + } + } + pub async fn set_up_apply_dp_padding_pass_for_agg( ctx: C, padding_params: PaddingParameters, diff --git a/ipa-core/src/report/hybrid.rs b/ipa-core/src/report/hybrid.rs index 62d66a797..95604e2fe 100644 --- a/ipa-core/src/report/hybrid.rs +++ b/ipa-core/src/report/hybrid.rs @@ -353,9 +353,35 @@ where BK: SharedValue, V: SharedValue, { - match_key: Replicated, - value: Replicated, - breakdown_key: Replicated, + pub match_key: Replicated, + pub value: Replicated, + pub breakdown_key: Replicated, +} + +impl IndistinguishableHybridReport +where + BK: SharedValue, + V: SharedValue, +{ + pub const ZERO: Self = Self { + match_key: Replicated::::ZERO, + value: Replicated::::ZERO, + breakdown_key: Replicated::::ZERO, + }; +} + +impl From> for IndistinguishableHybridReport +where + BK: SharedValue, + V: SharedValue, +{ + fn from(match_key: Replicated) -> Self { + Self { + match_key, + value: Replicated::::ZERO, + breakdown_key: Replicated::::ZERO, + } + } } impl From> for IndistinguishableHybridReport diff --git a/ipa-core/src/secret_sharing/replicated/mod.rs b/ipa-core/src/secret_sharing/replicated/mod.rs index dcf51494e..e2c7617e4 100644 --- a/ipa-core/src/secret_sharing/replicated/mod.rs +++ b/ipa-core/src/secret_sharing/replicated/mod.rs @@ -2,12 +2,20 @@ pub mod malicious; pub mod semi_honest; use super::{SecretSharing, SharedValue}; +use crate::helpers::Direction; pub trait ReplicatedSecretSharing: SecretSharing { fn new(a: V, b: V) -> Self; fn left(&self) -> V; fn right(&self) -> V; + fn 
new_excluding_direction(v: V, direction: Direction) -> Self { + match direction { + Direction::Left => Self::new(V::ZERO, v), + Direction::Right => Self::new(v, V::ZERO), + } + } + fn map T, R: ReplicatedSecretSharing, T: SharedValue>(&self, f: F) -> R { R::new(f(self.left()), f(self.right())) } diff --git a/ipa-core/src/test_fixture/hybrid.rs b/ipa-core/src/test_fixture/hybrid.rs index 63ecf73e5..3b8cc2460 100644 --- a/ipa-core/src/test_fixture/hybrid.rs +++ b/ipa-core/src/test_fixture/hybrid.rs @@ -1,11 +1,56 @@ use std::collections::{HashMap, HashSet}; +use crate::{ + ff::{boolean_array::BooleanArray, U128Conversions}, + report::hybrid::IndistinguishableHybridReport, + secret_sharing::{replicated::semi_honest::AdditiveShare as Replicated, IntoShares}, + test_fixture::sharing::Reconstruct, +}; + #[derive(Debug, Clone, PartialEq, PartialOrd, Eq)] pub enum TestHybridRecord { TestImpression { match_key: u64, breakdown_key: u32 }, TestConversion { match_key: u64, value: u32 }, } +#[derive(PartialEq, Eq)] +pub struct TestIndistinguishableHybridReport { + pub match_key: u64, + pub value: u32, + pub breakdown_key: u32, +} + +impl Reconstruct + for [&IndistinguishableHybridReport; 3] +where + BK: BooleanArray + U128Conversions + IntoShares>, + V: BooleanArray + U128Conversions + IntoShares>, +{ + fn reconstruct(&self) -> TestIndistinguishableHybridReport { + let match_key = self + .each_ref() + .map(|v| v.match_key.clone()) + .reconstruct() + .as_u128(); + let breakdown_key = self + .each_ref() + .map(|v| v.breakdown_key.clone()) + .reconstruct() + .as_u128(); + let value = self + .each_ref() + .map(|v| v.value.clone()) + .reconstruct() + .as_u128(); + + TestIndistinguishableHybridReport { + match_key: match_key.try_into().unwrap(), + breakdown_key: breakdown_key.try_into().unwrap(), + value: value.try_into().unwrap(), + } + } +} + struct HashmapEntry { breakdown_key: u32, total_value: u32,