-
Notifications
You must be signed in to change notification settings - Fork 25
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Hybrid padding #1381
Hybrid padding #1381
Changes from 4 commits
3c60992
638f9d0
f0ea6bf
d71ea88
947be2e
52c78c0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,8 @@ pub(crate) mod distributions; | |
pub mod insecure; | ||
pub mod step; | ||
|
||
use std::iter::{repeat, repeat_with}; | ||
|
||
#[cfg(any(test, feature = "test-fixture", feature = "cli"))] | ||
pub use insecure::DiscreteDp as InsecureDiscreteDp; | ||
use rand::Rng; | ||
|
@@ -28,6 +30,7 @@ use crate::{ | |
}, | ||
RecordId, | ||
}, | ||
report::hybrid::IndistinguishableHybridReport, | ||
secret_sharing::{ | ||
replicated::{semi_honest::AdditiveShare, ReplicatedSecretSharing}, | ||
SharedValue, | ||
|
@@ -130,6 +133,64 @@ pub trait Paddable { | |
Self: Sized; | ||
} | ||
|
||
// `Paddable` implementation that produces padding rows of type `IndistinguishableHybridReport`. | |
impl<BK, V> Paddable for IndistinguishableHybridReport<BK, V> | ||
where | ||
BK: BooleanArray + U128Conversions, | ||
V: BooleanArray, | ||
{ | ||
/// Appends dummy reports carrying randomly generated match keys to `padding_input_rows`. | |
/// | |
/// With `OPRFPadding::NoOPRFPadding` nothing is appended. With `OPRFPadding::Parameters`, for | |
/// every cardinality in `1..=matchkey_cardinality_cap` a value `sample` is drawn from | |
/// `OPRFPaddingDp`, and `sample` freshly generated match keys are each appended | |
/// `cardinality` times. | |
/// | |
/// Returns the running total of `sample * cardinality` over all cardinalities, i.e. the | |
/// number of rows appended by this call. | |
/// | |
/// # Errors | |
/// Propagates any error returned by `OPRFPaddingDp::new`. | |
fn add_padding_items<VC: Extend<Self>, const B: usize>( | ||
eriktaubeneck marked this conversation as resolved.
Show resolved
Hide resolved
|
||
direction_to_excluded_helper: Direction, | ||
padding_input_rows: &mut VC, | ||
padding_params: &PaddingParameters, | ||
rng: &mut InstrumentedSequentialSharedRandomness, | ||
) -> Result<u32, Error> { | ||
let mut total_number_of_fake_rows = 0; | ||
match padding_params.oprf_padding { | ||
OPRFPadding::NoOPRFPadding => {} | ||
OPRFPadding::Parameters { | ||
oprf_epsilon, | ||
oprf_delta, | ||
matchkey_cardinality_cap, | ||
oprf_padding_sensitivity, | ||
} => { | ||
let oprf_padding = | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. do we still plan to use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This gets somewhat confusing, because we use the term "OPRF" to name both the entire ipa v2 protocol as well as the pseudo random value that the match key is converted into. In this instance, we are padding values so that the process of converting match keys into OPRF values is differentially private (same as previously), as opposed to padding values for aggregation or breakdown key reveal. In that sense, I think this naming still makes sense, and actually "HybridPadding" would be less clear (at least once the old protocol is purged.) Happy for input on this - but that was my thinking. I can add a comment here to make that more clear as well. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looking at this more closely, this is actually just poor naming of the existing |
||
OPRFPaddingDp::new(oprf_epsilon, oprf_delta, oprf_padding_sensitivity)?; | ||
for cardinality in 1..=matchkey_cardinality_cap { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I attempted to get this all into a single |
||
// one independent draw per cardinality value | |
let sample = oprf_padding.sample(rng); | ||
total_number_of_fake_rows += sample * cardinality; | ||
|
||
padding_input_rows.extend( | ||
repeat_with(|| { | ||
// a new random match key is generated each time this closure runs | |
let dummy_mk: BA64 = rng.gen(); | ||
repeat(IndistinguishableHybridReport::from( | ||
AdditiveShare::new_excluding_direction( | ||
dummy_mk, | ||
direction_to_excluded_helper, | ||
), | ||
)) | ||
.take(cardinality as usize) | ||
}) | ||
// this means there will be `sample` many unique | |
// matchkeys to add each with cardinality = `cardinality` | |
.take(sample as usize) | ||
.flatten(), | ||
); | ||
} | ||
} | ||
} | ||
Ok(total_number_of_fake_rows) | ||
} | ||
|
||
/// Appends `total_number_of_fake_rows` copies of `IndistinguishableHybridReport::ZERO` to | |
/// `padding_input_rows`. | |
fn add_zero_shares<VC: Extend<Self>>( | ||
padding_input_rows: &mut VC, | ||
total_number_of_fake_rows: u32, | ||
) { | ||
padding_input_rows.extend( | ||
repeat(IndistinguishableHybridReport::ZERO).take(total_number_of_fake_rows as usize), | ||
); | ||
} | ||
} | ||
|
||
impl<BK, TV, TS> Paddable for OPRFIPAInputRow<BK, TV, TS> | ||
where | ||
BK: BooleanArray + U128Conversions, | ||
|
@@ -426,6 +487,7 @@ mod tests { | |
}, | ||
RecordId, | ||
}, | ||
report::hybrid::IndistinguishableHybridReport, | ||
secret_sharing::replicated::semi_honest::AdditiveShare, | ||
test_fixture::{Reconstruct, Runner, TestWorld}, | ||
}; | ||
|
@@ -451,6 +513,31 @@ mod tests { | |
Ok(input) | ||
} | ||
|
||
/// Test helper: calls `apply_dp_padding_pass` on an initially empty vector of | |
/// `IndistinguishableHybridReport<BK, V>`, passing `Role::H3` and the given padding parameters, | |
/// and returns the resulting rows. | |
/// | |
/// Any error from `apply_dp_padding_pass` is propagated to the caller. | |
pub async fn set_up_apply_dp_padding_pass_for_indistinguishable_reports< | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The formatting on this function is crazy. Is it because the name of the function is kinda long? Does it make sense to move this to a separate module? Maybe that way you can reduce the size of the function name since it will have a more specific context in which is used. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. just the standard formatter. I don't think it makes sense to move to a separate module, all the padding functions live in this module. |
||
C, | ||
BK, | ||
V, | ||
const B: usize, | ||
>( | ||
ctx: C, | ||
padding_params: PaddingParameters, | ||
) -> Result<Vec<IndistinguishableHybridReport<BK, V>>, Error> | ||
where | ||
C: Context, | ||
BK: BooleanArray + U128Conversions, | ||
V: BooleanArray, | ||
{ | ||
// start from no real input so that the output consists only of rows added by the pass | |
let mut input: Vec<IndistinguishableHybridReport<BK, V>> = Vec::new(); | ||
input = apply_dp_padding_pass::<C, IndistinguishableHybridReport<BK, V>, B>( | ||
ctx, | ||
input, | ||
Role::H3, | ||
&padding_params, | ||
) | ||
.await?; | ||
Ok(input) | ||
} | ||
|
||
#[tokio::test] | ||
pub async fn oprf_noise_in_dp_padding_pass() { | ||
type BK = BA8; | ||
|
@@ -525,6 +612,83 @@ mod tests { | |
} | ||
} | ||
|
||
#[tokio::test] | ||
pub async fn indistinguishable_report_noise_in_dp_padding_pass() { | ||
// Note: This is a close copy of the test `oprf_noise_in_dp_padding_pass` | |
// Which will make this easier to delete the former test | |
// when we remove the oprf protocol. | |
type BK = BA8; | ||
type V = BA3; | ||
const B: usize = 256; | ||
let world = TestWorld::default(); | ||
let oprf_epsilon = 1.0; | ||
let oprf_delta = 1e-6; | ||
let matchkey_cardinality_cap = 10; | ||
let oprf_padding_sensitivity = 2; | ||
|
||
let result = world | ||
.semi_honest((), |ctx, ()| async move { | ||
let padding_params = PaddingParameters { | ||
oprf_padding: OPRFPadding::Parameters { | ||
oprf_epsilon, | ||
oprf_delta, | ||
matchkey_cardinality_cap, | ||
oprf_padding_sensitivity, | ||
}, | ||
aggregation_padding: AggregationPadding::NoAggPadding, | ||
}; | ||
set_up_apply_dp_padding_pass_for_indistinguishable_reports::<_, BK, V, B>( | ||
ctx, | ||
padding_params, | ||
) | ||
.await | ||
}) | ||
.await | ||
.map(Result::unwrap); | ||
// check that all three helpers added the same number of dummy shares | |
assert!(result[0].len() == result[1].len() && result[0].len() == result[2].len()); | ||
|
||
let result_reconstructed = result.reconstruct(); | ||
// check that all fields besides the matchkey are zero and matchkey is not zero | |
// `match_key_counts`: match key -> number of rows carrying that match key | |
let mut match_key_counts: HashMap<u64, u32> = HashMap::new(); | ||
for row in result_reconstructed { | ||
assert!(row.value == 0); | ||
assert!(row.breakdown_key == 0); // since we set AggregationPadding::NoAggPadding | |
assert!(row.match_key != 0); | ||
|
||
let count = match_key_counts.entry(row.match_key).or_insert(0); | ||
*count += 1; | ||
} | ||
// Now look at how many times a match_key occurred | |
// `sample_per_cardinality`: cardinality -> number of distinct match keys with that cardinality | |
let mut sample_per_cardinality: BTreeMap<u32, u32> = BTreeMap::new(); | ||
for cardinality in match_key_counts.values() { | ||
let count = sample_per_cardinality.entry(*cardinality).or_insert(0); | ||
*count += 1; | ||
} | ||
// `distribution_of_samples`: sample value -> number of cardinalities that produced it | |
let mut distribution_of_samples: BTreeMap<u32, u32> = BTreeMap::new(); | ||
|
||
for (cardinality, sample) in sample_per_cardinality { | ||
println!("{sample} user IDs occurred {cardinality} time(s)"); | ||
let count = distribution_of_samples.entry(sample).or_insert(0); | ||
*count += 1; | ||
} | ||
|
||
let oprf_padding = | ||
OPRFPaddingDp::new(oprf_epsilon, oprf_delta, oprf_padding_sensitivity).unwrap(); | ||
|
||
let (mean, std_bound) = oprf_padding.mean_and_std_bound(); | ||
let tolerance_bound = 12.0; | ||
assert!(std_bound > 1.0); // bound on the std only holds if this is true. | |
println!("mean = {mean}, std_bound = {std_bound}"); | ||
for (sample, count) in &distribution_of_samples { | ||
println!("An OPRFPadding sample value equal to {sample} occurred {count} time(s)",); | ||
// NOTE(review): the message below says "aggregation noise" although this test configures | |
// only OPRF padding (`NoAggPadding` above) - the wording looks copied; confirm and reword. | |
assert!( | ||
(f64::from(*sample) - mean).abs() < tolerance_bound * std_bound, | ||
"aggregation noise sample was not within {tolerance_bound} times the standard deviation bound from what was expected." | ||
); | ||
} | ||
} | ||
|
||
pub async fn set_up_apply_dp_padding_pass_for_agg<C, BK, TV, const B: usize>( | ||
ctx: C, | ||
padding_params: PaddingParameters, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This trait could be made better...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can you take a look and see if this impl is cleaner?