Skip to content

Commit

Permalink
Small fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
dvadym committed Sep 12, 2024
1 parent df16d7a commit 9e9653a
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 16 deletions.
7 changes: 4 additions & 3 deletions analysis/cross_partition_combiners.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,12 +211,13 @@ def _per_partition_to_utility_report(
# The inequality is strict (>) when several SUM metrics are analyzed,
# e.g. SUM(column1), SUM(column2), etc.
assert len(per_partition_utility.metric_errors) >= len(dp_metrics)
metric_errors = []
for metric_error, dp_metric in zip(per_partition_utility.metric_errors,
dp_metrics):
for metric_error in per_partition_utility.metric_errors:
metric_errors.append(
_sum_metrics_to_metric_utility(metric_error, dp_metric,
_sum_metrics_to_metric_utility(metric_error,
metric_error.aggregation,
prob_to_keep, partition_weight))

# configuration_index is set on the next stages
return metrics.UtilityReport(configuration_index=-1,
partitions_info=partition_metrics,
metric_errors=metric_errors)
Expand Down
6 changes: 4 additions & 2 deletions analysis/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,8 +272,10 @@ class UtilityReportBin:
was computed. The metric can be COUNT, PRIVACY_ID_COUNT, SUM.
Attributes:
partition_size_from: lower bound of partitions size.
partition_size_to: upper bound of partitions size.
partition_size_from: lower bound of the number of privacy units in
partitions.
partition_size_to: upper (exclusive) bound of the number of privacy
units in partitions.
report: the result of utility analysis for partitions of size
[partition_size_from, partition_size_to).
"""
Expand Down
11 changes: 6 additions & 5 deletions analysis/tests/utility_analysis_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,9 +179,10 @@ def test_wo_public_partitions(self, pre_aggregated: bool):
])
expected_copy = copy.deepcopy(expected)
expected.utility_report_histogram = [
metrics.UtilityReportBin(partition_size_from=20,
partition_size_to=50,
report=expected_copy)
metrics.UtilityReportBin(
partition_size_from=10, # 10 privacy ids
partition_size_to=20,
report=expected_copy)
]
common.assert_dataclasses_are_equal(self, report, expected)
self.assertLen(per_partition_result, 10)
Expand Down Expand Up @@ -322,9 +323,9 @@ def test_unnest_metrics(self):
output = list(utility_analysis._unnest_metrics(input_data))
self.assertLen(output, 4)
self.assertEqual(output[0], ((0, None), input_data[0]))
self.assertEqual(output[1], ((0, 100), input_data[0]))
self.assertEqual(output[1], ((0, 1), input_data[0]))
self.assertEqual(output[2], ((1, None), input_data[1]))
self.assertEqual(output[3], ((1, 100), input_data[1]))
self.assertEqual(output[3], ((1, 1), input_data[1]))

@parameterized.named_parameters(
dict(testcase_name="without pre-threshold",
Expand Down
10 changes: 4 additions & 6 deletions analysis/utility_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,13 +202,11 @@ def _unnest_metrics(
"""Unnests metrics from different configurations."""
for i, metric in enumerate(metrics):
yield ((i, None), metric)
if metrics[0].metric_errors:
partition_size = metrics[0].metric_errors[0].sum
else:
# Select partitions case.
partition_size = metrics[0].raw_statistics.privacy_id_count
# Emits metrics for computing histogram by partition size.
# Choose the histogram bucket based on the privacy ID count.
partition_size = metrics[0].raw_statistics.privacy_id_count
bucket = _get_lower_bound(partition_size)

# Emits metrics for computing histogram by partition size.
yield ((i, bucket), metric)


Expand Down

0 comments on commit 9e9653a

Please sign in to comment.