Skip to content

Commit

Permalink
Support Hindsight PR in TorchRec (pytorch#2627)
Browse files Browse the repository at this point in the history
Summary:

### Overview
This diff adds the HindsightTargetPR metric to TorchRec. A bucketized version will also be included. Thrift changes were submitted ahead in D66216486.

### Implementation
1) Create X-wide granular array to store metric states where each index represents the threshold. For bucketization, each bucket will be stacked in the next dimension within the state tensor.
2) Calculate minimum threshold that meets target_precision.
3) Calculate precision and recall points with target threshold.

### Metrics
This metric will return the following curves:
* hindsight_target_pr: this is the calculated threshold for the window state to maximize recall while achieving the target precision.
* hindsight_target_precision: this is the achieved precision with hindsight_target_pr.
* hindsight_target_recall: this is the achieved recall with hindsight_target_pr.

### Usage
Hindsight PR metrics are primarily useful for mimicking the calibration system used within the identity team. Please adjust the bucketization and window size accordingly to best approximate it.

Note: since the states are stored as a dimensional tensor, multiple tasks will not be supported for this metric.

Reviewed By: iamzainhuda

Differential Revision: D65867461
  • Loading branch information
monofb authored and facebook-github-bot committed Dec 10, 2024
1 parent fad795e commit 69dc737
Show file tree
Hide file tree
Showing 5 changed files with 399 additions and 0 deletions.
235 changes: 235 additions & 0 deletions torchrec/metrics/hindsight_target_pr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,235 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# pyre-strict

from typing import Any, cast, Dict, List, Optional, Type

import torch
from torchrec.metrics.metrics_namespace import MetricName, MetricNamespace, MetricPrefix
from torchrec.metrics.rec_metric import (
MetricComputationReport,
RecMetric,
RecMetricComputation,
RecMetricException,
)


# String form of the `target_precision` option name. It is not referenced in the
# visible part of this file; presumably callers use it as a config/kwarg key
# -- TODO confirm.
TARGET_PRECISION = "target_precision"
# Number of evenly spaced decision thresholds covering [0, 1] (both ends
# included). Every per-threshold state tensor in this module has this length,
# and index i corresponds to threshold i / (THRESHOLD_GRANULARITY - 1).
THRESHOLD_GRANULARITY = 1000


def compute_precision(
    num_true_positives: torch.Tensor, num_false_positives: torch.Tensor
) -> torch.Tensor:
    """Return element-wise precision, TP / (TP + FP), as a double tensor.

    Entries where nothing was predicted positive (TP + FP == 0) yield 0.0
    instead of NaN.

    Args:
        num_true_positives (torch.Tensor): (weighted) true-positive counts.
        num_false_positives (torch.Tensor): (weighted) false-positive counts,
            broadcastable against ``num_true_positives``.

    Returns:
        torch.Tensor: precision per entry.
    """
    predicted_positives = num_true_positives + num_false_positives
    # The division may produce NaN for empty entries; torch.where discards
    # those values in favour of the 0.0 fallback.
    ratio = num_true_positives / predicted_positives.double()
    return torch.where(predicted_positives == 0.0, 0.0, ratio)


def compute_recall(
    num_true_positives: torch.Tensor, num_false_negitives: torch.Tensor
) -> torch.Tensor:
    """Return element-wise recall, TP / (TP + FN).

    Entries without any actual positives (TP + FN == 0) yield 0.0 instead
    of NaN.

    Args:
        num_true_positives (torch.Tensor): (weighted) true-positive counts.
        num_false_negitives (torch.Tensor): (weighted) false-negative counts,
            broadcastable against ``num_true_positives``.

    Returns:
        torch.Tensor: recall per entry.
    """
    actual_positives = num_true_positives + num_false_negitives
    # NaN produced by 0 / 0 is masked out by torch.where below.
    ratio = num_true_positives / actual_positives
    return torch.where(actual_positives == 0.0, 0.0, ratio)


def compute_threshold_idx(
    num_true_positives: torch.Tensor,
    num_false_positives: torch.Tensor,
    target_precision: float,
) -> int:
    """Return the lowest threshold index whose precision meets the target.

    Precision is evaluated for all thresholds in a single vectorized pass
    rather than one Python-level tensor comparison per threshold, which
    avoids up to THRESHOLD_GRANULARITY tensor-to-bool conversions per call.

    Args:
        num_true_positives (torch.Tensor): 1-D per-threshold true-positive sums.
        num_false_positives (torch.Tensor): 1-D per-threshold false-positive sums.
        target_precision (float): precision that must be reached.

    Returns:
        int: the first index ``i`` with ``precision[i] >= target_precision``,
        or ``THRESHOLD_GRANULARITY - 1`` when no threshold reaches the target.
    """
    # Only the first THRESHOLD_GRANULARITY entries are considered, matching
    # the range the per-index scan used to cover.
    precision = compute_precision(
        num_true_positives[:THRESHOLD_GRANULARITY],
        num_false_positives[:THRESHOLD_GRANULARITY],
    )
    # torch.nonzero lists matching indices in ascending order, so the first
    # row is the smallest qualifying threshold index.
    qualifying = torch.nonzero(precision >= target_precision)
    if qualifying.numel() == 0:
        return THRESHOLD_GRANULARITY - 1
    return int(qualifying[0].item())


def compute_true_pos_sum(
    labels: torch.Tensor,
    predictions: torch.Tensor,
    weights: torch.Tensor,
) -> torch.Tensor:
    """Return the weighted true-positive sum at every threshold.

    A sample is predicted positive at a threshold when
    ``prediction >= threshold``; it contributes ``weight * label``.

    Args:
        labels (torch.Tensor): ground-truth labels.
        predictions (torch.Tensor): predicted scores, compared against
            thresholds evenly spaced in [0, 1].
        weights (torch.Tensor): per-sample weights.

    Returns:
        torch.Tensor: double tensor of length THRESHOLD_GRANULARITY, allocated
        on the same device as ``predictions`` so it can be accumulated
        in place into metric state living on that device.
    """
    predictions = predictions.double()
    # Allocate the accumulator next to the inputs; a CPU accumulator could not
    # be added in place to state held on an accelerator.
    tp_sum = torch.zeros(
        THRESHOLD_GRANULARITY, dtype=torch.double, device=predictions.device
    )
    thresholds = torch.linspace(0, 1, steps=THRESHOLD_GRANULARITY)
    # Loop-invariant: each sample's contribution if it is counted.
    weighted_positives = weights * labels
    for i, threshold in enumerate(thresholds):
        tp_sum[i] = torch.sum(weighted_positives * (predictions >= threshold), -1)
    return tp_sum


def compute_false_pos_sum(
    labels: torch.Tensor,
    predictions: torch.Tensor,
    weights: torch.Tensor,
) -> torch.Tensor:
    """Return the weighted false-positive sum at every threshold.

    A sample is predicted positive at a threshold when
    ``prediction >= threshold``; it contributes ``weight * (1 - label)``.

    Args:
        labels (torch.Tensor): ground-truth labels.
        predictions (torch.Tensor): predicted scores, compared against
            thresholds evenly spaced in [0, 1].
        weights (torch.Tensor): per-sample weights.

    Returns:
        torch.Tensor: double tensor of length THRESHOLD_GRANULARITY, allocated
        on the same device as ``predictions`` so it can be accumulated
        in place into metric state living on that device.
    """
    predictions = predictions.double()
    # Allocate the accumulator next to the inputs; a CPU accumulator could not
    # be added in place to state held on an accelerator.
    fp_sum = torch.zeros(
        THRESHOLD_GRANULARITY, dtype=torch.double, device=predictions.device
    )
    thresholds = torch.linspace(0, 1, steps=THRESHOLD_GRANULARITY)
    # Loop-invariant: each sample's contribution if it is counted.
    weighted_negatives = weights * (1 - labels)
    for i, threshold in enumerate(thresholds):
        fp_sum[i] = torch.sum(weighted_negatives * (predictions >= threshold), -1)
    return fp_sum


def compute_false_neg_sum(
    labels: torch.Tensor,
    predictions: torch.Tensor,
    weights: torch.Tensor,
) -> torch.Tensor:
    """Return the weighted false-negative sum at every threshold.

    A sample is predicted negative at a threshold when
    ``prediction < threshold``; it contributes ``weight * label``. The
    comparison is strict so that it is the exact complement of the
    ``prediction >= threshold`` rule used for true/false positives: a sample
    sitting exactly on a threshold is a predicted positive and must not be
    counted as a false negative as well.

    Args:
        labels (torch.Tensor): ground-truth labels.
        predictions (torch.Tensor): predicted scores, compared against
            thresholds evenly spaced in [0, 1].
        weights (torch.Tensor): per-sample weights.

    Returns:
        torch.Tensor: double tensor of length THRESHOLD_GRANULARITY, allocated
        on the same device as ``predictions`` so it can be accumulated
        in place into metric state living on that device.
    """
    predictions = predictions.double()
    # Allocate the accumulator next to the inputs; a CPU accumulator could not
    # be added in place to state held on an accelerator.
    fn_sum = torch.zeros(
        THRESHOLD_GRANULARITY, dtype=torch.double, device=predictions.device
    )
    thresholds = torch.linspace(0, 1, steps=THRESHOLD_GRANULARITY)
    # Loop-invariant: each sample's contribution if it is counted.
    weighted_positives = weights * labels
    for i, threshold in enumerate(thresholds):
        fn_sum[i] = torch.sum(weighted_positives * (predictions < threshold), -1)
    return fn_sum


def get_pr_states(
    labels: torch.Tensor,
    predictions: torch.Tensor,
    weights: Optional[torch.Tensor],
) -> Dict[str, torch.Tensor]:
    """Build the per-threshold state increments for one batch.

    Args:
        labels (torch.Tensor): ground-truth labels.
        predictions (torch.Tensor): predicted scores.
        weights (Optional[torch.Tensor]): per-sample weights; when ``None``
            every sample gets weight 1.

    Returns:
        Dict[str, torch.Tensor]: increments keyed by state name
        ("true_pos_sum", "false_pos_sum", "false_neg_sum").
    """
    effective_weights = torch.ones_like(predictions) if weights is None else weights
    # Pair each state name with the function producing its increment; the
    # tuple order fixes both evaluation order and dict key order.
    producers = (
        ("true_pos_sum", compute_true_pos_sum),
        ("false_pos_sum", compute_false_pos_sum),
        ("false_neg_sum", compute_false_neg_sum),
    )
    return {
        state_name: produce(labels, predictions, effective_weights)
        for state_name, produce in producers
    }


class HindsightTargetPRMetricComputation(RecMetricComputation):
    r"""
    This class implements the RecMetricComputation for Hindsight Target PR.

    For both the lifetime and the window state it finds the lowest threshold
    index whose precision reaches ``target_precision`` and reports that index
    together with the precision and recall achieved at it. The reported
    HINDSIGHT_TARGET_PR value is the index into the threshold grid, i.e. the
    threshold itself is ``index / (THRESHOLD_GRANULARITY - 1)``.

    The constructor arguments are defined in RecMetricComputation.
    See the docstring of RecMetricComputation for more detail.

    Args:
        target_precision (float): precision that the selected threshold must
            reach. Defaults to 0.5.
    """

    def __init__(
        self, *args: Any, target_precision: float = 0.5, **kwargs: Any
    ) -> None:
        super().__init__(*args, **kwargs)
        # One slot per threshold for each confusion-matrix counter; all three
        # states are registered identically.
        for state_name in ("true_pos_sum", "false_pos_sum", "false_neg_sum"):
            self._add_state(
                state_name,
                torch.zeros(THRESHOLD_GRANULARITY, dtype=torch.double),
                add_window_state=True,
                dist_reduce_fx="sum",
                persistent=True,
            )
        self._target_precision: float = target_precision

    def update(
        self,
        *,
        predictions: Optional[torch.Tensor],
        labels: torch.Tensor,
        weights: Optional[torch.Tensor],
        **kwargs: Dict[str, Any],
    ) -> None:
        """Accumulate one batch into the lifetime and window states.

        Raises:
            RecMetricException: if ``predictions`` is None.
        """
        if predictions is None:
            raise RecMetricException(
                "Inputs 'predictions' should not be None for HindsightTargetPRMetricComputation update"
            )
        states = get_pr_states(labels, predictions, weights)
        num_samples = predictions.shape[-1]

        for state_name, state_value in states.items():
            state = getattr(self, state_name)
            state += state_value
            self._aggregate_window_state(state_name, state_value, num_samples)

    def _compute(self) -> List[MetricComputationReport]:
        """Report threshold index, precision and recall for lifetime and window."""
        true_pos_sum = cast(torch.Tensor, self.true_pos_sum)
        false_pos_sum = cast(torch.Tensor, self.false_pos_sum)
        false_neg_sum = cast(torch.Tensor, self.false_neg_sum)
        window_true_pos_sum = self.get_window_state("true_pos_sum")
        window_false_pos_sum = self.get_window_state("false_pos_sum")
        window_false_neg_sum = self.get_window_state("false_neg_sum")

        threshold_idx = compute_threshold_idx(
            true_pos_sum,
            false_pos_sum,
            self._target_precision,
        )
        window_threshold_idx = compute_threshold_idx(
            window_true_pos_sum,
            window_false_pos_sum,
            self._target_precision,
        )
        # NOTE: torch.tensor(value) wraps the index as data. The legacy
        # torch.Tensor(n) constructor would instead allocate an uninitialized
        # tensor of length n (and an empty one for n == 0).
        reports = [
            MetricComputationReport(
                name=MetricName.HINDSIGHT_TARGET_PR,
                metric_prefix=MetricPrefix.LIFETIME,
                value=torch.tensor(
                    threshold_idx, dtype=torch.float, device=true_pos_sum.device
                ),
            ),
            MetricComputationReport(
                name=MetricName.HINDSIGHT_TARGET_PR,
                metric_prefix=MetricPrefix.WINDOW,
                value=torch.tensor(
                    window_threshold_idx,
                    dtype=torch.float,
                    device=window_true_pos_sum.device,
                ),
            ),
            MetricComputationReport(
                name=MetricName.HINDSIGHT_TARGET_PRECISION,
                metric_prefix=MetricPrefix.LIFETIME,
                value=compute_precision(
                    true_pos_sum[threshold_idx],
                    false_pos_sum[threshold_idx],
                ),
            ),
            MetricComputationReport(
                name=MetricName.HINDSIGHT_TARGET_PRECISION,
                metric_prefix=MetricPrefix.WINDOW,
                value=compute_precision(
                    window_true_pos_sum[window_threshold_idx],
                    window_false_pos_sum[window_threshold_idx],
                ),
            ),
            MetricComputationReport(
                name=MetricName.HINDSIGHT_TARGET_RECALL,
                metric_prefix=MetricPrefix.LIFETIME,
                value=compute_recall(
                    true_pos_sum[threshold_idx],
                    false_neg_sum[threshold_idx],
                ),
            ),
            MetricComputationReport(
                name=MetricName.HINDSIGHT_TARGET_RECALL,
                metric_prefix=MetricPrefix.WINDOW,
                value=compute_recall(
                    window_true_pos_sum[window_threshold_idx],
                    window_false_neg_sum[window_threshold_idx],
                ),
            ),
        ]
        return reports


class HindsightTargetPRMetric(RecMetric):
    """RecMetric entry point for the Hindsight Target PR metric.

    Only binds the metric namespace and the computation class; all behavior
    comes from RecMetric and HindsightTargetPRMetricComputation.
    """

    # Namespace under which the reported values are published.
    _namespace: MetricNamespace = MetricNamespace.HINDSIGHT_TARGET_PR
    # Computation class that holds the state and produces the reports.
    _computation_class: Type[RecMetricComputation] = HindsightTargetPRMetricComputation
2 changes: 2 additions & 0 deletions torchrec/metrics/metric_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from torchrec.metrics.cali_free_ne import CaliFreeNEMetric
from torchrec.metrics.calibration import CalibrationMetric
from torchrec.metrics.ctr import CTRMetric
from torchrec.metrics.hindsight_target_pr import HindsightTargetPRMetric
from torchrec.metrics.mae import MAEMetric
from torchrec.metrics.metrics_config import (
BatchSizeStage,
Expand Down Expand Up @@ -94,6 +95,7 @@
RecMetricEnum.TENSOR_WEIGHTED_AVG: TensorWeightedAvgMetric,
RecMetricEnum.CALI_FREE_NE: CaliFreeNEMetric,
RecMetricEnum.UNWEIGHTED_NE: UnweightedNEMetric,
RecMetricEnum.HINDSIGHT_TARGET_PR: HindsightTargetPRMetric,
}


Expand Down
1 change: 1 addition & 0 deletions torchrec/metrics/metrics_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ class RecMetricEnum(RecMetricEnumBase):
TENSOR_WEIGHTED_AVG = "tensor_weighted_avg"
CALI_FREE_NE = "cali_free_ne"
UNWEIGHTED_NE = "unweighted_ne"
HINDSIGHT_TARGET_PR = "hindsight_target_pr"


@dataclass(unsafe_hash=True, eq=True)
Expand Down
6 changes: 6 additions & 0 deletions torchrec/metrics/metrics_namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,10 @@ class MetricName(MetricNameBase):
CALI_FREE_NE = "cali_free_ne"
UNWEIGHTED_NE = "unweighted_ne"

HINDSIGHT_TARGET_PR = "hindsight_target_pr"
HINDSIGHT_TARGET_PRECISION = "hindsight_target_precision"
HINDSIGHT_TARGET_RECALL = "hindsight_target_recall"


class MetricNamespaceBase(StrValueMixin, Enum):
pass
Expand Down Expand Up @@ -131,6 +135,8 @@ class MetricNamespace(MetricNamespaceBase):
CALI_FREE_NE = "cali_free_ne"
UNWEIGHTED_NE = "unweighted_ne"

HINDSIGHT_TARGET_PR = "hindsight_target_pr"


class MetricPrefix(StrValueMixin, Enum):
DEFAULT = ""
Expand Down
Loading

0 comments on commit 69dc737

Please sign in to comment.