From 0fe6ac60aa60ce421effaba2ef1f9b47cf6fabbb Mon Sep 17 00:00:00 2001 From: Pluto Date: Tue, 26 Nov 2024 19:14:16 +0100 Subject: [PATCH] Add backward compatibility for metric calculation (#3798) Co-authored-by: cragwolfe --- CHANGELOG.md | 10 ++++++++++ unstructured/__version__.py | 2 +- unstructured/metrics/evaluate.py | 15 +++++++++++++-- 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8bcfb375d2..cdf1cd797c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,12 @@ +## 0.16.8 + +### Enhancements +- **Metrics: Weighted table average is optional** + +### Features + +### Fixes + ## 0.16.7 ### Enhancements @@ -7,6 +16,7 @@ ### Fixes + ## 0.16.6 ### Enhancements diff --git a/unstructured/__version__.py b/unstructured/__version__.py index 8685b152b7..ef1bb7da50 100644 --- a/unstructured/__version__.py +++ b/unstructured/__version__.py @@ -1 +1 @@ -__version__ = "0.16.7" # pragma: no cover +__version__ = "0.16.8" # pragma: no cover diff --git a/unstructured/metrics/evaluate.py b/unstructured/metrics/evaluate.py index 1ff498579a..6aae15fc7f 100755 --- a/unstructured/metrics/evaluate.py +++ b/unstructured/metrics/evaluate.py @@ -216,6 +216,8 @@ class TableStructureMetricsCalculator(BaseMetricsCalculator): """ cutoff: Optional[float] = None + weighted_average: bool = True + include_false_positives: bool = True def __post_init__(self): super().__post_init__() @@ -287,11 +289,20 @@ def _generate_dataframes(self, rows): df = pd.DataFrame(rows, columns=headers) df["_table_weights"] = df["total_tables"] - # we give false positive tables a 1 table worth of weight in computing table level acc - df["_table_weights"][df.total_tables.eq(0) & df.total_predicted_tables.gt(0)] = 1 + + if self.include_false_positives: + # we give false positive tables a 1 table worth of weight in computing table level acc + df["_table_weights"][df.total_tables.eq(0) & df.total_predicted_tables.gt(0)] = 1 + # filter down to only those with actual and/or predicted tables has_tables_df = df[df["_table_weights"] > 0] + if not self.weighted_average: + # for all non zero elements assign them value 1 + df["_table_weights"] = df["_table_weights"].apply( + lambda table_weight: 1 if table_weight != 0 else 0 + ) + if has_tables_df.empty: agg_df = pd.DataFrame( [[metric, None, None, None, 0] for metric in self.supported_metric_names]