From 0fe6ac60aa60ce421effaba2ef1f9b47cf6fabbb Mon Sep 17 00:00:00 2001
From: Pluto <kamil.plucinski@deepsense.ai>
Date: Tue, 26 Nov 2024 19:14:16 +0100
Subject: [PATCH] Add backward compatibility for metric calculation (#3798)

Co-authored-by: cragwolfe <crag@unstructured.io>
---
 CHANGELOG.md                     | 10 ++++++++++
 unstructured/__version__.py      |  2 +-
 unstructured/metrics/evaluate.py | 15 +++++++++++++--
 3 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8bcfb375d2..cdf1cd797c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,12 @@
+## 0.16.8
+
+### Enhancements
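+- **Metrics: Weighted table average is optional** Adds `weighted_average` and `include_false_positives` options (both default to `True`) to `TableStructureMetricsCalculator` for backward-compatible metric calculation.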
+
+### Features
+
+### Fixes
+
 ## 0.16.7
 
 ### Enhancements
@@ -7,6 +16,7 @@
 
 ### Fixes
 
+
 ## 0.16.6
 
 ### Enhancements
diff --git a/unstructured/__version__.py b/unstructured/__version__.py
index 8685b152b7..ef1bb7da50 100644
--- a/unstructured/__version__.py
+++ b/unstructured/__version__.py
@@ -1 +1 @@
-__version__ = "0.16.7"  # pragma: no cover
+__version__ = "0.16.8"  # pragma: no cover
diff --git a/unstructured/metrics/evaluate.py b/unstructured/metrics/evaluate.py
index 1ff498579a..6aae15fc7f 100755
--- a/unstructured/metrics/evaluate.py
+++ b/unstructured/metrics/evaluate.py
@@ -216,6 +216,8 @@ class TableStructureMetricsCalculator(BaseMetricsCalculator):
     """
 
     cutoff: Optional[float] = None
+    weighted_average: bool = True
+    include_false_positives: bool = True
 
     def __post_init__(self):
         super().__post_init__()
@@ -287,11 +289,20 @@ def _generate_dataframes(self, rows):
 
         df = pd.DataFrame(rows, columns=headers)
         df["_table_weights"] = df["total_tables"]
-        # we give false positive tables a 1 table worth of weight in computing table level acc
-        df["_table_weights"][df.total_tables.eq(0) & df.total_predicted_tables.gt(0)] = 1
+
+        if self.include_false_positives:
+            # rows with no actual tables but some predicted ones get a weight of 1 table
+            df["_table_weights"][df.total_tables.eq(0) & df.total_predicted_tables.gt(0)] = 1
+
         # filter down to only those with actual and/or predicted tables
         has_tables_df = df[df["_table_weights"] > 0]
 
+        if not self.weighted_average:
+            # unweighted: set non-zero weights to 1 (TODO: has_tables_df was sliced before this)
+            df["_table_weights"] = df["_table_weights"].apply(
+                lambda table_weight: 1 if table_weight != 0 else 0
+            )
+
         if has_tables_df.empty:
             agg_df = pd.DataFrame(
                 [[metric, None, None, None, 0] for metric in self.supported_metric_names]