intel · Pennycook · Aug 14, 2024 · Aug 16, 2024 · Aug 16, 2024 · laserkelvin
diff --git a/p3/_utils.py b/p3/_utils.py
@@ -30,3 +30,21 @@ def _require_numeric(df, columns):
         except Exception:
             msg = "Column '%s' must contain only numeric values."
             raise TypeError(msg % (column))
+
+
+def _sort_by_app_order(df, app_order):
+    """
+    Return a copy of df with rows ordered so that applications appear in
+    the order given by app_order; ties keep their order and the index is
+    reset. Raises ValueError if an application is missing from app_order.
+    """
+    # Build the rank lookup once rather than scanning app_order per row.
+    rank = {app: i for i, app in enumerate(app_order)}
+    sort_index = df["application"].map(rank)
+    if sort_index.isna().any():
+        missing = df.loc[sort_index.isna(), "application"].iloc[0]
+        raise ValueError(f"{missing!r} is not in list")
+
+    # A stable positional sort is safe for empty or duplicate-index input.
+    order = sort_index.to_numpy().argsort(kind="stable")
+    return df.iloc[order].reset_index(drop=True)
diff --git a/p3/metrics/_pp.py b/p3/metrics/_pp.py
@@ -6,7 +6,7 @@
 
 import pandas as pd
 
-from p3._utils import _require_columns, _require_numeric
+from p3._utils import _require_columns, _require_numeric, _sort_by_app_order
 
 
 def _hmean(series):
@@ -83,6 +83,8 @@ def pp(df):
         if not df[eff].fillna(0).between(0, 1).all():
             raise ValueError(f"{eff} must in range [0, 1]")
 
+    app_order = df["application"].unique()
+
     # Keep only the most efficient (application, platform) results.
     key = ["problem", "platform", "application"]
     groups = df[key + efficiencies].groupby(key)
@@ -124,4 +126,7 @@ def pp(df):
         pp.rename(columns={eff: new_column}, inplace=True)
         pp = pp.astype({new_column: "float64"})
 
+    # Sort the final DataFrame to match the original application order
+    pp = _sort_by_app_order(pp, app_order)
+
     return pp
diff --git a/p3/plot/backend/matplotlib.py b/p3/plot/backend/matplotlib.py
@@ -15,7 +15,7 @@
 from matplotlib.path import Path
 
 import p3.metrics
-from p3._utils import _require_numeric
+from p3._utils import _require_numeric, _sort_by_app_order
 from p3.plot._common import ApplicationStyle, Legend, PlatformStyle
 from p3.plot.backend import CascadePlot, NavChart
 
@@ -149,10 +149,13 @@ def __init__(self, df, eff=None, size=None, fig=None, axes=None, **kwargs):
             size = (6, 5)
 
         # Keep only the most efficient (application, platform) results.
+        # Ensure that the order of applications is unchanged, for the legend.
+        app_order = df["application"].unique()
         key = ["problem", "platform", "application"]
         groups = df[key + [eff_column]].groupby(key)
         df = groups.agg("max")
         df.reset_index(inplace=True)
+        df = _sort_by_app_order(df, app_order)
 
         platforms = df["platform"].unique()
         applications = df["application"].unique()

diff --git a/p3/report/_snapshot.py b/p3/report/_snapshot.py
@@ -8,7 +8,7 @@
 
 import p3.metrics
 import p3.plot
-from p3._utils import _require_columns
+from p3._utils import _require_columns, _sort_by_app_order
 from p3.metrics._divergence import _coverage_string_to_json
 
 
@@ -34,24 +34,6 @@ def _block_symlinks(path):
     raise PermissionError("Refusing to create files via symbolic link.")
 
 
-def _sort_by_app_order(df, app_order):
-    """
-    Sort the DataFrame such that the order of applications matches that
-    specified in app_order.
-    """
-
-    def index_function(row):
-        return app_order.tolist().index(row["application"])
-
-    sort_index = df.apply(index_function, axis=1)
-    sort_index.name = "sort_index"
-
-    order = df.join(sort_index).sort_values(by=["sort_index"]).index
-    df = df.loc[order]
-    df.reset_index(inplace=True, drop=True)  # add style change
-    return df
-
-
 def snapshot(df, cov=None, directory=None):
     """
     Generate an HTML report representing a snapshot of P3 characteristics.

diff --git a/tests/metrics/test_pp.py b/tests/metrics/test_pp.py
@@ -1,9 +1,11 @@
 # Copyright (C) 2022-2023 Intel Corporation
 # SPDX-License-Identifier: MIT
 
+import unittest
+
 import pandas as pd
+
 from p3.metrics import pp
-import unittest
 
 
 class TestPP(unittest.TestCase):
@@ -90,8 +92,8 @@ def test_pp(self):
 
         expected_data = {
             "problem": ["test"] * 3,
-            "application": ["best", "dummy", "latest"],
-            "app pp": [1.0, 0.0, 0.4878],
+            "application": ["latest", "best", "dummy"],
+            "app pp": [0.4878, 1.0, 0.0],
             "arch pp": [0.0] * 3,
         }
         expected_df = pd.DataFrame(expected_data)