From 0293c2f4e8aa404e4668fcd9c395bcf0ed3177aa Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 13 Sep 2024 17:55:31 +0100 Subject: [PATCH] Replace _require_numeric with _cast_to_numeric Whereas _require_numeric simply checked that the values in a DataFrame could be interpreted as numbers, _cast_to_numeric actually performs the cast and returns the result. Signed-off-by: John Pennycook --- p3analysis/_utils.py | 9 +++++---- p3analysis/metrics/_efficiency.py | 4 ++-- p3analysis/metrics/_pp.py | 4 ++-- p3analysis/plot/_cascade.py | 4 ++-- p3analysis/plot/_navchart.py | 4 ++-- p3analysis/plot/backend/matplotlib.py | 4 ++-- p3analysis/plot/backend/pgfplots.py | 4 ++-- tests/metrics/test_efficiency.py | 2 ++ 8 files changed, 19 insertions(+), 16 deletions(-) diff --git a/p3analysis/_utils.py b/p3analysis/_utils.py index 87b70f9..a062a74 100644 --- a/p3analysis/_utils.py +++ b/p3analysis/_utils.py @@ -19,14 +19,15 @@ def _require_columns(df, columns): raise ValueError(msg % (column, str(columns))) -def _require_numeric(df, columns): +def _cast_to_numeric(df, columns): """ - Check that the named columns are numeric. + Check that the named columns are numeric, and cast them. """ - + result = df.copy(deep=True) for column in columns: try: - pd.to_numeric(df[column]) + result[column] = pd.to_numeric(df[column]) except Exception: msg = "Column '%s' must contain only numeric values." raise TypeError(msg % (column)) + return result diff --git a/p3analysis/metrics/_efficiency.py b/p3analysis/metrics/_efficiency.py index ed86198..a0ff097 100644 --- a/p3analysis/metrics/_efficiency.py +++ b/p3analysis/metrics/_efficiency.py @@ -3,7 +3,7 @@ import numpy -from p3analysis._utils import _require_columns, _require_numeric +from p3analysis._utils import _cast_to_numeric, _require_columns def application_efficiency(df, foms="lower"): @@ -45,7 +45,7 @@ def application_efficiency(df, foms="lower"): """ required_columns = ["problem", "platform", "application", "fom"] _require_columns(df, required_columns) - _require_numeric(df, ["fom"]) + df = _cast_to_numeric(df, ["fom"]) if foms not in ["lower", "higher"]: raise ValueError("FOM interpretation must be 'lower' or 'higher'") diff --git a/p3analysis/metrics/_pp.py b/p3analysis/metrics/_pp.py index ea4ed9a..a318761 100644 --- a/p3analysis/metrics/_pp.py +++ b/p3analysis/metrics/_pp.py @@ -6,7 +6,7 @@ import pandas as pd -from p3analysis._utils import _require_columns, _require_numeric +from p3analysis._utils import _cast_to_numeric, _require_columns def _hmean(series): @@ -79,7 +79,7 @@ def pp(df): if len(efficiencies) == 0: msg = "DataFrame must contain a column named 'arch eff' or 'app eff'." raise ValueError(msg) - _require_numeric(df, efficiencies) + df = _cast_to_numeric(df, efficiencies) # Check that efficiencies are not given in percentages for eff in efficiencies: diff --git a/p3analysis/plot/_cascade.py b/p3analysis/plot/_cascade.py index 4510145..193fe2f 100644 --- a/p3analysis/plot/_cascade.py +++ b/p3analysis/plot/_cascade.py @@ -6,7 +6,7 @@ # Copyright (c) 2020 Performance Portability authors # SPDX-License-Identifier: MIT -from p3analysis._utils import _require_columns, _require_numeric +from p3analysis._utils import _cast_to_numeric, _require_columns def cascade(df, eff=None, size=None, **kwargs): @@ -111,7 +111,7 @@ def cascade(df, eff=None, size=None, **kwargs): if eff_column not in df: msg = "DataFrame does not contain an '%s' column." raise ValueError(msg % (eff_column)) - _require_numeric(df, [eff_column]) + df = _cast_to_numeric(df, [eff_column]) # Check there is only one entry per (application, platform) pair. grouped = df.groupby(["platform", "application"]) diff --git a/p3analysis/plot/_navchart.py b/p3analysis/plot/_navchart.py index 1d14b18..b0a5297 100644 --- a/p3analysis/plot/_navchart.py +++ b/p3analysis/plot/_navchart.py @@ -1,7 +1,7 @@ # Copyright (c) 2022-2023 Intel Corporation # SPDX-License-Identifier: MIT -from p3analysis._utils import _require_columns, _require_numeric +from p3analysis._utils import _cast_to_numeric, _require_columns def navchart(pp, cd, eff=None, size=None, goal=None, **kwargs): @@ -80,7 +80,7 @@ def navchart(pp, cd, eff=None, size=None, goal=None, **kwargs): _require_columns(pp, ["problem", "application"]) _require_columns(cd, ["problem", "application", "divergence"]) - _require_numeric(cd, ["divergence"]) + cd = _cast_to_numeric(cd, ["divergence"]) if len(cd["problem"].unique()) > 1: raise NotImplementedError( diff --git a/p3analysis/plot/backend/matplotlib.py b/p3analysis/plot/backend/matplotlib.py index 387ef33..6c5e841 100644 --- a/p3analysis/plot/backend/matplotlib.py +++ b/p3analysis/plot/backend/matplotlib.py @@ -15,7 +15,7 @@ from matplotlib.path import Path import p3analysis.metrics -from p3analysis._utils import _require_numeric +from p3analysis._utils import _cast_to_numeric from p3analysis.plot._common import ApplicationStyle, Legend, PlatformStyle from p3analysis.plot.backend import CascadePlot, NavChart @@ -471,7 +471,7 @@ def __init__( if pp_column not in pp: msg = "DataFrame does not contain an '%s' column." raise ValueError(msg % (pp_column)) - _require_numeric(pp, [pp_column]) + pp = _cast_to_numeric(pp, [pp_column]) # If the size is unset, default to 5 x 5 if not size: diff --git a/p3analysis/plot/backend/pgfplots.py b/p3analysis/plot/backend/pgfplots.py index ef509a4..f563d99 100644 --- a/p3analysis/plot/backend/pgfplots.py +++ b/p3analysis/plot/backend/pgfplots.py @@ -14,7 +14,7 @@ import pandas as pd import p3analysis.metrics -from p3analysis._utils import _require_numeric +from p3analysis._utils import _cast_to_numeric from p3analysis.plot._common import ApplicationStyle, Legend, PlatformStyle from p3analysis.plot.backend import CascadePlot, NavChart @@ -293,7 +293,7 @@ def __init__( if pp_column not in pp: msg = "DataFrame does not contain an '%s' column." raise ValueError(msg % (pp_column)) - _require_numeric(pp, [pp_column]) + pp = _cast_to_numeric(pp, [pp_column]) # If the size is unset, default to 200pt x 200pt, otherwise set size plotwidth = "200pt" diff --git a/tests/metrics/test_efficiency.py b/tests/metrics/test_efficiency.py index a6948cb..ec3e356 100644 --- a/tests/metrics/test_efficiency.py +++ b/tests/metrics/test_efficiency.py @@ -5,6 +5,7 @@ import pandas as pd +from p3analysis._utils import _cast_to_numeric from p3analysis.metrics import application_efficiency @@ -141,6 +142,7 @@ def test_non_numeric(self): expected_data.update(eff_data) expected_df = pd.DataFrame(expected_data) + expected_df = _cast_to_numeric(expected_df, ["fom", "app eff"]) pd.testing.assert_frame_equal(result, expected_df)