Skip to content

Commit

Permalink
Replace _require_numeric with _cast_to_numeric
Browse files (browse the repository at this point in the history)
Whereas _require_numeric simply checked that the values in a DataFrame
could be interpreted as numbers, _cast_to_numeric actually performs the
cast and returns the result.

Signed-off-by: John Pennycook <[email protected]>
  • Loading branch information
Pennycook committed Sep 13, 2024
1 parent 59b3d0a commit 0293c2f
Show file tree
Hide file tree
Showing 8 changed files with 19 additions and 16 deletions.
9 changes: 5 additions & 4 deletions p3analysis/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,15 @@ def _require_columns(df, columns):
raise ValueError(msg % (column, str(columns)))


def _require_numeric(df, columns):
def _cast_to_numeric(df, columns):
"""
Check that the named columns are numeric.
Check that the named columns are numeric, and cast them.
"""

result = df.copy(deep=True)
for column in columns:
try:
pd.to_numeric(df[column])
result[column] = pd.to_numeric(df[column])
except Exception:
msg = "Column '%s' must contain only numeric values."
raise TypeError(msg % (column))
return result
4 changes: 2 additions & 2 deletions p3analysis/metrics/_efficiency.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import numpy

from p3analysis._utils import _require_columns, _require_numeric
from p3analysis._utils import _cast_to_numeric, _require_columns


def application_efficiency(df, foms="lower"):
Expand Down Expand Up @@ -45,7 +45,7 @@ def application_efficiency(df, foms="lower"):
"""
required_columns = ["problem", "platform", "application", "fom"]
_require_columns(df, required_columns)
_require_numeric(df, ["fom"])
df = _cast_to_numeric(df, ["fom"])

if foms not in ["lower", "higher"]:
raise ValueError("FOM interpretation must be 'lower' or 'higher'")
Expand Down
4 changes: 2 additions & 2 deletions p3analysis/metrics/_pp.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import pandas as pd

from p3analysis._utils import _require_columns, _require_numeric
from p3analysis._utils import _cast_to_numeric, _require_columns


def _hmean(series):
Expand Down Expand Up @@ -79,7 +79,7 @@ def pp(df):
if len(efficiencies) == 0:
msg = "DataFrame must contain a column named 'arch eff' or 'app eff'."
raise ValueError(msg)
_require_numeric(df, efficiencies)
df = _cast_to_numeric(df, efficiencies)

# Check that efficiencies are not given in percentages
for eff in efficiencies:
Expand Down
4 changes: 2 additions & 2 deletions p3analysis/plot/_cascade.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# Copyright (c) 2020 Performance Portability authors
# SPDX-License-Identifier: MIT

from p3analysis._utils import _require_columns, _require_numeric
from p3analysis._utils import _cast_to_numeric, _require_columns


def cascade(df, eff=None, size=None, **kwargs):
Expand Down Expand Up @@ -111,7 +111,7 @@ def cascade(df, eff=None, size=None, **kwargs):
if eff_column not in df:
msg = "DataFrame does not contain an '%s' column."
raise ValueError(msg % (eff_column))
_require_numeric(df, [eff_column])
df = _cast_to_numeric(df, [eff_column])

# Check there is only one entry per (application, platform) pair.
grouped = df.groupby(["platform", "application"])
Expand Down
4 changes: 2 additions & 2 deletions p3analysis/plot/_navchart.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright (c) 2022-2023 Intel Corporation
# SPDX-License-Identifier: MIT

from p3analysis._utils import _require_columns, _require_numeric
from p3analysis._utils import _cast_to_numeric, _require_columns


def navchart(pp, cd, eff=None, size=None, goal=None, **kwargs):
Expand Down Expand Up @@ -80,7 +80,7 @@ def navchart(pp, cd, eff=None, size=None, goal=None, **kwargs):

_require_columns(pp, ["problem", "application"])
_require_columns(cd, ["problem", "application", "divergence"])
_require_numeric(cd, ["divergence"])
cd = _cast_to_numeric(cd, ["divergence"])

if len(cd["problem"].unique()) > 1:
raise NotImplementedError(
Expand Down
4 changes: 2 additions & 2 deletions p3analysis/plot/backend/matplotlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from matplotlib.path import Path

import p3analysis.metrics
from p3analysis._utils import _require_numeric
from p3analysis._utils import _cast_to_numeric
from p3analysis.plot._common import ApplicationStyle, Legend, PlatformStyle
from p3analysis.plot.backend import CascadePlot, NavChart

Expand Down Expand Up @@ -471,7 +471,7 @@ def __init__(
if pp_column not in pp:
msg = "DataFrame does not contain an '%s' column."
raise ValueError(msg % (pp_column))
_require_numeric(pp, [pp_column])
pp = _cast_to_numeric(pp, [pp_column])

# If the size is unset, default to 5 x 5
if not size:
Expand Down
4 changes: 2 additions & 2 deletions p3analysis/plot/backend/pgfplots.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import pandas as pd

import p3analysis.metrics
from p3analysis._utils import _require_numeric
from p3analysis._utils import _cast_to_numeric
from p3analysis.plot._common import ApplicationStyle, Legend, PlatformStyle
from p3analysis.plot.backend import CascadePlot, NavChart

Expand Down Expand Up @@ -293,7 +293,7 @@ def __init__(
if pp_column not in pp:
msg = "DataFrame does not contain an '%s' column."
raise ValueError(msg % (pp_column))
_require_numeric(pp, [pp_column])
pp = _cast_to_numeric(pp, [pp_column])

# If the size is unset, default to 200pt x 200pt, otherwise set size
plotwidth = "200pt"
Expand Down
2 changes: 2 additions & 0 deletions tests/metrics/test_efficiency.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import pandas as pd

from p3analysis._utils import _cast_to_numeric
from p3analysis.metrics import application_efficiency


Expand Down Expand Up @@ -141,6 +142,7 @@ def test_non_numeric(self):
expected_data.update(eff_data)
expected_df = pd.DataFrame(expected_data)

expected_df = _cast_to_numeric(expected_df, ["fom", "app eff"])
pd.testing.assert_frame_equal(result, expected_df)


Expand Down

0 comments on commit 0293c2f

Please sign in to comment.