From b8ae3de09de7be649c06bb2a91f0e97a03e0ce27 Mon Sep 17 00:00:00 2001 From: Juho Laitala Date: Fri, 13 Dec 2024 10:36:49 +0200 Subject: [PATCH] added closure checks also for geochemical data that is in ppm or ppb --- eis_toolkit/utilities/checks/compositional.py | 10 ++++++++-- tests/utilities/compositional_test.py | 18 ++++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/eis_toolkit/utilities/checks/compositional.py b/eis_toolkit/utilities/checks/compositional.py index 52e0173d..771f7940 100644 --- a/eis_toolkit/utilities/checks/compositional.py +++ b/eis_toolkit/utilities/checks/compositional.py @@ -36,8 +36,14 @@ def check_in_simplex_sample_space(df: pd.DataFrame, tolerance: Number = 0.0001) row_sums = df.sum(axis=1) closed_to_one = (row_sums - 1).abs() < tolerance closed_to_hundred = (row_sums - 100).abs() < tolerance + closed_to_million = (row_sums - 1e6).abs() < tolerance + closed_to_billion = (row_sums - 1e9).abs() < tolerance - if not closed_to_one.all() and not closed_to_hundred.all(): - raise InvalidCompositionException(f"Input data is not closed to 1 or 100 within tolerance of {tolerance}.") + is_valid = closed_to_one.all() or closed_to_hundred.all() or closed_to_million.all() or closed_to_billion.all() + + if not is_valid: + raise InvalidCompositionException( + f"Input data is not closed to 1, 100 (%), 10^6 (ppm) or 10^9 (ppb) within tolerance of {tolerance}." 
+ ) return None diff --git a/tests/utilities/compositional_test.py b/tests/utilities/compositional_test.py index 1a49453a..d7613f7e 100644 --- a/tests/utilities/compositional_test.py +++ b/tests/utilities/compositional_test.py @@ -77,6 +77,9 @@ def test_compositional_data_invalid(): def test_check_for_simplex_sample_space(): """Test whether or not a dataframe belongs to a simplex sample space is correctly identified.""" unit_simplex_df = pd.DataFrame([[0.1, 0.2, 0.3, 0.4], [0.2, 0.3, 0.2, 0.3]]) + closed_to_hundred_df = unit_simplex_df * 100 + closed_to_million_df = unit_simplex_df * 1e6 + closed_to_billion_df = unit_simplex_df * 1e9 non_simplex_positive_df = pd.DataFrame([1, 2, 3, 4], [5, 6, 7, 8]) non_positive_df = pd.DataFrame([-1, 2, 3, 4], [1, 2, 3, 4]) @@ -91,3 +94,18 @@ def test_check_for_simplex_sample_space(): check_in_simplex_sample_space(unit_simplex_df) except Exception as ex: assert False, f"{type(ex)}: {ex}" + + try: + check_in_simplex_sample_space(closed_to_hundred_df) + except Exception as ex: + assert False, f"{type(ex)}: {ex}" + + try: + check_in_simplex_sample_space(closed_to_million_df) + except Exception as ex: + assert False, f"{type(ex)}: {ex}" + + try: + check_in_simplex_sample_space(closed_to_billion_df) + except Exception as ex: + assert False, f"{type(ex)}: {ex}"