Skip to content

Commit

Permalink
reverted back to include checks for simplex space. Changed closure ch…
Browse files Browse the repository at this point in the history
…eck to closure of 1 or 100
  • Loading branch information
jtlait authored and nmaarnio committed Nov 15, 2024
1 parent 3722316 commit 3f80048
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 44 deletions.
25 changes: 14 additions & 11 deletions eis_toolkit/utilities/checks/compositional.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,30 @@
import numpy as np
from numbers import Number

import pandas as pd
from beartype import beartype
from beartype.typing import Optional

from eis_toolkit.exceptions import InvalidCompositionException, NumericValueSignException
from eis_toolkit.utilities.checks.dataframe import check_dataframe_contains_only_positive_numbers


@beartype
def check_in_simplex_sample_space(df: pd.DataFrame, expected_sum: Optional[np.float64] = None) -> None:
def check_in_simplex_sample_space(df: pd.DataFrame, tolerance: Number = 0.0001) -> None:
"""
Check that the compositions represented by the data rows belong to a simplex sample space.
Checks that the data contains no NaN values.
Checks that each compositional data point belongs to the set of positive real numbers.
Checks that each composition is normalized to the same value.
Checks that the rows of the input dataframe are either all closed to 1 or all closed to 100 (row sums within the given tolerance).
Args:
df: The dataframe to check.
expected_sum: The expected sum of each row. If None, simply checks that the sum of each row is equal.
tolerance: Small tolerance value to allow floating-point imprecision.
Returns:
True if values are valid and the sum of each row is the expected_sum.
None.
Raises:
InvalidCompositionException: Data is not normalized to the expected value.
InvalidCompositionException: Data is not within the expected simplex sample space.
NumericValueSignException: Data contains zeros or negative values.
"""
if df.isnull().values.any():
Expand All @@ -32,9 +33,11 @@ def check_in_simplex_sample_space(df: pd.DataFrame, expected_sum: Optional[np.fl
if not check_dataframe_contains_only_positive_numbers(df):
raise NumericValueSignException("Data contains zeros or negative values.")

df_sum = np.sum(df, axis=1)
expected_sum = expected_sum if expected_sum is not None else df_sum.iloc[0]
if len(df_sum[df_sum.iloc[:] != expected_sum]) != 0:
raise InvalidCompositionException("Not each composition is normalized to the same value.")
row_sums = df.sum(axis=1)
closed_to_one = (row_sums - 1).abs() < tolerance
closed_to_hundred = (row_sums - 100).abs() < tolerance

if not closed_to_one.all() and not closed_to_hundred.all():
raise InvalidCompositionException(f"Input data is not closed to 1 or 100 within tolerance of {tolerance}.")

return None
21 changes: 5 additions & 16 deletions tests/transformations/coda/alr_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,34 +9,23 @@
SAMPLE_DATAFRAME = pd.DataFrame(sample_array, columns=["a", "b", "c", "d"])


def test_alr_transform_simple():
"""Test ALR transformation core functionality."""
ones_df_4x4 = pd.DataFrame(np.ones((4, 4)), columns=["a", "b", "c", "d"])
zeros_df_4x4 = pd.DataFrame(np.zeros((4, 3)), columns=["V1", "V2", "V3"])
result = alr_transform(ones_df_4x4)
pd.testing.assert_frame_equal(result, zeros_df_4x4)


def test_alr_transform():
"""Test ALR transformation core functionality."""
arr = np.array([[1, 4, 1, 1], [2, 1, 2, 2]])
arr = np.random.dirichlet(np.ones(4), size=4)
df = pd.DataFrame(arr, columns=["a", "b", "c", "d"], dtype=np.float64)

result = alr_transform(df, column="b", keep_denominator_column=True)
expected = pd.DataFrame(
{
"V1": [np.log(0.25), np.log(2)],
"V2": [0, 0],
"V3": [np.log(0.25), np.log(2)],
"V4": [np.log(0.25), np.log(2)],
},
np.log(arr / arr[:, 1, None]),
columns=["V1", "V2", "V3", "V4"],
dtype=np.float64,
)
pd.testing.assert_frame_equal(result, expected)

result = alr_transform(df, column="b")
expected = pd.DataFrame(
{"V1": [np.log(0.25), np.log(2)], "V2": [np.log(0.25), np.log(2)], "V3": [np.log(0.25), np.log(2)]},
np.log(np.delete(arr, 1, axis=1) / arr[:, 1, None]),
columns=["V1", "V2", "V3"],
dtype=np.float64,
)
pd.testing.assert_frame_equal(result, expected)
Expand Down
17 changes: 7 additions & 10 deletions tests/transformations/coda/clr_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,16 @@
SAMPLE_DATAFRAME = pd.DataFrame(sample_array, columns=["a", "b", "c", "d"])


def test_clr_transform_simple():
"""Test CLR transform core functionality."""
ones_df_4x4 = pd.DataFrame(np.ones((4, 4)), columns=["a", "b", "c", "d"])
zeros_df_4x4 = pd.DataFrame(np.zeros((4, 4)), columns=["V1", "V2", "V3", "V4"])
result = clr_transform(ones_df_4x4)
pd.testing.assert_frame_equal(result, zeros_df_4x4)


def test_clr_transform():
"""Test CLR transform core functionality."""
result = clr_transform(SAMPLE_DATAFRAME)
arr = np.random.dirichlet(np.ones(4), size=4)
df = pd.DataFrame(arr, columns=["a", "b", "c", "d"], dtype=np.float64)
result = clr_transform(df)
geometric_means = np.prod(arr, axis=1) ** (1 / arr.shape[1])
expected = pd.DataFrame(
{"V1": [1.38, 1.29], "V2": [-0.30, -0.08], "V3": [0.10, -0.15], "V4": [-1.18, -1.06]}, dtype=np.float64
np.log(arr / geometric_means[:, None]),
columns=["V1", "V2", "V3", "V4"],
dtype=np.float64,
)
pd.testing.assert_frame_equal(result, expected, atol=1e-2)

Expand Down
8 changes: 1 addition & 7 deletions tests/utilities/compositional_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ def test_compositional_data_invalid():
def test_check_for_simplex_sample_space():
"""Test whether or not a dataframe belongs to a simplex sample space is correctly identified."""
unit_simplex_df = pd.DataFrame([[0.1, 0.2, 0.3, 0.4], [0.2, 0.3, 0.2, 0.3]])
simplex_df = pd.DataFrame([[1, 2, 3, 4], [2, 3, 2, 3]], columns=["a", "b", "c", "d"])
non_simplex_positive_df = pd.DataFrame([1, 2, 3, 4], [5, 6, 7, 8])
non_positive_df = pd.DataFrame([-1, 2, 3, 4], [1, 2, 3, 4])

Expand All @@ -87,13 +86,8 @@ def test_check_for_simplex_sample_space():
with pytest.raises(NumericValueSignException):
check_in_simplex_sample_space(non_positive_df)

with pytest.raises(InvalidCompositionException):
check_in_simplex_sample_space(simplex_df, np.float64(100))

# Valid cases - assert no exception is raised
try:
check_in_simplex_sample_space(simplex_df)
check_in_simplex_sample_space(simplex_df, np.float64(10))
check_in_simplex_sample_space(unit_simplex_df, np.float64(1.0))
check_in_simplex_sample_space(unit_simplex_df)
except Exception as ex:
assert False, f"{type(ex)}: {ex}"

0 comments on commit 3f80048

Please sign in to comment.