Skip to content

Commit

Permalink
Remove duplicate closure function
Browse files Browse the repository at this point in the history
  • Loading branch information
msorvoja committed Dec 19, 2024
1 parent f47b897 commit 0a18030
Show file tree
Hide file tree
Showing 8 changed files with 39 additions and 72 deletions.
11 changes: 6 additions & 5 deletions eis_toolkit/transformations/coda/alr.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from eis_toolkit.exceptions import InvalidColumnException, NumericValueSignException
from eis_toolkit.utilities.aitchison_geometry import _closure
from eis_toolkit.utilities.checks.compositional import check_in_simplex_sample_space
from eis_toolkit.utilities.miscellaneous import perform_closure, rename_columns_by_pattern
from eis_toolkit.utilities.miscellaneous import rename_columns_by_pattern


@beartype
Expand All @@ -24,7 +24,7 @@ def alr_transform(
columns: Optional[Sequence[str]] = None,
denominator_column: Optional[str] = None,
keep_denominator_column: bool = False,
closure_target: Optional[Number] = None,
scale: Optional[Number] = None,
) -> pd.DataFrame:
"""
Perform an additive logratio transformation on the data.
Expand All @@ -35,7 +35,8 @@ def alr_transform(
denominator_column: The name of the column to be used as the denominator column.
keep_denominator_column: Whether to include the denominator column in the result. If True, the returned
dataframe retains its original shape.
closure_target: Target row sum for closure. If None, no closure is performed.
scale: The value to which each composition should be normalized. E.g., if the composition is expressed
as percentages, scale=100. If None, no closure is performed.
Returns:
A new dataframe containing the ALR transformed data.
Expand Down Expand Up @@ -65,8 +66,8 @@ def alr_transform(
else:
columns_to_transform = df.columns.to_list()

if closure_target is not None:
df = perform_closure(df, columns_to_transform, closure_target)
if scale is not None:
df = _closure(df, scale)

check_in_simplex_sample_space(df)

Expand Down
11 changes: 6 additions & 5 deletions eis_toolkit/transformations/coda/clr.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from eis_toolkit.exceptions import InvalidColumnException, NumericValueSignException
from eis_toolkit.utilities.aitchison_geometry import _closure
from eis_toolkit.utilities.checks.compositional import check_in_simplex_sample_space
from eis_toolkit.utilities.miscellaneous import perform_closure, rename_columns, rename_columns_by_pattern
from eis_toolkit.utilities.miscellaneous import rename_columns, rename_columns_by_pattern


@beartype
Expand All @@ -29,15 +29,16 @@ def _clr_transform(df: pd.DataFrame) -> pd.DataFrame:
def clr_transform(
df: pd.DataFrame,
columns: Optional[Sequence[str]] = None,
closure_target: Optional[Number] = None,
scale: Optional[Number] = None,
) -> pd.DataFrame:
"""
Perform a centered logratio transformation on the data.
Args:
df: A dataframe of compositional data.
columns: The names of the columns to be transformed.
closure_target: Target row sum for closure. If None, no closure is performed.
scale: The value to which each composition should be normalized. E.g., if the composition is expressed
as percentages, scale=100. If None, no closure is performed.
Returns:
A new dataframe containing the CLR transformed data.
Expand All @@ -57,8 +58,8 @@ def clr_transform(
else:
columns_to_transform = df.columns.to_list()

if closure_target is not None:
df = perform_closure(df, columns_to_transform, closure_target)
if scale is not None:
df = _closure(df, scale)

check_in_simplex_sample_space(df)

Expand Down
13 changes: 6 additions & 7 deletions eis_toolkit/transformations/coda/ilr.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@
from scipy.stats import gmean

from eis_toolkit.exceptions import InvalidColumnException, InvalidCompositionException, InvalidParameterValueException
from eis_toolkit.utilities.aitchison_geometry import _closure
from eis_toolkit.utilities.checks.compositional import check_in_simplex_sample_space
from eis_toolkit.utilities.checks.dataframe import check_columns_valid
from eis_toolkit.utilities.checks.parameter import check_lists_overlap, check_numeric_value_sign
from eis_toolkit.utilities.miscellaneous import perform_closure


@beartype
Expand Down Expand Up @@ -70,7 +70,7 @@ def single_ilr_transform(
df: pd.DataFrame,
subcomposition_1: Sequence[str],
subcomposition_2: Sequence[str],
closure_target: Optional[Number] = None,
scale: Optional[Number] = None,
) -> pd.Series:
"""
Perform a single isometric logratio transformation on the provided subcompositions.
Expand All @@ -81,7 +81,8 @@ def single_ilr_transform(
df: A dataframe of shape [N, D] of compositional data.
subcomposition_1: Names of the columns in the numerator part of the ratio.
subcomposition_2: Names of the columns in the denominator part of the ratio.
closure_target: Target row sum for closure. If None, no closure is performed.
scale: The value to which each composition should be normalized. E.g., if the composition is expressed
as percentages, scale=100. If None, no closure is performed.
Returns:
A series of length N containing the transforms.
Expand All @@ -103,10 +104,8 @@ def single_ilr_transform(
if check_lists_overlap(subcomposition_1, subcomposition_2):
raise InvalidCompositionException("The subcompositions overlap.")

if closure_target is not None:
columns = subcomposition_1 + subcomposition_2
df = perform_closure(df, columns, closure_target)
df = df[columns]
if scale is not None:
df = _closure(df, scale)

check_in_simplex_sample_space(df)

Expand Down
31 changes: 16 additions & 15 deletions eis_toolkit/transformations/coda/plr.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@
from scipy.stats import gmean

from eis_toolkit.exceptions import InvalidColumnException, InvalidParameterValueException
from eis_toolkit.utilities.aitchison_geometry import _closure
from eis_toolkit.utilities.checks.compositional import check_in_simplex_sample_space
from eis_toolkit.utilities.checks.parameter import check_numeric_value_sign
from eis_toolkit.utilities.miscellaneous import perform_closure, rename_columns_by_pattern
from eis_toolkit.utilities.miscellaneous import rename_columns_by_pattern


@beartype
Expand Down Expand Up @@ -53,7 +54,7 @@ def _single_plr_transform_by_index(df: pd.DataFrame, column_ind: int) -> pd.Seri


@beartype
def single_plr_transform(df: pd.DataFrame, column: str, closure_target: Optional[Number] = None) -> pd.Series:
def single_plr_transform(df: pd.DataFrame, column: str, scale: Optional[Number] = None) -> pd.Series:
"""
Perform a pivot logratio transformation on the selected column.
Expand All @@ -65,7 +66,8 @@ def single_plr_transform(df: pd.DataFrame, column: str, closure_target: Optional
Args:
df: A dataframe of shape [N, D] of compositional data.
column: The name of the numerator column to use for the transformation.
closure_target: Target row sum for closure. If None, no closure is performed.
scale: The value to which each composition should be normalized. E.g., if the composition is expressed
as percentages, scale=100. If None, no closure is performed.
Returns:
A series of length N containing the transforms.
Expand All @@ -84,17 +86,15 @@ def single_plr_transform(df: pd.DataFrame, column: str, closure_target: Optional
if idx == len(df.columns) - 1:
raise InvalidColumnException("Can't select last column as numerator.")

if closure_target is not None:
# Perform closure on columns starting from numerator "to the right"
columns = df.columns[idx:].to_list()
df = perform_closure(df, columns, closure_target)
# Keep columns from idx to the right
df = df.iloc[:, idx:]

check_in_simplex_sample_space(df[columns])
if scale is not None:
df = _closure(df, scale)

else:
check_in_simplex_sample_space(df)
check_in_simplex_sample_space(df)

return _single_plr_transform_by_index(df, idx)
return _single_plr_transform_by_index(df, 0)


@beartype
Expand All @@ -112,15 +112,16 @@ def _plr_transform(df: pd.DataFrame) -> pd.DataFrame:

@beartype
def plr_transform(
df: pd.DataFrame, columns: Optional[Sequence[str]] = None, closure_target: Optional[Number] = None
df: pd.DataFrame, columns: Optional[Sequence[str]] = None, scale: Optional[Number] = None
) -> pd.DataFrame:
"""
Perform a pivot logratio transformation on the dataframe, returning the full set of transforms.
Args:
df: A dataframe of shape [N, D] of compositional data.
columns: The names of the columns to use for the transformation.
closure_target: Target row sum for closure. If None, no closure is performed.
scale: The value to which each composition should be normalized. E.g., if the composition is expressed
as percentages, scale=100. If None, no closure is performed.
Returns:
A dataframe of shape [N, D-1] containing the set of PLR transformed data.
Expand All @@ -137,8 +138,8 @@ def plr_transform(
raise InvalidColumnException(f"The following columns were not found in the dataframe: {invalid_columns}.")
df = df[columns]

if closure_target is not None:
df = perform_closure(df, closure_target=1)
if scale is not None:
df = _closure(df, scale)

check_in_simplex_sample_space(df)

Expand Down
26 changes: 0 additions & 26 deletions eis_toolkit/utilities/miscellaneous.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,29 +375,3 @@ def toggle_gdal_exceptions():
finally:
if not already_has_exceptions_enabled:
gdal.DontUseExceptions()


@beartype
def perform_closure(
    df: pd.DataFrame, columns: Optional[Sequence[str]] = None, closure_target: Number = 1
) -> pd.DataFrame:
    """
    Perform closure on selected columns of a DataFrame.

    Values in the specified columns of each row are scaled so that they sum to 'closure_target'.
    The input DataFrame is not modified; the closure is applied to a copy.

    Args:
        df: Input DataFrame.
        columns: Names of the columns on which to perform closure. If not provided, all columns are used.
        closure_target: Row sum of the selected columns after performing closure. Defaults to 1.

    Returns:
        A new DataFrame where on each row the values in the selected columns sum to the closure target.
    """
    selected = df.columns.to_list() if columns is None else list(columns)

    # Work on a copy so the caller's DataFrame is not changed as a side effect.
    result = df.copy()

    row_sums = result[selected].sum(axis=1)
    result[selected] = result[selected].div(row_sums, axis=0).mul(closure_target)

    return result
8 changes: 4 additions & 4 deletions tests/transformations/coda/ilr_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@ def test_calculate_scaling_factor():

def test_single_ilr_transform_with_single_composition():
"""Test the core functionality of a single ILR transform with a single row of data."""
arr = np.array([80, 15, 5])
arr = np.array([80, 15, 5]).astype(np.float64)
df = pd.DataFrame(arr[None], columns=["a", "b", "c"])

result = single_ilr_transform(df, ["a"], ["b"])
result = single_ilr_transform(df, ["a"], ["b"], scale=100)
assert result[0] == pytest.approx(1.18, abs=1e-2)

result = single_ilr_transform(df, ["a", "b"], ["c"])
Expand All @@ -27,10 +27,10 @@ def test_single_ilr_transform_with_single_composition():

def test_single_ilr_transform():
"""Test the core functionality of a single ILR transform."""
arr = np.array([[80, 15, 5], [75, 18, 7]])
arr = np.array([[80, 15, 5], [75, 18, 7]]).astype(dtype=np.float64)
df = pd.DataFrame(arr, columns=["a", "b", "c"])

result = single_ilr_transform(df, ["a"], ["b"])
result = single_ilr_transform(df, ["a"], ["b"], scale=100)
assert result[1] == pytest.approx(1.01, abs=1e-2)

result = single_ilr_transform(df, ["a", "b"], ["c"])
Expand Down
2 changes: 1 addition & 1 deletion tests/transformations/coda/plr_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def test_single_plr_transform_with_single_composition():
result = _single_plr_transform_by_index(df, 0)
assert result[0] == pytest.approx(1.82, abs=1e-2)

result = single_plr_transform(df, "b", closure_target=100)
result = single_plr_transform(df, "b", scale=100)
assert result[0] == pytest.approx(0.78, abs=1e-2)

result = _single_plr_transform_by_index(df, 1)
Expand Down
9 changes: 0 additions & 9 deletions tests/utilities/miscellaneous_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

from eis_toolkit.exceptions import InvalidColumnIndexException
from eis_toolkit.utilities.miscellaneous import (
perform_closure,
rename_columns,
rename_columns_by_pattern,
replace_values,
Expand Down Expand Up @@ -72,11 +71,3 @@ def test_rename_columns_with_too_few_columns():
target_df = pd.DataFrame({"a": [1, 2], "b": [3, 4], "col3": [5, 6]})
renamed_df = rename_columns(df, colnames=colnames)
pd.testing.assert_frame_equal(renamed_df, target_df)


def test_perform_closure():
    """Check that closure rescales only the selected columns to the requested row sum."""
    input_df = pd.DataFrame({"col1": [1, 2, 1], "col2": [4, 8, 1], "col3": [3, 1, 6]})
    expected_df = pd.DataFrame(
        {"col1": [20.0, 20.0, 50.0], "col2": [80.0, 80.0, 50.0], "col3": [3, 1, 6]}
    )

    # Only col1 and col2 take part in the closure; col3 is expected to pass through unchanged.
    result_df = perform_closure(input_df, columns=["col1", "col2"], closure_target=100)

    pd.testing.assert_frame_equal(result_df, expected_df)

0 comments on commit 0a18030

Please sign in to comment.