Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

448 coda transformations data closure #457

Merged
merged 4 commits into from
Nov 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
176 changes: 88 additions & 88 deletions eis_toolkit/transformations/coda/alr.py
Original file line number Diff line number Diff line change
@@ -1,88 +1,88 @@
from numbers import Number
import numpy as np
import pandas as pd
from beartype import beartype
from beartype.typing import Optional, Sequence
from eis_toolkit.exceptions import InvalidColumnException, NumericValueSignException
from eis_toolkit.utilities.aitchison_geometry import _closure
from eis_toolkit.utilities.checks.compositional import check_compositional_data
from eis_toolkit.utilities.miscellaneous import rename_columns_by_pattern
@beartype
def _alr_transform(df: pd.DataFrame, columns: Sequence[str], denominator_column: str) -> pd.DataFrame:
    """
    Compute the natural logarithm of the selected columns divided by the denominator column.

    Args:
        df: The dataframe holding both the numerator columns and the denominator column.
        columns: Names of the columns to use as numerators.
        denominator_column: Name of the column every numerator is divided by, row by row.

    Returns:
        A dataframe with one log-ratio column per entry in `columns`.
    """
    numerators = df[columns]
    denominator = df[denominator_column]
    # axis=0 aligns the denominator series with the row index, i.e. a row-wise division.
    return np.log(numerators.div(denominator, axis=0))
@beartype
def alr_transform(
    df: pd.DataFrame, column: Optional[str] = None, keep_denominator_column: bool = False
) -> pd.DataFrame:
    """
    Perform an additive logratio transformation on the data.

    Args:
        df: A dataframe of compositional data.
        column: The name of the column to be used as the denominator column. If None, the last
            column of the dataframe is used.
        keep_denominator_column: Whether to include the denominator column in the result. If True, the
            denominator column is transformed like every other column (as its ratio to itself) and the
            returned dataframe retains its original shape.

    Returns:
        A new dataframe containing the ALR transformed data.

    Raises:
        InvalidColumnException: The input column isn't found in the dataframe.
        InvalidCompositionException: Data is not normalized to the expected value.
        NumericValueSignException: Data contains zeros or negative values.
    """
    # Input validation is delegated to the shared compositional data check.
    check_compositional_data(df)

    if column is None:
        denominator = df.columns[-1]
    elif column in df.columns:
        denominator = column
    else:
        raise InvalidColumnException(f"The column {column} was not found in the dataframe.")

    numerators = list(df.columns)
    if not keep_denominator_column and denominator in numerators:
        numerators.remove(denominator)

    transformed = _alr_transform(df, numerators, denominator)
    return rename_columns_by_pattern(transformed)
@beartype
def _inverse_alr(df: pd.DataFrame, denominator_column: str, scale: Number = 1.0) -> pd.DataFrame:
    """
    Exponentiate ALR data and pass it through `_closure` with the given scale.

    Args:
        df: A dataframe of ALR transformed data. It is copied, not modified.
        denominator_column: Name of the denominator column; added to the copy if it is missing.
        scale: The scale value forwarded to `_closure`.

    Returns:
        The result of `_closure` applied to the exponentiated data.
    """
    logratios = df.copy()

    denominator_present = denominator_column in logratios.columns.values
    if not denominator_present:
        # A missing denominator is filled with 0.0 so that it becomes exp(0) = 1 below.
        logratios[denominator_column] = 0.0

    return _closure(np.exp(logratios), scale)
@beartype
def inverse_alr(df: pd.DataFrame, denominator_column: str, scale: Number = 1.0) -> pd.DataFrame:
    """
    Perform the inverse transformation for a set of ALR transformed data.

    Args:
        df: A dataframe of ALR transformed compositional data.
        denominator_column: The name of the denominator column.
        scale: The value to which each composition should be normalized. E.g., if the composition is
            expressed as percentages, scale=100.

    Returns:
        A dataframe containing the inverse transformed data.

    Raises:
        NumericValueSignException: The input scale value is zero or less.
    """
    # Reject a non-positive scale before doing any work.
    if scale <= 0:
        raise NumericValueSignException("The scale value should be positive.")

    composition = _inverse_alr(df, denominator_column, scale)
    return composition
from numbers import Number

import numpy as np
import pandas as pd
from beartype import beartype
from beartype.typing import Optional, Sequence

from eis_toolkit.exceptions import InvalidColumnException, NumericValueSignException
from eis_toolkit.utilities.aitchison_geometry import _closure
from eis_toolkit.utilities.checks.compositional import check_in_simplex_sample_space
from eis_toolkit.utilities.miscellaneous import rename_columns_by_pattern


@beartype
def _alr_transform(df: pd.DataFrame, columns: Sequence[str], denominator_column: str) -> pd.DataFrame:
    """
    Compute the natural logarithm of the selected columns divided by the denominator column.

    Args:
        df: The dataframe holding both the numerator columns and the denominator column.
        columns: Names of the columns to use as numerators.
        denominator_column: Name of the column every numerator is divided by, row by row.

    Returns:
        A dataframe with one log-ratio column per entry in `columns`.
    """
    numerators = df[columns]
    denominator = df[denominator_column]
    # axis=0 aligns the denominator series with the row index, i.e. a row-wise division.
    return np.log(numerators.div(denominator, axis=0))


@beartype
def alr_transform(
    df: pd.DataFrame, column: Optional[str] = None, keep_denominator_column: bool = False
) -> pd.DataFrame:
    """
    Perform an additive logratio transformation on the data.

    Args:
        df: A dataframe of compositional data.
        column: The name of the column to be used as the denominator column. If None, the last
            column of the dataframe is used.
        keep_denominator_column: Whether to include the denominator column in the result. If True, the
            denominator column is transformed like every other column (as its ratio to itself) and the
            returned dataframe retains its original shape.

    Returns:
        A new dataframe containing the ALR transformed data.

    Raises:
        InvalidColumnException: The input column isn't found in the dataframe.
        InvalidCompositionException: Data is not normalized to the expected value.
        NumericValueSignException: Data contains zeros or negative values.
    """
    # Input validation is delegated to the shared simplex sample space check.
    check_in_simplex_sample_space(df)

    if column is None:
        denominator = df.columns[-1]
    elif column in df.columns:
        denominator = column
    else:
        raise InvalidColumnException(f"The column {column} was not found in the dataframe.")

    numerators = list(df.columns)
    if not keep_denominator_column and denominator in numerators:
        numerators.remove(denominator)

    transformed = _alr_transform(df, numerators, denominator)
    return rename_columns_by_pattern(transformed)


@beartype
def _inverse_alr(df: pd.DataFrame, denominator_column: str, scale: Number = 1.0) -> pd.DataFrame:
    """
    Exponentiate ALR data and pass it through `_closure` with the given scale.

    Args:
        df: A dataframe of ALR transformed data. It is copied, not modified.
        denominator_column: Name of the denominator column; added to the copy if it is missing.
        scale: The scale value forwarded to `_closure`.

    Returns:
        The result of `_closure` applied to the exponentiated data.
    """
    logratios = df.copy()

    denominator_present = denominator_column in logratios.columns.values
    if not denominator_present:
        # A missing denominator is filled with 0.0 so that it becomes exp(0) = 1 below.
        logratios[denominator_column] = 0.0

    return _closure(np.exp(logratios), scale)


@beartype
def inverse_alr(df: pd.DataFrame, denominator_column: str, scale: Number = 1.0) -> pd.DataFrame:
    """
    Perform the inverse transformation for a set of ALR transformed data.

    Args:
        df: A dataframe of ALR transformed compositional data.
        denominator_column: The name of the denominator column.
        scale: The value to which each composition should be normalized. E.g., if the composition is
            expressed as percentages, scale=100.

    Returns:
        A dataframe containing the inverse transformed data.

    Raises:
        NumericValueSignException: The input scale value is zero or less.
    """
    # Reject a non-positive scale before doing any work.
    if scale <= 0:
        raise NumericValueSignException("The scale value should be positive.")

    composition = _inverse_alr(df, denominator_column, scale)
    return composition
158 changes: 79 additions & 79 deletions eis_toolkit/transformations/coda/clr.py
Original file line number Diff line number Diff line change
@@ -1,79 +1,79 @@
from numbers import Number
import numpy as np
import pandas as pd
from beartype import beartype
from beartype.typing import Optional, Sequence
from scipy.stats import gmean
from eis_toolkit.exceptions import NumericValueSignException
from eis_toolkit.utilities.aitchison_geometry import _closure
from eis_toolkit.utilities.checks.compositional import check_compositional_data
from eis_toolkit.utilities.miscellaneous import rename_columns, rename_columns_by_pattern
@beartype
def _centered_ratio(row: pd.Series) -> pd.Series:
    """Divide every value of the row by the geometric mean of the row."""
    geometric_mean = gmean(row)
    return row / geometric_mean
@beartype
def _clr_transform(df: pd.DataFrame) -> pd.DataFrame:
    """
    Compute the natural logarithm of each row divided by its own geometric mean.

    Args:
        df: The dataframe to transform. It is copied, not modified.

    Returns:
        A dataframe of the log-transformed centered ratios.
    """
    # axis=1 hands each row to _centered_ratio as a series.
    centered = df.copy().apply(_centered_ratio, axis=1)
    return np.log(centered)
@beartype
def clr_transform(df: pd.DataFrame) -> pd.DataFrame:
    """
    Perform a centered logratio transformation on the data.

    Args:
        df: A dataframe of compositional data.

    Returns:
        A new dataframe containing the CLR transformed data, with columns renamed by
        `rename_columns_by_pattern`.

    Raises:
        InvalidCompositionException: Data is not normalized to the expected value.
        NumericValueSignException: Data contains zeros or negative values.
    """
    # Input validation is delegated to the shared compositional data check.
    check_compositional_data(df)

    transformed = _clr_transform(df)
    return rename_columns_by_pattern(transformed)
@beartype
def _inverse_clr(df: pd.DataFrame, colnames: Optional[Sequence[str]] = None, scale: Number = 1.0) -> pd.DataFrame:
    """
    Exponentiate CLR data and pass it through `_closure` with the given scale.

    Args:
        df: A dataframe of CLR transformed data.
        colnames: Optional column names passed to `rename_columns`; if None, no renaming is done.
        scale: The scale value forwarded to `_closure`.

    Returns:
        The result of `_closure`, renamed when `colnames` is given.
    """
    closed = _closure(np.exp(df), scale)
    return closed if colnames is None else rename_columns(closed, colnames)
@beartype
def inverse_clr(df: pd.DataFrame, colnames: Optional[Sequence[str]] = None, scale: Number = 1.0) -> pd.DataFrame:
    """
    Perform the inverse transformation for a set of CLR transformed data.

    Args:
        df: A dataframe of CLR transformed compositional data.
        colnames: List of column names to rename the columns to.
        scale: The value to which each composition should be normalized. E.g., if the composition is
            expressed as percentages, scale=100.

    Returns:
        A dataframe containing the inverse transformed data.

    Raises:
        NumericValueSignException: The input scale value is zero or less.
    """
    # Reject a non-positive scale before doing any work.
    if scale <= 0:
        raise NumericValueSignException("The scale value should be positive.")

    composition = _inverse_clr(df, colnames, scale)
    return composition
from numbers import Number

import numpy as np
import pandas as pd
from beartype import beartype
from beartype.typing import Optional, Sequence
from scipy.stats import gmean

from eis_toolkit.exceptions import NumericValueSignException
from eis_toolkit.utilities.aitchison_geometry import _closure
from eis_toolkit.utilities.checks.compositional import check_in_simplex_sample_space
from eis_toolkit.utilities.miscellaneous import rename_columns, rename_columns_by_pattern


@beartype
def _centered_ratio(row: pd.Series) -> pd.Series:
    """Divide every value of the row by the geometric mean of the row."""
    geometric_mean = gmean(row)
    return row / geometric_mean


@beartype
def _clr_transform(df: pd.DataFrame) -> pd.DataFrame:
    """
    Compute the natural logarithm of each row divided by its own geometric mean.

    Args:
        df: The dataframe to transform. It is copied, not modified.

    Returns:
        A dataframe of the log-transformed centered ratios.
    """
    # axis=1 hands each row to _centered_ratio as a series.
    centered = df.copy().apply(_centered_ratio, axis=1)
    return np.log(centered)


@beartype
def clr_transform(df: pd.DataFrame) -> pd.DataFrame:
    """
    Perform a centered logratio transformation on the data.

    Args:
        df: A dataframe of compositional data.

    Returns:
        A new dataframe containing the CLR transformed data, with columns renamed by
        `rename_columns_by_pattern`.

    Raises:
        InvalidCompositionException: Data is not normalized to the expected value.
        NumericValueSignException: Data contains zeros or negative values.
    """
    # Input validation is delegated to the shared simplex sample space check.
    check_in_simplex_sample_space(df)

    transformed = _clr_transform(df)
    return rename_columns_by_pattern(transformed)


@beartype
def _inverse_clr(df: pd.DataFrame, colnames: Optional[Sequence[str]] = None, scale: Number = 1.0) -> pd.DataFrame:
    """
    Exponentiate CLR data and pass it through `_closure` with the given scale.

    Args:
        df: A dataframe of CLR transformed data.
        colnames: Optional column names passed to `rename_columns`; if None, no renaming is done.
        scale: The scale value forwarded to `_closure`.

    Returns:
        The result of `_closure`, renamed when `colnames` is given.
    """
    closed = _closure(np.exp(df), scale)
    return closed if colnames is None else rename_columns(closed, colnames)


@beartype
def inverse_clr(df: pd.DataFrame, colnames: Optional[Sequence[str]] = None, scale: Number = 1.0) -> pd.DataFrame:
    """
    Perform the inverse transformation for a set of CLR transformed data.

    Args:
        df: A dataframe of CLR transformed compositional data.
        colnames: List of column names to rename the columns to.
        scale: The value to which each composition should be normalized. E.g., if the composition is
            expressed as percentages, scale=100.

    Returns:
        A dataframe containing the inverse transformed data.

    Raises:
        NumericValueSignException: The input scale value is zero or less.
    """
    # Reject a non-positive scale before doing any work.
    if scale <= 0:
        raise NumericValueSignException("The scale value should be positive.")

    composition = _inverse_clr(df, colnames, scale)
    return composition
Loading
Loading