Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

447 coda transformations add selection of columns/attributes of the input data #459

Draft
wants to merge 32 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
12fe14e
Refactor(CLR): Add option to select columns
msorvoja Nov 13, 2024
e2651d5
Edit docstrings
msorvoja Nov 14, 2024
64d7bad
refactor(ALR): Add option to select columns
msorvoja Nov 14, 2024
467d201
refactor(PLR): Add option to select columns
msorvoja Nov 14, 2024
869ba24
Run pre-commit
msorvoja Nov 14, 2024
1e61ce9
Merge master into branch
msorvoja Nov 18, 2024
b0c5659
Fix(clr_transform_cli): make columns optional parameter
msorvoja Nov 18, 2024
f546cfd
feat(plr_transform_cli): add columns parameters
msorvoja Nov 18, 2024
8ce165f
fix(ALR): Perform check_in_simplex_sample_space after selecting columns
msorvoja Nov 18, 2024
da02d54
fix(CLR): Perform check_in_simplex_sample_space after selecting columns
msorvoja Nov 18, 2024
72deedb
fix(PLR): Perform check_in_simplex_space after selecting columns
msorvoja Nov 25, 2024
cdf056e
feat(closure): add function for performing closure on DataFrame
msorvoja Dec 17, 2024
dacd830
feat(PLR): add parameter for performing closure on the input DataFrame
msorvoja Dec 17, 2024
5f47ff2
fix(closure): improve docstring
msorvoja Dec 17, 2024
33f4d78
feat(ALR): add closure_target parameter
msorvoja Dec 17, 2024
3bcfedd
feat(CLR): add closure_target parameter
msorvoja Dec 17, 2024
8f1679d
feat(ILR): add closure_target parameter
msorvoja Dec 17, 2024
85064f2
Fix parameter type for closure_target
msorvoja Dec 18, 2024
011a7e3
Select subcomposition columns if closure is performed
msorvoja Dec 18, 2024
f47b897
Fix notebook
msorvoja Dec 18, 2024
0a18030
Remove duplicate closure function
msorvoja Dec 19, 2024
94991cc
Improve documentation
msorvoja Dec 19, 2024
d117ce4
Merge branch 'master' into 447-coda-transformations-add-selection-of-…
msorvoja Dec 19, 2024
ffa84e6
Fix notebook
msorvoja Dec 19, 2024
662e73e
Clean code
msorvoja Dec 19, 2024
6f4469a
Clean code
msorvoja Dec 19, 2024
a513418
Select columns before performing closure
msorvoja Dec 19, 2024
0000746
Fix test
msorvoja Dec 19, 2024
8e09947
Add checks, fix logic, add tests
msorvoja Dec 19, 2024
3d56378
Add scale parameter to CoDa CLI functions
msorvoja Dec 19, 2024
1bf85a9
Fix notebook
msorvoja Dec 19, 2024
0026856
Fix missing denominator column in ALR
msorvoja Jan 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 37 additions & 12 deletions eis_toolkit/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3076,8 +3076,10 @@ def gamma_overlay_cli(input_rasters: INPUT_FILES_ARGUMENT, output_raster: OUTPUT
def alr_transform_cli(
input_vector: INPUT_FILE_OPTION,
output_vector: OUTPUT_FILE_OPTION,
column: str = None,
columns: Annotated[List[str], typer.Option()] = None,
denominator_column: str = None,
keep_denominator_column: bool = False,
scale: Optional[float] = None,
):
"""Perform an additive logratio transformation on the data."""
from eis_toolkit.transformations.coda.alr import alr_transform
Expand All @@ -3089,7 +3091,13 @@ def alr_transform_cli(
df = pd.DataFrame(gdf.drop(columns="geometry"))
typer.echo("Progress: 25%")

out_df = alr_transform(df=df, column=column, keep_denominator_column=keep_denominator_column)
out_df = alr_transform(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(This applies to the other transforms as well.)

From the perspective of the QGIS plugin, would it make sense that, when given a subcomposition (i.e., certain columns to use), the resulting df would be combined back with the other columns in the data? Or is that something that can be implemented on the plugin side easily?

I don't know much about the actual use cases, so I'm not sure whether the user will typically want to keep working with the CoDa data separately, but I would assume it's more convenient alongside the rest of the data.

df=df,
columns=columns,
denominator_column=denominator_column,
keep_denominator_column=keep_denominator_column,
scale=scale,
)
typer.echo("Progess 75%")

out_gdf = gpd.GeoDataFrame(out_df, geometry=geometries)
Expand All @@ -3104,6 +3112,7 @@ def inverse_alr_transform_cli(
input_vector: INPUT_FILE_OPTION,
output_vector: OUTPUT_FILE_OPTION,
denominator_column: str = typer.Option(),
columns: Annotated[List[str], typer.Option()] = None,
scale: float = 1.0,
):
"""Perform the inverse transformation for a set of ALR transformed data."""
Expand All @@ -3116,7 +3125,7 @@ def inverse_alr_transform_cli(
df = pd.DataFrame(gdf.drop(columns="geometry"))
typer.echo("Progress: 25%")

out_df = inverse_alr(df=df, denominator_column=denominator_column, scale=scale)
out_df = inverse_alr(df=df, denominator_column=denominator_column, columns=columns, scale=scale)
typer.echo("Progess 75%")

out_gdf = gpd.GeoDataFrame(out_df, geometry=geometries)
Expand All @@ -3127,7 +3136,12 @@ def inverse_alr_transform_cli(

# CODA - CLR TRANSFORM
@app.command()
def clr_transform_cli(input_vector: INPUT_FILE_OPTION, output_vector: OUTPUT_FILE_OPTION):
def clr_transform_cli(
input_vector: INPUT_FILE_OPTION,
output_vector: OUTPUT_FILE_OPTION,
columns: Annotated[List[str], typer.Option()] = None,
scale: Optional[float] = None,
):
"""Perform a centered logratio transformation on the data."""
from eis_toolkit.transformations.coda.clr import clr_transform

Expand All @@ -3138,7 +3152,7 @@ def clr_transform_cli(input_vector: INPUT_FILE_OPTION, output_vector: OUTPUT_FIL
df = pd.DataFrame(gdf.drop(columns="geometry"))
typer.echo("Progress: 25%")

out_df = clr_transform(df=df)
out_df = clr_transform(df=df, columns=columns, scale=scale)
typer.echo("Progess 75%")

out_gdf = gpd.GeoDataFrame(out_df, geometry=geometries)
Expand All @@ -3152,6 +3166,7 @@ def clr_transform_cli(input_vector: INPUT_FILE_OPTION, output_vector: OUTPUT_FIL
def inverse_clr_transform_cli(
input_vector: INPUT_FILE_OPTION,
output_vector: OUTPUT_FILE_OPTION,
columns: Annotated[List[str], typer.Option()] = None,
colnames: Annotated[List[str], typer.Option()] = None,
scale: float = 1.0,
):
Expand All @@ -3165,7 +3180,7 @@ def inverse_clr_transform_cli(
df = pd.DataFrame(gdf.drop(columns="geometry"))
typer.echo("Progress: 25%")

out_df = inverse_clr(df=df, colnames=colnames, scale=scale)
out_df = inverse_clr(df=df, columns=columns, colnames=colnames, scale=scale)
typer.echo("Progess 75%")

out_gdf = gpd.GeoDataFrame(out_df, geometry=geometries)
Expand All @@ -3181,6 +3196,7 @@ def single_ilr_transform_cli(
output_vector: OUTPUT_FILE_OPTION,
subcomposition_1: Annotated[List[str], typer.Option()],
subcomposition_2: Annotated[List[str], typer.Option()],
scale: Optional[float] = None,
):
"""Perform a single isometric logratio transformation on the provided subcompositions."""
from eis_toolkit.transformations.coda.ilr import single_ilr_transform
Expand All @@ -3192,7 +3208,9 @@ def single_ilr_transform_cli(
df = pd.DataFrame(gdf.drop(columns="geometry"))
typer.echo("Progress: 25%")

out_series = single_ilr_transform(df=df, subcomposition_1=subcomposition_1, subcomposition_2=subcomposition_2)
out_series = single_ilr_transform(
df=df, subcomposition_1=subcomposition_1, subcomposition_2=subcomposition_2, scale=scale
)
typer.echo("Progess 75%")

# NOTE: Output of pairwise_logratio might be changed to DF in the future, to automatically do the following
Expand Down Expand Up @@ -3237,7 +3255,9 @@ def pairwise_logratio_cli(
def single_plr_transform_cli(
input_vector: INPUT_FILE_OPTION,
output_vector: OUTPUT_FILE_OPTION,
column: str = typer.Option(),
numerator: str = typer.Option(),
denominator_columns: Annotated[List[str], typer.Option()] = None,
scale: Optional[float] = None,
):
"""Perform a pivot logratio transformation on the selected column."""
from eis_toolkit.transformations.coda.plr import single_plr_transform
Expand All @@ -3249,7 +3269,7 @@ def single_plr_transform_cli(
df = pd.DataFrame(gdf.drop(columns="geometry"))
typer.echo("Progress: 25%")

out_series = single_plr_transform(df=df, column=column)
out_series = single_plr_transform(df=df, numerator=numerator, denominator_columns=denominator_columns, scale=scale)
typer.echo("Progess 75%")

# NOTE: Output of single_plr_transform might be changed to DF in the future, to automatically do the following
Expand All @@ -3262,8 +3282,13 @@ def single_plr_transform_cli(

# CODA - PLR TRANSFORM
@app.command()
def plr_transform_cli(input_vector: INPUT_FILE_OPTION, output_vector: OUTPUT_FILE_OPTION):
"""Perform a pivot logratio transformation on the dataframe, returning the full set of transforms."""
def plr_transform_cli(
input_vector: INPUT_FILE_OPTION,
output_vector: OUTPUT_FILE_OPTION,
columns: Annotated[List[str], typer.Option()] = None,
scale: Optional[float] = None,
):
"""Perform a pivot logratio transformation on the selected columns."""
from eis_toolkit.transformations.coda.plr import plr_transform

typer.echo("Progress: 10%")
Expand All @@ -3273,7 +3298,7 @@ def plr_transform_cli(input_vector: INPUT_FILE_OPTION, output_vector: OUTPUT_FIL
df = pd.DataFrame(gdf.drop(columns="geometry"))
typer.echo("Progress: 25%")

out_df = plr_transform(df=df)
out_df = plr_transform(df=df, columns=columns, scale=scale)
typer.echo("Progess 75%")

out_gdf = gpd.GeoDataFrame(out_df, geometry=geometries)
Expand Down
213 changes: 125 additions & 88 deletions eis_toolkit/transformations/coda/alr.py
Original file line number Diff line number Diff line change
@@ -1,88 +1,125 @@
from numbers import Number

import numpy as np
import pandas as pd
from beartype import beartype
from beartype.typing import Optional, Sequence

from eis_toolkit.exceptions import InvalidColumnException, NumericValueSignException
from eis_toolkit.utilities.aitchison_geometry import _closure
from eis_toolkit.utilities.checks.compositional import check_in_simplex_sample_space
from eis_toolkit.utilities.miscellaneous import rename_columns_by_pattern


@beartype
def _alr_transform(df: pd.DataFrame, columns: Sequence[str], denominator_column: str) -> pd.DataFrame:
    """
    Compute the natural log of each selected column divided by the denominator column.

    Args:
        df: Dataframe holding both the numerator columns and the denominator column.
        columns: Names of the columns used as numerators.
        denominator_column: Name of the column used as the common denominator.

    Returns:
        A dataframe with one logratio column per entry in `columns`.
    """
    # Select with a list: pandas interprets a tuple passed to [] as a single column key.
    numerators = df[list(columns)]
    # axis=0 aligns the denominator Series on the row index, so the division is row-wise.
    return np.log(numerators.div(df[denominator_column], axis=0))


@beartype
def alr_transform(
    df: pd.DataFrame, column: Optional[str] = None, keep_denominator_column: bool = False
) -> pd.DataFrame:
    """
    Apply the additive logratio (ALR) transformation to a compositional dataframe.

    Args:
        df: A dataframe of compositional data.
        column: Name of the denominator column. When omitted, the last column of `df` is used.
        keep_denominator_column: If True, the denominator column is transformed as well and kept
            in the result, so the returned dataframe has the same shape as the input.

    Returns:
        A new dataframe containing the ALR transformed data.

    Raises:
        InvalidColumnException: The input column isn't found in the dataframe.
        InvalidCompositionException: Data is not normalized to the expected value.
        NumericValueSignException: Data contains zeros or negative values.
    """
    # The simplex check runs first, on the whole dataframe.
    check_in_simplex_sample_space(df)

    if column is None:
        column = df.columns[-1]
    elif column not in df.columns:
        raise InvalidColumnException(f"The column {column} was not found in the dataframe.")

    numerator_columns = list(df.columns)
    if column in numerator_columns and not keep_denominator_column:
        numerator_columns.remove(column)

    transformed = _alr_transform(df, numerator_columns, column)
    return rename_columns_by_pattern(transformed)


@beartype
def _inverse_alr(df: pd.DataFrame, denominator_column: str, scale: Number = 1.0) -> pd.DataFrame:
    """
    Exponentiate ALR data and pass the result through `_closure` with the given scale.

    The input dataframe is never modified; a copy is exponentiated.
    """
    if denominator_column in df.columns.values:
        logratios = df.copy()
    else:
        # The denominator is missing from the input: append it as 0.0, which exponentiates to 1.
        logratios = df.assign(**{denominator_column: 0.0})

    return _closure(np.exp(logratios), scale)


@beartype
def inverse_alr(df: pd.DataFrame, denominator_column: str, scale: Number = 1.0) -> pd.DataFrame:
    """
    Invert an additive logratio (ALR) transformation.

    Args:
        df: A dataframe of ALR transformed compositional data.
        denominator_column: The name of the denominator column.
        scale: The value to which each composition should be normalized. Eg., if the composition
            is expressed as percentages, scale=100.

    Returns:
        A dataframe containing the inverse transformed data.

    Raises:
        NumericValueSignException: The input scale value is zero or less.
    """
    if scale <= 0:
        raise NumericValueSignException("The scale value should be positive.")

    return _inverse_alr(df=df, denominator_column=denominator_column, scale=scale)
from numbers import Number

import numpy as np
import pandas as pd
from beartype import beartype
from beartype.typing import Optional, Sequence

from eis_toolkit.exceptions import InvalidColumnException, NumericValueSignException
from eis_toolkit.utilities.aitchison_geometry import _closure
from eis_toolkit.utilities.checks.compositional import check_in_simplex_sample_space
from eis_toolkit.utilities.miscellaneous import rename_columns_by_pattern


@beartype
def _alr_transform(df: pd.DataFrame, columns: Sequence[str], denominator_column: str) -> pd.DataFrame:
    """
    Compute the natural log of each selected column divided by the denominator column.

    Args:
        df: Dataframe holding both the numerator columns and the denominator column.
        columns: Names of the columns used as numerators.
        denominator_column: Name of the column used as the common denominator.

    Returns:
        A dataframe with one logratio column per entry in `columns`.
    """
    # Select with a list: pandas interprets a tuple passed to [] as a single column key.
    numerators = df[list(columns)]
    # axis=0 aligns the denominator Series on the row index, so the division is row-wise.
    return np.log(numerators.div(df[denominator_column], axis=0))


@beartype
def alr_transform(
    df: pd.DataFrame,
    columns: Optional[Sequence[str]] = None,
    denominator_column: Optional[str] = None,
    keep_denominator_column: bool = False,
    scale: Optional[Number] = None,
) -> pd.DataFrame:
    """
    Perform an additive logratio transformation on the data.

    Args:
        df: A dataframe of compositional data.
        columns: The names of the columns to be transformed. If None or empty, all columns of `df`
            are used. The sequence passed in is never modified.
        denominator_column: The name of the column to be used as the denominator column. If None,
            the last column of the input dataframe is used (not the last of the selected columns).
        keep_denominator_column: Whether to include the denominator column in the result. If True, the returned
            dataframe retains its original shape.
        scale: The value to which each composition should be normalized. Eg., if the composition is expressed
            as percentages, scale=100. Closure is not performed by default.

    Returns:
        A new dataframe containing the ALR transformed data.

    Raises:
        InvalidColumnException: The denominator column or any of the selected columns isn't found in the
            dataframe, or keep_denominator_column is True while an explicitly given denominator column
            is missing from the selected columns.
        InvalidCompositionException: Data is not normalized to the expected value.
        NumericValueSignException: Data contains zeros or negative values.
    """
    if denominator_column is not None and denominator_column not in df.columns:
        raise InvalidColumnException(f"The column {denominator_column} was not found in the dataframe.")

    if denominator_column is not None and keep_denominator_column and columns and denominator_column not in columns:
        raise InvalidColumnException(
            f"Denominator column '{denominator_column}' must be in selected columns if keep_denominator_column is True."
        )

    if denominator_column is None:
        denominator_column = df.columns[-1]

    if columns:
        invalid_columns = [col for col in columns if col not in df.columns]
        if invalid_columns:
            raise InvalidColumnException(f"The following columns were not found in the dataframe: {invalid_columns}.")

        # Work on a private list: the caller's sequence must not be mutated by the remove() below,
        # and tuples (valid for Sequence[str]) support neither list concatenation nor column selection.
        columns_to_transform = list(columns)

        # The denominator has to be present in the subcomposition even when it was not selected.
        if denominator_column in columns_to_transform:
            df = df[columns_to_transform]
        else:
            df = df[columns_to_transform + [denominator_column]]
    else:
        columns_to_transform = df.columns.to_list()

    # Closure and the simplex check operate on the selected subcomposition only.
    if scale is not None:
        df = _closure(df, scale)

    check_in_simplex_sample_space(df)

    if not keep_denominator_column and denominator_column in columns_to_transform:
        columns_to_transform.remove(denominator_column)

    return rename_columns_by_pattern(_alr_transform(df, columns_to_transform, denominator_column))


@beartype
def _inverse_alr(df: pd.DataFrame, denominator_column: str, scale: Number = 1.0) -> pd.DataFrame:
    """
    Exponentiate ALR data and pass the result through `_closure` with the given scale.

    The input dataframe is never modified; a copy is exponentiated.
    """
    if denominator_column in df.columns.values:
        logratios = df.copy()
    else:
        # The denominator is missing from the input: append it as 0.0, which exponentiates to 1.
        logratios = df.assign(**{denominator_column: 0.0})

    return _closure(np.exp(logratios), scale)


@beartype
def inverse_alr(
    df: pd.DataFrame, denominator_column: str, columns: Optional[Sequence[str]] = None, scale: Number = 1.0
) -> pd.DataFrame:
    """
    Perform the inverse transformation for a set of ALR transformed data.

    Args:
        df: A dataframe of ALR transformed compositional data.
        denominator_column: The name of the denominator column.
        columns: The names of the columns to be transformed. If None or empty, all columns of `df` are used.
        scale: The value to which each composition should be normalized. Eg., if the composition is expressed
            as percentages, scale=100.

    Returns:
        A dataframe containing the inverse transformed data.

    Raises:
        InvalidColumnException: The input column(s) not found in the dataframe.
        NumericValueSignException: The input scale value is zero or less.
    """
    if scale <= 0:
        raise NumericValueSignException("The scale value should be positive.")

    if columns:
        invalid_columns = [col for col in columns if col not in df.columns]
        if invalid_columns:
            raise InvalidColumnException(f"The following columns were not found in the dataframe: {invalid_columns}.")
        # Select with a list: pandas interprets a tuple passed to [] as a single column key.
        df = df[list(columns)]

    return _inverse_alr(df, denominator_column, scale)
Loading
Loading