From 12fe14efc36e14869a58a28f1ee8d658d92b258a Mon Sep 17 00:00:00 2001 From: msorvoja Date: Wed, 13 Nov 2024 12:07:50 +0200 Subject: [PATCH 01/30] Refactor(CLR): Add option to select columns --- eis_toolkit/cli.py | 11 +++-- eis_toolkit/transformations/coda/alr.py | 10 ++--- eis_toolkit/transformations/coda/clr.py | 57 +++++++++++++++++------- tests/data/remote/test.gpkg | Bin 106496 -> 106496 bytes tests/transformations/coda/clr_test.py | 19 +++++++- 5 files changed, 72 insertions(+), 25 deletions(-) diff --git a/eis_toolkit/cli.py b/eis_toolkit/cli.py index fc0d86a5..37cb1b06 100644 --- a/eis_toolkit/cli.py +++ b/eis_toolkit/cli.py @@ -3086,7 +3086,11 @@ def inverse_alr_transform_cli( # CODA - CLR TRANSFORM @app.command() -def clr_transform_cli(input_vector: INPUT_FILE_OPTION, output_vector: OUTPUT_FILE_OPTION): +def clr_transform_cli( + input_vector: INPUT_FILE_OPTION, + output_vector: OUTPUT_FILE_OPTION, + columns: Annotated[List[str], typer.Option()] = None, +): """Perform a centered logratio transformation on the data.""" from eis_toolkit.transformations.coda.clr import clr_transform @@ -3097,7 +3101,7 @@ def clr_transform_cli(input_vector: INPUT_FILE_OPTION, output_vector: OUTPUT_FIL df = pd.DataFrame(gdf.drop(columns="geometry")) typer.echo("Progress: 25%") - out_df = clr_transform(df=df) + out_df = clr_transform(df=df, columns=columns) typer.echo("Progess 75%") out_gdf = gpd.GeoDataFrame(out_df, geometry=geometries) @@ -3111,6 +3115,7 @@ def clr_transform_cli(input_vector: INPUT_FILE_OPTION, output_vector: OUTPUT_FIL def inverse_clr_transform_cli( input_vector: INPUT_FILE_OPTION, output_vector: OUTPUT_FILE_OPTION, + columns: Annotated[List[str], typer.Option()] = None, colnames: Annotated[List[str], typer.Option()] = None, scale: float = 1.0, ): @@ -3124,7 +3129,7 @@ def inverse_clr_transform_cli( df = pd.DataFrame(gdf.drop(columns="geometry")) typer.echo("Progress: 25%") - out_df = inverse_clr(df=df, colnames=colnames, scale=scale) + out_df = 
inverse_clr(df=df, columns=columns, colnames=colnames, scale=scale) typer.echo("Progess 75%") out_gdf = gpd.GeoDataFrame(out_df, geometry=geometries) diff --git a/eis_toolkit/transformations/coda/alr.py b/eis_toolkit/transformations/coda/alr.py index d9b5af9b..a0e8d848 100644 --- a/eis_toolkit/transformations/coda/alr.py +++ b/eis_toolkit/transformations/coda/alr.py @@ -44,14 +44,14 @@ def alr_transform( if column is not None and column not in df.columns: raise InvalidColumnException(f"The column {column} was not found in the dataframe.") - column = column if column is not None else df.columns[-1] + denominator_column = column if column is not None else df.columns[-1] - columns = [col for col in df.columns] + columns_to_transform = [col for col in df.columns] - if not keep_denominator_column and column in columns: - columns.remove(column) + if not keep_denominator_column and denominator_column in columns_to_transform: + columns_to_transform.remove(denominator_column) - return rename_columns_by_pattern(_alr_transform(df, columns, column)) + return rename_columns_by_pattern(_alr_transform(df, columns_to_transform, denominator_column)) @beartype diff --git a/eis_toolkit/transformations/coda/clr.py b/eis_toolkit/transformations/coda/clr.py index d5a82215..514da864 100644 --- a/eis_toolkit/transformations/coda/clr.py +++ b/eis_toolkit/transformations/coda/clr.py @@ -6,7 +6,7 @@ from beartype.typing import Optional, Sequence from scipy.stats import gmean -from eis_toolkit.exceptions import NumericValueSignException +from eis_toolkit.exceptions import InvalidColumnException, NumericValueSignException from eis_toolkit.utilities.aitchison_geometry import _closure from eis_toolkit.utilities.checks.compositional import check_compositional_data from eis_toolkit.utilities.miscellaneous import rename_columns, rename_columns_by_pattern @@ -14,55 +14,68 @@ @beartype def _centered_ratio(row: pd.Series) -> pd.Series: - return row / gmean(row) @beartype -def _clr_transform(df: 
pd.DataFrame) -> pd.DataFrame: - - dfc = df.copy() +def _clr_transform(df: pd.DataFrame, columns: Sequence[str]) -> pd.DataFrame: + dfc = df[columns].copy() dfc = dfc.apply(_centered_ratio, axis=1) return np.log(dfc) @beartype -def clr_transform(df: pd.DataFrame) -> pd.DataFrame: +def clr_transform(df: pd.DataFrame, columns: Optional[Sequence[str]] = None) -> pd.DataFrame: """ Perform a centered logratio transformation on the data. Args: df: A dataframe of compositional data. + columns: The names of the columns to be transformed. Returns: A new dataframe containing the CLR transformed data. Raises: + InvalidColumnException: The input column(s) not found in the dataframe. InvalidCompositionException: Data is not normalized to the expected value. NumericValueSignException: Data contains zeros or negative values. """ check_compositional_data(df) - return rename_columns_by_pattern(_clr_transform(df)) + if columns: + invalid_columns = [col for col in columns if col not in df.columns] + if invalid_columns: + raise InvalidColumnException(f"The following columns were not found in the dataframe: {invalid_columns}.") + columns_to_transform = columns + else: + columns_to_transform = df.columns.to_list() + + return rename_columns_by_pattern(_clr_transform(df, columns_to_transform)) -@beartype -def _inverse_clr(df: pd.DataFrame, colnames: Optional[Sequence[str]] = None, scale: Number = 1.0) -> pd.DataFrame: - inverse = _closure(np.exp(df), scale) - if colnames is not None: - return rename_columns(inverse, colnames) +@beartype +def _inverse_clr(df: pd.DataFrame, columns: Optional[Sequence[str]] = None, scale: Number = 1.0) -> pd.DataFrame: + if columns: + df = df[columns] - return inverse + return _closure(np.exp(df), scale) @beartype -def inverse_clr(df: pd.DataFrame, colnames: Optional[Sequence[str]] = None, scale: Number = 1.0) -> pd.DataFrame: +def inverse_clr( + df: pd.DataFrame, + columns: Optional[Sequence[str]] = None, + colnames: Optional[Sequence[str]] = None, + scale: 
Number = 1.0, +) -> pd.DataFrame: """ Perform the inverse transformation for a set of CLR transformed data. Args: df: A dataframe of CLR transformed compositional data. + columns: The names of the columns to be transformed. colnames: List of column names to rename the columns to. scale: The value to which each composition should be normalized. Eg., if the composition is expressed as percentages, scale=100. @@ -71,9 +84,23 @@ def inverse_clr(df: pd.DataFrame, colnames: Optional[Sequence[str]] = None, scal A dataframe containing the inverse transformed data. Raises: + InvalidColumnException: The input column(s) not found in the dataframe. NumericValueSignException: The input scale value is zero or less. """ if scale <= 0: raise NumericValueSignException("The scale value should be positive.") - return _inverse_clr(df, colnames, scale) + if columns: + invalid_columns = [col for col in columns if col not in df.columns] + if invalid_columns: + raise InvalidColumnException(f"The following columns were not found in the dataframe: {invalid_columns}.") + columns_to_transform = columns + else: + columns_to_transform = df.columns.to_list() + + inverse_data = _inverse_clr(df, columns_to_transform, scale) + + if colnames: + return rename_columns(inverse_data, colnames) + + return inverse_data diff --git a/tests/data/remote/test.gpkg b/tests/data/remote/test.gpkg index 6dd130f462e5eabb5cd23ea5234c38cf6f1b1064..b0b2ca51a22a06d059b6746fcc6ca1a5a60d0c81 100644 GIT binary patch delta 26 icmZoTz}9epZGtqTz(g5mRsjaRaIeOcttpIi<^uq3rwCU7 delta 26 icmZoTz}9epZGto--$WT_Rz3#3c$vnOttpIi<^uq2MF=eb diff --git a/tests/transformations/coda/clr_test.py b/tests/transformations/coda/clr_test.py index 41459035..0507f470 100644 --- a/tests/transformations/coda/clr_test.py +++ b/tests/transformations/coda/clr_test.py @@ -26,21 +26,36 @@ def test_clr_transform(): pd.testing.assert_frame_equal(result, expected, atol=1e-2) +def test_clr_transform_with_columns(): + """Test CLR transform with columns.""" + 
result = clr_transform(SAMPLE_DATAFRAME, columns=["a", "b", "d"]) + expected = pd.DataFrame({"V1": [1.42, 1.24], "V2": [-0.27, -0.13], "V3": [-1.15, -1.11]}, dtype=np.float64) + pd.testing.assert_frame_equal(result, expected, atol=1e-2) + + def test_inverse_clr_simple(): """Test CLR inverse core functionality.""" zeros_df_4x4 = pd.DataFrame(np.zeros((4, 4)), columns=["V1", "V2", "V3", "V4"]) ones_df_4x4 = pd.DataFrame(np.ones((4, 4)), columns=["a", "b", "c", "d"]) - result = inverse_clr(zeros_df_4x4, ["a", "b", "c", "d"], 4) + result = inverse_clr(zeros_df_4x4, colnames=["a", "b", "c", "d"], scale=4) pd.testing.assert_frame_equal(result, ones_df_4x4) def test_inverse_clr(): """Test CLR inverse core functionality.""" clr = clr_transform(SAMPLE_DATAFRAME) - result = inverse_clr(clr, ["a", "b", "c", "d"], 100) + result = inverse_clr(clr, colnames=["a", "b", "c", "d"], scale=100) pd.testing.assert_frame_equal(result, SAMPLE_DATAFRAME) +def test_inverse_clr_with_columns(): + """Test CLR inverse with columns.""" + clr = clr_transform(SAMPLE_DATAFRAME) + result = inverse_clr(clr, columns=["V1", "V2"], colnames=["a", "b"], scale=100) + expected = pd.DataFrame({"a": [84.42, 79.75], "b": [15.58, 20.25]}) + pd.testing.assert_frame_equal(result, expected, atol=1e-2) + + def test_inverse_clr_with_invalid_scale_value(): """Test that inverse CLR with an invalid input scale raises the correct exception.""" clr = clr_transform(SAMPLE_DATAFRAME) From e2651d59f095963f9059690579f2440940e4c8f3 Mon Sep 17 00:00:00 2001 From: msorvoja Date: Thu, 14 Nov 2024 07:49:33 +0200 Subject: [PATCH 02/30] Edit docstrings --- tests/transformations/coda/clr_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/transformations/coda/clr_test.py b/tests/transformations/coda/clr_test.py index 0507f470..0bfa18c5 100644 --- a/tests/transformations/coda/clr_test.py +++ b/tests/transformations/coda/clr_test.py @@ -27,7 +27,7 @@ def test_clr_transform(): def 
test_clr_transform_with_columns(): - """Test CLR transform with columns.""" + """Test CLR transform with column selection.""" result = clr_transform(SAMPLE_DATAFRAME, columns=["a", "b", "d"]) expected = pd.DataFrame({"V1": [1.42, 1.24], "V2": [-0.27, -0.13], "V3": [-1.15, -1.11]}, dtype=np.float64) pd.testing.assert_frame_equal(result, expected, atol=1e-2) @@ -49,7 +49,7 @@ def test_inverse_clr(): def test_inverse_clr_with_columns(): - """Test CLR inverse with columns.""" + """Test CLR inverse with column selection.""" clr = clr_transform(SAMPLE_DATAFRAME) result = inverse_clr(clr, columns=["V1", "V2"], colnames=["a", "b"], scale=100) expected = pd.DataFrame({"a": [84.42, 79.75], "b": [15.58, 20.25]}) From 64d7badf2604537d88d97f0bc9e043fffffe7bbe Mon Sep 17 00:00:00 2001 From: msorvoja Date: Thu, 14 Nov 2024 09:39:13 +0200 Subject: [PATCH 03/30] refactor(ALR): Add option to select columns --- eis_toolkit/cli.py | 12 ++++-- eis_toolkit/transformations/coda/alr.py | 50 ++++++++++++++++++++----- tests/transformations/coda/alr_test.py | 47 +++++++++++++++++++---- 3 files changed, 87 insertions(+), 22 deletions(-) diff --git a/eis_toolkit/cli.py b/eis_toolkit/cli.py index 37cb1b06..781f1e7b 100644 --- a/eis_toolkit/cli.py +++ b/eis_toolkit/cli.py @@ -3035,7 +3035,8 @@ def gamma_overlay_cli(input_rasters: INPUT_FILES_ARGUMENT, output_raster: OUTPUT def alr_transform_cli( input_vector: INPUT_FILE_OPTION, output_vector: OUTPUT_FILE_OPTION, - column: str = None, + columns: Annotated[List[str], typer.Option()] = None, + denominator_column: str = None, keep_denominator_column: bool = False, ): """Perform an additive logratio transformation on the data.""" @@ -3048,7 +3049,9 @@ def alr_transform_cli( df = pd.DataFrame(gdf.drop(columns="geometry")) typer.echo("Progress: 25%") - out_df = alr_transform(df=df, column=column, keep_denominator_column=keep_denominator_column) + out_df = alr_transform( + df=df, columns=columns, denominator_column=denominator_column, 
keep_denominator_column=keep_denominator_column + ) typer.echo("Progess 75%") out_gdf = gpd.GeoDataFrame(out_df, geometry=geometries) @@ -3063,6 +3066,7 @@ def inverse_alr_transform_cli( input_vector: INPUT_FILE_OPTION, output_vector: OUTPUT_FILE_OPTION, denominator_column: str = typer.Option(), + columns: Annotated[List[str], typer.Option()] = None, scale: float = 1.0, ): """Perform the inverse transformation for a set of ALR transformed data.""" @@ -3075,7 +3079,7 @@ def inverse_alr_transform_cli( df = pd.DataFrame(gdf.drop(columns="geometry")) typer.echo("Progress: 25%") - out_df = inverse_alr(df=df, denominator_column=denominator_column, scale=scale) + out_df = inverse_alr(df=df, denominator_column=denominator_column, columns=columns, scale=scale) typer.echo("Progess 75%") out_gdf = gpd.GeoDataFrame(out_df, geometry=geometries) @@ -3089,7 +3093,7 @@ def inverse_alr_transform_cli( def clr_transform_cli( input_vector: INPUT_FILE_OPTION, output_vector: OUTPUT_FILE_OPTION, - columns: Annotated[List[str], typer.Option()] = None, + columns: Annotated[List[str], typer.Option()], ): """Perform a centered logratio transformation on the data.""" from eis_toolkit.transformations.coda.clr import clr_transform diff --git a/eis_toolkit/transformations/coda/alr.py b/eis_toolkit/transformations/coda/alr.py index a0e8d848..23d91957 100644 --- a/eis_toolkit/transformations/coda/alr.py +++ b/eis_toolkit/transformations/coda/alr.py @@ -3,7 +3,7 @@ import numpy as np import pandas as pd from beartype import beartype -from beartype.typing import Optional, Sequence +from beartype.typing import List, Optional, Sequence from eis_toolkit.exceptions import InvalidColumnException, NumericValueSignException from eis_toolkit.utilities.aitchison_geometry import _closure @@ -20,14 +20,18 @@ def _alr_transform(df: pd.DataFrame, columns: Sequence[str], denominator_column: @beartype def alr_transform( - df: pd.DataFrame, column: Optional[str] = None, keep_denominator_column: bool = False + df: 
pd.DataFrame, + columns: Optional[Sequence[str]] = None, + denominator_column: Optional[str] = None, + keep_denominator_column: bool = False, ) -> pd.DataFrame: """ Perform an additive logratio transformation on the data. Args: df: A dataframe of compositional data. - column: The name of the column to be used as the denominator column. + columns: The names of the columns to be transformed. + denominator_column: The name of the column to be used as the denominator column. keep_denominator_column: Whether to include the denominator column in the result. If True, the returned dataframe retains its original shape. @@ -41,12 +45,25 @@ def alr_transform( """ check_compositional_data(df) - if column is not None and column not in df.columns: - raise InvalidColumnException(f"The column {column} was not found in the dataframe.") + print(columns) - denominator_column = column if column is not None else df.columns[-1] + if denominator_column is not None and denominator_column not in df.columns: + raise InvalidColumnException(f"The column {denominator_column} was not found in the dataframe.") - columns_to_transform = [col for col in df.columns] + if denominator_column is not None and keep_denominator_column and columns and denominator_column not in columns: + raise InvalidColumnException( + f"Denominator column '{denominator_column}' must be in selected columns if keep_denominator_column is True." 
+ ) + + denominator_column = denominator_column if denominator_column is not None else df.columns[-1] + + if columns: + invalid_columns = [col for col in columns if col not in df.columns] + if invalid_columns: + raise InvalidColumnException(f"The following columns were not found in the dataframe: {invalid_columns}.") + columns_to_transform = columns + else: + columns_to_transform = df.columns.to_list() if not keep_denominator_column and denominator_column in columns_to_transform: columns_to_transform.remove(denominator_column) @@ -55,8 +72,9 @@ def alr_transform( @beartype -def _inverse_alr(df: pd.DataFrame, denominator_column: str, scale: Number = 1.0) -> pd.DataFrame: +def _inverse_alr(df: pd.DataFrame, columns: List[str], denominator_column: str, scale: Number = 1.0) -> pd.DataFrame: dfc = df.copy() + dfc = dfc[columns] if denominator_column not in dfc.columns.values: # Add the denominator column @@ -66,13 +84,16 @@ def _inverse_alr(df: pd.DataFrame, denominator_column: str, scale: Number = 1.0) @beartype -def inverse_alr(df: pd.DataFrame, denominator_column: str, scale: Number = 1.0) -> pd.DataFrame: +def inverse_alr( + df: pd.DataFrame, denominator_column: str, columns: Optional[Sequence[str]] = None, scale: Number = 1.0 +) -> pd.DataFrame: """ Perform the inverse transformation for a set of ALR transformed data. Args: df: A dataframe of ALR transformed compositional data. denominator_column: The name of the denominator column. + columns: The names of the columns to be transformed. scale: The value to which each composition should be normalized. Eg., if the composition is expressed as percentages, scale=100. @@ -80,9 +101,18 @@ def inverse_alr(df: pd.DataFrame, denominator_column: str, scale: Number = 1.0) A dataframe containing the inverse transformed data. Raises: + InvalidColumnException: The input column(s) not found in the dataframe. NumericValueSignException: The input scale value is zero or less. 
""" if scale <= 0: raise NumericValueSignException("The scale value should be positive.") - return _inverse_alr(df, denominator_column, scale) + if columns: + invalid_columns = [col for col in columns if col not in df.columns] + if invalid_columns: + raise InvalidColumnException(f"The following columns were not found in the dataframe: {invalid_columns}.") + columns_to_transform = columns + else: + columns_to_transform = df.columns.to_list() + + return _inverse_alr(df, columns_to_transform, denominator_column, scale) diff --git a/tests/transformations/coda/alr_test.py b/tests/transformations/coda/alr_test.py index 90049350..b7f8e983 100644 --- a/tests/transformations/coda/alr_test.py +++ b/tests/transformations/coda/alr_test.py @@ -22,7 +22,7 @@ def test_alr_transform(): arr = np.array([[1, 4, 1, 1], [2, 1, 2, 2]]) df = pd.DataFrame(arr, columns=["a", "b", "c", "d"], dtype=np.float64) - result = alr_transform(df, column="b", keep_denominator_column=True) + result = alr_transform(df, denominator_column="b", keep_denominator_column=True) expected = pd.DataFrame( { "V1": [np.log(0.25), np.log(2)], @@ -34,7 +34,7 @@ def test_alr_transform(): ) pd.testing.assert_frame_equal(result, expected) - result = alr_transform(df, column="b") + result = alr_transform(df, denominator_column="b") expected = pd.DataFrame( {"V1": [np.log(0.25), np.log(2)], "V2": [np.log(0.25), np.log(2)], "V3": [np.log(0.25), np.log(2)]}, dtype=np.float64, @@ -42,12 +42,33 @@ def test_alr_transform(): pd.testing.assert_frame_equal(result, expected) -def test_alr_transform_with_invalid_column(): - """Test that providing a column doesn't exist raises the correct exception.""" +def test_alr_transform_with_columns(): + """Test ALR transform with column selection.""" + alr = alr_transform(SAMPLE_DATAFRAME, columns=["a", "c", "d"], denominator_column="c", keep_denominator_column=True) + + expected = pd.DataFrame( + { + "V1": [np.log(65 / 18), np.log(63 / 15)], + "V2": [np.log(18 / 18), np.log(15 / 15)], + 
"V3": [np.log(5 / 18), np.log(6 / 15)], + }, + dtype=np.float64, + ) + pd.testing.assert_frame_equal(alr, expected) + + +def test_alr_transform_with_invalid_denominator_column(): + """Test that providing a denominator column doesn't exist raises the correct exception.""" with pytest.raises(InvalidColumnException): alr_transform(SAMPLE_DATAFRAME, "e") +def test_alr_transform_with_invalid_columns(): + """Test that providing invalid columns raises the correct exception.""" + with pytest.raises(InvalidColumnException): + alr_transform(SAMPLE_DATAFRAME, columns=["x", "y", "z"]) + + def test_alr_transform_denominator_column(): """ Test ALR transformation with the keep_denominator_column option set to True. @@ -66,7 +87,7 @@ def test_inverse_alr(): arr = np.array([[np.log(0.25), np.log(0.25), np.log(0.25)], [np.log(2), np.log(2), np.log(2)]]) df = pd.DataFrame(arr, columns=["V1", "V2", "V3"], dtype=np.float64) column_name = "d" - result = inverse_alr(df, column_name, 7) + result = inverse_alr(df, denominator_column=column_name, scale=7) expected_arr = np.array([[1, 1, 1, 4], [2, 2, 2, 1]]) expected = pd.DataFrame(expected_arr, columns=["V1", "V2", "V3", "d"], dtype=np.float64) pd.testing.assert_frame_equal(result, expected, atol=1e-2) @@ -80,7 +101,7 @@ def test_inverse_alr_with_existing_denominator_column(): expected_arr = np.array([[1, 1, 4, 1], [2, 2, 1, 2]]) expected = pd.DataFrame(expected_arr, columns=["V1", "V2", "d", "V3"], dtype=np.float64) - result = inverse_alr(df, column_name, 7) + result = inverse_alr(df, denominator_column=column_name, scale=7) pd.testing.assert_frame_equal(result, expected, atol=1e-2) @@ -89,6 +110,16 @@ def test_inverse_alr_with_invalid_scale_value(): arr = np.array([[np.log(0.25), np.log(0.25), np.log(0.25)], [np.log(2), np.log(2), np.log(2)]]) df = pd.DataFrame(arr, columns=["V1", "V2", "V3"], dtype=np.float64) with pytest.raises(NumericValueSignException): - inverse_alr(df, "d", 0) + inverse_alr(df, denominator_column="d", scale=0) with 
pytest.raises(NumericValueSignException): - inverse_alr(df, "d", -7) + inverse_alr(df, denominator_column="d", scale=-7) + + +def test_inverse_alr_with_invalid_columns(): + """Test that providing invalid columns raises the correct exception.""" + arr = np.array([[np.log(0.25), np.log(0.25), np.log(0.25)], [np.log(2), np.log(2), np.log(2)]]) + df = pd.DataFrame(arr, columns=["V1", "V2", "V3"], dtype=np.float64) + with pytest.raises(InvalidColumnException): + inverse_alr(df, columns=["a"], denominator_column="V1") + with pytest.raises(InvalidColumnException): + inverse_alr(df, columns=["a", "b", "c"], denominator_column="V1") From 467d20160f2f94d7b9f49bc56293ecc98eda99ff Mon Sep 17 00:00:00 2001 From: msorvoja Date: Thu, 14 Nov 2024 14:43:41 +0200 Subject: [PATCH 04/30] refactor(PLR): Add option to select columns --- eis_toolkit/cli.py | 5 +-- eis_toolkit/transformations/coda/plr.py | 46 +++++++++++++++++++------ tests/transformations/coda/plr_test.py | 36 ++++++++++++++++--- 3 files changed, 71 insertions(+), 16 deletions(-) diff --git a/eis_toolkit/cli.py b/eis_toolkit/cli.py index 781f1e7b..ef69a3e2 100644 --- a/eis_toolkit/cli.py +++ b/eis_toolkit/cli.py @@ -3205,7 +3205,8 @@ def pairwise_logratio_cli( def single_plr_transform_cli( input_vector: INPUT_FILE_OPTION, output_vector: OUTPUT_FILE_OPTION, - column: str = typer.Option(), + numerator: str = typer.Option(), + denominators: Annotated[List[str], typer.Option()] = None ): """Perform a pivot logratio transformation on the selected column.""" from eis_toolkit.transformations.coda.plr import single_plr_transform @@ -3217,7 +3218,7 @@ def single_plr_transform_cli( df = pd.DataFrame(gdf.drop(columns="geometry")) typer.echo("Progress: 25%") - out_series = single_plr_transform(df=df, column=column) + out_series = single_plr_transform(df=df, numerator=numerator, denominators=denominators) typer.echo("Progess 75%") # NOTE: Output of single_plr_transform might be changed to DF in the future, to automatically do the 
following diff --git a/eis_toolkit/transformations/coda/plr.py b/eis_toolkit/transformations/coda/plr.py index d0721db0..39c1b5f8 100644 --- a/eis_toolkit/transformations/coda/plr.py +++ b/eis_toolkit/transformations/coda/plr.py @@ -1,6 +1,7 @@ import numpy as np import pandas as pd from beartype import beartype +from beartype.typing import Optional, Sequence from scipy.stats import gmean from eis_toolkit.exceptions import InvalidColumnException, InvalidParameterValueException @@ -37,6 +38,9 @@ def _single_plr_transform_by_index(df: pd.DataFrame, column_ind: int) -> pd.Seri columns = [col for col in df.columns] subcomposition = [columns[i] for i in range(len(columns)) if i > column_ind] c = len(subcomposition) + + if c == 0: + raise InvalidColumnException("No columns found to the right of the numerator.") scaling_factor = _calculate_plr_scaling_factor(c) # A series to hold the transformed rows @@ -57,7 +61,7 @@ def _single_plr_transform(df: pd.DataFrame, column: str) -> pd.Series: @beartype -def single_plr_transform(df: pd.DataFrame, column: str) -> pd.Series: +def single_plr_transform(df: pd.DataFrame, numerator: str, denominators: Optional[Sequence[str]] = None) -> pd.Series: """ Perform a pivot logratio transformation on the selected column. @@ -68,28 +72,46 @@ def single_plr_transform(df: pd.DataFrame, column: str) -> pd.Series: Args: df: A dataframe of shape [N, D] of compositional data. - column: The name of the numerator column to use for the transformation. + numerator: The name of the numerator column to use for the transformation. + denoinators: The names of the denominator columns to use for the transformation. Returns: A series of length N containing the transforms. Raises: InvalidColumnException: The input column isn't found in the dataframe, or there are no columns - to the right of the given column. + to the right of the given column, or last column selected as numerator, or selected numerator + is in denominators. 
InvalidCompositionException: Data is not normalized to the expected value. NumericValueSignException: Data contains zeros or negative values. """ check_compositional_data(df) - if column not in df.columns: - raise InvalidColumnException(f"The column {column} was not found in the dataframe.") - - idx = df.columns.get_loc(column) + if numerator not in df.columns: + raise InvalidColumnException(f"The column {numerator} was not found in the dataframe.") + idx = df.columns.get_loc(numerator) if idx == len(df.columns) - 1: - raise InvalidColumnException() + raise InvalidColumnException("Can't select last column as numerator.") + + if denominators: + if numerator in denominators: + raise InvalidColumnException("Numerator can't be one of denominators.") - return _single_plr_transform(df, column) + invalid_columns = [col for col in denominators if col not in df.columns] + if invalid_columns: + raise InvalidColumnException(f"The following columns were not found in the dataframe: {invalid_columns}.") + + # Place numerator to the left of the denominators + denominators.insert(0, numerator) + df = df[denominators] + + else: + # Select only columns starting from the numerator + indices = df.columns[idx:].to_list() + df = df[indices] + + return _single_plr_transform(df, numerator) @beartype @@ -106,12 +128,13 @@ def _plr_transform(df: pd.DataFrame) -> pd.DataFrame: @beartype -def plr_transform(df: pd.DataFrame) -> pd.DataFrame: +def plr_transform(df: pd.DataFrame, columns: Optional[Sequence[str]] = None) -> pd.DataFrame: """ Perform a pivot logratio transformation on the dataframe, returning the full set of transforms. Args: df: A dataframe of shape [N, D] of compositional data. + columns: The names of the columns to use for the transformation. Returns: A dataframe of shape [N, D-1] containing the set of PLR transformed data. 
@@ -123,4 +146,7 @@ def plr_transform(df: pd.DataFrame) -> pd.DataFrame: """ check_compositional_data(df) + if columns: + df = df[columns] + return rename_columns_by_pattern(_plr_transform(df)) diff --git a/tests/transformations/coda/plr_test.py b/tests/transformations/coda/plr_test.py index d5bd3672..3beda72f 100644 --- a/tests/transformations/coda/plr_test.py +++ b/tests/transformations/coda/plr_test.py @@ -11,13 +11,13 @@ def test_single_plr_transform_with_single_composition(): arr = np.array([80, 15, 5]) df = pd.DataFrame(arr[None], columns=["a", "b", "c"]) - result = single_plr_transform(df, "a") + result = single_plr_transform(df, numerator="a", denominators=["b", "c"]) assert result[0] == pytest.approx(1.82, abs=1e-2) result = _single_plr_transform_by_index(df, 0) assert result[0] == pytest.approx(1.82, abs=1e-2) - result = single_plr_transform(df, "b") + result = single_plr_transform(df, numerator="b") assert result[0] == pytest.approx(0.78, abs=1e-2) result = _single_plr_transform_by_index(df, 1) @@ -28,7 +28,7 @@ def test_single_plr_transform_with_simple_data(): """Test the core functionality of a single PLR transform.""" arr = np.array([[80, 15, 5], [75, 18, 7]]) df = pd.DataFrame(arr, columns=["a", "b", "c"]) - result = single_plr_transform(df, "b") + result = single_plr_transform(df, "b", ["c"]) assert result[1] == pytest.approx(0.67, abs=1e-2) @@ -37,7 +37,25 @@ def test_single_plr_transform_with_last_column(): with pytest.raises(InvalidColumnException): arr = np.array([[80, 15, 5], [75, 18, 7]]) df = pd.DataFrame(arr, columns=["a", "b", "c"]) - single_plr_transform(df, "c") + single_plr_transform(df, "c", ["b", "c"]) + + +def test_single_plr_invalid_inputs(): + """Test that invalid inputs raises exceptions.""" + arr = np.array([[80, 15, 5], [75, 18, 7]]) + df = pd.DataFrame(arr, columns=["a", "b", "c"]) + + # Numerator not in df + with pytest.raises(InvalidColumnException): + single_plr_transform(df, "x", ["b", "c"]) + + # Numerator in denominators + 
with pytest.raises(InvalidColumnException): + single_plr_transform(df, "a", ["a", "b"]) + + # Denominators not in df + with pytest.raises(InvalidColumnException): + single_plr_transform(df, "a", ["x", "y"]) def test_plr_transform(): @@ -48,3 +66,13 @@ def test_plr_transform(): assert len(result.columns) == len(df.columns) - 1 expected = pd.DataFrame(np.array([[1.60, 0.19, 0.91], [1.49, 0.43, 0.65]]), columns=["V1", "V2", "V3"]) pd.testing.assert_frame_equal(result, expected, atol=1e-2) + + +def test_plr_transform_with_columns(): + """Test PLR transform with column selection.""" + arr = np.array([[65, 12, 18, 5], [63, 16, 15, 6]]) + df = pd.DataFrame(arr, columns=["a", "b", "c", "d"]) + result = plr_transform(df, columns=["a", "c"]) + assert len(result.columns) == 1 + expected = pd.DataFrame(np.array([[0.91], [1.01]]), columns=["V1"]) + pd.testing.assert_frame_equal(result, expected, atol=1e-2) From 869ba2478ca365dc8438c0e04cb2915e488466ff Mon Sep 17 00:00:00 2001 From: msorvoja Date: Thu, 14 Nov 2024 14:56:40 +0200 Subject: [PATCH 05/30] Run pre-commit --- eis_toolkit/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eis_toolkit/cli.py b/eis_toolkit/cli.py index ef69a3e2..69ff1774 100644 --- a/eis_toolkit/cli.py +++ b/eis_toolkit/cli.py @@ -3206,7 +3206,7 @@ def single_plr_transform_cli( input_vector: INPUT_FILE_OPTION, output_vector: OUTPUT_FILE_OPTION, numerator: str = typer.Option(), - denominators: Annotated[List[str], typer.Option()] = None + denominators: Annotated[List[str], typer.Option()] = None, ): """Perform a pivot logratio transformation on the selected column.""" from eis_toolkit.transformations.coda.plr import single_plr_transform From b0c565915c97391190775d379683deed2e7d37c2 Mon Sep 17 00:00:00 2001 From: msorvoja Date: Mon, 18 Nov 2024 10:27:39 +0200 Subject: [PATCH 06/30] Fix(clr_transform_cli): make columns optional parameter --- eis_toolkit/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/eis_toolkit/cli.py b/eis_toolkit/cli.py index dd8c0aad..d94d6965 100644 --- a/eis_toolkit/cli.py +++ b/eis_toolkit/cli.py @@ -3093,7 +3093,7 @@ def inverse_alr_transform_cli( def clr_transform_cli( input_vector: INPUT_FILE_OPTION, output_vector: OUTPUT_FILE_OPTION, - columns: Annotated[List[str], typer.Option()], + columns: Annotated[List[str], typer.Option()] = None, ): """Perform a centered logratio transformation on the data.""" from eis_toolkit.transformations.coda.clr import clr_transform From f546cfd4203bfa968ea898030434067af220172f Mon Sep 17 00:00:00 2001 From: msorvoja Date: Mon, 18 Nov 2024 11:02:01 +0200 Subject: [PATCH 07/30] feat(plr_transform_cli): add columns parameters --- eis_toolkit/cli.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/eis_toolkit/cli.py b/eis_toolkit/cli.py index d94d6965..a50aeecc 100644 --- a/eis_toolkit/cli.py +++ b/eis_toolkit/cli.py @@ -3206,7 +3206,7 @@ def single_plr_transform_cli( input_vector: INPUT_FILE_OPTION, output_vector: OUTPUT_FILE_OPTION, numerator: str = typer.Option(), - denominators: Annotated[List[str], typer.Option()] = None, + denominator: Annotated[List[str], typer.Option()] = None, ): """Perform a pivot logratio transformation on the selected column.""" from eis_toolkit.transformations.coda.plr import single_plr_transform @@ -3218,7 +3218,7 @@ def single_plr_transform_cli( df = pd.DataFrame(gdf.drop(columns="geometry")) typer.echo("Progress: 25%") - out_series = single_plr_transform(df=df, numerator=numerator, denominators=denominators) + out_series = single_plr_transform(df=df, numerator=numerator, denominators=denominator) typer.echo("Progess 75%") # NOTE: Output of single_plr_transform might be changed to DF in the future, to automatically do the following @@ -3231,8 +3231,12 @@ def single_plr_transform_cli( # CODA - PLR TRANSFORM @app.command() -def plr_transform_cli(input_vector: INPUT_FILE_OPTION, output_vector: OUTPUT_FILE_OPTION): - """Perform a pivot logratio 
transformation on the dataframe, returning the full set of transforms.""" +def plr_transform_cli( + input_vector: INPUT_FILE_OPTION, + output_vector: OUTPUT_FILE_OPTION, + columns: Annotated[List[str], typer.Option()] = None, +): + """Perform a pivot logratio transformation on the selected columns.""" from eis_toolkit.transformations.coda.plr import plr_transform typer.echo("Progress: 10%") @@ -3242,7 +3246,7 @@ def plr_transform_cli(input_vector: INPUT_FILE_OPTION, output_vector: OUTPUT_FIL df = pd.DataFrame(gdf.drop(columns="geometry")) typer.echo("Progress: 25%") - out_df = plr_transform(df=df) + out_df = plr_transform(df=df, columns=columns) typer.echo("Progess 75%") out_gdf = gpd.GeoDataFrame(out_df, geometry=geometries) From 8ce165f6218094dac76184da0fe9812616c71c36 Mon Sep 17 00:00:00 2001 From: msorvoja Date: Mon, 18 Nov 2024 13:35:51 +0200 Subject: [PATCH 08/30] fix(ALR): Perform check_in_simplex_sample_space after selecting columns --- eis_toolkit/transformations/coda/alr.py | 14 ++++++++------ tests/transformations/coda/alr_test.py | 15 ++++++++------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/eis_toolkit/transformations/coda/alr.py b/eis_toolkit/transformations/coda/alr.py index 17434564..2a1b4d4f 100644 --- a/eis_toolkit/transformations/coda/alr.py +++ b/eis_toolkit/transformations/coda/alr.py @@ -3,7 +3,7 @@ import numpy as np import pandas as pd from beartype import beartype -from beartype.typing import List, Optional, Sequence +from beartype.typing import Optional, Sequence from eis_toolkit.exceptions import InvalidColumnException, NumericValueSignException from eis_toolkit.utilities.aitchison_geometry import _closure @@ -43,7 +43,6 @@ def alr_transform( InvalidCompositionException: Data is not normalized to the expected value. NumericValueSignException: Data contains zeros or negative values. 
""" - check_in_simplex_sample_space(df) if denominator_column is not None and denominator_column not in df.columns: raise InvalidColumnException(f"The column {denominator_column} was not found in the dataframe.") @@ -60,9 +59,12 @@ def alr_transform( if invalid_columns: raise InvalidColumnException(f"The following columns were not found in the dataframe: {invalid_columns}.") columns_to_transform = columns + df = df[columns_to_transform] else: columns_to_transform = df.columns.to_list() + check_in_simplex_sample_space(df) + if not keep_denominator_column and denominator_column in columns_to_transform: columns_to_transform.remove(denominator_column) @@ -70,10 +72,8 @@ def alr_transform( @beartype -def _inverse_alr(df: pd.DataFrame, columns: List[str], denominator_column: str, scale: Number = 1.0) -> pd.DataFrame: +def _inverse_alr(df: pd.DataFrame, denominator_column: str, scale: Number = 1.0) -> pd.DataFrame: dfc = df.copy() - dfc = dfc[columns] - if denominator_column not in dfc.columns.values: # Add the denominator column dfc[denominator_column] = 0.0 @@ -113,4 +113,6 @@ def inverse_alr( else: columns_to_transform = df.columns.to_list() - return _inverse_alr(df, columns_to_transform, denominator_column, scale) + df = df[columns_to_transform] + + return _inverse_alr(df, denominator_column, scale) diff --git a/tests/transformations/coda/alr_test.py b/tests/transformations/coda/alr_test.py index 4afce9ce..edba1028 100644 --- a/tests/transformations/coda/alr_test.py +++ b/tests/transformations/coda/alr_test.py @@ -33,17 +33,18 @@ def test_alr_transform(): def test_alr_transform_with_columns(): """Test ALR transform with column selection.""" - alr = alr_transform(SAMPLE_DATAFRAME, columns=["a", "c", "d"], denominator_column="c", keep_denominator_column=True) + arr = np.random.dirichlet(np.ones(4), size=4) + df = pd.DataFrame(arr, columns=["a", "b", "c", "d"], dtype=np.float64) + df["e"] = ["value1", "value2", "value3", "value4"] + + result = alr_transform(df, 
columns=["a", "b", "c", "d"], denominator_column="b", keep_denominator_column=True) expected = pd.DataFrame( - { - "V1": [np.log(65 / 18), np.log(63 / 15)], - "V2": [np.log(18 / 18), np.log(15 / 15)], - "V3": [np.log(5 / 18), np.log(6 / 15)], - }, + np.log(arr / arr[:, 1, None]), + columns=["V1", "V2", "V3", "V4"], dtype=np.float64, ) - pd.testing.assert_frame_equal(alr, expected) + pd.testing.assert_frame_equal(result, expected) def test_alr_transform_with_invalid_denominator_column(): From da02d545318ec7f3d898332d1a0c3cfd7c185603 Mon Sep 17 00:00:00 2001 From: msorvoja Date: Mon, 18 Nov 2024 13:36:17 +0200 Subject: [PATCH 09/30] fix(CLR): Perform check_in_simplex_sample_space after selecting columns --- eis_toolkit/transformations/coda/clr.py | 20 +++++++++++--------- tests/transformations/coda/clr_test.py | 12 ++++++++++-- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/eis_toolkit/transformations/coda/clr.py b/eis_toolkit/transformations/coda/clr.py index a03cc971..d5cf7761 100644 --- a/eis_toolkit/transformations/coda/clr.py +++ b/eis_toolkit/transformations/coda/clr.py @@ -18,8 +18,8 @@ def _centered_ratio(row: pd.Series) -> pd.Series: @beartype -def _clr_transform(df: pd.DataFrame, columns: Sequence[str]) -> pd.DataFrame: - dfc = df[columns].copy() +def _clr_transform(df: pd.DataFrame) -> pd.DataFrame: + dfc = df.copy() dfc = dfc.apply(_centered_ratio, axis=1) return np.log(dfc) @@ -42,24 +42,23 @@ def clr_transform(df: pd.DataFrame, columns: Optional[Sequence[str]] = None) -> InvalidCompositionException: Data is not normalized to the expected value. NumericValueSignException: Data contains zeros or negative values. 
""" - check_in_simplex_sample_space(df) if columns: invalid_columns = [col for col in columns if col not in df.columns] if invalid_columns: raise InvalidColumnException(f"The following columns were not found in the dataframe: {invalid_columns}.") columns_to_transform = columns + df = df[columns_to_transform] else: columns_to_transform = df.columns.to_list() - return rename_columns_by_pattern(_clr_transform(df, columns_to_transform)) + check_in_simplex_sample_space(df) + return rename_columns_by_pattern(_clr_transform(df)) -@beartype -def _inverse_clr(df: pd.DataFrame, columns: Optional[Sequence[str]] = None, scale: Number = 1.0) -> pd.DataFrame: - if columns: - df = df[columns] +@beartype +def _inverse_clr(df: pd.DataFrame, scale: Number = 1.0) -> pd.DataFrame: return _closure(np.exp(df), scale) @@ -98,7 +97,10 @@ def inverse_clr( else: columns_to_transform = df.columns.to_list() - inverse_data = _inverse_clr(df, columns_to_transform, scale) + dfc = df.copy() + dfc = dfc[columns_to_transform] + + inverse_data = _inverse_clr(dfc, scale) if colnames: return rename_columns(inverse_data, colnames) diff --git a/tests/transformations/coda/clr_test.py b/tests/transformations/coda/clr_test.py index 5dc05e4e..59a3e4a8 100644 --- a/tests/transformations/coda/clr_test.py +++ b/tests/transformations/coda/clr_test.py @@ -25,8 +25,16 @@ def test_clr_transform(): def test_clr_transform_with_columns(): """Test CLR transform with column selection.""" - result = clr_transform(SAMPLE_DATAFRAME, columns=["a", "b", "d"]) - expected = pd.DataFrame({"V1": [1.42, 1.24], "V2": [-0.27, -0.13], "V3": [-1.15, -1.11]}, dtype=np.float64) + arr = np.random.dirichlet(np.ones(4), size=4) + df = pd.DataFrame(arr, columns=["a", "b", "c", "d"], dtype=np.float64) + df["e"] = ["value1", "value2", "value3", "value4"] + result = clr_transform(df, columns=["a", "b", "c", "d"]) + geometric_means = np.prod(arr, axis=1) ** (1 / arr.shape[1]) + expected = pd.DataFrame( + np.log(arr / geometric_means[:, 
None]), + columns=["V1", "V2", "V3", "V4"], + dtype=np.float64, + ) pd.testing.assert_frame_equal(result, expected, atol=1e-2) From 72deedb898c294000c27b15c46c8f18c52535c7e Mon Sep 17 00:00:00 2001 From: msorvoja Date: Mon, 25 Nov 2024 07:56:14 +0200 Subject: [PATCH 10/30] fix(PLR): Perform check_in_simplex_space after selecting columns --- eis_toolkit/transformations/coda/plr.py | 11 +++++--- tests/transformations/coda/plr_test.py | 34 +++++++++++++------------ 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/eis_toolkit/transformations/coda/plr.py b/eis_toolkit/transformations/coda/plr.py index f9520752..faade971 100644 --- a/eis_toolkit/transformations/coda/plr.py +++ b/eis_toolkit/transformations/coda/plr.py @@ -85,7 +85,6 @@ def single_plr_transform(df: pd.DataFrame, numerator: str, denominator: Optional InvalidCompositionException: Data is not normalized to the expected value. NumericValueSignException: Data contains zeros or negative values. """ - check_in_simplex_sample_space(df) if numerator not in df.columns: raise InvalidColumnException(f"The column {numerator} was not found in the dataframe.") @@ -111,6 +110,8 @@ def single_plr_transform(df: pd.DataFrame, numerator: str, denominator: Optional indices = df.columns[idx:].to_list() df = df[indices] + check_in_simplex_sample_space(df) + return _single_plr_transform(df, numerator) @@ -140,13 +141,17 @@ def plr_transform(df: pd.DataFrame, columns: Optional[Sequence[str]] = None) -> A dataframe of shape [N, D-1] containing the set of PLR transformed data. Raises: - InvalidColumnException: The data contains one or more zeros. + InvalidColumnException: The data contains one or more zeros, or input column(s) not found in the dataframe. InvalidCompositionException: Data is not normalized to the expected value. NumericValueSignException: Data contains zeros or negative values. 
""" - check_in_simplex_sample_space(df) if columns: + invalid_columns = [col for col in columns if col not in df.columns] + if invalid_columns: + raise InvalidColumnException(f"The following columns were not found in the dataframe: {invalid_columns}.") df = df[columns] + check_in_simplex_sample_space(df) + return rename_columns_by_pattern(_plr_transform(df)) diff --git a/tests/transformations/coda/plr_test.py b/tests/transformations/coda/plr_test.py index dc29b253..e9693d48 100644 --- a/tests/transformations/coda/plr_test.py +++ b/tests/transformations/coda/plr_test.py @@ -8,28 +8,30 @@ def test_single_plr_transform_with_single_composition(): """Test a single PLR transform operation with a single composition.""" - arr = np.array([80, 15, 5]) - df = pd.DataFrame(arr[None], columns=["a", "b", "c"]) + arr1 = np.array([80, 15, 5]) + df1 = pd.DataFrame(arr1[None], columns=["a", "b", "c"]) + arr2 = [0, 80, 15, 5] + df2 = pd.DataFrame([arr2], columns=["a", "b", "c", "d"]) - result = single_plr_transform(df, numerator="a", denominator=["b", "c"]) + result = single_plr_transform(df1, numerator="a", denominator=["b", "c"]) assert result[0] == pytest.approx(1.82, abs=1e-2) - result = _single_plr_transform_by_index(df, 0) + result = _single_plr_transform_by_index(df1, 0) assert result[0] == pytest.approx(1.82, abs=1e-2) - result = single_plr_transform(df, numerator="b") - assert result[0] == pytest.approx(0.78, abs=1e-2) + result = single_plr_transform(df2, numerator="b") + assert result[0] == pytest.approx(1.82, abs=1e-2) - result = _single_plr_transform_by_index(df, 1) - assert result[0] == pytest.approx(0.78, abs=1e-2) + result = _single_plr_transform_by_index(df2, 1) + assert result[0] == pytest.approx(1.82, abs=1e-2) def test_single_plr_transform_with_simple_data(): """Test the core functionality of a single PLR transform.""" - arr = np.array([[80, 15, 5], [75, 18, 7]]) + arr = np.array([[80, 15, 5], [75, 20, 5]]) df = pd.DataFrame(arr, columns=["a", "b", "c"]) - result = 
single_plr_transform(df, "b", ["c"]) - assert result[1] == pytest.approx(0.67, abs=1e-2) + result = single_plr_transform(df, "a", ["b", "c"]) + assert result[1] == pytest.approx(1.65, abs=1e-2) def test_single_plr_transform_with_last_column(): @@ -70,9 +72,9 @@ def test_plr_transform(): def test_plr_transform_with_columns(): """Test PLR transform with column selection.""" - arr = np.array([[65, 12, 18, 5], [63, 16, 15, 6]]) - df = pd.DataFrame(arr, columns=["a", "b", "c", "d"]) - result = plr_transform(df, columns=["a", "c"]) - assert len(result.columns) == 1 - expected = pd.DataFrame(np.array([[0.91], [1.01]]), columns=["V1"]) + arr = np.array([[0, 65, 12, 18, 5], [0, 63, 16, 15, 6]]) + df = pd.DataFrame(arr, columns=["a", "b", "c", "d", "e"]) + result = plr_transform(df, columns=["b", "c", "d", "e"]) + assert len(result.columns) == 3 + expected = pd.DataFrame(np.array([[1.60, 0.19, 0.91], [1.49, 0.43, 0.65]]), columns=["V1", "V2", "V3"]) pd.testing.assert_frame_equal(result, expected, atol=1e-2) From cdf056e1301a4cf6842e56de6162bd7abcbda99b Mon Sep 17 00:00:00 2001 From: msorvoja Date: Tue, 17 Dec 2024 09:11:17 +0200 Subject: [PATCH 11/30] feat(closure): add function for performing closure on DataFrame --- eis_toolkit/utilities/miscellaneous.py | 24 ++++++++++++++++++++++++ tests/utilities/miscellaneous_test.py | 9 +++++++++ 2 files changed, 33 insertions(+) diff --git a/eis_toolkit/utilities/miscellaneous.py b/eis_toolkit/utilities/miscellaneous.py index 7e0fcb02..f08a7b56 100644 --- a/eis_toolkit/utilities/miscellaneous.py +++ b/eis_toolkit/utilities/miscellaneous.py @@ -375,3 +375,27 @@ def toggle_gdal_exceptions(): finally: if not already_has_exceptions_enabled: gdal.DontUseExceptions() + + +@beartype +def perform_closure(df: pd.DataFrame, columns: Optional[Sequence[str]] = None, closure_target: int = 1) -> pd.DataFrame: + """ + Peform closure on selected columns of a DataFrame. 
+ + Values in the specified columns of each row are scaled so that they sum to 'closure_target'. + + Args: + df: Input DataFrame. + columns: Names of the columns on which to perform closure. If not provided, all columns are used. + closure_target: Row sum of the selected columns after performing closure. Defaults to 1. + + Returns: + A DataFrame with each row sum to the closure target. + """ + if columns is None: + columns = df.columns + + row_sums = df[columns].sum(axis=1) + df[columns] = df[columns].div(row_sums, axis=0).mul(closure_target) + + return df diff --git a/tests/utilities/miscellaneous_test.py b/tests/utilities/miscellaneous_test.py index 8178fe16..bdfb3591 100644 --- a/tests/utilities/miscellaneous_test.py +++ b/tests/utilities/miscellaneous_test.py @@ -4,6 +4,7 @@ from eis_toolkit.exceptions import InvalidColumnIndexException from eis_toolkit.utilities.miscellaneous import ( + perform_closure, rename_columns, rename_columns_by_pattern, replace_values, @@ -71,3 +72,11 @@ def test_rename_columns_with_too_few_columns(): target_df = pd.DataFrame({"a": [1, 2], "b": [3, 4], "col3": [5, 6]}) renamed_df = rename_columns(df, colnames=colnames) pd.testing.assert_frame_equal(renamed_df, target_df) + + +def test_perform_closure(): + """Test that performing closure on a DataFrame works as expected.""" + df = pd.DataFrame({"col1": [1, 2, 1], "col2": [4, 8, 1], "col3": [3, 1, 6]}) + closured_df = perform_closure(df, columns=["col1", "col2"], closure_target=100) + expected_df = pd.DataFrame({"col1": [20.0, 20.0, 50.0], "col2": [80.0, 80.0, 50.0], "col3": [3, 1, 6]}) + pd.testing.assert_frame_equal(closured_df, expected_df) From dacd83095b378ceba413827214061aa27917fc53 Mon Sep 17 00:00:00 2001 From: msorvoja Date: Tue, 17 Dec 2024 09:19:24 +0200 Subject: [PATCH 12/30] feat(PLR): add parameter for performing closure on the input DataFrame --- eis_toolkit/transformations/coda/plr.py | 56 ++++++++++--------------- tests/transformations/coda/plr_test.py | 36 
++++++---------- 2 files changed, 34 insertions(+), 58 deletions(-) diff --git a/eis_toolkit/transformations/coda/plr.py b/eis_toolkit/transformations/coda/plr.py index faade971..998dc041 100644 --- a/eis_toolkit/transformations/coda/plr.py +++ b/eis_toolkit/transformations/coda/plr.py @@ -7,7 +7,7 @@ from eis_toolkit.exceptions import InvalidColumnException, InvalidParameterValueException from eis_toolkit.utilities.checks.compositional import check_in_simplex_sample_space from eis_toolkit.utilities.checks.parameter import check_numeric_value_sign -from eis_toolkit.utilities.miscellaneous import rename_columns_by_pattern +from eis_toolkit.utilities.miscellaneous import perform_closure, rename_columns_by_pattern @beartype @@ -39,8 +39,6 @@ def _single_plr_transform_by_index(df: pd.DataFrame, column_ind: int) -> pd.Seri subcomposition = [columns[i] for i in range(len(columns)) if i > column_ind] c = len(subcomposition) - if c == 0: - raise InvalidColumnException("No columns found to the right of the numerator.") scaling_factor = _calculate_plr_scaling_factor(c) # A series to hold the transformed rows @@ -53,15 +51,7 @@ def _single_plr_transform_by_index(df: pd.DataFrame, column_ind: int) -> pd.Seri @beartype -def _single_plr_transform(df: pd.DataFrame, column: str) -> pd.Series: - - idx = df.columns.get_loc(column) - - return _single_plr_transform_by_index(df, idx) - - -@beartype -def single_plr_transform(df: pd.DataFrame, numerator: str, denominator: Optional[Sequence[str]] = None) -> pd.Series: +def single_plr_transform(df: pd.DataFrame, column: str, closure_target: Optional[int] = None) -> pd.Series: """ Perform a pivot logratio transformation on the selected column. @@ -72,9 +62,8 @@ def single_plr_transform(df: pd.DataFrame, numerator: str, denominator: Optional Args: df: A dataframe of shape [N, D] of compositional data. - numerator: The name of the numerator column to use for the transformation. 
- denoinator: The names of the denominator columns to use for the transformation. - + column: The name of the numerator column to use for the transformation. + closure_target: Target row sum for closure. If None, no closure is performed. Returns: A series of length N containing the transforms. @@ -86,33 +75,24 @@ def single_plr_transform(df: pd.DataFrame, numerator: str, denominator: Optional NumericValueSignException: Data contains zeros or negative values. """ - if numerator not in df.columns: - raise InvalidColumnException(f"The column {numerator} was not found in the dataframe.") + if column not in df.columns: + raise InvalidColumnException(f"The column {column} was not found in the dataframe.") - idx = df.columns.get_loc(numerator) + idx = df.columns.get_loc(column) if idx == len(df.columns) - 1: raise InvalidColumnException("Can't select last column as numerator.") - if denominator: - if numerator in denominator: - raise InvalidColumnException("Numerator can't be one of denominators.") - - invalid_columns = [col for col in denominator if col not in df.columns] - if invalid_columns: - raise InvalidColumnException(f"The following columns were not found in the dataframe: {invalid_columns}.") + if closure_target is not None: + # Perform closure on columns starting from numerator "to the right" + columns = df.columns[idx:].to_list() + df = perform_closure(df, columns, closure_target) - # Place numerator to the left of the denominators - denominator.insert(0, numerator) - df = df[denominator] + check_in_simplex_sample_space(df[columns]) else: - # Select only columns starting from the numerator - indices = df.columns[idx:].to_list() - df = df[indices] + check_in_simplex_sample_space(df) - check_in_simplex_sample_space(df) - - return _single_plr_transform(df, numerator) + return _single_plr_transform_by_index(df, idx) @beartype @@ -129,13 +109,16 @@ def _plr_transform(df: pd.DataFrame) -> pd.DataFrame: @beartype -def plr_transform(df: pd.DataFrame, columns: 
Optional[Sequence[str]] = None) -> pd.DataFrame: +def plr_transform( + df: pd.DataFrame, columns: Optional[Sequence[str]] = None, closure_target: Optional[int] = None +) -> pd.DataFrame: """ Perform a pivot logratio transformation on the dataframe, returning the full set of transforms. Args: df: A dataframe of shape [N, D] of compositional data. columns: The names of the columns to use for the transformation. + closure_target: Target row sum for closure. If None, no closure is performed. Returns: A dataframe of shape [N, D-1] containing the set of PLR transformed data. @@ -152,6 +135,9 @@ def plr_transform(df: pd.DataFrame, columns: Optional[Sequence[str]] = None) -> raise InvalidColumnException(f"The following columns were not found in the dataframe: {invalid_columns}.") df = df[columns] + if closure_target is not None: + df = perform_closure(df, closure_target=1) + check_in_simplex_sample_space(df) return rename_columns_by_pattern(_plr_transform(df)) diff --git a/tests/transformations/coda/plr_test.py b/tests/transformations/coda/plr_test.py index e9693d48..372d3452 100644 --- a/tests/transformations/coda/plr_test.py +++ b/tests/transformations/coda/plr_test.py @@ -8,29 +8,27 @@ def test_single_plr_transform_with_single_composition(): """Test a single PLR transform operation with a single composition.""" - arr1 = np.array([80, 15, 5]) - df1 = pd.DataFrame(arr1[None], columns=["a", "b", "c"]) - arr2 = [0, 80, 15, 5] - df2 = pd.DataFrame([arr2], columns=["a", "b", "c", "d"]) + arr = np.array([80, 15, 5]) + df = pd.DataFrame(arr[None], columns=["a", "b", "c"]) - result = single_plr_transform(df1, numerator="a", denominator=["b", "c"]) + result = single_plr_transform(df, "a") assert result[0] == pytest.approx(1.82, abs=1e-2) - result = _single_plr_transform_by_index(df1, 0) + result = _single_plr_transform_by_index(df, 0) assert result[0] == pytest.approx(1.82, abs=1e-2) - result = single_plr_transform(df2, numerator="b") - assert result[0] == pytest.approx(1.82, 
abs=1e-2) + result = single_plr_transform(df, "b", closure_target=100) + assert result[0] == pytest.approx(0.78, abs=1e-2) - result = _single_plr_transform_by_index(df2, 1) - assert result[0] == pytest.approx(1.82, abs=1e-2) + result = _single_plr_transform_by_index(df, 1) + assert result[0] == pytest.approx(0.78, abs=1e-2) def test_single_plr_transform_with_simple_data(): """Test the core functionality of a single PLR transform.""" arr = np.array([[80, 15, 5], [75, 20, 5]]) df = pd.DataFrame(arr, columns=["a", "b", "c"]) - result = single_plr_transform(df, "a", ["b", "c"]) + result = single_plr_transform(df, "a") assert result[1] == pytest.approx(1.65, abs=1e-2) @@ -39,25 +37,17 @@ def test_single_plr_transform_with_last_column(): with pytest.raises(InvalidColumnException): arr = np.array([[80, 15, 5], [75, 18, 7]]) df = pd.DataFrame(arr, columns=["a", "b", "c"]) - single_plr_transform(df, "c", ["b", "c"]) + single_plr_transform(df, "c") -def test_single_plr_invalid_inputs(): - """Test that invalid inputs raises exceptions.""" +def test_single_plr_invalid_column(): + """Test that invalid column name raises exceptions.""" arr = np.array([[80, 15, 5], [75, 18, 7]]) df = pd.DataFrame(arr, columns=["a", "b", "c"]) # Numerator not in df with pytest.raises(InvalidColumnException): - single_plr_transform(df, "x", ["b", "c"]) - - # Numerator in denominators - with pytest.raises(InvalidColumnException): - single_plr_transform(df, "a", ["a", "b"]) - - # Denominators not in df - with pytest.raises(InvalidColumnException): - single_plr_transform(df, "a", ["x", "y"]) + single_plr_transform(df, "x") def test_plr_transform(): From 5f47ff21b27bbd0f94759ed9dab3ff945d37e28a Mon Sep 17 00:00:00 2001 From: msorvoja Date: Tue, 17 Dec 2024 09:49:35 +0200 Subject: [PATCH 13/30] fix(closure): improve docstring --- eis_toolkit/utilities/miscellaneous.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eis_toolkit/utilities/miscellaneous.py 
b/eis_toolkit/utilities/miscellaneous.py index f08a7b56..7e050905 100644 --- a/eis_toolkit/utilities/miscellaneous.py +++ b/eis_toolkit/utilities/miscellaneous.py @@ -390,7 +390,7 @@ def perform_closure(df: pd.DataFrame, columns: Optional[Sequence[str]] = None, c closure_target: Row sum of the selected columns after performing closure. Defaults to 1. Returns: - A DataFrame with each row sum to the closure target. + A DataFrame where on each row the values in the selected columns sum to the closure target. """ if columns is None: columns = df.columns From 33f4d784377fb7ad257d3a75c3a9cdbdfe61e4dd Mon Sep 17 00:00:00 2001 From: msorvoja Date: Tue, 17 Dec 2024 10:07:29 +0200 Subject: [PATCH 14/30] feat(ALR): add closure_target parameter --- eis_toolkit/transformations/coda/alr.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/eis_toolkit/transformations/coda/alr.py b/eis_toolkit/transformations/coda/alr.py index 2a1b4d4f..1e3ec230 100644 --- a/eis_toolkit/transformations/coda/alr.py +++ b/eis_toolkit/transformations/coda/alr.py @@ -8,7 +8,7 @@ from eis_toolkit.exceptions import InvalidColumnException, NumericValueSignException from eis_toolkit.utilities.aitchison_geometry import _closure from eis_toolkit.utilities.checks.compositional import check_in_simplex_sample_space -from eis_toolkit.utilities.miscellaneous import rename_columns_by_pattern +from eis_toolkit.utilities.miscellaneous import perform_closure, rename_columns_by_pattern @beartype @@ -24,6 +24,7 @@ def alr_transform( columns: Optional[Sequence[str]] = None, denominator_column: Optional[str] = None, keep_denominator_column: bool = False, + closure_target: Optional[int] = None, ) -> pd.DataFrame: """ Perform an additive logratio transformation on the data. @@ -34,6 +35,7 @@ def alr_transform( denominator_column: The name of the column to be used as the denominator column. keep_denominator_column: Whether to include the denominator column in the result. 
If True, the returned dataframe retains its original shape. + closure_target: Target row sum for closure. If None, no closure is performed. Returns: A new dataframe containing the ALR transformed data. @@ -63,6 +65,9 @@ def alr_transform( else: columns_to_transform = df.columns.to_list() + if closure_target is not None: + df = perform_closure(df, columns_to_transform, closure_target) + check_in_simplex_sample_space(df) if not keep_denominator_column and denominator_column in columns_to_transform: From 3bcfedddd26aa0b4a0f89cfe846744965f40745c Mon Sep 17 00:00:00 2001 From: msorvoja Date: Tue, 17 Dec 2024 10:07:47 +0200 Subject: [PATCH 15/30] feat(CLR): add closure_target parameter --- eis_toolkit/transformations/coda/clr.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/eis_toolkit/transformations/coda/clr.py b/eis_toolkit/transformations/coda/clr.py index d5cf7761..7a6c4728 100644 --- a/eis_toolkit/transformations/coda/clr.py +++ b/eis_toolkit/transformations/coda/clr.py @@ -9,7 +9,7 @@ from eis_toolkit.exceptions import InvalidColumnException, NumericValueSignException from eis_toolkit.utilities.aitchison_geometry import _closure from eis_toolkit.utilities.checks.compositional import check_in_simplex_sample_space -from eis_toolkit.utilities.miscellaneous import rename_columns, rename_columns_by_pattern +from eis_toolkit.utilities.miscellaneous import perform_closure, rename_columns, rename_columns_by_pattern @beartype @@ -26,13 +26,18 @@ def _clr_transform(df: pd.DataFrame) -> pd.DataFrame: @beartype -def clr_transform(df: pd.DataFrame, columns: Optional[Sequence[str]] = None) -> pd.DataFrame: +def clr_transform( + df: pd.DataFrame, + columns: Optional[Sequence[str]] = None, + closure_target: Optional[int] = None, +) -> pd.DataFrame: """ Perform a centered logratio transformation on the data. Args: df: A dataframe of compositional data. columns: The names of the columns to be transformed. 
+ closure_target: Target row sum for closure. If None, no closure is performed. Returns: A new dataframe containing the CLR transformed data. @@ -52,6 +57,9 @@ def clr_transform(df: pd.DataFrame, columns: Optional[Sequence[str]] = None) -> else: columns_to_transform = df.columns.to_list() + if closure_target is not None: + df = perform_closure(df, columns_to_transform, closure_target) + check_in_simplex_sample_space(df) return rename_columns_by_pattern(_clr_transform(df)) From 8f1679d55b1bcb538ca725c45b3df306a782fac6 Mon Sep 17 00:00:00 2001 From: msorvoja Date: Tue, 17 Dec 2024 10:07:57 +0200 Subject: [PATCH 16/30] feat(ILR): add closure_target parameter --- eis_toolkit/transformations/coda/ilr.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/eis_toolkit/transformations/coda/ilr.py b/eis_toolkit/transformations/coda/ilr.py index ed8831f1..1499b87c 100644 --- a/eis_toolkit/transformations/coda/ilr.py +++ b/eis_toolkit/transformations/coda/ilr.py @@ -1,13 +1,14 @@ import numpy as np import pandas as pd from beartype import beartype -from beartype.typing import Sequence +from beartype.typing import Optional, Sequence from scipy.stats import gmean from eis_toolkit.exceptions import InvalidColumnException, InvalidCompositionException, InvalidParameterValueException from eis_toolkit.utilities.checks.compositional import check_in_simplex_sample_space from eis_toolkit.utilities.checks.dataframe import check_columns_valid from eis_toolkit.utilities.checks.parameter import check_lists_overlap, check_numeric_value_sign +from eis_toolkit.utilities.miscellaneous import perform_closure @beartype @@ -64,7 +65,10 @@ def _single_ilr_transform( @beartype def single_ilr_transform( - df: pd.DataFrame, subcomposition_1: Sequence[str], subcomposition_2: Sequence[str] + df: pd.DataFrame, + subcomposition_1: Sequence[str], + subcomposition_2: Sequence[str], + closure_target: Optional[int] = None, ) -> pd.Series: """ Perform a single isometric logratio 
transformation on the provided subcompositions. @@ -75,6 +79,7 @@ def single_ilr_transform( df: A dataframe of shape [N, D] of compositional data. subcomposition_1: Names of the columns in the numerator part of the ratio. subcomposition_2: Names of the columns in the denominator part of the ratio. + closure_target: Target row sum for closure. If None, no closure is performed. Returns: A series of length N containing the transforms. @@ -86,6 +91,11 @@ def single_ilr_transform( InvalidParameterValueException: At least one subcomposition provided was empty. NumericValueSignException: Data contains zeros or negative values. """ + + if closure_target is not None: + columns = subcomposition_1 + subcomposition_2 + df = perform_closure(df, columns, closure_target) + check_in_simplex_sample_space(df) if not (subcomposition_1 and subcomposition_2): From 85064f2ef8cbabc2d2ea9251caa09ef3cdfbf833 Mon Sep 17 00:00:00 2001 From: msorvoja Date: Wed, 18 Dec 2024 07:48:22 +0200 Subject: [PATCH 17/30] Fix parameter type for closure_target --- eis_toolkit/transformations/coda/alr.py | 2 +- eis_toolkit/transformations/coda/clr.py | 2 +- eis_toolkit/transformations/coda/ilr.py | 16 +++++++++------- eis_toolkit/transformations/coda/plr.py | 6 ++++-- eis_toolkit/utilities/miscellaneous.py | 4 +++- 5 files changed, 18 insertions(+), 12 deletions(-) diff --git a/eis_toolkit/transformations/coda/alr.py b/eis_toolkit/transformations/coda/alr.py index 1e3ec230..72955394 100644 --- a/eis_toolkit/transformations/coda/alr.py +++ b/eis_toolkit/transformations/coda/alr.py @@ -24,7 +24,7 @@ def alr_transform( columns: Optional[Sequence[str]] = None, denominator_column: Optional[str] = None, keep_denominator_column: bool = False, - closure_target: Optional[int] = None, + closure_target: Optional[Number] = None, ) -> pd.DataFrame: """ Perform an additive logratio transformation on the data. 
diff --git a/eis_toolkit/transformations/coda/clr.py b/eis_toolkit/transformations/coda/clr.py index 7a6c4728..b6af75f6 100644 --- a/eis_toolkit/transformations/coda/clr.py +++ b/eis_toolkit/transformations/coda/clr.py @@ -29,7 +29,7 @@ def _clr_transform(df: pd.DataFrame) -> pd.DataFrame: def clr_transform( df: pd.DataFrame, columns: Optional[Sequence[str]] = None, - closure_target: Optional[int] = None, + closure_target: Optional[Number] = None, ) -> pd.DataFrame: """ Perform a centered logratio transformation on the data. diff --git a/eis_toolkit/transformations/coda/ilr.py b/eis_toolkit/transformations/coda/ilr.py index 1499b87c..c4c40ead 100644 --- a/eis_toolkit/transformations/coda/ilr.py +++ b/eis_toolkit/transformations/coda/ilr.py @@ -1,3 +1,5 @@ +from numbers import Number + import numpy as np import pandas as pd from beartype import beartype @@ -68,7 +70,7 @@ def single_ilr_transform( df: pd.DataFrame, subcomposition_1: Sequence[str], subcomposition_2: Sequence[str], - closure_target: Optional[int] = None, + closure_target: Optional[Number] = None, ) -> pd.Series: """ Perform a single isometric logratio transformation on the provided subcompositions. @@ -92,12 +94,6 @@ def single_ilr_transform( NumericValueSignException: Data contains zeros or negative values. 
""" - if closure_target is not None: - columns = subcomposition_1 + subcomposition_2 - df = perform_closure(df, columns, closure_target) - - check_in_simplex_sample_space(df) - if not (subcomposition_1 and subcomposition_2): raise InvalidParameterValueException("A subcomposition should contain at least one column.") @@ -107,4 +103,10 @@ def single_ilr_transform( if check_lists_overlap(subcomposition_1, subcomposition_2): raise InvalidCompositionException("The subcompositions overlap.") + if closure_target is not None: + columns = subcomposition_1 + subcomposition_2 + df = perform_closure(df, columns, closure_target) + + check_in_simplex_sample_space(df) + return _single_ilr_transform(df, subcomposition_1, subcomposition_2) diff --git a/eis_toolkit/transformations/coda/plr.py b/eis_toolkit/transformations/coda/plr.py index 998dc041..c438932b 100644 --- a/eis_toolkit/transformations/coda/plr.py +++ b/eis_toolkit/transformations/coda/plr.py @@ -1,3 +1,5 @@ +from numbers import Number + import numpy as np import pandas as pd from beartype import beartype @@ -51,7 +53,7 @@ def _single_plr_transform_by_index(df: pd.DataFrame, column_ind: int) -> pd.Seri @beartype -def single_plr_transform(df: pd.DataFrame, column: str, closure_target: Optional[int] = None) -> pd.Series: +def single_plr_transform(df: pd.DataFrame, column: str, closure_target: Optional[Number] = None) -> pd.Series: """ Perform a pivot logratio transformation on the selected column. @@ -110,7 +112,7 @@ def _plr_transform(df: pd.DataFrame) -> pd.DataFrame: @beartype def plr_transform( - df: pd.DataFrame, columns: Optional[Sequence[str]] = None, closure_target: Optional[int] = None + df: pd.DataFrame, columns: Optional[Sequence[str]] = None, closure_target: Optional[Number] = None ) -> pd.DataFrame: """ Perform a pivot logratio transformation on the dataframe, returning the full set of transforms. 
diff --git a/eis_toolkit/utilities/miscellaneous.py b/eis_toolkit/utilities/miscellaneous.py index 7e050905..42d526b7 100644 --- a/eis_toolkit/utilities/miscellaneous.py +++ b/eis_toolkit/utilities/miscellaneous.py @@ -378,7 +378,9 @@ def toggle_gdal_exceptions(): @beartype -def perform_closure(df: pd.DataFrame, columns: Optional[Sequence[str]] = None, closure_target: int = 1) -> pd.DataFrame: +def perform_closure( + df: pd.DataFrame, columns: Optional[Sequence[str]] = None, closure_target: Number = 1 +) -> pd.DataFrame: """ Peform closure on selected columns of a DataFrame. From 011a7e38c41067c86b8239cca46068505f812ff0 Mon Sep 17 00:00:00 2001 From: msorvoja Date: Wed, 18 Dec 2024 08:29:39 +0200 Subject: [PATCH 18/30] Select subcomposition columns if closure is performed --- eis_toolkit/transformations/coda/ilr.py | 1 + 1 file changed, 1 insertion(+) diff --git a/eis_toolkit/transformations/coda/ilr.py b/eis_toolkit/transformations/coda/ilr.py index c4c40ead..4c4685dd 100644 --- a/eis_toolkit/transformations/coda/ilr.py +++ b/eis_toolkit/transformations/coda/ilr.py @@ -106,6 +106,7 @@ def single_ilr_transform( if closure_target is not None: columns = subcomposition_1 + subcomposition_2 df = perform_closure(df, columns, closure_target) + df = df[columns] check_in_simplex_sample_space(df) From f47b897b6a4a6f0b6ccb999d2bc076f2d7a37398 Mon Sep 17 00:00:00 2001 From: msorvoja Date: Wed, 18 Dec 2024 08:30:02 +0200 Subject: [PATCH 19/30] Fix notebook --- .../testing_logratio_transformations.ipynb | 82 +++++++++---------- 1 file changed, 37 insertions(+), 45 deletions(-) diff --git a/notebooks/testing_logratio_transformations.ipynb b/notebooks/testing_logratio_transformations.ipynb index f8f62961..4ca0dcd3 100644 --- a/notebooks/testing_logratio_transformations.ipynb +++ b/notebooks/testing_logratio_transformations.ipynb @@ -20,9 +20,8 @@ "name": "stderr", "output_type": "stream", "text": [ - 
"/root/.cache/pypoetry/virtualenvs/eis-toolkit-QEzTY9B6-py3.10/lib/python3.10/site-packages/beartype/_util/hint/pep/utilpeptest.py:347: BeartypeDecorHintPep585DeprecationWarning: PEP 484 type hint typing.Sequence[str] deprecated by PEP 585. This hint is scheduled for removal in the first Python version released after October 5th, 2025. To resolve this, import this hint from \"beartype.typing\" rather than \"typing\". For further commentary and alternatives, see also:\n", - " https://beartype.readthedocs.io/en/latest/api_roar/#pep-585-deprecations\n", - " warn(\n" + "/home/mika/.cache/pypoetry/virtualenvs/eis-toolkit-l5cKD1lZ-py3.10/lib/python3.10/site-packages/geopandas/_compat.py:112: UserWarning: The Shapely GEOS version (3.10.3-CAPI-1.16.1) is incompatible with the GEOS version PyGEOS was compiled with (3.10.4-CAPI-1.16.2). Conversions between both will be slow.\n", + " warnings.warn(\n" ] } ], @@ -40,7 +39,7 @@ "from eis_toolkit.transformations.coda.pairwise import pairwise_logratio, single_pairwise_logratio\n", "from eis_toolkit.transformations.coda.plr import plr_transform, single_plr_transform\n", "\n", - "GEOCHEMICAL_DATA = \"../tests/data/local/coda/IOCG_CLB_Till_Geochem_reg_511p.shp\"" + "GEOCHEMICAL_DATA = \"../tests/data/remote/IOCG_CLB_Till_Geochem_reg_511p.gpkg\"" ] }, { @@ -601,7 +600,7 @@ }, "outputs": [], "source": [ - "sample_alr_inv = inverse_alr(sample_alr, \"d\", 100)" + "sample_alr_inv = inverse_alr(sample_alr, denominator_column=\"d\", scale=100)" ] }, { @@ -757,7 +756,6 @@ " Ca_ppm_511\n", " Fe_ppm_511\n", " Mg_ppm_511\n", - " residual\n", " \n", " \n", " \n", @@ -767,7 +765,6 @@ " 40200.0\n", " 83200.0\n", " 17200.0\n", - " 831800.0\n", " \n", " \n", " 1\n", @@ -775,7 +772,6 @@ " 5000.0\n", " 28300.0\n", " 7520.0\n", - " 945080.0\n", " \n", " \n", " 2\n", @@ -783,7 +779,6 @@ " 3070.0\n", " 14500.0\n", " 4540.0\n", - " 970010.0\n", " \n", " \n", " 3\n", @@ -791,7 +786,6 @@ " 3290.0\n", " 14600.0\n", " 3240.0\n", - " 971570.0\n", " \n", " 
\n", " 4\n", @@ -799,19 +793,18 @@ " 3600.0\n", " 31500.0\n", " 8020.0\n", - " 944380.0\n", " \n", " \n", "\n", "" ], "text/plain": [ - " Al_ppm_511 Ca_ppm_511 Fe_ppm_511 Mg_ppm_511 residual\n", - "0 27600.0 40200.0 83200.0 17200.0 831800.0\n", - "1 14100.0 5000.0 28300.0 7520.0 945080.0\n", - "2 7880.0 3070.0 14500.0 4540.0 970010.0\n", - "3 7300.0 3290.0 14600.0 3240.0 971570.0\n", - "4 12500.0 3600.0 31500.0 8020.0 944380.0" + " Al_ppm_511 Ca_ppm_511 Fe_ppm_511 Mg_ppm_511\n", + "0 27600.0 40200.0 83200.0 17200.0\n", + "1 14100.0 5000.0 28300.0 7520.0\n", + "2 7880.0 3070.0 14500.0 4540.0\n", + "3 7300.0 3290.0 14600.0 3240.0\n", + "4 12500.0 3600.0 31500.0 8020.0" ] }, "execution_count": 21, @@ -824,16 +817,12 @@ "\n", "df = gpd.read_file(GEOCHEMICAL_DATA, include_fields=elements_to_analyze)\n", "df = pd.DataFrame(df.drop(columns='geometry'))\n", - "\n", - "# Add a column for the residual\n", - "\n", - "df[\"residual\"] = million - np.sum(df, axis=1)\n", "df.head()" ] }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 22, "id": "75728aa4-5b2e-46b6-9511-1250bf4b13ae", "metadata": { "tags": [] @@ -843,23 +832,32 @@ "pair_Al_Ca = pairwise_logratio(df, \"Al_ppm_511\", \"Ca_ppm_511\")\n", "pair_Fe_Mg = pairwise_logratio(df, \"Fe_ppm_511\", \"Mg_ppm_511\")\n", "pair_Mg_Al = pairwise_logratio(df, \"Mg_ppm_511\", \"Al_ppm_511\")\n", - "pair_Mg_res = pairwise_logratio(df, \"Mg_ppm_511\", \"residual\")\n", "\n", - "df_alr = alr_transform(df)\n", - "df_alr_Mg = alr_transform(df, \"Mg_ppm_511\")\n", - "df_clr = clr_transform(df)\n", - "df_plr = plr_transform(df)\n", + "df_alr = df.copy()\n", + "df_alr_Mg = df.copy()\n", + "df_clr = df.copy()\n", + "df_plr = df.copy()\n", + "\n", + "# As real world geochemical data will often not satisfy sum to a constant, for example 100, a closure needs to be performed\n", + "\n", + "df_alr = alr_transform(df_alr, closure_target=100)\n", + "df_alr_Mg = alr_transform(df_alr_Mg, denominator_column=\"Mg_ppm_511\", 
closure_target=100)\n", + "df_clr = clr_transform(df_clr, closure_target=100)\n", + "df_plr = plr_transform(df_plr, closure_target=100)\n", "\n", "df_clr_inv = inverse_clr(df_clr, scale=million)\n", "df_alr_inv = inverse_alr(df_alr, \"c\", scale=million)\n", "\n", - "df_ilr_Al_Ca = single_ilr_transform(df, [\"Al_ppm_511\"], [\"Ca_ppm_511\"])\n", - "df_ilr_AlCa_FeMg = single_ilr_transform(df, [\"Al_ppm_511\", \"Ca_ppm_511\"], [\"Fe_ppm_511\", \"Mg_ppm_511\"])" + "df_ilr_Al_Ca = df.copy()\n", + "df_ilr_AlCa_FeMg = df.copy()\n", + "\n", + "df_ilr_Al_Ca = single_ilr_transform(df_ilr_Al_Ca, [\"Al_ppm_511\"], [\"Ca_ppm_511\"], closure_target=100)\n", + "df_ilr_AlCa_FeMg = single_ilr_transform(df_ilr_AlCa_FeMg, [\"Al_ppm_511\", \"Ca_ppm_511\"], [\"Fe_ppm_511\", \"Mg_ppm_511\"], closure_target=100)" ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 23, "id": "e136d05d-671d-420f-95b9-5f350bc7a94c", "metadata": { "tags": [] @@ -876,7 +874,7 @@ "dtype: float64" ] }, - "execution_count": 25, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -887,7 +885,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 24, "id": "ad352680-433a-4026-b7b5-560b682dfb96", "metadata": { "tags": [] @@ -917,7 +915,6 @@ " V1\n", " V2\n", " V3\n", - " V4\n", " \n", " \n", " \n", @@ -926,50 +923,45 @@ " 0.472906\n", " 0.848958\n", " 1.576338\n", - " 3.878683\n", " \n", " \n", " 1\n", " 0.628609\n", " -0.408128\n", " 1.325296\n", - " 4.833703\n", " \n", " \n", " 2\n", " 0.551401\n", " -0.391249\n", " 1.161222\n", - " 5.364379\n", " \n", " \n", " 3\n", " 0.812301\n", " 0.015314\n", " 1.505448\n", - " 5.703340\n", " \n", " \n", " 4\n", " 0.443790\n", " -0.801005\n", " 1.368049\n", - " 4.768590\n", " \n", " \n", "\n", "" ], "text/plain": [ - " V1 V2 V3 V4\n", - "0 0.472906 0.848958 1.576338 3.878683\n", - "1 0.628609 -0.408128 1.325296 4.833703\n", - "2 0.551401 -0.391249 1.161222 5.364379\n", - "3 0.812301 0.015314 1.505448 
5.703340\n", - "4 0.443790 -0.801005 1.368049 4.768590" + " V1 V2 V3\n", + "0 0.472906 0.848958 1.576338\n", + "1 0.628609 -0.408128 1.325296\n", + "2 0.551401 -0.391249 1.161222\n", + "3 0.812301 0.015314 1.505448\n", + "4 0.443790 -0.801005 1.368049" ] }, - "execution_count": 26, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } From 0a18030863df79013e3d21c641b95be8479f541a Mon Sep 17 00:00:00 2001 From: msorvoja Date: Thu, 19 Dec 2024 08:27:50 +0200 Subject: [PATCH 20/30] Remove duplicate closure function --- eis_toolkit/transformations/coda/alr.py | 11 +++++---- eis_toolkit/transformations/coda/clr.py | 11 +++++---- eis_toolkit/transformations/coda/ilr.py | 13 +++++------ eis_toolkit/transformations/coda/plr.py | 31 +++++++++++++------------ eis_toolkit/utilities/miscellaneous.py | 26 --------------------- tests/transformations/coda/ilr_test.py | 8 +++---- tests/transformations/coda/plr_test.py | 2 +- tests/utilities/miscellaneous_test.py | 9 ------- 8 files changed, 39 insertions(+), 72 deletions(-) diff --git a/eis_toolkit/transformations/coda/alr.py b/eis_toolkit/transformations/coda/alr.py index 72955394..f5817fab 100644 --- a/eis_toolkit/transformations/coda/alr.py +++ b/eis_toolkit/transformations/coda/alr.py @@ -8,7 +8,7 @@ from eis_toolkit.exceptions import InvalidColumnException, NumericValueSignException from eis_toolkit.utilities.aitchison_geometry import _closure from eis_toolkit.utilities.checks.compositional import check_in_simplex_sample_space -from eis_toolkit.utilities.miscellaneous import perform_closure, rename_columns_by_pattern +from eis_toolkit.utilities.miscellaneous import rename_columns_by_pattern @beartype @@ -24,7 +24,7 @@ def alr_transform( columns: Optional[Sequence[str]] = None, denominator_column: Optional[str] = None, keep_denominator_column: bool = False, - closure_target: Optional[Number] = None, + scale: Optional[Number] = None, ) -> pd.DataFrame: """ Perform an additive logratio transformation on the 
data. @@ -35,7 +35,8 @@ def alr_transform( denominator_column: The name of the column to be used as the denominator column. keep_denominator_column: Whether to include the denominator column in the result. If True, the returned dataframe retains its original shape. - closure_target: Target row sum for closure. If None, no closure is performed. + scale: The value to which each composition should be normalized. Eg., if the composition is expressed + as percentages, scale=100. Returns: A new dataframe containing the ALR transformed data. @@ -65,8 +66,8 @@ def alr_transform( else: columns_to_transform = df.columns.to_list() - if closure_target is not None: - df = perform_closure(df, columns_to_transform, closure_target) + if scale is not None: + df = _closure(df, scale) check_in_simplex_sample_space(df) diff --git a/eis_toolkit/transformations/coda/clr.py b/eis_toolkit/transformations/coda/clr.py index b6af75f6..e81a4718 100644 --- a/eis_toolkit/transformations/coda/clr.py +++ b/eis_toolkit/transformations/coda/clr.py @@ -9,7 +9,7 @@ from eis_toolkit.exceptions import InvalidColumnException, NumericValueSignException from eis_toolkit.utilities.aitchison_geometry import _closure from eis_toolkit.utilities.checks.compositional import check_in_simplex_sample_space -from eis_toolkit.utilities.miscellaneous import perform_closure, rename_columns, rename_columns_by_pattern +from eis_toolkit.utilities.miscellaneous import rename_columns, rename_columns_by_pattern @beartype @@ -29,7 +29,7 @@ def _clr_transform(df: pd.DataFrame) -> pd.DataFrame: def clr_transform( df: pd.DataFrame, columns: Optional[Sequence[str]] = None, - closure_target: Optional[Number] = None, + scale: Optional[Number] = None, ) -> pd.DataFrame: """ Perform a centered logratio transformation on the data. @@ -37,7 +37,8 @@ def clr_transform( Args: df: A dataframe of compositional data. columns: The names of the columns to be transformed. - closure_target: Target row sum for closure. 
If None, no closure is performed. + scale: The value to which each composition should be normalized. Eg., if the composition is expressed + as percentages, scale=100. Returns: A new dataframe containing the CLR transformed data. @@ -57,8 +58,8 @@ def clr_transform( else: columns_to_transform = df.columns.to_list() - if closure_target is not None: - df = perform_closure(df, columns_to_transform, closure_target) + if scale is not None: + df = _closure(df, scale) check_in_simplex_sample_space(df) diff --git a/eis_toolkit/transformations/coda/ilr.py b/eis_toolkit/transformations/coda/ilr.py index 4c4685dd..38ad2e1e 100644 --- a/eis_toolkit/transformations/coda/ilr.py +++ b/eis_toolkit/transformations/coda/ilr.py @@ -7,10 +7,10 @@ from scipy.stats import gmean from eis_toolkit.exceptions import InvalidColumnException, InvalidCompositionException, InvalidParameterValueException +from eis_toolkit.utilities.aitchison_geometry import _closure from eis_toolkit.utilities.checks.compositional import check_in_simplex_sample_space from eis_toolkit.utilities.checks.dataframe import check_columns_valid from eis_toolkit.utilities.checks.parameter import check_lists_overlap, check_numeric_value_sign -from eis_toolkit.utilities.miscellaneous import perform_closure @beartype @@ -70,7 +70,7 @@ def single_ilr_transform( df: pd.DataFrame, subcomposition_1: Sequence[str], subcomposition_2: Sequence[str], - closure_target: Optional[Number] = None, + scale: Optional[Number] = None, ) -> pd.Series: """ Perform a single isometric logratio transformation on the provided subcompositions. @@ -81,7 +81,8 @@ def single_ilr_transform( df: A dataframe of shape [N, D] of compositional data. subcomposition_1: Names of the columns in the numerator part of the ratio. subcomposition_2: Names of the columns in the denominator part of the ratio. - closure_target: Target row sum for closure. If None, no closure is performed. + scale: The value to which each composition should be normalized. 
Eg., if the composition is expressed + as percentages, scale=100. Returns: A series of length N containing the transforms. @@ -103,10 +104,8 @@ def single_ilr_transform( if check_lists_overlap(subcomposition_1, subcomposition_2): raise InvalidCompositionException("The subcompositions overlap.") - if closure_target is not None: - columns = subcomposition_1 + subcomposition_2 - df = perform_closure(df, columns, closure_target) - df = df[columns] + if scale is not None: + df = _closure(df, scale) check_in_simplex_sample_space(df) diff --git a/eis_toolkit/transformations/coda/plr.py b/eis_toolkit/transformations/coda/plr.py index c438932b..88d2c563 100644 --- a/eis_toolkit/transformations/coda/plr.py +++ b/eis_toolkit/transformations/coda/plr.py @@ -7,9 +7,10 @@ from scipy.stats import gmean from eis_toolkit.exceptions import InvalidColumnException, InvalidParameterValueException +from eis_toolkit.utilities.aitchison_geometry import _closure from eis_toolkit.utilities.checks.compositional import check_in_simplex_sample_space from eis_toolkit.utilities.checks.parameter import check_numeric_value_sign -from eis_toolkit.utilities.miscellaneous import perform_closure, rename_columns_by_pattern +from eis_toolkit.utilities.miscellaneous import rename_columns_by_pattern @beartype @@ -53,7 +54,7 @@ def _single_plr_transform_by_index(df: pd.DataFrame, column_ind: int) -> pd.Seri @beartype -def single_plr_transform(df: pd.DataFrame, column: str, closure_target: Optional[Number] = None) -> pd.Series: +def single_plr_transform(df: pd.DataFrame, column: str, scale: Optional[Number] = None) -> pd.Series: """ Perform a pivot logratio transformation on the selected column. @@ -65,7 +66,8 @@ def single_plr_transform(df: pd.DataFrame, column: str, closure_target: Optional Args: df: A dataframe of shape [N, D] of compositional data. column: The name of the numerator column to use for the transformation. - closure_target: Target row sum for closure. If None, no closure is performed. 
+ scale: The value to which each composition should be normalized. Eg., if the composition is expressed + as percentages, scale=100. Returns: A series of length N containing the transforms. @@ -84,17 +86,15 @@ def single_plr_transform(df: pd.DataFrame, column: str, closure_target: Optional if idx == len(df.columns) - 1: raise InvalidColumnException("Can't select last column as numerator.") - if closure_target is not None: - # Perform closure on columns starting from numerator "to the right" - columns = df.columns[idx:].to_list() - df = perform_closure(df, columns, closure_target) + # Keep columns from idx to the right + df = df.iloc[:, idx:] - check_in_simplex_sample_space(df[columns]) + if scale is not None: + df = _closure(df, scale) - else: - check_in_simplex_sample_space(df) + check_in_simplex_sample_space(df) - return _single_plr_transform_by_index(df, idx) + return _single_plr_transform_by_index(df, 0) @beartype @@ -112,7 +112,7 @@ def _plr_transform(df: pd.DataFrame) -> pd.DataFrame: @beartype def plr_transform( - df: pd.DataFrame, columns: Optional[Sequence[str]] = None, closure_target: Optional[Number] = None + df: pd.DataFrame, columns: Optional[Sequence[str]] = None, scale: Optional[Number] = None ) -> pd.DataFrame: """ Perform a pivot logratio transformation on the dataframe, returning the full set of transforms. @@ -120,7 +120,8 @@ def plr_transform( Args: df: A dataframe of shape [N, D] of compositional data. columns: The names of the columns to use for the transformation. - closure_target: Target row sum for closure. If None, no closure is performed. + scale: The value to which each composition should be normalized. Eg., if the composition is expressed + as percentages, scale=100. Returns: A dataframe of shape [N, D-1] containing the set of PLR transformed data. 
@@ -137,8 +138,8 @@ def plr_transform( raise InvalidColumnException(f"The following columns were not found in the dataframe: {invalid_columns}.") df = df[columns] - if closure_target is not None: - df = perform_closure(df, closure_target=1) + if scale is not None: + df = _closure(df, scale) check_in_simplex_sample_space(df) diff --git a/eis_toolkit/utilities/miscellaneous.py b/eis_toolkit/utilities/miscellaneous.py index 42d526b7..7e0fcb02 100644 --- a/eis_toolkit/utilities/miscellaneous.py +++ b/eis_toolkit/utilities/miscellaneous.py @@ -375,29 +375,3 @@ def toggle_gdal_exceptions(): finally: if not already_has_exceptions_enabled: gdal.DontUseExceptions() - - -@beartype -def perform_closure( - df: pd.DataFrame, columns: Optional[Sequence[str]] = None, closure_target: Number = 1 -) -> pd.DataFrame: - """ - Peform closure on selected columns of a DataFrame. - - Values in the specified columns of each row are scaled so that they sum to 'closure_target'. - - Args: - df: Input DataFrame. - columns: Names of the columns on which to perform closure. If not provided, all columns are used. - closure_target: Row sum of the selected columns after performing closure. Defaults to 1. - - Returns: - A DataFrame where on each row the values in the selected columns sum to the closure target. 
- """ - if columns is None: - columns = df.columns - - row_sums = df[columns].sum(axis=1) - df[columns] = df[columns].div(row_sums, axis=0).mul(closure_target) - - return df diff --git a/tests/transformations/coda/ilr_test.py b/tests/transformations/coda/ilr_test.py index 29922c04..c1195cd9 100644 --- a/tests/transformations/coda/ilr_test.py +++ b/tests/transformations/coda/ilr_test.py @@ -15,10 +15,10 @@ def test_calculate_scaling_factor(): def test_single_ilr_transform_with_single_composition(): """Test the core functionality of a single ILR transform with a single row of data.""" - arr = np.array([80, 15, 5]) + arr = np.array([80, 15, 5]).astype(np.float64) df = pd.DataFrame(arr[None], columns=["a", "b", "c"]) - result = single_ilr_transform(df, ["a"], ["b"]) + result = single_ilr_transform(df, ["a"], ["b"], scale=100) assert result[0] == pytest.approx(1.18, abs=1e-2) result = single_ilr_transform(df, ["a", "b"], ["c"]) @@ -27,10 +27,10 @@ def test_single_ilr_transform_with_single_composition(): def test_single_ilr_transform(): """Test the core functionality of a single ILR transform.""" - arr = np.array([[80, 15, 5], [75, 18, 7]]) + arr = np.array([[80, 15, 5], [75, 18, 7]]).astype(dtype=np.float64) df = pd.DataFrame(arr, columns=["a", "b", "c"]) - result = single_ilr_transform(df, ["a"], ["b"]) + result = single_ilr_transform(df, ["a"], ["b"], scale=100) assert result[1] == pytest.approx(1.01, abs=1e-2) result = single_ilr_transform(df, ["a", "b"], ["c"]) diff --git a/tests/transformations/coda/plr_test.py b/tests/transformations/coda/plr_test.py index 372d3452..247c0182 100644 --- a/tests/transformations/coda/plr_test.py +++ b/tests/transformations/coda/plr_test.py @@ -17,7 +17,7 @@ def test_single_plr_transform_with_single_composition(): result = _single_plr_transform_by_index(df, 0) assert result[0] == pytest.approx(1.82, abs=1e-2) - result = single_plr_transform(df, "b", closure_target=100) + result = single_plr_transform(df, "b", scale=100) assert 
result[0] == pytest.approx(0.78, abs=1e-2) result = _single_plr_transform_by_index(df, 1) diff --git a/tests/utilities/miscellaneous_test.py b/tests/utilities/miscellaneous_test.py index bdfb3591..8178fe16 100644 --- a/tests/utilities/miscellaneous_test.py +++ b/tests/utilities/miscellaneous_test.py @@ -4,7 +4,6 @@ from eis_toolkit.exceptions import InvalidColumnIndexException from eis_toolkit.utilities.miscellaneous import ( - perform_closure, rename_columns, rename_columns_by_pattern, replace_values, @@ -72,11 +71,3 @@ def test_rename_columns_with_too_few_columns(): target_df = pd.DataFrame({"a": [1, 2], "b": [3, 4], "col3": [5, 6]}) renamed_df = rename_columns(df, colnames=colnames) pd.testing.assert_frame_equal(renamed_df, target_df) - - -def test_perform_closure(): - """Test that performing closure on a DataFrame works as expected.""" - df = pd.DataFrame({"col1": [1, 2, 1], "col2": [4, 8, 1], "col3": [3, 1, 6]}) - closured_df = perform_closure(df, columns=["col1", "col2"], closure_target=100) - expected_df = pd.DataFrame({"col1": [20.0, 20.0, 50.0], "col2": [80.0, 80.0, 50.0], "col3": [3, 1, 6]}) - pd.testing.assert_frame_equal(closured_df, expected_df) From 94991cc40c2dd9c6b6c13dd9d7f2af25f82d37fb Mon Sep 17 00:00:00 2001 From: msorvoja Date: Thu, 19 Dec 2024 08:36:41 +0200 Subject: [PATCH 21/30] Improve documentation --- eis_toolkit/transformations/coda/alr.py | 2 +- eis_toolkit/transformations/coda/clr.py | 2 +- eis_toolkit/transformations/coda/ilr.py | 2 +- eis_toolkit/transformations/coda/plr.py | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/eis_toolkit/transformations/coda/alr.py b/eis_toolkit/transformations/coda/alr.py index f5817fab..1f52573f 100644 --- a/eis_toolkit/transformations/coda/alr.py +++ b/eis_toolkit/transformations/coda/alr.py @@ -36,7 +36,7 @@ def alr_transform( keep_denominator_column: Whether to include the denominator column in the result. If True, the returned dataframe retains its original shape. 
scale: The value to which each composition should be normalized. Eg., if the composition is expressed - as percentages, scale=100. + as percentages, scale=100. Closure is not performed by default. Returns: A new dataframe containing the ALR transformed data. diff --git a/eis_toolkit/transformations/coda/clr.py b/eis_toolkit/transformations/coda/clr.py index e81a4718..5890922a 100644 --- a/eis_toolkit/transformations/coda/clr.py +++ b/eis_toolkit/transformations/coda/clr.py @@ -38,7 +38,7 @@ def clr_transform( df: A dataframe of compositional data. columns: The names of the columns to be transformed. scale: The value to which each composition should be normalized. Eg., if the composition is expressed - as percentages, scale=100. + as percentages, scale=100. Closure is not performed by default. Returns: A new dataframe containing the CLR transformed data. diff --git a/eis_toolkit/transformations/coda/ilr.py b/eis_toolkit/transformations/coda/ilr.py index 38ad2e1e..22518820 100644 --- a/eis_toolkit/transformations/coda/ilr.py +++ b/eis_toolkit/transformations/coda/ilr.py @@ -82,7 +82,7 @@ def single_ilr_transform( subcomposition_1: Names of the columns in the numerator part of the ratio. subcomposition_2: Names of the columns in the denominator part of the ratio. scale: The value to which each composition should be normalized. Eg., if the composition is expressed - as percentages, scale=100. + as percentages, scale=100. Closure is not performed by default. Returns: A series of length N containing the transforms. diff --git a/eis_toolkit/transformations/coda/plr.py b/eis_toolkit/transformations/coda/plr.py index 88d2c563..c35a1e23 100644 --- a/eis_toolkit/transformations/coda/plr.py +++ b/eis_toolkit/transformations/coda/plr.py @@ -67,7 +67,7 @@ def single_plr_transform(df: pd.DataFrame, column: str, scale: Optional[Number] df: A dataframe of shape [N, D] of compositional data. column: The name of the numerator column to use for the transformation. 
scale: The value to which each composition should be normalized. Eg., if the composition is expressed - as percentages, scale=100. + as percentages, scale=100. Closure is not performed by default. Returns: A series of length N containing the transforms. @@ -121,7 +121,7 @@ def plr_transform( df: A dataframe of shape [N, D] of compositional data. columns: The names of the columns to use for the transformation. scale: The value to which each composition should be normalized. Eg., if the composition is expressed - as percentages, scale=100. + as percentages, scale=100. Closure is not performed by default. Returns: A dataframe of shape [N, D-1] containing the set of PLR transformed data. From ffa84e6d3d749ae4a1911d6a69a60dcd05fe07dc Mon Sep 17 00:00:00 2001 From: msorvoja Date: Thu, 19 Dec 2024 09:14:09 +0200 Subject: [PATCH 22/30] Fix notebook --- .../testing_logratio_transformations.ipynb | 45 +++++-------------- 1 file changed, 12 insertions(+), 33 deletions(-) diff --git a/notebooks/testing_logratio_transformations.ipynb b/notebooks/testing_logratio_transformations.ipynb index 4ca0dcd3..53e18c74 100644 --- a/notebooks/testing_logratio_transformations.ipynb +++ b/notebooks/testing_logratio_transformations.ipynb @@ -680,21 +680,6 @@ "## Testing with example data" ] }, - { - "cell_type": "code", - "execution_count": 18, - "id": "f49926c2-f1dd-47e8-a484-f78ce6821904", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# Define some constants\n", - "\n", - "ppm = 1e-6\n", - "million = 1e6" - ] - }, { "cell_type": "code", "execution_count": 19, @@ -822,7 +807,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 26, "id": "75728aa4-5b2e-46b6-9511-1250bf4b13ae", "metadata": { "tags": [] @@ -838,12 +823,14 @@ "df_clr = df.copy()\n", "df_plr = df.copy()\n", "\n", - "# As real world geochemical data will often not satisfy sum to a constant, for example 100, a closure needs to be performed\n", + "# As real world geochemical data will 
often not sum to a constant, a closure needs to be performed by providing the \"scale\" parameter.\n", + "# In this example, as the example data are in ppm, let's define a scaling factor 1e6\n", + "million = 1e6\n", "\n", - "df_alr = alr_transform(df_alr, closure_target=100)\n", - "df_alr_Mg = alr_transform(df_alr_Mg, denominator_column=\"Mg_ppm_511\", closure_target=100)\n", - "df_clr = clr_transform(df_clr, closure_target=100)\n", - "df_plr = plr_transform(df_plr, closure_target=100)\n", + "df_alr = alr_transform(df_alr, scale=million)\n", + "df_alr_Mg = alr_transform(df_alr_Mg, denominator_column=\"Mg_ppm_511\", scale=million)\n", + "df_clr = clr_transform(df_clr, scale=million)\n", + "df_plr = plr_transform(df_plr, scale=million)\n", "\n", "df_clr_inv = inverse_clr(df_clr, scale=million)\n", "df_alr_inv = inverse_alr(df_alr, \"c\", scale=million)\n", @@ -851,8 +838,8 @@ "df_ilr_Al_Ca = df.copy()\n", "df_ilr_AlCa_FeMg = df.copy()\n", "\n", - "df_ilr_Al_Ca = single_ilr_transform(df_ilr_Al_Ca, [\"Al_ppm_511\"], [\"Ca_ppm_511\"], closure_target=100)\n", - "df_ilr_AlCa_FeMg = single_ilr_transform(df_ilr_AlCa_FeMg, [\"Al_ppm_511\", \"Ca_ppm_511\"], [\"Fe_ppm_511\", \"Mg_ppm_511\"], closure_target=100)" + "df_ilr_Al_Ca = single_ilr_transform(df_ilr_Al_Ca, [\"Al_ppm_511\"], [\"Ca_ppm_511\"], scale=million)\n", + "df_ilr_AlCa_FeMg = single_ilr_transform(df_ilr_AlCa_FeMg, [\"Al_ppm_511\", \"Ca_ppm_511\"], [\"Fe_ppm_511\", \"Mg_ppm_511\"], scale=million)" ] }, { @@ -969,19 +956,11 @@ "source": [ "df_alr_Mg.head()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8b6a1929-51ef-4b7a-8621-f46bbe337e31", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "eis-toolkit-l5cKD1lZ-py3.10", "language": "python", "name": "python3" }, @@ -995,7 +974,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" +
"version": "3.10.15" } }, "nbformat": 4, From 662e73ecab5cef28e127269fdd1a8db755056457 Mon Sep 17 00:00:00 2001 From: msorvoja Date: Thu, 19 Dec 2024 11:52:31 +0200 Subject: [PATCH 23/30] Clean code --- eis_toolkit/transformations/coda/alr.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/eis_toolkit/transformations/coda/alr.py b/eis_toolkit/transformations/coda/alr.py index 1f52573f..8e43224f 100644 --- a/eis_toolkit/transformations/coda/alr.py +++ b/eis_toolkit/transformations/coda/alr.py @@ -115,10 +115,6 @@ def inverse_alr( invalid_columns = [col for col in columns if col not in df.columns] if invalid_columns: raise InvalidColumnException(f"The following columns were not found in the dataframe: {invalid_columns}.") - columns_to_transform = columns - else: - columns_to_transform = df.columns.to_list() - - df = df[columns_to_transform] + df = df[columns] return _inverse_alr(df, denominator_column, scale) From 6f4469ab1091d267990cc924d5e443e6689f1ee6 Mon Sep 17 00:00:00 2001 From: msorvoja Date: Thu, 19 Dec 2024 11:53:14 +0200 Subject: [PATCH 24/30] Clean code --- eis_toolkit/transformations/coda/clr.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/eis_toolkit/transformations/coda/clr.py b/eis_toolkit/transformations/coda/clr.py index 5890922a..bd7c4940 100644 --- a/eis_toolkit/transformations/coda/clr.py +++ b/eis_toolkit/transformations/coda/clr.py @@ -55,8 +55,6 @@ def clr_transform( raise InvalidColumnException(f"The following columns were not found in the dataframe: {invalid_columns}.") columns_to_transform = columns df = df[columns_to_transform] - else: - columns_to_transform = df.columns.to_list() if scale is not None: df = _closure(df, scale) @@ -102,14 +100,9 @@ def inverse_clr( invalid_columns = [col for col in columns if col not in df.columns] if invalid_columns: raise InvalidColumnException(f"The following columns were not found in the dataframe: {invalid_columns}.") - columns_to_transform = 
columns - else: - columns_to_transform = df.columns.to_list() - - dfc = df.copy() - dfc = dfc[columns_to_transform] + df = df[columns] - inverse_data = _inverse_clr(dfc, scale) + inverse_data = _inverse_clr(df, scale) if colnames: return rename_columns(inverse_data, colnames) From a51341889e1304733e328a5c675e0a65d0d19030 Mon Sep 17 00:00:00 2001 From: msorvoja Date: Thu, 19 Dec 2024 11:54:10 +0200 Subject: [PATCH 25/30] Select columns before performing closure --- eis_toolkit/transformations/coda/ilr.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/eis_toolkit/transformations/coda/ilr.py b/eis_toolkit/transformations/coda/ilr.py index 22518820..9891ef31 100644 --- a/eis_toolkit/transformations/coda/ilr.py +++ b/eis_toolkit/transformations/coda/ilr.py @@ -104,6 +104,9 @@ def single_ilr_transform( if check_lists_overlap(subcomposition_1, subcomposition_2): raise InvalidCompositionException("The subcompositions overlap.") + columns = subcomposition_1 + subcomposition_2 + df = df[columns] + if scale is not None: df = _closure(df, scale) From 0000746665c26d774e4a9ebbe60d116d854f2555 Mon Sep 17 00:00:00 2001 From: msorvoja Date: Thu, 19 Dec 2024 11:54:41 +0200 Subject: [PATCH 26/30] Fix test --- tests/utilities/compositional_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/utilities/compositional_test.py b/tests/utilities/compositional_test.py index d7613f7e..e7456c81 100644 --- a/tests/utilities/compositional_test.py +++ b/tests/utilities/compositional_test.py @@ -35,7 +35,7 @@ def test_compositional_data_has_negatives(): with pytest.raises(NumericValueSignException): clr_transform(df) with pytest.raises(NumericValueSignException): - single_ilr_transform(df, ["a"], ["b"]) + single_ilr_transform(df, ["a"], ["b", "c"], scale=100) with pytest.raises(NumericValueSignException): plr_transform(df) with pytest.raises(NumericValueSignException): From 8e0994723af19fd9ffddafe1532cbe568f5012b1 Mon Sep 17 00:00:00 2001 From: msorvoja Date: Thu, 
19 Dec 2024 11:55:40 +0200 Subject: [PATCH 27/30] Add checks, fix logic, add tests --- eis_toolkit/transformations/coda/plr.py | 43 +++++++++++++++++++------ tests/transformations/coda/plr_test.py | 16 +++++++-- 2 files changed, 47 insertions(+), 12 deletions(-) diff --git a/eis_toolkit/transformations/coda/plr.py b/eis_toolkit/transformations/coda/plr.py index c35a1e23..e41faf5e 100644 --- a/eis_toolkit/transformations/coda/plr.py +++ b/eis_toolkit/transformations/coda/plr.py @@ -54,7 +54,12 @@ def _single_plr_transform_by_index(df: pd.DataFrame, column_ind: int) -> pd.Seri @beartype -def single_plr_transform(df: pd.DataFrame, column: str, scale: Optional[Number] = None) -> pd.Series: +def single_plr_transform( + df: pd.DataFrame, + numerator: str, + denominator_columns: Optional[Sequence[str]] = None, + scale: Optional[Number] = None, +) -> pd.Series: """ Perform a pivot logratio transformation on the selected column. @@ -65,7 +70,9 @@ def single_plr_transform(df: pd.DataFrame, column: str, scale: Optional[Number] Args: df: A dataframe of shape [N, D] of compositional data. - column: The name of the numerator column to use for the transformation. + numerator: The name of the numerator column to use for the transformation. + denominator_columns: The names of the columns to use as denominators in the transformation. Must be "to the right" of + the numerator column. scale: The value to which each composition should be normalized. Eg., if the composition is expressed as percentages, scale=100. Closure is not performed by default. Returns: @@ -74,20 +81,36 @@ def single_plr_transform(df: pd.DataFrame, column: str, scale: Optional[Number] Raises: InvalidColumnException: The input column isn't found in the dataframe, or there are no columns to the right of the given column, or last column selected as numerator, or selected numerator - is in denominators. + is in denominator columns, or one or more denominator columns are to the left of the numerator column.
InvalidCompositionException: Data is not normalized to the expected value. NumericValueSignException: Data contains zeros or negative values. """ + if numerator not in df.columns: + raise InvalidColumnException(f"The numerator column {numerator} was not found in the dataframe.") - if column not in df.columns: - raise InvalidColumnException(f"The column {column} was not found in the dataframe.") - - idx = df.columns.get_loc(column) - if idx == len(df.columns) - 1: + numerator_idx = df.columns.get_loc(numerator) + if numerator_idx == len(df.columns) - 1: raise InvalidColumnException("Can't select last column as numerator.") - # Keep columns from idx to the right - df = df.iloc[:, idx:] + if denominator_columns is not None: + for column in denominator_columns: + if column not in df.columns: + raise InvalidColumnException(f"The column {column} was not found in the dataframe.") + + if numerator in denominator_columns: + raise InvalidColumnException(f"The column {numerator} is in the denominator columns.") + + for column in denominator_columns: + column_idx = df.columns.get_loc(column) + if column_idx < numerator_idx: + raise InvalidColumnException(f"The column {column} is to the left of the numerator column {numerator}.") + else: + # Select all columns to the right of the numerator + denominator_columns = df.columns[numerator_idx + 1 :].to_list() + + # Keep only the numerator and the selected denominator columns + columns = [numerator] + denominator_columns + df = df.loc[:, columns] if scale is not None: df = _closure(df, scale) diff --git a/tests/transformations/coda/plr_test.py b/tests/transformations/coda/plr_test.py index 247c0182..5e8c567b 100644 --- a/tests/transformations/coda/plr_test.py +++ b/tests/transformations/coda/plr_test.py @@ -40,8 +40,8 @@ def test_single_plr_transform_with_last_column(): single_plr_transform(df, "c") -def test_single_plr_invalid_column(): - """Test that invalid column name raises exceptions.""" +def test_single_plr_invalid_columns(): + """Test that
invalid column names raise exceptions.""" arr = np.array([[80, 15, 5], [75, 18, 7]]) df = pd.DataFrame(arr, columns=["a", "b", "c"]) @@ -49,6 +49,18 @@ def test_single_plr_invalid_column(): with pytest.raises(InvalidColumnException): single_plr_transform(df, "x") + # A denominator column not in df + with pytest.raises(InvalidColumnException): + single_plr_transform(df, "a", "x") + + # Numerator in denominator columns + with pytest.raises(InvalidColumnException): + single_plr_transform(df, "a", ["a", "b"]) + + # A denominator column is to the left of numerator column + with pytest.raises(InvalidColumnException): + single_plr_transform(df, "b", ["a", "c"]) + def test_plr_transform(): """Test PLR transform core functionality.""" From 3d56378ae72204042b2a7550403d84928413e8e1 Mon Sep 17 00:00:00 2001 From: msorvoja Date: Thu, 19 Dec 2024 11:56:37 +0200 Subject: [PATCH 28/30] Add scale parameter to CoDa CLI functions --- eis_toolkit/cli.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/eis_toolkit/cli.py b/eis_toolkit/cli.py index af3cfad2..f507df12 100644 --- a/eis_toolkit/cli.py +++ b/eis_toolkit/cli.py @@ -3079,6 +3079,7 @@ def alr_transform_cli( columns: Annotated[List[str], typer.Option()] = None, denominator_column: str = None, keep_denominator_column: bool = False, + scale: Optional[float] = None, ): """Perform an additive logratio transformation on the data.""" from eis_toolkit.transformations.coda.alr import alr_transform @@ -3091,7 +3092,11 @@ def alr_transform_cli( typer.echo("Progress: 25%") out_df = alr_transform( - df=df, columns=columns, denominator_column=denominator_column, keep_denominator_column=keep_denominator_column + df=df, + columns=columns, + denominator_column=denominator_column, + keep_denominator_column=keep_denominator_column, + scale=scale, ) typer.echo("Progess 75%") @@ -3135,6 +3140,7 @@ def clr_transform_cli( input_vector: INPUT_FILE_OPTION, output_vector: OUTPUT_FILE_OPTION, columns:
Annotated[List[str], typer.Option()] = None, + scale: Optional[float] = None, ): """Perform a centered logratio transformation on the data.""" from eis_toolkit.transformations.coda.clr import clr_transform @@ -3146,7 +3152,7 @@ def clr_transform_cli( df = pd.DataFrame(gdf.drop(columns="geometry")) typer.echo("Progress: 25%") - out_df = clr_transform(df=df, columns=columns) + out_df = clr_transform(df=df, columns=columns, scale=scale) typer.echo("Progess 75%") out_gdf = gpd.GeoDataFrame(out_df, geometry=geometries) @@ -3190,6 +3196,7 @@ def single_ilr_transform_cli( output_vector: OUTPUT_FILE_OPTION, subcomposition_1: Annotated[List[str], typer.Option()], subcomposition_2: Annotated[List[str], typer.Option()], + scale: Optional[float] = None, ): """Perform a single isometric logratio transformation on the provided subcompositions.""" from eis_toolkit.transformations.coda.ilr import single_ilr_transform @@ -3201,7 +3208,9 @@ def single_ilr_transform_cli( df = pd.DataFrame(gdf.drop(columns="geometry")) typer.echo("Progress: 25%") - out_series = single_ilr_transform(df=df, subcomposition_1=subcomposition_1, subcomposition_2=subcomposition_2) + out_series = single_ilr_transform( + df=df, subcomposition_1=subcomposition_1, subcomposition_2=subcomposition_2, scale=scale + ) typer.echo("Progess 75%") # NOTE: Output of pairwise_logratio might be changed to DF in the future, to automatically do the following @@ -3247,7 +3256,8 @@ def single_plr_transform_cli( input_vector: INPUT_FILE_OPTION, output_vector: OUTPUT_FILE_OPTION, numerator: str = typer.Option(), - denominator: Annotated[List[str], typer.Option()] = None, + denominator_columns: Annotated[List[str], typer.Option()] = None, + scale: Optional[float] = None, ): """Perform a pivot logratio transformation on the selected column.""" from eis_toolkit.transformations.coda.plr import single_plr_transform @@ -3259,7 +3269,7 @@ def single_plr_transform_cli( df = pd.DataFrame(gdf.drop(columns="geometry")) 
typer.echo("Progress: 25%") - out_series = single_plr_transform(df=df, numerator=numerator, denominators=denominator) + out_series = single_plr_transform(df=df, numerator=numerator, denominator_columns=denominator_columns, scale=scale) typer.echo("Progess 75%") # NOTE: Output of single_plr_transform might be changed to DF in the future, to automatically do the following @@ -3276,6 +3286,7 @@ def plr_transform_cli( input_vector: INPUT_FILE_OPTION, output_vector: OUTPUT_FILE_OPTION, columns: Annotated[List[str], typer.Option()] = None, + scale: Optional[float] = None, ): """Perform a pivot logratio transformation on the selected columns.""" from eis_toolkit.transformations.coda.plr import plr_transform @@ -3287,7 +3298,7 @@ def plr_transform_cli( df = pd.DataFrame(gdf.drop(columns="geometry")) typer.echo("Progress: 25%") - out_df = plr_transform(df=df, columns=columns) + out_df = plr_transform(df=df, columns=columns, scale=scale) typer.echo("Progess 75%") out_gdf = gpd.GeoDataFrame(out_df, geometry=geometries) From 1bf85a9bcc07c92911caa1c8d9c5dc6db4290a43 Mon Sep 17 00:00:00 2001 From: msorvoja Date: Thu, 19 Dec 2024 11:57:12 +0200 Subject: [PATCH 29/30] Fix notebook --- .../testing_logratio_transformations.ipynb | 33 ++++++++++++------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/notebooks/testing_logratio_transformations.ipynb b/notebooks/testing_logratio_transformations.ipynb index 53e18c74..e5dec896 100644 --- a/notebooks/testing_logratio_transformations.ipynb +++ b/notebooks/testing_logratio_transformations.ipynb @@ -88,7 +88,18 @@ "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/mika/code/EIS/eis_toolkit/notebooks/../eis_toolkit/utilities/aitchison_geometry.py:43: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. 
Value '84.21052631578948' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.\n", + " dfc.iloc[idx] = _normalize(row, scale) if scale is not None else _normalize(row)\n", + "/home/mika/code/EIS/eis_toolkit/notebooks/../eis_toolkit/utilities/aitchison_geometry.py:43: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '15.789473684210527' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.\n", + " dfc.iloc[idx] = _normalize(row, scale) if scale is not None else _normalize(row)\n" + ] + } + ], "source": [ "pair_a_b = single_pairwise_logratio(float(C.iloc[0, 0]), float(C.iloc[0, 1]))\n", "pair_a_c = single_pairwise_logratio(float(C.iloc[0, 0]), float(C.iloc[0, 2]))\n", @@ -101,7 +112,7 @@ "C_clr_inv = inverse_clr(C_clr, scale=100.0)\n", "C_alr_inv = inverse_alr(C_alr, \"c\", scale=100)\n", "\n", - "C_ilr_ab = single_ilr_transform(C, [\"a\"], [\"b\"])\n", + "C_ilr_ab = single_ilr_transform(C, [\"a\"], [\"b\"], scale=100)\n", "C_ilr_ab_c = single_ilr_transform(C, [\"a\", \"b\"], [\"c\"])" ] }, @@ -682,7 +693,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 18, "id": "81a98117-b981-47ea-a7bb-ba06c0dacb13", "metadata": { "tags": [] @@ -696,7 +707,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 19, "id": "c0204220-7bf2-4235-b92a-0e139180050e", "metadata": { "tags": [] @@ -710,7 +721,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 20, "id": "e1bda63b-ab9b-4060-90d5-7520952f2e3a", "metadata": { "tags": [] @@ -792,7 +803,7 @@ "4 12500.0 3600.0 31500.0 8020.0" ] }, - "execution_count": 21, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -807,7 +818,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 21, "id": "75728aa4-5b2e-46b6-9511-1250bf4b13ae", "metadata": { "tags": [] @@ -844,7 +855,7 @@ }, { 
"cell_type": "code", - "execution_count": 23, + "execution_count": 22, "id": "e136d05d-671d-420f-95b9-5f350bc7a94c", "metadata": { "tags": [] @@ -861,7 +872,7 @@ "dtype: float64" ] }, - "execution_count": 23, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -872,7 +883,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 23, "id": "ad352680-433a-4026-b7b5-560b682dfb96", "metadata": { "tags": [] @@ -948,7 +959,7 @@ "4 0.443790 -0.801005 1.368049" ] }, - "execution_count": 24, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } From 00268567c1076681d7383410b9ccee0b1a7d5ff9 Mon Sep 17 00:00:00 2001 From: msorvoja Date: Tue, 7 Jan 2025 13:08:30 +0200 Subject: [PATCH 30/30] Fix missing denominator column in ALR Previously, if denominator column was not in selected columns, it was removed before the transformation. --- eis_toolkit/transformations/coda/alr.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/eis_toolkit/transformations/coda/alr.py b/eis_toolkit/transformations/coda/alr.py index 8e43224f..8a16d2e3 100644 --- a/eis_toolkit/transformations/coda/alr.py +++ b/eis_toolkit/transformations/coda/alr.py @@ -62,7 +62,12 @@ def alr_transform( if invalid_columns: raise InvalidColumnException(f"The following columns were not found in the dataframe: {invalid_columns}.") columns_to_transform = columns - df = df[columns_to_transform] + + if denominator_column not in columns_to_transform: + df = df[columns_to_transform + [denominator_column]] + else: + df = df[columns_to_transform] + else: columns_to_transform = df.columns.to_list()