import pandas as pd


def safe_pandas_concat(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
    """
    Concatenate two DataFrames without relying on deprecated functionality
    when one of the DataFrames is empty.

    If df1 and df2 are non-empty, return the concatenation.
    If df1 is empty and df2 is not, return a copy of df2.
    If df1 is non-empty and df2 is, return a copy of df1.
    If df1 and df2 are empty, return an empty DataFrame with the same column
    names as df1.

    In every non-empty case the returned DataFrame carries a fresh
    0..n-1 index, matching ``pd.concat(..., ignore_index=True)``; the
    original index labels of the inputs are discarded. The inputs are never
    modified.

    :param df1: DataFrame to concatenate.
    :param df2: DataFrame to concatenate.
    :returns: DataFrame formed from concatenation of df1 and df2.
    """
    if df1.empty and df2.empty:
        return pd.DataFrame(columns=df1.columns)
    if df1.empty:
        # reset_index returns a new object, so this is still a copy of df2,
        # but with the same index the ignore_index=True branch would give.
        return df2.reset_index(drop=True)
    if df2.empty:
        return df1.reset_index(drop=True)
    return pd.concat([df1, df2], ignore_index=True)


# Unit test for safe_pandas_concat (tests/tests/test_unit/test_misc.py in
# the original change set).
def test_safe_pandas_concat() -> None:
    """
    Test the following:
    - Non-empty dataframes are concatenated as expected,
    - When one dataframe is empty, the other is returned,
    - When both dataframes are empty, an empty dataframe with
    the corresponding columns is returned,
    - The result index is reset whether or not an input is empty.
    """
    df1 = pd.DataFrame(data={"a": [1], "b": [2], "c": [3]})
    df2 = pd.DataFrame(data={"a": [4], "b": [5], "c": [6]})
    empty_df = pd.DataFrame(columns=["a", "b", "c"])
    combined_df = pd.DataFrame(data={"a": [1, 4], "b": [2, 5], "c": [3, 6]})

    assert combined_df.equals(safe_pandas_concat(df1, df2))
    assert df1.equals(safe_pandas_concat(df1, empty_df))
    assert df2.equals(safe_pandas_concat(empty_df, df2))
    assert empty_df.equals(safe_pandas_concat(empty_df, empty_df))

    # Index labels of the inputs must not leak into the result.
    relabelled = df1.set_axis([7], axis=0)
    assert df1.equals(safe_pandas_concat(relabelled, empty_df))
    assert df1.equals(safe_pandas_concat(empty_df, relabelled))