Skip to content

Commit

Permalink
Merge pull request #8 from brainglobe/safe-concat
Browse files Browse the repository at this point in the history
Add safe pandas concat function
  • Loading branch information
willGraham01 authored Nov 1, 2023
2 parents d5c1923 + b3e1334 commit 2b94f89
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 0 deletions.
23 changes: 23 additions & 0 deletions brainglobe_utils/pandas/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,26 @@ def regex_remove_df_columns(df, search_string_list):
for search_string in search_string_list:
df = df.drop(df.filter(regex=search_string).columns, axis=1)
return df


def safe_pandas_concat(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
    """
    Join two DataFrames row-wise, never handing an empty frame to pd.concat.

    Concatenating with an empty DataFrame relies on deprecated pandas
    behaviour, so the empty cases are handled explicitly here:

    - both non-empty: the rows of df1 followed by the rows of df2, with a
      freshly numbered index (``ignore_index=True``);
    - only df2 empty: a copy of df1;
    - only df1 empty: a copy of df2;
    - both empty: a new empty DataFrame built from df1's column labels.

    :param df1: First DataFrame; its rows come first in the result.
    :param df2: Second DataFrame; its rows come after those of df1.
    :returns: A new DataFrame as described above. The inputs are never
        returned directly.
    """
    first_has_rows = not df1.empty
    second_has_rows = not df2.empty

    # Only the "both populated" case actually needs pd.concat.
    if first_has_rows and second_has_rows:
        return pd.concat([df1, df2], ignore_index=True)

    # Exactly one side has rows: hand back a copy of that side.
    if first_has_rows:
        return df1.copy()
    if second_has_rows:
        return df2.copy()

    # Neither side has rows: fall back to an empty frame shaped like df1.
    return pd.DataFrame(columns=df1.columns)
19 changes: 19 additions & 0 deletions tests/tests/test_unit/test_pandas/test_pandas_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,22 @@ def test_move_column_first():

with pytest.raises(ValueError):
pandas_misc.move_column_first(df_with_nan, columns)


def test_safe_pandas_concat() -> None:
    """
    Exercise safe_pandas_concat over every empty / non-empty pairing.

    Expectations:
    - two populated frames give the row-wise concatenation,
    - a populated frame paired with an empty one gives the populated frame,
    - two empty frames give an empty frame with the same columns.
    """
    populated_first = pd.DataFrame(data={"a": [1], "b": [2], "c": [3]})
    populated_second = pd.DataFrame(data={"a": [4], "b": [5], "c": [6]})
    no_rows = pd.DataFrame(columns=["a", "b", "c"])
    stacked = pd.DataFrame(data={"a": [1, 4], "b": [2, 5], "c": [3, 6]})

    # Each entry is (expected result, left argument, right argument).
    scenarios = [
        (stacked, populated_first, populated_second),
        (populated_first, populated_first, no_rows),
        (populated_second, no_rows, populated_second),
        (no_rows, no_rows, no_rows),
    ]
    for expected, left, right in scenarios:
        assert expected.equals(pandas_misc.safe_pandas_concat(left, right))

0 comments on commit 2b94f89

Please sign in to comment.