Skip to content

Commit

Permalink
Merge pull request #307 from cschlaffner/215-select-columns-when-impo…
Browse files Browse the repository at this point in the history
…rting-metadata

215 select columns when importing metadata
  • Loading branch information
RogerAK authored Nov 20, 2023
2 parents 74ce2cf + fae37c1 commit af789f7
Show file tree
Hide file tree
Showing 5 changed files with 140 additions and 10 deletions.
5 changes: 5 additions & 0 deletions protzilla/constants/location_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@
"metadata_import",
"metadata_import_method",
): metadata_import.metadata_import_method,
(
"importing",
"metadata_import",
"metadata_column_assignment",
): metadata_import.metadata_column_assignment,
("importing", "peptide_import", "peptide_import"): peptide_import.peptide_import,
(
"data_preprocessing",
Expand Down
44 changes: 35 additions & 9 deletions protzilla/constants/workflow_meta.json
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,32 @@
"default": "Columns (samples in rows, features in columns)"
}
}
},
"metadata_column_assignment": {
"name": "Metadata Column Assignment",
"description": "Assign columns to metadata categories, repeatable for each category",
"parameters": {
"metadata_df": {
"type": "empty",
"name": "metadata_df",
"default": null
},
"metadata_required_column": {
"name": "Missing, but required Metadata Columns",
"fill": "metadata_required_columns",
"type": "categorical",
"categories": [],
"default": null
},
"metadata_unknown_column": {
"name": "Existing, but unknown Metadata Columns",
"fill": "metadata_unknown_columns",
"type": "categorical",
"categories": [],
"default": null
}

}
}
},
"peptide_import": {
Expand Down Expand Up @@ -807,7 +833,7 @@
"grouping": {
"name": "Grouping:",
"type": "categorical",
"fill": "metadata_columns",
"fill": "metadata_non_sample_columns",
"fill_dynamic": [
"selected_groups"
],
Expand Down Expand Up @@ -873,7 +899,7 @@
"grouping": {
"name": "Grouping",
"type": "categorical",
"fill": "metadata_columns",
"fill": "metadata_non_sample_columns",
"fill_dynamic": [
"group1",
"group2"
Expand Down Expand Up @@ -939,7 +965,7 @@
"grouping": {
"name": "Grouping",
"type": "categorical",
"fill": "metadata_columns",
"fill": "metadata_non_sample_columns",
"fill_dynamic": [
"group1",
"group2"
Expand Down Expand Up @@ -1172,7 +1198,7 @@
"labels_column": {
"name": "Choose labels column from metadata",
"type": "categorical",
"fill": "metadata_columns",
"fill": "metadata_non_sample_columns",
"fill_dynamic": [
"positive_label"
],
Expand Down Expand Up @@ -1308,7 +1334,7 @@
"labels_column": {
"name": "Choose labels column from metadata",
"type": "categorical",
"fill": "metadata_columns",
"fill": "metadata_non_sample_columns",
"fill_dynamic": [
"positive_label"
],
Expand Down Expand Up @@ -1444,7 +1470,7 @@
"labels_column": {
"name": "Choose labels column from metadata",
"type": "categorical",
"fill": "metadata_columns",
"fill": "metadata_non_sample_columns",
"fill_dynamic": [
"positive_label"
],
Expand Down Expand Up @@ -1565,7 +1591,7 @@
"labels_column": {
"name": "Choose labels column from metadata",
"type": "categorical",
"fill": "metadata_columns",
"fill": "metadata_non_sample_columns",
"fill_dynamic": [
"positive_label"
],
Expand Down Expand Up @@ -1771,7 +1797,7 @@
"labels_column": {
"name": "Choose labels column from metadata",
"type": "categorical",
"fill": "metadata_columns",
"fill": "metadata_non_sample_columns",
"categories": [],
"default": null
},
Expand Down Expand Up @@ -2520,7 +2546,7 @@
"grouping": {
"name": "Grouping from metadata",
"type": "categorical",
"fill": "metadata_columns",
"fill": "metadata_non_sample_columns",
"fill_dynamic": [
"group1",
"group2"
Expand Down
39 changes: 39 additions & 0 deletions protzilla/importing/metadata_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,42 @@ def metadata_import_method(df, file_path, feature_orientation):
os.remove(file_path)

return df, {"metadata": meta_df}


def metadata_column_assignment(
    df: pd.DataFrame,
    metadata_df: pd.DataFrame,
    metadata_required_column: str = None,
    metadata_unknown_column: str = None,
):
    """
    Rename one column of the metadata dataframe to a required column name.

    The rename is applied in place on ``metadata_df``; the intensity dataframe
    ``df`` is returned untouched.

    :param df: this is passed for consistency, but not used
    :type df: pandas DataFrame
    :param metadata_df: the metadata dataframe to be changed (modified in place)
    :type metadata_df: pandas DataFrame
    :param metadata_required_column: the required column name that the selected
        column is renamed to
    :type metadata_required_column: str
    :param metadata_unknown_column: the name of the existing column in the
        metadata dataframe that is renamed to the required column name
    :type metadata_unknown_column: str
    :return: the unchanged dataframe and a dict with messages; the dict is
        empty when the rename was performed
    :rtype: pd.DataFrame, dict
    """

    # TODO add info box in UI explaining that no option for unknown columns means all columns are named correctly
    # No selection (None or an empty string) for either parameter means there
    # is nothing to rename, so report that and leave the metadata alone.
    if not metadata_required_column or not metadata_unknown_column:
        msg = "You can proceed, as there is nothing that needs to be changed."
        return df, dict(messages=[dict(level=messages.INFO, msg=msg)])

    # Refuse to create a second column with the same name.
    if metadata_required_column in metadata_df.columns:
        # Implicit string concatenation keeps source indentation out of the
        # user-facing message (a backslash continuation would embed it).
        msg = (
            f"Metadata already contains column '{metadata_required_column}'. "
            "Please rename the column or select another column."
        )
        return df, dict(messages=[dict(level=messages.ERROR, msg=msg)])

    # rename(inplace=True) mutates metadata_df and returns None, so the result
    # is deliberately not bound to a name.
    metadata_df.rename(
        columns={metadata_unknown_column: metadata_required_column}, inplace=True
    )
    return df, dict()
18 changes: 17 additions & 1 deletion protzilla/run_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,27 @@ def insert_special_params(param_dict, run):
param_dict["outputs"].sort()

if "fill" in param_dict:
if param_dict["fill"] == "metadata_columns":
if param_dict["fill"] == "metadata_non_sample_columns":
# Sample not needed for anova and t-test
param_dict["categories"] = run.metadata.columns[
run.metadata.columns != "Sample"
].unique()
elif param_dict["fill"] == "metadata_unknown_columns":
# give selection of existing columns without ["Sample", "Group", "Batch"]
# as they are already named correctly for our purposes
param_dict["categories"] = run.metadata.columns[
~run.metadata.columns.isin(["Sample", "Group", "Batch"])
].unique()

elif param_dict["fill"] == "metadata_required_columns":
# TODO add other possible metadata columns
# exclude columns that are already in metadata and known to be required
param_dict["categories"] = [
col
for col in ["Sample", "Group", "Batch"]
if col not in run.metadata.columns
]

elif param_dict["fill"] == "metadata_column_data":
# per default fill with second column data since it is selected in dropdown
param_dict["categories"] = run.metadata.iloc[:, 1].unique()
Expand Down
44 changes: 44 additions & 0 deletions tests/protzilla/importing/test_metadata_import.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from shutil import rmtree

import pandas as pd
from django.contrib import messages

from protzilla.constants.paths import PROJECT_PATH, RUNS_PATH
from protzilla.importing import metadata_import
Expand Down Expand Up @@ -42,3 +43,46 @@ def test_metadata_orientation():
pd.testing.assert_frame_equal(run1.metadata, run2.metadata)
rmtree(RUNS_PATH / name1)
rmtree(RUNS_PATH / name2)


def test_metadata_column_assignment():
    """
    Check that metadata_column_assignment renames metadata columns in place
    and reports an error message when the assignment is rejected.
    """
    name = "test_run" + random_string()
    run = Run.create(name)
    # try/finally so the run directory is removed even if an assertion fails
    try:
        run.step_index += 1
        run.calculate_and_next(
            metadata_import.metadata_import_method,
            file_path=f"{PROJECT_PATH}/tests/metadata_cut_columns.csv",
            feature_orientation="Columns (samples in rows, features in columns)",
        )
        # this is a workaround because the metadata is not passed properly using calculate_and_next,
        # TODO but it works in the UI, it would be better to fix this
        metadata_import.metadata_column_assignment(
            df=run.df,
            metadata_df=run.metadata,
            metadata_required_column="Sample_renamed",
            metadata_unknown_column="Sample",
        )
        assert run.metadata.columns[0] == "Sample_renamed"

        # renaming back restores the original column name
        metadata_import.metadata_column_assignment(
            df=run.df,
            metadata_df=run.metadata,
            metadata_required_column="Sample",
            metadata_unknown_column="Sample_renamed",
        )
        assert run.metadata.columns[0] == "Sample"

        # this assignment is expected to be rejected with an error message
        # (presumably the fixture already has a "Group" column - TODO confirm)
        _, out = metadata_import.metadata_column_assignment(
            df=run.df,
            metadata_df=run.metadata,
            metadata_required_column="Group",
            metadata_unknown_column="Sample",
        )
        assert out["messages"][0]["level"] == messages.ERROR
        assert out["messages"][0]["msg"]

        # empty selections must not raise; the result is intentionally not inspected
        metadata_import.metadata_column_assignment(
            df=run.df,
            metadata_df=run.metadata,
            metadata_required_column="",
            metadata_unknown_column="",
        )
    finally:
        rmtree(RUNS_PATH / name)

0 comments on commit af789f7

Please sign in to comment.