diff --git a/src/rcx_tk/process_metadata_file.py b/src/rcx_tk/process_metadata_file.py index 6e5cf4b..813a331 100644 --- a/src/rcx_tk/process_metadata_file.py +++ b/src/rcx_tk/process_metadata_file.py @@ -64,8 +64,8 @@ def process_metadata(df: pd.DataFrame) -> pd.DataFrame: pd.DataFrame: A metadata dataframe with rearranged and newly derived columns. """ df = rearrange_columns(df) - validateFileNames(df) - validateInjectionOrder(df) + validate_filenames_column(df) + validate_injection_order(df) df = derive_additional_metadata(df) df = cleanup(df) return df @@ -84,7 +84,7 @@ def cleanup(df: pd.DataFrame) -> pd.DataFrame: df.insert(0, "sampleName", column_to_move) return df -def validateInjectionOrder(df: pd.DataFrame) -> bool: +def validate_injection_order(df: pd.DataFrame) -> bool: """Validates if injectionOrder is of integer type. Args: @@ -104,10 +104,10 @@ def derive_additional_metadata(df: pd.DataFrame) -> pd.DataFrame: Returns: pd.DataFrame: The processed dataframe. """ - df['sampleName'] = df['File name'].apply(replace_fileName) - df['sequenceIdentifier'] = df['File name'].apply(add_sequenceIdentifier) - df['subjectIdentifier'] = df['File name'].apply(add_subjectIdentifier) - df['localOrder'] = df['File name'].apply(add_localOrder) + df['sampleName'] = df['File name'].apply(replace_spaces) + df['sequenceIdentifier'] = df['File name'].apply(add_sequence_identifier) + df['subjectIdentifier'] = df['File name'].apply(add_subject_identifier) + df['localOrder'] = df['File name'].apply(add_local_order) return df def rearrange_columns(df: pd.DataFrame) -> pd.DataFrame: @@ -138,7 +138,7 @@ def rearrange_columns(df: pd.DataFrame) -> pd.DataFrame: return df -def validateFileNames(df: pd.DataFrame) -> None: +def validate_filenames_column(df: pd.DataFrame) -> None: """Validates the file names. Args: @@ -150,7 +150,7 @@ def validateFileNames(df: pd.DataFrame) -> None: if not df['File name'].apply(validate_filename).all(): raise ValueError("Invalid File name.") -def replace_fileName(file_name: str) -> str: +def replace_spaces(file_name: str) -> str: """Replaces spaces with underscores in Filename. Args: @@ -194,7 +194,7 @@ def is_not_empty(x: str) -> bool: -def add_localOrder(file_name: str) -> int: +def add_local_order(file_name: str) -> int: """Returns the localOrder value, i.e. the last n-digits after the last underscore. Args: @@ -206,7 +206,7 @@ def add_localOrder(file_name: str) -> int: _, b = separate_filename(file_name) return(int(b)) -def add_sequenceIdentifier(file_name: str) -> str: +def add_sequence_identifier(file_name: str) -> str: """Returns the sequenceIdentifier value, i.e. everything before last _[digits]. Args: @@ -232,7 +232,7 @@ def separate_filename(file_name: str) -> Tuple[str, str]: a, b = re.findall(r'(.*(?:\D|^))(\d+)', file_name)[0] return (a, b) -def add_subjectIdentifier(file_name: str) -> str: +def add_subject_identifier(file_name: str) -> str: """Returns the subjectIdentifier value, i.e. everything between [digit_] and [_digit]. Args: diff --git a/tests/test_process_metadata_file.py b/tests/test_process_metadata_file.py index 6ba8c12..eb59eb4 100644 --- a/tests/test_process_metadata_file.py +++ b/tests/test_process_metadata_file.py @@ -3,17 +3,17 @@ from typing import Final import pandas as pd import pytest -from rcx_tk.process_metadata_file import add_localOrder -from rcx_tk.process_metadata_file import add_sequenceIdentifier -from rcx_tk.process_metadata_file import add_subjectIdentifier +from rcx_tk.process_metadata_file import add_local_order +from rcx_tk.process_metadata_file import add_sequence_identifier +from rcx_tk.process_metadata_file import add_subject_identifier from rcx_tk.process_metadata_file import process_alkane_ri_file from rcx_tk.process_metadata_file import process_metadata_file from rcx_tk.process_metadata_file import read_file -from rcx_tk.process_metadata_file import replace_fileName +from rcx_tk.process_metadata_file import replace_spaces from rcx_tk.process_metadata_file import save_dataframe_as_tsv from rcx_tk.process_metadata_file import separate_filename from rcx_tk.process_metadata_file import validate_filename -from rcx_tk.process_metadata_file import validateInjectionOrder +from rcx_tk.process_metadata_file import validate_injection_order __location__: Final[Path] = Path(__file__).parent.resolve() @@ -300,7 +300,7 @@ def test_add_localOrder(file_name: str, expected: int): file_name (str): The filename. expected (int): The localOrder value. """ - actual = add_localOrder(file_name) + actual = add_local_order(file_name) assert actual == expected @pytest.mark.parametrize("file_name, expected", [ @@ -314,7 +314,7 @@ def test_add_sequenceIdentifier(file_name: str, expected: str): file_name (str): The filename. expected (str): The sequenceIdentifier value. """ - actual = add_sequenceIdentifier(file_name) + actual = add_sequence_identifier(file_name) assert actual == expected @pytest.mark.parametrize("file_name, expected", [ @@ -329,7 +329,7 @@ def test_add_subjectIdentifier(file_name: str, expected: str): file_name (str): The filename. expected (str): The subjectIdentifier value. """ - actual = add_subjectIdentifier(file_name) + actual = add_subject_identifier(file_name) assert actual == expected @pytest.mark.parametrize("file_name, expected", [ @@ -343,7 +343,7 @@ def test_replace_fileName(file_name: str, expected: str): file_name (str): The filename. expected (str): The filename with replaced spaces by underscores. """ - actual = replace_fileName(file_name) + actual = replace_spaces(file_name) assert actual == expected @@ -372,5 +372,5 @@ def test_validateInjectionOrder(dataFrame: pd.DataFrame, expected: bool): dataFrame (pd.DataFrame): A dataframe with injection order. expected (bool): Whether it is of integer (True) or other data type (False) """ - actual = validateInjectionOrder(dataFrame) + actual = validate_injection_order(dataFrame) assert expected == actual