diff --git a/docs/conf.py b/docs/conf.py index 7faedfd..c796d1b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -17,9 +17,9 @@ # -- Project information ----------------------------------------------------- -project = u"rcx_tk" -copyright = u"2024, RECETOX, Masaryk University" -author = u"Zargham Ahmad" +project = "rcx_tk" +copyright = "2024, RECETOX, Masaryk University" +author = "Zargham Ahmad" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -61,7 +61,7 @@ # -- Use autoapi.extension to run sphinx-apidoc ------- -autoapi_dirs = ['../src/rcx_tk'] +autoapi_dirs = ["../src/rcx_tk"] # -- Options for HTML output ---------------------------------------------- @@ -78,11 +78,12 @@ # -- Options for Intersphinx -intersphinx_mapping = {'python': ('https://docs.python.org/3', None), - # Commonly used libraries, uncomment when used in package - # 'numpy': ('http://docs.scipy.org/doc/numpy/', None), - # 'scipy': ('http://docs.scipy.org/doc/scipy/reference/', None), - # 'scikit-learn': ('https://scikit-learn.org/stable/', None), - # 'matplotlib': ('https://matplotlib.org/stable/', None), - # 'pandas': ('http://pandas.pydata.org/docs/', None), - } +intersphinx_mapping = { + "python": ("https://docs.python.org/3", None), + # Commonly used libraries, uncomment when used in package + # 'numpy': ('http://docs.scipy.org/doc/numpy/', None), + # 'scipy': ('http://docs.scipy.org/doc/scipy/reference/', None), + # 'scikit-learn': ('https://scikit-learn.org/stable/', None), + # 'matplotlib': ('https://matplotlib.org/stable/', None), + # 'pandas': ('http://pandas.pydata.org/docs/', None), +} diff --git a/src/rcx_tk/__init__.py b/src/rcx_tk/__init__.py index 9140d26..9dea85a 100644 --- a/src/rcx_tk/__init__.py +++ b/src/rcx_tk/__init__.py @@ -1,4 +1,5 @@ """Documentation about rcx_tk.""" + import logging logging.getLogger(__name__).addHandler(logging.NullHandler()) diff --git 
a/src/rcx_tk/process_metadata_file.py b/src/rcx_tk/process_metadata_file.py index bf075ec..0e534d6 100644 --- a/src/rcx_tk/process_metadata_file.py +++ b/src/rcx_tk/process_metadata_file.py @@ -3,7 +3,7 @@ def read_file(file_path: str) -> pd.DataFrame: - """Imports the metadata file to pandas dataframe. + """Imports the metadata file to pandas dataframe. Args: file_path (str): The path to the input data. @@ -14,17 +14,18 @@ def read_file(file_path: str) -> pd.DataFrame: Returns: pd.DataFrame: Dataframe containing the metadata. """ file_extension = os.path.splitext(file_path)[1].lower() - if file_extension == '.csv': - return pd.read_csv(file_path, encoding='UTF-8') - elif file_extension in ['.xls', '.xlsx']: + if file_extension == ".csv": + return pd.read_csv(file_path, encoding="UTF-8") + elif file_extension in [".xls", ".xlsx"]: return pd.read_excel(file_path) - elif file_extension in ['.tsv', '.txt']: - return pd.read_csv(file_path, sep='\t') + elif file_extension in [".tsv", ".txt"]: + return pd.read_csv(file_path, sep="\t") else: raise ValueError("Unsupported file format. Please provide a CSV, Excel, or TSV file.") + def save_dataframe_as_tsv(df: pd.DataFrame, file_path: str) -> None: """Saves the dataframe as a TSV file. @@ -37,8 +38,8 @@ def save_dataframe_as_tsv(df: pd.DataFrame, file_path: str) -> None: """ if os.path.splitext(file_path)[1] != ".tsv": raise ValueError("Unsupported file format. Please point to a TSV file.") - df.to_csv(file_path, sep='\t', index=False) - + df.to_csv(file_path, sep="\t", index=False) + def process_metadata_file(file_path: str, out_path: str) -> None: """Processes a metadata file, keeping and renaming specific columns. @@ -48,18 +49,19 @@ def process_metadata_file(file_path: str, out_path: str) -> None: Args: file_path (str): A path to the metadata file. out_path (str): A path where processed metadata dataframe is exported.
""" columns_to_keep = { - 'File name': 'sampleName', - 'Type': 'sampleType', - 'Class ID': 'class', - 'Batch': 'batch', - 'Analytical order': 'injectionOrder' + "File name": "sampleName", + "Type": "sampleType", + "Class ID": "class", + "Batch": "batch", + "Analytical order": "injectionOrder", } df = read_file(file_path) df = df[list(columns_to_keep.keys())].rename(columns=columns_to_keep) - df['sampleName'] = df['sampleName'].str.replace(' ', '_') + df["sampleName"] = df["sampleName"].str.replace(" ", "_") save_dataframe_as_tsv(df, out_path) + def process_alkane_ri_file(file_path: str, out_path: str) -> None: """Processes an alkane file, keeping and renaming specific columns. @@ -67,13 +68,9 @@ def process_alkane_ri_file(file_path: str, out_path: str) -> None: file_path (str): A path to the alkane file. out_path (str): A path where processed alkane file is exported. """ - columns_to_keep = { - 'Carbon number': 'carbon_number', - 'RT (min)': 'rt' - } + columns_to_keep = {"Carbon number": "carbon_number", "RT (min)": "rt"} df = read_file(file_path) df.columns = df.columns.str.strip() df = df.rename(columns=columns_to_keep) save_dataframe_as_tsv(df, out_path) - diff --git a/tests/test_process_metadata_file.py b/tests/test_process_metadata_file.py index 70e278a..074b62f 100644 --- a/tests/test_process_metadata_file.py +++ b/tests/test_process_metadata_file.py @@ -1,16 +1,31 @@ +import os from pathlib import Path from typing import Final import pandas as pd -import os import pytest -from rcx_tk.process_metadata_file import read_file, save_dataframe_as_tsv, process_metadata_file, process_alkane_ri_file +from rcx_tk.process_metadata_file import process_alkane_ri_file +from rcx_tk.process_metadata_file import process_metadata_file +from rcx_tk.process_metadata_file import read_file +from rcx_tk.process_metadata_file import save_dataframe_as_tsv __location__: Final[Path] = Path(__file__).parent.resolve() -@pytest.fixture + +@pytest.fixture def dataframe() -> pd.DataFrame: 
+    """Creates a dataframe corresponding to metadata test file. + + Returns: + pd.DataFrame: Expected dataframe matching metadata test file. + """ + path_prefix: Final[str] = ( + "Z:\\000020-Shares\\hram\\MS_omics\\Personal Folders\\COUFALIKOVA Katerina\\ATHLETE\\finalni data zaloha\\batch1-20231121-Katerina Coufalikova\\RAW_profile\\" # noqa: E501 path prefix is expected in test data to be longer than line limit + ) d = { - 'File path': [ "Z:\\000020-Shares\\hram\\MS_omics\\Personal Folders\\COUFALIKOVA Katerina\\ATHLETE\\finalni data zaloha\\batch1-20231121-Katerina Coufalikova\\RAW_profile\\1_instrumental blank_01.raw", "Z:\\000020-Shares\\hram\\MS_omics\\Personal Folders\\COUFALIKOVA Katerina\\ATHLETE\\finalni data zaloha\\batch1-20231121-Katerina Coufalikova\\RAW_profile\\4_Alkane mix_04.raw", "Z:\\000020-Shares\\hram\\MS_omics\\Personal Folders\\COUFALIKOVA Katerina\\ATHLETE\\finalni data zaloha\\batch1-20231121-Katerina Coufalikova\\RAW_profile\\6_instrumental blank_06.raw", @@ -21,9 +33,9 @@ def dataframe() -> pd.DataFrame: "Z:\\000020-Shares\\hram\\MS_omics\\Personal Folders\\COUFALIKOVA Katerina\\ATHLETE\\finalni data zaloha\\batch1-20231121-Katerina Coufalikova\\RAW_profile\\15_QC non-dilute_15.raw", "Z:\\000020-Shares\\hram\\MS_omics\\Personal Folders\\COUFALIKOVA Katerina\\ATHLETE\\finalni data zaloha\\batch1-20231121-Katerina Coufalikova\\RAW_profile\\18_QC 4 _18.raw", "Z:\\000020-Shares\\hram\\MS_omics\\Personal Folders\\COUFALIKOVA Katerina\\ATHLETE\\finalni data zaloha\\batch1-20231121-Katerina Coufalikova\\RAW_profile\\19_QC 8_19.raw", - "Z:\\000020-Shares\\hram\\MS_omics\\Personal Folders\\COUFALIKOVA Katerina\\ATHLETE\\finalni data zaloha\\batch1-20231121-Katerina Coufalikova\\RAW_profile\\29_instrument blank_29.raw" + "Z:\\000020-Shares\\hram\\MS_omics\\Personal Folders\\COUFALIKOVA Katerina\\ATHLETE\\finalni data zaloha\\batch1-20231121-Katerina
Coufalikova\\RAW_profile\\29_instrument blank_29.raw", ], - 'File name': [ + "File name": [ "1_instrumental blank_01", "4_Alkane mix_04", "6_instrumental blank_06", @@ -34,9 +49,9 @@ def dataframe() -> pd.DataFrame: "15_QC non-dilute_15", "18_QC 4 _18", "19_QC 8_19", - "29_instrument blank_29" + "29_instrument blank_29", ], - 'Type': [ + "Type": [ "Standard", "Standard", "Standard", @@ -47,81 +62,22 @@ def dataframe() -> pd.DataFrame: "QC", "QC", "QC", - "Standard" - ], - "Class ID": [ - 3, - 5, - 3, - 6, - 2, - 2, - 2, - 2, - 2, - 2, - 3 - ], - "Batch": [ - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1 - ], - "Analytical order": [ - 1, - 4, - 6, - 7, - 8, - 11, - 12, - 15, - 18, - 19, - 29 - ], - "Inject. volume (μL)": [ - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6 - ], - "Included": [ - True, - True, - True, - True, - True, - True, - True, - True, - True, - True, - True - ] + "Standard", + ], + "Class ID": [3, 5, 3, 6, 2, 2, 2, 2, 2, 2, 3], + "Batch": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + "Analytical order": [1, 4, 6, 7, 8, 11, 12, 15, 18, 19, 29], + "Inject. 
volume (μL)": [6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6], + "Included": [True, True, True, True, True, True, True, True, True, True, True], } - return pd.DataFrame(data = d) + return pd.DataFrame(data=d) + -@pytest.fixture +@pytest.fixture def processed_dataframe() -> pd.DataFrame: d = { - 'sampleName': [ + "sampleName": [ "1_instrumental_blank_01", "4_Alkane_mix_04", "6_instrumental_blank_06", @@ -132,9 +88,9 @@ def processed_dataframe() -> pd.DataFrame: "15_QC_non-dilute_15", "18_QC_4_18", "19_QC_8_19", - "29_instrument_blank_29" + "29_instrument_blank_29", ], - 'sampleType': [ + "sampleType": [ "Standard", "Standard", "Standard", @@ -145,118 +101,102 @@ def processed_dataframe() -> pd.DataFrame: "QC", "QC", "QC", - "Standard" - ], - "class": [ - 3, - 5, - 3, - 6, - 2, - 2, - 2, - 2, - 2, - 2, - 3 - ], - "batch": [ - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1 - ], - "injectionOrder": [ - 1, - 4, - 6, - 7, - 8, - 11, - 12, - 15, - 18, - 19, - 29 - ] + "Standard", + ], + "class": [3, 5, 3, 6, 2, 2, 2, 2, 2, 2, 3], + "batch": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + "injectionOrder": [1, 4, 6, 7, 8, 11, 12, 15, 18, 19, 29], } - return pd.DataFrame(data = d) + return pd.DataFrame(data=d) + @pytest.fixture def alkanes() -> pd.DataFrame: d = { - "carbon_number": [ - 12, 13, 14, 15, 16, 17, 18, 19, 20 - ], - "rt": [ - 2.8, 3.0, 3.3, 3.7, 4.2, 4.6, 5.0, 5.4, 5.7 - ] + "carbon_number": [12, 13, 14, 15, 16, 17, 18, 19, 20], + "rt": [2.8, 3.0, 3.3, 3.7, 4.2, 4.6, 5.0, 5.4, 5.7], } - return pd.DataFrame(data = d) + return pd.DataFrame(data=d) + -@pytest.mark.parametrize("file_name", - ["batch_specification1.csv", "batch_specification1.xlsx", "batch_specification1.txt"]) +@pytest.mark.parametrize( + "file_name", + [ + "batch_specification1.csv", + "batch_specification1.xlsx", + "batch_specification1.txt", + ], +) def test_read_file(file_name: str, dataframe: pd.DataFrame): file_path = __location__.joinpath("test_data", file_name) - #file_path = os.path.join("tests", "test_data", 
file_name) + # file_path = os.path.join("tests", "test_data", file_name) actual = read_file(str(file_path)) assert actual.equals(dataframe) + def test_read_file_error(dataframe: pd.DataFrame): file_path = os.path.join("tests", "test_data", "batch_specification1.prn") - with pytest.raises(ValueError, match = r"Unsupported file format. Please provide a CSV, Excel, or TSV file."): + with pytest.raises( + ValueError, + match=r"Unsupported file format. Please provide a CSV, Excel, or TSV file.", + ): read_file(file_path) + def test_save_dataframe_as_tsv(dataframe: pd.DataFrame, tmp_path: str): out_path = os.path.join(tmp_path, "batch_specification1.tsv") save_dataframe_as_tsv(dataframe, out_path) - actual = pd.read_csv(out_path, sep='\t') + actual = pd.read_csv(out_path, sep="\t") assert actual.equals(dataframe) + def test_read_save_dataframe_as_tsv_error(dataframe: pd.DataFrame, tmp_path: str): out_path = os.path.join(tmp_path, "batch_specification1.prn") - with pytest.raises(ValueError, match = r"Unsupported file format. Please point to a TSV file."): + with pytest.raises(ValueError, match=r"Unsupported file format. 
Please point to a TSV file."): save_dataframe_as_tsv(dataframe, out_path) + @pytest.mark.skip(reason="Test fails due to a inconsistency in the input file (metadata)") def test_process_metadata_file(processed_dataframe: pd.DataFrame, tmp_path: str): file_path = os.path.join("tests", "test_data", "batch_specification1.csv") out_path = os.path.join(tmp_path, "processed_batch_specification1.tsv") - process_metadata_file(file_path, out_path) - actual = pd.read_csv(out_path, sep='\t') + process_metadata_file(file_path, out_path) + actual = pd.read_csv(out_path, sep="\t") assert actual.equals(processed_dataframe) -@pytest.mark.parametrize("file_name", - ["batch_specification1.csv", "batch_specification1.xlsx", "batch_specification1.txt"]) + +@pytest.mark.parametrize( + "file_name", + [ + "batch_specification1.csv", + "batch_specification1.xlsx", + "batch_specification1.txt", + ], +) def test_read_file_colnames_input(file_name: str, dataframe: pd.DataFrame): file_path = __location__.joinpath("test_data", file_name) - #file_path = os.path.join("tests", "test_data", file_name) + # file_path = os.path.join("tests", "test_data", file_name) actual_df = read_file(str(file_path)) actual = actual_df.columns expected = dataframe.columns assert expected.equals(actual) + def test_process_metadata_file_colnames_output(processed_dataframe: pd.DataFrame, tmp_path: str): file_path = os.path.join("tests", "test_data", "batch_specification1.csv") out_path = os.path.join(tmp_path, "processed_batch_specification1.tsv") - process_metadata_file(file_path, out_path) + process_metadata_file(file_path, out_path) expected = processed_dataframe.columns - actual_df = pd.read_csv(out_path, sep='\t') + actual_df = pd.read_csv(out_path, sep="\t") actual = actual_df.columns assert expected.equals(actual) + def test_process_alkane_ri_file(alkanes: pd.DataFrame, tmp_path: str): file_path = os.path.join("tests", "test_data", "Alkane_RI_ATHLETE_1.txt") out_path = os.path.join(tmp_path, 
"processed_Alkane_RI_ATHLETE_1.tsv") - process_alkane_ri_file(file_path, out_path) - actual = pd.read_csv(out_path, sep ='\t') - assert actual.equals(alkanes) \ No newline at end of file + process_alkane_ri_file(file_path, out_path) + actual = pd.read_csv(out_path, sep="\t") + assert actual.equals(alkanes)