From 5634b232c72a131466acac563cff8c18e6febe47 Mon Sep 17 00:00:00 2001 From: KristinaGomoryova Date: Thu, 20 Jun 2024 15:36:24 +0200 Subject: [PATCH 1/7] cli for process_metadata_file --- cli_rcx-tk.py | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 cli_rcx-tk.py diff --git a/cli_rcx-tk.py b/cli_rcx-tk.py new file mode 100644 index 0000000..27f2814 --- /dev/null +++ b/cli_rcx-tk.py @@ -0,0 +1,45 @@ +# CLI for processing the metadata files + +from pathlib import Path +import click +import os +import pandas as pd + +def read_file(file_path: str) -> pd.DataFrame: + file_extension = os.path.splitext(Path(file_path))[1].lower() + if file_extension == '.csv': + return pd.read_csv(Path(file_path), encoding='UTF-8') + elif file_extension in ['.xls', '.xlsx']: + return pd.read_excel(Path(file_path)) + elif file_extension in ['.tsv', '.txt']: + return pd.read_csv(Path(file_path), sep='\t') + else: + raise ValueError("Unsupported file format. Please provide a CSV, Excel, or TSV file.") + +def save_dataframe_as_tsv(df: pd.DataFrame, file_path: str) -> None: + if os.path.splitext(Path(file_path))[1] != ".tsv": + raise ValueError("Unsupported file format. Please point to a TSV file.") + df.to_csv(Path(file_path), sep='\t', index=False) + + +@click.command() +@click.argument('file_path') +@click.argument('out_path') + +def process_metadata_file(file_path: str, out_path: str) -> None: + columns_to_keep = { + 'File name': 'sampleName', + 'Type': 'sampleType', + 'Class ID': 'class', + 'Batch': 'batch', + 'Analytical order': 'injectionOrder' + } + + df = read_file(Path(file_path)) + df = df[list(columns_to_keep.keys())].rename(columns=columns_to_keep) + df['sampleName'] = df['sampleName'].str.replace(' ', '_') + save_dataframe_as_tsv(df, Path(out_path)) + click.echo("Done!") + +if __name__ == "__main__": + process_metadata_file() \ No newline at end of file From 5393aef92b101c0eeb0ceb7110cd398a8b1bf346 Mon Sep 17 00:00:00 2001 From: KristinaGomoryova Date: Thu, 20 Jun 2024 16:00:18 +0200 Subject: [PATCH 2/7] cli for both metadata and alkane files processing --- src/rcx_tk/__main__.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 src/rcx_tk/__main__.py diff --git a/src/rcx_tk/__main__.py b/src/rcx_tk/__main__.py new file mode 100644 index 0000000..a8ab013 --- /dev/null +++ b/src/rcx_tk/__main__.py @@ -0,0 +1,20 @@ +import click + +from rcx_tk.process_metadata_file import process_alkane_ri_file, process_metadata_file + + +@click.command() +@click.argument('method') +@click.argument('file_path') +@click.argument('out_path') +def main(method, file_path, out_path): + if method == "metadata": + process_metadata_file(file_path, out_path) + click.echo("Metadata done!") + elif method == "alkanes": + process_alkane_ri_file(file_path, out_path) + click.echo("Alkanes done!") + + +if __name__ == "__main__": + main() \ No newline at end of file From 250f323508c86b8267bf9103a0426316fca54c95 Mon Sep 17 00:00:00 2001 From: KristinaGomoryova Date: Thu, 20 Jun 2024 16:01:03 +0200 Subject: [PATCH 3/7] cli only for metadata deleted --- cli_rcx-tk.py | 45 --------------------------------------------- 1 file changed, 45 deletions(-) delete mode 100644 cli_rcx-tk.py diff --git a/cli_rcx-tk.py b/cli_rcx-tk.py deleted file mode 100644 index 27f2814..0000000 --- a/cli_rcx-tk.py +++ /dev/null @@ -1,45 +0,0 @@ -# CLI for processing the metadata files - -from pathlib import Path -import click -import os -import pandas as pd - -def read_file(file_path: str) -> pd.DataFrame: - file_extension = os.path.splitext(Path(file_path))[1].lower() - if file_extension == '.csv': - return pd.read_csv(Path(file_path), encoding='UTF-8') - elif file_extension in ['.xls', '.xlsx']: - return pd.read_excel(Path(file_path)) - elif file_extension in ['.tsv', '.txt']: - return pd.read_csv(Path(file_path), sep='\t') - else: - raise ValueError("Unsupported file format. Please provide a CSV, Excel, or TSV file.") - -def save_dataframe_as_tsv(df: pd.DataFrame, file_path: str) -> None: - if os.path.splitext(Path(file_path))[1] != ".tsv": - raise ValueError("Unsupported file format. Please point to a TSV file.") - df.to_csv(Path(file_path), sep='\t', index=False) - - -@click.command() -@click.argument('file_path') -@click.argument('out_path') - -def process_metadata_file(file_path: str, out_path: str) -> None: - columns_to_keep = { - 'File name': 'sampleName', - 'Type': 'sampleType', - 'Class ID': 'class', - 'Batch': 'batch', - 'Analytical order': 'injectionOrder' - } - - df = read_file(Path(file_path)) - df = df[list(columns_to_keep.keys())].rename(columns=columns_to_keep) - df['sampleName'] = df['sampleName'].str.replace(' ', '_') - save_dataframe_as_tsv(df, Path(out_path)) - click.echo("Done!") - -if __name__ == "__main__": - process_metadata_file() \ No newline at end of file From 0f9ca8269f977161ed222710be30c4c6729f4d1f Mon Sep 17 00:00:00 2001 From: KristinaGomoryova Date: Thu, 20 Jun 2024 16:12:51 +0200 Subject: [PATCH 4/7] --help section for arguments added --- src/rcx_tk/__main__.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/rcx_tk/__main__.py b/src/rcx_tk/__main__.py index a8ab013..5971ab4 100644 --- a/src/rcx_tk/__main__.py +++ b/src/rcx_tk/__main__.py @@ -8,6 +8,13 @@ @click.argument('file_path') @click.argument('out_path') def main(method, file_path, out_path): + """Process metadata or alkane file. + + Args: + method (string): A type of the file which is provided: a metadata file or an alkane file. + file_path (path): A path to the input data. + out_path (path): A path where the processed data will be exported to. + """ if method == "metadata": process_metadata_file(file_path, out_path) click.echo("Metadata done!") From 471799566a95d1ac9318a832c9eef3cc156be1d5 Mon Sep 17 00:00:00 2001 From: KristinaGomoryova Date: Thu, 20 Jun 2024 16:24:28 +0200 Subject: [PATCH 5/7] method provided as option, not argument --- src/rcx_tk/__main__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/rcx_tk/__main__.py b/src/rcx_tk/__main__.py index 5971ab4..dfbcc3f 100644 --- a/src/rcx_tk/__main__.py +++ b/src/rcx_tk/__main__.py @@ -4,14 +4,13 @@ @click.command() -@click.argument('method') +@click.option('--method', type=click.Choice(['metadata', 'alkanes']), required = True, help = 'A file type to be processed, either metadata or alkanes file.') @click.argument('file_path') @click.argument('out_path') def main(method, file_path, out_path): """Process metadata or alkane file. Args: - method (string): A type of the file which is provided: a metadata file or an alkane file. file_path (path): A path to the input data. out_path (path): A path where the processed data will be exported to. """ From f4df83be73b3f986704373910adb0f0aedb10d19 Mon Sep 17 00:00:00 2001 From: KristinaGomoryova Date: Thu, 20 Jun 2024 16:34:50 +0200 Subject: [PATCH 6/7] click added --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 3ef80b6..74334c4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,6 +10,7 @@ readme = "README.md" python = "^3.11" pandas = "^2.2.2" pyxlsx = "^1.1.3" +click = "^8.1.7" [tool.poetry.group.dev.dependencies] build = "^1.2.1" From 5a2aa02433e352922fb6b5910e70b7b1ce27efe0 Mon Sep 17 00:00:00 2001 From: KristinaGomoryova Date: Thu, 20 Jun 2024 17:02:11 +0200 Subject: [PATCH 7/7] cli available also via poetry --- pyproject.toml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 74334c4..37a4ead 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -127,4 +127,7 @@ filename = "pyproject.toml" filename = "CITATION.cff" [[tool.bumpversion.files]] -filename = "docs/conf.py" \ No newline at end of file +filename = "docs/conf.py" + +[tool.poetry.scripts] +rcx_tk = "rcx_tk.__main__:main" \ No newline at end of file