From d8114ab76e768ce1a302bf87da0354833cecdd70 Mon Sep 17 00:00:00 2001 From: KristinaGomoryova Date: Wed, 3 Jul 2024 09:58:29 +0200 Subject: [PATCH 1/2] keywords updated to match Issue#5 --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 1e30a44..9bfad9b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,6 +5,7 @@ description = "This package adjusts and cleans the metadata file provided by a u authors = ["Zargham Ahmad " ] license = "MIT" readme = "README.md" +keywords = ["metadata", "alkanes", "metabolomics"] [tool.poetry.dependencies] python = "^3.12" From ece56c026ee46b81fae8c8d9948775ed8b7a947e Mon Sep 17 00:00:00 2001 From: KristinaGomoryova Date: Wed, 3 Jul 2024 10:45:12 +0200 Subject: [PATCH 2/2] ruff errors resolved --- pyproject.toml | 16 +++++------ src/rcx_tk/process_metadata_file.py | 44 +++++++++++++++-------------- 2 files changed, 31 insertions(+), 29 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 9bfad9b..f645889 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,7 +43,7 @@ extras = dev [tool.ruff] # Enable Pyflakes `E` and `F` codes by default. -select = [ +lint.select = [ "F", # Pyflakes "E", # pycodestyle (error) "W", # pycodestyle (warning) @@ -57,7 +57,7 @@ select = [ # "PLW", # Warning ] -ignore = [ +lint.ignore = [ 'D100', # Missing module docstring 'D104', # Missing public package docstring # The following list excludes rules irrelevant to the Google style @@ -76,8 +76,8 @@ ignore = [ ] # Allow autofix for all enabled rules (when `--fix`) is provided. -fixable = ["A", "B", "C", "D", "E", "F", "I"] -unfixable = [] +lint.fixable = ["A", "B", "C", "D", "E", "F", "I"] +lint.unfixable = [] exclude = [ ".bzr", @@ -102,16 +102,16 @@ exclude = [ ".venv", "scripts", ] -per-file-ignores = {} +lint.per-file-ignores = {} # Allow unused variables when underscore-prefixed. -dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" +lint.dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" target-version = "py39" line-length = 120 -[tool.ruff.isort] +[tool.ruff.lint.isort] known-first-party = ["rcx_tk"] force-single-line = true no-lines-before = ["future","standard-library","third-party","first-party","local-folder"] @@ -129,4 +129,4 @@ filename = "pyproject.toml" filename = "CITATION.cff" [[tool.bumpversion.files]] -filename = "docs/conf.py" \ No newline at end of file +filename = "docs/conf.py" \ No newline at end of file diff --git a/src/rcx_tk/process_metadata_file.py b/src/rcx_tk/process_metadata_file.py index b0d9ced..eb9314d 100644 --- a/src/rcx_tk/process_metadata_file.py +++ b/src/rcx_tk/process_metadata_file.py @@ -1,54 +1,56 @@ -import pandas as pd import os +import pandas as pd + def read_file(file_path): """Reads a file and returns a DataFrame based on the file extension.""" file_extension = os.path.splitext(file_path)[1].lower() - if file_extension == '.csv': + if file_extension == ".csv": return pd.read_csv(file_path) - elif file_extension in ['.xls', '.xlsx']: + elif file_extension in [".xls", ".xlsx"]: return pd.read_excel(file_path) - elif file_extension == '.txt': - return pd.read_csv(file_path, sep='\t') + elif file_extension == ".txt": + return pd.read_csv(file_path, sep="\t") else: raise ValueError("Unsupported file format. Please provide a CSV, Excel, or TSV file.") + def save_dataframe_as_tsv(df, file_path): """Saves a DataFrame as a TSV file.""" - output_file = f'processed_{os.path.splitext(os.path.basename(file_path))[0]}.tsv' - df.to_csv(output_file, sep='\t', index=False) + output_file = f"processed_{os.path.splitext(os.path.basename(file_path))[0]}.tsv" + df.to_csv(output_file, sep="\t", index=False) print(f"Processed file saved as: {output_file}") + def process_metadata_file(file_path): """Processes a metadata file, keeping and renaming specific columns.""" columns_to_keep = { - 'File name': 'sampleName', - 'Type': 'sampleType', - 'Class ID': 'class', - 'Batch': 'batch', - 'Analytical order': 'injectionOrder' + "File name": "sampleName", + "Type": "sampleType", + "Class ID": "class", + "Batch": "batch", + "Analytical order": "injectionOrder", } - + df = read_file(file_path) df = df[list(columns_to_keep.keys())].rename(columns=columns_to_keep) - df['sampleName'] = df['sampleName'].str.replace(' ', '_') + df["sampleName"] = df["sampleName"].str.replace(" ", "_") save_dataframe_as_tsv(df, file_path) + def process_alkane_ri_file(file_path): """Processes an Alkane RI file, keeping and renaming specific columns.""" - columns_to_keep = { - 'Carbon number': 'carbon_number', - 'RT (min)': 'rt' - } - + columns_to_keep = {"Carbon number": "carbon_number", "RT (min)": "rt"} + df = read_file(file_path) df.columns = df.columns.str.strip() df = df.rename(columns=columns_to_keep) save_dataframe_as_tsv(df, file_path) + # File paths -metadata_file_path = 'metadata file path' -alkane_file_path = 'alkane file path' +metadata_file_path = "metadata file path" +alkane_file_path = "alkane file path" # Process files process_metadata_file(metadata_file_path)