From 914452f1ff7cf63d511248b1ce8bf2511f2f31ad Mon Sep 17 00:00:00 2001 From: Witold Wolski Date: Fri, 10 Nov 2023 13:19:04 +0100 Subject: [PATCH] id field in json better interpretable. Closes #125 --- proteobench/modules/dda_quant/module.py | 6 +- test/test_module_dda_quant.py | 81 ++++++++++++++++++++----- 2 files changed, 72 insertions(+), 15 deletions(-) diff --git a/proteobench/modules/dda_quant/module.py b/proteobench/modules/dda_quant/module.py index 64719a22..00b745ab 100644 --- a/proteobench/modules/dda_quant/module.py +++ b/proteobench/modules/dda_quant/module.py @@ -126,12 +126,16 @@ def generate_datapoint( self, intermediate: pd.DataFrame, input_format: str, user_input: dict ) -> Datapoint: """Method used to compute metadata for the provided result.""" + current_datetime = datetime.datetime.now() + formatted_datetime = current_datetime.strftime("%Y%m%d_%H%M%S_%f") + + result_datapoint = Datapoint( id=input_format + "_" + user_input["version"] + "_" - + str(datetime.datetime.now()), + + formatted_datetime, search_engine=input_format, software_version=user_input["version"], fdr_psm=user_input["fdr_psm"], diff --git a/test/test_module_dda_quant.py b/test/test_module_dda_quant.py index a6b20a83..d34c9ab2 100644 --- a/test/test_module_dda_quant.py +++ b/test/test_module_dda_quant.py @@ -1,3 +1,4 @@ +import datetime import os import unittest @@ -8,15 +9,19 @@ from proteobench.modules.dda_quant.module import Module from proteobench.modules.dda_quant.parse import ParseInputs from proteobench.modules.dda_quant.parse_settings import ( - DDA_QUANT_RESULTS_REPO, INPUT_FORMATS, ParseSettings) + DDA_QUANT_RESULTS_REPO, + INPUT_FORMATS, + ParseSettings, +) from proteobench.modules.dda_quant.plot import PlotDataPoint +from proteobench.modules.dda_quant.module import Datapoint # genereate_input_field TESTDATA_DIR = os.path.join(os.path.dirname(__file__), "data") TESTDATA_FILES = { - #"WOMBAT": os.path.join(TESTDATA_DIR, "WOMBAT_stand_pep_quant_mergedproline.csv"), + # "WOMBAT": os.path.join(TESTDATA_DIR, "WOMBAT_stand_pep_quant_mergedproline.csv"), "MaxQuant": os.path.join(TESTDATA_DIR, "MaxQuant_evidence_sample.txt"), "MSFragger": os.path.join(TESTDATA_DIR, "MSFragger_combined_ion.tsv"), "AlphaPept": os.path.join(TESTDATA_DIR, "AlphaPept_subset.csv"), @@ -59,12 +64,18 @@ def process_file(format_name: str): class TestOutputFileReading(unittest.TestCase): - supported_formats = ("MaxQuant", "MSFragger", "AlphaPept", "Sage") #"WOMBAT", + supported_formats = ("MaxQuant", "MSFragger", "AlphaPept", "Sage") # "WOMBAT", """ Simple tests for reading csv input files.""" def test_search_engines_supported(self): """Test whether the expected formats are supported.""" - for format_name in ("MaxQuant", "AlphaPept", "MSFragger", "Proline", "Sage"): #, "WOMBAT" + for format_name in ( + "MaxQuant", + "AlphaPept", + "MSFragger", + "Proline", + "Sage", + ): # , "WOMBAT" self.assertTrue(format_name in INPUT_FORMATS) def test_input_file_loading(self): @@ -128,7 +139,7 @@ class TestPlot(unittest.TestCase): """Test if the plots return a figure.""" def test_plot_metric(self): - #all_datapoints = pd.read_json(DDA_QUANT_RESULTS_PATH) + # all_datapoints = pd.read_json(DDA_QUANT_RESULTS_PATH) all_datapoints = read_results_json_repo(DDA_QUANT_RESULTS_REPO) all_datapoints["old_new"] = "old" fig = PlotDataPoint().plot_metric(all_datapoints) @@ -141,12 +152,12 @@ def test_plot_bench(self): Nyeast = 1000 Necoli = 500 Nhuman = 2000 - + yeastRatio = np.random.normal(loc=-1, scale=1, size=Nyeast) humanRatio = np.random.normal(loc=0, scale=1, size=Nhuman) ecoliRatio = np.random.normal(loc=2, scale=1, size=Necoli) combined_ratios = np.concatenate([yeastRatio, humanRatio, ecoliRatio]) - + human_strings = ["HUMAN"] * Nhuman ecoli_strings = ["ECOLI"] * Necoli yeast_strings = ["YEAST"] * Nyeast @@ -154,17 +165,59 @@ def test_plot_bench(self): # Concatenate the lists to create a single list combined_list = human_strings + ecoli_strings + yeast_strings - combineddf = pd.DataFrame({'SPECIES': combined_list, '1|2_ratio': combined_ratios}) - combineddf['HUMAN'] = combineddf['SPECIES'] == 'HUMAN' - combineddf['ECOLI'] = combineddf['SPECIES'] == 'ECOLI' - combineddf['YEAST'] = combineddf['SPECIES'] == 'YEAST' - - + combineddf = pd.DataFrame( + {"SPECIES": combined_list, "1|2_ratio": combined_ratios} + ) + combineddf["HUMAN"] = combineddf["SPECIES"] == "HUMAN" + combineddf["ECOLI"] = combineddf["SPECIES"] == "ECOLI" + combineddf["YEAST"] = combineddf["SPECIES"] == "YEAST" fig = PlotDataPoint().plot_bench(combineddf) - #fig.write_html("dummy.html") + # fig.write_html("dummy.html") self.assertIsNotNone(fig) +class TestDatapoint(unittest.TestCase): + """Test if the plots return a figure.""" + + def test_Datapoint_constructor(self): + input_format = "MaxQuant" + user_input = { + "version": "1.0", + "fdr_psm": 0.01, + "fdr_peptide": 0.05, + "fdr_protein": 0.1, + "mbr": 1, + "precursor_mass_tolerance": 0.02, + "precursor_mass_tolerance_unit": "Da", + "fragment_mass_tolerance": 0.02, + "fragment_mass_tolerance_unit": "Da", + "search_enzyme_name": "Trypsin", + "allowed_missed_cleavage": 1, + "min_peptide_length": 6, + "max_peptide_length": 30, + } + current_datetime = datetime.datetime.now() + formatted_datetime = current_datetime.strftime("%Y%m%d_%H%M%S_%f") + + result_datapoint = Datapoint( + id=input_format + "_" + user_input["version"] + "_" + formatted_datetime, + search_engine=input_format, + software_version=user_input["version"], + fdr_psm=user_input["fdr_psm"], + fdr_peptide=user_input["fdr_peptide"], + fdr_protein=user_input["fdr_protein"], + MBR=user_input["mbr"], + precursor_tol=user_input["precursor_mass_tolerance"], + precursor_tol_unit=user_input["precursor_mass_tolerance_unit"], + fragmnent_tol=user_input["fragment_mass_tolerance"], + fragment_tol_unit=user_input["fragment_mass_tolerance_unit"], + enzyme_name=user_input["search_enzyme_name"], + missed_cleavages=user_input["allowed_missed_cleavage"], + min_pep_length=user_input["min_peptide_length"], + max_pep_length=user_input["max_peptide_length"], + ) + + if __name__ == "__main__": unittest.main()