Skip to content

Commit

Permalink
Make the id field in the JSON more interpretable. Closes #125
Browse files Browse the repository at this point in the history
  • Loading branch information
wolski committed Nov 10, 2023
1 parent 5a90dc9 commit 914452f
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 15 deletions.
6 changes: 5 additions & 1 deletion proteobench/modules/dda_quant/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,12 +126,16 @@ def generate_datapoint(
self, intermediate: pd.DataFrame, input_format: str, user_input: dict
) -> Datapoint:
"""Method used to compute metadata for the provided result."""
current_datetime = datetime.datetime.now()
formatted_datetime = current_datetime.strftime("%Y%m%d_%H%M%S_%f")


result_datapoint = Datapoint(
id=input_format
+ "_"
+ user_input["version"]
+ "_"
+ str(datetime.datetime.now()),
+ formatted_datetime,
search_engine=input_format,
software_version=user_input["version"],
fdr_psm=user_input["fdr_psm"],
Expand Down
81 changes: 67 additions & 14 deletions test/test_module_dda_quant.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import datetime
import os
import unittest

Expand All @@ -8,15 +9,19 @@
from proteobench.modules.dda_quant.module import Module
from proteobench.modules.dda_quant.parse import ParseInputs
from proteobench.modules.dda_quant.parse_settings import (
DDA_QUANT_RESULTS_REPO, INPUT_FORMATS, ParseSettings)
DDA_QUANT_RESULTS_REPO,
INPUT_FORMATS,
ParseSettings,
)
from proteobench.modules.dda_quant.plot import PlotDataPoint
from proteobench.modules.dda_quant.module import Datapoint

# generate_input_field


TESTDATA_DIR = os.path.join(os.path.dirname(__file__), "data")
TESTDATA_FILES = {
#"WOMBAT": os.path.join(TESTDATA_DIR, "WOMBAT_stand_pep_quant_mergedproline.csv"),
# "WOMBAT": os.path.join(TESTDATA_DIR, "WOMBAT_stand_pep_quant_mergedproline.csv"),
"MaxQuant": os.path.join(TESTDATA_DIR, "MaxQuant_evidence_sample.txt"),
"MSFragger": os.path.join(TESTDATA_DIR, "MSFragger_combined_ion.tsv"),
"AlphaPept": os.path.join(TESTDATA_DIR, "AlphaPept_subset.csv"),
Expand Down Expand Up @@ -59,12 +64,18 @@ def process_file(format_name: str):


class TestOutputFileReading(unittest.TestCase):
supported_formats = ("MaxQuant", "MSFragger", "AlphaPept", "Sage") #"WOMBAT",
supported_formats = ("MaxQuant", "MSFragger", "AlphaPept", "Sage") # "WOMBAT",
""" Simple tests for reading csv input files."""

def test_search_engines_supported(self):
"""Test whether the expected formats are supported."""
for format_name in ("MaxQuant", "AlphaPept", "MSFragger", "Proline", "Sage"): #, "WOMBAT"
for format_name in (
"MaxQuant",
"AlphaPept",
"MSFragger",
"Proline",
"Sage",
): # , "WOMBAT"
self.assertTrue(format_name in INPUT_FORMATS)

def test_input_file_loading(self):
Expand Down Expand Up @@ -128,7 +139,7 @@ class TestPlot(unittest.TestCase):
"""Test if the plots return a figure."""

def test_plot_metric(self):
#all_datapoints = pd.read_json(DDA_QUANT_RESULTS_PATH)
# all_datapoints = pd.read_json(DDA_QUANT_RESULTS_PATH)
all_datapoints = read_results_json_repo(DDA_QUANT_RESULTS_REPO)
all_datapoints["old_new"] = "old"
fig = PlotDataPoint().plot_metric(all_datapoints)
Expand All @@ -141,30 +152,72 @@ def test_plot_bench(self):
Nyeast = 1000
Necoli = 500
Nhuman = 2000

yeastRatio = np.random.normal(loc=-1, scale=1, size=Nyeast)
humanRatio = np.random.normal(loc=0, scale=1, size=Nhuman)
ecoliRatio = np.random.normal(loc=2, scale=1, size=Necoli)
combined_ratios = np.concatenate([yeastRatio, humanRatio, ecoliRatio])

human_strings = ["HUMAN"] * Nhuman
ecoli_strings = ["ECOLI"] * Necoli
yeast_strings = ["YEAST"] * Nyeast

# Concatenate the lists to create a single list
combined_list = human_strings + ecoli_strings + yeast_strings

combineddf = pd.DataFrame({'SPECIES': combined_list, '1|2_ratio': combined_ratios})
combineddf['HUMAN'] = combineddf['SPECIES'] == 'HUMAN'
combineddf['ECOLI'] = combineddf['SPECIES'] == 'ECOLI'
combineddf['YEAST'] = combineddf['SPECIES'] == 'YEAST'


combineddf = pd.DataFrame(
{"SPECIES": combined_list, "1|2_ratio": combined_ratios}
)
combineddf["HUMAN"] = combineddf["SPECIES"] == "HUMAN"
combineddf["ECOLI"] = combineddf["SPECIES"] == "ECOLI"
combineddf["YEAST"] = combineddf["SPECIES"] == "YEAST"

fig = PlotDataPoint().plot_bench(combineddf)
#fig.write_html("dummy.html")
# fig.write_html("dummy.html")
self.assertIsNotNone(fig)


class TestDatapoint(unittest.TestCase):
    """Test that a Datapoint can be constructed from user-supplied settings."""

    def test_Datapoint_constructor(self):
        """Build a Datapoint with a timestamped id and check its identifying fields."""
        input_format = "MaxQuant"
        user_input = {
            "version": "1.0",
            "fdr_psm": 0.01,
            "fdr_peptide": 0.05,
            "fdr_protein": 0.1,
            "mbr": 1,
            "precursor_mass_tolerance": 0.02,
            "precursor_mass_tolerance_unit": "Da",
            "fragment_mass_tolerance": 0.02,
            "fragment_mass_tolerance_unit": "Da",
            "search_enzyme_name": "Trypsin",
            "allowed_missed_cleavage": 1,
            "min_peptide_length": 6,
            "max_peptide_length": 30,
        }
        # Compact, separator-free timestamp (date_time_microseconds) so the id
        # contains no spaces or colons.
        current_datetime = datetime.datetime.now()
        formatted_datetime = current_datetime.strftime("%Y%m%d_%H%M%S_%f")
        expected_id = (
            input_format + "_" + user_input["version"] + "_" + formatted_datetime
        )

        result_datapoint = Datapoint(
            id=expected_id,
            search_engine=input_format,
            software_version=user_input["version"],
            fdr_psm=user_input["fdr_psm"],
            fdr_peptide=user_input["fdr_peptide"],
            fdr_protein=user_input["fdr_protein"],
            MBR=user_input["mbr"],
            precursor_tol=user_input["precursor_mass_tolerance"],
            precursor_tol_unit=user_input["precursor_mass_tolerance_unit"],
            # NOTE(review): "fragmnent_tol" spelling is kept as-is; presumably it
            # matches the Datapoint field name -- verify before renaming.
            fragmnent_tol=user_input["fragment_mass_tolerance"],
            fragment_tol_unit=user_input["fragment_mass_tolerance_unit"],
            enzyme_name=user_input["search_enzyme_name"],
            missed_cleavages=user_input["allowed_missed_cleavage"],
            min_pep_length=user_input["min_peptide_length"],
            max_pep_length=user_input["max_peptide_length"],
        )

        # Assumes Datapoint exposes its constructor keywords as attributes
        # (e.g. a dataclass) -- TODO confirm against the Datapoint definition.
        self.assertIsInstance(result_datapoint, Datapoint)
        self.assertEqual(result_datapoint.id, expected_id)
        self.assertEqual(result_datapoint.search_engine, input_format)
        self.assertEqual(result_datapoint.software_version, user_input["version"])
        # The id must follow "<engine>_<version>_<YYYYMMDD>_<HHMMSS>_<microseconds>".
        self.assertRegex(
            result_datapoint.id, r"^MaxQuant_1\.0_\d{8}_\d{6}_\d{6}$"
        )


if __name__ == "__main__":
unittest.main()

0 comments on commit 914452f

Please sign in to comment.