From c3ae809ddc5d51dce13c15c5580da062041871a1 Mon Sep 17 00:00:00 2001 From: ntalluri Date: Wed, 24 Jul 2024 11:10:26 -0500 Subject: [PATCH] commenting what causes the code to break when certain attributes are left out of an evaluation set --- spras/config.py | 3 +-- spras/evaluation.py | 14 ++++++++++---- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/spras/config.py b/spras/config.py index ccc507b5..4f97dfba 100644 --- a/spras/config.py +++ b/spras/config.py @@ -146,7 +146,6 @@ def process_config(self, raw_config): # Convert to dicts to simplify the yaml logging self.datasets = {dataset["label"]: dict(dataset) for dataset in raw_config["datasets"]} - # TODO: turn into try except try: self.gold_standards = {gold_standard["label"]: dict(gold_standard) for gold_standard in raw_config["gold_standard"]} except: @@ -238,7 +237,7 @@ def process_config(self, raw_config): self.analysis_include_ml = raw_config["analysis"]["ml"]["include"] self.analysis_include_evalution = raw_config["analysis"]["evaluation"]["include"] - # the code will run correctly without this section below + # COMMENT: the code will run correctly without this section below due to empty dict in try except above # TODO: decide if this part is needed if self.gold_standards == {} and self.analysis_include_evalution == True: print("Gold standard data not provided. Evaluation analysis cannot run.") diff --git a/spras/evaluation.py b/spras/evaluation.py index 49d060dd..bde5a316 100644 --- a/spras/evaluation.py +++ b/spras/evaluation.py @@ -13,10 +13,10 @@ class Evaluation: def __init__(self, gold_standard_dict): self.label = None + self.datasets = None self.node_table = None # self.edge_table = None TODO: later iteration self.load_files_from_dict(gold_standard_dict) - self.datasets = None return def merge_gold_standard_input(gs_dict, gs_file): @@ -54,10 +54,16 @@ def load_files_from_dict(self, gold_standard_dict): returns: none """ - self.label = gold_standard_dict["label"] - self.datasets = gold_standard_dict["datasets"] + self.label = gold_standard_dict["label"] # COMMENT: cannot be empty, will break with a NoneType exception + self.datasets = gold_standard_dict["datasets"] # COMMENT: can be empty, snakemake will not run evaluation due to dataset_gold_standard_pairs in snakemake file + + try: + # COMMENT: cannot be empty, snakemake will run evaluation even if empty + node_data_files = gold_standard_dict["node_files"][0] # TODO: single file for now + except: + if not gold_standard_dict["node_files"]: + raise ValueError (f"Node_files for {self.label} is an empty list, cannot run evalution") - node_data_files = gold_standard_dict["node_files"][0] # TODO: single file for now data_loc = gold_standard_dict["data_dir"] single_node_table = pd.read_table(os.path.join(data_loc, node_data_files), header=None)