commenting what causes the code to break when certain attributes are left out of an evaluation set
ntalluri · Jul 24, 2024 · c3ae809 · c3ae809
1 parent 0a1e305
commit c3ae809
Show file tree

Hide file tree

Showing 2 changed files with 11 additions and 6 deletions.
diff --git a/spras/config.py b/spras/config.py
@@ -146,7 +146,6 @@ def process_config(self, raw_config):
         # Convert to dicts to simplify the yaml logging
         self.datasets = {dataset["label"]: dict(dataset) for dataset in raw_config["datasets"]}
 
-        # TODO: turn into try except
         try:
             self.gold_standards = {gold_standard["label"]: dict(gold_standard) for gold_standard in raw_config["gold_standard"]}
         except:
@@ -238,7 +237,7 @@ def process_config(self, raw_config):
         self.analysis_include_ml = raw_config["analysis"]["ml"]["include"]
         self.analysis_include_evalution = raw_config["analysis"]["evaluation"]["include"]
 
-        # the code will run correctly without this section below
+        # COMMENT: the code will run correctly without the section below because of the empty-dict fallback in the try/except above
         # TODO: decide if this part is needed
         if self.gold_standards == {} and self.analysis_include_evalution == True:
             print("Gold standard data not provided. Evaluation analysis cannot run.")

diff --git a/spras/evaluation.py b/spras/evaluation.py
@@ -13,10 +13,10 @@ class Evaluation:
 
     def __init__(self, gold_standard_dict):
         self.label = None
+        self.datasets = None
         self.node_table = None
         # self.edge_table = None TODO: later iteration
         self.load_files_from_dict(gold_standard_dict)
-        self.datasets = None
         return
 
     def merge_gold_standard_input(gs_dict, gs_file):
@@ -54,10 +54,16 @@ def load_files_from_dict(self, gold_standard_dict):
 
         returns: none
         """
-        self.label = gold_standard_dict["label"]
-        self.datasets = gold_standard_dict["datasets"]
+        self.label = gold_standard_dict["label"] # COMMENT: cannot be empty, will break with a NoneType exception 
+        self.datasets = gold_standard_dict["datasets"] # COMMENT: can be empty, snakemake will not run evaluation due to dataset_gold_standard_pairs in snakemake file
+
+        try: 
+            # COMMENT: cannot be empty, snakemake will run evaluation even if empty
+            node_data_files = gold_standard_dict["node_files"][0] # TODO: single file for now 
+        except:
+            if not gold_standard_dict["node_files"]:
+                raise ValueError (f"Node_files for {self.label} is an empty list, cannot run evalution")
 
-        node_data_files = gold_standard_dict["node_files"][0] # TODO: single file for now
         data_loc = gold_standard_dict["data_dir"]
 
         single_node_table = pd.read_table(os.path.join(data_loc, node_data_files), header=None)