From c3ae809ddc5d51dce13c15c5580da062041871a1 Mon Sep 17 00:00:00 2001
From: ntalluri <nehatalluri@live.com>
Date: Wed, 24 Jul 2024 11:10:26 -0500
Subject: [PATCH] Comment on what causes the code to break when certain
 attributes are left out of an evaluation set

---
 spras/config.py     |  3 +--
 spras/evaluation.py | 14 ++++++++++----
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/spras/config.py b/spras/config.py
index ccc507b5..4f97dfba 100644
--- a/spras/config.py
+++ b/spras/config.py
@@ -146,7 +146,6 @@ def process_config(self, raw_config):
         # Convert to dicts to simplify the yaml logging
         self.datasets = {dataset["label"]: dict(dataset) for dataset in raw_config["datasets"]}
 
-        # TODO: turn into try except
         try:
             self.gold_standards = {gold_standard["label"]: dict(gold_standard) for gold_standard in raw_config["gold_standard"]}
         except:
@@ -238,7 +237,7 @@ def process_config(self, raw_config):
         self.analysis_include_ml = raw_config["analysis"]["ml"]["include"]
         self.analysis_include_evalution = raw_config["analysis"]["evaluation"]["include"]
 
-        # the code will run correctly without this section below
+        # COMMENT: the code will run correctly without the section below thanks to the empty dict from the try/except above
         # TODO: decide if this part is needed
         if self.gold_standards == {} and self.analysis_include_evalution == True:
             print("Gold standard data not provided. Evaluation analysis cannot run.")
diff --git a/spras/evaluation.py b/spras/evaluation.py
index 49d060dd..bde5a316 100644
--- a/spras/evaluation.py
+++ b/spras/evaluation.py
@@ -13,10 +13,10 @@ class Evaluation:
 
     def __init__(self, gold_standard_dict):
         self.label = None
+        self.datasets = None
         self.node_table = None
         # self.edge_table = None TODO: later iteration
         self.load_files_from_dict(gold_standard_dict)
-        self.datasets = None
         return
 
     def merge_gold_standard_input(gs_dict, gs_file):
@@ -54,10 +54,16 @@ def load_files_from_dict(self, gold_standard_dict):
 
         returns: none
         """
-        self.label = gold_standard_dict["label"]
-        self.datasets = gold_standard_dict["datasets"]
+        self.label = gold_standard_dict["label"] # COMMENT: cannot be empty, will break with a NoneType exception 
+        self.datasets = gold_standard_dict["datasets"] # COMMENT: can be empty, snakemake will not run evaluation due to dataset_gold_standard_pairs in snakemake file
+
+        try: 
+            # COMMENT: node_files cannot be empty; snakemake will still run evaluation even if it is empty
+            node_data_files = gold_standard_dict["node_files"][0] # TODO: single file for now 
+        except:
+            if not gold_standard_dict["node_files"]:
+                raise ValueError (f"Node_files for {self.label} is an empty list, cannot run evalution")
 
-        node_data_files = gold_standard_dict["node_files"][0] # TODO: single file for now
         data_loc = gold_standard_dict["data_dir"]
 
         single_node_table = pd.read_table(os.path.join(data_loc, node_data_files), header=None)