diff --git a/Snakefile b/Snakefile index 3035aa58..c65e738b 100644 --- a/Snakefile +++ b/Snakefile @@ -35,7 +35,7 @@ def get_dataset(_datasets, label): algorithms = list(algorithm_params) algorithms_with_params = [f'{algorithm}-params-{params_hash}' for algorithm, param_combos in algorithm_params.items() for params_hash in param_combos.keys()] dataset_labels = list(_config.config.datasets.keys()) -dataset_gold_standard_pairs = [f"{dataset}-{gs_values['label']}" for gs_values in _config.config.gold_standards.values() for dataset in gs_values['datasets']] +dataset_gold_standard_pairs = [f"{dataset}-{gs_values['label']}" for gs_values in _config.config.gold_standards.values() for dataset in gs_values['dataset_labels']] # Get algorithms that are running multiple parameter combinations def algo_has_mult_param_combos(algo): diff --git a/config/config.yaml b/config/config.yaml index ce832f15..20d2e4cd 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -119,17 +119,18 @@ datasets: gold_standard: - + # Labels can only contain letters, numbers, or underscores label: gs0 node_files: ["gs_nodes0.txt"] # edge_files: [] TODO: later iteration data_dir: "input" # Set of datasets (dataset labels) to compare with the specific gold standard dataset - datasets: ["data0"] + dataset_labels: ["data0"] - label: gs1 node_files: ["gs_nodes1.txt"] data_dir: "input" - datasets: ["data1", "data0"] + dataset_labels: ["data1", "data0"] # If we want to reconstruct then we should set run to true. # TODO: if include is true above but run is false here, algs are not run. diff --git a/spras/config.py b/spras/config.py index 3fad39e9..8fa96694 100644 --- a/spras/config.py +++ b/spras/config.py @@ -151,16 +151,25 @@ def process_config(self, raw_config): if not bool(re.match(pattern, key)): raise ValueError(f"Dataset label \'{key}\' contains invalid values. 
Dataset labels can only contain letters, numbers, or underscores.") + # parse gold standard information try: self.gold_standards = {gold_standard["label"]: dict(gold_standard) for gold_standard in raw_config["gold_standard"]} except: self.gold_standards = {} + # check that gold_standard labels are formatted correctly for key in self.gold_standards: pattern = r'^\w+$' if not bool(re.match(pattern, key)): raise ValueError(f"Gold standard label \'{key}\' contains invalid values. Gold standard labels can only contain letters, numbers, or underscores.") + # check that all the dataset labels in the gold standards are existing dataset labels + dataset_labels = set(self.datasets.keys()) + gold_standard_dataset_labels = {dataset_label for value in self.gold_standards.values() for dataset_label in value['dataset_labels']} + for label in gold_standard_dataset_labels: + if label not in dataset_labels: + raise ValueError(f"Dataset label '{label}' provided in gold standards does not exist in the existing dataset labels.") + # Code snipped from Snakefile that may be useful for assigning default labels # dataset_labels = [dataset.get('label', f'dataset{index}') for index, dataset in enumerate(datasets)] # Maps from the dataset label to the dataset list index @@ -242,11 +251,8 @@ def process_config(self, raw_config): self.analysis_include_ml = raw_config["analysis"]["ml"]["include"] self.analysis_include_evalution = raw_config["analysis"]["evaluation"]["include"] - # COMMENT: the code will run correctly without this section below due to empty dict in try except above - # TODO: decide if this part is needed if self.gold_standards == {} and self.analysis_include_evalution == True: - print("Gold standard data not provided. Evaluation analysis cannot run.") - self.analysis_include_evalution = False + raise ValueError("Evaluation analysis cannot run as gold standard data is not provided. 
Please set evaluation include to false or provide gold standard data.") if 'aggregate_per_algorithm' not in self.ml_params: self.analysis_include_ml_aggregate_algo = False diff --git a/spras/evaluation.py b/spras/evaluation.py index 66893f07..a6e1a916 100644 --- a/spras/evaluation.py +++ b/spras/evaluation.py @@ -54,10 +54,10 @@ def load_files_from_dict(self, gold_standard_dict): returns: none """ - self.label = gold_standard_dict["label"] # COMMENT: cannot be empty, will break with a NoneType exception - self.datasets = gold_standard_dict["datasets"] # COMMENT: can be empty, snakemake will not run evaluation due to dataset_gold_standard_pairs in snakemake file + self.label = gold_standard_dict["label"] # cannot be empty, will break with a NoneType exception + self.datasets = gold_standard_dict["dataset_labels"] # can be empty, snakemake will not run evaluation due to dataset_gold_standard_pairs in snakemake file - # COMMENT: cannot be empty, snakemake will run evaluation even if empty + # cannot be empty, snakemake will run evaluation even if empty node_data_files = gold_standard_dict["node_files"][0] # TODO: single file for now data_loc = gold_standard_dict["data_dir"] @@ -80,13 +80,13 @@ def precision(file_paths: Iterable[Path], node_table: pd.DataFrame, output_file: @param node_table: the gold standard nodes @param output_file: the filename to save the precision of each pathway """ - y_true = node_table['NODEID'].tolist() + y_true = set(node_table['NODEID']) results = [] for file in file_paths: df = pd.read_table(file, sep="\t", header = 0, usecols=["Node1", "Node2"]) - y_pred = list(set(df['Node1']).union(set(df['Node2']))) - all_nodes = set(y_true).union(set(y_pred)) + y_pred = set(df['Node1']).union(set(df['Node2'])) + all_nodes = y_true.union(y_pred) y_true_binary = [1 if node in y_true else 0 for node in all_nodes] y_pred_binary = [1 if node in y_pred else 0 for node in all_nodes]