start of evaluation implementation
ntalluri committed Jul 9, 2024
1 parent 0557289 commit f001a6a
Showing 3 changed files with 62 additions and 0 deletions.
7 changes: 7 additions & 0 deletions Snakefile
@@ -102,6 +102,9 @@ def make_final_input(wildcards):
final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}{algorithm}-hac-clusters-horizontal.txt',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm=algorithms_mult_param_combos,algorithm_params=algorithms_with_params))
final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}{algorithm}-ensemble-pathway.txt',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm=algorithms_mult_param_combos,algorithm_params=algorithms_with_params))

# if _config.config.evaluation_include:
# final_input.extend(expand('{out_dir}{sep}{dataset}-{goldstandard}.txt',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm=algorithms_mult_param_combos,algorithm_params=algorithms_with_params))

if len(final_input) == 0:
# No analysis added yet, so add reconstruction output files if they exist.
# (if analysis is specified, these should be implicitly run).
@@ -153,6 +156,10 @@ rule merge_input:
dataset_dict = get_dataset(_config.config.datasets, wildcards.dataset)
runner.merge_input(dataset_dict, output.dataset_file)

# TODO: add a merge input for gold standard data?
# may need to update runner.py to add a merge_gs_input function


# The checkpoint is like a rule but can be used in dynamic workflows
# The workflow directed acyclic graph is re-evaluated after the checkpoint job runs
# If the checkpoint has not executed for the provided wildcard values, it will be run and then the rest of the
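The TODO in rule merge_input above sketches a gold standard counterpart in runner.py. Below is a minimal sketch of such a merge_gs_input helper, assuming it mirrors how merge_input builds and pickles a merged object and relying only on the Evaluation class added in this commit; everything beyond the names already mentioned in the TODO is an assumption, not part of this commit.

# Hypothetical runner.py helper hinted at by the TODO above
from spras.evaluation import Evaluation

def merge_gs_input(gold_standard_dict, gold_standard_file):
    """Build an Evaluation object from one gold standard config entry and pickle it."""
    evaluation = Evaluation(gold_standard_dict)
    evaluation.to_file(gold_standard_file)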
9 changes: 9 additions & 0 deletions config/config.yaml
@@ -115,6 +115,13 @@ datasets:
other_files: []
# Relative path from the spras directory
data_dir: "input"

gold_standard:
  -
    label: gs
    node_files: ["gs_nodes.txt"]
    # edge_files: [] TODO: later iteration
    data_dir: "input"

# If we want to reconstruct then we should set run to true.
# TODO: if include is true above but run is false here, algs are not run.
@@ -156,3 +163,5 @@ analysis:
linkage: 'ward'
# 'euclidean', 'manhattan', 'cosine'
metric: 'euclidean'
  evaluation:
    include: true
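The new gold_standard entry and the evaluation flag above feed the Evaluation class added below through a plain dict whose keys match the YAML fields. A minimal usage sketch, assuming the config parser passes the entry through unchanged:

from spras.evaluation import Evaluation

# Keys mirror the gold_standard entry in config.yaml
gold_standard_dict = {
    "label": "gs",
    "node_files": ["gs_nodes.txt"],
    "data_dir": "input",
}

evaluation = Evaluation(gold_standard_dict)  # reads input/gs_nodes.txt into a DataFrame
evaluation.to_file("gs.pickle")              # can be reloaded with Evaluation.from_file("gs.pickle")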
46 changes: 46 additions & 0 deletions spras/evaluation.py
@@ -0,0 +1,46 @@
import os
import pickle as pkl
import warnings

import pandas as pd


class Evaluation:

    def __init__(self, gold_standard_dict):
        self.label = None
        self.node_table = None
        # self.edge_table = None TODO: later iteration
        self.load_files_from_dict(gold_standard_dict)

    def to_file(self, file_name):
        """
        Saves evaluation object to a pickle file
        """
        with open(file_name, "wb") as f:
            pkl.dump(self, f)

    @classmethod
    def from_file(cls, file_name):
        """
        Loads evaluation object from a pickle file.
        Usage: evaluation = Evaluation.from_file(pickle_file)
        """
        with open(file_name, "rb") as f:
            return pkl.load(f)

    def load_files_from_dict(self, gold_standard_dict):
        self.label = gold_standard_dict["label"]
        node_data_files = gold_standard_dict["node_files"]
        data_loc = gold_standard_dict["data_dir"]

        # Read the first node file for now
        # TODO: loop over node_data_files and combine them into one pandas DataFrame
        single_node_table = pd.read_table(os.path.join(data_loc, node_data_files[0]))
        self.node_table = single_node_table
        # self.node_table = pd.DataFrame(node_set, columns=[self.NODE_ID])

    def precision_recall(self):
        # TODO: implement using
        # https://scikit-learn.org/stable/modules/generated/sklearn.metrics.average_precision_score.html
        pass

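The precision_recall stub above only links to scikit-learn for now. One hedged way it could score a reconstructed pathway's nodes against the gold standard nodes; the signature, the node-set inputs, and the choice of precision_score/recall_score are assumptions rather than this commit's design.

from sklearn.metrics import precision_score, recall_score

def precision_recall(gold_standard_nodes, pathway_nodes, all_nodes):
    """Score pathway nodes against gold standard nodes over a shared node universe."""
    # Label each node: 1 if in the gold standard / pathway, 0 otherwise
    y_true = [1 if node in gold_standard_nodes else 0 for node in all_nodes]
    y_pred = [1 if node in pathway_nodes else 0 for node in all_nodes]
    return precision_score(y_true, y_pred), recall_score(y_true, y_pred)

# Example: gold standard {A, B}, pathway {A, B, C, D} over nodes A-E -> precision 0.5, recall 1.0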