Skip to content

Commit

Permalink
Merge branch 'master' into implement-eval
Browse files Browse the repository at this point in the history
  • Loading branch information
ntalluri authored Aug 15, 2024
2 parents fe5811a + b4f8d51 commit 8493f97
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 1 deletion.
1 change: 1 addition & 0 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ algorithms:
# Assume that if a dataset label does not change, the lists of associated input files do not change
datasets:
-
# Labels can only contain letters, numbers, or underscores
label: data0
node_files: ["node-prizes.txt", "sources.txt", "targets.txt"]
# DataLoader.py can currently only load a single edge file, which is the primary network
Expand Down
2 changes: 1 addition & 1 deletion config/egfr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ datasets:
data_dir: input
edge_files:
- phosphosite-irefindex13.0-uniprot.txt
label: tps-egfr
label: tps_egfr
node_files:
- tps-egfr-prizes.txt
other_files: []
Expand Down
5 changes: 5 additions & 0 deletions spras/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,11 @@ def process_config(self, raw_config):
# When Snakemake parses the config file it loads the datasets as OrderedDicts not dicts
# Convert to dicts to simplify the yaml logging
self.datasets = {dataset["label"]: dict(dataset) for dataset in raw_config["datasets"]}

# Reject any dataset label that is not made up solely of word characters
# (letters, numbers, or underscores).
# re.fullmatch is used instead of re.match with '^\w+$' because '$' also
# matches just before a trailing newline, so a label such as "data0\n"
# would otherwise slip through the check.
for key in self.datasets:
    if re.fullmatch(r'\w+', key) is None:
        raise ValueError(f"Dataset label '{key}' contains invalid values. Dataset labels can only contain letters, numbers, or underscores.")

try:
self.gold_standards = {gold_standard["label"]: dict(gold_standard) for gold_standard in raw_config["gold_standard"]}
Expand Down
17 changes: 17 additions & 0 deletions test/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,3 +106,20 @@ def test_config_container_registry(self):
test_config["container_registry"]["owner"] = ""
config.init_global(test_config)
assert (config.config.container_prefix == config.DEFAULT_CONTAINER_PREFIX)

def test_error_dataset_label(self):
    """Each dataset label containing a disallowed character must raise ValueError."""
    base_config = get_test_config()
    # Every label below has at least one character that is not a letter,
    # number, or underscore.
    invalid_labels = ("test$", "@test'", "[test]", "test-test", "✉")

    for label in invalid_labels:
        base_config["datasets"] = [{"label": label}]
        # Loading the config is expected to fail on the invalid label
        with pytest.raises(ValueError):
            config.init_global(base_config)

def test_correct_dataset_label(self):
    """Labels built only from letters, numbers, or underscores must load without error."""
    base_config = get_test_config()
    valid_labels = ("test", "123", "test123", "123test", "_", "test_test", "_test", "test_")

    for label in valid_labels:
        base_config["datasets"] = [{"label": label}]
        # Any exception raised here fails the test; none is expected
        config.init_global(base_config)

0 comments on commit 8493f97

Please sign in to comment.