-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathSnakefile
101 lines (79 loc) · 2.89 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
from snakemake.io import directory
# Workflow versions passed to check_outputs.py via --versions.
VERSIONS = ["1"]

# Directory that source_ro_crates creates and passes as --output-dir.
OUTPUT_DIRS = "data"

# Turtle file written by merge_files and consumed by create_ro_crate.
MERGED_FILE = "merged.ttl"

# RO-Crate metadata directory name.
# NOTE(review): no rule in this file references this constant; the rules
# spell the path out literally. Confirm whether it is still needed.
ro_crate_metadata_dir = "ro-crate-metadata/"
# Default target: request the RO-Crate metadata directory, which
# create_ro_crate declares as its output.
rule all:
    input:
        "ro-crate-metadata"
# Download RO-Crate metadata and produce created_files.json.
#
# The workflow-ID range lives in a single `params` entry so that the download
# step and the check step always operate on the same set of workflows.
rule source_ro_crates:
    output:
        "created_files.json"
    params:
        workflow_ids="1-20"
    shell:
        """
        # Create the output directory if it doesn't exist:
        mkdir -p {OUTPUT_DIRS}

        # Append the current directory to PYTHONPATH (or set it if it is unset):
        export PYTHONPATH="${{PYTHONPATH:+$PYTHONPATH:}}$(pwd)"

        # Run the source_crates script to download the RO Crate metadata,
        # then check the output files and generate created_files.json:

        # - all versions of all workflows:
        # python workflowhub_graph/source_crates.py --prod --all-versions
        # python workflowhub_graph/check_outputs.py --versions {VERSIONS} --output-dir {OUTPUT_DIRS}

        # - all versions of the workflows in the configured ID range:
        python workflowhub_graph/source_crates.py --workflow-ids {params.workflow_ids} --prod --all-versions
        python workflowhub_graph/check_outputs.py --workflow-ids {params.workflow_ids} --versions {VERSIONS} --output-dir {OUTPUT_DIRS}
        """
# Print the contents of created_files.json to standard output.
#
# The command reads the declared input via {input} instead of repeating the
# file name, so the rule keeps working if the input path is ever changed.
rule report_created_files:
    input:
        "created_files.json"
    shell:
        """
        echo "Files created:"
        cat {input}
        """
# Merge the JSON-LD files in the output directory into a single RDF graph
# and write it as a TTL file.
#
# Raises ValueError when created_files.json lists no files, so that an empty
# download fails here rather than inside the merge script.
rule merge_files:
    input:
        "created_files.json"
    output:
        MERGED_FILE
    run:
        import json
        import os

        # Load the list of created files from the declared input:
        with open(input[0], encoding="utf-8") as manifest:
            created_files = json.load(manifest)

        # If no files are available to merge, raise an exception:
        if not created_files:
            raise ValueError(f"No files to merge in {OUTPUT_DIRS} directory.")

        # merge.py is given a glob pattern; it is double-quoted so the shell
        # passes it through unexpanded.
        pattern = os.path.join(OUTPUT_DIRS, "*.json")
        shell(f'python workflowhub_graph/merge.py {output[0]} -p "{pattern}"')
# Build the RO-Crate metadata directory from the merged graph.
#
# The script runs inside a throw-away virtual environment (rocrate_env).
# An EXIT trap removes that environment however the shell terminates, so a
# failing pip install or script run does not leave it behind.
rule create_ro_crate:
    input:
        MERGED_FILE
    params:
        workflow_file="Snakefile"
    output:
        directory("ro-crate-metadata/")
    shell:
        """
        # Remove the virtual environment on exit, whether the steps below succeed or fail
        trap 'rm -rf rocrate_env' EXIT

        # Create a new virtual environment
        python -m venv rocrate_env

        # Activate the virtual environment
        source rocrate_env/bin/activate

        # Upgrade pip to avoid any potential issues
        pip install --upgrade pip

        # pip uninstall urllib3

        # Install required packages
        pip install requests urllib3 rocrate

        # Run the create_ro_crate script
        python workflowhub_graph/create_ro_crate.py {input} {params.workflow_file} {output}

        # Deactivate the virtual environment
        deactivate
        """