-
Notifications
You must be signed in to change notification settings - Fork 0
/
workflows.py
300 lines (293 loc) · 17.2 KB
/
workflows.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
################################################################
# Python Script for Setting Up and Running Workflows on OSCAR
################################################################
from getpass import getpass, getuser
from subprocess import call, PIPE, Popen
import shlex
#import fileinput
#import sys
import os
import re
# Note: Requires python 3 to run
#
# Overview: This python script asks the user for some basic inputs and then, based on these inputs, sets up their desired workflow tool on OSCAR
# Create a function to replace text in config file that gets used in main while loop
def replacetext(search_text, replace_text, file_changing):
    """Rewrite a file in place, substituting every match of a pattern.

    search_text   -- regular-expression pattern handed to the ``re`` module
    replace_text  -- replacement string (``re`` replacement-template rules apply)
    file_changing -- path of the file to edit; it must already exist

    Returns a fixed status string for display to the user.
    """
    pattern = re.compile(search_text)
    with open(file_changing, 'r+') as handle:
        # Pull the whole file into memory and apply the substitution
        updated = pattern.sub(replace_text, handle.read())
        # Rewind, overwrite from the start, then drop any leftover tail
        # in case the new content is shorter than the old content
        handle.seek(0)
        handle.write(updated)
        handle.truncate()
    return "HPC Default Resources Updated"
# Create function to get Nextflow system defaults
def get_defaults(string_you_want):
    """Return the Nextflow default-resource settings as a single line of text.

    Runs ``grep`` for ``string_you_want`` in the file named by the
    module-level ``next_config`` variable, removes the "//default mem",
    "//default time" and "//default cpus" marker comments from the matching
    lines, and joins what is left into one space-separated string, e.g.
    ``"memory = 4.GB time = 1.h cpus = 1 "``.

    string_you_want -- the grep pattern to look for (e.g. "//default")

    Returns an empty string when grep matches nothing or cannot read the file.
    """
    # Pass an argv list so the pattern and path are never re-split on spaces;
    # "--" keeps a pattern that starts with "-" from being read as an option.
    process = Popen(["grep", "--", string_you_want, next_config],
                    stdout=PIPE, stderr=PIPE)
    stdout, _ = process.communicate()
    # Decode the bytes properly instead of parsing their repr() text, so the
    # result does not depend on the exact indentation used in the config file.
    text = stdout.decode("utf-8", "replace")
    for marker in ("//default mem", "//default time", "//default cpus"):
        text = text.replace(marker, "")
    # Fold newlines, tabs and runs of spaces into single spaces, then drop the
    # leading indentation of the first matching line.
    text = re.sub(r"\s+", " ", text)
    return text.lstrip()
# Greet the user before any questions are asked
print("\nWelcome to a program designed to help you easily set up and run workflow management systems on OSCAR!\n")
# Resolve the user's name and home directory once, then derive the locations
# of the Nextflow and Snakemake configuration files that are edited later
username = getuser()
homedir = os.path.expanduser("~")
next_config = "%s/nextflow_setup/nextflow.config" % homedir
snake_config = "%s/.config/snakemake/oscar/config.yaml" % homedir
# ----------------------------------------------------------------
# Interactive setup: helper definitions followed by the main loop
# ----------------------------------------------------------------

# Text shown once the chosen tool has been configured
NEXTFLOW_HEADER = """
******************************************************************
NEXTFLOW is now set up and configured and ready to run on OSCAR!
******************************************************************
"""
NEXTFLOW_INSTRUCTIONS = """
To further customize your pipeline for efficiency, you can enter the following
label '<LabelName>' options right within processes in your Nextflow .nf script:
1. label 'OSCAR_small_job' (comes with memory = 4 GB, time = 1 hour, cpus = 1)
2. label 'OSCAR_medium_job' (comes with memory = 8 GB, time = 16 hours, cpus = 2)
3. label 'OSCAR_large_job' (comes with memory = 16 GB, time = 24 hours, cpus = 2)
"""
SNAKEMAKE_HEADER = """
***********************************************************************
~~Snakemake~~ is now set up and configured and ready to run on OSCAR!
***********************************************************************
"""
NEXTFLOW_SETUP_ERROR = "Nextflow software setup error, please contact: [email protected]"
SNAKEMAKE_SETUP_ERROR = "Snakemake software setup error, please contact: [email protected]"


def _ask_yes_no(question):
    """Keep prompting until the user answers yes or no; return "yes" or "no"."""
    while True:
        answer = input(question).lower().strip()
        if answer in ("yes", "no"):
            return answer
        print("Please enter either Yes or No.")


def _ask_int(question, minimum, maximum, range_message):
    """Keep prompting until the user enters a whole number within range.

    question      -- prompt text
    minimum       -- smallest accepted value (inclusive)
    maximum       -- largest accepted value (inclusive), or None for no limit
    range_message -- text printed when the number is outside the range

    A trailing unit word such as "16 GB" or "2h" is tolerated, but anything
    else ("1.5", "-3", "abc") is rejected rather than having its digits glued
    together into a different number.
    """
    while True:
        reply = input(question)
        match = re.fullmatch(r"\s*(\d+)\s*[A-Za-z]*\s*", reply)
        if match is None:
            print("Invalid entry, please give a number.")
            continue
        value = int(match.group(1))
        if value < minimum or (maximum is not None and value > maximum):
            print(range_message)
            continue
        return value


def _install_tool(setup_dir, env_script):
    """Copy a bundled setup directory into $HOME and run its environment script.

    setup_dir  -- directory name under ~/workflows_on_OSCAR/install_me
    env_script -- shell script inside that directory to run with bash

    Returns True when every step exits successfully, False otherwise.
    """
    source = homedir + '/workflows_on_OSCAR/install_me/' + setup_dir
    target = homedir + '/' + setup_dir
    try:
        # Only copy when the target is missing: "cp -r" into an existing
        # directory would nest a second copy inside it.
        if not os.path.isdir(target):
            if call(['cp', '-r', source, target]) != 0:
                return False
        return call(['bash', target + '/' + env_script]) == 0
    except OSError:
        return False


def _store_scm_credentials(git_user, git_token):
    """Write the Nextflow scm file and move it to ~/.nextflow/scm.

    Raises OSError when the file cannot be written or moved.
    """
    scm_file = homedir + '/nextflow_setup/scm'
    txt = "providers {\n\n github {\n user = " + git_user + "\n password = " + git_token + "\n }\n\n}"
    # Create the file readable/writable by the owner only *before* the token
    # is written, and close it so the content is on disk before the move.
    descriptor = os.open(scm_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(descriptor, 'w') as scm_handle:
        scm_handle.write(txt)
    # A pre-existing file keeps its old mode, so tighten it explicitly too
    os.chmod(scm_file, 0o600)
    # Move to needed location in order for nextflow to work with nf-core
    nextflow_home = homedir + '/.nextflow'
    os.makedirs(nextflow_home, exist_ok=True)
    if call(['mv', scm_file, nextflow_home + '/scm']) != 0:
        raise OSError("could not move " + scm_file + " into " + nextflow_home)


def _setup_nextflow():
    """Run the whole interactive Nextflow setup; returns None in every case."""
    # Set up environment, if it doesnt exist
    if not _install_tool('nextflow_setup', 'nextflow_env.sh'):
        print(NEXTFLOW_SETUP_ERROR)
        return
    # Ask for inputs for scm file so that Nextflow has github access
    git_user = input("What is your GitHub user name? ")
    git_token = getpass(prompt="What is your GitHub token (we will keep this secret) - [Hit Enter when Done]? ")
    try:
        _store_scm_credentials(git_user, git_token)
    except OSError:
        print(NEXTFLOW_SETUP_ERROR)
        return
    # Prompt user to specify HPC resources, if desired
    default_resources = get_defaults("//default")
    print("Currently the Nextflow default for HPC resources is: " + default_resources)
    hpc = _ask_yes_no("Do you want to change these default resources for your Nextflow pipeline [Yes or No]? ")
    if hpc == "yes":
        mem = _ask_int("How much memory (in GB) would you like? ",
                       1, None, "Enter an integer greater than zero.")
        hours = _ask_int("How much time (in hours, with 1 being the minimum) would you like? ",
                         1, None, "Please enter an integer greater than zero.")
        cpu_request = _ask_int("How many cores (i.e., cpus) would you like? ",
                               1, None, "Please enter an integer greater than zero.")
        # Current values, in the order memory, time, cpus
        default_numbers = [int(s) for s in default_resources.replace(".", " ").split() if s.isdigit()]
        if len(default_numbers) < 3:
            print("Could not read the current defaults from " + next_config)
            print(NEXTFLOW_SETUP_ERROR)
            return
        replacements = {
            'memory = ' + str(default_numbers[0]) + '.GB //default mem': 'memory = ' + str(mem) + '.GB //default mem',
            'time = ' + str(default_numbers[1]) + '.h //default time': 'time = ' + str(hours) + '.h //default time',
            'cpus = ' + str(default_numbers[2]) + ' //default cpus': 'cpus = ' + str(cpu_request) + ' //default cpus',
        }
        for old_text, new_text in replacements.items():
            # The keys are literal text, so escape them before they are
            # used as a regular expression ("." must not match any character)
            replacetext(re.escape(old_text), new_text, next_config)
    else:
        print("Keeping defaults!")
    # Copy nextflow.config file to .nextflow folder so it is a software default
    if call(['cp', next_config, homedir + '/.nextflow/config']) != 0:
        print(NEXTFLOW_SETUP_ERROR)
        return
    # Print output messaging for user
    print("\nOUTPUT MESSAGE:")
    print(NEXTFLOW_HEADER)
    print("\nYour default resources for Nextflow are: " + get_defaults("//default") + "\n")
    print(NEXTFLOW_INSTRUCTIONS)
    print("\nREADME:")
    print("\nPlease see https://github.com/compbiocore/workflows_on_OSCAR for further details on how to add the above label options to your workflow.\n")
    print("Note the setup is designed such that pipelines downloaded from nf-core with their own resource specs within the .nf script will override your defaults.\n")
    print("To run Nextflow commands, you must first type and run the nextflow_start command.\n")
    print("To further learn how to easily run your Nextflow pipelines on OSCAR, use the Nextflow template shell script located in your ~/nextflow_setup directory.\n")


def _set_snakemake_mail_user(user_email):
    """Point every "mail-user:" entry in the Snakemake config at user_email.

    Done in Python rather than through a shell "sed" command so characters
    such as "/" or "'" in the address cannot break or alter the command.
    A copy of the previous content is kept next to the config with a ".bak"
    suffix. Raises OSError when the config cannot be read or written.
    """
    with open(snake_config) as config_in:
        content = config_in.read()
    with open(snake_config + '.bak', 'w') as backup:
        backup.write(content)
    new_mail = "mail-user: " + user_email
    # A function replacement keeps backslashes in the address literal
    content = re.sub(r"mail-user:.*", lambda _match: new_mail, content)
    with open(snake_config, 'w') as config_out:
        config_out.write(content)


def _read_snakemake_defaults():
    """Return the text between "[" and "]" on the default-resources line.

    Returns None when the Snakemake config has no such line or is unreadable.
    """
    try:
        with open(snake_config) as config_in:
            for line in config_in:
                if line.startswith("default-resources:") and '[' in line:
                    return line.split('[', 1)[1].split(']')[0]
    except OSError:
        return None
    return None


def _setup_snakemake():
    """Run the whole interactive Snakemake setup; returns None in every case."""
    # Set up environment, if it doesnt exist
    if not _install_tool('snakemake_setup', 'snakemake_env.sh'):
        print(SNAKEMAKE_SETUP_ERROR)
        return
    # Ask for inputs for configuring setup
    user_email = input("\nWhat is your Brown email address? ").strip()
    try:
        _set_snakemake_mail_user(user_email)
    except OSError:
        print(SNAKEMAKE_SETUP_ERROR)
        return
    # Prompt user to specify HPC resources, if desired
    default_resources = _read_snakemake_defaults()
    if default_resources is None:
        print("Could not read the current defaults from " + snake_config)
        print(SNAKEMAKE_SETUP_ERROR)
        return
    print("\nCurrently the Snakemake default for HPC resources is: " + default_resources)
    hpc = _ask_yes_no("Do you want to change these default resources for your Snakemake pipeline [Yes or No]? ")
    if hpc == "yes":
        # Asked in GB, stored in the config in MB
        mem = 1000 * _ask_int("How much memory (in GB) would you like? ",
                              1, 199, "Enter an integer greater than zero and less than 200.")
        minutes = _ask_int("How much time (in minutes, with 5 being the minimum) would you like? ",
                           5, None, "Please enter an integer equal to or greater than 5.")
        cpu_request = _ask_int("How many cores (i.e., cpus) would you like? ",
                               1, 31, "Please enter an integer greater than zero and less than 32.")
        # Current values, in the order cpus, mem_mb, time_min
        default_numbers = [int(s) for s in default_resources.replace("=", " ").replace(",", " ").split() if s.isdigit()]
        if len(default_numbers) < 3:
            print("Could not read the current defaults from " + snake_config)
            print(SNAKEMAKE_SETUP_ERROR)
            return
        replacements = {
            'cpus=' + str(default_numbers[0]) + ',': 'cpus=' + str(cpu_request) + ',',
            'mem_mb=' + str(default_numbers[1]) + ',': 'mem_mb=' + str(mem) + ',',
            'time_min=' + str(default_numbers[2]) + ']': 'time_min=' + str(minutes) + ']',
        }
        for old_text, new_text in replacements.items():
            # Literal text, so escape it before it is used as a pattern
            replacetext(re.escape(old_text), new_text, snake_config)
    else:
        print("Keeping defaults!")
    # Print output messaging for user
    print("\nOUTPUT MESSAGE:")
    print(SNAKEMAKE_HEADER)
    current = _read_snakemake_defaults()
    print("\nYour default resources for Snakemake are: " + (current or "") + "\n")
    print("\nREADME:")
    print("\nPlease see https://github.com/compbiocore/workflows_on_OSCAR for further details on how to configure and run your Snakemake workflows.\n")
    print("Note that the setup is designed such that snakefiles can specify tasks with their own resource specs that override the defaults.\n")
    print("To run Snakemake commands, you must first type and run the snakemake_start command.\n")
    print("To further learn how to easily run your Snakemake pipelines on OSCAR, use the Snakemake template shell script located in your ~/snakemake_setup directory.\n")


# Main while loop: keep asking until a supported tool is named, then run its
# setup exactly once (whether it succeeds or reports an error) and stop
while True:
    # First ask user if they'd like to use Nextflow or Snakemake
    program = input("Please type which software you wish to use: Nextflow or Snakemake? ")
    program = program.lower().strip()
    if program == "nextflow":
        _setup_nextflow()
        break
    if program == "snakemake":
        _setup_snakemake()
        break
    print("Please re-enter a valid option.")