"""
==========================
AFQ with HCP data
==========================
This example demonstrates how to use the AFQ API to analyze HCP data.
For this example to run properly, you will need to gain access to the HCP data.
This can be done by following the instructions on the webpage
`here <https://wiki.humanconnectome.org/display/PublicData/How+To+Connect+to+Connectome+Data+via+AWS>`_.
We will use the ``Cloudknot`` library to run our AFQ analysis in the AWS
Batch service (see also
`this example <http://yeatmanlab.github.io/pyAFQ/auto_examples/cloudknot_example.html>`_).
In the following we will use ``Cloudknot`` to run multiple
configurations of pyAFQ on the HCP dataset. Specifically, here we will run
pyAFQ with different tractography seeding strategies.
"""
##########################################################################
# Import cloudknot and set the correct region. The HCP data is stored in
# `us-east-1`, so it's best to analyze it there.
import configparser
import itertools
import cloudknot as ck
import os.path as op
ck.set_region('us-east-1')
##########################################################################
# Define a function to run. This function allows us to pass in the subject ID
# for each subject we would like to analyze, as well as strategies for seeding
# tractography (different masks and/or different numbers of seeds per voxel).
def afq_process_subject(subject, seed_mask, n_seeds,
                        aws_access_key, aws_secret_key):
    # define a function that each job will run
    # In this case, each process does a single subject
    import logging
    import s3fs
    # all imports must be at the top of the function
    # cloudknot installs the appropriate packages from pip
    from AFQ.data.fetch import fetch_hcp
    from AFQ.api.group import GroupAFQ
    import AFQ.definitions.image as afm
    # set logging level to your choice
    logging.basicConfig(level=logging.INFO)
    log = logging.getLogger(__name__)
    # Download the given subject to the AWS Batch machine from s3
    _, hcp_bids = fetch_hcp(
        [subject],
        profile_name=False,
        study="HCP_1200",
        aws_access_key_id=aws_access_key,
        aws_secret_access_key=aws_secret_key)
    # We make a new seed mask for each process based off of the
    # seed_mask argument, which is a string.
    # This is to avoid any complications with pickling the masks.
    if seed_mask == "roi":
        seed_mask_obj = afm.RoiImage()
    elif seed_mask == "fa":
        seed_mask_obj = afm.ScalarImage("dti_fa")
    else:
        seed_mask_obj = afm.FullImage()
    # Determine whether n_seeds gives a total number of randomly placed
    # seeds (large values) or a number of seeds per voxel (small values)
    if n_seeds > 3:
        random_seeds = True
    else:
        random_seeds = False
    # set the tracking_params based off our inputs
    tracking_params = {
        "seed_mask": seed_mask_obj,
        "n_seeds": n_seeds,
        "random_seeds": random_seeds}
    # use the segmentation file from HCP to get a brain mask,
    # where everything not labelled 0 is considered a part of the brain
    brain_mask_definition = afm.LabelledImageFile(
        suffix='seg', filters={'scope': 'dmriprep'},
        exclusive_labels=[0])
    # define the api GroupAFQ object
    myafq = GroupAFQ(
        hcp_bids,
        brain_mask_definition=brain_mask_definition,
        tracking_params=tracking_params)
    # export_all runs the entire pipeline and creates many useful derivatives
    myafq.export_all()
    # upload the results to some location on s3
    myafq.upload_to_s3(
        s3fs.S3FileSystem(),
        (f"my_study_bucket/my_study_prefix_{seed_mask}_{n_seeds}"
         f"/derivatives/afq"))
##########################################################################
# In this example, we will process the data from the following subjects
subjects = ["103818", "105923", "111312"]
##########################################################################
# We will test combinations of different conditions:
# subjects, seed masks, and numbers of seeds.
seed_mask = ["fa", "roi"]
n_seeds = [1, 2, 1000000, 2000000]
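# Given the ``n_seeds > 3`` heuristic in ``afq_process_subject`` above, 1 and
# 2 are interpreted as seeds per voxel, while 1000000 and 2000000 are
# interpreted as total numbers of randomly placed seeds.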
##########################################################################
# The following line creates all the combinations of the above lists, such
# that every subject is run with every mask and every number of seeds.
args = list(itertools.product(subjects, seed_mask, n_seeds))
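# For example, the first few entries of ``args`` are:
# ("103818", "fa", 1), ("103818", "fa", 2), ("103818", "fa", 1000000), ...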
##########################################################################
# We assume that the credentials for HCP usage are stored in the home
# directory in a `~/.aws/credentials` file. This is where these credentials
# are stored if the AWS CLI is used to configure the profile. We use the
# standard library ``configparser`` module to get the relevant hcp keys
# from there.
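# The file is assumed to contain an ``hcp`` profile section, along these
# lines (values are placeholders)::
#
#     [hcp]
#     aws_access_key_id = <your access key id>
#     aws_secret_access_key = <your secret access key>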
CP = configparser.ConfigParser()
CP.read_file(open(op.join(op.expanduser('~'), '.aws', 'credentials')))
CP.sections()
aws_access_key = CP.get('hcp', 'AWS_ACCESS_KEY_ID')
aws_secret_key = CP.get('hcp', 'AWS_SECRET_ACCESS_KEY')
##########################################################################
# The following function attaches your AWS keys to each list of arguments in
# a list of lists. We append the keys to each argument list so that they can
# be passed to the function running on AWS Batch and used there to download
# the data onto the Batch machines.
def attach_keys(list_of_arg_lists):
    new_list_of_arg_lists = []
    for args in list_of_arg_lists:
        arg_ls = list(args)
        arg_ls.extend([aws_access_key, aws_secret_key])
        new_list_of_arg_lists.append(arg_ls)
    return new_list_of_arg_lists
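# For example, the argument tuple ("103818", "fa", 1) becomes the list
# ["103818", "fa", 1, aws_access_key, aws_secret_key].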
##########################################################################
# This calls the function to attach the access keys to each argument list.
args = attach_keys(args)
##########################################################################
# Define the :class:`Knot` object to run your jobs on. See
# `this example <http://yeatmanlab.github.io/pyAFQ/auto_examples/cloudknot_example.html>`_
# for more details about the arguments to the object.
knot = ck.Knot(
    name='afq-hcp-tractography-201110-0',
    func=afq_process_subject,
    base_image='python:3.8',
    image_github_installs="https://github.com/yeatmanlab/pyAFQ.git",
    pars_policies=('AmazonS3FullAccess',),
    bid_percentage=100)
##########################################################################
# This launches a process for each combination.
# Because `starmap` is `True`, each list in `args` will be unfolded
# and passed into `afq_process_subject` as arguments.
result_futures = knot.map(args, starmap=True)
##########################################################################
# The following method can be called repeatedly in a Jupyter notebook
# to view the progress of jobs::
#
#     knot.view_jobs()
#
# You can also view the status of a specific job::
#
#     knot.jobs[0].status
##########################################################################
# When all jobs are finished, remember to clobber the knot to destroy all
# the resources that were created in AWS.
result_futures.result() # waits for futures to resolve, not needed in notebook
knot.clobber(clobber_pars=True, clobber_repo=True, clobber_image=True)
##########################################################################
# We continue processing by creating another knot, which takes the resulting
# profiles of each combination and combines them all into one CSV file.
def afq_combine_profiles(seed_mask, n_seeds):
    from AFQ.api import download_and_combine_afq_profiles
    download_and_combine_afq_profiles(
        "my_study_bucket", f"my_study_prefix_{seed_mask}_{n_seeds}")
knot2 = ck.Knot(
    name='afq_combine_subjects-201110-0',
    func=afq_combine_profiles,
    base_image='python:3.8',
    image_github_installs="https://github.com/yeatmanlab/pyAFQ.git",
    pars_policies=('AmazonS3FullAccess',),
    bid_percentage=100)
##########################################################################
# The arguments to this call to :meth:`map` are all the different
# configurations of pyAFQ that we ran.
seed_mask = ["fa", "roi"]
n_seeds = [1, 2, 1000000, 2000000]
args = list(itertools.product(seed_mask, n_seeds))
result_futures2 = knot2.map(args, starmap=True)
result_futures2.result()
knot2.clobber(clobber_pars=True, clobber_repo=True, clobber_image=True)