get_vertex_df.py
import os
import sys
from collections import defaultdict
from os.path import exists

import numpy as np
import pandas as pd
from nibabel.freesurfer.io import read_morph_data, read_annot
from nibabel.freesurfer.mghformat import load

from MIND_helpers import calculate_mind_network, is_outlier

def get_vertex_df(surf_dir, features, parcellation):
    '''
    INPUT SPECIFICATIONS:

    • surf_dir (str): Path to the directory containing all relevant
      FreeSurfer outputs for a subject (i.e. the label, mri, and surf
      subdirectories).

    • features (list): A list whose items may take the following forms:

        str:
        • One of ['CT','Vol','SA','MC','SD']. The function assumes these
          features live in surf_dir/surf and correspond to the files:
              CT:  ?h.thickness
              Vol: ?h.volume
              SA:  ?h.area
              MC:  ?h.curv
              SD:  ?h.sulc
        • A bare FreeSurfer suffix such as 'thickness', 'volume', or
          'sulc', referring to the default files in surf_dir/surf for
          both hemispheres. For example, 'thickness' is interpreted as
          surf_dir/surf/lh.thickness and surf_dir/surf/rh.thickness.
        • A FULL path with a question mark '?' marking the hemisphere
          position, e.g. "full/path/to/?h.feature". The function then
          looks for files exactly matching both "full/path/to/lh.feature"
          and "full/path/to/rh.feature"; the two file names must be
          identical apart from the lh/rh prefix.

        tuple: (path/to/lh_surface_feature, path/to/rh_surface_feature)
        • To pass other features directly, specify the locations of both
          the left and right versions of each feature as a tuple. The
          files must be readable by nibabel's read_morph_data function
          (i.e. FreeSurfer curv format) or be .mgh/.mgz volumes. For
          example, if you have used the provided register_and_vol2surf
          function to generate surface maps of fractional anisotropy in
          a separate folder, you could pass them as an element of the list:
              (path/to/lh.FA.mgh, path/to/rh.FA.mgh)

        A valid feature list combining these input types would therefore be:
        ['CT','SD',(path/to/lh_feature1, path/to/rh_feature1),
         (path/to/lh_feature2, path/to/rh_feature2)]

    • parcellation (str): Name of the parcellation scheme to use. The files
      'lh.' + parcellation + '.annot' and 'rh.' + parcellation + '.annot'
      must exist inside the surf_dir/label directory.

    A usage sketch follows this docstring.
    '''
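    # Usage sketch (a minimal, hypothetical example; the subject path and
    # FA file locations below are placeholders, not files in this repo):
    #
    #   features = ['CT', 'SD',
    #               ('/data/dti/lh.FA.mgh', '/data/dti/rh.FA.mgh')]
    #   vertex_data, regions, used_features = get_vertex_df(
    #       '/data/freesurfer/subject01', features, 'aparc')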
    # Specify data locations.
    surfer_location = surf_dir + '/'

    # Check inputs!
    if not exists(surfer_location + 'label/lh.' + parcellation + '.annot') or not exists(surfer_location + 'label/rh.' + parcellation + '.annot'):
        raise Exception('Parcellation files not found.')
    all_shorthand_features = ['CT','Vol','SA','MC','SD']
    all_shorthand_features_dict = dict(zip(all_shorthand_features, ['thickness','volume','area','curv','sulc']))

    lh_feature_locs = []
    rh_feature_locs = []

    # Check feature inputs and store the location of each file.
    for feature in features:
        if feature in all_shorthand_features:
            # Shorthand form, e.g. 'CT' -> surf/lh.thickness and surf/rh.thickness.
            lh_loc = surfer_location + 'surf/lh.' + all_shorthand_features_dict[feature]
            rh_loc = surfer_location + 'surf/rh.' + all_shorthand_features_dict[feature]
            if not exists(lh_loc) or not exists(rh_loc):
                raise Exception('Feature for input "' + feature + '" not found.')
            lh_feature_locs.append(lh_loc)
            rh_feature_locs.append(rh_loc)
        elif isinstance(feature, str):
            if len(feature.split('/')) == 1:
                # Bare suffix form, e.g. 'thickness' -> surf/lh.thickness and surf/rh.thickness.
                lh_loc = surfer_location + 'surf/lh.' + feature
                rh_loc = surfer_location + 'surf/rh.' + feature
            else:
                # Full-path form: replace the '?' hemisphere placeholder with 'l' and 'r'.
                lh_loc = feature.split('/')
                lh_loc[-1] = 'l' + lh_loc[-1][1:]
                lh_loc = '/'.join(lh_loc)
                rh_loc = feature.split('/')
                rh_loc[-1] = 'r' + rh_loc[-1][1:]
                rh_loc = '/'.join(rh_loc)
            if not exists(lh_loc) or not exists(rh_loc):
                raise Exception('Feature for input "' + feature + '" not found.')
            lh_feature_locs.append(lh_loc)
            rh_feature_locs.append(rh_loc)
        elif isinstance(feature, tuple):
            # Explicit (lh_path, rh_path) tuple.
            lh_loc, rh_loc = feature
            if not exists(lh_loc) or not exists(rh_loc):
                raise Exception('Feature for input "' + lh_loc + '" or "' + rh_loc + '" not found.')
            lh_feature_locs.append(lh_loc)
            rh_feature_locs.append(rh_loc)
        else:
            raise Exception('Unrecognized format for feature input: ' + str(feature))
    # Get annotation files.
    lh_annot = read_annot(surfer_location + 'label/lh.' + parcellation + '.annot', orig_ids = True)
    rh_annot = read_annot(surfer_location + 'label/rh.' + parcellation + '.annot', orig_ids = True)
    annot_dict = {'lh': lh_annot, 'rh': rh_annot}
    '''
    The regions in the lh and rh need to be renamed so that they are
    distinct, so here we prepend lh_ or rh_ to each region name and build
    a conversion dict from annotation label IDs to those names. This will
    likely need to change for different processing pipelines (FreeSurfer
    versions), datasets, etc., so make sure this dict is correct and
    looks good.
    '''
    # annot[2] holds the region names as bytes; decode them to plain str.
    lh_region_names = ['lh_' + x.decode() for x in lh_annot[2]]
    rh_region_names = ['rh_' + x.decode() for x in rh_annot[2]]

    # annot[1][:, -1] holds each region's original label ID.
    lh_convert_dict = dict(zip(lh_annot[1][:,-1], lh_region_names))
    rh_convert_dict = dict(zip(rh_annot[1][:,-1], rh_region_names))
    convert_dicts = {'lh': lh_convert_dict,
                     'rh': rh_convert_dict}
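    # Quick sanity check of the conversion dict (a sketch; the names shown
    # are what FreeSurfer's stock 'aparc' parcellation typically yields,
    # and label IDs will differ across parcellations):
    #   >>> sorted(lh_convert_dict.values())[:3]
    #   ['lh_bankssts', 'lh_caudalanteriorcingulate', 'lh_caudalmiddlefrontal']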
    # Keep only labels that are actually assigned to at least one vertex.
    used_labels_l = np.intersect1d(np.unique(lh_annot[0]), list(lh_convert_dict.keys()))
    used_labels_r = np.intersect1d(np.unique(rh_annot[0]), list(rh_convert_dict.keys()))
    used_labels = {'lh': used_labels_l,
                   'rh': used_labels_r}

    used_regions_l = np.array([value for key, value in lh_convert_dict.items() if key in used_labels_l])
    used_regions_r = np.array([value for key, value in rh_convert_dict.items() if key in used_labels_r])
    combined_regions = np.hstack((used_regions_l, used_regions_r))

    # Drop non-cortical catch-all regions; names of length 3 are the bare
    # 'lh_'/'rh_' prefixes left by empty region names.
    unknown_regions = [x for x in combined_regions if (('?' in x) | ('unknown' in x) | ('Unknown' in x) | ('Medial_Wall' in x) | (len(x) == 3))]
    combined_regions = np.array([x for x in combined_regions if x not in unknown_regions])
    vertex_data_dict = defaultdict()

    # Now load up all the vertex-level data!
    for hemi in ['lh','rh']:
        hemi_data_dict = defaultdict()
        feature_locs = lh_feature_locs if hemi == 'lh' else rh_feature_locs
        print('Loading ' + ('left' if hemi == 'lh' else 'right') + ' hemisphere data:')
        for i, feature_loc in enumerate(feature_locs):
            print(feature_loc)
            # Check for mgh/mgz volumes vs regular FreeSurfer curv files.
            if feature_loc.endswith('mgh') or feature_loc.endswith('mgz'):
                hemi_data_dict['Feature_' + str(i)] = load(feature_loc).get_fdata().flatten()
            else:
                hemi_data_dict['Feature_' + str(i)] = read_morph_data(feature_loc)
        used_features = list(hemi_data_dict.keys())

        # Stack the per-vertex label IDs and all features into one array:
        # row 0 is the label, rows 1..n are the features.
        hemi_data = np.zeros((len(used_features) + 1, len(annot_dict[hemi][0])))
        hemi_data[0] = annot_dict[hemi][0]
        for i, feature in enumerate(used_features):
            hemi_data[i + 1] = hemi_data_dict[feature]

        col_names = ['Label'] + used_features
        hemi_data = pd.DataFrame(hemi_data.T, columns = col_names)

        # Select only the vertices that map to used regions, then convert
        # the numeric labels to region names.
        hemi_data = hemi_data.loc[hemi_data['Label'].isin(used_labels[hemi])]
        hemi_data["Label"] = hemi_data["Label"].map(convert_dicts[hemi])
        vertex_data_dict[hemi] = hemi_data
    vertex_data = pd.concat([vertex_data_dict['lh'], vertex_data_dict['rh']], ignore_index = True)

    # Output data.
    print('Features used:')
    print(used_features)
    return vertex_data, combined_regions, used_features
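
# A minimal usage sketch: the FreeSurfer path and parcellation name below
# are hypothetical placeholders. The returned vertex_data, combined_regions,
# and used_features are presumably what MIND_helpers.calculate_mind_network
# (imported above) consumes downstream.
if __name__ == '__main__':
    example_surf_dir = '/data/freesurfer/subject01'  # hypothetical path
    vertex_data, regions, used_features = get_vertex_df(
        example_surf_dir, features = ['CT', 'SD'], parcellation = 'aparc')
    print(vertex_data.shape)
    print(regions)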