"""
This is a fork from parietal.probabilistic_parcellation.data_simulation.
Do not use it if you ahve access to the original one.
This module generates spatial data
to be studied in the parcellation framework
Author: Bertrand Thirion, 2013
"""
import numpy as np
import matplotlib.pyplot as plt
from sklearn.feature_extraction.image import grid_to_graph
# sklearn.cluster.Ward was removed from recent scikit-learn releases;
# AgglomerativeClustering with linkage='ward' is the equivalent estimator.
from sklearn.cluster import AgglomerativeClustering, KMeans


def generate_data(mu=0, sigma1=1, sigma2=1, n_voxels=1, n_subjects=1, seed=1,
                  beta=None):
    """Generate data according to a mixed effects variance model:
    y = mu + beta[subject] + noise, where the noise has std sigma1 and,
    when beta is not provided, subject effects are drawn with std sigma2.
    """
    if seed is not None:
        np.random.seed(seed)
    if beta is None:
        # second-level (between-subject) effects
        beta = sigma2 * np.random.randn(n_subjects)
    # u[i] is the subject index of observation i
    u = np.repeat(np.arange(n_subjects), n_voxels)
    # design matrix with one indicator column per subject
    X = np.zeros((n_voxels * n_subjects, n_subjects))
    X[np.arange(n_voxels * n_subjects), u] = 1
    # first-level (within-subject) noise with std sigma1
    y = mu + np.dot(X, beta) + sigma1 * np.random.randn(n_voxels * n_subjects)
    return y, u
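
# Illustrative usage (not part of the original module): simulate 10 voxels
# for 5 subjects; `y` then has shape (50,) and `u[i]` gives the subject
# index of observation i.
#   y, u = generate_data(mu=1., sigma1=.5, sigma2=2., n_voxels=10,
#                        n_subjects=5, seed=0)
#   subject_means = [y[u == s].mean() for s in range(5)]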


def generate_data_jitter(mu=0, sigma1=1, sigma2=1, masks=[], seed=1,
                         beta=None):
    """Generate data according to a mixed effects variance model,
    but with additional jitter: each subject contributes as many
    observations as its mask has non-zero entries.
    """
    n_subjects = len(masks)
    if seed is not None:
        np.random.seed(seed)
    if beta is None:
        beta = sigma2 * np.random.randn(n_subjects)
    # subject index of each observation, following the per-subject masks
    u = np.hstack([np.ones(masks[s].sum()) * s for s in range(n_subjects)])\
        .astype(int)
    # design matrix with one indicator column per subject
    X = np.zeros((len(u), n_subjects))
    X[np.arange(len(u)), u] = 1
    y = mu + np.dot(X, beta) + sigma1 * np.random.randn(len(u))
    return y, u
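
# Illustrative usage (not part of the original module): with per-subject
# boolean masks of possibly different sizes, each subject contributes
# masks[s].sum() observations.
#   masks = [np.array([True, True, False]), np.array([True, False, True])]
#   y, u = generate_data_jitter(mu=0., sigma1=1., sigma2=1., masks=masks)
#   # y.shape == (4,) and u == array([0, 0, 1, 1])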


def generate_spatial_data(shape=(40, 40), n_subjects=1, n_parcels=1, mask=None,
                          mu=None, sigma1=None, sigma2=None, model='ward',
                          seed=1, smooth=0, jitter=0., verbose=0):
    """Generate a spatial dataset

    Parameters
    ==========
    shape: tuple, optional,
        dimensions of the spatial model (assuming a grid)
    n_subjects: int, optional, the number of subjects considered
    n_parcels: int, optional, the number of generated parcels
    mask: array of shape (shape), domain-defining binary mask
    mu: array of shape (n_parcels), the mean of the simulated parcels
    sigma1: array of shape (n_parcels),
        the first-level variance of the simulated parcels
    sigma2: array of shape (n_parcels),
        the second-level variance of the simulated parcels
    model: string, one of ['ward', 'kmeans'],
        model used to generate the parcellation
    seed: int, optional, random generator seed
    smooth: float, optional,
        posterior smoothing of the data
    jitter: float, optional,
        spatial jitter on the positions
    verbose: boolean, optional, verbosity mode

    Returns
    =======
    xyz: array of shape (n_voxels, len(shape)), coordinates of the voxels
    label: array of shape (n_voxels), indexes defining the spatial model
    X: array of shape (n_voxels, n_subjects), signal attached to the voxels
    """
    from scipy.ndimage import gaussian_filter

    # Create the spatial model
    if mask is None:
        # boolean mask so that mask.sum() is an integer voxel count
        mask = np.ones(np.prod(shape), dtype=bool)
        xyz = np.indices(shape).reshape(len(shape), np.prod(shape)).T
    else:
        xyz = np.vstack(np.where(mask)).T
    if model == 'kmeans':
        spatial_model = KMeans(n_clusters=n_parcels).fit(xyz)
        label = spatial_model.labels_
    elif model == 'ward':
        connectivity = grid_to_graph(*shape, mask=mask).tocsr()
        # spatially-constrained Ward clustering of random features yields
        # a random parcellation of the grid
        # (sklearn.cluster.Ward was renamed to AgglomerativeClustering)
        label = AgglomerativeClustering(
            n_clusters=n_parcels, connectivity=connectivity,
            linkage='ward').fit(np.random.randn(mask.sum(), 100)).labels_
        from sklearn import neighbors
        # nearest-neighbor classifier used to transfer labels to
        # jittered positions
        spatial_model = neighbors.KNeighborsClassifier(3)
        spatial_model.fit(xyz, label)
    else:
        raise ValueError('Unknown simulation model: %s' % model)
    if jitter > 0:
        # draw one random spatial shift per subject and re-assign parcel
        # labels at the shifted positions
        labels = [spatial_model.predict(
            xyz + jitter * np.random.rand(1, xyz.shape[1]))
            for subj in range(n_subjects)]

    X = np.zeros((xyz.shape[0], n_subjects))
    # Generate the functional data
    if mu is None:
        mu = np.zeros(n_parcels)
    if sigma1 is None:
        sigma1 = np.ones(n_parcels)
    if sigma2 is None:
        sigma2 = np.ones(n_parcels)
    # shared subject effects, rescaled per parcel below
    beta_ = np.random.randn(n_subjects)
    for k in range(n_parcels):
        if jitter > 0:
            mask = [label_ == k for label_ in labels]
        else:
            mask = [label == k for subj in range(n_subjects)]
        x, subj = generate_data_jitter(mu[k], sigma1[k], sigma2[k], mask,
                                       seed=seed, beta=beta_ * sigma2[k])
        for n_subj in range(n_subjects):
            X[mask[n_subj], n_subj] = x[subj == n_subj]
    if smooth > 0:  # smooth the data
        # note: the reshape below assumes the full grid (mask=None)
        for subj in range(n_subjects):
            X[:, subj] = gaussian_filter(
                np.reshape(X[:, subj], shape), sigma=smooth).ravel()
    if verbose:
        fig = plt.figure(figsize=(10, 1.5))
        plt.subplot(1, n_subjects + 1, 1)
        # plt.cm.spectral was removed from matplotlib;
        # nipy_spectral is the equivalent colormap
        plt.imshow(np.reshape(label, shape), interpolation='nearest',
                   cmap=plt.cm.nipy_spectral)
        plt.title('Template')
        plt.axis('off')
        for ns in range(n_subjects):
            plt.subplot(1, n_subjects + 1, 2 + ns)
            plt.imshow(np.reshape(X[:, ns], shape), interpolation='nearest')
            plt.title('subject %d' % ns)
            plt.axis('off')
        plt.subplots_adjust(left=.01, bottom=.01, right=.99, top=.99,
                            wspace=.05, hspace=.01)
        fig.set_figheight(1.5)
    return xyz, label, X
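

if __name__ == '__main__':
    # Minimal demonstration added in this fork (not in the original module):
    # simulate a 20x20 grid parcelled into 4 Ward parcels for 3 subjects,
    # then display the template parcellation and the per-subject signals.
    xyz, label, X = generate_spatial_data(
        shape=(20, 20), n_subjects=3, n_parcels=4, model='ward',
        mu=np.arange(4, dtype=float), seed=1, smooth=1., verbose=1)
    print('coordinates:', xyz.shape, 'labels:', label.shape,
          'signals:', X.shape)
    plt.show()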