Commit
adding python source code for parcellation
ccraddock committed May 14, 2013
1 parent 2bfde74 commit 1f95ad9
Showing 9 changed files with 1,237 additions and 0 deletions.
115 changes: 115 additions & 0 deletions binfile_parcellation.py
@@ -0,0 +1,115 @@
#### binfile_parcellation.py
# Copyright (C) 2010 R. Cameron Craddock ([email protected])
#
# This script is part of the pyClusterROI python toolbox for the spatially constrained
# clustering of fMRI data. It performs subject level normalized cut clustering of
# connectivity matrices.
#
# For more information refer to:
#
# Craddock, R. C., James, G. A., Holtzheimer, P. E., Hu, X. P., & Mayberg, H. S. (2011). A whole
# brain fMRI atlas generated via spatially constrained spectral clustering. Human brain mapping,
# doi: 10.1002/hbm.21333.
#
# Documentation, updated source code and other information can be found at the NITRC web page:
# http://www.nitrc.org/projects/cluster_roi/
#
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
####

# this script requires NumPy (numpy.scipy.org), SciPy (www.scipy.org), and the
# python_ncut_lib distributed with this script to be installed in a directory that is
# accessible through PYTHONPATH (the current directory will do for python_ncut_lib.py).
import time as time
from numpy import *
from scipy.sparse import csc_matrix
from scipy.sparse.linalg.eigen.arpack import eigen_symmetric
from scipy.linalg import norm, svd
from python_ncut_lib import *

# binfile_parcellate( infile, outfile, K ):
#
# This function performs normalized cut clustering on the connectivity matrix specified by infile
# into sets of K clusters.
# infile: .NPY or .bin file containing a representation of the connectivity matrix to be
# clustered. This file contains a single vector of length 3*N, in which the first
# N values correspond to the i indices, the second N values correspond to the j
# indices and the last N values correspond to the weights w_ij of the similarity
# matrix W. For more information on constructing the input files refer to
# make_local_connectivity_tcorr.py, make_local_connectivity_scorr.py or
# make_local_connectivity_ones.py.
# outfile: a prefix for the output file; this name will be suffixed by _K.npy, where K
#          corresponds to the clustering level
# K:       a list of the numbers of clusters to generate. The normalized cut algorithm is
#          run once for each k in the list, and a separate output file is written for each
#          clustering (pass a one-element list to generate a single clustering)
#
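# As a concrete example of the infile format, a 2-voxel matrix with w_01 = w_10 = 0.5
# would be stored as the length-6 vector [0, 1, 1, 0, 0.5, 0.5]: the i indices,
# followed by the j indices, followed by the weights.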
def binfile_parcellate( infile, outfile, K ):

# check how long it takes
start=time.time()
print 'started at ',start

    # read in the file; .bin files were used previously, but .npy files are now
    # preferred since they record the datatype. Both formats are still supported.
if( infile.endswith(".npy") ):
print "Reading",infile,"as a npy filetype"
a=load(infile)
else:
print "Reading",infile,"as a binary file of doubles"
fileobj=open(infile, 'rb')
a=fromfile(fileobj)
fileobj.close()

# calculate the number of non-zero weights in the connectivity matrix
n=len(a)/3

    # reshape the 1D vector read in from infile into a 3xN array
a=reshape(a,(3,n))
m=max(max(a[0,:]),max(a[1,:]))+1

# make the sparse matrix, CSC format is supposedly efficient for matrix arithmetic
W=csc_matrix((a[2,:],(a[0,:],a[1,:])), shape=(m,m))

print 'finished reading in data and calculating connectivity after ',time.time()-start,'\n'

    # we only have to calculate the eigendecomposition of the Laplacian once, for the
    # largest number of clusters requested. This provides a significant speedup without
    # changing the results.
Kmax=max(K)
eigenval,eigenvec = ncut(W,Kmax)

print 'finished calculating eigenvectors ',time.time()-start,'\n'

# calculate each desired clustering result
for k in K:
eigk=eigenvec[:,:k]
eigenvec_discrete = discretisation(eigk)
print 'finished discretisation ',k,' at ',time.time()-start,'\n'

# transform the discretised eigenvectors into a single vector
# where the value corresponds to the cluster # of the corresponding
# ROI
group_img=eigenvec_discrete[:,0]

for i in range(1,k):
group_img=group_img+(i+1)*eigenvec_discrete[:,i]

# apply the suffix to the output filename and write out results
# as a .npy file
outname=outfile+'_'+str(k)+'.npy'
save(outname,group_img.todense())

print 'finished after ',time.time()-start,'\n'
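
A minimal usage sketch (the file names here are hypothetical): a subject connectivity
file produced by one of the make_local_connectivity_*.py scripts can be clustered at
several levels in one call, reusing a single eigendecomposition:

    from binfile_parcellation import binfile_parcellate

    # cluster one subject's connectivity matrix into 50, 100, and 200 regions;
    # writes subj01_parcellation_50.npy, _100.npy, and _200.npy
    binfile_parcellate('subj01_tcorr_conn.npy', 'subj01_parcellation',
                       [50, 100, 200])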
110 changes: 110 additions & 0 deletions group_binfile_parcellation.py
@@ -0,0 +1,110 @@
#### group_binfile_parcellation.py
# Copyright (C) 2010 R. Cameron Craddock ([email protected])
#
# This script is part of the pyClusterROI python toolbox for the spatially constrained
# clustering of fMRI data. It performs group level normalized cut clustering of
# connectivity matrices, using one of the two group level clustering methods proposed
# in Craddock (2011): results of subject level clusterings are combined and then
# clustered. This is referred to as 2-level clustering in the paper.
#
# For more information refer to:
#
# Craddock, R. C., James, G. A., Holtzheimer, P. E., Hu, X. P., & Mayberg, H. S. (2011). A whole
# brain fMRI atlas generated via spatially constrained spectral clustering. Human brain mapping,
# doi: 10.1002/hbm.21333.
#
# Documentation, updated source code and other information can be found at the NITRC web page:
# http://www.nitrc.org/projects/cluster_roi/
#
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
####

# this script requires NumPy (numpy.scipy.org), SciPy (www.scipy.org), and the
# python_ncut_lib distributed with this script to be installed in a directory that is
# accessible through PYTHONPATH (the current directory will do for python_ncut_lib.py).
from numpy import array
from scipy import *
from scipy.sparse import *
from python_ncut_lib import *

# group_img = group_binfile_parcellate( infiles, outfile, K, n_voxels ):
#
# This function performs group level clustering of individual level clustering results. Each single
# subject clustering is converted into a coincidence matrix W, where w_ij = 1 if voxels i and j are
# in the same cluster, and zero otherwise. Coincidence matrices are averaged across subjects and
# then submitted to normalized cut clustering to obtain K clusters.
# infiles:  list of .NPY or .bin files containing single subject clustering results. Each of
#           these files is a 1D vector in which the ith value corresponds to the cluster
#           assignment of voxel i. This assumes that the voxel ordering is the same across
#           files. These files are generated by binfile_parcellation.py
# outfile:  the name of the output file; an n_voxels x 1 vector is written to this file,
#           where the ith value corresponds to the cluster to which voxel i is assigned
# K:        the number of clusters that will be generated. This is a single number, and it
#           is assumed that each of the input files was clustered to the same K
# n_voxels: Number of voxels in the _mask_ used to generate the subject specific connectivity
# matrices
# group_img: (output) n_voxels x 1 vector indicating cluster assignments
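# As a concrete example of the coincidence matrices described above, the subject
# clustering vector [1, 1, 2, 2] yields
#     1 1 0 0
#     1 1 0 0
#     0 0 1 1
#     0 0 1 1
# (before the diagonal is zeroed); these matrices are averaged across subjects to form W.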
def group_binfile_parcellate( infiles, outfile, K, n_voxels ):

# read in all of the files, construct coincidence matrices, and average them
for i in range(0,len(infiles)):

# read in the files
if infiles[i].endswith(".npy"):
print "Reading",infiles[i],"as a npy filetype"
imdat=load(infiles[i])
else:
print "Reading",infiles[i],"as a binary file of doubles"
fid=open(infiles[i], 'rb')
imdat=fromfile(fid)
fid.close()

        # construct the coincidences between the voxels: for each cluster, enumerate
        # every (i,j) pair of voxels assigned to that cluster
        sparse_i=[]
        sparse_j=[]
        for j in range(1,K+1):
            grp_ndx=nonzero(imdat==j)[0]
            # tiling the cluster's voxel indices and pairing the result with its
            # transpose generates all within-cluster index pairs
            ff=tile(grp_ndx,(len(grp_ndx),1))
            sparse_i=append(sparse_i, reshape(ff,prod(shape(ff))))
            sparse_j=append(sparse_j, reshape(ff.transpose(),prod(shape(ff))))

# sum the coincidence matrices across input files
if i==0:
W=csc_matrix((ones(len(sparse_i)),(sparse_i,sparse_j)),(n_voxels,n_voxels),dtype=double)
else:
print 'adding ',i
W=W+csc_matrix((ones(len(sparse_i)),(sparse_i,sparse_j)),(n_voxels,n_voxels),dtype=double)

# set the diagonal to zeros as is customary with affinity matrices
W=W-spdiags(W.diagonal(),[0],n_voxels,n_voxels,"csc")
print "diag is ",sum(W.diagonal()),"\n"

# divide by the number of input files, to calculate the average
W=W/len(infiles)
print "finished reading in data and calculating connectivity\n"

# perform clustering
eigenval,eigenvec = ncut(W,K)
eigenvec_discrete = discretisation(eigenvec)

    # write out the results
group_img=eigenvec_discrete[:,0]

for i in range(1,K):
group_img=group_img+(i+1)*eigenvec_discrete[:,i]

save(outfile,group_img.todense())
print "finished group parcellation\n"

return array(group_img.todense())
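
A usage sketch for the two-level method (file names and the voxel count are
hypothetical): given subject-level parcellations written by binfile_parcellation.py at
K=100, the group parcellation is obtained with:

    from group_binfile_parcellation import group_binfile_parcellate

    # subject-level results produced by binfile_parcellate at K=100
    subject_files = ['subj01_parcellation_100.npy',
                     'subj02_parcellation_100.npy',
                     'subj03_parcellation_100.npy']

    # average the coincidence matrices and cut the average into 100 group
    # clusters; n_voxels must match the mask used to generate the subject
    # connectivity matrices
    group_img = group_binfile_parcellate(subject_files,
                                         'group_parcellation_100.npy',
                                         100, 15000)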
127 changes: 127 additions & 0 deletions group_mean_binfile_parcellation.py
@@ -0,0 +1,127 @@
#### group_mean_binfile_parcellation.py
# Copyright (C) 2010 R. Cameron Craddock ([email protected])
#
# This script is part of the pyClusterROI python toolbox for the spatially constrained
# clustering of fMRI data. It performs group level normalized cut clustering of
# connectivity matrices, using one of the two group level clustering methods proposed
# in Craddock (2011): individual connectivity matrices are averaged and then clustered.
# This is referred to as group-mean clustering in the paper.
#
# For more information refer to:
#
# Craddock, R. C., James, G. A., Holtzheimer, P. E., Hu, X. P., & Mayberg, H. S. (2011). A whole
# brain fMRI atlas generated via spatially constrained spectral clustering. Human brain mapping,
# doi: 10.1002/hbm.21333.
#
# Documentation, updated source code and other information can be found at the NITRC web page:
# http://www.nitrc.org/projects/cluster_roi/
#
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
####

# this script requires NumPy (numpy.scipy.org), SciPy (www.scipy.org), and the
# python_ncut_lib distributed with this script to be installed in a directory that is
# accessible through PYTHONPATH (the current directory will do for python_ncut_lib.py).
import time as time
from numpy import *
from scipy.sparse import csc_matrix
from scipy.sparse.linalg.eigen.arpack import eigen_symmetric
from scipy.linalg import norm, svd
from python_ncut_lib import *


# group_mean_binfile_parcellate( infiles, outfile, K, n_voxels ):
#
# This function performs group level clustering by averaging individual subject
# connectivity matrices and submitting the group mean matrix to normalized cut
# clustering to obtain K clusters.
# infiles:  list of .NPY or .bin files containing single subject connectivity matrices.
#           Each of these contains a 3*N x 1 vector where the first N values correspond
#           to the i indices, the next N correspond to the j indices, and the last N
#           values correspond to the weights w_ij. For more information on constructing
#           these files refer to make_local_connectivity_tcorr.py,
#           make_local_connectivity_scorr.py, or make_local_connectivity_ones.py.
# outfile:  a prefix for the output file; this name will be suffixed by _K.npy, where K
#           corresponds to the clustering level
# K:        a list of the numbers of clusters to generate. The normalized cut algorithm is
#           run once for each k in the list, and a separate output file is written for each
#           clustering (pass a one-element list to generate a single clustering)
# n_voxels: Number of voxels in the _mask_ used to generate the subject specific connectivity
# matrices
def group_mean_binfile_parcellate( infiles, outfile, K, n_voxels ):
    # check how long it takes
start=time.time()

print 'started at ',start

# read in the files, convert them to similarity matrices, and then average them
for i in range(0,len(infiles)):

# read in the file
if infiles[i].endswith(".npy"):
print "Reading",infiles[i],"as a npy filetype"
a=load(infiles[i])
else:
print "Reading",infiles[i],"as a binary file of doubles"
fileobj=open(infiles[i], 'rb')
a=fromfile(fileobj)
fileobj.close()
n=len(a)/3
a=reshape(a,(3,n))
# determine all of the voxel indices represented
vx_ndx=unique(a[-2,:])
# make the sparse matrix, CSC format is supposedly efficient for matrix arithmetic
if i==0:
W=csc_matrix((a[2,:],(a[0,:],a[1,:])), shape=(n_voxels,n_voxels))
else:
print 'adding ',i
W=W+csc_matrix((a[2,:],(a[0,:],a[1,:])), shape=(n_voxels,n_voxels))

# complete the average
W=W/len(infiles)

vx_ndx=unique(W.nonzero()[0])

print 'finished reading in data and calculating connectivity after ',time.time()-start,'\n'

    # we only have to calculate the eigendecomposition of the Laplacian once, for the
    # largest number of clusters requested. This provides a significant speedup without
    # changing the results.
Kmax=max(K)
eigenval,eigenvec = ncut(W,Kmax)

print 'finished calculating eigenvectors ',time.time()-start,'\n'

# calculate each desired clustering result
for k in K:
eigk=eigenvec[:,:k]
eigenvec_discrete = discretisation(eigk)
print 'finished discretisation ',k,' at ',time.time()-start,'\n'

# transform the discretised eigenvectors into a single vector
# where the value corresponds to the cluster # of the corresponding
# ROI
group_img=eigenvec_discrete[:,0]
for i in range(1,k):
group_img=group_img+(i+1)*eigenvec_discrete[:,i]

# write out the results
outname=outfile+'_'+str(k)+'.npy'
save(outname,group_img.todense())

print 'finished ',k,' after ',time.time()-start,'\n'

print 'finished after ',time.time()-start,'\n'
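
A usage sketch for the group-mean method (file names and the voxel count are
hypothetical): given subject connectivity files written by one of the
make_local_connectivity_*.py scripts, parcellations at several levels are obtained with:

    from group_mean_binfile_parcellation import group_mean_binfile_parcellate

    # single subject connectivity matrices in the 3*N vector layout
    connectivity_files = ['subj01_tcorr_conn.npy',
                          'subj02_tcorr_conn.npy',
                          'subj03_tcorr_conn.npy']

    # average the matrices and cut the group mean at three clustering levels;
    # writes group_mean_parcellation_50.npy, _100.npy, and _200.npy
    group_mean_binfile_parcellate(connectivity_files, 'group_mean_parcellation',
                                  [50, 100, 200], 15000)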