forked from JuliaML/MLDatasets.jl
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add TemporalBrains dataset (JuliaML#222)
* init * Add create_dataset * Export TemporalBrains * add struct and constructor * Optimized version * Update docs * Add tests * Add spaces Co-authored-by: Carlo Lucibello <[email protected]> * Add link * Improve docstring * Add `TemporalBrains` to docs * Improve * Fix & for `julia 1.6` --------- Co-authored-by: Carlo Lucibello <[email protected]>
- Loading branch information
1 parent
60a2f05
commit c4188ab
Showing
4 changed files
with
98 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,4 +32,5 @@ Reddit | |
TUDataset | ||
METRLA | ||
PEMSBAY | ||
TemporalBrains | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
function __init__temporalbrains() | ||
DEPNAME = "TemporalBrains" | ||
LINK = "http://www-sop.inria.fr/members/Aurora.Rossi/index.html" | ||
register(ManualDataDep(DEPNAME, | ||
""" | ||
Dataset: $DEPNAME | ||
Website : $LINK | ||
""")) | ||
end | ||
|
||
|
||
function tb_datadir(dir = nothing) | ||
dir = isnothing(dir) ? datadep"TemporalBrains" : dir | ||
LINK = "http://www-sop.inria.fr/members/Aurora.Rossi/data/LabelledTBN.zip" | ||
if length(readdir((dir))) == 0 | ||
DataDeps.fetch_default(LINK, dir) | ||
currdir = pwd() | ||
cd(dir) # Needed since `unpack` extracts in working dir | ||
DataDeps.unpack(joinpath(dir, "LabelledTBN.zip")) | ||
# conditions when unzipped folder is our required data dir | ||
cd(currdir) | ||
end | ||
@assert isdir(dir) | ||
return dir | ||
end | ||
|
||
|
||
function create_tbdataset(dir, thre) | ||
name_filelabels = joinpath(dir, "LabelledTBN", "labels.txt") | ||
filelabels = open(name_filelabels, "r") | ||
temporalgraphs = Vector{MLDatasets.TemporalSnapshotsGraph}(undef, 1000) | ||
|
||
for (i,line) in enumerate(eachline(filelabels)) | ||
id, gender, age = split(line) | ||
name_network_file = joinpath(dir, "LabelledTBN", "networks", id * "_ws60_wo30_tuk0_pearson_schaefer_100.txt") | ||
|
||
data = readdlm(name_network_file,',',Float32; skipstart = 1) | ||
|
||
data_thre = view(data,view(data,:,4) .> thre,:) | ||
data_thre_int = Int.(view(data_thre,:,1:3)) | ||
|
||
activation = [zeros(Float32, 102) for _ in 1:27] | ||
for t in 1:27 | ||
for n in 1:102 | ||
rows = ((view(data_thre_int,:,1).==n) .& (view(data_thre_int,:,3).==t)) | ||
activation[t][n] = mean(view(data_thre,rows,4)) | ||
end | ||
end | ||
|
||
temporalgraphs[i] = TemporalSnapshotsGraph(num_nodes=ones(Int,27)*102, edge_index = (data_thre_int[:,1], data_thre_int[:,2], data_thre_int[:,3]), node_data= activation, graph_data= (g = gender, a = age)) | ||
end | ||
return temporalgraphs | ||
end | ||
|
||
""" | ||
TemporalBrains(; dir = nothing, threshold_value = 0.6) | ||
The TemporalBrains dataset contains a collection of temporal brain networks (as `TemporalSnapshotsGraph`s) of 1000 subjects obtained from resting-state fMRI data from the [Human Connectome Project (HCP)](https://www.humanconnectome.org/study/hcp-young-adult/document/extensively-processed-fmri-data-documentation). | ||
The number of nodes is fixed for each of the 27 snapshots at 102, while the edges change over time. | ||
For each `Graph` snapshot, the feature of a node represents the average activation of the node during that snapshot and it is contained in `Graphs.node_data`. | ||
Each `TemporalSnapshotsGraph` has a label representing their gender ("M" for male and "F" for female) and age range (22-25, 26-30, 31-35 and 36+) contained as a named tuple in `graph_data`. | ||
The `threshold_value` is used to binarize the edge weights and is set to 0.6 by default. | ||
""" | ||
struct TemporalBrains <: AbstractDataset | ||
graphs::Vector{MLDatasets.TemporalSnapshotsGraph} | ||
end | ||
|
||
function TemporalBrains(;threshold_value = 0.6, dir=nothing) | ||
create_default_dir("TemporalBrains") | ||
dir = tb_datadir(dir) | ||
graphs = create_tbdataset(dir, threshold_value) | ||
return TemporalBrains(graphs) | ||
end | ||
|
||
Base.length(d::TemporalBrains) = length(d.graphs) | ||
Base.getindex(d::TemporalBrains, ::Colon) = d.graphs[1] | ||
Base.getindex(d::TemporalBrains, i) = getindex(d.graphs, i) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters