Skip to content

Commit

Permalink
data processing notebook
Browse files Browse the repository at this point in the history
  • Loading branch information
j1c committed Apr 13, 2024
1 parent 755c0e8 commit a1d4aab
Showing 1 changed file with 261 additions and 0 deletions.
261 changes: 261 additions & 0 deletions docs/paper/data.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,261 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Data Preprocessing\n",
"\n",
"The outputs from the `m2g` pipeline are available in our open-access AWS S3 bucket: `s3://open-neurodata/m2g`. You can browse the outputs using the file tree at [http://open-neurodata.s3-website-us-east-1.amazonaws.com/](http://open-neurodata.s3-website-us-east-1.amazonaws.com/)."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/j1c/miniconda3/envs/m2g/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
],
"source": [
"import boto3\n",
"from botocore import UNSIGNED\n",
"from botocore.client import Config\n",
"\n",
"from pathlib import Path\n",
"import numpy as np\n",
"\n",
"from graspologic.utils import import_edgelist, pass_to_ranks"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Dataset abbreviations processed for each imaging modality, in processing order.\n",
"diffusion_datasets = [\n",
"    \"SWU4\", \"HNU1\", \"NKIENH\", \"XHCUMS\", \"BNU1\",\n",
"    \"BNU3\", \"NKI1\", \"NKI24\", \"IPCAS8\", \"MRN_1\",\n",
"]\n",
"functional_datasets = [\n",
"    \"NYU_2\", \"SWU4\", \"HNU1\", \"XHCUMS\", \"UPSM_1\",\n",
"    \"BNU3\", \"IPCAS7\", \"SWU1\", \"IPCAS1\", \"BNU1\",\n",
"]\n",
"\n",
"# Modality name -> dataset abbreviations; the keys double as the list of modalities.\n",
"datasets = dict(Diffusion=diffusion_datasets, Functional=functional_datasets)\n",
"modalities = list(datasets)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Fetch from S3 and Download to Local\n",
"\n",
"The files will be stored in the `m2g/docs/paper/data/` directory."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Downloading m2g/Diffusion/SWU4-8-27-20-m2g-native-csa-det/... Total files: 422\n",
"Downloading m2g/Diffusion/HNU1-8-27-20-m2g-native-csa-det/... Total files: 300\n",
"Downloading m2g/Diffusion/NKIENH-11-01-20-m2g-native-csa-det/... Total files: 129\n",
"Downloading m2g/Diffusion/XHCUMS-8-27-20-m2g-native-csa-det/... Total files: 117\n",
"Downloading m2g/Diffusion/BNU1-8-27-20-m2g-native-csa-det/... Total files: 114\n",
"Downloading m2g/Diffusion/BNU3-11-01-20-m2g-native-csa-det/... Total files: 47\n",
"Downloading m2g/Diffusion/NKI1-8-24-20-m2g-native-csa-det/... Total files: 40\n",
"Downloading m2g/Diffusion/NKI24-11-01-20-m2g-native-csa-det/... Total files: 38\n",
"Downloading m2g/Diffusion/IPCAS8-8-27-20-m2g-native-csa-det/... Total files: 26\n",
"Downloading m2g/Diffusion/MRN_1-8-27-20-m2g-native-csa-det/... Total files: 19\n",
"Downloading m2g/Functional/NYU_2-11-27-20-m2g-func/... Total files: 494\n",
"Downloading m2g/Functional/SWU4-11-12-20-m2g-func/... Total files: 425\n",
"Downloading m2g/Functional/HNU1-11-12-20-m2g-func/... Total files: 300\n",
"Downloading m2g/Functional/XHCUMS-11-27-20-m2g-func/... Total files: 247\n",
"Downloading m2g/Functional/UPSM_1-11-27-20-m2g-func/... Total files: 230\n",
"Downloading m2g/Functional/BNU3-11-12-20-m2g-func/... Total files: 144\n",
"Downloading m2g/Functional/IPCAS7-11-27-20-m2g-func/... Total files: 144\n",
"Downloading m2g/Functional/SWU1-11-27-20-m2g-func/... Total files: 119\n",
"Downloading m2g/Functional/IPCAS1-11-27-20-m2g-func/... Total files: 118\n",
"Downloading m2g/Functional/BNU1-11-12-20-m2g-func/... Total files: 106\n"
]
}
],
"source": [
"# Download the DKT-parcellated connectome edgelists (.csv) of every configured\n",
"# dataset from the public S3 bucket into ./data/<modality>/<dataset>/.\n",
"# Files that already exist locally are not downloaded again.\n",
"bucket = \"open-neurodata\"\n",
"\n",
"# The bucket is accessed anonymously (unsigned requests), so a single client\n",
"# can be shared by every modality instead of being rebuilt inside the loop.\n",
"s3 = boto3.client(\"s3\", config=Config(signature_version=UNSIGNED))\n",
"# A single list_objects_v2 call returns at most 1000 entries; the paginator\n",
"# follows continuation tokens so large folders are not silently truncated.\n",
"paginator = s3.get_paginator(\"list_objects_v2\")\n",
"\n",
"for modality in modalities:\n",
"    # The functional outputs keep the atlas file extension in the folder name.\n",
"    if modality == \"Diffusion\":\n",
"        parcellation = \"DKT_space-MNI152NLin6_res-2x2x2\"\n",
"    else:\n",
"        parcellation = \"DKT_space-MNI152NLin6_res-2x2x2.nii.gz\"\n",
"\n",
"    modality_prefix = f\"m2g/{modality}/\"\n",
"\n",
"    # Top-level folders (one per pipeline run) that exist for this modality.\n",
"    run_prefixes = [\n",
"        common[\"Prefix\"]\n",
"        for page in paginator.paginate(\n",
"            Bucket=bucket, Prefix=modality_prefix, Delimiter=\"/\"\n",
"        )\n",
"        for common in page.get(\"CommonPrefixes\", [])\n",
"    ]\n",
"\n",
"    # Pair every abbreviation with the folder(s) it actually matched. Zipping two\n",
"    # independently built lists would shift the labels as soon as one abbreviation\n",
"    # matched zero or several folders. Matching on the `<abbrev>-` folder-name\n",
"    # start also stops one abbreviation from matching a longer dataset name.\n",
"    matched_runs = []\n",
"    for dset_abbrev in datasets[modality]:\n",
"        found = [\n",
"            run\n",
"            for run in run_prefixes\n",
"            if run[len(modality_prefix):].startswith(f\"{dset_abbrev}-\")\n",
"        ]\n",
"        if not found:\n",
"            print(f\"No S3 folder found for {modality} {dset_abbrev}; skipping\")\n",
"        matched_runs.extend((run, dset_abbrev) for run in found)\n",
"\n",
"    for dset, dset_abbrev in matched_runs:\n",
"        prefix = f\"{dset}Connectomes/{parcellation}/\"\n",
"\n",
"        # Keep only .csv keys; for functional data only keys containing `abs`.\n",
"        files = []\n",
"        for page in paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter=\"/\"):\n",
"            # `Contents` is absent when nothing lives under the prefix.\n",
"            for obj in page.get(\"Contents\", []):\n",
"                key = obj[\"Key\"]\n",
"                if not key.endswith(\".csv\"):\n",
"                    continue\n",
"                if modality == \"Functional\" and \"abs\" not in key:\n",
"                    continue\n",
"                files.append(key)\n",
"\n",
"        print(f\"Downloading {dset}... Total files: {len(files)}\")\n",
"\n",
"        # Save to data folder\n",
"        p = Path(f\"./data/{modality}/{dset_abbrev}\")\n",
"        p.mkdir(parents=True, exist_ok=True)\n",
"\n",
"        # Download files that are not already present locally\n",
"        for f in files:\n",
"            out = p / Path(f).name\n",
"            if not out.exists():\n",
"                # str() keeps this working on boto3 releases that only accept\n",
"                # a string filename.\n",
"                s3.download_file(bucket, f, str(out))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Compute mean connectomes\n",
"\n",
"This data will be used for plotting in Figure 2."
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Computing mean graph for Diffusion SWU4... Total files: 422\n",
"Computing mean graph for Diffusion HNU1... Total files: 300\n",
"Computing mean graph for Diffusion NKIENH... Total files: 129\n",
"Computing mean graph for Diffusion XHCUMS... Total files: 117\n",
"Computing mean graph for Diffusion BNU1... Total files: 114\n",
"Computing mean graph for Diffusion BNU3... Total files: 47\n",
"Computing mean graph for Diffusion NKI1... Total files: 40\n",
"Computing mean graph for Diffusion NKI24... Total files: 38\n",
"Computing mean graph for Diffusion IPCAS8... Total files: 26\n",
"Computing mean graph for Diffusion MRN_1... Total files: 19\n",
"Computing mean graph for Functional NYU_2... Total files: 494\n",
"Computing mean graph for Functional SWU4... Total files: 425\n",
"Computing mean graph for Functional HNU1... Total files: 300\n",
"Computing mean graph for Functional XHCUMS... Total files: 247\n",
"Computing mean graph for Functional UPSM_1... Total files: 230\n",
"Computing mean graph for Functional BNU3... Total files: 144\n",
"Computing mean graph for Functional IPCAS7... Total files: 144\n",
"Computing mean graph for Functional SWU1... Total files: 119\n",
"Computing mean graph for Functional IPCAS1... Total files: 118\n",
"Computing mean graph for Functional BNU1... Total files: 106\n"
]
}
],
"source": [
"# For every dataset, load its downloaded edgelists, rank-transform each graph\n",
"# with pass_to_ranks, average the graphs element-wise, and save the mean as\n",
"# ./data/mean_connectomes/<n_files>_<modality>_<dataset>.npy.\n",
"out_dir = Path(\"./data/mean_connectomes/\")\n",
"out_dir.mkdir(parents=True, exist_ok=True)\n",
"\n",
"for modality, dsets in datasets.items():\n",
"    # Restrict the glob to .csv edgelists so stray files or sub-folders in the\n",
"    # data directory are never parsed as graphs; functional data additionally\n",
"    # uses only the `abs` files.\n",
"    keyword = \"*abs*.csv\" if modality == \"Functional\" else \"*.csv\"\n",
"\n",
"    for dset in dsets:\n",
"        p = Path(f\"./data/{modality}/{dset}\")\n",
"        # Sorted so the averaging order, and therefore the floating-point\n",
"        # result, is the same on every run and platform.\n",
"        files = sorted(p.glob(keyword))\n",
"\n",
"        print(\n",
"            f\"Computing mean graph for {modality} {dset}... Total files: {len(files)}\"\n",
"        )\n",
"\n",
"        if not files:\n",
"            # Nothing was downloaded for this dataset; there is no mean to compute.\n",
"            print(f\"No edgelists found in {p}; skipping\")\n",
"            continue\n",
"\n",
"        graphs = import_edgelist(files, \"csv\")\n",
"        # NOTE(review): import_edgelist appears to return a bare adjacency matrix\n",
"        # (not a list) when given a single file - confirm against the graspologic\n",
"        # docs. Wrapping keeps the per-graph loop below correct in that case.\n",
"        if isinstance(graphs, np.ndarray):\n",
"            graphs = [graphs]\n",
"        graphs = [pass_to_ranks(g) for g in graphs]\n",
"\n",
"        # Compute mean graph (element-wise over the stacked adjacency matrices)\n",
"        mean_graph = np.array(graphs).mean(axis=0)\n",
"\n",
"        # Save mean graph; the file name is prefixed with the zero-padded file count\n",
"        np.save(out_dir / f\"{len(files):>03}_{modality}_{dset}\", mean_graph)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "m2g",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.14"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit a1d4aab

Please sign in to comment.