Skip to content

Commit

Permalink
Merge pull request #22 from flatironinstitute/file-browser-jupyter
Browse files Browse the repository at this point in the history
Adding interactive path selections to Jupyter notebooks
  • Loading branch information
geoffwoollard authored Jun 25, 2024
2 parents bd57d28 + 779c086 commit da82e94
Show file tree
Hide file tree
Showing 6 changed files with 365 additions and 125 deletions.
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -158,3 +158,9 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# Tutorials folder
tutorials/*

# Config file templates
config_files/*
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ dependencies = [
"aspire",
"jupyter",
"osfclient",
"seaborn"
"seaborn",
"ipyfilechooser",
]

[project.optional-dependencies]
Expand Down
187 changes: 156 additions & 31 deletions tutorials/1_tutorial_preprocessing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"metadata": {
"ExecuteTime": {
"end_time": "2024-06-17T15:40:12.854854Z",
"start_time": "2024-06-17T15:40:12.829426Z"
}
},
"outputs": [],
"source": [
"%load_ext autoreload\n",
Expand All @@ -13,14 +18,21 @@
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"metadata": {
"ExecuteTime": {
"end_time": "2024-06-17T15:40:20.557563Z",
"start_time": "2024-06-17T15:40:18.847242Z"
}
},
"outputs": [],
"source": [
"import json\n",
"import os\n",
"import torch\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np"
"import numpy as np\n",
"import yaml\n",
"from ipyfilechooser import FileChooser"
]
},
{
Expand All @@ -29,26 +41,94 @@
"source": [
"# Creating your submission_config file\n",
"\n",
"This file will tell the preprocessing pipeline how each submission should be processed. I will show two examples of possible submissions and explain how different parameters affect the pipeline."
"This file will tell the preprocessing pipeline how each submission should be processed. I will show two examples of possible submissions and explain how different parameters affect the pipeline.\n",
"\n",
"Note that the submission directories need maps numbered 01.mrc to 80.mrc and a populations.txt file that has the corresponding populations weights (no header) each separated by a newline (so 80 rows in total), such that the total sums to 1."
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2024-06-17T15:40:28.587603Z",
"start_time": "2024-06-17T15:40:28.496607Z"
}
},
"outputs": [],
"source": [
"# Select path to submissions\n",
"path_to_sub_set = FileChooser(os.path.expanduser(\"~\"))\n",
"path_to_sub_set.show_only_dirs = True\n",
"display(path_to_sub_set)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2024-06-13T07:40:21.422797Z",
"start_time": "2024-06-13T07:40:21.365912Z"
}
},
"outputs": [],
"source": [
"# Select path to submissions (submission 1)\n",
"submission1_path = FileChooser(path_to_sub_set.selected_path)\n",
"submission1_path.show_only_dirs = True\n",
"display(submission1_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2024-06-13T07:40:31.706352Z",
"start_time": "2024-06-13T07:40:31.645910Z"
}
},
"outputs": [],
"source": [
"# Select path to submissions (submission 2)\n",
"submission2_path = FileChooser(path_to_sub_set.selected_path)\n",
"submission2_path.show_only_dirs = True\n",
"display(submission2_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2024-06-13T07:40:40.403450Z",
"start_time": "2024-06-13T07:40:40.335296Z"
}
},
"outputs": [],
"source": [
"# Select path to Ground Truth\n",
"path_gt = FileChooser(os.path.expanduser(\"~\"))\n",
"path_gt.show_only_dirs = True\n",
"display(path_gt)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"ExecuteTime": {
"end_time": "2024-06-13T07:40:59.387306Z",
"start_time": "2024-06-13T07:40:59.348942Z"
}
},
"outputs": [],
"source": [
"path_to_sub_set = \"/path/to/submissions/\"\n",
"\n",
"submission1_path = os.path.join(path_to_sub_set, \"path/to/submission1\")\n",
"submission2_path = os.path.join(path_to_sub_set, \"path/to/submission2\")\n",
"\n",
"path_gt = \"/path/to/ground_truth/\"\n",
"\n",
"submission_config = {\n",
" \"gt\": {\n",
" \"name\": \"gt\",\n",
" \"path\": path_gt,\n",
" \"path\": path_gt.selected_path,\n",
" \"box_size\": 224,\n",
" \"pixel_size\": 1.073 * 2,\n",
" \"ref_align_fname\": \"1.mrc\",\n",
Expand All @@ -58,22 +138,27 @@
" \"align\": 0,\n",
" \"box_size\": 144,\n",
" \"pixel_size\": 1.073 * 2,\n",
" \"path\": submission1_path,\n",
" \"path\": submission1_path.selected_path,\n",
" },\n",
" 1: {\n",
" \"name\": \"submission2\",\n",
" \"align\": 1,\n",
" \"box_size\": 288,\n",
" \"pixel_size\": 1.073,\n",
" \"path\": submission2_path,\n",
" \"path\": submission2_path.selected_path,\n",
" },\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"execution_count": 16,
"metadata": {
"ExecuteTime": {
"end_time": "2024-06-13T07:41:01.194466Z",
"start_time": "2024-06-13T07:41:01.153354Z"
}
},
"outputs": [],
"source": [
"# create submission config\n",
Expand All @@ -97,13 +182,60 @@
"\n",
"Note: make sure to activate your environment and have the package installed!\n",
"\n",
"You can run the following cell to visualize your volumes (more precisely, a projection of them)"
"You can run the following cell to visualize your volumes (more precisely, a projection of them)\n",
"\n",
"IMPORTANT: The execution of the previous program relies on the existence of file to be saved at {{ submission1_path.selected_path }} with a specific formatting. The file must be named \"populations.txt\", and should be formatted as a single row/column CSV file containing the populations computed from your results. If the previous file is not included, the execution of the program will result in a runtime error."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"ExecuteTime": {
"end_time": "2024-06-13T07:43:02.295840Z",
"start_time": "2024-06-13T07:43:02.231814Z"
}
},
"outputs": [],
"source": [
"# Select path to Config file\n",
"# An example of this file is available in the path ../config_files/config_preproc.yaml\n",
"config_preproc_path = FileChooser(os.path.expanduser(\"~\"))\n",
"config_preproc_path.filter_pattern = '*.yaml'\n",
"display(config_preproc_path)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"ExecuteTime": {
"end_time": "2024-06-13T07:43:16.259106Z",
"start_time": "2024-06-13T07:43:16.215287Z"
}
},
"outputs": [],
"source": [
"# Get output path from config file\n",
"with open(config_preproc_path.value, \"r\") as f:\n",
" config_preproc = yaml.safe_load(f)\n",
"output_path = config_preproc[\"output_path\"]\n",
"\n",
"if os.path.isabs(output_path):\n",
" full_output_path = output_path\n",
"else:\n",
" full_output_path = os.path.join(os.getcwd(), '..', output_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2024-06-13T07:43:22.054839Z",
"start_time": "2024-06-13T07:43:18.169467Z"
}
},
"outputs": [],
"source": [
"n_submissions = 2 # change this to however many submissions you preprocessed\n",
Expand All @@ -115,25 +247,18 @@
" 0, 20\n",
" ) # doing random volumes to check that everything went fine\n",
"\n",
" submission = torch.load(f\"/path/to/output/submission_{i}.pt\")\n",
" submission = torch.load(os.path.join(full_output_path, f\"submission_{i}.pt\"))\n",
" print(submission[\"volumes\"].shape, submission[\"id\"])\n",
" ax.flatten()[i].imshow(submission[\"volumes\"][idx].sum(axis=0))\n",
" ax.flatten()[i].set_title(submission[\"id\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "cryo-challenge-kernel",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "cryo-challenge-kernel"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
Expand All @@ -145,7 +270,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.10"
"version": "3.8.17"
}
},
"nbformat": 4,
Expand Down
Loading

0 comments on commit da82e94

Please sign in to comment.