Skip to content

Commit

Permalink
remove hard coded path to populations.txt
Browse files Browse the repository at this point in the history
  • Loading branch information
DSilva27 committed Jul 11, 2024
1 parent 7ee6824 commit 5b00f48
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 30 deletions.
16 changes: 10 additions & 6 deletions src/cryo_challenge/_preprocessing/dataloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ class SubmissionPreprocessingDataLoader(Dataset):

def __init__(self, submission_config):
self.submission_config = submission_config
self.submission_paths, self.gt_path = self.extract_submission_paths()
self.submission_paths, self.population_files, self.gt_path = (
self.extract_submission_paths()
)
self.subs_index = [int(idx) for idx in list(self.submission_config.keys())[1:]]
path_to_gt_ref = os.path.join(
self.gt_path, self.submission_config["gt"]["ref_align_fname"]
Expand Down Expand Up @@ -65,6 +67,8 @@ def validate_submission_config(self):
raise ValueError(f"Pixel size not found for submission {key}")
if "align" not in value.keys():
raise ValueError(f"Align not found for submission {key}")
if "populations_file" not in value.keys():
raise ValueError(f"Population file not found for submission {key}")

if not os.path.exists(value["path"]):
raise ValueError(f"Path {value['path']} does not exist")
Expand Down Expand Up @@ -135,13 +139,16 @@ def help(cls):

def extract_submission_paths(self):
submission_paths = []
population_files = []
for key, value in self.submission_config.items():
if key == "gt":
gt_path = value["path"]

else:
submission_paths.append(value["path"])
return submission_paths, gt_path
population_files.append(value["populations_file"])

return submission_paths, population_files, gt_path

def __len__(self):
return len(self.submission_paths)
Expand All @@ -154,10 +161,7 @@ def __getitem__(self, idx):

assert len(vol_paths) > 0, "No volumes found in submission directory"

populations = np.loadtxt(
os.path.join(self.submission_paths[idx], "populations.txt")
)
populations = torch.from_numpy(populations)
populations = torch.from_numpy(np.loadtxt(self.population_files[idx]))

vol0 = mrcfile.open(vol_paths[0], mode="r")
volumes = torch.zeros(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"align": 1,
"box_size": 244,
"pixel_size": 2.146,
"path": "tests/data/unprocessed_dataset_2_submissions/submission_x"
"path": "tests/data/unprocessed_dataset_2_submissions/submission_x",
"populations_file": "tests/data/unprocessed_dataset_2_submissions/submission_x/populations.txt"
}
}
}
79 changes: 57 additions & 22 deletions tutorials/1_tutorial_preprocessing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2024-06-17T15:40:12.854854Z",
Expand All @@ -17,7 +17,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2024-06-17T15:40:20.557563Z",
Expand All @@ -30,7 +30,6 @@
"import os\n",
"import torch\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import yaml\n",
"from ipyfilechooser import FileChooser"
]
Expand Down Expand Up @@ -80,6 +79,17 @@
"display(submission1_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Select path to populations (submission 1)\n",
"submission1_pop_path = FileChooser(path_to_sub_set.selected_path)\n",
"display(submission1_pop_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -97,6 +107,26 @@
"display(submission2_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Select path to populations (submission 2)\n",
"submission2_pop_path = FileChooser(path_to_sub_set.selected_path)\n",
"display(submission2_pop_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"submission2_pop_path.selected"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -116,7 +146,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2024-06-13T07:40:59.387306Z",
Expand All @@ -139,20 +169,22 @@
" \"box_size\": 144,\n",
" \"pixel_size\": 1.073 * 2,\n",
" \"path\": submission1_path.selected_path,\n",
" \"populations_file\": submission1_pop_path.selected,\n",
" },\n",
" 1: {\n",
" \"name\": \"submission2\",\n",
" \"align\": 1,\n",
" \"box_size\": 288,\n",
" \"pixel_size\": 1.073,\n",
" \"path\": submission2_path.selected_path,\n",
" \"populations_file\": submission2_pop_path.selected,\n",
" },\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2024-06-13T07:41:01.194466Z",
Expand All @@ -174,17 +206,22 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"After you create your submission_config, simply grab a copy of the file \"config_preproc.yaml\" from the provided config_files, and change the path for the \"submission_config_file\" to the file we created in the previous cell. Also change the path for the output. The rest of the parameters you can leave untouched. Please see the publication \"Singer, A., & Yang, R. (2024). Alignment of density maps in Wasserstein distance. Biological Imaging, 4, e5\" for more details. Then simply run\n",
"Lastly, to run the preprocessing pipeline follow these steps\n",
"\n",
"```bash\n",
"cryo_challenge run_preprocessing --config /path/to/config_preproc.yaml\n",
"```\n",
"0. Make sure to activate your environment and have the package installed!\n",
"\n",
"Note: make sure to activate your environment and have the package installed!\n",
"1. Grab a copy of the file `config_preproc.yaml`from our config file templates.\n",
"\n",
"You can run the following cell to visualize your volumes (more precisely, a projection of them)\n",
"2. In the copied config file, update the value of `submission_config_file` to match the path to the file we created in the last cell.\n",
"\n",
"IMPORTANT: The execution of the previous program relies on the existence of file to be saved at {{ submission1_path.selected_path }} with a specific formatting. The file must be named \"populations.txt\", and should be formatted as a single row/column CSV file containing the populations computed from your results. If the previous file is not included, the execution of the program will result in a runtime error."
"3. Optionally, change the other parameters. \n",
" * Most of the parameters (BOT_* and thresh_percentile) are for the alignment. For details on how they work, please see the publication \"Singer, A., & Yang, R. (2024). Alignment of density maps in Wasserstein distance. Biological Imaging, 4, e5\" for more details. \n",
"\n",
" * The other parameters are self explanatory, \"seed_flavor_assignment\" changes which submission gets assigned which ice cream flavor, keep this if you want to revert anonymity.\n",
"\n",
"4. Run the command: `cryo_challenge run_preprocessing --config /path/to/config_preproc.yaml`\n",
"\n",
"You can run the following cell to visualize your volumes (more precisely, a projection of them)\n"
]
},
{
Expand All @@ -201,13 +238,13 @@
"# Select path to Config file\n",
"# An example of this file is available in the path ../config_files/config_preproc.yaml\n",
"config_preproc_path = FileChooser(os.path.expanduser(\"~\"))\n",
"config_preproc_path.filter_pattern = '*.yaml'\n",
"config_preproc_path.filter_pattern = \"*.yaml\"\n",
"display(config_preproc_path)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2024-06-13T07:43:16.259106Z",
Expand All @@ -224,7 +261,7 @@
"if os.path.isabs(output_path):\n",
" full_output_path = output_path\n",
"else:\n",
" full_output_path = os.path.join(os.getcwd(), '..', output_path)"
" full_output_path = os.path.join(os.getcwd(), \"..\", output_path)"
]
},
{
Expand All @@ -240,12 +277,10 @@
"source": [
"n_submissions = 2 # change this to however many submissions you preprocessed\n",
"\n",
"fig, ax = plt.subplots(2, 6, figsize=(20, 8)) # change values here too\n",
"fig, ax = plt.subplots(1, 2, figsize=(10, 4)) # change values here too\n",
"\n",
"for i in range(n_submissions):\n",
" idx = np.random.randint(\n",
" 0, 20\n",
" ) # doing random volumes to check that everything went fine\n",
" idx = 0\n",
"\n",
" submission = torch.load(os.path.join(full_output_path, f\"submission_{i}.pt\"))\n",
" print(submission[\"volumes\"].shape, submission[\"id\"])\n",
Expand All @@ -256,9 +291,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "cryo-challenge-kernel",
"language": "python",
"name": "python3"
"name": "cryo-challenge-kernel"
},
"language_info": {
"codemirror_mode": {
Expand All @@ -270,7 +305,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.17"
"version": "3.10.10"
}
},
"nbformat": 4,
Expand Down

0 comments on commit 5b00f48

Please sign in to comment.