Skip to content

Commit

Permalink
merge from main to include testing
Browse files Browse the repository at this point in the history
  • Loading branch information
DSilva27 committed Jul 11, 2024
2 parents 6719dda + 5b07377 commit 22504f5
Show file tree
Hide file tree
Showing 9 changed files with 88 additions and 40 deletions.
7 changes: 6 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
tests/data
tests/data/dataset_2_submissions
tests/data/Ground_truth
tests/results
tests/data/unprocessed_dataset_2_submissions/submission_x/*.mrc
tests/data/unprocessed_dataset_2_submissions/submission_x/populations.txt

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
2 changes: 1 addition & 1 deletion config_files/config_svd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ submission_list: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
experiment_mode: "all_vs_ref" # options are "all_vs_all", "all_vs_ref"

power_spectrum_normalization:
ref_vol_key: "Vanilla" # which submission should be used
ref_vol_key: "FLAVOR" # which submission should be used
ref_vol_index: 0 # which volume of that submission should be used

# optional unless experiment_mode is "all_vs_ref"
Expand Down
33 changes: 20 additions & 13 deletions src/cryo_challenge/_preprocessing/dataloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,11 @@ class SubmissionPreprocessingDataLoader(Dataset):

def __init__(self, submission_config):
self.submission_config = submission_config
self.submission_paths, self.gt_path = self.extract_submission_paths()
self.validate_submission_config()

self.submission_paths, self.population_files, self.gt_path = (
self.extract_submission_paths()
)
self.subs_index = [int(idx) for idx in list(self.submission_config.keys())[1:]]
path_to_gt_ref = os.path.join(
self.gt_path, self.submission_config["gt"]["ref_align_fname"]
Expand Down Expand Up @@ -53,12 +57,16 @@ def validate_submission_config(self):
raise ValueError("Box size not found for ground truth")
if "pixel_size" not in value.keys():
raise ValueError("Pixel size not found for ground truth")
if "ref_align_fname" not in value.keys():
raise ValueError(
"Reference align file name not found for ground truth"
)
continue
else:
if "path" not in value.keys():
raise ValueError(f"Path not found for submission {key}")
if "id" not in value.keys():
raise ValueError(f"ID not found for submission {key}")
if "name" not in value.keys():
raise ValueError(f"Name not found for submission {key}")
if "box_size" not in value.keys():
raise ValueError(f"Box size not found for submission {key}")
if "pixel_size" not in value.keys():
Expand All @@ -74,11 +82,10 @@ def validate_submission_config(self):
if not os.path.isdir(value["path"]):
raise ValueError(f"Path {value['path']} is not a directory")

ids = list(self.submission_config.keys())[1:]
if ids != list(range(len(ids))):
raise ValueError(
"Submission IDs should be integers starting from 0 and increasing by 1"
)
if not os.path.exists(value["populations_file"]):
raise ValueError(
f"Population file {value['populations_file']} does not exist"
)

return

Expand Down Expand Up @@ -137,13 +144,16 @@ def help(cls):

def extract_submission_paths(self):
submission_paths = []
population_files = []
for key, value in self.submission_config.items():
if key == "gt":
gt_path = value["path"]

else:
submission_paths.append(value["path"])
return submission_paths, gt_path
population_files.append(value["populations_file"])

return submission_paths, population_files, gt_path

# Dataset length: one item per entry in self.submission_paths.
def __len__(self):
return len(self.submission_paths)
Expand All @@ -156,10 +166,7 @@ def __getitem__(self, idx):

assert len(vol_paths) > 0, "No volumes found in submission directory"

populations = np.loadtxt(self.submission_config["populations_file"]).astype(
float
)
populations = torch.from_numpy(populations)
populations = torch.from_numpy(np.loadtxt(self.population_files[idx]))

vol0 = mrcfile.open(vol_paths[0], mode="r")
volumes = torch.zeros(
Expand Down
3 changes: 2 additions & 1 deletion src/cryo_challenge/_preprocessing/preprocessing_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,8 @@ def preprocess_submissions(submission_dataset, config):
print(f" submission saved as submission_{idx}.pt")
print(f"Preprocessing submission {idx} complete")

with open("hash_table.json", "w") as f:
hash_table_path = os.path.join(config["output_path"], "hash_table.json")
with open(hash_table_path, "w") as f:
json.dump(hash_table, f, indent=4)

return
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ cvxpy_solver: ECOS
optimal_q_kl:
n_iter: 100000
break_atol: 0.0001
output_fname: results/test_distribution_to_distribution_submission_0.pkl
output_fname: tests/results/test_distribution_to_distribution_submission_0.pkl
2 changes: 1 addition & 1 deletion tests/config_files/test_config_map_to_map.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ data:
metadata: tests/data/Ground_truth/test_metadata_10.csv
mask:
do: true
volume: data/Ground_truth/mask_dilated_wide_224x224.mrc
volume: tests/data/Ground_truth/mask_dilated_wide_224x224.mrc
analysis:
metrics:
- l2
Expand Down
2 changes: 1 addition & 1 deletion tests/config_files/test_config_svd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ experiment_mode: "all_vs_ref" # options are "all_vs_all", "all_vs_ref"
# optional unless experiment_mode is "all_vs_ref"

power_spectrum_normalization:
ref_vol_key: "Mango" # which submission should be used
ref_vol_key: "Coffee" # which submission should be used
ref_vol_index: 0 # which volume of that submission should be used

path_to_reference: tests/data/Ground_truth/test_maps_gt_flat_10.pt
Expand Down
2 changes: 1 addition & 1 deletion tests/scripts/fetch_test_data.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ ln -s $ADIR/tests/data/dataset_2_submissions/test_submission_0_n8.pt $ADIR/tests
wget https://files.osf.io/v1/resources/8h6fz/providers/dropbox/tests/Ground_truth/test_maps_gt_flat_10.pt?download=true -O tests/data/Ground_truth/test_maps_gt_flat_10.pt
wget https://files.osf.io/v1/resources/8h6fz/providers/dropbox/tests/Ground_truth/test_metadata_10.csv?download=true -O tests/data/Ground_truth/test_metadata_10.csv
wget https://files.osf.io/v1/resources/8h6fz/providers/dropbox/tests/Ground_truth/1.mrc?download=true -O tests/data/Ground_truth/1.mrc
wget https://files.osf.io/v1/resources/8h6fz/providers/dropbox/Ground_truth/mask_dilated_wide_224x224.mrc?download=true -O data/Ground_truth/mask_dilated_wide_224x224.mrc
wget https://files.osf.io/v1/resources/8h6fz/providers/dropbox/Ground_truth/mask_dilated_wide_224x224.mrc?download=true -O tests/data/Ground_truth/mask_dilated_wide_224x224.mrc
for FILE in 1.mrc 2.mrc 3.mrc 4.mrc populations.txt
do
wget https://files.osf.io/v1/resources/8h6fz/providers/dropbox/tests/unprocessed_dataset_2_submissions/submission_x/${FILE}?download=true -O tests/data/unprocessed_dataset_2_submissions/submission_x/${FILE}
Expand Down
75 changes: 55 additions & 20 deletions tutorials/1_tutorial_preprocessing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2024-06-17T15:40:12.854854Z",
Expand All @@ -17,7 +17,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2024-06-17T15:40:20.557563Z",
Expand All @@ -30,7 +30,6 @@
"import os\n",
"import torch\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import yaml\n",
"from ipyfilechooser import FileChooser"
]
Expand Down Expand Up @@ -80,6 +79,17 @@
"display(submission1_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Select path to populations (submission 1)\n",
"submission1_pop_path = FileChooser(path_to_sub_set.selected_path)\n",
"display(submission1_pop_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -97,6 +107,26 @@
"display(submission2_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Select path to populations (submission 2)\n",
"submission2_pop_path = FileChooser(path_to_sub_set.selected_path)\n",
"display(submission2_pop_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"submission2_pop_path.selected"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -116,7 +146,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2024-06-13T07:40:59.387306Z",
Expand All @@ -139,20 +169,22 @@
" \"box_size\": 144,\n",
" \"pixel_size\": 1.073 * 2,\n",
" \"path\": submission1_path.selected_path,\n",
" \"populations_file\": submission1_pop_path.selected,\n",
" },\n",
" 1: {\n",
" \"name\": \"submission2\",\n",
" \"align\": 1,\n",
" \"box_size\": 288,\n",
" \"pixel_size\": 1.073,\n",
" \"path\": submission2_path.selected_path,\n",
" \"populations_file\": submission2_pop_path.selected,\n",
" },\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2024-06-13T07:41:01.194466Z",
Expand All @@ -174,17 +206,22 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"After you create your submission_config, simply grab a copy of the file \"config_preproc.yaml\" from the provided config_files, and change the path for the \"submission_config_file\" to the file we created in the previous cell. Also change the path for the output. The rest of the parameters you can leave untouched. Please see the publication \"Singer, A., & Yang, R. (2024). Alignment of density maps in Wasserstein distance. Biological Imaging, 4, e5\" for more details. Then simply run\n",
"Lastly, to run the preprocessing pipeline, follow these steps:\n",
"\n",
"```bash\n",
"cryo_challenge run_preprocessing --config /path/to/config_preproc.yaml\n",
"```\n",
"0. Make sure to activate your environment and have the package installed!\n",
"\n",
"Note: make sure to activate your environment and have the package installed!\n",
"1. Grab a copy of the file `config_preproc.yaml` from our config file templates.\n",
"\n",
"You can run the following cell to visualize your volumes (more precisely, a projection of them)\n",
"2. In the copied config file, update the value of `submission_config_file` to match the path to the file we created in the last cell.\n",
"\n",
"IMPORTANT: The execution of the previous program relies on the existence of file to be saved at {{ submission1_path.selected_path }} with a specific formatting. The file must be named \"populations.txt\", and should be formatted as a single row/column CSV file containing the populations computed from your results. If the previous file is not included, the execution of the program will result in a runtime error."
"3. Optionally, change the other parameters. \n",
" * Most of the parameters (BOT_* and thresh_percentile) are for the alignment. For details on how they work, please see the publication \"Singer, A., & Yang, R. (2024). Alignment of density maps in Wasserstein distance. Biological Imaging, 4, e5\". \n",
"\n",
" * The other parameters are self-explanatory. \"seed_flavor_assignment\" changes which submission gets assigned which ice cream flavor; keep this value if you want to be able to undo the anonymization later.\n",
"\n",
"4. Run the command: `cryo_challenge run_preprocessing --config /path/to/config_preproc.yaml`\n",
"\n",
"You can run the following cell to visualize your volumes (more precisely, a projection of them)\n"
]
},
{
Expand All @@ -207,7 +244,7 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2024-06-13T07:43:16.259106Z",
Expand Down Expand Up @@ -240,12 +277,10 @@
"source": [
"n_submissions = 2 # change this to however many submissions you preprocessed\n",
"\n",
"fig, ax = plt.subplots(2, 6, figsize=(20, 8)) # change values here too\n",
"fig, ax = plt.subplots(1, 2, figsize=(10, 4)) # change values here too\n",
"\n",
"for i in range(n_submissions):\n",
" idx = np.random.randint(\n",
" 0, 20\n",
" ) # doing random volumes to check that everything went fine\n",
" idx = 0\n",
"\n",
" submission = torch.load(os.path.join(full_output_path, f\"submission_{i}.pt\"))\n",
" print(submission[\"volumes\"].shape, submission[\"id\"])\n",
Expand All @@ -256,9 +291,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "cryo-challenge-kernel",
"language": "python",
"name": "python3"
"name": "cryo-challenge-kernel"
},
"language_info": {
"codemirror_mode": {
Expand All @@ -270,7 +305,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.17"
"version": "3.10.10"
}
},
"nbformat": 4,
Expand Down

0 comments on commit 22504f5

Please sign in to comment.