diff --git a/.github/workflows/main_merge_check.yml b/.github/workflows/main_merge_check.yml new file mode 100644 index 0000000..8e6f5c5 --- /dev/null +++ b/.github/workflows/main_merge_check.yml @@ -0,0 +1,14 @@ +name: Check merging branch + +on: + pull_request: + +jobs: + check_branch: + runs-on: ubuntu-latest + steps: + - name: Check branch + if: github.base_ref == 'main' && github.head_ref != 'dev' + run: | + echo "ERROR: You can only merge to main from dev." + exit 1 \ No newline at end of file diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 829a237..6c665b3 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -15,48 +15,30 @@ jobs: strategy: matrix: python-version: ["3.8", "3.9", "3.10", "3.11"] + fail-fast: false + steps: - - uses: actions/checkout@v3 - + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - - - name: Cache dependencies - id: cache_deps - uses: actions/cache@v3 - with: - path: | - ${{ env.pythonLocation }} - key: venv-${{ runner.os }}-${{ env.pythonLocation }}-${{ hashFiles('**/pyproject.toml') }} - - - name: Cache test data - id: cache_test_data - uses: actions/cache@v3 - with: - path: | - tests/data - data - key: venv-${{ runner.os }}-${{ env.pythonLocation }}-${{ hashFiles('**/tests/scripts/fetch_test_data.sh') }} + cache: 'pip' # caching pip dependencies - name: Install dependencies - if: ${{ steps.cache_deps.outputs.cache-hit != 'true' }} run: | python -m pip install --upgrade pip pip install . 
pip install pytest omegaconf - + - name: Get test data from OSF - if: ${{ steps.cache_test_data.outputs.cache-hit != 'true' }} run: | sh tests/scripts/fetch_test_data.sh - + - name: Test with pytest run: | pytest tests/test_preprocessing.py pytest tests/test_svd.py pytest tests/test_map_to_map.py pytest tests/test_distribution_to_distribution.py - diff --git a/.gitignore b/.gitignore index d7aaad0..5ede44a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,15 @@ +# downloaded data +data/dataset_2_submissions +data/dataset_1_submissions +data/dataset_2_ground_truth + +# data for testing and resulting outputs +tests/data/Ground_truth +tests/data/dataset_2_submissions/ +tests/data/unprocessed_dataset_2_submissions/submission_x/ +tests/results/ + + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] @@ -158,9 +170,3 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ - -# Tutorials folder -tutorials/* - -# Config file templates -config_files/* diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2d1bb35..e3c79b8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -10,7 +10,6 @@ repos: - id: trailing-whitespace - id: end-of-file-fixer - id: check-yaml - - id: check-added-large-files - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. rev: v0.3.4 diff --git a/README.md b/README.md index 9dfb755..165b497 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@
- + @@ -10,13 +10,13 @@
- +
- + This repository contains the code used to analyse the submissions for the [Inaugural Flatiron Cryo-EM Heterogeneity Challenge](https://www.simonsfoundation.org/flatiron/center-for-computational-biology/structural-and-molecular-biophysics-collaboration/heterogeneity-in-cryo-electron-microscopy/). # Scope @@ -26,13 +26,13 @@ This repository explains how to preprocess a submission (80 maps and correspondi This is a work in progress, while the code will probably not change, we are still writting better tutorials, documentation, and other ideas for analyzing the data. We are also in the process of making it easier for other people to contribute with their own metrics and methods. We are also in the process of distributing the code to PyPi. # Accesing the data -The data is available via the Open Science Foundation project [The Inaugural Flatiron Institute Cryo-EM Heterogeneity Community Challenge](https://osf.io/8h6fz/). You can download via a web browser, or programatically with wget as per [this script](https://github.com/flatironinstitute/Cryo-EM-Heterogeneity-Challenge-1/blob/main/tests/scripts/fetch_test_data.sh). +The data is available via the Open Science Framework project [The Inaugural Flatiron Institute Cryo-EM Heterogeneity Community Challenge](https://osf.io/8h6fz/). You can download via a web browser, or programmatically with wget as per [this script](https://github.com/flatironinstitute/Cryo-EM-Heterogeneity-Challenge-1/blob/main/data/fetch_data.sh). **_NOTE_**: We recommend downloadaing the data with the script and wget as the downloads from the web browser might be unstable. # Installation -## Stable installation +## Stable installation Installing this repository is simply. We recommend creating a virtual environment (using conda or pyenv), since we have dependencies such as PyTorch or Aspire, which are better dealt with in an isolated environment. 
After creating your environment, make sure to activate it and run ```bash @@ -63,7 +63,7 @@ pytest tests/test_distribution_to_distribution.py If you want to run our code on the full challenge data, or you own local data, please complete the following steps ### 1. Download the full challenge data from [The Inaugural Flatiron Institute Cryo-EM Heterogeneity Community Challenge](https://osf.io/8h6fz/) -You can do this through the web browser, or programatically with wget (you can get inspiration from [this script](https://github.com/flatironinstitute/Cryo-EM-Heterogeneity-Challenge-1/blob/main/tests/scripts/fetch_test_data.sh), which is just for the test data, not the full datasets) +You can do this through the web browser, or programmatically with wget (you can use [this script](https://github.com/flatironinstitute/Cryo-EM-Heterogeneity-Challenge-1/blob/main/data/fetch_data.sh), which will download around 220 GB of data) ### 2. Modify the config files and run the commands on the full challenge data Point to the path where the data is locally diff --git a/config_files/config_distribution_to_distribution.yaml b/config_files/config_distribution_to_distribution.yaml index d8b2416..da8b3d9 100644 --- a/config_files/config_distribution_to_distribution.yaml +++ b/config_files/config_distribution_to_distribution.yaml @@ -4,7 +4,7 @@ metrics: - corr - bioem - fsc -gt_metadata_fname: data/metadata.csv +gt_metadata_fname: data/dataset_2_ground_truth/metadata.csv n_replicates: 30 n_pool_microstate: 5 replicate_fraction: 0.9 diff --git a/config_files/config_map_to_map_distance_matrix.yaml b/config_files/config_map_to_map_distance_matrix.yaml index d0ea180..3c0994c 100644 --- a/config_files/config_map_to_map_distance_matrix.yaml +++ b/config_files/config_map_to_map_distance_matrix.yaml @@ -2,16 +2,16 @@ data: n_pix: 224 psize: 2.146 submission: - fname: data/submission_0.pt + fname: data/dataset_2_submissions/submission_0.pt volume_key: volumes metadata_key: populations label_key: 
id ground_truth: - volumes: data/maps_gt_flat.pt - metadata: data/metadata.csv + volumes: data/dataset_2_ground_truth/maps_gt_flat.pt + metadata: data/dataset_2_ground_truth/metadata.csv mask: do: true - volume: data/mask_dilated_wide_224x224.mrc + volume: data/dataset_2_ground_truth/mask_dilated_wide_224x224.mrc analysis: metrics: - l2 diff --git a/config_files/config_plotting.yaml b/config_files/config_plotting.yaml index 9dd30f4..65b3e92 100644 --- a/config_files/config_plotting.yaml +++ b/config_files/config_plotting.yaml @@ -1,4 +1,4 @@ -gt_metadata: path/to/metadata.csv +gt_metadata: data/dataset_2_ground_truth/metadata.csv map2map_results: - path/to/map2map_results_1.pkl diff --git a/config_files/config_preproc.yaml b/config_files/config_preproc.yaml index 4c503cc..0a087c9 100644 --- a/config_files/config_preproc.yaml +++ b/config_files/config_preproc.yaml @@ -1,5 +1,4 @@ submission_config_file: submission_config.json -seed_flavor_assignment: 0 thresh_percentile: 93.0 BOT_box_size: 32 BOT_loss: wemd diff --git a/config_files/config_svd.yaml b/config_files/config_svd.yaml index 327812e..4e3f9bd 100644 --- a/config_files/config_svd.yaml +++ b/config_files/config_svd.yaml @@ -3,7 +3,7 @@ box_size_ds: 32 submission_list: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] experiment_mode: "all_vs_ref" # options are "all_vs_all", "all_vs_ref" # optional unless experiment_mode is "all_vs_ref" -path_to_reference: /path/to/reference +path_to_reference: /path/to/reference/volumes.pt dtype: "float32" # options are "float32", "float64" output_options: # path will be created if it does not exist diff --git a/data/fetch_data.sh b/data/fetch_data.sh new file mode 100644 index 0000000..54ecb0d --- /dev/null +++ b/data/fetch_data.sh @@ -0,0 +1,21 @@ +mkdir -p data/dataset_2_submissions data/dataset_1_submissions data/dataset_2_ground_truth + +# dataset 1 submissions (seq instead of {0..10}: brace ranges are a bashism and stay literal under sh/dash) +for i in $(seq 0 10) +do + wget 
"https://files.osf.io/v1/resources/8h6fz/providers/dropbox/dataset_1_submissions/submission_${i}.pt?download=true" -O data/dataset_1_submissions/submission_${i}.pt +done + +# dataset 2 submissions +for i in $(seq 0 11) +do + wget "https://files.osf.io/v1/resources/8h6fz/providers/dropbox/dataset_2_submissions/submission_${i}.pt?download=true" -O data/dataset_2_submissions/submission_${i}.pt +done + +# ground truth + +wget "https://files.osf.io/v1/resources/8h6fz/providers/dropbox/Ground_truth/maps_gt_flat.pt?download=true" -O data/dataset_2_ground_truth/maps_gt_flat.pt + +wget "https://files.osf.io/v1/resources/8h6fz/providers/dropbox/Ground_truth/metadata.csv?download=true" -O data/dataset_2_ground_truth/metadata.csv + +wget "https://files.osf.io/v1/resources/8h6fz/providers/dropbox/Ground_truth/mask_dilated_wide_224x224.mrc?download=true" -O data/dataset_2_ground_truth/mask_dilated_wide_224x224.mrc diff --git a/pyproject.toml b/pyproject.toml index ba0facf..597890e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,29 +38,29 @@ classifiers = [ "Programming Language :: Python :: Implementation :: PyPy", ] dependencies = [ - "torch<=2.3.1", - "numpy<=2.0.0", - "natsort<=8.4.0", - "pandas<=2.2.2", - "dataclasses_json<=0.6.7", - "mrcfile<=1.5.0", - "scipy<=1.13.1", - "cvxpy<=1.5.2", - "POT<=0.9.3", - "aspire<=0.12.2", - "jupyter<=1.0.0", - "osfclient<=0.0.5", - "seaborn<=0.13.2", - "ipyfilechooser<=0.6.0", + "torch", + "numpy", + "natsort", + "pandas", + "dataclasses_json", + "mrcfile", + "scipy", + "cvxpy", + "POT", + "aspire", + "jupyter", + "osfclient", + "seaborn", + "ipyfilechooser", + "omegaconf" ] [project.optional-dependencies] dev = [ - "pytest<=8.2.2", + "pytest", "mypy", "pre-commit", "ruff", - "omegaconf<=2.3.0" ] [project.urls] diff --git a/src/cryo_challenge/__init__.py b/src/cryo_challenge/__init__.py index e69de29..cafea4e 100644 --- a/src/cryo_challenge/__init__.py +++ b/src/cryo_challenge/__init__.py @@ -0,0 +1 @@ +from cryo_challenge.__about__ import 
__version__ \ No newline at end of file diff --git a/src/cryo_challenge/_preprocessing/dataloader.py b/src/cryo_challenge/_preprocessing/dataloader.py index 2593c2a..27ca57a 100644 --- a/src/cryo_challenge/_preprocessing/dataloader.py +++ b/src/cryo_challenge/_preprocessing/dataloader.py @@ -25,7 +25,11 @@ class SubmissionPreprocessingDataLoader(Dataset): def __init__(self, submission_config): self.submission_config = submission_config - self.submission_paths, self.gt_path = self.extract_submission_paths() + self.validate_submission_config() + + self.submission_paths, self.population_files, self.gt_path = ( + self.extract_submission_paths() + ) self.subs_index = [int(idx) for idx in list(self.submission_config.keys())[1:]] path_to_gt_ref = os.path.join( self.gt_path, self.submission_config["gt"]["ref_align_fname"] @@ -53,30 +57,40 @@ def validate_submission_config(self): raise ValueError("Box size not found for ground truth") if "pixel_size" not in value.keys(): raise ValueError("Pixel size not found for ground truth") + if "ref_align_fname" not in value.keys(): + raise ValueError( + "Reference align file name not found for ground truth" + ) continue else: if "path" not in value.keys(): raise ValueError(f"Path not found for submission {key}") - if "id" not in value.keys(): - raise ValueError(f"ID not found for submission {key}") + if "name" not in value.keys(): + raise ValueError(f"Name not found for submission {key}") if "box_size" not in value.keys(): raise ValueError(f"Box size not found for submission {key}") if "pixel_size" not in value.keys(): raise ValueError(f"Pixel size not found for submission {key}") if "align" not in value.keys(): raise ValueError(f"Align not found for submission {key}") - + if "populations_file" not in value.keys(): + raise ValueError(f"Population file not found for submission {key}") + if "flip" not in value.keys(): + raise ValueError(f"Flip not found for submission {key}") + if "submission_version" not in value.keys(): + raise 
ValueError( + f"Submission version not found for submission {key}" + ) if not os.path.exists(value["path"]): raise ValueError(f"Path {value['path']} does not exist") if not os.path.isdir(value["path"]): raise ValueError(f"Path {value['path']} is not a directory") - ids = list(self.submission_config.keys())[1:] - if ids != list(range(len(ids))): - raise ValueError( - "Submission IDs should be integers starting from 0 and increasing by 1" - ) + if not os.path.exists(value["populations_file"]): + raise ValueError( + f"Population file {value['populations_file']} does not exist" + ) return @@ -135,13 +149,16 @@ def help(cls): def extract_submission_paths(self): submission_paths = [] + population_files = [] for key, value in self.submission_config.items(): if key == "gt": gt_path = value["path"] else: submission_paths.append(value["path"]) - return submission_paths, gt_path + population_files.append(value["populations_file"]) + + return submission_paths, population_files, gt_path def __len__(self): return len(self.submission_paths) @@ -151,13 +168,9 @@ def __getitem__(self, idx): glob.glob(os.path.join(self.submission_paths[idx], "*.mrc")) ) vol_paths = [vol_path for vol_path in vol_paths if "mask" not in vol_path] - assert len(vol_paths) > 0, "No volumes found in submission directory" - populations = np.loadtxt( - os.path.join(self.submission_paths[idx], "populations.txt") - ) - populations = torch.from_numpy(populations) + populations = torch.from_numpy(np.loadtxt(self.population_files[idx])) vol0 = mrcfile.open(vol_paths[0], mode="r") volumes = torch.zeros( diff --git a/src/cryo_challenge/_preprocessing/preprocessing_pipeline.py b/src/cryo_challenge/_preprocessing/preprocessing_pipeline.py index 5fc3db9..90ccc51 100644 --- a/src/cryo_challenge/_preprocessing/preprocessing_pipeline.py +++ b/src/cryo_challenge/_preprocessing/preprocessing_pipeline.py @@ -1,5 +1,4 @@ import torch -import numpy as np import json import os @@ -40,34 +39,7 @@ def save_submission(volumes, 
populations, submission_id, submission_index, confi def preprocess_submissions(submission_dataset, config): - np.random.seed(config["seed_flavor_assignment"]) - ice_cream_flavors = [ - "Chocolate", - "Vanilla", - "Cookies N' Cream", - "Mint Chocolate Chip", - "Strawberry", - "Butter Pecan", - "Salted Caramel", - "Pistachio", - "Rocky Road", - "Coffee", - "Cookie Dough", - "Chocolate Chip", - "Neapolitan", - "Cherry", - "Rainbow Sherbet", - "Peanut Butter", - "Cotton Candy", - "Lemon Sorbet", - "Mango", - "Black Raspberry", - ] - - n_subs = max(submission_dataset.subs_index) + 1 - random_mapping = np.random.choice(len(ice_cream_flavors), n_subs, replace=False) hash_table = {} - box_size_gt = submission_dataset.submission_config["gt"]["box_size"] pixel_size_gt = submission_dataset.submission_config["gt"]["pixel_size"] vol_gt_ref = submission_dataset.vol_gt_ref @@ -75,9 +47,12 @@ def preprocess_submissions(submission_dataset, config): for i in range(len(submission_dataset)): idx = submission_dataset.subs_index[i] - hash_table[submission_dataset.submission_config[str(idx)]["name"]] = ( - ice_cream_flavors[random_mapping[idx]] - ) + sub_flavor = submission_dataset.submission_config[str(idx)]["flavor_name"] + sub_name = submission_dataset.submission_config[str(idx)]["name"] + hash_table[sub_flavor] = { + "name": sub_name, + "filename": f"submission_{idx}.pt", + } print(f"Preprocessing submission {idx}...") @@ -107,6 +82,11 @@ def preprocess_submissions(submission_dataset, config): print(" Centering submission") volumes = center_submission(volumes, pixel_size=pixel_size_gt) + # flip handedness + if submission_dataset.submission_config[str(idx)]["flip"] == 1: + print(" Flipping handedness of submission") + volumes = volumes.flip(-1) + # align to GT if submission_dataset.submission_config[str(idx)]["align"] == 1: print(" Aligning submission to ground truth") @@ -114,17 +94,34 @@ def preprocess_submissions(submission_dataset, config): # save preprocessed volumes print(" 
Saving preprocessed submission") + submission_version = submission_dataset.submission_config[str(idx)][ + "submission_version" + ] + if str(submission_version) == "0": + submission_version = "" + else: + submission_version = f" {submission_version}" + print(f" SUBMISSION VERSION {submission_version}") + submission_id = ( + submission_dataset.submission_config[str(idx)]["flavor_name"] + + submission_version + ) + print(f"SUBMISSION ID {submission_id}") + save_submission( volumes, submission_dataset[i]["populations"], - ice_cream_flavors[random_mapping[idx]], + submission_id, idx, config, ) print(f" submission saved as submission_{idx}.pt") print(f"Preprocessing submission {idx} complete") - with open("hash_table.json", "w") as f: + hash_table_path = os.path.join( + config["output_path"], "submission_to_icecream_table.json" + ) + with open(hash_table_path, "w") as f: json.dump(hash_table, f, indent=4) return diff --git a/src/cryo_challenge/data/_validation/config_validators.py b/src/cryo_challenge/data/_validation/config_validators.py index 93316a0..b2fa933 100644 --- a/src/cryo_challenge/data/_validation/config_validators.py +++ b/src/cryo_challenge/data/_validation/config_validators.py @@ -1,7 +1,7 @@ from numbers import Number import pandas as pd import os -from typing import List + def validate_generic_config(config: dict, reference: dict) -> None: """ @@ -48,7 +48,6 @@ def validate_config_preprocessing(config_data: dict) -> None: "BOT_loss": str, "BOT_iter": Number, "BOT_refine": bool, - "seed_flavor_assignment": int, } validate_generic_config(config_data, keys_and_types) return diff --git a/tests/config_files/test_config_distribution_to_distribution.yaml b/tests/config_files/test_config_distribution_to_distribution.yaml index a1f03af..05b6317 100644 --- a/tests/config_files/test_config_distribution_to_distribution.yaml +++ b/tests/config_files/test_config_distribution_to_distribution.yaml @@ -9,4 +9,4 @@ cvxpy_solver: ECOS optimal_q_kl: n_iter: 100000 
break_atol: 0.0001 -output_fname: results/test_distribution_to_distribution_submission_0.pkl \ No newline at end of file +output_fname: tests/results/test_distribution_to_distribution_submission_0.pkl diff --git a/tests/config_files/test_config_map_to_map.yaml b/tests/config_files/test_config_map_to_map.yaml index 563dd5b..cbf6d09 100644 --- a/tests/config_files/test_config_map_to_map.yaml +++ b/tests/config_files/test_config_map_to_map.yaml @@ -1,17 +1,17 @@ data: n_pix: 224 - psize: 2.146 + psize: 2.146 submission: fname: tests/data/dataset_2_submissions/test_submission_0_n8.pt volume_key: volumes metadata_key: populations label_key: id ground_truth: - volumes: tests/data/Ground_truth/test_maps_gt_flat_10.pt - metadata: tests/data/Ground_truth/test_metadata_10.csv - mask: + volumes: tests/data/Ground_truth/test_maps_gt_flat_10.pt + metadata: tests/data/Ground_truth/test_metadata_10.csv + mask: do: true - volume: data/Ground_truth/mask_dilated_wide_224x224.mrc + volume: tests/data/Ground_truth/mask_dilated_wide_224x224.mrc analysis: metrics: - l2 @@ -24,4 +24,4 @@ analysis: normalize: do: true method: median_zscore -output: tests/results/test_map_to_map_distance_matrix_submission_0.pkl \ No newline at end of file +output: tests/results/test_map_to_map_distance_matrix_submission_0.pkl diff --git a/tests/config_files/test_config_preproc.yaml b/tests/config_files/test_config_preproc.yaml index 0a27d7e..0d0fec5 100644 --- a/tests/config_files/test_config_preproc.yaml +++ b/tests/config_files/test_config_preproc.yaml @@ -1,5 +1,4 @@ submission_config_file: tests/data/unprocessed_dataset_2_submissions/submission_x/submission_config.json -seed_flavor_assignment: 0 thresh_percentile: 93.0 BOT_box_size: 32 BOT_loss: wemd diff --git a/tests/data/unprocessed_dataset_2_submissions/submission_x/submission_config.json b/tests/data/unprocessed_dataset_2_submissions/submission_x/submission_config.json index 87184aa..b8318b9 100644 --- 
a/tests/data/unprocessed_dataset_2_submissions/submission_x/submission_config.json +++ b/tests/data/unprocessed_dataset_2_submissions/submission_x/submission_config.json @@ -9,8 +9,12 @@ "0": { "name": "raw_submission_in_testdata", "align": 1, + "flavor_name": "test flavor", "box_size": 244, "pixel_size": 2.146, - "path": "tests/data/unprocessed_dataset_2_submissions/submission_x" + "path": "tests/data/unprocessed_dataset_2_submissions/submission_x", + "flip": 1, + "populations_file": "tests/data/unprocessed_dataset_2_submissions/submission_x/populations.txt", + "submission_version": "1.0" } -} \ No newline at end of file +} diff --git a/tests/scripts/fetch_test_data.sh b/tests/scripts/fetch_test_data.sh index 5b58f23..c252871 100644 --- a/tests/scripts/fetch_test_data.sh +++ b/tests/scripts/fetch_test_data.sh @@ -1,11 +1,11 @@ -mkdir -p tests/data/dataset_2_submissions data/dataset_2_submissions tests/results tests/data/unprocessed_dataset_2_submissions/submission_x tests/data/Ground_truth/ data/Ground_truth +mkdir -p tests/data/dataset_2_submissions tests/results tests/data/unprocessed_dataset_2_submissions/submission_x tests/data/Ground_truth wget https://files.osf.io/v1/resources/8h6fz/providers/dropbox/tests/dataset_2_submissions/test_submission_0_n8.pt?download=true -O tests/data/dataset_2_submissions/test_submission_0_n8.pt ADIR=$(pwd) ln -s $ADIR/tests/data/dataset_2_submissions/test_submission_0_n8.pt $ADIR/tests/data/dataset_2_submissions/submission_0.pt # symlink for svd which needs submission_0.pt for filename wget https://files.osf.io/v1/resources/8h6fz/providers/dropbox/tests/Ground_truth/test_maps_gt_flat_10.pt?download=true -O tests/data/Ground_truth/test_maps_gt_flat_10.pt -wget https://files.osf.io/v1/resources/8h6fz/providers/dropbox/tests/Ground_truth/test_metadata_10.csv?download=true -O tests/data/Ground_truth/test_metadata_10.csv -wget 
https://files.osf.io/v1/resources/8h6fz/providers/dropbox/tests/Ground_truth/1.mrc?download=true -O tests/data/Ground_truth/1.mrc -wget https://files.osf.io/v1/resources/8h6fz/providers/dropbox/Ground_truth/mask_dilated_wide_224x224.mrc?download=true -O data/Ground_truth/mask_dilated_wide_224x224.mrc +wget https://files.osf.io/v1/resources/8h6fz/providers/dropbox/tests/Ground_truth/test_metadata_10.csv?download=true -O tests/data/Ground_truth/test_metadata_10.csv +wget https://files.osf.io/v1/resources/8h6fz/providers/dropbox/tests/Ground_truth/1.mrc?download=true -O tests/data/Ground_truth/1.mrc +wget https://files.osf.io/v1/resources/8h6fz/providers/dropbox/Ground_truth/mask_dilated_wide_224x224.mrc?download=true -O tests/data/Ground_truth/mask_dilated_wide_224x224.mrc for FILE in 1.mrc 2.mrc 3.mrc 4.mrc populations.txt do wget https://files.osf.io/v1/resources/8h6fz/providers/dropbox/tests/unprocessed_dataset_2_submissions/submission_x/${FILE}?download=true -O tests/data/unprocessed_dataset_2_submissions/submission_x/${FILE} diff --git a/tutorials/1_tutorial_preprocessing.ipynb b/tutorials/1_tutorial_preprocessing.ipynb index 0c718e4..cc6a459 100644 --- a/tutorials/1_tutorial_preprocessing.ipynb +++ b/tutorials/1_tutorial_preprocessing.ipynb @@ -136,6 +136,7 @@ " 0: {\n", " \"name\": \"submission1\",\n", " \"align\": 0,\n", + " \"flip\": 0,\n", " \"box_size\": 144,\n", " \"pixel_size\": 1.073 * 2,\n", " \"path\": submission1_path.selected_path,\n", @@ -143,6 +144,7 @@ " 1: {\n", " \"name\": \"submission2\",\n", " \"align\": 1,\n", + " \"flip\": 1,\n", " \"box_size\": 288,\n", " \"pixel_size\": 1.073,\n", " \"path\": submission2_path.selected_path,\n",