Commit
Merge pull request #79 from computational-cell-analytics/first-release
Prepare first release
constantinpape authored Dec 10, 2024
2 parents 458ec90 + 03d42a8 commit 1810805
Showing 20 changed files with 682 additions and 367 deletions.
5 changes: 5 additions & 0 deletions examples/.gitignore
@@ -0,0 +1,5 @@
data/
set_up_pool.py
*.h5
*.tif
*.mrc
161 changes: 161 additions & 0 deletions examples/analysis_pipeline.py
@@ -0,0 +1,161 @@
import napari
import pandas as pd
import numpy as np

from scipy.ndimage import binary_closing
from skimage.measure import regionprops
from skimage.segmentation import find_boundaries

from synapse_net.distance_measurements import measure_segmentation_to_object_distances
from synapse_net.file_utils import read_mrc
from synapse_net.imod.to_imod import convert_segmentation_to_spheres
from synapse_net.inference import compute_scale_from_voxel_size, get_model, run_segmentation
from synapse_net.sample_data import get_sample_data


def segment_structures(tomogram, voxel_size):
# Segment the synaptic vesicles. The data will automatically be resized
# to match the average voxel size of the training data.
model_name = "vesicles_3d" # This is the name for the vesicle model for EM tomography.
model = get_model(model_name) # Load the corresponding model.
# Compute the scale to match the tomogram voxel size to the training data.
scale = compute_scale_from_voxel_size(voxel_size, model_name)
vesicles = run_segmentation(tomogram, model, model_name, scale=scale)

# Segment the active zone.
model_name = "active_zone"
model = get_model(model_name)
scale = compute_scale_from_voxel_size(voxel_size, model_name)
active_zone = run_segmentation(tomogram, model, model_name, scale=scale)

# Segment the synaptic compartments.
model_name = "compartments"
model = get_model(model_name)
scale = compute_scale_from_voxel_size(voxel_size, model_name)
compartments = run_segmentation(tomogram, model, model_name, scale=scale)

return {"vesicles": vesicles, "active_zone": active_zone, "compartments": compartments}


def n_vesicles(mask, ves):
return len(np.unique(ves[mask])) - 1


def postprocess_segmentation(segmentations):
# We find the compartment corresponding to the presynaptic terminal
# by selecting the compartment with the most vesicles. We then filter out all
# vesicles that do not overlap with this compartment.

vesicles, compartments = segmentations["vesicles"], segmentations["compartments"]

# First, we find the compartment with the most vesicles.
props = regionprops(compartments, intensity_image=vesicles, extra_properties=[n_vesicles])
compartment_ids = [prop.label for prop in props]
vesicle_counts = [prop.n_vesicles for prop in props]
compartments = (compartments == compartment_ids[np.argmax(vesicle_counts)]).astype("uint8")

# Filter all vesicles that are not in the compartment.
props = regionprops(vesicles, compartments)
filter_ids = [prop.label for prop in props if prop.max_intensity == 0]
vesicles[np.isin(vesicles, filter_ids)] = 0

segmentations["vesicles"], segmentations["compartments"] = vesicles, compartments

# We also apply closing to the active zone segmentation to avoid gaps and then
# intersect it with the boundary of the presynaptic compartment.
active_zone = segmentations["active_zone"]
active_zone = binary_closing(active_zone, iterations=4)
boundary = find_boundaries(compartments)
active_zone = np.logical_and(active_zone, boundary).astype("uint8")
segmentations["active_zone"] = active_zone

return segmentations


def measure_distances(segmentations, voxel_size):
# Here, we measure the distances from each vesicle to the active zone.
# We use the function 'measure_segmentation_to_object_distances' for this,
# which uses a Euclidean distance transform scaled with the voxel size
# to determine distances.
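# (Conceptually, this is similar to computing scipy.ndimage.distance_transform_edt
# on the inverted active zone mask with sampling=voxel_size and taking the minimum
# distance within each vesicle; the actual implementation may differ in detail.)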
vesicles, active_zone = segmentations["vesicles"], segmentations["active_zone"]
voxel_size = tuple(voxel_size[ax] for ax in "zyx")
distances, _, _, vesicle_ids = measure_segmentation_to_object_distances(
vesicles, active_zone, resolution=voxel_size
)
# We convert the result to a pandas data frame.
return pd.DataFrame({"vesicle_id": vesicle_ids, "distance": distances})


def assign_vesicle_pools(vesicle_attributes):
# We assign the vesicles to their respective pools, 'docked' and 'non-attached',
# based on the criterion of being within 2 nm of the active zone.
# We add the pool assignment as a new column to the dataframe with vesicle attributes.
docked_vesicle_distance = 2 # nm
vesicle_attributes["pool"] = vesicle_attributes["distance"].apply(
lambda x: "docked" if x < docked_vesicle_distance else "non-attached"
)
return vesicle_attributes


def visualize_results(tomogram, segmentations, vesicle_attributes):
# Here, we visualize the segmentation and pool assignment result in napari.

# Create a segmentation to visualize the vesicle pools.
docked_ids = vesicle_attributes[vesicle_attributes.pool == "docked"].vesicle_id
non_attached_ids = vesicle_attributes[vesicle_attributes.pool == "non-attached"].vesicle_id
vesicles = segmentations["vesicles"]
vesicle_pools = np.isin(vesicles, docked_ids).astype("uint8")
vesicle_pools[np.isin(vesicles, non_attached_ids)] = 2

# Create a napari viewer, add the tomogram data and the segmentation results.
viewer = napari.Viewer()
viewer.add_image(tomogram)
for name, segmentation in segmentations.items():
viewer.add_labels(segmentation, name=name)
viewer.add_labels(vesicle_pools)
napari.run()


def save_analysis(segmentations, vesicle_attributes, save_path):
# Here, we compute the radii and centroid positions of the vesicles,
# add them to the vesicle attributes, and then save all vesicle attributes to
# an Excel table. You can use this table to evaluate the analysis.
vesicles = segmentations["vesicles"]
coordinates, radii = convert_segmentation_to_spheres(vesicles, radius_factor=0.7)
vesicle_attributes["radius"] = radii
for ax_id, ax_name in enumerate("zyx"):
vesicle_attributes[f"center-{ax_name}"] = coordinates[:, ax_id]
vesicle_attributes.to_excel(save_path, index=False)


def main():
"""This script implements an example analysis pipeline with SynapseNet and applies it to a tomogram.
Here, we analyze docked and non-attached vesicles in a sample tomogram."""

# Load the tomogram for our sample data.
mrc_path = get_sample_data("tem_tomo")
tomogram, voxel_size = read_mrc(mrc_path)

# Segment synaptic vesicles, the active zone, and the synaptic compartment.
segmentations = segment_structures(tomogram, voxel_size)

# Post-process the segmentations to find the presynaptic terminal,
# filter out vesicles not in the terminal, and 'snap' the AZ to the presynaptic boundary.
segmentations = postprocess_segmentation(segmentations)

# Measure the distances between the AZ and vesicles.
vesicle_attributes = measure_distances(segmentations, voxel_size)

# Assign the vesicle pools ('docked' and 'non-attached') based on the distances.
vesicle_attributes = assign_vesicle_pools(vesicle_attributes)

# Visualize the results.
visualize_results(tomogram, segmentations, vesicle_attributes)

# Compute the vesicle radii, then combine and save all measurements.
save_path = "analysis_results.xlsx"
save_analysis(segmentations, vesicle_attributes, save_path)


if __name__ == "__main__":
main()
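As an aside (not part of this commit): once the pipeline has run, the saved table can be inspected with pandas. The snippet below is a minimal sketch, assuming the columns written by `save_analysis` above ('vesicle_id', 'distance', 'pool', 'radius').

```python
import pandas as pd

# Load the table written by save_analysis (reading .xlsx files requires openpyxl).
attributes = pd.read_excel("analysis_results.xlsx")

# Count vesicles per pool and compute the mean distance to the active zone (in nm)
# and the mean vesicle radius per pool.
summary = attributes.groupby("pool").agg(
    n_vesicles=("vesicle_id", "count"),
    mean_distance=("distance", "mean"),
    mean_radius=("radius", "mean"),
)
print(summary)
```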
36 changes: 21 additions & 15 deletions examples/domain_adaptation.py
@@ -4,35 +4,41 @@
a different electron tomogram with a different specimen and sample preparation.
You don't need any annotations in the new domain to run this script.
You can download example data for this script from:
- Adaptation to 2d TEM data: TODO zenodo link
- Adaptation to different tomography data: TODO zenodo link
We use data from the SynapseNet publication for this example:
- Adaptation to 2d TEM data: https://doi.org/10.5281/zenodo.14236381
- Adaptation to different tomography data (3d data): https://doi.org/10.5281/zenodo.14232606
It is of course possible to adapt it to your own data.
"""

import os
from glob import glob

from sklearn.model_selection import train_test_split
from synapse_net.inference.inference import get_model_path
from synapse_net.sample_data import download_data_from_zenodo
from synapse_net.training import mean_teacher_adaptation
from synapse_net.tools.util import get_model_path


def main():
# Choose whether to adapt the model to 2D or to 3D data.
train_2d_model = True

# TODO adjust to zenodo downloads
# These are the data folders for the example data downloaded from zenodo.
# Update these paths to apply the script to your own data.
# Check out the example data to see the data format for training.
data_root_folder_2d = "./data/2d_tem/train_unlabeled"
data_root_folder_3d = "./data/..."
train_2d_model = False

# Choose the correct data folder depending on 2d/3d training.
data_root_folder = data_root_folder_2d if train_2d_model else data_root_folder_3d
# Download the training data from zenodo.
# You have to replace this if you want to train on your own data.
# The training data should be stored in an hdf5 file per tomogram,
# with the tomogram data stored in the internal dataset 'raw'.
if train_2d_model:
data_root = "./data/2d_tem"
download_data_from_zenodo(data_root, "2d_tem")
train_root_folder = os.path.join(data_root, "train_unlabeled")
else:
data_root = "./data/inner_ear_ribbon_synapse"
download_data_from_zenodo(data_root, "inner_ear_ribbon_synapse")
train_root_folder = data_root

# Get all files with the ending .h5 in the training folder.
files = sorted(glob(os.path.join(data_root_folder, "**", "*.h5"), recursive=True))
files = sorted(glob(os.path.join(train_root_folder, "**", "*.h5"), recursive=True))

# Create a train / val split.
train_ratio = 0.85
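The diff is truncated at this point. Purely as an illustration (not necessarily the repository's actual code), a train / val split over the collected files could look like the sketch below, using the `train_test_split` import shown above; the data path and random seed are placeholders.

```python
import os
from glob import glob

from sklearn.model_selection import train_test_split

# Illustrative only: collect the .h5 tomogram files and split them into train / val sets.
files = sorted(glob(os.path.join("./data/2d_tem/train_unlabeled", "**", "*.h5"), recursive=True))
train_ratio = 0.85
train_paths, val_paths = train_test_split(files, train_size=train_ratio, random_state=42)
```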
22 changes: 14 additions & 8 deletions examples/network_training.py
@@ -5,30 +5,36 @@
to adapt an already trained network to your data without the need for
additional annotations then check out `domain_adaptation.py`.
You can download example data for this script from:
TODO zenodo link to Single-Ax / Chemical Fix data.
We will use the data from our manuscript here:
https://doi.org/10.5281/zenodo.14330011
You can also use your own data, if you prepare it in the same format.
"""
import os
from glob import glob

from sklearn.model_selection import train_test_split
from synapse_net.sample_data import download_data_from_zenodo
from synapse_net.training import supervised_training


def main():
# This is the folder that contains your training data.
# The example was designed so that it runs for the sample data downloaded to './data'.
# If you want to train on your own data than change this filepath accordingly.
# TODO update to match zenodo download
data_root_folder = "./data/vesicles/train"
# Download the training data from zenodo.
# You have to replace this if you want to train on your own data.
# The training data should be stored in an hdf5 file per tomogram,
# with the tomogram data stored in the internal dataset 'raw'
# and the vesicle annotations stored in the internal dataset 'labels/vesicles'.
data_root = "./data/training_data"
download_data_from_zenodo(data_root, "training_data")
train_root_folder = os.path.join(data_root, "vesicles/train")

# The training data should be saved as .h5 files, with:
# an internal dataset called 'raw' that contains the image data
# and another dataset that contains the training annotations.
label_key = "labels/vesicles"

# Get all files with the ending .h5 in the training folder.
files = sorted(glob(os.path.join(data_root_folder, "**", "*.h5"), recursive=True))
files = sorted(glob(os.path.join(train_root_folder, "**", "*.h5"), recursive=True))

# Create a train / val split.
train_ratio = 0.85
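As an aside (not part of this diff): the comments above describe the expected layout of the training data. A minimal sketch of writing one such file with h5py is shown below; the array shapes and the file name are made up for illustration.

```python
import h5py
import numpy as np

# Illustrative only: stand-in arrays for a real tomogram and its vesicle annotations.
tomogram = np.random.rand(64, 256, 256).astype("float32")
vesicle_labels = np.zeros((64, 256, 256), dtype="uint32")

# One hdf5 file per tomogram, with the raw data in 'raw'
# and the vesicle annotations in 'labels/vesicles'.
with h5py.File("tomogram_01.h5", "w") as f:
    f.create_dataset("raw", data=tomogram, compression="gzip")
    f.create_dataset("labels/vesicles", data=vesicle_labels, compression="gzip")
```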
4 changes: 2 additions & 2 deletions scripts/cooper/README.md
@@ -19,9 +19,9 @@ $ micromamba activate sam
The segmentation scripts (`run_..._segmentation.py`) all work similarly and can either run segmentation for a single mrc file or for all mrcs in a folder structure.
For example, you can run vesicle segmentation like this:
```
$ python run_vesicle_segmentation.py -i /path/to/input_folder -o /path/to/output_folder -m /path/to/vesicle_model.pt
$ python run_vesicle_segmentation.py -i /path/to/input_folder -o /path/to/output_folder
```
The filepath after `-i` specifices the location of the folder with the mrcs to be segmented, the segmentation results will be stored (as tifs) in the folder following `-o` and `-m` is used to specify the path to the segmentation model.
The filepath after `-i` specifies the location of the folder with the mrcs to be segmented, and the segmentation results will be stored (as tifs) in the folder following `-o`.
To segment vesicles with an additional mask, you can use the `--mask_path` option.

The segmentation scripts accept additional parameters, e.g. `--force` to overwrite existing segmentations in the output folder (by default these are skipped to avoid unnecessary computation) and `--tile_shape <TILE_Z> <TILE_Y> <TILE_X>` to specify a different tile shape (which may be necessary to avoid running out of GPU memory).
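For example (the tile shape values below are placeholders, not recommendations), a call using these options could look like this:
```
$ python run_vesicle_segmentation.py -i /path/to/input_folder -o /path/to/output_folder --force --tile_shape 64 512 512
```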
11 changes: 9 additions & 2 deletions scripts/cooper/run_compartment_segmentation.py
@@ -2,13 +2,20 @@
from functools import partial

from synapse_net.inference.compartments import segment_compartments
from synapse_net.inference.inference import get_model_path
from synapse_net.inference.util import inference_helper, parse_tiling


def run_compartment_segmentation(args):
tiling = parse_tiling(args.tile_shape, args.halo)

if args.model is None:
model_path = get_model_path("compartments")
else:
model_path = args.model

segmentation_function = partial(
segment_compartments, model_path=args.model_path, verbose=False, tiling=tiling, scale=[0.25, 0.25, 0.25]
segment_compartments, model_path=model_path, verbose=False, tiling=tiling, scale=[0.25, 0.25, 0.25]
)
inference_helper(
args.input_path, args.output_path, segmentation_function, force=args.force, data_ext=args.data_ext
@@ -26,7 +33,7 @@ def main():
help="The filepath to directory where the segmentation will be saved."
)
parser.add_argument(
"--model_path", "-m", required=True, help="The filepath to the compartment model."
"--model", "-m", help="The filepath to the compartment model."
)
parser.add_argument(
"--force", action="store_true",
10 changes: 8 additions & 2 deletions scripts/cooper/run_mitochondria_segmentation.py
@@ -2,13 +2,19 @@
from functools import partial

from synapse_net.inference.mitochondria import segment_mitochondria
from synapse_net.inference.inference import get_model_path
from synapse_net.inference.util import inference_helper, parse_tiling


def run_mitochondria_segmentation(args):
if args.model is None:
model_path = get_model_path("mitochondria")
else:
model_path = args.model

tiling = parse_tiling(args.tile_shape, args.halo)
segmentation_function = partial(
segment_mitochondria, model_path=args.model_path, verbose=False, tiling=tiling, scale=[0.5, 0.5, 0.5]
segment_mitochondria, model_path=model_path, verbose=False, tiling=tiling, scale=[0.5, 0.5, 0.5]
)
inference_helper(
args.input_path, args.output_path, segmentation_function,
@@ -27,7 +33,7 @@ def main():
help="The filepath to directory where the segmentation will be saved."
)
parser.add_argument(
"--model_path", "-m", required=True, help="The filepath to the mitochondria model."
"--model", "-m", help="The filepath to the mitochondria model."
)
parser.add_argument(
"--force", action="store_true",
10 changes: 8 additions & 2 deletions scripts/cooper/run_vesicle_segmentation.py
@@ -2,13 +2,19 @@
from functools import partial

from synapse_net.inference.vesicles import segment_vesicles
from synapse_net.inference.inference import get_model_path
from synapse_net.inference.util import inference_helper, parse_tiling


def run_vesicle_segmentation(args):
if args.model is None:
model_path = get_model_path("vesicles_3d")
else:
model_path = args.model

tiling = parse_tiling(args.tile_shape, args.halo)
segmentation_function = partial(
segment_vesicles, model_path=args.model_path, verbose=False, tiling=tiling,
segment_vesicles, model_path=model_path, verbose=False, tiling=tiling,
exclude_boundary=not args.include_boundary
)
inference_helper(
@@ -28,7 +34,7 @@ def main():
help="The filepath to directory where the segmentations will be saved."
)
parser.add_argument(
"--model_path", "-m", required=True, help="The filepath to the vesicle model."
"--model_path", "-m", help="The filepath to the vesicle model."
)
parser.add_argument(
"--mask_path", help="The filepath to a tif file with a mask that will be used to restrict the segmentation."
2 changes: 1 addition & 1 deletion scripts/prepare_zenodo_uploads.py
@@ -56,7 +56,7 @@ def _export_az(train_root, test_tomos, name):

for tomo in tqdm(tomograms):
fname = os.path.basename(tomo)
if tomo in test_tomos:
if fname in test_tomos:
out_path = os.path.join(test_out, fname)
else:
out_path = os.path.join(train_out, fname)
2 changes: 1 addition & 1 deletion synapse_net/__version__.py
@@ -1 +1 @@
__version__ = "0.0.1"
__version__ = "0.1.0"