Commit
Merge pull request #79 from computational-cell-analytics/first-release
Prepare first release
constantinpape authored Dec 10, 2024
2 parents 458ec90 + 03d42a8 commit 1810805
Showing 20 changed files with 682 additions and 367 deletions.
5 changes: 5 additions & 0 deletions examples/.gitignore
@@ -0,0 +1,5 @@
data/
set_up_pool.py
*.h5
*.tif
*.mrc
161 changes: 161 additions & 0 deletions examples/analysis_pipeline.py
@@ -0,0 +1,161 @@
import napari
import pandas as pd
import numpy as np

from scipy.ndimage import binary_closing
from skimage.measure import regionprops
from skimage.segmentation import find_boundaries

from synapse_net.distance_measurements import measure_segmentation_to_object_distances
from synapse_net.file_utils import read_mrc
from synapse_net.imod.to_imod import convert_segmentation_to_spheres
from synapse_net.inference import compute_scale_from_voxel_size, get_model, run_segmentation
from synapse_net.sample_data import get_sample_data


def segment_structures(tomogram, voxel_size):
# Segment the synaptic vesicles. The data will automatically be resized
# to match the average voxel size of the training data.
model_name = "vesicles_3d" # This is the name for the vesicle model for EM tomography.
model = get_model(model_name) # Load the corresponding model.
# Compute the scale to match the tomogram voxel size to the training data.
scale = compute_scale_from_voxel_size(voxel_size, model_name)
vesicles = run_segmentation(tomogram, model, model_name, scale=scale)

# Segment the active zone.
model_name = "active_zone"
model = get_model(model_name)
scale = compute_scale_from_voxel_size(voxel_size, model_name)
active_zone = run_segmentation(tomogram, model, model_name, scale=scale)

# Segment the synaptic compartments.
model_name = "compartments"
model = get_model(model_name)
scale = compute_scale_from_voxel_size(voxel_size, model_name)
compartments = run_segmentation(tomogram, model, model_name, scale=scale)

return {"vesicles": vesicles, "active_zone": active_zone, "compartments": compartments}


def n_vesicles(mask, ves):
return len(np.unique(ves[mask])) - 1


def postprocess_segmentation(segmentations):
# We find the compartment corresponding to the presynaptic terminal
# by selecting the compartment with the most vesicles. We then filter out all
# vesicles that do not overlap with this compartment.

vesicles, compartments = segmentations["vesicles"], segmentations["compartments"]

# First, we find the compartment with the most vesicles.
props = regionprops(compartments, intensity_image=vesicles, extra_properties=[n_vesicles])
compartment_ids = [prop.label for prop in props]
vesicle_counts = [prop.n_vesicles for prop in props]
compartments = (compartments == compartment_ids[np.argmax(vesicle_counts)]).astype("uint8")

# Filter all vesicles that are not in the compartment.
props = regionprops(vesicles, compartments)
filter_ids = [prop.label for prop in props if prop.max_intensity == 0]
vesicles[np.isin(vesicles, filter_ids)] = 0

segmentations["vesicles"], segmentations["compartments"] = vesicles, compartments

# We also apply closing to the active zone segmentation to avoid gaps and then
# intersect it with the boundary of the presynaptic compartment.
active_zone = segmentations["active_zone"]
active_zone = binary_closing(active_zone, iterations=4)
boundary = find_boundaries(compartments)
active_zone = np.logical_and(active_zone, boundary).astype("uint8")
segmentations["active_zone"] = active_zone

return segmentations


def measure_distances(segmentations, voxel_size):
# Here, we measure the distances from each vesicle to the active zone.
# We use the function 'measure_segmentation_to_object_distances' for this,
# which uses a Euclidean distance transform scaled with the voxel size
# to determine distances.
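# (Conceptually, this is similar to computing scipy.ndimage.distance_transform_edt
# on the inverted active zone mask with sampling=voxel_size and taking the minimum
# distance within each vesicle; the actual implementation may differ in detail.)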
vesicles, active_zone = segmentations["vesicles"], segmentations["active_zone"]
voxel_size = tuple(voxel_size[ax] for ax in "zyx")
distances, _, _, vesicle_ids = measure_segmentation_to_object_distances(
vesicles, active_zone, resolution=voxel_size
)
# We convert the result to a pandas data frame.
return pd.DataFrame({"vesicle_id": vesicle_ids, "distance": distances})


def assign_vesicle_pools(vesicle_attributes):
# We assign the vesicles to their respective pools, 'docked' and 'non-attached',
# based on the criterion of being within 2 nm of the active zone.
# We add the pool assignment as a new column to the dataframe with vesicle attributes.
docked_vesicle_distance = 2 # nm
vesicle_attributes["pool"] = vesicle_attributes["distance"].apply(
lambda x: "docked" if x < docked_vesicle_distance else "non-attached"
)
return vesicle_attributes


def visualize_results(tomogram, segmentations, vesicle_attributes):
# Here, we visualize the segmentation and pool assignment result in napari.

# Create a segmentation to visualize the vesicle pools.
docked_ids = vesicle_attributes[vesicle_attributes.pool == "docked"].vesicle_id
non_attached_ids = vesicle_attributes[vesicle_attributes.pool == "non-attached"].vesicle_id
vesicles = segmentations["vesicles"]
vesicle_pools = np.isin(vesicles, docked_ids).astype("uint8")
vesicle_pools[np.isin(vesicles, non_attached_ids)] = 2

# Create a napari viewer, add the tomogram data and the segmentation results.
viewer = napari.Viewer()
viewer.add_image(tomogram)
for name, segmentation in segmentations.items():
viewer.add_labels(segmentation, name=name)
viewer.add_labels(vesicle_pools)
napari.run()


def save_analysis(segmentations, vesicle_attributes, save_path):
# Here, we compute the radii and centroid positions of the vesicles,
# add them to the vesicle attributes, and then save all vesicle attributes to
# an Excel table. You can use this table to evaluate the analysis.
vesicles = segmentations["vesicles"]
coordinates, radii = convert_segmentation_to_spheres(vesicles, radius_factor=0.7)
vesicle_attributes["radius"] = radii
for ax_id, ax_name in enumerate("zyx"):
vesicle_attributes[f"center-{ax_name}"] = coordinates[:, ax_id]
vesicle_attributes.to_excel(save_path, index=False)


def main():
"""This script implements an example analysis pipeline with SynapseNet and applies it to a tomogram.
Here, we analyze docked and non-attached vesicles in a sample tomogram."""

# Load the tomogram for our sample data.
mrc_path = get_sample_data("tem_tomo")
tomogram, voxel_size = read_mrc(mrc_path)

# Segment synaptic vesicles, the active zone, and the synaptic compartment.
segmentations = segment_structures(tomogram, voxel_size)

# Post-process the segmentations to find the presynaptic terminal,
# filter out vesicles not in the terminal, and 'snap' the AZ to the presynaptic boundary.
segmentations = postprocess_segmentation(segmentations)

# Measure the distances between the AZ and vesicles.
vesicle_attributes = measure_distances(segmentations, voxel_size)

# Assign the vesicle pools ('docked' and 'non-attached') based on the distances.
vesicle_attributes = assign_vesicle_pools(vesicle_attributes)

# Visualize the results.
visualize_results(tomogram, segmentations, vesicle_attributes)

# Compute the vesicle radii, then combine and save all measurements.
save_path = "analysis_results.xlsx"
save_analysis(segmentations, vesicle_attributes, save_path)


if __name__ == "__main__":
main()
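As an aside (not part of this commit): once the pipeline has run, the saved table can be inspected with pandas. The snippet below is a minimal sketch, assuming the columns written by `save_analysis` above ('vesicle_id', 'distance', 'pool', 'radius').

```python
import pandas as pd

# Load the table written by save_analysis (reading .xlsx files requires openpyxl).
attributes = pd.read_excel("analysis_results.xlsx")

# Count vesicles per pool and compute the mean distance to the active zone (in nm)
# and the mean vesicle radius per pool.
summary = attributes.groupby("pool").agg(
    n_vesicles=("vesicle_id", "count"),
    mean_distance=("distance", "mean"),
    mean_radius=("radius", "mean"),
)
print(summary)
```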
36 changes: 21 additions & 15 deletions examples/domain_adaptation.py
@@ -4,35 +4,41 @@
a different electron tomogram with a different specimen and sample preparation.
You don't need any annotations in the new domain to run this script.
You can download example data for this script from:
- Adaptation to 2d TEM data: TODO zenodo link
- Adaptation to different tomography data: TODO zenodo link
We use data from the SynapseNet publication for this example:
- Adaptation to 2d TEM data: https://doi.org/10.5281/zenodo.14236381
- Adaptation to different tomography data (3d data): https://doi.org/10.5281/zenodo.14232606
It is of course possible to adapt it to your own data.
"""

import os
from glob import glob

from sklearn.model_selection import train_test_split
from synapse_net.inference.inference import get_model_path
from synapse_net.sample_data import download_data_from_zenodo
from synapse_net.training import mean_teacher_adaptation
from synapse_net.tools.util import get_model_path


def main():
# Choose whether to adapt the model to 2D or to 3D data.
train_2d_model = True

# TODO adjust to zenodo downloads
# These are the data folders for the example data downloaded from zenodo.
# Update these paths to apply the script to your own data.
# Check out the example data to see the data format for training.
data_root_folder_2d = "./data/2d_tem/train_unlabeled"
data_root_folder_3d = "./data/..."
train_2d_model = False

# Choose the correct data folder depending on 2d/3d training.
data_root_folder = data_root_folder_2d if train_2d_model else data_root_folder_3d
# Download the training data from zenodo.
# You have to replace this if you want to train on your own data.
# The training data should be stored in an hdf5 file per tomogram,
# with the tomogram data stored in the internal dataset 'raw'.
if train_2d_model:
data_root = "./data/2d_tem"
download_data_from_zenodo(data_root, "2d_tem")
train_root_folder = os.path.join(data_root, "train_unlabeled")
else:
data_root = "./data/inner_ear_ribbon_synapse"
download_data_from_zenodo(data_root, "inner_ear_ribbon_synapse")
train_root_folder = data_root

# Get all files with the ending .h5 in the training folder.
files = sorted(glob(os.path.join(data_root_folder, "**", "*.h5"), recursive=True))
files = sorted(glob(os.path.join(train_root_folder, "**", "*.h5"), recursive=True))

# Create a train / val split.
train_ratio = 0.85
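The diff is truncated at this point. Purely as an illustration (not necessarily the repository's actual code), a train / val split over the collected files could look like the sketch below, using the `train_test_split` import shown above; the data path and random seed are placeholders.

```python
import os
from glob import glob

from sklearn.model_selection import train_test_split

# Illustrative only: collect the .h5 tomogram files and split them into train / val sets.
files = sorted(glob(os.path.join("./data/2d_tem/train_unlabeled", "**", "*.h5"), recursive=True))
train_ratio = 0.85
train_paths, val_paths = train_test_split(files, train_size=train_ratio, random_state=42)
```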
22 changes: 14 additions & 8 deletions examples/network_training.py
@@ -5,30 +5,36 @@
to adapt an already trained network to your data without the need for
additional annotations then check out `domain_adaptation.py`.
You can download example data for this script from:
TODO zenodo link to Single-Ax / Chemical Fix data.
We will use the data from our manuscript here:
https://doi.org/10.5281/zenodo.14330011
You can also use your own data, if you prepare it in the same format.
"""
import os
from glob import glob

from sklearn.model_selection import train_test_split
from synapse_net.sample_data import download_data_from_zenodo
from synapse_net.training import supervised_training


def main():
# This is the folder that contains your training data.
# The example was designed so that it runs for the sample data downloaded to './data'.
# If you want to train on your own data than change this filepath accordingly.
# TODO update to match zenodo download
data_root_folder = "./data/vesicles/train"
# Download the training data from zenodo.
# You have to replace this if you want to train on your own data.
# The training data should be stored in an hdf5 file per tomogram,
# with the tomogram data stored in the internal dataset 'raw'
# and the vesicle annotations stored in the internal dataset 'labels/vesicles'.
data_root = "./data/training_data"
download_data_from_zenodo(data_root, "training_data")
train_root_folder = os.path.join(data_root, "vesicles/train")

# The training data should be saved as .h5 files, with:
# an internal dataset called 'raw' that contains the image data
# and another dataset that contains the training annotations.
label_key = "labels/vesicles"

# Get all files with the ending .h5 in the training folder.
files = sorted(glob(os.path.join(data_root_folder, "**", "*.h5"), recursive=True))
files = sorted(glob(os.path.join(train_root_folder, "**", "*.h5"), recursive=True))

# Create a train / val split.
train_ratio = 0.85
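As an aside (not part of this diff): the comments above describe the expected layout of the training data. A minimal sketch of writing one such file with h5py is shown below; the array shapes and the file name are made up for illustration.

```python
import h5py
import numpy as np

# Illustrative only: stand-in arrays for a real tomogram and its vesicle annotations.
tomogram = np.random.rand(64, 256, 256).astype("float32")
vesicle_labels = np.zeros((64, 256, 256), dtype="uint32")

# One hdf5 file per tomogram, with the raw data in 'raw'
# and the vesicle annotations in 'labels/vesicles'.
with h5py.File("tomogram_01.h5", "w") as f:
    f.create_dataset("raw", data=tomogram, compression="gzip")
    f.create_dataset("labels/vesicles", data=vesicle_labels, compression="gzip")
```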
4 changes: 2 additions & 2 deletions scripts/cooper/README.md
@@ -19,9 +19,9 @@ $ micromamba activate sam
The segmentation scripts (`run_..._segmentation.py`) all work similarly and can either run segmentation for a single mrc file or for all mrcs in a folder structure.
For example, you can run vesicle segmentation like this:
```
$ python run_vesicle_segmentation.py -i /path/to/input_folder -o /path/to/output_folder -m /path/to/vesicle_model.pt
$ python run_vesicle_segmentation.py -i /path/to/input_folder -o /path/to/output_folder
```
The filepath after `-i` specifices the location of the folder with the mrcs to be segmented, the segmentation results will be stored (as tifs) in the folder following `-o` and `-m` is used to specify the path to the segmentation model.
The filepath after `-i` specifies the location of the folder with the mrcs to be segmented, and the segmentation results will be stored (as tifs) in the folder following `-o`.
To segment vesicles with an additional mask, you can use the `--mask_path` option.

The segmentation scripts accept additional parameters, e.g. `--force` to overwrite existing segmentations in the output folder (by default these are skipped to avoid unnecessary computation) and `--tile_shape <TILE_Z> <TILE_Y> <TILE_X>` to specify a different tile shape (which may be necessary to avoid running out of GPU memory).
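For example (the tile shape values below are placeholders, not recommendations), a call using these options could look like this:
```
$ python run_vesicle_segmentation.py -i /path/to/input_folder -o /path/to/output_folder --force --tile_shape 64 512 512
```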
11 changes: 9 additions & 2 deletions scripts/cooper/run_compartment_segmentation.py
@@ -2,13 +2,20 @@
from functools import partial

from synapse_net.inference.compartments import segment_compartments
from synapse_net.inference.inference import get_model_path
from synapse_net.inference.util import inference_helper, parse_tiling


def run_compartment_segmentation(args):
tiling = parse_tiling(args.tile_shape, args.halo)

if args.model is None:
model_path = get_model_path("compartments")
else:
model_path = args.model

segmentation_function = partial(
segment_compartments, model_path=args.model_path, verbose=False, tiling=tiling, scale=[0.25, 0.25, 0.25]
segment_compartments, model_path=model_path, verbose=False, tiling=tiling, scale=[0.25, 0.25, 0.25]
)
inference_helper(
args.input_path, args.output_path, segmentation_function, force=args.force, data_ext=args.data_ext
@@ -26,7 +33,7 @@ def main():
help="The filepath to directory where the segmentation will be saved."
)
parser.add_argument(
"--model_path", "-m", required=True, help="The filepath to the compartment model."
"--model", "-m", help="The filepath to the compartment model."
)
parser.add_argument(
"--force", action="store_true",
10 changes: 8 additions & 2 deletions scripts/cooper/run_mitochondria_segmentation.py
@@ -2,13 +2,19 @@
from functools import partial

from synapse_net.inference.mitochondria import segment_mitochondria
from synapse_net.inference.inference import get_model_path
from synapse_net.inference.util import inference_helper, parse_tiling


def run_mitochondria_segmentation(args):
if args.model is None:
model_path = get_model_path("mitochondria")
else:
model_path = args.model

tiling = parse_tiling(args.tile_shape, args.halo)
segmentation_function = partial(
segment_mitochondria, model_path=args.model_path, verbose=False, tiling=tiling, scale=[0.5, 0.5, 0.5]
segment_mitochondria, model_path=model_path, verbose=False, tiling=tiling, scale=[0.5, 0.5, 0.5]
)
inference_helper(
args.input_path, args.output_path, segmentation_function,
@@ -27,7 +33,7 @@ def main():
help="The filepath to directory where the segmentation will be saved."
)
parser.add_argument(
"--model_path", "-m", required=True, help="The filepath to the mitochondria model."
"--model", "-m", help="The filepath to the mitochondria model."
)
parser.add_argument(
"--force", action="store_true",
10 changes: 8 additions & 2 deletions scripts/cooper/run_vesicle_segmentation.py
@@ -2,13 +2,19 @@
from functools import partial

from synapse_net.inference.vesicles import segment_vesicles
from synapse_net.inference.inference import get_model_path
from synapse_net.inference.util import inference_helper, parse_tiling


def run_vesicle_segmentation(args):
if args.model is None:
model_path = get_model_path("vesicles_3d")
else:
model_path = args.model

tiling = parse_tiling(args.tile_shape, args.halo)
segmentation_function = partial(
segment_vesicles, model_path=args.model_path, verbose=False, tiling=tiling,
segment_vesicles, model_path=model_path, verbose=False, tiling=tiling,
exclude_boundary=not args.include_boundary
)
inference_helper(
@@ -28,7 +34,7 @@ def main():
help="The filepath to directory where the segmentations will be saved."
)
parser.add_argument(
"--model_path", "-m", required=True, help="The filepath to the vesicle model."
"--model_path", "-m", help="The filepath to the vesicle model."
)
parser.add_argument(
"--mask_path", help="The filepath to a tif file with a mask that will be used to restrict the segmentation."
2 changes: 1 addition & 1 deletion scripts/prepare_zenodo_uploads.py
@@ -56,7 +56,7 @@ def _export_az(train_root, test_tomos, name):

for tomo in tqdm(tomograms):
fname = os.path.basename(tomo)
if tomo in test_tomos:
if fname in test_tomos:
out_path = os.path.join(test_out, fname)
else:
out_path = os.path.join(train_out, fname)
2 changes: 1 addition & 1 deletion synapse_net/__version__.py
@@ -1 +1 @@
__version__ = "0.0.1"
__version__ = "0.1.0"