Merge pull request #74 from computational-cell-analytics/software-updates

Software updates
constantinpape authored Dec 5, 2024
2 parents 8e8c25d + 333b24a commit afeb8e7
Showing 12 changed files with 344 additions and 59 deletions.
40 changes: 40 additions & 0 deletions .github/workflows/run_tests.yaml
@@ -0,0 +1,40 @@
name: test

on:
  push:
    branches:
      - main
    tags:
      - "v*"  # Push events to matching v*, i.e. v1.0, v20.15.10
  pull_request:  # run CI on commits to any open PR
  workflow_dispatch:  # can manually trigger CI from GitHub actions tab


jobs:
  test:
    name: ${{ matrix.os }} ${{ matrix.python-version }}
    runs-on: ${{ matrix.os }}
    timeout-minutes: 60
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.11"]
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup micromamba
        uses: mamba-org/setup-micromamba@v1
        with:
          environment-file: environment_cpu.yaml
          create-args: >-
            python=${{ matrix.python-version }}
      - name: Install SynapseNet
        shell: bash -l {0}
        run: pip install --no-deps -e .

      - name: Run tests
        shell: bash -l {0}
        run: python -m unittest discover -s test -v
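For reference, the CI test step above can be reproduced locally with Python's built-in test runner. This is a minimal sketch, assuming it is run from the repository root with the environment from `environment_cpu.yaml` active:

```python
# Local equivalent of the CI step `python -m unittest discover -s test -v`.
import unittest

suite = unittest.defaultTestLoader.discover(start_dir="test")
unittest.TextTestRunner(verbosity=2).run(suite)
```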
52 changes: 41 additions & 11 deletions doc/start_page.md
@@ -14,24 +14,53 @@ especially through the [domain adaptation](domain-adaptation) functionality.
SynapseNet offers a [napari plugin](napari-plugin), [command line interface](command-line-interface), and [python library](python-library).
Please cite our [bioRxiv preprint](TODO) if you use it in your research.

**The rest of the documentation will be updated in the next few days!**

## Requirements & Installation

- Requirements: Tested on Linux but should work on Mac/Windows.
- GPU needed to use 3d segmentation networks
- Installation via conda and local pip install
- GPU support
SynapseNet was developed and tested on Linux. It should be possible to install and use it on Mac or Windows, but we have not tested this.
Furthermore, SynapseNet requires a GPU for segmentation of 3D volumes.

You need a [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/index.html) or [mamba](https://mamba.readthedocs.io/en/latest/installation/mamba-installation.html) installation. Follow the instructions at the respective links if you have neither installed. We assume you have `conda` for the rest of the instructions; after installing it, you can use the `conda` command.

To install SynapseNet, follow these steps:
- First, download the SynapseNet repository via
```bash
git clone https://github.com/computational-cell-analytics/synapse-net
```
- Then, enter the `synapse-net` folder:
```bash
cd synapse-net
```
- Now you can install the environment for SynapseNet with `conda` from the environment file we provide:
```bash
conda env create -f environment.yaml
```
- You will need to confirm this step, and creating the environment will take a while. Afterwards you can activate the environment:
```bash
conda activate synapse-net
```
- Finally, install SynapseNet itself into the environment:
```bash
pip install -e .
```

Now you can use all SynapseNet features. From now on, just activate the environment via
```bash
conda activate synapse-net
```
to use them.

> Note: If you use `mamba` instead of conda just replace `conda` in the commands above with `mamba`.

> Note: We also provide an environment for a CPU version of SynapseNet. You can install it by replacing `environment.yaml` with `environment_cpu.yaml` in the respective command above. This version can be used for 2D vesicle segmentation, but it does not work for 3D segmentation.

> Note: If you have issues with the CUDA version then install a PyTorch that matches your nvidia drivers. See [pytorch.org](https://pytorch.org/) for details.
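As a quick check that the installation succeeded, you can try importing the package. A minimal sketch, assuming the Python package name `synaptic_reconstruction` used elsewhere in this repository:

```python
# Smoke test for the installed environment (hypothetical check, not part of the docs).
import synaptic_reconstruction

print("SynapseNet is installed correctly.")
```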
- Make sure conda or mamba is installed.
- If you don't have a conda installation yet we recommend [micromamba](https://mamba.readthedocs.io/en/latest/installation/micromamba-installation.html)
- Create the environment with all required dependencies: `mamba env create -f environment.yaml`
- Activate the environment: `mamba activate synaptic-reconstruction`
- Install the package: `pip install -e .`

## Napari Plugin

lorem ipsum
**The rest of the documentation will be updated in the next few days!**


## Command Line Functionality

@@ -40,6 +69,7 @@ lorem ipsum
- vesicles / spheres
- objects


## Python Library

- segmentation functions
15 changes: 10 additions & 5 deletions environment.yaml
@@ -1,17 +1,22 @@
channels:
- pytorch
- nvidia
- conda-forge
name:
  synaptic-reconstruction
  synapse-net
dependencies:
- python-elf
- bioimageio.core
- kornia
- magicgui
- napari
- pip
- pyqt
- magicgui
- python-elf
- pytorch
- bioimageio.core
- kornia
- pytorch-cuda=12.4
- tensorboard
- torch_em
- torchvision
- trimesh
- pip:
  - napari-skimage-regionprops
18 changes: 18 additions & 0 deletions environment_cpu.yaml
@@ -0,0 +1,18 @@
channels:
- conda-forge
name:
  synapse-net
dependencies:
- bioimageio.core
- kornia
- magicgui
- napari
- pip
- pyqt
- python-elf
- pytorch
- tensorboard
- torch_em
- trimesh
- pip:
  - napari-skimage-regionprops
1 change: 0 additions & 1 deletion plot_distances.sh

This file was deleted.

70 changes: 62 additions & 8 deletions synaptic_reconstruction/inference/util.py
@@ -11,6 +11,7 @@

import imageio.v3 as imageio
import elf.parallel as parallel
import mrcfile
import numpy as np
import torch
import torch_em
@@ -131,7 +132,7 @@ def get_prediction(
    # torch_em expects the root folder of a checkpoint path instead of the checkpoint itself.
    if model_path.endswith("best.pt"):
        model_path = os.path.split(model_path)[0]
    print(f"tiling {tiling}")
    # print(f"tiling {tiling}")
    # Create updated_tiling with the same structure
    updated_tiling = {
        "tile": {},
@@ -140,7 +141,7 @@
    # Update tile dimensions
    for dim in tiling["tile"]:
        updated_tiling["tile"][dim] = tiling["tile"][dim] - 2 * tiling["halo"][dim]
    print(f"updated_tiling {updated_tiling}")
    # print(f"updated_tiling {updated_tiling}")
    pred = get_prediction_torch_em(
        input_volume, updated_tiling, model_path, model, verbose, with_channels, mask=mask
    )
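To make the tile update above concrete, here is the same arithmetic on hypothetical tiling values (not part of this diff): the halo is cropped from both sides of each tile, so the effective tile shrinks by twice the halo per axis.

```python
# Hypothetical illustration of the updated_tiling computation above.
tiling = {"tile": {"x": 512, "y": 512, "z": 64}, "halo": {"x": 64, "y": 64, "z": 16}}
updated_tiling = {"tile": {}, "halo": tiling["halo"]}
for dim in tiling["tile"]:
    # Subtract the halo on both sides of the tile.
    updated_tiling["tile"][dim] = tiling["tile"][dim] - 2 * tiling["halo"][dim]
print(updated_tiling["tile"])  # {'x': 384, 'y': 384, 'z': 32}
```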
@@ -252,6 +253,33 @@ def _load_input(img_path, extra_files, i):
    return input_volume


def _derive_scale(img_path, model_resolution):
    try:
        with mrcfile.open(img_path, "r") as f:
            voxel_size = f.voxel_size
            if len(model_resolution) == 2:
                voxel_size = [voxel_size.y, voxel_size.x]
            else:
                voxel_size = [voxel_size.z, voxel_size.y, voxel_size.x]

        assert len(voxel_size) == len(model_resolution)
        # The voxel size is given in Angstrom and we need to translate it to nanometer.
        voxel_size = [vsize / 10 for vsize in voxel_size]

        # Compute the correct scale factor.
        scale = tuple(vsize / res for vsize, res in zip(voxel_size, model_resolution))
        print("Rescaling the data at", img_path, "by", scale, "to match the training voxel size", model_resolution)

    except Exception:
        warnings.warn(
            f"The voxel size could not be read from the data for {img_path}. "
            "This data will not be scaled for prediction."
        )
        scale = None

    return scale
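A worked example of the scale derivation above, with hypothetical numbers: the mrc header stores the voxel size in Angstrom, while the model resolution is given in nanometer.

```python
# Hypothetical values, for illustration only.
voxel_size_angstrom = [16.0, 16.0]  # (y, x) voxel size from an mrc header, in Angstrom
voxel_size_nm = [vsize / 10 for vsize in voxel_size_angstrom]  # -> [1.6, 1.6] nm
model_resolution = [0.8, 0.8]  # nm per pixel that the model was trained on

scale = tuple(vsize / res for vsize, res in zip(voxel_size_nm, model_resolution))
print(scale)  # (2.0, 2.0): the input is rescaled by a factor of 2 per axis
```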


def inference_helper(
    input_path: str,
    output_root: str,
@@ -263,6 +291,8 @@
    mask_input_ext: str = ".tif",
    force: bool = False,
    output_key: Optional[str] = None,
    model_resolution: Optional[Tuple[float, float, float]] = None,
    scale: Optional[Tuple[float, float, float]] = None,
) -> None:
    """Helper function to run segmentation for mrc files.
@@ -282,7 +312,13 @@
        mask_input_ext: File extension for the mask inputs (by default .tif).
        force: Whether to rerun segmentation for output files that are already present.
        output_key: Output key for the prediction. If none will write an hdf5 file.
        model_resolution: The resolution / voxel size to which the inputs should be scaled for prediction.
            If given, the scaling factor will automatically be determined based on the voxel_size of the input data.
        scale: Fixed factor for scaling the model inputs. Cannot be passed together with 'model_resolution'.
    """
    if (scale is not None) and (model_resolution is not None):
        raise ValueError("You must not provide both 'scale' and 'model_resolution' arguments.")

    # Get the input files. If input_path is a folder then this will load all
    # the mrc files beneath it. Otherwise we assume this is an mrc file already
    # and just return the path to this mrc file.
@@ -333,8 +369,18 @@
        # Load the mask (if given).
        mask = None if mask_files is None else imageio.imread(mask_files[i])

        # Determine the scale factor:
        # If neither the 'scale' nor 'model_resolution' arguments were passed then set it to None.
        if scale is None and model_resolution is None:
            this_scale = None
        elif scale is not None:  # If 'scale' was passed then use it.
            this_scale = scale
        else:  # Otherwise 'model_resolution' was passed, use it to derive the scaling from the data.
            assert model_resolution is not None
            this_scale = _derive_scale(img_path, model_resolution)

        # Run the segmentation.
        segmentation = segmentation_function(input_volume, mask=mask)
        segmentation = segmentation_function(input_volume, mask=mask, scale=this_scale)

        # Write the result to tif or h5.
        os.makedirs(os.path.split(output_path)[0], exist_ok=True)
@@ -348,15 +394,21 @@
print(f"Saved segmentation to {output_path}.")


def get_default_tiling() -> Dict[str, Dict[str, int]]:
def get_default_tiling(is_2d: bool = False) -> Dict[str, Dict[str, int]]:
"""Determine the tile shape and halo depending on the available VRAM.
Args:
is_2d: Whether to return tiling settings for 2d inference.
Returns:
The default tiling settings for the available computational resources.
"""
    if torch.cuda.is_available():
        print("Determining suitable tiling")
    if is_2d:
        tile = {"x": 768, "y": 768, "z": 1}
        halo = {"x": 128, "y": 128, "z": 0}
        return {"tile": tile, "halo": halo}

    if torch.cuda.is_available():
        # We always use the same default halo.
        halo = {"x": 64, "y": 64, "z": 16}  # before 64,64,8

@@ -390,19 +442,21 @@ def get_default_tiling() -> Dict[str, Dict[str, int]]:

def parse_tiling(
    tile_shape: Tuple[int, int, int],
    halo: Tuple[int, int, int]
    halo: Tuple[int, int, int],
    is_2d: bool = False,
) -> Dict[str, Dict[str, int]]:
"""Helper function to parse tiling parameter input from the command line.
Args:
tile_shape: The tile shape. If None the default tile shape is used.
halo: The halo. If None the default halo is used.
is_2d: Whether to return tiling for a 2d model.
Returns:
The tiling specification.
"""

default_tiling = get_default_tiling()
default_tiling = get_default_tiling(is_2d=is_2d)

if tile_shape is None:
tile_shape = default_tiling["tile"]
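As a usage sketch (assuming the module path from this diff and that `parse_tiling` falls back to the defaults when both arguments are `None`), the two helpers compose like this for a 2d model:

```python
# Sketch: default tiling for 2d inference, based on the values visible in this diff.
from synaptic_reconstruction.inference.util import parse_tiling

tiling = parse_tiling(tile_shape=None, halo=None, is_2d=True)
print(tiling)  # expected: {'tile': {'x': 768, 'y': 768, 'z': 1}, 'halo': {'x': 128, 'y': 128, 'z': 0}}
```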
34 changes: 34 additions & 0 deletions synaptic_reconstruction/sample_data.py
@@ -0,0 +1,34 @@
import os
import pooch


def get_sample_data(name: str) -> str:
    """Get the filepath to SynapseNet sample data, stored as mrc file.

    Args:
        name: The name of the sample data. Currently, we only provide the 'tem_2d' sample data.

    Returns:
        The filepath to the downloaded sample data.
    """
    registry = {
        "tem_2d.mrc": "3c6f9ff6d7673d9bf2fd46c09750c3c7dbb8fa1aa59dcdb3363b65cc774dcf28",
    }
    urls = {
        "tem_2d.mrc": "https://owncloud.gwdg.de/index.php/s/5sAQ0U4puAspcHg/download",
    }
    key = f"{name}.mrc"

    if key not in registry:
        valid_names = [k[:-4] for k in registry.keys()]
        raise ValueError(f"Invalid sample name {name}, please choose one of {valid_names}.")

    cache_dir = os.path.expanduser(pooch.os_cache("synapse-net"))
    data_registry = pooch.create(
        path=os.path.join(cache_dir, "sample_data"),
        base_url="",
        registry=registry,
        urls=urls,
    )
    file_path = data_registry.fetch(key)
    return file_path
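A short usage sketch for the new sample data helper; the file is downloaded once and afterwards served from the pooch cache:

```python
# Fetches the 2d TEM sample tomogram registered above.
from synaptic_reconstruction.sample_data import get_sample_data

mrc_path = get_sample_data("tem_2d")
print(mrc_path)  # e.g. <os cache dir>/synapse-net/sample_data/tem_2d.mrc
```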