Commit

Merge pull request #70 from computational-cell-analytics/more-inner-ear-analysis

Add active zone inference
constantinpape authored Dec 1, 2024
2 parents d723d5c + 53e6c9b commit ae2a472
Showing 8 changed files with 474 additions and 12 deletions.
53 changes: 50 additions & 3 deletions scripts/aggregate_data_information.py
@@ -200,7 +200,7 @@ def active_zone_train_data():
"01": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/exported_imod_objects/01_hoi_maus_2020_incomplete", # noqa
"04": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/exported_imod_objects/04_hoi_stem_examples", # noqa
"06": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/exported_imod_objects/06_hoi_wt_stem750_fm", # noqa
"12": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/2D_data/20241021_imig_2014_data_transfer_exported_grouped", # noqa
"12": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/exported_imod_objects/12_chemical_fix_cryopreparation", # noqa
}

test_tomograms = {
@@ -467,11 +467,58 @@ def get_image_sizes_tem_2d():
print(f["raw"].shape)


def mito_train_data():
train_root = "/scratch-grete/projects/nim00007/data/mitochondria/cooper/fidi_down_s2"
test_tomograms = [
"36859_J1_66K_TS_CA3_MF_18_rec_2Kb1dawbp_crop_downscaled.h5",
"3.2_downscaled.h5",
]
all_tomos = sorted(glob(os.path.join(train_root, "*.h5")))

tomo_names = []
tomo_condition = []
tomo_mitos = []
tomo_resolution = []
tomo_train = []

for tomo in all_tomos:
fname = os.path.basename(tomo)
split = "test" if fname in test_tomograms else "train/val"
if "36859" in fname or "37371" in fname: # This is from the STEM dataset.
condition = stem
resolution = 2 * 0.868
else:  # This is from the single-axis TEM dataset.
condition = single_ax_tem
# These were scaled despite the resolution mismatch.
resolution = 2 * 1.554

with h5py.File(tomo, "r") as f:
seg = f["labels/mitochondria"][:]
n_mitos = len(np.unique(seg)) - 1

tomo_names.append(tomo)
tomo_condition.append(condition)
tomo_train.append(split)
tomo_resolution.append(resolution)
tomo_mitos.append(n_mitos)

df = pd.DataFrame({
"tomogram": tomo_names,
"condition": tomo_condition,
"resolution": tomo_resolution,
"used_for": tomo_train,
"mito_count_all": tomo_mitos,
})

os.makedirs("data_summary", exist_ok=True)
df.to_excel("./data_summary/mitochondria.xlsx", index=False)


def main():
# active_zone_train_data()
# compartment_train_data()
# mito_train_data()
vesicle_train_data()
mito_train_data()
# vesicle_train_data()

# vesicle_domain_adaptation_data()
# get_n_images_frog()
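As a side note on the counting logic in mito_train_data above: the number of mitochondria per tomogram is derived from the label volume by counting the unique label IDs and discarding the background label 0. A minimal, self-contained sketch of that step on a toy array (not project data):

import numpy as np

# Toy label volume: 0 is background, 1 and 2 are two mitochondria.
seg = np.array([
    [0, 0, 1],
    [1, 2, 2],
    [0, 2, 0],
])

# Count the unique IDs and subtract one for the background label.
n_mitos = len(np.unique(seg)) - 1
print(n_mitos)  # -> 2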
36 changes: 33 additions & 3 deletions scripts/cooper/full_reconstruction/segment_mitochondria.py
@@ -8,23 +8,53 @@
ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/04_full_reconstruction" # noqa
MODEL_PATH = "/scratch-grete/projects/nim00007/models/exports_for_cooper/mito_model_s2.pt" # noqa

# MODEL_PATH = "/scratch-grete/projects/nim00007/models/luca/mito/source_domain"


def run_seg(path):

out_folder = "./mito_seg"
ds, fname = os.path.split(path)
ds = os.path.basename(ds)

os.makedirs(os.path.join(out_folder, ds), exist_ok=True)
out_path = os.path.join(out_folder, ds, fname)
if os.path.exists(out_path):
return

with h5py.File(path, "r") as f:
if "labels/mitochondria" in f:
return
raw = f["raw"][:]

scale = (0.5, 0.5, 0.5)
seg = segment_mitochondria(raw, model_path=MODEL_PATH, scale=scale, verbose=False)
with h5py.File(path, "a") as f:
with h5py.File(out_path, "a") as f:
f.create_dataset("labels/mitochondria", data=seg, compression="gzip")


def run_seg_and_pred(path):
with h5py.File(path, "r") as f:
raw = f["raw"][:]

scale = (0.5, 0.5, 0.5)
seg, pred = segment_mitochondria(
raw, model_path=MODEL_PATH, scale=scale, verbose=False, return_predictions=True
)

out_folder = "./mito_pred"
os.makedirs(out_folder, exist_ok=True)
out_path = os.path.join(out_folder, os.path.basename(path))

with h5py.File(out_path, "a") as f:
f.create_dataset("raw", data=raw[::2, ::2, ::2])
f.create_dataset("labels/mitochondria", data=seg, compression="gzip")
f.create_dataset("pred", data=pred, compression="gzip")


def main():
paths = sorted(glob(os.path.join(ROOT, "**/*.h5"), recursive=True))
for path in tqdm(paths):
run_seg(path)
# run_seg_and_pred(path)


main()
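For orientation, run_seg above writes one HDF5 file per tomogram to ./mito_seg/<dataset>/<filename>, with the segmentation stored under labels/mitochondria. A minimal sketch for inspecting such an output file (the path is a hypothetical example):

import h5py
import numpy as np

# Hypothetical output file produced by run_seg.
path = "./mito_seg/example_dataset/example_tomogram.h5"

with h5py.File(path, "r") as f:
    seg = f["labels/mitochondria"][:]

# Shape of the segmentation and number of mitochondria (excluding background).
print(seg.shape, len(np.unique(seg)) - 1)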
Binary file modified scripts/data_summary/vesicle_training_data.xlsx
Binary file not shown.
246 changes: 246 additions & 0 deletions scripts/prepare_zenodo_uploads.py
@@ -0,0 +1,246 @@
import os
from glob import glob
from shutil import copyfile

import h5py
from tqdm import tqdm

OUTPUT_ROOT = "./data_summary/for_zenodo"


def _copy_vesicles(tomos, out_folder):
label_key = "labels/vesicles/combined_vesicles"
os.makedirs(out_folder, exist_ok=True)
for tomo in tqdm(tomos, desc="Export tomos"):
out_path = os.path.join(out_folder, os.path.basename(tomo))
if os.path.exists(out_path):
continue

with h5py.File(tomo, "r") as f:
raw = f["raw"][:]
labels = f[label_key][:]
try:
fname = f.attrs["filename"]
except KeyError:
fname = None

with h5py.File(out_path, "a") as f:
f.create_dataset("raw", data=raw, compression="gzip")
f.create_dataset("labels/vesicles", data=labels, compression="gzip")
if fname is not None:
f.attrs["filename"] = fname


def _export_vesicles(train_root, test_root, name):
train_tomograms = sorted(glob(os.path.join(train_root, "*.h5")))
test_tomograms = sorted(glob(os.path.join(test_root, "*.h5")))
print(f"Vesicle data for {name}:")
print(len(train_tomograms), len(test_tomograms), len(train_tomograms) + len(test_tomograms))

train_out = os.path.join(OUTPUT_ROOT, "synapse-net", "vesicles", "train", name)
_copy_vesicles(train_tomograms, train_out)

test_out = os.path.join(OUTPUT_ROOT, "synapse-net", "vesicles", "test", name)
_copy_vesicles(test_tomograms, test_out)


def _export_az(train_root, test_tomos, name):
tomograms = sorted(glob(os.path.join(train_root, "*.h5")))
print(f"AZ data for {name}:")

train_out = os.path.join(OUTPUT_ROOT, "synapse-net", "active_zones", "train", name)
test_out = os.path.join(OUTPUT_ROOT, "synapse-net", "active_zones", "test", name)

os.makedirs(train_out, exist_ok=True)
os.makedirs(test_out, exist_ok=True)

for tomo in tqdm(tomograms):
fname = os.path.basename(tomo)
if tomo in test_tomos:
out_path = os.path.join(test_out, fname)
else:
out_path = os.path.join(train_out, fname)
if os.path.exists(out_path):
continue

with h5py.File(tomo, "r") as f:
raw = f["raw"][:]
az = f["labels/AZ"][:]

with h5py.File(out_path, "a") as f:
f.create_dataset("raw", data=raw, compression="gzip")
f.create_dataset("labels/AZ", data=az, compression="gzip")


# NOTE: we have very few mito annotations from 01, so we don't include them here.
def prepare_single_ax_stem_chemical_fix():
# single-axis-tem: vesicles
train_root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/01_hoi_maus_2020_incomplete" # noqa
test_root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/01_hoi_maus_2020_incomplete" # noqa
_export_vesicles(train_root, test_root, name="single_axis_tem")

# single-axis-tem: active zones
train_root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/exported_imod_objects/01_hoi_maus_2020_incomplete" # noqa
test_tomos = [
"WT_MF_DIV28_01_MS_09204_F1.h5", "WT_MF_DIV14_01_MS_B2_09175_CA3.h5", "M13_CTRL_22723_O2_05_DIV29_5.2.h5", "WT_Unt_SC_09175_D4_05_DIV14_mtk_05.h5", # noqa
"20190805_09002_B4_SC_11_SP.h5", "20190807_23032_D4_SC_01_SP.h5", "M13_DKO_22723_A1_03_DIV29_03_MS.h5", "WT_MF_DIV28_05_MS_09204_F1.h5", "M13_CTRL_09201_S2_06_DIV31_06_MS.h5", # noqa
"WT_MF_DIV28_1.2_MS_09002_B1.h5", "WT_Unt_SC_09175_C4_04_DIV15_mtk_04.h5", "M13_DKO_22723_A4_10_DIV29_10_MS.h5", "WT_MF_DIV14_3.2_MS_D2_09175_CA3.h5", # noqa
"20190805_09002_B4_SC_10_SP.h5", "M13_CTRL_09201_S2_02_DIV31_02_MS.h5", "WT_MF_DIV14_04_MS_E1_09175_CA3.h5", "WT_MF_DIV28_10_MS_09002_B3.h5", "WT_Unt_SC_05646_D4_02_DIV16_mtk_02.h5", "M13_DKO_22723_A4_08_DIV29_08_MS.h5", "WT_MF_DIV28_04_MS_09204_M1.h5", "WT_MF_DIV28_03_MS_09204_F1.h5", "M13_DKO_22723_A1_05_DIV29_05_MS.h5", # noqa
"WT_Unt_SC_09175_C4_06_DIV15_mtk_06.h5", "WT_MF_DIV28_09_MS_09002_B3.h5", "20190524_09204_F4_SC_07_SP.h5",
"WT_MF_DIV14_02_MS_C2_09175_CA3.h5", "M13_DKO_23037_K1_01_DIV29_01_MS.h5", "WT_Unt_SC_09175_E2_01_DIV14_mtk_01.h5", "20190807_23032_D4_SC_05_SP.h5", "WT_MF_DIV14_01_MS_E2_09175_CA3.h5", "WT_MF_DIV14_03_MS_B2_09175_CA3.h5", "M13_DKO_09201_O1_01_DIV31_01_MS.h5", "M13_DKO_09201_U1_04_DIV31_04_MS.h5", # noqa
"WT_MF_DIV14_04_MS_E2_09175_CA3_2.h5", "WT_Unt_SC_09175_D5_01_DIV14_mtk_01.h5",
"M13_CTRL_22723_O2_05_DIV29_05_MS_.h5", "WT_MF_DIV14_02_MS_B2_09175_CA3.h5", "WT_MF_DIV14_01.2_MS_D1_09175_CA3.h5", # noqa
]
_export_az(train_root, test_tomos, name="single_axis_tem")

# chemical_fixation: vesicles
train_root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/12_chemical_fix_cryopreparation" # noqa
test_root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/12_chemical_fix_cryopreparation" # noqa
_export_vesicles(train_root, test_root, name="chemical_fixation")

# chemical-fixation: active zones
train_root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/exported_imod_objects/12_chemical_fix_cryopreparation" # noqa
test_tomos = ["20180305_09_MS.h5", "20180305_04_MS.h5", "20180305_08_MS.h5",
"20171113_04_MS.h5", "20171006_05_MS.h5", "20180305_01_MS.h5"]
_export_az(train_root, test_tomos, name="chemical_fixation")


def prepare_ier():
root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/moser/other_tomograms"
sets = {
"01_vesicle_pools": "vesicle_pools",
"02_tether": "tether",
"03_ratten_tomos": "rat",
}

output_folder = os.path.join(OUTPUT_ROOT, "IER")
label_names = {
"ribbons": "ribbon",
"membrane": "membrane",
"presynapse": "PD",
"postsynapse": "PSD",
"vesicles": "vesicles",
}

for name, output_name in sets.items():
out_set = os.path.join(output_folder, output_name)
os.makedirs(out_set, exist_ok=True)
tomos = sorted(glob(os.path.join(root, name, "*.h5")))

print("Export", output_name)
for tomo in tqdm(tomos):
with h5py.File(tomo, "r") as f:
try:
fname = os.path.split(f.attrs["filename"])[1][:-4]
except KeyError:
fname = f.attrs["path"][1]
fname = "_".join(fname.split("/")[-2:])

out_path = os.path.join(out_set, os.path.basename(tomo))
if os.path.exists(out_path):
continue

raw = f["raw"][:]
labels = {}
for label_name, out_name in label_names.items():
key = f"labels/{label_name}"
if key not in f:
continue
labels[out_name] = f[key][:]

with h5py.File(out_path, "a") as f:
f.attrs["filename"] = fname
f.create_dataset("raw", data=raw, compression="gzip")
for label_name, seg in labels.items():
f.create_dataset(f"labels/{label_name}", data=seg, compression="gzip")


def prepare_frog():
root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/rizzoli/extracted"
train_tomograms = [
"block10U3A_three.h5", "block30UB_one_two.h5", "block30UB_two.h5", "block10U3A_one.h5",
"block184B_one.h5", "block30UB_three.h5", "block10U3A_two.h5", "block30UB_four.h5",
"block30UB_one.h5", "block10U3A_five.h5",
]
test_tomograms = ["block10U3A_four.h5", "block30UB_five.h5"]

output_folder = os.path.join(OUTPUT_ROOT, "frog")
output_train = os.path.join(output_folder, "train_unlabeled")
os.makedirs(output_train, exist_ok=True)

for name in train_tomograms:
path = os.path.join(root, name)
out_path = os.path.join(output_train, name)
if os.path.exists(out_path):
continue
copyfile(path, out_path)

output_test = os.path.join(output_folder, "test")
os.makedirs(output_test, exist_ok=True)
for name in test_tomograms:
path = os.path.join(root, name)
out_path = os.path.join(output_test, name)
if os.path.exists(out_path):
continue
copyfile(path, out_path)


def prepare_2d_tem():
train_root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/2D_data/maus_2020_tem2d_wt_unt_div14_exported_scaled/good_for_DAtraining/maus_2020_tem2d_wt_unt_div14_exported_scaled" # noqa
test_root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicle_gt_2d/maus_2020_tem2d" # noqa
train_images = [
"MF_05649_P-09175-E_06.h5", "MF_05646_C-09175-B_001B.h5", "MF_05649_P-09175-E_07.h5",
"MF_05649_G-09175-C_001.h5", "MF_05646_C-09175-B_002.h5", "MF_05649_G-09175-C_04.h5",
"MF_05649_P-09175-E_05.h5", "MF_05646_C-09175-B_000.h5", "MF_05646_C-09175-B_001.h5"
]
test_images = [
"MF_05649_G-09175-C_04B.h5", "MF_05646_C-09175-B_000B.h5",
"MF_05649_G-09175-C_03.h5", "MF_05649_G-09175-C_02.h5"
]
print(len(train_images) + len(test_images))

output_folder = os.path.join(OUTPUT_ROOT, "2d_tem")

output_train = os.path.join(output_folder, "train_unlabeled")
os.makedirs(output_train, exist_ok=True)
for name in tqdm(train_images, desc="Export train images"):
out_path = os.path.join(output_train, name)
if os.path.exists(out_path):
continue
in_path = os.path.join(train_root, name)
with h5py.File(in_path, "r") as f:
raw = f["raw"][:]
with h5py.File(out_path, "a") as f:
f.create_dataset("raw", data=raw, compression="gzip")

output_test = os.path.join(output_folder, "test")
os.makedirs(output_test, exist_ok=True)
for name in tqdm(test_images, desc="Export test images"):
out_path = os.path.join(output_test, name)
if os.path.exists(out_path):
continue
in_path = os.path.join(test_root, name)
with h5py.File(in_path, "r") as f:
raw = f["data"][:]
labels = f["labels/vesicles"][:]
mask = f["labels/mask"][:]
with h5py.File(out_path, "a") as f:
f.create_dataset("raw", data=raw, compression="gzip")
f.create_dataset("labels/vesicles", data=labels, compression="gzip")
f.create_dataset("labels/mask", data=mask, compression="gzip")


def prepare_munc_snap():
pass


def main():
prepare_single_ax_stem_chemical_fix()
# prepare_2d_tem()
# prepare_frog()
# prepare_ier()
# prepare_munc_snap()


if __name__ == "__main__":
main()
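The export functions above write one HDF5 file per tomogram, containing a raw dataset, the corresponding label dataset (labels/vesicles or labels/AZ), and, where available, a filename attribute. A minimal sketch for loading one exported vesicle tomogram (the path is a hypothetical example):

import h5py

# Hypothetical file from the vesicle training export.
path = "./data_summary/for_zenodo/synapse-net/vesicles/train/single_axis_tem/example_tomogram.h5"

with h5py.File(path, "r") as f:
    raw = f["raw"][:]
    vesicles = f["labels/vesicles"][:]
    original_name = f.attrs.get("filename", None)

print(raw.shape, vesicles.shape, original_name)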