From 53e6c9b135f24e40cdbeeb2dcf43582b3d9cf9b8 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Sun, 1 Dec 2024 13:14:48 +0100 Subject: [PATCH] Add active zone inference --- scripts/aggregate_data_information.py | 53 +++- .../segment_mitochondria.py | 36 ++- .../data_summary/vesicle_training_data.xlsx | Bin 14745 -> 4941 bytes scripts/prepare_zenodo_uploads.py | 246 ++++++++++++++++++ .../inference/active_zone.py | 122 +++++++++ synaptic_reconstruction/inference/util.py | 9 +- synaptic_reconstruction/tools/cli.py | 12 +- synaptic_reconstruction/tools/util.py | 8 +- 8 files changed, 474 insertions(+), 12 deletions(-) create mode 100644 scripts/prepare_zenodo_uploads.py create mode 100644 synaptic_reconstruction/inference/active_zone.py diff --git a/scripts/aggregate_data_information.py b/scripts/aggregate_data_information.py index 7086b23..6ce446a 100644 --- a/scripts/aggregate_data_information.py +++ b/scripts/aggregate_data_information.py @@ -200,7 +200,7 @@ def active_zone_train_data(): "01": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/exported_imod_objects/01_hoi_maus_2020_incomplete", # noqa "04": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/exported_imod_objects/04_hoi_stem_examples", # noqa "06": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/exported_imod_objects/06_hoi_wt_stem750_fm", # noqa - "12": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/2D_data/20241021_imig_2014_data_transfer_exported_grouped", # noqa + "12": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/exported_imod_objects/12_chemical_fix_cryopreparation", # noqa } test_tomograms = { @@ -467,11 +467,58 @@ def get_image_sizes_tem_2d(): print(f["raw"].shape) +def mito_train_data(): + train_root = "/scratch-grete/projects/nim00007/data/mitochondria/cooper/fidi_down_s2" + test_tomograms = [ + "36859_J1_66K_TS_CA3_MF_18_rec_2Kb1dawbp_crop_downscaled.h5", + "3.2_downscaled.h5", + ] + all_tomos = sorted(glob(os.path.join(train_root, "*.h5"))) + + tomo_names = [] + tomo_condition = [] + tomo_mitos = [] + tomo_resolution = [] + tomo_train = [] + + for tomo in all_tomos: + fname = os.path.basename(tomo) + split = "test" if fname in test_tomograms else "train/val" + if "36859" in fname or "37371" in fname: # This is from the STEM dataset. 
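+            # The factor of 2 accounts for the downsampling of these tomograms
+            # (cf. the fidi_down_s2 folder name and the scale of 0.5 used for segmentation).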
+            condition = stem
+            resolution = 2 * 0.868
+        else:  # This is from the single-axis TEM dataset.
+            condition = single_ax_tem
+            # These tomograms were scaled, despite the resolution mismatch.
+            resolution = 2 * 1.554
+
+        with h5py.File(tomo, "r") as f:
+            seg = f["labels/mitochondria"][:]
+            n_mitos = len(np.unique(seg)) - 1
+
+        tomo_names.append(tomo)
+        tomo_condition.append(condition)
+        tomo_train.append(split)
+        tomo_resolution.append(resolution)
+        tomo_mitos.append(n_mitos)
+
+    df = pd.DataFrame({
+        "tomogram": tomo_names,
+        "condition": tomo_condition,
+        "resolution": tomo_resolution,
+        "used_for": tomo_train,
+        "mito_count_all": tomo_mitos,
+    })
+
+    os.makedirs("data_summary", exist_ok=True)
+    df.to_excel("./data_summary/mitochondria.xlsx", index=False)
+
+
 def main():
     # active_zone_train_data()
     # compartment_train_data()
-    # mito_train_data()
-    vesicle_train_data()
+    mito_train_data()
+    # vesicle_train_data()
     # vesicle_domain_adaptation_data()
     # get_n_images_frog()
diff --git a/scripts/cooper/full_reconstruction/segment_mitochondria.py b/scripts/cooper/full_reconstruction/segment_mitochondria.py
index 395de78..cb82275 100644
--- a/scripts/cooper/full_reconstruction/segment_mitochondria.py
+++ b/scripts/cooper/full_reconstruction/segment_mitochondria.py
@@ -8,23 +8,53 @@
 ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/04_full_reconstruction"  # noqa
 MODEL_PATH = "/scratch-grete/projects/nim00007/models/exports_for_cooper/mito_model_s2.pt"  # noqa
+# MODEL_PATH = "/scratch-grete/projects/nim00007/models/luca/mito/source_domain"
+
 
 def run_seg(path):
+
+    out_folder = "./mito_seg"
+    ds, fname = os.path.split(path)
+    ds = os.path.basename(ds)
+
+    os.makedirs(os.path.join(out_folder, ds), exist_ok=True)
+    out_path = os.path.join(out_folder, ds, fname)
+    if os.path.exists(out_path):
+        return
+
     with h5py.File(path, "r") as f:
-        if "labels/mitochondria" in f:
-            return
         raw = f["raw"][:]
 
     scale = (0.5, 0.5, 0.5)
    seg = segment_mitochondria(raw, model_path=MODEL_PATH, scale=scale, verbose=False)
 
-    with h5py.File(path, "a") as f:
+    with h5py.File(out_path, "a") as f:
+        f.create_dataset("labels/mitochondria", data=seg, compression="gzip")
+
+
+def run_seg_and_pred(path):
+    with h5py.File(path, "r") as f:
+        raw = f["raw"][:]
+
+    scale = (0.5, 0.5, 0.5)
+    seg, pred = segment_mitochondria(
+        raw, model_path=MODEL_PATH, scale=scale, verbose=False, return_predictions=True
+    )
+
+    out_folder = "./mito_pred"
+    os.makedirs(out_folder, exist_ok=True)
+    out_path = os.path.join(out_folder, os.path.basename(path))
+
+    with h5py.File(out_path, "a") as f:
+        f.create_dataset("raw", data=raw[::2, ::2, ::2])
         f.create_dataset("labels/mitochondria", data=seg, compression="gzip")
+        f.create_dataset("pred", data=pred, compression="gzip")
 
 
 def main():
     paths = sorted(glob(os.path.join(ROOT, "**/*.h5"), recursive=True))
     for path in tqdm(paths):
         run_seg(path)
+        # run_seg_and_pred(path)
 
 
 main()
diff --git a/scripts/data_summary/vesicle_training_data.xlsx b/scripts/data_summary/vesicle_training_data.xlsx
index 0f9ee1e82eeb9566bdfd8f76fc9b951258dc404a..57fb145e044ec104849f3b1e8f5b23524fe75e12 100644
GIT binary patch
[base85-encoded binary deltas omitted]
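For reference, a minimal sketch of how the output written by run_seg above can be inspected; the file name below is a placeholder, while the ./mito_seg/<dataset>/<tomogram>.h5 layout and the "labels/mitochondria" key follow the script:

    import h5py
    import numpy as np

    # Placeholder path following the ./mito_seg/<dataset>/<tomogram>.h5 layout.
    with h5py.File("./mito_seg/some_dataset/some_tomogram.h5", "r") as f:
        seg = f["labels/mitochondria"][:]
    # Count the mitochondria the same way as mito_train_data (label 0 = background).
    print("Number of mitochondria:", len(np.unique(seg)) - 1)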
z2Zb{*753B91%0ZPQYw|Q@Q|0xaa}=7h(~DU`%6jRcIH_Ycg)#S6Q!9yI}v*{aYbBF z`#BZF@g%v}a+a>${=P!K%j;OlkM2ZOGhL#!U8%1)pCpOx!KS9Sxb%%kCPAX$Cm$xh z*i>gKm1bIKL4Jd+*v;|DJ6>8#3}&q}8UrOhRk8yw+L-{8R1~D{tgMMeXpQ^-kfxFQ z{X;ssA{cD!@pt;5PVu<;P3OW3i|A`Yv) z#Br7ulkXIa`HgENMvO3)p_{@zW~-IAjkLe_A0|J;Om}B8o?-&P2Qk9W4eJ$t=N4}& ziO92*R^a9JhS|mk+4NV&mI&Z|aFu?r)hs*Cy4m}gpB`}6*wp7iLXge)7ocG2>a*_s zSdz5^VqNUe)Du`csLMZTXGt;CwlQHieUu6 zDI#93T~Y}EXUDKzr{$lrj{gYQo6q^e`KO!!<~xtH?OgU0@MWCpzF$9I2JrjWl!Dm4 zKb#``WzS^;pUb>)gz~6^eigZX^2F)n5?oOraj@o3eZQd^x#Gz%>H=piV3m!HuhOmp zW%tpZyVc>oYg$<@yU6q1k02UN2?kHz%H1MVN3%V5CQE714$shVK&2o~S;)}*KRO6Y z^En7S(reySYVqN+7>5#AM9eCicb$2>~4DCtknTtT|aO zLeMY-WugL{eH;~?Ezu4W(ehV*xu|nCeM9yGKtoX`s{aofLC3#}p*fbrAX~}V-h*iZ zp-wh-#E5pvmd~o-qvu6g6E+Sy2*K=l@GW|%?Aw>Uv=_``&>pmk%6tS&xZJmnn#SAEHE_9QS*$QARefsiRYGn*b1EmqQBB5yFmypLJh%!0BFusW<#P-uFmQS!w8(NAod>Fl_cj5$vhm zm_(}tNNB!Y%xC(>tNSWT`_Y- zW8MipTeYTuJ(A24kO6Kb4EK<_e(K6XD}y2}2hV-gaiEVx!Jq$eaP*1F%fH;>a+&euG$ACVf$htKpSt`6 z@nmK0xOMa(h413dLA;`EOiseU`v=6?(eonzvX`sUA_BlF*Ss*tbx`f zgBz@^Rk!sZU%||?xUv_m(?=72V=MYInGW^F`F^h_5)#Rg_{#sL^)zPLG3mMJRSBV}SES-2CdW_3)I z4teU0IHwYaC=zlW_`l{KHD}yZZ1>EnV;7RcRfF|-MzDa1JPo;SM@;avTfZI!1GKC}E%p7dRc1W5o*r|!@5pCh-W^l{no`*YB2^Y3j`xto z4w-yQzIXyQDHBJZ=)Bz83OI9&36E?XY%Jn!$~AOIuiRFi?R~ zJlBO~p;EXlf4M7-ObvN~RtXYO0b1n=st)c2e``Q)fJtT^pV&x;y*Y>;?JQQdz+^{R zZHi`=3I8Qm=(OVVBVdy;0R}8{8DNs$D>jfKt>8@Zn+#Ol{DIK9s?hRF%DvZVfP@r@nhIG z&HZ3k&WE9>p7`)P?C~82&W^1VHCAmqci`~nz2td7mW|#-NPsDr93`~hihNpJ=JAs# zdSGO$`F%h*WS;Qs~2`LX?v#wd=kaq&whM@@H;<3zF6PP%sq`V8=^JClk_8%RozC&fSnGbo;=o$wbbBUj(Bizrw(K*V`9(BSx=C z_;>&5v8}w6-obC^TZ5Pgs~0UX73IG&ZX>ZmPl| z%Y?&H0suB857VHnVDXOxD>K^fMMO^uT1-_I$MH%R&qtqu@oYmtiDDE$v;94pdD&?%Z$UJ%yzl zfMo1w22BgjWN(KGa#t4m%8@E#NX+SQ@zh_g~)d9`6SSnMFCV1?DSiH5OSDW{eok&Sa)srow8{=b`vtgG?ZoBgZ23RA z1cOi-f4yKgVRhwt{zE6>&|9(#;f21S3re)D>)x3`Ro}wI(CFEVq@$#dPj&$m?k?MH zFJCYJd;9Iy+x_xnqjjcob5V&%C$Q`tHPLl#k0e+@zg9vYy~n(J zV}x+w?u|Mk`G4QQeIktaKNmCMkSBaJ=)V$mQy3mUz`#fWt|5{DR}laE&j%9)&v-(A z_ssu)P4R!8@qGwic*1;Vj)0gF>xu);LQE-wmaUfF4EYH>M}~!gf&bs|H|^1SWmJ4=k_Sv2hgeJfu|CK?qnN*KcDi#Cy8IsqGgl6O88R(7fQ#l T|9@}KdExXVOb_1iqmBI^umugi diff --git a/scripts/prepare_zenodo_uploads.py b/scripts/prepare_zenodo_uploads.py new file mode 100644 index 0000000..b642c07 --- /dev/null +++ b/scripts/prepare_zenodo_uploads.py @@ -0,0 +1,246 @@ +import os +from glob import glob +from shutil import copyfile + +import h5py +from tqdm import tqdm + +OUTPUT_ROOT = "./data_summary/for_zenodo" + + +def _copy_vesicles(tomos, out_folder): + label_key = "labels/vesicles/combined_vesicles" + os.makedirs(out_folder, exist_ok=True) + for tomo in tqdm(tomos, desc="Export tomos"): + out_path = os.path.join(out_folder, os.path.basename(tomo)) + if os.path.exists(out_path): + continue + + with h5py.File(tomo, "r") as f: + raw = f["raw"][:] + labels = f[label_key][:] + try: + fname = f.attrs["filename"] + except KeyError: + fname = None + + with h5py.File(out_path, "a") as f: + f.create_dataset("raw", data=raw, compression="gzip") + f.create_dataset("labels/vesicles", data=labels, compression="gzip") + if fname is not None: + f.attrs["filename"] = fname + + +def _export_vesicles(train_root, test_root, name): + train_tomograms = sorted(glob(os.path.join(train_root, "*.h5"))) + test_tomograms = sorted(glob(os.path.join(test_root, "*.h5"))) + print(f"Vesicle data for {name}:") + print(len(train_tomograms), len(test_tomograms), len(train_tomograms) + len(test_tomograms)) + + train_out = os.path.join(OUTPUT_ROOT, "synapse-net", "vesicles", "train", name) + _copy_vesicles(train_tomograms, train_out) + + 
test_out = os.path.join(OUTPUT_ROOT, "synapse-net", "vesicles", "test", name) + _copy_vesicles(test_tomograms, test_out) + + +def _export_az(train_root, test_tomos, name): + tomograms = sorted(glob(os.path.join(train_root, "*.h5"))) + print(f"AZ data for {name}:") + + train_out = os.path.join(OUTPUT_ROOT, "synapse-net", "active_zones", "train", name) + test_out = os.path.join(OUTPUT_ROOT, "synapse-net", "active_zones", "test", name) + + os.makedirs(train_out, exist_ok=True) + os.makedirs(test_out, exist_ok=True) + + for tomo in tqdm(tomograms): + fname = os.path.basename(tomo) + if tomo in test_tomos: + out_path = os.path.join(test_out, fname) + else: + out_path = os.path.join(train_out, fname) + if os.path.exists(out_path): + continue + + with h5py.File(tomo, "r") as f: + raw = f["raw"][:] + az = f["labels/AZ"][:] + + with h5py.File(out_path, "a") as f: + f.create_dataset("raw", data=raw, compression="gzip") + f.create_dataset("labels/AZ", data=az, compression="gzip") + + +# NOTE: we have very few mito annotations from 01, so we don't include them in here. +def prepare_single_ax_stem_chemical_fix(): + # single-axis-tem: vesicles + train_root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/01_hoi_maus_2020_incomplete" # noqa + test_root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/01_hoi_maus_2020_incomplete" # noqa + _export_vesicles(train_root, test_root, name="single_axis_tem") + + # single-axis-tem: active zones + train_root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/exported_imod_objects/01_hoi_maus_2020_incomplete" # noqa + test_tomos = [ + "WT_MF_DIV28_01_MS_09204_F1.h5", "WT_MF_DIV14_01_MS_B2_09175_CA3.h5", "M13_CTRL_22723_O2_05_DIV29_5.2.h5", "WT_Unt_SC_09175_D4_05_DIV14_mtk_05.h5", # noqa + "20190805_09002_B4_SC_11_SP.h5", "20190807_23032_D4_SC_01_SP.h5", "M13_DKO_22723_A1_03_DIV29_03_MS.h5", "WT_MF_DIV28_05_MS_09204_F1.h5", "M13_CTRL_09201_S2_06_DIV31_06_MS.h5", # noqa + "WT_MF_DIV28_1.2_MS_09002_B1.h5", "WT_Unt_SC_09175_C4_04_DIV15_mtk_04.h5", "M13_DKO_22723_A4_10_DIV29_10_MS.h5", "WT_MF_DIV14_3.2_MS_D2_09175_CA3.h5", # noqa + "20190805_09002_B4_SC_10_SP.h5", "M13_CTRL_09201_S2_02_DIV31_02_MS.h5", "WT_MF_DIV14_04_MS_E1_09175_CA3.h5", "WT_MF_DIV28_10_MS_09002_B3.h5", "WT_Unt_SC_05646_D4_02_DIV16_mtk_02.h5", "M13_DKO_22723_A4_08_DIV29_08_MS.h5", "WT_MF_DIV28_04_MS_09204_M1.h5", "WT_MF_DIV28_03_MS_09204_F1.h5", "M13_DKO_22723_A1_05_DIV29_05_MS.h5", # noqa + "WT_Unt_SC_09175_C4_06_DIV15_mtk_06.h5", "WT_MF_DIV28_09_MS_09002_B3.h5", "20190524_09204_F4_SC_07_SP.h5", + "WT_MF_DIV14_02_MS_C2_09175_CA3.h5", "M13_DKO_23037_K1_01_DIV29_01_MS.h5", "WT_Unt_SC_09175_E2_01_DIV14_mtk_01.h5", "20190807_23032_D4_SC_05_SP.h5", "WT_MF_DIV14_01_MS_E2_09175_CA3.h5", "WT_MF_DIV14_03_MS_B2_09175_CA3.h5", "M13_DKO_09201_O1_01_DIV31_01_MS.h5", "M13_DKO_09201_U1_04_DIV31_04_MS.h5", # noqa + "WT_MF_DIV14_04_MS_E2_09175_CA3_2.h5", "WT_Unt_SC_09175_D5_01_DIV14_mtk_01.h5", + "M13_CTRL_22723_O2_05_DIV29_05_MS_.h5", "WT_MF_DIV14_02_MS_B2_09175_CA3.h5", "WT_MF_DIV14_01.2_MS_D1_09175_CA3.h5", # noqa + ] + _export_az(train_root, test_tomos, name="single_axis_tem") + + # chemical_fixation: vesicles + train_root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/12_chemical_fix_cryopreparation" # noqa + test_root = 
"/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/12_chemical_fix_cryopreparation" # noqa + _export_vesicles(train_root, test_root, name="chemical_fixation") + + # chemical-fixation: active zones + train_root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/exported_imod_objects/12_chemical_fix_cryopreparation" # noqa + test_tomos = ["20180305_09_MS.h5", "20180305_04_MS.h5", "20180305_08_MS.h5", + "20171113_04_MS.h5", "20171006_05_MS.h5", "20180305_01_MS.h5"] + _export_az(train_root, test_tomos, name="chemical_fixation") + + +def prepare_ier(): + root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/moser/other_tomograms" + sets = { + "01_vesicle_pools": "vesicle_pools", + "02_tether": "tether", + "03_ratten_tomos": "rat", + } + + output_folder = os.path.join(OUTPUT_ROOT, "IER") + label_names = { + "ribbons": "ribbon", + "membrane": "membrane", + "presynapse": "PD", + "postsynapse": "PSD", + "vesicles": "vesicles", + } + + for name, output_name in sets.items(): + out_set = os.path.join(output_folder, output_name) + os.makedirs(out_set, exist_ok=True) + tomos = sorted(glob(os.path.join(root, name, "*.h5"))) + + print("Export", output_name) + for tomo in tqdm(tomos): + with h5py.File(tomo, "r") as f: + try: + fname = os.path.split(f.attrs["filename"])[1][:-4] + except KeyError: + fname = f.attrs["path"][1] + fname = "_".join(fname.split("/")[-2:]) + + out_path = os.path.join(out_set, os.path.basename(tomo)) + if os.path.exists(out_path): + continue + + raw = f["raw"][:] + labels = {} + for label_name, out_name in label_names.items(): + key = f"labels/{label_name}" + if key not in f: + continue + labels[out_name] = f[key][:] + + with h5py.File(out_path, "a") as f: + f.attrs["filename"] = fname + f.create_dataset("raw", data=raw, compression="gzip") + for label_name, seg in labels.items(): + f.create_dataset(f"labels/{label_name}", data=seg, compression="gzip") + + +def prepare_frog(): + root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/rizzoli/extracted" + train_tomograms = [ + "block10U3A_three.h5", "block30UB_one_two.h5", "block30UB_two.h5", "block10U3A_one.h5", + "block184B_one.h5", "block30UB_three.h5", "block10U3A_two.h5", "block30UB_four.h5", + "block30UB_one.h5", "block10U3A_five.h5", + ] + test_tomograms = ["block10U3A_four.h5", "block30UB_five.h5"] + + output_folder = os.path.join(OUTPUT_ROOT, "frog") + output_train = os.path.join(output_folder, "train_unlabeled") + os.makedirs(output_train, exist_ok=True) + + for name in train_tomograms: + path = os.path.join(root, name) + out_path = os.path.join(output_train, name) + if os.path.exists(out_path): + continue + copyfile(path, out_path) + + output_test = os.path.join(output_folder, "test") + os.makedirs(output_test, exist_ok=True) + for name in test_tomograms: + path = os.path.join(root, name) + out_path = os.path.join(output_test, name) + if os.path.exists(out_path): + continue + copyfile(path, out_path) + + +def prepare_2d_tem(): + train_root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/2D_data/maus_2020_tem2d_wt_unt_div14_exported_scaled/good_for_DAtraining/maus_2020_tem2d_wt_unt_div14_exported_scaled" # noqa + test_root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicle_gt_2d/maus_2020_tem2d" # noqa + train_images = [ + "MF_05649_P-09175-E_06.h5", "MF_05646_C-09175-B_001B.h5", "MF_05649_P-09175-E_07.h5", + 
"MF_05649_G-09175-C_001.h5", "MF_05646_C-09175-B_002.h5", "MF_05649_G-09175-C_04.h5", + "MF_05649_P-09175-E_05.h5", "MF_05646_C-09175-B_000.h5", "MF_05646_C-09175-B_001.h5" + ] + test_images = [ + "MF_05649_G-09175-C_04B.h5", "MF_05646_C-09175-B_000B.h5", + "MF_05649_G-09175-C_03.h5", "MF_05649_G-09175-C_02.h5" + ] + print(len(train_images) + len(test_images)) + + output_folder = os.path.join(OUTPUT_ROOT, "2d_tem") + + output_train = os.path.join(output_folder, "train_unlabeled") + os.makedirs(output_train, exist_ok=True) + for name in tqdm(train_images, desc="Export train images"): + out_path = os.path.join(output_train, name) + if os.path.exists(out_path): + continue + in_path = os.path.join(train_root, name) + with h5py.File(in_path, "r") as f: + raw = f["raw"][:] + with h5py.File(out_path, "a") as f: + f.create_dataset("raw", data=raw, compression="gzip") + + output_test = os.path.join(output_folder, "test") + os.makedirs(output_test, exist_ok=True) + for name in tqdm(test_images, desc="Export test images"): + out_path = os.path.join(output_test, name) + if os.path.exists(out_path): + continue + in_path = os.path.join(test_root, name) + with h5py.File(in_path, "r") as f: + raw = f["data"][:] + labels = f["labels/vesicles"][:] + mask = f["labels/mask"][:] + with h5py.File(out_path, "a") as f: + f.create_dataset("raw", data=raw, compression="gzip") + f.create_dataset("labels/vesicles", data=labels, compression="gzip") + f.create_dataset("labels/mask", data=mask, compression="gzip") + + +def prepare_munc_snap(): + pass + + +def main(): + prepare_single_ax_stem_chemical_fix() + # prepare_2d_tem() + # prepare_frog() + # prepare_ier() + # prepare_munc_snap() + + +if __name__ == "__main__": + main() diff --git a/synaptic_reconstruction/inference/active_zone.py b/synaptic_reconstruction/inference/active_zone.py new file mode 100644 index 0000000..d611693 --- /dev/null +++ b/synaptic_reconstruction/inference/active_zone.py @@ -0,0 +1,122 @@ +import time +from typing import Dict, List, Optional, Tuple, Union + +import elf.parallel as parallel +import numpy as np +import torch + +from skimage.segmentation import find_boundaries +from synaptic_reconstruction.inference.util import get_prediction, _Scaler + + +def find_intersection_boundary(segmented_AZ: np.ndarray, segmented_compartment: np.ndarray) -> np.ndarray: + """ + Find the cumulative intersection of the boundary of each label in segmented_compartment with segmented_AZ. + + Args: + segmented_AZ: 3D array representing the active zone (AZ). + segmented_compartment: 3D array representing the compartment, with multiple labels. + + Returns: + Array with the cumulative intersection of all boundaries of segmented_compartment labels with segmented_AZ. 
+ """ + # Step 0: Initialize an empty array to accumulate intersections + cumulative_intersection = np.zeros_like(segmented_AZ, dtype=bool) + + # Step 1: Loop through each unique label in segmented_compartment (excluding 0 if it represents background) + labels = np.unique(segmented_compartment) + labels = labels[labels != 0] # Exclude background label (0) if necessary + + for label in labels: + # Step 2: Create a binary mask for the current label + label_mask = (segmented_compartment == label) + + # Step 3: Find the boundary of the current label's compartment + boundary_compartment = find_boundaries(label_mask, mode='outer') + + # Step 4: Find the intersection with the AZ for this label's boundary + intersection = np.logical_and(boundary_compartment, segmented_AZ) + + # Step 5: Accumulate intersections for each label + cumulative_intersection = np.logical_or(cumulative_intersection, intersection) + + return cumulative_intersection.astype(int) # Convert boolean array to int (1 for intersecting points, 0 elsewhere) + + +def _run_segmentation( + foreground, verbose, min_size, + # blocking shapes for parallel computation + block_shape=(128, 256, 256), +): + + # get the segmentation via seeded watershed + t0 = time.time() + seg = parallel.label(foreground > 0.5, block_shape=block_shape, verbose=verbose) + if verbose: + print("Compute connected components in", time.time() - t0, "s") + + # size filter + t0 = time.time() + ids, sizes = parallel.unique(seg, return_counts=True, block_shape=block_shape, verbose=verbose) + filter_ids = ids[sizes < min_size] + seg[np.isin(seg, filter_ids)] = 0 + if verbose: + print("Size filter in", time.time() - t0, "s") + seg = np.where(seg > 0, 1, 0) + return seg + + +def segment_active_zone( + input_volume: np.ndarray, + model_path: Optional[str] = None, + model: Optional[torch.nn.Module] = None, + tiling: Optional[Dict[str, Dict[str, int]]] = None, + min_size: int = 500, + verbose: bool = True, + return_predictions: bool = False, + scale: Optional[List[float]] = None, + mask: Optional[np.ndarray] = None, + compartment: Optional[np.ndarray] = None, +) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]: + """Segment active zones in an input volume. + + Args: + input_volume: The input volume to segment. + model_path: The path to the model checkpoint if `model` is not provided. + model: Pre-loaded model. Either `model_path` or `model` is required. + tiling: The tiling configuration for the prediction. + verbose: Whether to print timing information. + scale: The scale factor to use for rescaling the input volume before prediction. + mask: An optional mask that is used to restrict the segmentation. + compartment: + + Returns: + The foreground mask as a numpy array. + """ + if verbose: + print("Segmenting AZ in volume of shape", input_volume.shape) + # Create the scaler to handle prediction with a different scaling factor. + scaler = _Scaler(scale, verbose) + input_volume = scaler.scale_input(input_volume) + + # Rescale the mask if it was given and run prediction. + if mask is not None: + mask = scaler.scale_input(mask, is_segmentation=True) + pred = get_prediction(input_volume, model_path=model_path, model=model, tiling=tiling, mask=mask, verbose=verbose) + + # Run segmentation and rescale the result if necessary. 
+
+
+def segment_active_zone(
+    input_volume: np.ndarray,
+    model_path: Optional[str] = None,
+    model: Optional[torch.nn.Module] = None,
+    tiling: Optional[Dict[str, Dict[str, int]]] = None,
+    min_size: int = 500,
+    verbose: bool = True,
+    return_predictions: bool = False,
+    scale: Optional[List[float]] = None,
+    mask: Optional[np.ndarray] = None,
+    compartment: Optional[np.ndarray] = None,
+) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
+    """Segment active zones in an input volume.
+
+    Args:
+        input_volume: The input volume to segment.
+        model_path: The path to the model checkpoint if `model` is not provided.
+        model: Pre-loaded model. Either `model_path` or `model` is required.
+        tiling: The tiling configuration for the prediction.
+        min_size: The minimal size of an active zone; smaller objects are filtered out.
+        verbose: Whether to print timing information.
+        return_predictions: Whether to return the network predictions together with the segmentation.
+        scale: The scale factor to use for rescaling the input volume before prediction.
+        mask: An optional mask that is used to restrict the segmentation.
+        compartment: An optional compartment segmentation. If given, the intersection of the
+            compartment boundaries with the active zone is computed via `find_intersection_boundary`
+            and returned together with the segmentation.
+
+    Returns:
+        The active zone segmentation. If `return_predictions` is set, a tuple of the segmentation
+        and the predictions is returned; if `compartment` is given instead, a tuple of the
+        segmentation and the boundary intersection is returned.
+    """
+    if verbose:
+        print("Segmenting AZ in volume of shape", input_volume.shape)
+    # Create the scaler to handle prediction with a different scaling factor.
+    scaler = _Scaler(scale, verbose)
+    input_volume = scaler.scale_input(input_volume)
+
+    # Rescale the mask if it was given and run prediction.
+    if mask is not None:
+        mask = scaler.scale_input(mask, is_segmentation=True)
+    pred = get_prediction(input_volume, model_path=model_path, model=model, tiling=tiling, mask=mask, verbose=verbose)
+
+    # Run segmentation and rescale the result if necessary.
+    foreground = pred[0]
+    segmentation = _run_segmentation(foreground, verbose=verbose, min_size=min_size)
+    segmentation = scaler.rescale_output(segmentation, is_segmentation=True)
+
+    # Returning the prediction and the intersection together is currently not supported,
+    # but the prediction is not needed in that case anyway.
+    if return_predictions:
+        pred = scaler.rescale_output(pred, is_segmentation=False)
+        return segmentation, pred
+
+    if compartment is not None:
+        intersection = find_intersection_boundary(segmentation, compartment)
+        return segmentation, intersection
+
+    return segmentation
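For reference, a minimal usage sketch of the new `segment_active_zone` API; the tomogram file, the checkpoint path, and the "labels/compartments" key are illustrative placeholders, not part of this patch:

    import h5py
    from synaptic_reconstruction.inference.active_zone import segment_active_zone

    # Load a tomogram and a pre-computed compartment segmentation (placeholder paths/keys).
    with h5py.File("tomogram.h5", "r") as f:
        volume = f["raw"][:]
        compartments = f["labels/compartments"][:]

    # Returns the AZ segmentation plus the intersection of the AZ with the
    # compartment boundaries (computed via find_intersection_boundary).
    seg, boundary = segment_active_zone(
        volume,
        model_path="/path/to/az_model.pt",
        scale=(0.5, 0.5, 0.5),
        compartment=compartments,
    )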
diff --git a/synaptic_reconstruction/inference/util.py b/synaptic_reconstruction/inference/util.py
index cedfb07..5a799f3 100644
--- a/synaptic_reconstruction/inference/util.py
+++ b/synaptic_reconstruction/inference/util.py
@@ -332,7 +332,7 @@ def inference_helper(
         mask_files, _ = _get_file_paths(mask_input_path, mask_input_ext)
         assert len(input_files) == len(mask_files)
 
-    for i, img_path in tqdm(enumerate(input_files), total=len(input_files)):
+    for i, img_path in tqdm(enumerate(input_files), total=len(input_files), desc="Processing files"):
 
         # Determine the output file name.
         input_folder, input_name = os.path.split(img_path)
@@ -350,7 +350,12 @@ def inference_helper(
         # Check if the output path is already present.
         # If it is we skip the prediction, unless force was set to true.
         if os.path.exists(output_path) and not force:
-            continue
+            if output_key is None:
+                continue
+            else:
+                with open_file(output_path, "r") as f:
+                    if output_key in f:
+                        continue
 
         # Load the input volume. If we have extra_files then this concatenates the
         # data across a new first axis (= channel axis).
diff --git a/synaptic_reconstruction/tools/cli.py b/synaptic_reconstruction/tools/cli.py
index bcb3085..54a52a3 100644
--- a/synaptic_reconstruction/tools/cli.py
+++ b/synaptic_reconstruction/tools/cli.py
@@ -83,6 +83,7 @@ def imod_object_cli():
 
 # TODO: handle kwargs
 # TODO: add custom model path
+# TODO: enable autoscaling from input resolution
 def segmentation_cli():
     parser = argparse.ArgumentParser(description="Run segmentation.")
     parser.add_argument(
@@ -117,15 +118,24 @@ def segmentation_cli():
     parser.add_argument(
         "--data_ext", default=".mrc", help="The extension of the tomogram data. By default .mrc."
     )
+    parser.add_argument(
+        "--segmentation_key", "-s",
+        help="The key for saving the segmentation in the output file. If given, existing outputs are only skipped when they already contain this key."
+    )
+    # TODO enable autoscaling
+    parser.add_argument(
+        "--scale", type=float, default=None,
+        help="The factor for rescaling the input data before prediction, applied to all three axes; e.g. 0.5 to downsample by a factor of 2."
+    )
 
     args = parser.parse_args()
 
     model = get_model(args.model)
     tiling = parse_tiling(args.tile_shape, args.halo)
+    scale = None if args.scale is None else 3 * (args.scale,)
 
     segmentation_function = partial(
-        run_segmentation, model=model, model_type=args.model, verbose=False, tiling=tiling,
+        run_segmentation, model=model, model_type=args.model, verbose=False, tiling=tiling, scale=scale
     )
     inference_helper(
         args.input_path, args.output_path, segmentation_function,
         mask_input_path=args.mask_path, force=args.force, data_ext=args.data_ext,
+        output_key=args.segmentation_key,
     )
diff --git a/synaptic_reconstruction/tools/util.py b/synaptic_reconstruction/tools/util.py
index 4ab9ca5..6f0cbad 100644
--- a/synaptic_reconstruction/tools/util.py
+++ b/synaptic_reconstruction/tools/util.py
@@ -6,8 +6,10 @@
 import numpy as np
 import pooch
 
-from ..inference.vesicles import segment_vesicles
+from ..inference.active_zone import segment_active_zone
+from ..inference.compartments import segment_compartments
 from ..inference.mitochondria import segment_mitochondria
+from ..inference.vesicles import segment_vesicles
 
 
 def load_custom_model(model_path: str, device: Optional[Union[str, torch.device]] = None) -> torch.nn.Module:
@@ -86,9 +88,9 @@ def run_segmentation(
     elif model_type == "mitochondria":
         segmentation = segment_mitochondria(image, model=model, tiling=tiling, scale=scale, verbose=verbose)
     elif model_type == "active_zone":
-        raise NotImplementedError
+        segmentation = segment_active_zone(image, model=model, tiling=tiling, scale=scale, verbose=verbose)
     elif model_type == "compartments":
-        raise NotImplementedError
+        segmentation = segment_compartments(image, model=model, tiling=tiling, scale=scale, verbose=verbose)
     elif model_type == "inner_ear_structures":
         raise NotImplementedError
     else:
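As a usage sketch, the extended segmentation CLI could then be invoked along the following lines; the entry point name `segmentation_cli` and all paths are illustrative, while the flag spellings follow the argparse options visible above:

    segmentation_cli --input_path /data/tomograms --output_path /data/segmentations \
        --model active_zone --segmentation_key predictions/az --scale 0.5

With --segmentation_key set, rerunning the command skips a tomogram only if its output file already contains that key, rather than skipping whenever the file exists.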