From 53e6c9b135f24e40cdbeeb2dcf43582b3d9cf9b8 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Sun, 1 Dec 2024 13:14:48 +0100 Subject: [PATCH] Add active zone inference --- scripts/aggregate_data_information.py | 53 +++- .../segment_mitochondria.py | 36 ++- .../data_summary/vesicle_training_data.xlsx | Bin 14745 -> 4941 bytes scripts/prepare_zenodo_uploads.py | 246 ++++++++++++++++++ .../inference/active_zone.py | 122 +++++++++ synaptic_reconstruction/inference/util.py | 9 +- synaptic_reconstruction/tools/cli.py | 12 +- synaptic_reconstruction/tools/util.py | 8 +- 8 files changed, 474 insertions(+), 12 deletions(-) create mode 100644 scripts/prepare_zenodo_uploads.py create mode 100644 synaptic_reconstruction/inference/active_zone.py diff --git a/scripts/aggregate_data_information.py b/scripts/aggregate_data_information.py index 7086b23..6ce446a 100644 --- a/scripts/aggregate_data_information.py +++ b/scripts/aggregate_data_information.py @@ -200,7 +200,7 @@ def active_zone_train_data(): "01": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/exported_imod_objects/01_hoi_maus_2020_incomplete", # noqa "04": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/exported_imod_objects/04_hoi_stem_examples", # noqa "06": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/exported_imod_objects/06_hoi_wt_stem750_fm", # noqa - "12": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/2D_data/20241021_imig_2014_data_transfer_exported_grouped", # noqa + "12": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/exported_imod_objects/12_chemical_fix_cryopreparation", # noqa } test_tomograms = { @@ -467,11 +467,58 @@ def get_image_sizes_tem_2d(): print(f["raw"].shape) +def mito_train_data(): + train_root = "/scratch-grete/projects/nim00007/data/mitochondria/cooper/fidi_down_s2" + test_tomograms = [ + "36859_J1_66K_TS_CA3_MF_18_rec_2Kb1dawbp_crop_downscaled.h5", + "3.2_downscaled.h5", + ] + all_tomos = sorted(glob(os.path.join(train_root, "*.h5"))) + + tomo_names = [] + tomo_condition = [] + tomo_mitos = [] + tomo_resolution = [] + tomo_train = [] + + for tomo in all_tomos: + fname = os.path.basename(tomo) + split = "test" if fname in test_tomograms else "train/val" + if "36859" in fname or "37371" in fname: # This is from the STEM dataset. 
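+            # The factor of 2 accounts for the downsampling of these tomograms
+            # (cf. the fidi_down_s2 folder name and the scale of 0.5 used for segmentation).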
+            condition = stem
+            resolution = 2 * 0.868
+        else:  # This is from the single-axis TEM dataset.
+            condition = single_ax_tem
+            # These tomograms were scaled, despite the resolution mismatch.
+            resolution = 2 * 1.554
+
+        with h5py.File(tomo, "r") as f:
+            seg = f["labels/mitochondria"][:]
+            n_mitos = len(np.unique(seg)) - 1
+
+        tomo_names.append(tomo)
+        tomo_condition.append(condition)
+        tomo_train.append(split)
+        tomo_resolution.append(resolution)
+        tomo_mitos.append(n_mitos)
+
+    df = pd.DataFrame({
+        "tomogram": tomo_names,
+        "condition": tomo_condition,
+        "resolution": tomo_resolution,
+        "used_for": tomo_train,
+        "mito_count_all": tomo_mitos,
+    })
+
+    os.makedirs("data_summary", exist_ok=True)
+    df.to_excel("./data_summary/mitochondria.xlsx", index=False)
+
+
 def main():
     # active_zone_train_data()
     # compartment_train_data()
-    # mito_train_data()
-    vesicle_train_data()
+    mito_train_data()
+    # vesicle_train_data()
     # vesicle_domain_adaptation_data()
     # get_n_images_frog()
diff --git a/scripts/cooper/full_reconstruction/segment_mitochondria.py b/scripts/cooper/full_reconstruction/segment_mitochondria.py
index 395de78..cb82275 100644
--- a/scripts/cooper/full_reconstruction/segment_mitochondria.py
+++ b/scripts/cooper/full_reconstruction/segment_mitochondria.py
@@ -8,23 +8,53 @@
 ROOT = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/04_full_reconstruction"  # noqa
 MODEL_PATH = "/scratch-grete/projects/nim00007/models/exports_for_cooper/mito_model_s2.pt"  # noqa
+# MODEL_PATH = "/scratch-grete/projects/nim00007/models/luca/mito/source_domain"
+
 
 def run_seg(path):
+
+    out_folder = "./mito_seg"
+    ds, fname = os.path.split(path)
+    ds = os.path.basename(ds)
+
+    os.makedirs(os.path.join(out_folder, ds), exist_ok=True)
+    out_path = os.path.join(out_folder, ds, fname)
+    if os.path.exists(out_path):
+        return
+
     with h5py.File(path, "r") as f:
-        if "labels/mitochondria" in f:
-            return
         raw = f["raw"][:]
 
     scale = (0.5, 0.5, 0.5)
    seg = segment_mitochondria(raw, model_path=MODEL_PATH, scale=scale, verbose=False)
 
-    with h5py.File(path, "a") as f:
+    with h5py.File(out_path, "a") as f:
+        f.create_dataset("labels/mitochondria", data=seg, compression="gzip")
+
+
+def run_seg_and_pred(path):
+    with h5py.File(path, "r") as f:
+        raw = f["raw"][:]
+
+    scale = (0.5, 0.5, 0.5)
+    seg, pred = segment_mitochondria(
+        raw, model_path=MODEL_PATH, scale=scale, verbose=False, return_predictions=True
+    )
+
+    out_folder = "./mito_pred"
+    os.makedirs(out_folder, exist_ok=True)
+    out_path = os.path.join(out_folder, os.path.basename(path))
+
+    with h5py.File(out_path, "a") as f:
+        f.create_dataset("raw", data=raw[::2, ::2, ::2])
         f.create_dataset("labels/mitochondria", data=seg, compression="gzip")
+        f.create_dataset("pred", data=pred, compression="gzip")
 
 
 def main():
     paths = sorted(glob(os.path.join(ROOT, "**/*.h5"), recursive=True))
     for path in tqdm(paths):
         run_seg(path)
+        # run_seg_and_pred(path)
 
 
 main()
diff --git a/scripts/data_summary/vesicle_training_data.xlsx b/scripts/data_summary/vesicle_training_data.xlsx
index 0f9ee1e82eeb9566bdfd8f76fc9b951258dc404a..57fb145e044ec104849f3b1e8f5b23524fe75e12 100644
GIT binary patch
[base85-encoded binary deltas omitted]
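For reference, a minimal sketch of how the output written by run_seg above can be inspected; the file name below is a placeholder, while the ./mito_seg/<dataset>/<tomogram>.h5 layout and the "labels/mitochondria" key follow the script:

    import h5py
    import numpy as np

    # Placeholder path following the ./mito_seg/<dataset>/<tomogram>.h5 layout.
    with h5py.File("./mito_seg/some_dataset/some_tomogram.h5", "r") as f:
        seg = f["labels/mitochondria"][:]
    # Count the mitochondria the same way as mito_train_data (label 0 = background).
    print("Number of mitochondria:", len(np.unique(seg)) - 1)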
z2Zb{*753B91%0ZPQYw|Q@Q|0xaa}=7h(~DU`%6jRcIH_Ycg)#S6Q!9yI}v*{aYbBF z`#BZF@g%v}a+a>${=P!K%j;OlkM2ZOGhL#!U8%1)pCpOx!KS9Sxb%%kCPAX$Cm$xh z*i>gKm1bIKL4Jd+*v;|DJ6>8#3}&q}8UrOhRk8yw+L-{8R1~D{tgMMeXpQ^-kfxFQ z{X;ssA{cD!@pt;5PVu<;P3OW3i|A`Yv) z#Br7ulkXIa`HgENMvO3)p_{@zW~-IAjkLe_A0|J;Om}B8o?-&P2Qk9W4eJ$t=N4}& ziO92*R^a9JhS|mk+4NV&mI&Z|aFu?r)hs*Cy4m}gpB`}6*wp7iLXge)7ocG2>a*_s zSdz5^VqNUe)Du`csLMZTXGt;CwlQHieUu6 zDI#93T~Y}EXUDKzr{$lrj{gYQo6q^e`KO!!<~xtH?OgU0@MWCpzF$9I2JrjWl!Dm4 zKb#``WzS^;pUb>)gz~6^eigZX^2F)n5?oOraj@o3eZQd^x#Gz%>H=piV3m!HuhOmp zW%tpZyVc>oYg$<@yU6q1k02UN2?kHz%H1MVN3%V5CQE714$shVK&2o~S;)}*KRO6Y z^En7S(reySYVqN+7>5#AM9eCicb$2>~4DCtknTtT|aO zLeMY-WugL{eH;~?Ezu4W(ehV*xu|nCeM9yGKtoX`s{aofLC3#}p*fbrAX~}V-h*iZ zp-wh-#E5pvmd~o-qvu6g6E+Sy2*K=l@GW|%?Aw>Uv=_``&>pmk%6tS&xZJmnn#SAEHE_9QS*$QARefsiRYGn*b1EmqQBB5yFmypLJh%!0BFusW<#P-uFmQS!w8(NAod>Fl_cj5$vhm zm_(}tNNB!Y%xC(>tNSWT`_Y- zW8MipTeYTuJ(A24kO6Kb4EK<_e(K6XD}y2}2hV-gaiEVx!Jq$eaP*1F%fH;>a+&euG$ACVf$htKpSt`6 z@nmK0xOMa(h413dLA;`EOiseU`v=6?(eonzvX`sUA_BlF*Ss*tbx`f zgBz@^Rk!sZU%||?xUv_m(?=72V=MYInGW^F`F^h_5)#Rg_{#sL^)zPLG3mMJRSBV}SES-2CdW_3)I z4teU0IHwYaC=zlW_`l{KHD}yZZ1>EnV;7RcRfF|-MzDa1JPo;SM@;avTfZI!1GKC}E%p7dRc1W5o*r|!@5pCh-W^l{no`*YB2^Y3j`xto z4w-yQzIXyQDHBJZ=)Bz83OI9&36E?XY%Jn!$~AOIuiRFi?R~ zJlBO~p;EXlf4M7-ObvN~RtXYO0b1n=st)c2e``Q)fJtT^pV&x;y*Y>;?JQQdz+^{R zZHi`=3I8Qm=(OVVBVdy;0R}8{8DNs$D>jfKt>8@Zn+#Ol{DIK9s?hRF%DvZVfP@r@nhIG z&HZ3k&WE9>p7`)P?C~82&W^1VHCAmqci`~nz2td7mW|#-NPsDr93`~hihNpJ=JAs# zdSGO$`F%h*WS;Qs~2`LX?v#wd=kaq&whM@@H;<3zF6PP%sq`V8=^JClk_8%RozC&fSnGbo;=o$wbbBUj(Bizrw(K*V`9(BSx=C z_;>&5v8}w6-obC^TZ5Pgs~0UX73IG&ZX>ZmPl| z%Y?&H0suB857VHnVDXOxD>K^fMMO^uT1-_I$MH%R&qtqu@oYmtiDDE$v;94pdD&?%Z$UJ%yzl zfMo1w22BgjWN(KGa#t4m%8@E#NX+SQ@zh_g~)d9`6SSnMFCV1?DSiH5OSDW{eok&Sa)srow8{=b`vtgG?ZoBgZ23RA z1cOi-f4yKgVRhwt{zE6>&|9(#;f21S3re)D>)x3`Ro}wI(CFEVq@$#dPj&$m?k?MH zFJCYJd;9Iy+x_xnqjjcob5V&%C$Q`tHPLl#k0e+@zg9vYy~n(J zV}x+w?u|Mk`G4QQeIktaKNmCMkSBaJ=)V$mQy3mUz`#fWt|5{DR}laE&j%9)&v-(A z_ssu)P4R!8@qGwic*1;Vj)0gF>xu);LQE-wmaUfF4EYH>M}~!gf&bs|H|^1SWmJ4=k_Sv2hgeJfu|CK?qnN*KcDi#Cy8IsqGgl6O88R(7fQ#l T|9@}KdExXVOb_1iqmBI^umugi diff --git a/scripts/prepare_zenodo_uploads.py b/scripts/prepare_zenodo_uploads.py new file mode 100644 index 0000000..b642c07 --- /dev/null +++ b/scripts/prepare_zenodo_uploads.py @@ -0,0 +1,246 @@ +import os +from glob import glob +from shutil import copyfile + +import h5py +from tqdm import tqdm + +OUTPUT_ROOT = "./data_summary/for_zenodo" + + +def _copy_vesicles(tomos, out_folder): + label_key = "labels/vesicles/combined_vesicles" + os.makedirs(out_folder, exist_ok=True) + for tomo in tqdm(tomos, desc="Export tomos"): + out_path = os.path.join(out_folder, os.path.basename(tomo)) + if os.path.exists(out_path): + continue + + with h5py.File(tomo, "r") as f: + raw = f["raw"][:] + labels = f[label_key][:] + try: + fname = f.attrs["filename"] + except KeyError: + fname = None + + with h5py.File(out_path, "a") as f: + f.create_dataset("raw", data=raw, compression="gzip") + f.create_dataset("labels/vesicles", data=labels, compression="gzip") + if fname is not None: + f.attrs["filename"] = fname + + +def _export_vesicles(train_root, test_root, name): + train_tomograms = sorted(glob(os.path.join(train_root, "*.h5"))) + test_tomograms = sorted(glob(os.path.join(test_root, "*.h5"))) + print(f"Vesicle data for {name}:") + print(len(train_tomograms), len(test_tomograms), len(train_tomograms) + len(test_tomograms)) + + train_out = os.path.join(OUTPUT_ROOT, "synapse-net", "vesicles", "train", name) + _copy_vesicles(train_tomograms, train_out) + + 
test_out = os.path.join(OUTPUT_ROOT, "synapse-net", "vesicles", "test", name) + _copy_vesicles(test_tomograms, test_out) + + +def _export_az(train_root, test_tomos, name): + tomograms = sorted(glob(os.path.join(train_root, "*.h5"))) + print(f"AZ data for {name}:") + + train_out = os.path.join(OUTPUT_ROOT, "synapse-net", "active_zones", "train", name) + test_out = os.path.join(OUTPUT_ROOT, "synapse-net", "active_zones", "test", name) + + os.makedirs(train_out, exist_ok=True) + os.makedirs(test_out, exist_ok=True) + + for tomo in tqdm(tomograms): + fname = os.path.basename(tomo) + if tomo in test_tomos: + out_path = os.path.join(test_out, fname) + else: + out_path = os.path.join(train_out, fname) + if os.path.exists(out_path): + continue + + with h5py.File(tomo, "r") as f: + raw = f["raw"][:] + az = f["labels/AZ"][:] + + with h5py.File(out_path, "a") as f: + f.create_dataset("raw", data=raw, compression="gzip") + f.create_dataset("labels/AZ", data=az, compression="gzip") + + +# NOTE: we have very few mito annotations from 01, so we don't include them in here. +def prepare_single_ax_stem_chemical_fix(): + # single-axis-tem: vesicles + train_root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/01_hoi_maus_2020_incomplete" # noqa + test_root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/01_hoi_maus_2020_incomplete" # noqa + _export_vesicles(train_root, test_root, name="single_axis_tem") + + # single-axis-tem: active zones + train_root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/exported_imod_objects/01_hoi_maus_2020_incomplete" # noqa + test_tomos = [ + "WT_MF_DIV28_01_MS_09204_F1.h5", "WT_MF_DIV14_01_MS_B2_09175_CA3.h5", "M13_CTRL_22723_O2_05_DIV29_5.2.h5", "WT_Unt_SC_09175_D4_05_DIV14_mtk_05.h5", # noqa + "20190805_09002_B4_SC_11_SP.h5", "20190807_23032_D4_SC_01_SP.h5", "M13_DKO_22723_A1_03_DIV29_03_MS.h5", "WT_MF_DIV28_05_MS_09204_F1.h5", "M13_CTRL_09201_S2_06_DIV31_06_MS.h5", # noqa + "WT_MF_DIV28_1.2_MS_09002_B1.h5", "WT_Unt_SC_09175_C4_04_DIV15_mtk_04.h5", "M13_DKO_22723_A4_10_DIV29_10_MS.h5", "WT_MF_DIV14_3.2_MS_D2_09175_CA3.h5", # noqa + "20190805_09002_B4_SC_10_SP.h5", "M13_CTRL_09201_S2_02_DIV31_02_MS.h5", "WT_MF_DIV14_04_MS_E1_09175_CA3.h5", "WT_MF_DIV28_10_MS_09002_B3.h5", "WT_Unt_SC_05646_D4_02_DIV16_mtk_02.h5", "M13_DKO_22723_A4_08_DIV29_08_MS.h5", "WT_MF_DIV28_04_MS_09204_M1.h5", "WT_MF_DIV28_03_MS_09204_F1.h5", "M13_DKO_22723_A1_05_DIV29_05_MS.h5", # noqa + "WT_Unt_SC_09175_C4_06_DIV15_mtk_06.h5", "WT_MF_DIV28_09_MS_09002_B3.h5", "20190524_09204_F4_SC_07_SP.h5", + "WT_MF_DIV14_02_MS_C2_09175_CA3.h5", "M13_DKO_23037_K1_01_DIV29_01_MS.h5", "WT_Unt_SC_09175_E2_01_DIV14_mtk_01.h5", "20190807_23032_D4_SC_05_SP.h5", "WT_MF_DIV14_01_MS_E2_09175_CA3.h5", "WT_MF_DIV14_03_MS_B2_09175_CA3.h5", "M13_DKO_09201_O1_01_DIV31_01_MS.h5", "M13_DKO_09201_U1_04_DIV31_04_MS.h5", # noqa + "WT_MF_DIV14_04_MS_E2_09175_CA3_2.h5", "WT_Unt_SC_09175_D5_01_DIV14_mtk_01.h5", + "M13_CTRL_22723_O2_05_DIV29_05_MS_.h5", "WT_MF_DIV14_02_MS_B2_09175_CA3.h5", "WT_MF_DIV14_01.2_MS_D1_09175_CA3.h5", # noqa + ] + _export_az(train_root, test_tomos, name="single_axis_tem") + + # chemical_fixation: vesicles + train_root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/12_chemical_fix_cryopreparation" # noqa + test_root = 
"/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/12_chemical_fix_cryopreparation" # noqa + _export_vesicles(train_root, test_root, name="chemical_fixation") + + # chemical-fixation: active zones + train_root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/exported_imod_objects/12_chemical_fix_cryopreparation" # noqa + test_tomos = ["20180305_09_MS.h5", "20180305_04_MS.h5", "20180305_08_MS.h5", + "20171113_04_MS.h5", "20171006_05_MS.h5", "20180305_01_MS.h5"] + _export_az(train_root, test_tomos, name="chemical_fixation") + + +def prepare_ier(): + root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/moser/other_tomograms" + sets = { + "01_vesicle_pools": "vesicle_pools", + "02_tether": "tether", + "03_ratten_tomos": "rat", + } + + output_folder = os.path.join(OUTPUT_ROOT, "IER") + label_names = { + "ribbons": "ribbon", + "membrane": "membrane", + "presynapse": "PD", + "postsynapse": "PSD", + "vesicles": "vesicles", + } + + for name, output_name in sets.items(): + out_set = os.path.join(output_folder, output_name) + os.makedirs(out_set, exist_ok=True) + tomos = sorted(glob(os.path.join(root, name, "*.h5"))) + + print("Export", output_name) + for tomo in tqdm(tomos): + with h5py.File(tomo, "r") as f: + try: + fname = os.path.split(f.attrs["filename"])[1][:-4] + except KeyError: + fname = f.attrs["path"][1] + fname = "_".join(fname.split("/")[-2:]) + + out_path = os.path.join(out_set, os.path.basename(tomo)) + if os.path.exists(out_path): + continue + + raw = f["raw"][:] + labels = {} + for label_name, out_name in label_names.items(): + key = f"labels/{label_name}" + if key not in f: + continue + labels[out_name] = f[key][:] + + with h5py.File(out_path, "a") as f: + f.attrs["filename"] = fname + f.create_dataset("raw", data=raw, compression="gzip") + for label_name, seg in labels.items(): + f.create_dataset(f"labels/{label_name}", data=seg, compression="gzip") + + +def prepare_frog(): + root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/rizzoli/extracted" + train_tomograms = [ + "block10U3A_three.h5", "block30UB_one_two.h5", "block30UB_two.h5", "block10U3A_one.h5", + "block184B_one.h5", "block30UB_three.h5", "block10U3A_two.h5", "block30UB_four.h5", + "block30UB_one.h5", "block10U3A_five.h5", + ] + test_tomograms = ["block10U3A_four.h5", "block30UB_five.h5"] + + output_folder = os.path.join(OUTPUT_ROOT, "frog") + output_train = os.path.join(output_folder, "train_unlabeled") + os.makedirs(output_train, exist_ok=True) + + for name in train_tomograms: + path = os.path.join(root, name) + out_path = os.path.join(output_train, name) + if os.path.exists(out_path): + continue + copyfile(path, out_path) + + output_test = os.path.join(output_folder, "test") + os.makedirs(output_test, exist_ok=True) + for name in test_tomograms: + path = os.path.join(root, name) + out_path = os.path.join(output_test, name) + if os.path.exists(out_path): + continue + copyfile(path, out_path) + + +def prepare_2d_tem(): + train_root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/2D_data/maus_2020_tem2d_wt_unt_div14_exported_scaled/good_for_DAtraining/maus_2020_tem2d_wt_unt_div14_exported_scaled" # noqa + test_root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicle_gt_2d/maus_2020_tem2d" # noqa + train_images = [ + "MF_05649_P-09175-E_06.h5", "MF_05646_C-09175-B_001B.h5", "MF_05649_P-09175-E_07.h5", + 
"MF_05649_G-09175-C_001.h5", "MF_05646_C-09175-B_002.h5", "MF_05649_G-09175-C_04.h5", + "MF_05649_P-09175-E_05.h5", "MF_05646_C-09175-B_000.h5", "MF_05646_C-09175-B_001.h5" + ] + test_images = [ + "MF_05649_G-09175-C_04B.h5", "MF_05646_C-09175-B_000B.h5", + "MF_05649_G-09175-C_03.h5", "MF_05649_G-09175-C_02.h5" + ] + print(len(train_images) + len(test_images)) + + output_folder = os.path.join(OUTPUT_ROOT, "2d_tem") + + output_train = os.path.join(output_folder, "train_unlabeled") + os.makedirs(output_train, exist_ok=True) + for name in tqdm(train_images, desc="Export train images"): + out_path = os.path.join(output_train, name) + if os.path.exists(out_path): + continue + in_path = os.path.join(train_root, name) + with h5py.File(in_path, "r") as f: + raw = f["raw"][:] + with h5py.File(out_path, "a") as f: + f.create_dataset("raw", data=raw, compression="gzip") + + output_test = os.path.join(output_folder, "test") + os.makedirs(output_test, exist_ok=True) + for name in tqdm(test_images, desc="Export test images"): + out_path = os.path.join(output_test, name) + if os.path.exists(out_path): + continue + in_path = os.path.join(test_root, name) + with h5py.File(in_path, "r") as f: + raw = f["data"][:] + labels = f["labels/vesicles"][:] + mask = f["labels/mask"][:] + with h5py.File(out_path, "a") as f: + f.create_dataset("raw", data=raw, compression="gzip") + f.create_dataset("labels/vesicles", data=labels, compression="gzip") + f.create_dataset("labels/mask", data=mask, compression="gzip") + + +def prepare_munc_snap(): + pass + + +def main(): + prepare_single_ax_stem_chemical_fix() + # prepare_2d_tem() + # prepare_frog() + # prepare_ier() + # prepare_munc_snap() + + +if __name__ == "__main__": + main() diff --git a/synaptic_reconstruction/inference/active_zone.py b/synaptic_reconstruction/inference/active_zone.py new file mode 100644 index 0000000..d611693 --- /dev/null +++ b/synaptic_reconstruction/inference/active_zone.py @@ -0,0 +1,122 @@ +import time +from typing import Dict, List, Optional, Tuple, Union + +import elf.parallel as parallel +import numpy as np +import torch + +from skimage.segmentation import find_boundaries +from synaptic_reconstruction.inference.util import get_prediction, _Scaler + + +def find_intersection_boundary(segmented_AZ: np.ndarray, segmented_compartment: np.ndarray) -> np.ndarray: + """ + Find the cumulative intersection of the boundary of each label in segmented_compartment with segmented_AZ. + + Args: + segmented_AZ: 3D array representing the active zone (AZ). + segmented_compartment: 3D array representing the compartment, with multiple labels. + + Returns: + Array with the cumulative intersection of all boundaries of segmented_compartment labels with segmented_AZ. 
+ """ + # Step 0: Initialize an empty array to accumulate intersections + cumulative_intersection = np.zeros_like(segmented_AZ, dtype=bool) + + # Step 1: Loop through each unique label in segmented_compartment (excluding 0 if it represents background) + labels = np.unique(segmented_compartment) + labels = labels[labels != 0] # Exclude background label (0) if necessary + + for label in labels: + # Step 2: Create a binary mask for the current label + label_mask = (segmented_compartment == label) + + # Step 3: Find the boundary of the current label's compartment + boundary_compartment = find_boundaries(label_mask, mode='outer') + + # Step 4: Find the intersection with the AZ for this label's boundary + intersection = np.logical_and(boundary_compartment, segmented_AZ) + + # Step 5: Accumulate intersections for each label + cumulative_intersection = np.logical_or(cumulative_intersection, intersection) + + return cumulative_intersection.astype(int) # Convert boolean array to int (1 for intersecting points, 0 elsewhere) + + +def _run_segmentation( + foreground, verbose, min_size, + # blocking shapes for parallel computation + block_shape=(128, 256, 256), +): + + # get the segmentation via seeded watershed + t0 = time.time() + seg = parallel.label(foreground > 0.5, block_shape=block_shape, verbose=verbose) + if verbose: + print("Compute connected components in", time.time() - t0, "s") + + # size filter + t0 = time.time() + ids, sizes = parallel.unique(seg, return_counts=True, block_shape=block_shape, verbose=verbose) + filter_ids = ids[sizes < min_size] + seg[np.isin(seg, filter_ids)] = 0 + if verbose: + print("Size filter in", time.time() - t0, "s") + seg = np.where(seg > 0, 1, 0) + return seg + + +def segment_active_zone( + input_volume: np.ndarray, + model_path: Optional[str] = None, + model: Optional[torch.nn.Module] = None, + tiling: Optional[Dict[str, Dict[str, int]]] = None, + min_size: int = 500, + verbose: bool = True, + return_predictions: bool = False, + scale: Optional[List[float]] = None, + mask: Optional[np.ndarray] = None, + compartment: Optional[np.ndarray] = None, +) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]: + """Segment active zones in an input volume. + + Args: + input_volume: The input volume to segment. + model_path: The path to the model checkpoint if `model` is not provided. + model: Pre-loaded model. Either `model_path` or `model` is required. + tiling: The tiling configuration for the prediction. + verbose: Whether to print timing information. + scale: The scale factor to use for rescaling the input volume before prediction. + mask: An optional mask that is used to restrict the segmentation. + compartment: + + Returns: + The foreground mask as a numpy array. + """ + if verbose: + print("Segmenting AZ in volume of shape", input_volume.shape) + # Create the scaler to handle prediction with a different scaling factor. + scaler = _Scaler(scale, verbose) + input_volume = scaler.scale_input(input_volume) + + # Rescale the mask if it was given and run prediction. + if mask is not None: + mask = scaler.scale_input(mask, is_segmentation=True) + pred = get_prediction(input_volume, model_path=model_path, model=model, tiling=tiling, mask=mask, verbose=verbose) + + # Run segmentation and rescale the result if necessary. 
+
+
+def segment_active_zone(
+    input_volume: np.ndarray,
+    model_path: Optional[str] = None,
+    model: Optional[torch.nn.Module] = None,
+    tiling: Optional[Dict[str, Dict[str, int]]] = None,
+    min_size: int = 500,
+    verbose: bool = True,
+    return_predictions: bool = False,
+    scale: Optional[List[float]] = None,
+    mask: Optional[np.ndarray] = None,
+    compartment: Optional[np.ndarray] = None,
+) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
+    """Segment active zones in an input volume.
+
+    Args:
+        input_volume: The input volume to segment.
+        model_path: The path to the model checkpoint if `model` is not provided.
+        model: Pre-loaded model. Either `model_path` or `model` is required.
+        tiling: The tiling configuration for the prediction.
+        min_size: The minimal size of an active zone; smaller objects are filtered out.
+        verbose: Whether to print timing information.
+        return_predictions: Whether to return the network predictions together with the segmentation.
+        scale: The scale factor to use for rescaling the input volume before prediction.
+        mask: An optional mask that is used to restrict the segmentation.
+        compartment: An optional compartment segmentation. If given, the intersection of the
+            compartment boundaries with the active zone is computed via `find_intersection_boundary`
+            and returned together with the segmentation.
+
+    Returns:
+        The active zone segmentation. If `return_predictions` is set, a tuple of the segmentation
+        and the predictions is returned; if `compartment` is given instead, a tuple of the
+        segmentation and the boundary intersection is returned.
+    """
+    if verbose:
+        print("Segmenting AZ in volume of shape", input_volume.shape)
+    # Create the scaler to handle prediction with a different scaling factor.
+    scaler = _Scaler(scale, verbose)
+    input_volume = scaler.scale_input(input_volume)
+
+    # Rescale the mask if it was given and run prediction.
+    if mask is not None:
+        mask = scaler.scale_input(mask, is_segmentation=True)
+    pred = get_prediction(input_volume, model_path=model_path, model=model, tiling=tiling, mask=mask, verbose=verbose)
+
+    # Run segmentation and rescale the result if necessary.
+    foreground = pred[0]
+    segmentation = _run_segmentation(foreground, verbose=verbose, min_size=min_size)
+    segmentation = scaler.rescale_output(segmentation, is_segmentation=True)
+
+    # Returning the prediction and the intersection together is currently not supported,
+    # but the prediction is not needed in that case anyway.
+    if return_predictions:
+        pred = scaler.rescale_output(pred, is_segmentation=False)
+        return segmentation, pred
+
+    if compartment is not None:
+        intersection = find_intersection_boundary(segmentation, compartment)
+        return segmentation, intersection
+
+    return segmentation
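For reference, a minimal usage sketch of the new `segment_active_zone` API; the tomogram file, the checkpoint path, and the "labels/compartments" key are illustrative placeholders, not part of this patch:

    import h5py
    from synaptic_reconstruction.inference.active_zone import segment_active_zone

    # Load a tomogram and a pre-computed compartment segmentation (placeholder paths/keys).
    with h5py.File("tomogram.h5", "r") as f:
        volume = f["raw"][:]
        compartments = f["labels/compartments"][:]

    # Returns the AZ segmentation plus the intersection of the AZ with the
    # compartment boundaries (computed via find_intersection_boundary).
    seg, boundary = segment_active_zone(
        volume,
        model_path="/path/to/az_model.pt",
        scale=(0.5, 0.5, 0.5),
        compartment=compartments,
    )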
diff --git a/synaptic_reconstruction/inference/util.py b/synaptic_reconstruction/inference/util.py
index cedfb07..5a799f3 100644
--- a/synaptic_reconstruction/inference/util.py
+++ b/synaptic_reconstruction/inference/util.py
@@ -332,7 +332,7 @@ def inference_helper(
         mask_files, _ = _get_file_paths(mask_input_path, mask_input_ext)
         assert len(input_files) == len(mask_files)
 
-    for i, img_path in tqdm(enumerate(input_files), total=len(input_files)):
+    for i, img_path in tqdm(enumerate(input_files), total=len(input_files), desc="Processing files"):
 
         # Determine the output file name.
         input_folder, input_name = os.path.split(img_path)
@@ -350,7 +350,12 @@ def inference_helper(
         # Check if the output path is already present.
         # If it is we skip the prediction, unless force was set to true.
         if os.path.exists(output_path) and not force:
-            continue
+            if output_key is None:
+                continue
+            else:
+                with open_file(output_path, "r") as f:
+                    if output_key in f:
+                        continue
 
         # Load the input volume. If we have extra_files then this concatenates the
         # data across a new first axis (= channel axis).
diff --git a/synaptic_reconstruction/tools/cli.py b/synaptic_reconstruction/tools/cli.py
index bcb3085..54a52a3 100644
--- a/synaptic_reconstruction/tools/cli.py
+++ b/synaptic_reconstruction/tools/cli.py
@@ -83,6 +83,7 @@ def imod_object_cli():
 
 # TODO: handle kwargs
 # TODO: add custom model path
+# TODO: enable autoscaling from input resolution
 def segmentation_cli():
     parser = argparse.ArgumentParser(description="Run segmentation.")
     parser.add_argument(
@@ -117,15 +118,24 @@ def segmentation_cli():
     parser.add_argument(
         "--data_ext", default=".mrc", help="The extension of the tomogram data. By default .mrc."
     )
+    parser.add_argument(
+        "--segmentation_key", "-s",
+        help="The key for saving the segmentation in the output file. If given, existing outputs are only skipped when they already contain this key."
+    )
+    # TODO enable autoscaling
+    parser.add_argument(
+        "--scale", type=float, default=None,
+        help="The factor for rescaling the input data before prediction, applied to all three axes; e.g. 0.5 to downsample by a factor of 2."
+    )
 
     args = parser.parse_args()
 
     model = get_model(args.model)
     tiling = parse_tiling(args.tile_shape, args.halo)
+    scale = None if args.scale is None else 3 * (args.scale,)
 
     segmentation_function = partial(
-        run_segmentation, model=model, model_type=args.model, verbose=False, tiling=tiling,
+        run_segmentation, model=model, model_type=args.model, verbose=False, tiling=tiling, scale=scale
     )
     inference_helper(
         args.input_path, args.output_path, segmentation_function,
         mask_input_path=args.mask_path, force=args.force, data_ext=args.data_ext,
+        output_key=args.segmentation_key,
     )
diff --git a/synaptic_reconstruction/tools/util.py b/synaptic_reconstruction/tools/util.py
index 4ab9ca5..6f0cbad 100644
--- a/synaptic_reconstruction/tools/util.py
+++ b/synaptic_reconstruction/tools/util.py
@@ -6,8 +6,10 @@
 import numpy as np
 import pooch
 
-from ..inference.vesicles import segment_vesicles
+from ..inference.active_zone import segment_active_zone
+from ..inference.compartments import segment_compartments
 from ..inference.mitochondria import segment_mitochondria
+from ..inference.vesicles import segment_vesicles
 
 
 def load_custom_model(model_path: str, device: Optional[Union[str, torch.device]] = None) -> torch.nn.Module:
@@ -86,9 +88,9 @@ def run_segmentation(
     elif model_type == "mitochondria":
         segmentation = segment_mitochondria(image, model=model, tiling=tiling, scale=scale, verbose=verbose)
     elif model_type == "active_zone":
-        raise NotImplementedError
+        segmentation = segment_active_zone(image, model=model, tiling=tiling, scale=scale, verbose=verbose)
     elif model_type == "compartments":
-        raise NotImplementedError
+        segmentation = segment_compartments(image, model=model, tiling=tiling, scale=scale, verbose=verbose)
     elif model_type == "inner_ear_structures":
         raise NotImplementedError
     else:
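As a usage sketch, the extended segmentation CLI could then be invoked along the following lines; the entry point name `segmentation_cli` and all paths are illustrative, while the flag spellings follow the argparse options visible above:

    segmentation_cli --input_path /data/tomograms --output_path /data/segmentations \
        --model active_zone --segmentation_key predictions/az --scale 0.5

With --segmentation_key set, rerunning the command skips a tomogram only if its output file already contains that key, rather than skipping whenever the file exists.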