From 8b5b4e26312df7f87a1c9840d81ac4eedbf5d27a Mon Sep 17 00:00:00 2001 From: DavidHerreros Date: Fri, 20 Sep 2024 08:10:36 +0200 Subject: [PATCH] New Zernike3D distance implementation --- docs/setup_zernike3d_distance.md | 51 +++++++++++++ .../_map_to_map/map_to_map_distance.py | 73 +++++++++++++++++++ .../_map_to_map/map_to_map_pipeline.py | 2 + .../data/_validation/output_validators.py | 2 + 4 files changed, 128 insertions(+) create mode 100644 docs/setup_zernike3d_distance.md diff --git a/docs/setup_zernike3d_distance.md b/docs/setup_zernike3d_distance.md new file mode 100644 index 0000000..90384e8 --- /dev/null +++ b/docs/setup_zernike3d_distance.md @@ -0,0 +1,51 @@ +

How to set up the Zernike3D distance?

+ +

+ +Supported Python versions +GitHub Downloads (all assets, all releases) +GitHub branch check runs +GitHub License + +

+ +

+ +Flexutils + +

+ + + +Zernike3D distance relies on the external software **[Flexutils](https://github.com/I2PC/Flexutils-Toolkit)**. This document includes the installation guide to set up this software on your machine, as well as some guidelines on the parameters and characteristics of the Zernike3D distance. + +# Flexutils installation +**Flexutils** can be installed on your system with the following commands: + +```bash +git clone https://github.com/I2PC/Flexutils-Toolkit.git +cd Flexutils-Toolkit +bash install.sh +``` + +Any errors raised during the installation of the software or the computation of the Zernike3D distance can be reported through the Flexutils GitHub issues [webpage](https://github.com/I2PC/Flexutils-Toolkit/issues). + +# Defining the config file parameters +Zernike3D distance relies on the approximation of a deformation field between two volumes to measure their similarity. A detailed explanation of the theory behind the computation of these deformation fields is provided in the following publications: [Zernike3D-IUCRJ](https://journals.iucr.org/m/issues/2021/06/00/eh5012/) and [Zernike3D-NatComm](https://www.nature.com/articles/s41467-023-35791-y). + +The software follows a neural network approximation, so the usage of a GPU is strongly recommended. + +The Zernike3D distance requires a set of additional execution parameters that need to be supplied through the `config_map_to_map.yaml` file passed to the distance computation step. These additional parameters are presented below: + +- **gpuID**: An integer greater than or equal to 0 determining the GPU to be used to train the Zernike3Deep neural network. +- **tmpDir**: A path to a folder needed to store the intermediate files generated by the software. This folder is **NOT** emptied once the execution finishes. +- **thr**: An integer larger than 0 determining the number of processes to use during the execution of the software. 
+
+```yaml
+  metrics:
+    - zernike3d
+  zernike3d_extra_params:
+    gpuID: 0
+    tmpDir: where/to/save/intermediate/files/folder
+    thr: 20
+```
diff --git a/src/cryo_challenge/_map_to_map/map_to_map_distance.py b/src/cryo_challenge/_map_to_map/map_to_map_distance.py
index 3021db5..2804653 100644
--- a/src/cryo_challenge/_map_to_map/map_to_map_distance.py
+++ b/src/cryo_challenge/_map_to_map/map_to_map_distance.py
@@ -1,3 +1,5 @@
+import os
+import subprocess
 import math
 import torch
 from typing import Optional, Sequence
@@ -398,3 +400,103 @@ def res_at_fsc_threshold(fscs, threshold=0.5):
         res_fsc_half, fraction_nyquist = res_at_fsc_threshold(fsc_matrix)
         self.stored_computed_assets = {"fraction_nyquist": fraction_nyquist}
         return units_Angstroms[res_fsc_half]
+
+
+class Zernike3DDistance(MapToMapDistance):
+    """Zernike3D based distance.
+
+    Zernike3D distance relies on the estimation of the non-linear transformation needed to align two different maps.
+    The RMSD of the associated non-linear alignment represented as a deformation field is then used as the distance
+    between two maps.
+
+    The computation is delegated to an external program that is run in a subprocess inside the
+    ``flexutils-tensorflow`` conda environment.
+    """
+
+    @override
+    def get_distance_matrix(self, maps1, maps2, global_store_of_running_results):
+        """Compute the Zernike3D distance matrix between two sets of maps.
+
+        Both sets are saved as ``.npy`` files in the configured ``tmpDir``, the external
+        ``compute_distance_matrix_zernike3deep.py`` program is run on them, and the
+        ``dist_mat.npy`` file read back from ``tmpDir`` is returned transposed.
+
+        Parameters
+        ----------
+        maps1 : saved to ``target_maps.npy`` and passed as ``--targets_file``.
+        maps2 : saved to ``reference_maps.npy`` and passed as ``--references_file``.
+        global_store_of_running_results : not used by this metric.
+
+        Raises
+        ------
+        RuntimeError
+            If conda is not in PATH or the ``flexutils-tensorflow`` environment is not listed.
+        subprocess.CalledProcessError
+            If the external program exits with a non-zero status.
+        """
+        # Local imports: only this method needs them.
+        import shlex
+        import shutil
+
+        params = self.config["analysis"]["zernike3d_extra_params"]
+        gpuID = params["gpuID"]
+        outputPath = params["tmpDir"]
+        thr = params["thr"]
+
+        # makedirs (unlike mkdir) also creates missing parent folders
+        os.makedirs(outputPath, exist_ok=True)
+
+        # NOTE(review): maps are forwarded as received; nothing here undoes a mask applied upstream
+        # (config["data"]["mask"]["do"]) - confirm the external program expects that.
+
+        # Always rewrite the inputs: tmpDir is not emptied between runs, so reusing files already
+        # present there could silently compute distances for maps from a previous execution.
+        targets_paths = os.path.join(outputPath, "target_maps.npy")
+        references_path = os.path.join(outputPath, "reference_maps.npy")
+        np.save(targets_paths, maps1)
+        np.save(references_path, maps2)
+
+        # Locate conda (otherwise abort as external software is not installed)
+        condabin_path = shutil.which("conda")
+        if condabin_path is None:
+            raise RuntimeError("Conda not found in PATH... Aborting")
+
+        # Check if conda env is installed (first column of "conda env list" is the env name)
+        env_list = subprocess.run(
+            [condabin_path, "env", "list"], check=False, stdout=subprocess.PIPE
+        ).stdout.decode("utf-8")
+        env_installed = any(
+            line.split()[0] == "flexutils-tensorflow"
+            for line in env_list.splitlines()
+            if line.strip() and not line.startswith("#")
+        )
+        if not env_installed:
+            raise RuntimeError("External software not found... Aborting")
+
+        # Call external program; every interpolated value is quoted so paths with spaces or shell
+        # metacharacters coming from the config file cannot break (or inject into) the command.
+        program_call = " ".join(
+            [
+                "compute_distance_matrix_zernike3deep.py",
+                "--references_file", shlex.quote(references_path),
+                "--targets_file", shlex.quote(targets_paths),
+                "--out_path", shlex.quote(outputPath),
+                "--gpu", shlex.quote(str(gpuID)),
+                "--thr", shlex.quote(str(thr)),
+            ]
+        )
+        subprocess.check_call(
+            f'eval "$({shlex.quote(condabin_path)} shell.bash hook)" && '
+            f"conda activate flexutils-tensorflow && {program_call}",
+            shell=True,
+        )
+
+        # Read distance matrix
+        dists = np.load(os.path.join(outputPath, "dist_mat.npy")).T
+        self.stored_computed_assets = {"zernike3d": dists}
+        return dists
+
+    @override
+    def get_computed_assets(self, maps1, maps2, global_store_of_running_results):
+        """Return the assets stored by the last ``get_distance_matrix`` call."""
+        return self.stored_computed_assets  # must run get_distance_matrix first
diff --git a/src/cryo_challenge/_map_to_map/map_to_map_pipeline.py b/src/cryo_challenge/_map_to_map/map_to_map_pipeline.py
index 06ce66f..78686ef 100644
--- a/src/cryo_challenge/_map_to_map/map_to_map_pipeline.py
+++ b/src/cryo_challenge/_map_to_map/map_to_map_pipeline.py
@@ -9,6 +9,7 @@
     L2DistanceNorm,
     BioEM3dDistance,
     FSCResDistance,
+    Zernike3DDistance,
 )
 
 
@@ -18,6 +19,7 @@
     "l2":
L2DistanceNorm, "bioem": BioEM3dDistance, "res": FSCResDistance, + "zernike3d": Zernike3DDistance, } diff --git a/src/cryo_challenge/data/_validation/output_validators.py b/src/cryo_challenge/data/_validation/output_validators.py index 9f76a6d..e81a363 100644 --- a/src/cryo_challenge/data/_validation/output_validators.py +++ b/src/cryo_challenge/data/_validation/output_validators.py @@ -31,6 +31,7 @@ class MapToMapResultsValidator: bioem: Optional[dict] = None fsc: Optional[dict] = None res: Optional[dict] = None + zernike3d: Optional[dict] = None def __post_init__(self): validate_input_config_mtm(self.config) @@ -151,6 +152,7 @@ class DistributionToDistributionResultsValidator: res: Optional[dict] = None l2: Optional[dict] = None corr: Optional[dict] = None + zernike3d: Optional[dict] = None def __post_init__(self): validate_input_config_disttodist(self.config)