Merge pull request #382 from ml-struct-bio/v3.3.3

v3.3.3: RELION3.1 .star filtering, interactive tilt series filtering, and fixes to backprojection
ml-struct-bio · Jun 25, 2024 · 34d5c39 · 34d5c39
2 parents 2e9c376 + 75d5a7c
commit 34d5c39
Show file tree

Hide file tree

Showing 101 changed files with 935 additions and 786 deletions.
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -2,12 +2,12 @@ name: CI Testing
 
 on:
   push:
-    branches: [ main, develop ]
+    branches: [ develop ]
     tags:
       - '[0-9]+\.[0-9]+\.[0-9]+'
       - '[0-9]+\.[0-9]+\.[0-9]+-*'
   pull_request:
-    branches: [ main, develop ]
+    branches: [ main ]
 
 jobs:
   run_tests:
@@ -34,9 +34,9 @@ jobs:
           python3 -m pip install pytest-xdist
           python3 -m pip install .
           python3 -m pip uninstall -y torch
-          python3 -m pip install --no-cache-dir torch==${{ matrix.torch }}
+          python3 -m pip cache purge
+          python3 -m pip install torch==${{ matrix.torch }}
 
       - name: Pytest
         run: |
-          pytest -v -n2 --dist=loadscope -k 'test_reconstruct'
-          pytest -v -n0 --dist=loadscope -k 'not test_reconstruct'
+          pytest -v -n2 --dist=loadscope
diff --git a/.gitignore b/.gitignore
@@ -49,8 +49,6 @@ coverage.xml
 *.cover
 .hypothesis/
 
-testing/output
-
 # Translations
 *.mo
 *.pot

diff --git a/README.md b/README.md
diff --git a/cryodrgn/command_line.py b/cryodrgn/command_line.py
@@ -1,9 +1,9 @@
 """Creating commands installed with cryoDRGN for use from command line.
 
-This module searches through the `commands` and `commands_utils` folders
-for anything that matches the format of a cryoDRGN command module
-and creates a `cryodrgn <x>` command line interface for each of the
-former and a `cryodrgn_utils <x>` for each of the latter.
+Upon installation, this module searches through the `commands` and `commands_utils`
+folders for anything that matches the format of a cryoDRGN command module, and creates
+a `cryodrgn <x>` command line interface for each such module found in the former
+and a `cryodrgn_utils <x>` for each found in the latter.
 
 See the `[project.scripts]` entry in the `pyproject.toml` file for how this module
 is used to create the commands during installation.
@@ -12,6 +12,7 @@
 import argparse
 import os
 from importlib import import_module
+import re
 import cryodrgn
 
 
@@ -45,6 +46,8 @@ def _get_commands(cmd_dir: str, doc_str: str = "") -> None:
                 parsed_doc = module.__doc__.split("\n") if module.__doc__ else list()
                 descr_txt = parsed_doc[0] if parsed_doc else ""
                 epilog_txt = "" if len(parsed_doc) <= 1 else "\n".join(parsed_doc[1:])
+                epilog_txt = re.sub(" +", " ", epilog_txt)
+                epilog_txt = re.sub("\n ", "\n\t ", epilog_txt)
 
                 # we add documentation text parsed from the module's docstring
                 this_parser = subparsers.add_parser(
@@ -61,15 +64,15 @@ def _get_commands(cmd_dir: str, doc_str: str = "") -> None:
 
 
 def main_commands():
-    """Commands installed with cryoDRGN."""
+    """Primary commands installed with cryoDRGN as `cryodrgn <cmd_module_name>`."""
     _get_commands(
         cmd_dir=os.path.join(os.path.dirname(__file__), "commands"),
         doc_str="Commands installed with cryoDRGN",
     )
 
 
 def util_commands():
-    """Utility commands installed with cryoDRGN."""
+    """Utility commands installed with cryoDRGN as `cryodrgn_utils <cmd_module_name>`."""
     _get_commands(
         cmd_dir=os.path.join(os.path.dirname(__file__), "commands_utils"),
         doc_str="Utility commands installed with cryoDRGN",

diff --git a/cryodrgn/commands/README.md b/cryodrgn/commands/README.md
@@ -0,0 +1,10 @@
+# cryoDRGN commands #
+
+This folder contains the primary commands that are installed as part of the cryoDRGN package, as well as any associated
+auxiliary files.
+
+See `cryodrgn.command_line` for how the contents of this folder are parsed as part of creating the cryoDRGN command
+line interface upon installation of the package.
+
+See also the `cryodrgn/commands_utils/` folder for the utility commands that are the other part of the cryoDRGN command
+line interface.
diff --git a/cryodrgn/commands/abinit_het.py b/cryodrgn/commands/abinit_het.py
@@ -1,5 +1,16 @@
-"""
-Heterogeneous NN reconstruction with hierarchical pose optimization
+"""Train a heterogeneous NN reconstruction model with hierarchical pose optimization.
+
+Example usages
+--------------
+# the default is to train for thirty epochs; here we train for fifty instead
+$ cryodrgn abinit_het particles.mrcs -o cryodrgn-outs/003_abinit_het --zdim 4
+                                     --ctf ctf.pkl -n 50
+
+# using .star particle input requires datadir argument pointing to image stacks
+$ cryodrgn abinit_het particles.star --datadir path_to_images/
+                                     -o cryodrgn-outs/004_abinit_het.10 --zdim 10
+                                     --ctf ctf.pkl -n 50
+
 """
 import argparse
 import os

diff --git a/cryodrgn/commands/abinit_homo.py b/cryodrgn/commands/abinit_homo.py
@@ -2,7 +2,7 @@
 
 Example usages
 --------------
-$ cryodrgn abinit_homo particles.256.txt --ctf ctf.pkl --ind chosen-particles.pkl \
+$ cryodrgn abinit_homo particles.256.txt --ctf ctf.pkl --ind chosen-particles.pkl
                                          -o cryodrn-out/256_abinit-homo
 
 """

diff --git a/cryodrgn/commands/analyze.py b/cryodrgn/commands/analyze.py
@@ -1,7 +1,16 @@
-"""
-Visualize latent space and generate volumes
-"""
+"""Visualize latent space and generate volumes using a trained cryoDRGN model.
+
+Example usages
+--------------
+$ cryodrgn analyze 003_abinit-het/ 49
 
+# it is necessary to invert handedness for some datasets
+$ cryodrgn analyze 003_abinit-het/ 99 --invert
+
+# don't run more computationally expensive analyses
+$ cryodrgn analyze 003_abinit-het/ 99 --skip-umap --skip-vol
+
+"""
 import argparse
 import os
 import os.path

diff --git a/cryodrgn/commands/backproject_voxel.py b/cryodrgn/commands/backproject_voxel.py
@@ -7,9 +7,13 @@
 
 Example usages
 ----------
-$ cryodrgn backproject_voxel particles.128.mrcs --poses pose.pkl -o backproj.128.mrc
-$ cryodrgn backproject_voxel particles.256.mrcs --poses pose.pkl
+$ cryodrgn backproject_voxel particles.128.mrcs
+                             --ctf ctf.pkl --poses pose.pkl -o backproj.128.mrc
+$ cryodrgn backproject_voxel particles.256.mrcs --ctf ctf.pkl --poses pose.pkl
                              --ind good-particles.pkl -o backproj.256.mrc --lazy
+$ cryodrgn backproject_voxel particles_from_M.star --datadir subtilts/128/
+                             --ctf ctf.pkl --poses pose.pkl
+                             -o bproj_tilt.mrc --lazy --tilt --ntilts=5
 
 """
 import argparse
@@ -58,15 +62,17 @@ def add_args(parser):
         "--reg-weight",
         type=float,
         default=1.0,
-        help="Add this value times the mean weight to the weight map to regularize the volume, reducing noise."
-        "Alternatively, you can set --output-sumcount, and then use `cryodrgn_utils regularize_backproject` on the"
-        ".sums and .counts files to try different regularization constants post hoc.",
+        help="Add this value times the mean weight to the weight map to regularize the "
+        "volume, reducing noise.\nAlternatively, you can set --output-sumcount, and "
+        "then use `cryodrgn_utils regularize_backproject` on the "
+        ".sums and .counts files to try different regularization constants post hoc.\n"
+        "(default: %(default)s)",
     )
     parser.add_argument(
         "--output-sumcount",
         action="store_true",
-        help="Output voxel sums and counts so that different regularization weights can be applied post hoc, with "
-        "`cryodrgn_utils regularize_backproject`.",
+        help="Output voxel sums and counts so that different regularization weights "
+        "can be applied post hoc, with `cryodrgn_utils regularize_backproject`.",
     )
 
     group = parser.add_argument_group("Dataset loading options")
@@ -126,7 +132,7 @@ def add_args(parser):
 
 def add_slice(volume, counts, ff_coord, ff, D, ctf_mul):
     d2 = int(D / 2)
-    ff_coord = ff_coord.transpose(0, 1)
+    ff_coord = ff_coord.transpose(0, 1).clip(-d2, d2)
     xf, yf, zf = ff_coord.floor().long()
     xc, yc, zc = ff_coord.ceil().long()
 
@@ -232,15 +238,21 @@ def main(args):
     mask = lattice.get_circular_mask(D // 2)
     iterator = range(min(args.first, Nimg)) if args.first else range(Nimg)
 
+    if args.tilt:
+        use_tilts = set(range(args.ntilts))
+        iterator = [
+            ii for ii in iterator if int(data.tilt_numbers[ii].item()) in use_tilts
+        ]
+
     volume_full = torch.zeros((D, D, D), device=device)
     counts_full = torch.zeros((D, D, D), device=device)
     volume_half1 = torch.zeros((D, D, D), device=device)
     counts_half1 = torch.zeros((D, D, D), device=device)
     volume_half2 = torch.zeros((D, D, D), device=device)
     counts_half2 = torch.zeros((D, D, D), device=device)
 
-    for ii in iterator:
-        if ii % 100 == 0:
+    for i, ii in enumerate(iterator):
+        if i % 100 == 0:
             logger.info(f"fimage {ii}")
 
         r, t = posetracker.get_pose(ii)

diff --git a/cryodrgn/commands/downsample.py b/cryodrgn/commands/downsample.py
@@ -1,7 +1,25 @@
-"""
-Downsample an image stack or volume by clipping fourier frequencies
-"""
+"""Downsample an image stack or volume by clipping fourier frequencies.
+
+Example usages
+--------------
+$ cryodrgn downsample my_particle_stack.mrcs -D 128 -o particles.128.mrcs
+$ cryodrgn downsample my_particle_stack.mrcs -D 164 -o particles.164.mrcs
+                                                    --ind chosen_particles.pkl
+$ cryodrgn downsample my_particle_stack.star -D 128 -o particles.128.mrcs
+                                             --datadir folder_with_subtilts/
+
+# try a smaller processing batch size if you are running into memory issues, or a
+# larger size for faster processing
+$ cryodrgn downsample my_particle_stack.txt -D 256 -o particles.256.mrcs -b 2000
+$ cryodrgn downsample my_particle_stack.txt -D 256 -o particles.256.mrcs -b 20000
+
+# will create files
+#       particles.256.0.mrcs, particles.256.1.mrcs, ..., particles.256.i.mrcs
+# where i is equal to particle count // 10000
+# in addition to output file particles.256.txt that indexes all of them
+$ cryodrgn downsample my_particle_stack.mrcs -D 256 -o particles.256.mrcs --chunk 10000
 
+"""
 import argparse
 import math
 import os
@@ -40,11 +58,13 @@ def add_args(parser):
     parser.add_argument(
         "--chunk",
         type=int,
-        help="Chunksize (in # of images) to split particle stack when saving",
+        help="Size of chunks (in # of images, each in its own file) to split particle "
+        "stack when saving",
     )
     parser.add_argument(
         "--datadir",
-        help="Optionally provide path to input .mrcs if loading from a .star or .cs file",
+        help="Optionally provide folder containing input .mrcs files "
+        "if loading from a .star or .cs file",
     )
     parser.add_argument(
         "--max-threads",

diff --git a/cryodrgn/commands/eval_images.py b/cryodrgn/commands/eval_images.py
@@ -3,9 +3,9 @@
 Example usages
 --------------
 
-$ cryodrgn eval_images hand.mrcs het_weights.pkl --config config.pkl \
-                        -o output/out_eval_images_losses.pkl \
-                       --out-z output/out_eval_images_z.pkl \
+$ cryodrgn eval_images hand.mrcs het_weights.pkl --config config.pkl
+                        -o output/out_eval_images_losses.pkl
+                       --out-z output/out_eval_images_z.pkl
                        --poses hand_rot.pkl --log-interval 1 --verbose
 
 """

diff --git a/cryodrgn/commands/eval_vol.py b/cryodrgn/commands/eval_vol.py
@@ -1,5 +1,16 @@
-"""
-Evaluate the decoder at specified values of z
+"""Evaluate the decoder of a heterogeneous model at given z-latent-space co-ordinates.
+
+Example usages
+--------------
+# this model used the default of zdim=8
+$ cryodrgn eval_vol 004_vae128/weights.pkl -c 004_vae128/config.yaml
+                                           -o zero-vol.mrc -z 0 0 0 0 0 0 0 0
+
+# we can specify a z-latent-space path instead of a single location
+# here the model was trained using zdim=4
+$ cryodrgn eval_vol 004_vae128/weights.pkl -c 004_vae128/config.yaml -o zero-vol.mrc
+                                           --z-start 0 -1 0 0 --z-end 1 1 1 1
+
 """
 import argparse
 import os

diff --git a/cryodrgn/commands/filter.py b/cryodrgn/commands/filter.py
@@ -1,23 +1,23 @@
 """Interactive filtering of particles plotted using various model variables.
 
-Note that this tool can only be used for outputs of SPA — *not* tilt series!
+Note that `cryodrgn analyze` must be run first using the epoch to filter on!
 
 Example usages
 --------------
 $ cryodrgn filter 00_trainvae
-$ cryodrgn filter outdir --epoch 20
+$ cryodrgn filter my_outdir --epoch 30
+$ cryodrgn filter my_outdir/ -k 25
+$ cryodrgn filter my_outdir/01_trainvae --plot-inds candidate-particles.pkl
 
 """
 import os
 import pickle
-import argparse
-
-import pandas as pd
 import yaml
 import re
-import numpy as np
 import logging
 
+import numpy as np
+import pandas as pd
 import matplotlib.pyplot as plt
 import seaborn as sns
 from matplotlib import colors
@@ -26,8 +26,7 @@
 from matplotlib.path import Path as PlotPath
 from scipy.spatial.transform import Rotation as RR
 
-from cryodrgn import analysis
-from cryodrgn import utils
+from cryodrgn import analysis, utils
 
 logger = logging.getLogger(__name__)
 
@@ -87,6 +86,11 @@ def main(args) -> None:
         logger.info(f"Using epoch {epoch} for filtering...")
 
     anlzdir = os.path.join(workdir, f"analyze.{epoch}")
+    if not os.path.isdir(anlzdir):
+        raise ValueError(
+            f"No analysis available for epoch {epoch} "
+            f"— first run `cryodrgn analyze {workdir} {epoch}`"
+        )
     z = utils.load_pkl(os.path.join(workdir, f"z.{epoch}.pkl"))
 
     # load poses
@@ -156,20 +160,28 @@ def main(args) -> None:
             )
 
     kmeans_lbls = utils.load_pkl(os.path.join(kmeans_dir, "labels.pkl"))
-
-    plot_df = analysis.load_dataframe(
-        z=z,
-        pc=pc,
-        euler=RR.from_matrix(rot).as_euler("zyz", degrees=True),
-        trans=trans,
-        labels=kmeans_lbls,
-        umap=umap,
-        df1=ctf_params[:, 2],
-        df2=ctf_params[:, 3],
-        dfang=ctf_params[:, 4],
-        phase=ctf_params[:, 8],
-        znorm=np.sum(z**2, axis=1) ** 0.5,
-    )
+    znorm = np.sum(z**2, axis=1) ** 0.5
+
+    if rot.shape[0] == z.shape[0]:
+        plot_df = analysis.load_dataframe(
+            z=z,
+            pc=pc,
+            euler=RR.from_matrix(rot).as_euler("zyz", degrees=True),
+            trans=trans,
+            labels=kmeans_lbls,
+            umap=umap,
+            df1=ctf_params[:, 2],
+            df2=ctf_params[:, 3],
+            dfang=ctf_params[:, 4],
+            phase=ctf_params[:, 8],
+            znorm=znorm,
+        )
+    # tilt-series outputs have tilt-level CTFs and poses but particle-level model
+    # results, thus we ignore the former in this case for now
+    else:
+        plot_df = analysis.load_dataframe(
+            z=z, pc=pc, labels=kmeans_lbls, umap=umap, znorm=znorm
+        )
 
     selector = SelectFromScatter(plot_df, pre_indices)
     input("Press Enter after making your selection...")
@@ -399,9 +411,3 @@ def on_release(self, event: Event) -> None:
             self.handl_id = self.fig.canvas.mpl_connect(
                 "motion_notify_event", self.hover_points
             )
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description=__doc__)
-    args = add_args(parser).parse_args()
-    main(args)
-Original file line number
+Diff line change
@@ Expand Up / @@ -49,8 +49,6 @@ coverage.xml @@
     *.cover
     .hypothesis/
-    testing/output
     # Translations
     *.mo
     *.pot
@@ Expand Down @@