Skip to content

Commit

Permalink
Merge pull request #382 from ml-struct-bio/v3.3.3
Browse files Browse the repository at this point in the history
v3.3.3: RELION3.1 .star filtering, interactive tilt series filtering, and fixes to backprojection
  • Loading branch information
michal-g authored Jun 25, 2024
2 parents 2e9c376 + 75d5a7c commit 34d5c39
Show file tree
Hide file tree
Showing 101 changed files with 935 additions and 786 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@ name: CI Testing

on:
push:
branches: [ main, develop ]
branches: [ develop ]
tags:
- '[0-9]+\.[0-9]+\.[0-9]+'
- '[0-9]+\.[0-9]+\.[0-9]+-*'
pull_request:
branches: [ main, develop ]
branches: [ main ]

jobs:
run_tests:
Expand All @@ -34,9 +34,9 @@ jobs:
python3 -m pip install pytest-xdist
python3 -m pip install .
python3 -m pip uninstall -y torch
python3 -m pip install --no-cache-dir torch==${{ matrix.torch }}
python3 -m pip cache purge
python3 -m pip install torch==${{ matrix.torch }}
- name: Pytest
run: |
pytest -v -n2 --dist=loadscope -k 'test_reconstruct'
pytest -v -n0 --dist=loadscope -k 'not test_reconstruct'
pytest -v -n2 --dist=loadscope
2 changes: 0 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,6 @@ coverage.xml
*.cover
.hypothesis/

testing/output

# Translations
*.mo
*.pot
Expand Down
286 changes: 144 additions & 142 deletions README.md

Large diffs are not rendered by default.

15 changes: 9 additions & 6 deletions cryodrgn/command_line.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
"""Creating commands installed with cryoDRGN for use from command line.
This module searches through the `commands` and `commands_utils` folders
for anything that matches the format of a cryoDRGN command module
and creates a `cryodrgn <x>` command line interface for each of the
former and a `cryodrgn_utils <x>` for each of the latter.
Upon installation, this module searches through the `commands` and `commands_utils`
folders for anything that matches the format of a cryoDRGN command module, and creates
a `cryodrgn <x>` command line interface for each one found in the former
and a `cryodrgn_utils <x>` for each one found in the latter.
See the `[project.scripts]` entry in the `pyproject.toml` file for how this module
is used to create the commands during installation.
Expand All @@ -12,6 +12,7 @@
import argparse
import os
from importlib import import_module
import re
import cryodrgn


Expand Down Expand Up @@ -45,6 +46,8 @@ def _get_commands(cmd_dir: str, doc_str: str = "") -> None:
parsed_doc = module.__doc__.split("\n") if module.__doc__ else list()
descr_txt = parsed_doc[0] if parsed_doc else ""
epilog_txt = "" if len(parsed_doc) <= 1 else "\n".join(parsed_doc[1:])
epilog_txt = re.sub(" +", " ", epilog_txt)
epilog_txt = re.sub("\n ", "\n\t ", epilog_txt)

# we add documentation text parsed from the module's docstring
this_parser = subparsers.add_parser(
Expand All @@ -61,15 +64,15 @@ def _get_commands(cmd_dir: str, doc_str: str = "") -> None:


def main_commands():
"""Commands installed with cryoDRGN."""
"""Primary commands installed with cryoDRGN as `cryodrgn <cmd_module_name>`."""
_get_commands(
cmd_dir=os.path.join(os.path.dirname(__file__), "commands"),
doc_str="Commands installed with cryoDRGN",
)


def util_commands():
"""Utility commands installed with cryoDRGN."""
"""Utility commands installed with cryoDRGN as `cryodrgn_utils <cmd_module_name>`."""
_get_commands(
cmd_dir=os.path.join(os.path.dirname(__file__), "commands_utils"),
doc_str="Utility commands installed with cryoDRGN",
Expand Down
10 changes: 10 additions & 0 deletions cryodrgn/commands/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# cryoDRGN commands #

This folder contains the primary commands that are installed as part of the cryoDRGN package, as well as any associated
auxiliary files.

See `cryodrgn.command_line` for how the contents of this folder are parsed as part of creating the cryoDRGN command
line interface upon installation of the package.

See also the `cryodrgn/commands_utils/` folder for the utility commands that are the other part of the cryoDRGN command
line interface.
15 changes: 13 additions & 2 deletions cryodrgn/commands/abinit_het.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,16 @@
"""
Heterogeneous NN reconstruction with hierarchical pose optimization
"""Train a heterogeneous NN reconstruction model with hierarchical pose optimization.
Example usages
--------------
# the default is to train for thirty epochs; here we train for fifty instead
$ cryodrgn abinit_het particles.mrcs -o cryodrgn-outs/003_abinit_het --zdim 4
--ctf ctf.pkl -n 50
# using .star particle input requires datadir argument pointing to image stacks
$ cryodrgn abinit_het particles.star --datadir path_to_images/
-o cryodrgn-outs/004_abinit_het.10 --zdim 10
--ctf ctf.pkl -n 50
"""
import argparse
import os
Expand Down
2 changes: 1 addition & 1 deletion cryodrgn/commands/abinit_homo.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Example usages
--------------
$ cryodrgn abinit_homo particles.256.txt --ctf ctf.pkl --ind chosen-particles.pkl \
$ cryodrgn abinit_homo particles.256.txt --ctf ctf.pkl --ind chosen-particles.pkl
-o cryodrgn-out/256_abinit-homo
"""
Expand Down
15 changes: 12 additions & 3 deletions cryodrgn/commands/analyze.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,16 @@
"""
Visualize latent space and generate volumes
"""
"""Visualize latent space and generate volumes using a trained cryoDRGN model.
Example usages
--------------
$ cryodrgn analyze 003_abinit-het/ 49
# it is necessary to invert handedness for some datasets
$ cryodrgn analyze 003_abinit-het/ 99 --invert
# don't run more computationally expensive analyses
$ cryodrgn analyze 003_abinit-het/ 99 --skip-umap --skip-vol
"""
import argparse
import os
import os.path
Expand Down
32 changes: 22 additions & 10 deletions cryodrgn/commands/backproject_voxel.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,13 @@
Example usages
--------------
$ cryodrgn backproject_voxel particles.128.mrcs --poses pose.pkl -o backproj.128.mrc
$ cryodrgn backproject_voxel particles.256.mrcs --poses pose.pkl
$ cryodrgn backproject_voxel particles.128.mrcs
--ctf ctf.pkl --poses pose.pkl -o backproj.128.mrc
$ cryodrgn backproject_voxel particles.256.mrcs --ctf ctf.pkl --poses pose.pkl
--ind good-particles.pkl -o backproj.256.mrc --lazy
$ cryodrgn backproject_voxel particles_from_M.star --datadir subtilts/128/
--ctf ctf.pkl --poses pose.pkl
-o bproj_tilt.mrc --lazy --tilt --ntilts=5
"""
import argparse
Expand Down Expand Up @@ -58,15 +62,17 @@ def add_args(parser):
"--reg-weight",
type=float,
default=1.0,
help="Add this value times the mean weight to the weight map to regularize the volume, reducing noise."
"Alternatively, you can set --output-sumcount, and then use `cryodrgn_utils regularize_backproject` on the"
".sums and .counts files to try different regularization constants post hoc.",
help="Add this value times the mean weight to the weight map to regularize the"
"volume, reducing noise.\nAlternatively, you can set --output-sumcount, and "
"then use `cryodrgn_utils regularize_backproject` on the"
".sums and .counts files to try different regularization constants post hoc.\n"
"(default: %(default)s)",
)
parser.add_argument(
"--output-sumcount",
action="store_true",
help="Output voxel sums and counts so that different regularization weights can be applied post hoc, with "
"`cryodrgn_utils regularize_backproject`.",
help="Output voxel sums and counts so that different regularization weights "
"can be applied post hoc, with `cryodrgn_utils regularize_backproject`.",
)

group = parser.add_argument_group("Dataset loading options")
Expand Down Expand Up @@ -126,7 +132,7 @@ def add_args(parser):

def add_slice(volume, counts, ff_coord, ff, D, ctf_mul):
d2 = int(D / 2)
ff_coord = ff_coord.transpose(0, 1)
ff_coord = ff_coord.transpose(0, 1).clip(-d2, d2)
xf, yf, zf = ff_coord.floor().long()
xc, yc, zc = ff_coord.ceil().long()

Expand Down Expand Up @@ -232,15 +238,21 @@ def main(args):
mask = lattice.get_circular_mask(D // 2)
iterator = range(min(args.first, Nimg)) if args.first else range(Nimg)

if args.tilt:
use_tilts = set(range(args.ntilts))
iterator = [
ii for ii in iterator if int(data.tilt_numbers[ii].item()) in use_tilts
]

volume_full = torch.zeros((D, D, D), device=device)
counts_full = torch.zeros((D, D, D), device=device)
volume_half1 = torch.zeros((D, D, D), device=device)
counts_half1 = torch.zeros((D, D, D), device=device)
volume_half2 = torch.zeros((D, D, D), device=device)
counts_half2 = torch.zeros((D, D, D), device=device)

for ii in iterator:
if ii % 100 == 0:
for i, ii in enumerate(iterator):
if i % 100 == 0:
logger.info(f"fimage {ii}")

r, t = posetracker.get_pose(ii)
Expand Down
30 changes: 25 additions & 5 deletions cryodrgn/commands/downsample.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,25 @@
"""
Downsample an image stack or volume by clipping fourier frequencies
"""
"""Downsample an image stack or volume by clipping fourier frequencies.
Example usages
--------------
$ cryodrgn downsample my_particle_stack.mrcs -D 128 -o particles.128.mrcs
$ cryodrgn downsample my_particle_stack.mrcs -D 164 -o particles.164.mrcs
--ind chosen_particles.pkl
$ cryodrgn downsample my_particle_stack.star -D 128 -o particles.128.mrcs
--datadir folder_with_subtilts/
# try a smaller processing batch size if you are running into memory issues, or a
# larger size for faster processing
$ cryodrgn downsample my_particle_stack.txt -D 256 -o particles.256.mrcs -b 2000
$ cryodrgn downsample my_particle_stack.txt -D 256 -o particles.256.mrcs -b 20000
# will create files
# particles.256.0.mrcs, particles.256.1.mrcs, ..., particles.256.i.mrcs
# where i is equal to particle count // 10000
# in addition to output file particles.256.txt that indexes all of them
$ cryodrgn downsample my_particle_stack.mrcs -D 256 -o particles.256.mrcs --chunk 10000
"""
import argparse
import math
import os
Expand Down Expand Up @@ -40,11 +58,13 @@ def add_args(parser):
parser.add_argument(
"--chunk",
type=int,
help="Chunksize (in # of images) to split particle stack when saving",
help="Size of chunks (in # of images, each in its own file) to split particle "
"stack when saving",
)
parser.add_argument(
"--datadir",
help="Optionally provide path to input .mrcs if loading from a .star or .cs file",
help="Optionally provide folder containing input .mrcs files "
"if loading from a .star or .cs file",
)
parser.add_argument(
"--max-threads",
Expand Down
6 changes: 3 additions & 3 deletions cryodrgn/commands/eval_images.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
Example usages
--------------
$ cryodrgn eval_images hand.mrcs het_weights.pkl --config config.pkl \
-o output/out_eval_images_losses.pkl \
--out-z output/out_eval_images_z.pkl \
$ cryodrgn eval_images hand.mrcs het_weights.pkl --config config.pkl
-o output/out_eval_images_losses.pkl
--out-z output/out_eval_images_z.pkl
--poses hand_rot.pkl --log-interval 1 --verbose
"""
Expand Down
15 changes: 13 additions & 2 deletions cryodrgn/commands/eval_vol.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,16 @@
"""
Evaluate the decoder at specified values of z
"""Evaluate the decoder of a heterogeneous model at given z-latent-space co-ordinates.
Example usages
--------------
# this model used the default of zdim=8
$ cryodrgn eval_vol 004_vae128/weights.pkl -c 004_vae128/config.yaml
-o zero-vol.mrc -z 0 0 0 0 0 0 0 0
# we can specify a z-latent-space path instead of a single location
# here the model was trained using zdim=4
$ cryodrgn eval_vol 004_vae128/weights.pkl -c 004_vae128/config.yaml -o zero-vol.mrc
--z-start 0 -1 0 0 --z-end 1 1 1 1
"""
import argparse
import os
Expand Down
62 changes: 34 additions & 28 deletions cryodrgn/commands/filter.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,23 @@
"""Interactive filtering of particles plotted using various model variables.
Note that this tool can only be used for outputs of SPA — *not* tilt series!
Note that `cryodrgn analyze` must be run first using the epoch to filter on!
Example usages
--------------
$ cryodrgn filter 00_trainvae
$ cryodrgn filter outdir --epoch 20
$ cryodrgn filter my_outdir --epoch 30
$ cryodrgn filter my_outdir/ -k 25
$ cryodrgn filter my_outdir/01_trainvae --plot-inds candidate-particles.pkl
"""
import os
import pickle
import argparse

import pandas as pd
import yaml
import re
import numpy as np
import logging

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import colors
Expand All @@ -26,8 +26,7 @@
from matplotlib.path import Path as PlotPath
from scipy.spatial.transform import Rotation as RR

from cryodrgn import analysis
from cryodrgn import utils
from cryodrgn import analysis, utils

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -87,6 +86,11 @@ def main(args) -> None:
logger.info(f"Using epoch {epoch} for filtering...")

anlzdir = os.path.join(workdir, f"analyze.{epoch}")
if not os.path.isdir(anlzdir):
raise ValueError(
f"No analysis available for epoch {epoch} "
f"— first run `cryodrgn analyze {workdir} {epoch}`"
)
z = utils.load_pkl(os.path.join(workdir, f"z.{epoch}.pkl"))

# load poses
Expand Down Expand Up @@ -156,20 +160,28 @@ def main(args) -> None:
)

kmeans_lbls = utils.load_pkl(os.path.join(kmeans_dir, "labels.pkl"))

plot_df = analysis.load_dataframe(
z=z,
pc=pc,
euler=RR.from_matrix(rot).as_euler("zyz", degrees=True),
trans=trans,
labels=kmeans_lbls,
umap=umap,
df1=ctf_params[:, 2],
df2=ctf_params[:, 3],
dfang=ctf_params[:, 4],
phase=ctf_params[:, 8],
znorm=np.sum(z**2, axis=1) ** 0.5,
)
znorm = np.sum(z**2, axis=1) ** 0.5

if rot.shape[0] == z.shape[0]:
plot_df = analysis.load_dataframe(
z=z,
pc=pc,
euler=RR.from_matrix(rot).as_euler("zyz", degrees=True),
trans=trans,
labels=kmeans_lbls,
umap=umap,
df1=ctf_params[:, 2],
df2=ctf_params[:, 3],
dfang=ctf_params[:, 4],
phase=ctf_params[:, 8],
znorm=znorm,
)
# tilt-series outputs have tilt-level CTFs and poses but particle-level model
# results, thus we ignore the former in this case for now
else:
plot_df = analysis.load_dataframe(
z=z, pc=pc, labels=kmeans_lbls, umap=umap, znorm=znorm
)

selector = SelectFromScatter(plot_df, pre_indices)
input("Press Enter after making your selection...")
Expand Down Expand Up @@ -399,9 +411,3 @@ def on_release(self, event: Event) -> None:
self.handl_id = self.fig.canvas.mpl_connect(
"motion_notify_event", self.hover_points
)


if __name__ == "__main__":
parser = argparse.ArgumentParser(description=__doc__)
args = add_args(parser).parse_args()
main(args)
Loading

0 comments on commit 34d5c39

Please sign in to comment.