From 2f4c623653091e5488db26b61211d553cab6ac45 Mon Sep 17 00:00:00 2001 From: Richard Neher Date: Wed, 6 Sep 2023 15:55:49 +0200 Subject: [PATCH 1/8] fix: return best alignment when band extension hits max_band_limit --- packages_rs/nextclade/src/align/align.rs | 33 ++++++++++++------- .../nextclade/src/align/seed_alignment.rs | 11 ++----- 2 files changed, 24 insertions(+), 20 deletions(-) diff --git a/packages_rs/nextclade/src/align/align.rs b/packages_rs/nextclade/src/align/align.rs index 705f295a3..c2b2b849b 100644 --- a/packages_rs/nextclade/src/align/align.rs +++ b/packages_rs/nextclade/src/align/align.rs @@ -1,3 +1,5 @@ +use std::cmp::max; + use crate::align::backtrace::{backtrace, AlignmentOutput}; use crate::align::band_2d::Stripe; use crate::align::band_2d::{full_matrix, simple_stripes}; @@ -9,6 +11,7 @@ use crate::alphabet::aa::Aa; use crate::alphabet::letter::Letter; use crate::alphabet::nuc::Nuc; use crate::make_error; +use bio::alignment; use eyre::{Report, WrapErr}; use log::{info, trace}; @@ -62,28 +65,36 @@ pub fn align_nuc( let mut terminal_bandwidth = params.terminal_bandwidth as isize; let mut excess_bandwidth = params.excess_bandwidth as isize; - let mut allowed_mismatches = params.allowed_mismatches as isize; - + let mut minimal_bandwidth = max(1, params.allowed_mismatches as isize); + let max_band_area = params.max_band_area; let mut attempt = 0; + let mut alignment: AlignmentOutput; loop { - let stripes = create_alignment_band( + let (stripes, band_area) = create_alignment_band( &seed_matches, qry_len as isize, ref_len as isize, terminal_bandwidth, excess_bandwidth, - allowed_mismatches, - params.max_band_area, - )?; + minimal_bandwidth, + ); + if band_area > max_band_area { + if attempt == 0 { + return make_error!("Alignment matrix size {band_area} exceeds maximum value {max_band_area}. The threshold can be adjusted using CLI flag '--max-band-area' or using 'maxBandArea' field in the dataset's virus_properties.json"); + } + // return previously calculated alignment + return Ok(alignment); + } - let mut alignment = align_pairwise(&qry_seq, ref_seq, gap_open_close, params, &stripes); + alignment = align_pairwise(&qry_seq, ref_seq, gap_open_close, params, &stripes); alignment.is_reverse_complement = is_reverse_complement; - if alignment.hit_boundary { - terminal_bandwidth *= 2; - excess_bandwidth *= 2; - allowed_mismatches *= 2; + if alignment.hit_boundary && minimal_bandwidth > 0 { + // double bandwidth parameters or increase to one if 0 + terminal_bandwidth = max(2 * terminal_bandwidth, 1); + excess_bandwidth = max(2 * excess_bandwidth, 1); + minimal_bandwidth = max(2 * minimal_bandwidth, 1); attempt += 1; info!("When processing sequence #{index} '{seq_name}': In nucleotide alignment: Band boundary is hit on attempt {}. Retrying with relaxed parameters. Alignment score was: {}", attempt, alignment.alignment_score); } else { diff --git a/packages_rs/nextclade/src/align/seed_alignment.rs b/packages_rs/nextclade/src/align/seed_alignment.rs index d3877e581..810bd1fe0 100644 --- a/packages_rs/nextclade/src/align/seed_alignment.rs +++ b/packages_rs/nextclade/src/align/seed_alignment.rs @@ -215,8 +215,7 @@ pub fn create_alignment_band( terminal_bandwidth: isize, excess_bandwidth: isize, minimal_bandwidth: isize, - max_band_area: usize, -) -> Result, Report> { +) -> (Vec, usize) { // This function steps through the chained seeds and determines and appropriate band // defined via stripes in query coordinates. These bands will later be chopped to reachable ranges @@ -306,13 +305,7 @@ pub fn create_alignment_band( // write_stripes_to_file(&stripes, "stripes.csv"); // trim stripes to reachable regions - let (regularized_stripes, band_area) = regularize_stripes(stripes, qry_len as usize); - - if band_area > max_band_area { - return make_error!("Alignment matrix size {band_area} exceeds maximum value {max_band_area}. The threshold can be adjusted using CLI flag '--max-band-area' or using 'maxBandArea' field in the dataset's virus_properties.json"); - } - - Ok(regularized_stripes) + regularize_stripes(stripes, qry_len as usize) } #[derive(Clone, Copy, Debug)] From 75d4038efc31bfce429bf63e64bc0eebaaff769f Mon Sep 17 00:00:00 2001 From: Richard Neher Date: Wed, 6 Sep 2023 15:58:12 +0200 Subject: [PATCH 2/8] chore: remove unused import --- packages_rs/nextclade/src/align/align.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/packages_rs/nextclade/src/align/align.rs b/packages_rs/nextclade/src/align/align.rs index c2b2b849b..d470fe084 100644 --- a/packages_rs/nextclade/src/align/align.rs +++ b/packages_rs/nextclade/src/align/align.rs @@ -1,5 +1,3 @@ -use std::cmp::max; - use crate::align::backtrace::{backtrace, AlignmentOutput}; use crate::align::band_2d::Stripe; use crate::align::band_2d::{full_matrix, simple_stripes}; @@ -11,9 +9,9 @@ use crate::alphabet::aa::Aa; use crate::alphabet::letter::Letter; use crate::alphabet::nuc::Nuc; use crate::make_error; -use bio::alignment; use eyre::{Report, WrapErr}; use log::{info, trace}; +use std::cmp::max; fn align_pairwise>( qry_seq: &[T], From b318dcc0ddde6818a32907850f5e2817ff654873 Mon Sep 17 00:00:00 2001 From: Richard Neher Date: Wed, 6 Sep 2023 17:15:40 +0200 Subject: [PATCH 3/8] chore: remove unused argument in tests --- packages_rs/nextclade/src/align/seed_alignment.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/packages_rs/nextclade/src/align/seed_alignment.rs b/packages_rs/nextclade/src/align/seed_alignment.rs index 810bd1fe0..5c8ee9e61 100644 --- a/packages_rs/nextclade/src/align/seed_alignment.rs +++ b/packages_rs/nextclade/src/align/seed_alignment.rs @@ -413,7 +413,6 @@ mod tests { let max_indel = 100; let qry_len = 30; let ref_len = 40; - let max_band_area = 500_000_000; let result = create_alignment_band( &seed_matches, @@ -422,7 +421,6 @@ mod tests { terminal_bandwidth, excess_bandwidth, allowed_mismatches, - max_band_area, ); Ok(()) From 7ef77d736d782ae70e24c688f2fede89fe3e546a Mon Sep 17 00:00:00 2001 From: Richard Neher Date: Wed, 6 Sep 2023 17:25:07 +0200 Subject: [PATCH 4/8] fix: avoid uninitialized alignment --- packages_rs/nextclade/src/align/align.rs | 55 ++++++++++++------------ 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/packages_rs/nextclade/src/align/align.rs b/packages_rs/nextclade/src/align/align.rs index d470fe084..5c21a1141 100644 --- a/packages_rs/nextclade/src/align/align.rs +++ b/packages_rs/nextclade/src/align/align.rs @@ -66,9 +66,28 @@ pub fn align_nuc( let mut minimal_bandwidth = max(1, params.allowed_mismatches as isize); let max_band_area = params.max_band_area; let mut attempt = 0; - let mut alignment: AlignmentOutput; - loop { + let (stripes, band_area) = create_alignment_band( + &seed_matches, + qry_len as isize, + ref_len as isize, + terminal_bandwidth, + excess_bandwidth, + minimal_bandwidth, + ); + if band_area > max_band_area { + return make_error!("Alignment matrix size {band_area} exceeds maximum value {max_band_area}. The threshold can be adjusted using CLI flag '--max-band-area' or using 'maxBandArea' field in the dataset's virus_properties.json"); + } + + let mut alignment = align_pairwise(&qry_seq, ref_seq, gap_open_close, params, &stripes); + + while alignment.hit_boundary && attempt < params.max_alignment_attempts { + // double bandwidth parameters or increase to one if 0 + terminal_bandwidth = max(2 * terminal_bandwidth, 1); + excess_bandwidth = max(2 * excess_bandwidth, 1); + minimal_bandwidth = max(2 * minimal_bandwidth, 1); + attempt += 1; + // make new band let (stripes, band_area) = create_alignment_band( &seed_matches, qry_len as isize, @@ -77,36 +96,16 @@ pub fn align_nuc( excess_bandwidth, minimal_bandwidth, ); + // discard stripes and break to return previous alignment if band_area > max_band_area { - if attempt == 0 { - return make_error!("Alignment matrix size {band_area} exceeds maximum value {max_band_area}. The threshold can be adjusted using CLI flag '--max-band-area' or using 'maxBandArea' field in the dataset's virus_properties.json"); - } - // return previously calculated alignment - return Ok(alignment); + break; } - + // realign alignment = align_pairwise(&qry_seq, ref_seq, gap_open_close, params, &stripes); - alignment.is_reverse_complement = is_reverse_complement; - - if alignment.hit_boundary && minimal_bandwidth > 0 { - // double bandwidth parameters or increase to one if 0 - terminal_bandwidth = max(2 * terminal_bandwidth, 1); - excess_bandwidth = max(2 * excess_bandwidth, 1); - minimal_bandwidth = max(2 * minimal_bandwidth, 1); - attempt += 1; - info!("When processing sequence #{index} '{seq_name}': In nucleotide alignment: Band boundary is hit on attempt {}. Retrying with relaxed parameters. Alignment score was: {}", attempt, alignment.alignment_score); - } else { - if attempt > 0 { - info!("When processing sequence #{index} '{seq_name}': In nucleotide alignment: Succeeded without hitting band boundary on attempt {}. Alignment score was: {}", attempt+1, alignment.alignment_score); - } - return Ok(alignment); - } - - if attempt > params.max_alignment_attempts { - info!("When processing sequence #{index} '{seq_name}': In nucleotide alignment: Attempted to relax band parameters {attempt} times, but still hitting the band boundary. Alignment score was: {}", alignment.alignment_score); - return Ok(alignment); - } } + + alignment.is_reverse_complement = is_reverse_complement; + Ok(alignment) } /// align amino acids using a fixed bandwidth banded alignment while penalizing terminal indels From bd074789f06f1222368fa5ce399ab09d4ce97ef0 Mon Sep 17 00:00:00 2001 From: Richard Neher Date: Wed, 6 Sep 2023 18:06:28 +0200 Subject: [PATCH 5/8] chore: add log messages --- packages_rs/nextclade/src/align/align.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/packages_rs/nextclade/src/align/align.rs b/packages_rs/nextclade/src/align/align.rs index 5c21a1141..fbdc72d07 100644 --- a/packages_rs/nextclade/src/align/align.rs +++ b/packages_rs/nextclade/src/align/align.rs @@ -82,6 +82,7 @@ pub fn align_nuc( let mut alignment = align_pairwise(&qry_seq, ref_seq, gap_open_close, params, &stripes); while alignment.hit_boundary && attempt < params.max_alignment_attempts { + info!("When processing sequence #{index} '{seq_name}': In nucleotide alignment: Band boundary is hit on attempt {}. Retrying with relaxed parameters. Alignment score was: {}", attempt+1, alignment.alignment_score); // double bandwidth parameters or increase to one if 0 terminal_bandwidth = max(2 * terminal_bandwidth, 1); excess_bandwidth = max(2 * excess_bandwidth, 1); @@ -103,7 +104,17 @@ pub fn align_nuc( // realign alignment = align_pairwise(&qry_seq, ref_seq, gap_open_close, params, &stripes); } - + // report success/failure of broadening of band width + if alignment.hit_boundary { + info!("When processing sequence #{index} '{seq_name}': In nucleotide alignment: Attempted to relax band parameters {attempt} times, but still hitting the band boundary. Returning last attempt with score: {}", alignment.alignment_score); + if band_area > max_band_area { + info!( + "When processing sequence #{index} '{seq_name}': final band area {band_area} exceeded the cutoff {max_band_area}" + ); + } + } else if attempt > 0 { + info!("When processing sequence #{index} '{seq_name}': In nucleotide alignment: Succeeded without hitting band boundary on attempt {}. Alignment score was: {}", attempt+1, alignment.alignment_score); + } alignment.is_reverse_complement = is_reverse_complement; Ok(alignment) } From 867a0e66f99ff5987a3639b4a4bdd478963fac9d Mon Sep 17 00:00:00 2001 From: Richard Neher Date: Thu, 7 Sep 2023 09:40:02 +0200 Subject: [PATCH 6/8] fix: reassign rather than redefine band_area in attempt loop to allow reporting after the loop --- packages_rs/nextclade/src/align/align.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages_rs/nextclade/src/align/align.rs b/packages_rs/nextclade/src/align/align.rs index fbdc72d07..37e97bfa2 100644 --- a/packages_rs/nextclade/src/align/align.rs +++ b/packages_rs/nextclade/src/align/align.rs @@ -67,7 +67,7 @@ pub fn align_nuc( let max_band_area = params.max_band_area; let mut attempt = 0; - let (stripes, band_area) = create_alignment_band( + let (mut stripes, mut band_area) = create_alignment_band( &seed_matches, qry_len as isize, ref_len as isize, @@ -89,7 +89,7 @@ pub fn align_nuc( minimal_bandwidth = max(2 * minimal_bandwidth, 1); attempt += 1; // make new band - let (stripes, band_area) = create_alignment_band( + (stripes, band_area) = create_alignment_band( &seed_matches, qry_len as isize, ref_len as isize, From 0b2ded4fac07140d3448a16e726c511e68863e05 Mon Sep 17 00:00:00 2001 From: ivan-aksamentov Date: Fri, 8 Sep 2023 07:44:21 +0200 Subject: [PATCH 7/8] refactor(cli): simplify check for completions cli args --- packages_rs/nextclade-cli/src/cli/nextclade_cli.rs | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/packages_rs/nextclade-cli/src/cli/nextclade_cli.rs b/packages_rs/nextclade-cli/src/cli/nextclade_cli.rs index b40772240..bf45763db 100644 --- a/packages_rs/nextclade-cli/src/cli/nextclade_cli.rs +++ b/packages_rs/nextclade-cli/src/cli/nextclade_cli.rs @@ -27,13 +27,6 @@ lazy_static! { pub static ref SHELLS: Vec<&'static str> = ["bash", "elvish", "fish", "fig", "powershell", "zsh"].to_vec(); } -pub fn check_shells(value: &str) -> Result { - SHELLS - .contains(&value) - .then_some(value.to_owned()) - .ok_or_else(|| eyre!("Unknown shell: '{value}'. Possible values: {}", SHELLS.join(", "))) -} - #[derive(Parser, Debug)] #[clap(name = "nextclade")] #[clap(author, version)] @@ -69,7 +62,7 @@ pub enum NextcladeCommands { /// Completions { /// Name of the shell to generate appropriate completions - #[clap(value_name = "SHELL", default_value_t = String::from("bash"), value_parser = check_shells)] + #[clap(value_name = "SHELL", default_value_t = String::from("bash"), value_parser = SHELLS.clone())] shell: String, }, From 42fb63fe032c142edcfc3634bea5c0b24d0cbd14 Mon Sep 17 00:00:00 2001 From: ivan-aksamentov Date: Fri, 8 Sep 2023 08:43:22 +0200 Subject: [PATCH 8/8] feat(cli): bring back cli colors using clap v3 styling interface --- Cargo.lock | 52 +++++++++---------- packages_rs/nextclade-cli/Cargo.toml | 6 +-- .../nextclade-cli/src/cli/nextclade_cli.rs | 10 ++++ packages_rs/nextclade/Cargo.toml | 7 ++- 4 files changed, 41 insertions(+), 34 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 44d8841e9..b0c6ba611 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -75,16 +75,15 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstream" -version = "0.3.2" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ca84f3628370c59db74ee214b3263d58f9aadd9b4fe7e711fd87dc452b7f163" +checksum = "b1f58811cfac344940f1a400b6e6231ce35171f614f26439e80f8c1465c5cc0c" dependencies = [ "anstyle", "anstyle-parse", "anstyle-query", "anstyle-wincon", "colorchoice", - "is-terminal", "utf8parse", ] @@ -114,9 +113,9 @@ dependencies = [ [[package]] name = "anstyle-wincon" -version = "1.0.1" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "180abfa45703aebe0093f79badacc01b8fd4ea2e35118747e5811127f926e188" +checksum = "58f54d10c6dfa51283a066ceab3ec1ab78d13fae00aa49243a45e4571fb79dfd" dependencies = [ "anstyle", "windows-sys", @@ -469,51 +468,42 @@ dependencies = [ [[package]] name = "clap" -version = "4.3.10" +version = "4.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "384e169cc618c613d5e3ca6404dda77a8685a63e08660dcc64abaf7da7cb0c7a" +checksum = "6a13b88d2c62ff462f88e4a121f17a82c1af05693a2f192b5c38d14de73c19f6" dependencies = [ "clap_builder", "clap_derive", - "once_cell", -] - -[[package]] -name = "clap-verbosity-flag" -version = "2.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1eef05769009513df2eb1c3b4613e7fad873a14c600ff025b08f250f59fee7de" -dependencies = [ - "clap", - "log", ] [[package]] name = "clap_builder" -version = "4.3.10" +version = "4.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef137bbe35aab78bdb468ccfba75a5f4d8321ae011d34063770780545176af2d" +checksum = "2bb9faaa7c2ef94b2743a21f5a29e6f0010dff4caa69ac8e9d6cf8b6fa74da08" dependencies = [ "anstream", "anstyle", "clap_lex", "strsim", + "unicase", + "unicode-width", ] [[package]] name = "clap_complete" -version = "4.3.1" +version = "4.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f6b5c519bab3ea61843a7923d074b04245624bb84a64a8c150f5deb014e388b" +checksum = "4110a1e6af615a9e6d0a36f805d5c99099f8bab9b8042f5bc1fa220a4a89e36f" dependencies = [ "clap", ] [[package]] name = "clap_complete_fig" -version = "4.3.1" +version = "4.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99fee1d30a51305a6c2ed3fc5709be3c8af626c9c958e04dd9ae94e27bcbce9f" +checksum = "9e9bae21b3f6eb417ad3054c8b1094aa0542116eba4979b1b271baefbfa6b965" dependencies = [ "clap", "clap_complete", @@ -521,9 +511,9 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.3.2" +version = "4.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8cd2b2a819ad6eec39e8f1d6b53001af1e5469f8c177579cdaeb313115b825f" +checksum = "0862016ff20d69b84ef8247369fabf5c008a7417002411897d40ee1f4532b873" dependencies = [ "heck", "proc-macro2", @@ -1722,7 +1712,6 @@ dependencies = [ "bzip2", "chrono", "clap", - "clap-verbosity-flag", "clap_complete", "clap_complete_fig", "color-eyre", @@ -3033,6 +3022,15 @@ version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" +[[package]] +name = "unicase" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7d2d4dafb69621809a81864c9c1b864479e1235c0dd4e199924b9742439ed89" +dependencies = [ + "version_check", +] + [[package]] name = "unicode-bidi" version = "0.3.13" diff --git a/packages_rs/nextclade-cli/Cargo.toml b/packages_rs/nextclade-cli/Cargo.toml index 2b891e126..5d29296b0 100644 --- a/packages_rs/nextclade-cli/Cargo.toml +++ b/packages_rs/nextclade-cli/Cargo.toml @@ -11,9 +11,9 @@ publish = false [dependencies] assert2 = "=0.3.11" -clap = { version = "=4.3.10", features = ["derive"] } -clap_complete = "=4.3.1" -clap_complete_fig = "=4.3.1" +clap = { version = "=4.4.2", features = ["derive", "color", "unicode", "unstable-styles"] } +clap_complete = "=4.4.1" +clap_complete_fig = "=4.4.0" color-eyre = "=0.6.2" comfy-table = "=7.0.1" crossbeam = "=0.8.2" diff --git a/packages_rs/nextclade-cli/src/cli/nextclade_cli.rs b/packages_rs/nextclade-cli/src/cli/nextclade_cli.rs index bf45763db..d732b2e11 100644 --- a/packages_rs/nextclade-cli/src/cli/nextclade_cli.rs +++ b/packages_rs/nextclade-cli/src/cli/nextclade_cli.rs @@ -3,6 +3,7 @@ use crate::cli::nextclade_dataset_list::nextclade_dataset_list; use crate::cli::nextclade_loop::nextclade_run; use crate::cli::verbosity::{Verbosity, WarnLevel}; use crate::io::http_client::ProxyConfig; +use clap::builder::styling; use clap::{ArgGroup, CommandFactory, Parser, Subcommand, ValueEnum, ValueHint}; use clap_complete::{generate, Generator, Shell}; use clap_complete_fig::Fig; @@ -27,10 +28,19 @@ lazy_static! { pub static ref SHELLS: Vec<&'static str> = ["bash", "elvish", "fish", "fig", "powershell", "zsh"].to_vec(); } +fn styles() -> styling::Styles { + styling::Styles::styled() + .header(styling::AnsiColor::Green.on_default() | styling::Effects::BOLD) + .usage(styling::AnsiColor::Green.on_default() | styling::Effects::BOLD) + .literal(styling::AnsiColor::Blue.on_default() | styling::Effects::BOLD) + .placeholder(styling::AnsiColor::Cyan.on_default()) +} + #[derive(Parser, Debug)] #[clap(name = "nextclade")] #[clap(author, version)] #[clap(verbatim_doc_comment)] +#[clap(styles = styles())] /// Viral genome alignment, mutation calling, clade assignment, quality checks and phylogenetic placement. /// /// Nextclade is a part of Nextstrain: https://nextstrain.org diff --git a/packages_rs/nextclade/Cargo.toml b/packages_rs/nextclade/Cargo.toml index 3e60a046d..b8b0962d1 100644 --- a/packages_rs/nextclade/Cargo.toml +++ b/packages_rs/nextclade/Cargo.toml @@ -18,10 +18,9 @@ auto_ops = "=0.3.0" bio = "=1.3.1" bio-types = "=1.0.0" chrono = { version = "=0.4.26", default-features = false, features = ["clock", "std", "wasmbind"] } -clap = { version = "=4.3.10", features = ["derive"] } -clap-verbosity-flag = "=2.0.1" -clap_complete = "=4.3.1" -clap_complete_fig = "=4.3.1" +clap = { version = "=4.4.2", features = ["derive", "color", "unicode", "unstable-styles"] } +clap_complete = "=4.4.1" +clap_complete_fig = "=4.4.0" color-eyre = "=0.6.2" csv = "=1.2.2" ctor = "=0.2.2"