diff --git a/Cargo.lock b/Cargo.lock index 86da874b8..991b436fd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -75,16 +75,15 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstream" -version = "0.3.2" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ca84f3628370c59db74ee214b3263d58f9aadd9b4fe7e711fd87dc452b7f163" +checksum = "b1f58811cfac344940f1a400b6e6231ce35171f614f26439e80f8c1465c5cc0c" dependencies = [ "anstyle", "anstyle-parse", "anstyle-query", "anstyle-wincon", "colorchoice", - "is-terminal", "utf8parse", ] @@ -114,9 +113,9 @@ dependencies = [ [[package]] name = "anstyle-wincon" -version = "1.0.1" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "180abfa45703aebe0093f79badacc01b8fd4ea2e35118747e5811127f926e188" +checksum = "58f54d10c6dfa51283a066ceab3ec1ab78d13fae00aa49243a45e4571fb79dfd" dependencies = [ "anstyle", "windows-sys", @@ -469,51 +468,42 @@ dependencies = [ [[package]] name = "clap" -version = "4.3.10" +version = "4.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "384e169cc618c613d5e3ca6404dda77a8685a63e08660dcc64abaf7da7cb0c7a" +checksum = "6a13b88d2c62ff462f88e4a121f17a82c1af05693a2f192b5c38d14de73c19f6" dependencies = [ "clap_builder", "clap_derive", - "once_cell", -] - -[[package]] -name = "clap-verbosity-flag" -version = "2.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1eef05769009513df2eb1c3b4613e7fad873a14c600ff025b08f250f59fee7de" -dependencies = [ - "clap", - "log", ] [[package]] name = "clap_builder" -version = "4.3.10" +version = "4.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef137bbe35aab78bdb468ccfba75a5f4d8321ae011d34063770780545176af2d" +checksum = "2bb9faaa7c2ef94b2743a21f5a29e6f0010dff4caa69ac8e9d6cf8b6fa74da08" dependencies = [ "anstream", "anstyle", "clap_lex", "strsim", + "unicase", + "unicode-width", ] [[package]] name = "clap_complete" -version = "4.3.1" +version = "4.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f6b5c519bab3ea61843a7923d074b04245624bb84a64a8c150f5deb014e388b" +checksum = "4110a1e6af615a9e6d0a36f805d5c99099f8bab9b8042f5bc1fa220a4a89e36f" dependencies = [ "clap", ] [[package]] name = "clap_complete_fig" -version = "4.3.1" +version = "4.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99fee1d30a51305a6c2ed3fc5709be3c8af626c9c958e04dd9ae94e27bcbce9f" +checksum = "9e9bae21b3f6eb417ad3054c8b1094aa0542116eba4979b1b271baefbfa6b965" dependencies = [ "clap", "clap_complete", @@ -521,9 +511,9 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.3.2" +version = "4.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8cd2b2a819ad6eec39e8f1d6b53001af1e5469f8c177579cdaeb313115b825f" +checksum = "0862016ff20d69b84ef8247369fabf5c008a7417002411897d40ee1f4532b873" dependencies = [ "heck", "proc-macro2", @@ -1722,7 +1712,6 @@ dependencies = [ "bzip2", "chrono", "clap", - "clap-verbosity-flag", "clap_complete", "clap_complete_fig", "color-eyre", @@ -3041,6 +3030,15 @@ version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" +[[package]] +name = "unicase" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7d2d4dafb69621809a81864c9c1b864479e1235c0dd4e199924b9742439ed89" +dependencies = [ + "version_check", +] + [[package]] name = "unicode-bidi" version = "0.3.13" diff --git a/packages_rs/nextclade-cli/Cargo.toml b/packages_rs/nextclade-cli/Cargo.toml index aa8366840..c87a9b83f 100644 --- a/packages_rs/nextclade-cli/Cargo.toml +++ b/packages_rs/nextclade-cli/Cargo.toml @@ -11,9 +11,9 @@ publish = false [dependencies] assert2 = "=0.3.11" -clap = { version = "=4.3.10", features = ["derive"] } -clap_complete = "=4.3.1" -clap_complete_fig = "=4.3.1" +clap = { version = "=4.4.2", features = ["derive", "color", "unicode", "unstable-styles"] } +clap_complete = "=4.4.1" +clap_complete_fig = "=4.4.0" color-eyre = "=0.6.2" comfy-table = "=7.0.1" crossbeam = "=0.8.2" diff --git a/packages_rs/nextclade-cli/src/cli/nextclade_cli.rs b/packages_rs/nextclade-cli/src/cli/nextclade_cli.rs index 49f53f5bb..328661017 100644 --- a/packages_rs/nextclade-cli/src/cli/nextclade_cli.rs +++ b/packages_rs/nextclade-cli/src/cli/nextclade_cli.rs @@ -4,6 +4,7 @@ use crate::cli::nextclade_loop::nextclade_run; use crate::cli::nextclade_seq_sort::nextclade_seq_sort; use crate::cli::verbosity::{Verbosity, WarnLevel}; use crate::io::http_client::ProxyConfig; +use clap::builder::styling; use clap::{ArgGroup, CommandFactory, Parser, Subcommand, ValueEnum, ValueHint}; use clap_complete::{generate, Generator, Shell}; use clap_complete_fig::Fig; @@ -29,10 +30,19 @@ lazy_static! { pub static ref SHELLS: Vec<&'static str> = ["bash", "elvish", "fish", "fig", "powershell", "zsh"].to_vec(); } +fn styles() -> styling::Styles { + styling::Styles::styled() + .header(styling::AnsiColor::Green.on_default() | styling::Effects::BOLD) + .usage(styling::AnsiColor::Green.on_default() | styling::Effects::BOLD) + .literal(styling::AnsiColor::Blue.on_default() | styling::Effects::BOLD) + .placeholder(styling::AnsiColor::Cyan.on_default()) +} + #[derive(Parser, Debug)] #[clap(name = "nextclade")] #[clap(author, version)] #[clap(verbatim_doc_comment)] +#[clap(styles = styles())] /// Viral genome alignment, mutation calling, clade assignment, quality checks and phylogenetic placement. /// /// Nextclade is a part of Nextstrain: https://nextstrain.org diff --git a/packages_rs/nextclade/Cargo.toml b/packages_rs/nextclade/Cargo.toml index 8fabe54c6..e49b356b5 100644 --- a/packages_rs/nextclade/Cargo.toml +++ b/packages_rs/nextclade/Cargo.toml @@ -18,10 +18,9 @@ auto_ops = "=0.3.0" bio = "=1.3.1" bio-types = "=1.0.0" chrono = { version = "=0.4.26", default-features = false, features = ["clock", "std", "wasmbind"] } -clap = { version = "=4.3.10", features = ["derive"] } -clap-verbosity-flag = "=2.0.1" -clap_complete = "=4.3.1" -clap_complete_fig = "=4.3.1" +clap = { version = "=4.4.2", features = ["derive", "color", "unicode", "unstable-styles"] } +clap_complete = "=4.4.1" +clap_complete_fig = "=4.4.0" color-eyre = "=0.6.2" csv = "=1.2.2" ctor = "=0.2.2" diff --git a/packages_rs/nextclade/src/align/align.rs b/packages_rs/nextclade/src/align/align.rs index 705f295a3..37e97bfa2 100644 --- a/packages_rs/nextclade/src/align/align.rs +++ b/packages_rs/nextclade/src/align/align.rs @@ -11,6 +11,7 @@ use crate::alphabet::nuc::Nuc; use crate::make_error; use eyre::{Report, WrapErr}; use log::{info, trace}; +use std::cmp::max; fn align_pairwise>( qry_seq: &[T], @@ -62,42 +63,60 @@ pub fn align_nuc( let mut terminal_bandwidth = params.terminal_bandwidth as isize; let mut excess_bandwidth = params.excess_bandwidth as isize; - let mut allowed_mismatches = params.allowed_mismatches as isize; - + let mut minimal_bandwidth = max(1, params.allowed_mismatches as isize); + let max_band_area = params.max_band_area; let mut attempt = 0; - loop { - let stripes = create_alignment_band( + let (mut stripes, mut band_area) = create_alignment_band( + &seed_matches, + qry_len as isize, + ref_len as isize, + terminal_bandwidth, + excess_bandwidth, + minimal_bandwidth, + ); + if band_area > max_band_area { + return make_error!("Alignment matrix size {band_area} exceeds maximum value {max_band_area}. The threshold can be adjusted using CLI flag '--max-band-area' or using 'maxBandArea' field in the dataset's virus_properties.json"); + } + + let mut alignment = align_pairwise(&qry_seq, ref_seq, gap_open_close, params, &stripes); + + while alignment.hit_boundary && attempt < params.max_alignment_attempts { + info!("When processing sequence #{index} '{seq_name}': In nucleotide alignment: Band boundary is hit on attempt {}. Retrying with relaxed parameters. Alignment score was: {}", attempt+1, alignment.alignment_score); + // double bandwidth parameters or increase to one if 0 + terminal_bandwidth = max(2 * terminal_bandwidth, 1); + excess_bandwidth = max(2 * excess_bandwidth, 1); + minimal_bandwidth = max(2 * minimal_bandwidth, 1); + attempt += 1; + // make new band + (stripes, band_area) = create_alignment_band( &seed_matches, qry_len as isize, ref_len as isize, terminal_bandwidth, excess_bandwidth, - allowed_mismatches, - params.max_band_area, - )?; - - let mut alignment = align_pairwise(&qry_seq, ref_seq, gap_open_close, params, &stripes); - alignment.is_reverse_complement = is_reverse_complement; - - if alignment.hit_boundary { - terminal_bandwidth *= 2; - excess_bandwidth *= 2; - allowed_mismatches *= 2; - attempt += 1; - info!("When processing sequence #{index} '{seq_name}': In nucleotide alignment: Band boundary is hit on attempt {}. Retrying with relaxed parameters. Alignment score was: {}", attempt, alignment.alignment_score); - } else { - if attempt > 0 { - info!("When processing sequence #{index} '{seq_name}': In nucleotide alignment: Succeeded without hitting band boundary on attempt {}. Alignment score was: {}", attempt+1, alignment.alignment_score); - } - return Ok(alignment); + minimal_bandwidth, + ); + // discard stripes and break to return previous alignment + if band_area > max_band_area { + break; } - - if attempt > params.max_alignment_attempts { - info!("When processing sequence #{index} '{seq_name}': In nucleotide alignment: Attempted to relax band parameters {attempt} times, but still hitting the band boundary. Alignment score was: {}", alignment.alignment_score); - return Ok(alignment); + // realign + alignment = align_pairwise(&qry_seq, ref_seq, gap_open_close, params, &stripes); + } + // report success/failure of broadening of band width + if alignment.hit_boundary { + info!("When processing sequence #{index} '{seq_name}': In nucleotide alignment: Attempted to relax band parameters {attempt} times, but still hitting the band boundary. Returning last attempt with score: {}", alignment.alignment_score); + if band_area > max_band_area { + info!( + "When processing sequence #{index} '{seq_name}': final band area {band_area} exceeded the cutoff {max_band_area}" + ); } + } else if attempt > 0 { + info!("When processing sequence #{index} '{seq_name}': In nucleotide alignment: Succeeded without hitting band boundary on attempt {}. Alignment score was: {}", attempt+1, alignment.alignment_score); } + alignment.is_reverse_complement = is_reverse_complement; + Ok(alignment) } /// align amino acids using a fixed bandwidth banded alignment while penalizing terminal indels diff --git a/packages_rs/nextclade/src/align/seed_alignment.rs b/packages_rs/nextclade/src/align/seed_alignment.rs index d3877e581..5c8ee9e61 100644 --- a/packages_rs/nextclade/src/align/seed_alignment.rs +++ b/packages_rs/nextclade/src/align/seed_alignment.rs @@ -215,8 +215,7 @@ pub fn create_alignment_band( terminal_bandwidth: isize, excess_bandwidth: isize, minimal_bandwidth: isize, - max_band_area: usize, -) -> Result, Report> { +) -> (Vec, usize) { // This function steps through the chained seeds and determines and appropriate band // defined via stripes in query coordinates. These bands will later be chopped to reachable ranges @@ -306,13 +305,7 @@ pub fn create_alignment_band( // write_stripes_to_file(&stripes, "stripes.csv"); // trim stripes to reachable regions - let (regularized_stripes, band_area) = regularize_stripes(stripes, qry_len as usize); - - if band_area > max_band_area { - return make_error!("Alignment matrix size {band_area} exceeds maximum value {max_band_area}. The threshold can be adjusted using CLI flag '--max-band-area' or using 'maxBandArea' field in the dataset's virus_properties.json"); - } - - Ok(regularized_stripes) + regularize_stripes(stripes, qry_len as usize) } #[derive(Clone, Copy, Debug)] @@ -420,7 +413,6 @@ mod tests { let max_indel = 100; let qry_len = 30; let ref_len = 40; - let max_band_area = 500_000_000; let result = create_alignment_band( &seed_matches, @@ -429,7 +421,6 @@ mod tests { terminal_bandwidth, excess_bandwidth, allowed_mismatches, - max_band_area, ); Ok(())