Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into feat/ref-minimizer
Browse files Browse the repository at this point in the history
  • Loading branch information
ivan-aksamentov committed Sep 8, 2023
2 parents bfa0e9b + 42fb63f commit 1a7c48e
Show file tree
Hide file tree
Showing 6 changed files with 88 additions and 71 deletions.
52 changes: 25 additions & 27 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions packages_rs/nextclade-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ publish = false

[dependencies]
assert2 = "=0.3.11"
clap = { version = "=4.3.10", features = ["derive"] }
clap_complete = "=4.3.1"
clap_complete_fig = "=4.3.1"
clap = { version = "=4.4.2", features = ["derive", "color", "unicode", "unstable-styles"] }
clap_complete = "=4.4.1"
clap_complete_fig = "=4.4.0"
color-eyre = "=0.6.2"
comfy-table = "=7.0.1"
crossbeam = "=0.8.2"
Expand Down
10 changes: 10 additions & 0 deletions packages_rs/nextclade-cli/src/cli/nextclade_cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use crate::cli::nextclade_loop::nextclade_run;
use crate::cli::nextclade_seq_sort::nextclade_seq_sort;
use crate::cli::verbosity::{Verbosity, WarnLevel};
use crate::io::http_client::ProxyConfig;
use clap::builder::styling;
use clap::{ArgGroup, CommandFactory, Parser, Subcommand, ValueEnum, ValueHint};
use clap_complete::{generate, Generator, Shell};
use clap_complete_fig::Fig;
Expand All @@ -29,10 +30,19 @@ lazy_static! {
pub static ref SHELLS: Vec<&'static str> = ["bash", "elvish", "fish", "fig", "powershell", "zsh"].to_vec();
}

fn styles() -> styling::Styles {
styling::Styles::styled()
.header(styling::AnsiColor::Green.on_default() | styling::Effects::BOLD)
.usage(styling::AnsiColor::Green.on_default() | styling::Effects::BOLD)
.literal(styling::AnsiColor::Blue.on_default() | styling::Effects::BOLD)
.placeholder(styling::AnsiColor::Cyan.on_default())
}

#[derive(Parser, Debug)]
#[clap(name = "nextclade")]
#[clap(author, version)]
#[clap(verbatim_doc_comment)]
#[clap(styles = styles())]
/// Viral genome alignment, mutation calling, clade assignment, quality checks and phylogenetic placement.
///
/// Nextclade is a part of Nextstrain: https://nextstrain.org
Expand Down
7 changes: 3 additions & 4 deletions packages_rs/nextclade/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,9 @@ auto_ops = "=0.3.0"
bio = "=1.3.1"
bio-types = "=1.0.0"
chrono = { version = "=0.4.26", default-features = false, features = ["clock", "std", "wasmbind"] }
clap = { version = "=4.3.10", features = ["derive"] }
clap-verbosity-flag = "=2.0.1"
clap_complete = "=4.3.1"
clap_complete_fig = "=4.3.1"
clap = { version = "=4.4.2", features = ["derive", "color", "unicode", "unstable-styles"] }
clap_complete = "=4.4.1"
clap_complete_fig = "=4.4.0"
color-eyre = "=0.6.2"
csv = "=1.2.2"
ctor = "=0.2.2"
Expand Down
71 changes: 45 additions & 26 deletions packages_rs/nextclade/src/align/align.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use crate::alphabet::nuc::Nuc;
use crate::make_error;
use eyre::{Report, WrapErr};
use log::{info, trace};
use std::cmp::max;

fn align_pairwise<T: Letter<T>>(
qry_seq: &[T],
Expand Down Expand Up @@ -62,42 +63,60 @@ pub fn align_nuc(

let mut terminal_bandwidth = params.terminal_bandwidth as isize;
let mut excess_bandwidth = params.excess_bandwidth as isize;
let mut allowed_mismatches = params.allowed_mismatches as isize;

let mut minimal_bandwidth = max(1, params.allowed_mismatches as isize);
let max_band_area = params.max_band_area;
let mut attempt = 0;

loop {
let stripes = create_alignment_band(
let (mut stripes, mut band_area) = create_alignment_band(
&seed_matches,
qry_len as isize,
ref_len as isize,
terminal_bandwidth,
excess_bandwidth,
minimal_bandwidth,
);
if band_area > max_band_area {
return make_error!("Alignment matrix size {band_area} exceeds maximum value {max_band_area}. The threshold can be adjusted using CLI flag '--max-band-area' or using 'maxBandArea' field in the dataset's virus_properties.json");
}

let mut alignment = align_pairwise(&qry_seq, ref_seq, gap_open_close, params, &stripes);

while alignment.hit_boundary && attempt < params.max_alignment_attempts {
info!("When processing sequence #{index} '{seq_name}': In nucleotide alignment: Band boundary is hit on attempt {}. Retrying with relaxed parameters. Alignment score was: {}", attempt+1, alignment.alignment_score);
// double bandwidth parameters or increase to one if 0
terminal_bandwidth = max(2 * terminal_bandwidth, 1);
excess_bandwidth = max(2 * excess_bandwidth, 1);
minimal_bandwidth = max(2 * minimal_bandwidth, 1);
attempt += 1;
// make new band
(stripes, band_area) = create_alignment_band(
&seed_matches,
qry_len as isize,
ref_len as isize,
terminal_bandwidth,
excess_bandwidth,
allowed_mismatches,
params.max_band_area,
)?;

let mut alignment = align_pairwise(&qry_seq, ref_seq, gap_open_close, params, &stripes);
alignment.is_reverse_complement = is_reverse_complement;

if alignment.hit_boundary {
terminal_bandwidth *= 2;
excess_bandwidth *= 2;
allowed_mismatches *= 2;
attempt += 1;
info!("When processing sequence #{index} '{seq_name}': In nucleotide alignment: Band boundary is hit on attempt {}. Retrying with relaxed parameters. Alignment score was: {}", attempt, alignment.alignment_score);
} else {
if attempt > 0 {
info!("When processing sequence #{index} '{seq_name}': In nucleotide alignment: Succeeded without hitting band boundary on attempt {}. Alignment score was: {}", attempt+1, alignment.alignment_score);
}
return Ok(alignment);
minimal_bandwidth,
);
// discard stripes and break to return previous alignment
if band_area > max_band_area {
break;
}

if attempt > params.max_alignment_attempts {
info!("When processing sequence #{index} '{seq_name}': In nucleotide alignment: Attempted to relax band parameters {attempt} times, but still hitting the band boundary. Alignment score was: {}", alignment.alignment_score);
return Ok(alignment);
// realign
alignment = align_pairwise(&qry_seq, ref_seq, gap_open_close, params, &stripes);
}
// report success/failure of broadening of band width
if alignment.hit_boundary {
info!("When processing sequence #{index} '{seq_name}': In nucleotide alignment: Attempted to relax band parameters {attempt} times, but still hitting the band boundary. Returning last attempt with score: {}", alignment.alignment_score);
if band_area > max_band_area {
info!(
"When processing sequence #{index} '{seq_name}': final band area {band_area} exceeded the cutoff {max_band_area}"
);
}
} else if attempt > 0 {
info!("When processing sequence #{index} '{seq_name}': In nucleotide alignment: Succeeded without hitting band boundary on attempt {}. Alignment score was: {}", attempt+1, alignment.alignment_score);
}
alignment.is_reverse_complement = is_reverse_complement;
Ok(alignment)
}

/// align amino acids using a fixed bandwidth banded alignment while penalizing terminal indels
Expand Down
13 changes: 2 additions & 11 deletions packages_rs/nextclade/src/align/seed_alignment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -215,8 +215,7 @@ pub fn create_alignment_band(
terminal_bandwidth: isize,
excess_bandwidth: isize,
minimal_bandwidth: isize,
max_band_area: usize,
) -> Result<Vec<Stripe>, Report> {
) -> (Vec<Stripe>, usize) {
// This function steps through the chained seeds and determines and appropriate band
// defined via stripes in query coordinates. These bands will later be chopped to reachable ranges

Expand Down Expand Up @@ -306,13 +305,7 @@ pub fn create_alignment_band(
// write_stripes_to_file(&stripes, "stripes.csv");

// trim stripes to reachable regions
let (regularized_stripes, band_area) = regularize_stripes(stripes, qry_len as usize);

if band_area > max_band_area {
return make_error!("Alignment matrix size {band_area} exceeds maximum value {max_band_area}. The threshold can be adjusted using CLI flag '--max-band-area' or using 'maxBandArea' field in the dataset's virus_properties.json");
}

Ok(regularized_stripes)
regularize_stripes(stripes, qry_len as usize)
}

#[derive(Clone, Copy, Debug)]
Expand Down Expand Up @@ -420,7 +413,6 @@ mod tests {
let max_indel = 100;
let qry_len = 30;
let ref_len = 40;
let max_band_area = 500_000_000;

let result = create_alignment_band(
&seed_matches,
Expand All @@ -429,7 +421,6 @@ mod tests {
terminal_bandwidth,
excess_bandwidth,
allowed_mismatches,
max_band_area,
);

Ok(())
Expand Down

0 comments on commit 1a7c48e

Please sign in to comment.