Skip to content

Commit

Permalink
Merge pull request #1244 from nextstrain/feat/ref-minimizer
Browse files Browse the repository at this point in the history
  • Loading branch information
ivan-aksamentov authored Sep 18, 2023
2 parents fe41dd8 + 996980d commit 57c3f47
Show file tree
Hide file tree
Showing 71 changed files with 3,044 additions and 771 deletions.
13 changes: 11 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions packages_rs/nextclade-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ lazy_static = "=1.4.0"
log = "=0.4.19"
nextclade = { path = "../nextclade" }
num_cpus = "=1.16.0"
ordered-float = { version = "=3.9.1", features = ["rand", "serde", "schemars"] }
owo-colors = "=3.5.0"
pretty_assertions = "=1.3.0"
rayon = "=1.7.0"
Expand All @@ -39,6 +40,7 @@ serde = { version = "=1.0.164", features = ["derive"] }
serde_json = { version = "=1.0.99", features = ["preserve_order", "indexmap", "unbounded_depth"] }
strum = "=0.25.0"
strum_macros = "=0.25"
tinytemplate = "=1.2.1"
url = { version = "=2.4.0", features = ["serde"] }
zip = { version = "=0.6.6", default-features = false, features = ["aes-crypto", "bzip2", "deflate", "time"] }

Expand Down
1 change: 1 addition & 0 deletions packages_rs/nextclade-cli/src/cli/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ pub mod nextclade_dataset_get;
pub mod nextclade_dataset_list;
pub mod nextclade_loop;
pub mod nextclade_ordered_writer;
pub mod nextclade_seq_sort;
pub mod verbosity;
88 changes: 85 additions & 3 deletions packages_rs/nextclade-cli/src/cli/nextclade_cli.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use crate::cli::nextclade_dataset_get::nextclade_dataset_get;
use crate::cli::nextclade_dataset_list::nextclade_dataset_list;
use crate::cli::nextclade_loop::nextclade_run;
use crate::cli::nextclade_seq_sort::nextclade_seq_sort;
use crate::cli::verbosity::{Verbosity, WarnLevel};
use crate::io::http_client::ProxyConfig;
use clap::builder::styling;
Expand All @@ -12,6 +13,7 @@ use itertools::Itertools;
use lazy_static::lazy_static;
use nextclade::io::fs::add_extension;
use nextclade::run::params::NextcladeInputParamsOptional;
use nextclade::sort::params::NextcladeSeqSortParams;
use nextclade::utils::global_init::setup_logger;
use nextclade::{getenv, make_error};
use std::fmt::Debug;
Expand Down Expand Up @@ -76,15 +78,20 @@ pub enum NextcladeCommands {
shell: String,
},

/// Run alignment, mutation calling, clade assignment, quality checks and phylogenetic placement
/// Run sequence analysis: alignment, mutation calling, clade assignment, quality checks and phylogenetic placement
///
/// For short help type: `nextclade -h`, for extended help type: `nextclade --help`. Each subcommand has its own help, for example: `nextclade run --help`.
Run(Box<NextcladeRunArgs>),

/// List and download available Nextclade datasets
/// List and download available Nextclade datasets (pathogens)
///
/// For short help type: `nextclade -h`, for extended help type: `nextclade --help`. Each subcommand has its own help, for example: `nextclade run --help`.
/// For short help type: `nextclade -h`, for extended help type: `nextclade --help`. Each subcommand has its own help, for example: `nextclade dataset --help`.
Dataset(Box<NextcladeDatasetArgs>),

/// Sort sequences according to the inferred Nextclade dataset (pathogen)
///
/// For short help type: `nextclade -h`, for extended help type: `nextclade --help`. Each subcommand has its own help, for example: `nextclade sort --help`.
Sort(Box<NextcladeSortArgs>),
}

#[derive(Parser, Debug)]
Expand Down Expand Up @@ -621,6 +628,80 @@ pub struct NextcladeRunArgs {
pub other_params: NextcladeRunOtherParams,
}

// Command-line arguments of the `nextclade sort` subcommand (see `NextcladeCommands::Sort`).
//
// The `///` comments on the fields below are rendered verbatim by clap as `--help` text
// (`verbatim_doc_comment`), so they are user-facing and must name the flags exactly as clap
// derives them from the field names (e.g. `output_path` -> `--output-path`).
//
// `output_dir` and `output_path` share the clap group "outputs", which is what the help text
// describes as the two options being mutually exclusive.
//
// NOTE(review): no `bool` fields are declared directly in this struct; the clippy `allow` below
// may be a leftover from `NextcladeRunArgs` -- confirm before removing.
#[allow(clippy::struct_excessive_bools)]
#[derive(Parser, Debug)]
#[clap(verbatim_doc_comment)]
pub struct NextcladeSortArgs {
    /// Path to one or multiple FASTA files with input sequences
    ///
    /// Supports the following compression formats: "gz", "bz2", "xz", "zst". If no files are provided, the plain fasta input is read from standard input (stdin).
    ///
    /// See: https://en.wikipedia.org/wiki/FASTA_format
    #[clap(value_hint = ValueHint::FilePath)]
    pub input_fastas: Vec<PathBuf>,

    /// Path to input minimizer index JSON file.
    ///
    /// By default the latest reference minimizer index is fetched from the dataset server (default or customized with `--server` argument). If this argument is provided, the algorithm skips fetching the default index and uses the index provided in the JSON file.
    ///
    /// Supports the following compression formats: "gz", "bz2", "xz", "zst". Use "-" to read uncompressed data from standard input (stdin).
    #[clap(long, short = 'm')]
    #[clap(value_hint = ValueHint::FilePath)]
    pub input_minimizer_index_json: Option<PathBuf>,

    /// Path to output directory
    ///
    /// Sequences will be written in subdirectories: one subdirectory per dataset. Sequences inferred to belong to a particular dataset will be placed in the corresponding subdirectory. The subdirectory tree can be nested, depending on how dataset names are organized.
    ///
    /// Mutually exclusive with `--output-path`.
    #[clap(short = 'O', long)]
    #[clap(value_hint = ValueHint::DirPath)]
    #[clap(group = "outputs")]
    pub output_dir: Option<PathBuf>,

    /// Template string for the file path to output sorted sequences. A separate file will be generated per dataset.
    ///
    /// The string should contain template variable `{name}`, where the dataset name will be substituted. Note that if the `{name}` variable contains slashes, they will be interpreted as path segments and subdirectories will be created.
    ///
    /// Make sure you properly quote and/or escape the curly braces, so that your shell, programming language or pipeline manager does not attempt to substitute the variables.
    ///
    /// Mutually exclusive with `--output-dir`.
    ///
    /// If the provided file path ends with one of the supported extensions: "gz", "bz2", "xz", "zst", then the file will be written compressed. If the required directory tree does not exist, it will be created.
    ///
    /// Example for bash shell:
    ///
    /// --output-path='outputs/{name}/sorted.fasta.gz'
    #[clap(short = 'o', long)]
    #[clap(group = "outputs")]
    pub output_path: Option<String>,

    /// Path to output results TSV file
    ///
    /// If the provided file path ends with one of the supported extensions: "gz", "bz2", "xz", "zst", then the file will be written compressed. Use "-" to write uncompressed to standard output (stdout). If the required directory tree does not exist, it will be created.
    #[clap(short = 'r', long)]
    #[clap(value_hint = ValueHint::FilePath)]
    pub output_results_tsv: Option<String>,

    #[clap(flatten, next_help_heading = "Algorithm")]
    pub search_params: NextcladeSeqSortParams,

    #[clap(flatten, next_help_heading = "Other")]
    pub other_params: NextcladeRunOtherParams,

    /// Use custom dataset server.
    ///
    /// You can host your own dataset server, with one or more datasets, grouped into dataset collections, and use this server to provide datasets to users of Nextclade CLI and Nextclade Web. Refer to Nextclade dataset documentation for more details.
    #[clap(long)]
    #[clap(value_hint = ValueHint::Url)]
    // `DATA_FULL_DOMAIN` is a constant defined outside this block; the `expect` fires only if
    // that constant is not a valid URL.
    #[clap(default_value_t = Url::from_str(DATA_FULL_DOMAIN).expect("Invalid URL"))]
    pub server: Url,

    #[clap(flatten)]
    pub proxy_config: ProxyConfig,
}

fn generate_completions(shell: &str) -> Result<(), Report> {
let mut command = NextcladeArgs::command();

Expand Down Expand Up @@ -907,5 +988,6 @@ pub fn nextclade_parse_cli_args() -> Result<(), Report> {
NextcladeDatasetCommands::List(dataset_list_args) => nextclade_dataset_list(dataset_list_args),
NextcladeDatasetCommands::Get(dataset_get_args) => nextclade_dataset_get(&dataset_get_args),
},
NextcladeCommands::Sort(seq_sort_args) => nextclade_seq_sort(&seq_sort_args),
}
}
Loading

1 comment on commit 57c3f47

@vercel
Copy link

@vercel vercel bot commented on 57c3f47 Sep 18, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Successfully deployed to the following URLs:

nextclade – ./

nextclade-git-master-nextstrain.vercel.app
nextclade-nextstrain.vercel.app
nextclade.vercel.app

Please sign in to comment.