diff --git a/Cargo.lock b/Cargo.lock index 14294cbb3..098454ecf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -480,9 +480,9 @@ dependencies = [ [[package]] name = "clap-markdown" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "325f50228f76921784b6d9f2d62de6778d834483248eefecd27279174797e579" +checksum = "8ebc67e6266e14f8b31541c2f204724fa2ac7ad5c17d6f5908fbb92a60f42cff" dependencies = [ "clap", ] diff --git a/Cargo.toml b/Cargo.toml index 2c74ed66e..b057bd3b4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,7 +32,7 @@ bio-types = "=1.0.0" bzip2 = { version = "=0.4.4", features = ["static"] } chrono = { version = "=0.4.26", default-features = false, features = ["clock", "std", "wasmbind"] } clap = { version = "=4.4.2", features = ["derive", "color", "unicode", "unstable-styles"] } -clap-markdown = "=0.1.3" +clap-markdown = "=0.1.4" clap_complete = "=4.4.1" clap_complete_fig = "=4.4.0" color-eyre = "=0.6.2" diff --git a/docs/user/nextclade-cli/reference.md b/docs/user/nextclade-cli/reference.md index d865f8f2e..dcf7a67b5 100644 --- a/docs/user/nextclade-cli/reference.md +++ b/docs/user/nextclade-cli/reference.md @@ -92,17 +92,71 @@ For short help type: `nextclade -h`, for extended help type: `nextclade --help`. * `` — Path to one or multiple FASTA files with input sequences + Supports the following compression formats: "gz", "bz2", "xz", "zst". If no files provided, the plain fasta input is read from standard input (stdin). + + See: https://en.wikipedia.org/wiki/FASTA_format + ###### **Options:** -* `-D`, `--input-dataset ` — Path to a directory or a zip file containing a dataset + + Example: nextclade run -D dataset/ -O out/ seq1.fasta seq2.fasta +* `-D`, `--input-dataset ` — Path to a directory or a zip file containing a dataset. + + See `nextclade dataset --help` on how to obtain datasets. 
+ + If this flag is not provided, no dataset will be loaded and individual input files have to be provided instead. In this case `--input-ref` is required and `--input-annotation`, `--input-tree` and `--input-pathogen-json` are optional. + + If both the `--input-dataset` and individual `--input-*` flags are provided, each individual flag overrides the corresponding file in the dataset. + + Experimental feature: this argument also accepts a path to an Auspice JSON file. In this case the file is treated as a Nextclade dataset. This requires an Auspice JSON file which contains the `.root_sequence.nuc` field. + + Please refer to Nextclade documentation for more details about Nextclade datasets and their files. * `-d`, `--dataset-name ` — Name of the dataset to download and use during the run -* `-r`, `--input-ref ` — Path to a FASTA file containing reference sequence. This file should contain exactly 1 sequence -* `-a`, `--input-tree ` — Path to Auspice JSON v2 file containing reference tree -* `-p`, `--input-pathogen-json ` — Path to a JSON file containing configuration and data specific to a pathogen -* `-m`, `--input-annotation ` — Path to a file containing genome annotation in GFF3 format -* `-g`, `--cds-selection ` — Comma-separated list of names of coding sequences (CDSes) to use -* `--input-pcr-primers ` — Path to a CSV file containing a list of custom PCR primer sites. This information is used to report mutations in these sites + + This is a convenience shortcut to first downloading a dataset and then immediately running with it. Providing this flag is equivalent to running 2 commands: `dataset get` followed by `run`, with the difference that the dataset files from the first command are not saved to disk and cannot be reused later. The default parameters are used for the dataset (e.g. default reference name and latest version tag). + + See `dataset get --help` and `dataset list --help` for more details.
+ + Note that when using this flag, the dataset will be downloaded on every run. If a new version of the dataset is released between two runs, they will use different versions of the dataset and may produce different results. For the most reproducible runs, and for more control, use the usual 2-step flow with `dataset get` followed by `run`. + + This flag is mutually exclusive with `--input-dataset` +* `-r`, `--input-ref ` — Path to a FASTA file containing reference sequence. This file should contain exactly 1 sequence. + + Overrides path to `reference.fasta` in the dataset (`--input-dataset`). + + Supports the following compression formats: "gz", "bz2", "xz", "zst". Use "-" to read uncompressed data from standard input (stdin). +* `-a`, `--input-tree ` — Path to Auspice JSON v2 file containing reference tree. + + See https://nextstrain.org/docs/bioinformatics/data-formats. + + Overrides path to `tree.json` in the dataset (`--input-dataset`). + + Supports the following compression formats: "gz", "bz2", "xz", "zst". Use "-" to read uncompressed data from standard input (stdin). +* `-p`, `--input-pathogen-json ` — Path to a JSON file containing configuration and data specific to a pathogen. + + Overrides path to `pathogen.json` in the dataset (`--input-dataset`). + + Supports the following compression formats: "gz", "bz2", "xz", "zst". Use "-" to read uncompressed data from standard input (stdin). +* `-m`, `--input-annotation ` — Path to a file containing genome annotation in GFF3 format. + + Genome annotation is used to find coding regions. If not supplied, coding regions will not be translated, amino acid sequences will not be output, amino acid mutations will not be detected and nucleotide sequence alignment will not be informed by codon boundaries. + + List of CDSes can be restricted using `--cds-selection` argument. Otherwise, all CDSes found in the genome annotation will be used.
+ + Overrides genome annotation provided by the dataset (`--input-dataset` or `--dataset-name`). + + Learn more about Generic Feature Format Version 3 (GFF3): https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md + + Supports the following compression formats: "gz", "bz2", "xz", "zst". Use "-" to read uncompressed data from standard input (stdin). +* `-g`, `--cds-selection ` — Comma-separated list of names of coding sequences (CDSes) to use. + + This defines which peptides will be written into outputs, and which CDS will be taken into account during codon-aware alignment and amino acid mutations detection. Must only contain CDS names present in the genome annotation. + + If this flag is not supplied or its value is an empty string, then all CDSes found in the genome annotation will be used. +* `--input-pcr-primers ` — Path to a CSV file containing a list of custom PCR primer sites. This information is used to report mutations in these sites. + + Supports the following compression formats: "gz", "bz2", "xz", "zst". Use "-" to read uncompressed data from standard input (stdin). * `--server ` — Use custom dataset server @@ -112,23 +166,129 @@ For short help type: `nextclade -h`, for extended help type: `nextclade --help`. -* `-O`, `--output-all ` — Produce all of the output files into this directory, using default basename and predefined suffixes and extensions. This is equivalent to specifying each of the individual `--output-*` flags. Convenient when you want to receive all or most of output files into the same directory and don't care about their filenames -* `-n`, `--output-basename ` — Set the base filename to use for output files -* `-s`, `--output-selection ` — Restricts outputs for `--output-all` flag +* `-O`, `--output-all ` — Produce all of the output files into this directory, using default basename and predefined suffixes and extensions. This is equivalent to specifying each of the individual `--output-*` flags.
Convenient when you want to receive all or most of output files into the same directory and don't care about their filenames. + + Output files can be optionally included or excluded using `--output-selection` flag. The base filename can be set using `--output-basename` flag. + + If both the `--output-all` and individual `--output-*` flags are provided, each individual flag overrides the corresponding default output path. + + At least one of the output flags is required: `--output-all`, `--output-fasta`, `--output-ndjson`, `--output-json`, `--output-csv`, `--output-tsv`, `--output-tree`, `--output-translations`. + + If the required directory tree does not exist, it will be created. +* `-n`, `--output-basename ` — Set the base filename to use for output files. + + By default the base filename is extracted from the input sequences file (provided with `--input-fasta`). + + Only valid together with `--output-all` flag. +* `-s`, `--output-selection ` — Restricts outputs for `--output-all` flag. + + Should contain a comma-separated list of names of output files to produce. + + If 'all' is present in the list, then all other entries are ignored and all outputs are produced. + + Only valid together with `--output-all` flag. Possible values: `all`, `fasta`, `json`, `ndjson`, `csv`, `tsv`, `tree`, `tree-nwk`, `translations` -* `-o`, `--output-fasta ` — Path to output FASTA file with aligned sequences -* `-P`, `--output-translations ` — Template string for path to output fasta files containing translated and aligned peptides. A separate file will be generated for every gene -* `-N`, `--output-ndjson ` — Path to output Newline-delimited JSON (NDJSON) results file -* `-J`, `--output-json ` — Path to output JSON results file +* `-o`, `--output-fasta ` — Path to output FASTA file with aligned sequences. + + Takes precedence over paths configured with `--output-all`, `--output-basename` and `--output-selection`. 
+ + If the provided file path ends with one of the supported extensions: "gz", "bz2", "xz", "zst", then the file will be written compressed. Use "-" to write the uncompressed to standard output (stdout). + + If the required directory tree does not exist, it will be created. +* `-P`, `--output-translations ` — Template string for path to output fasta files containing translated and aligned peptides. A separate file will be generated for every gene. + + The string should contain template variable `{cds}`, where the gene name will be substituted. Make sure you properly quote and/or escape the curly braces, so that your shell, programming language or pipeline manager does not attempt to substitute the variables. + + Takes precedence over paths configured with `--output-all`, `--output-basename` and `--output-selection`. + + If the provided file path ends with one of the supported extensions: "gz", "bz2", "xz", "zst", then the file will be written compressed. Use "-" to write the uncompressed to standard output (stdout). + + If the required directory tree does not exist, it will be created. + + Example for bash shell: + + --output-translations='output_dir/nextclade.cds_translation.{cds}.fasta' +* `-N`, `--output-ndjson ` — Path to output Newline-delimited JSON (NDJSON) results file. + + This file format is most suitable for further machine processing of the results. In contrast to plain JSON, it can be streamed line by line, so much bigger outputs are feasible. + + Takes precedence over paths configured with `--output-all`, `--output-basename` and `--output-selection`. + + If the provided file path ends with one of the supported extensions: "gz", "bz2", "xz", "zst", then the file will be written compressed. Use "-" to write the uncompressed to standard output (stdout). + + If the required directory tree does not exist, it will be created. +* `-J`, `--output-json ` — Path to output JSON results file.
+ + This file format is most suitable for further machine processing of the results. + + Takes precedence over paths configured with `--output-all`, `--output-basename` and `--output-selection`. + + If the provided file path ends with one of the supported extensions: "gz", "bz2", "xz", "zst", then the file will be written compressed. Use "-" to write the uncompressed to standard output (stdout). + + If the required directory tree does not exist, it will be created. * `-c`, `--output-csv ` — Path to output CSV results file (delimiter: semicolon) + + This file format is most suitable for human inspection as well as for limited further machine processing of the results. + + CSV and TSV output files are equivalent and only differ in the column delimiters. + + Takes precedence over paths configured with `--output-all`, `--output-basename` and `--output-selection`. + + If the provided file path ends with one of the supported extensions: "gz", "bz2", "xz", "zst", then the file will be written compressed. Use "-" to write the uncompressed to standard output (stdout). + + If the required directory tree does not exist, it will be created. * `-t`, `--output-tsv ` — Path to output TSV results file (delimiter: tab) -* `-C`, `--output-columns-selection ` — Restricts columns written into tabular output files (CSV and TSV) -* `--output-graph ` — Path to output phylogenetic graph with input sequences placed onto it, in Nextclade graph JSON format -* `-T`, `--output-tree ` — Path to output phylogenetic tree with input sequences placed onto it, in Auspice JSON V2 format + + This file format is most suitable for human inspection as well as for limited further machine processing of the results. + + CSV and TSV output files are equivalent and only differ in the column delimiters. + + Takes precedence over paths configured with `--output-all`, `--output-basename` and `--output-selection`. 
+ + If the provided file path ends with one of the supported extensions: "gz", "bz2", "xz", "zst", then the file will be written compressed. Use "-" to write the uncompressed to standard output (stdout). + + If the required directory tree does not exist, it will be created. +* `-C`, `--output-columns-selection ` — Restricts columns written into tabular output files (CSV and TSV). + + Should contain a comma-separated list of individual column names and/or column category names to include into both CSV and TSV outputs. + + If this flag is omitted, or if category 'all' is present in the list, then all other entries are ignored and all columns are written. + + Only valid together with one or multiple of flags: `--output-csv`, `--output-tsv`, `--output-all`. +* `--output-graph ` — Path to output phylogenetic graph with input sequences placed onto it, in Nextclade graph JSON format. + + Currently this format is not stable and not documented. It can change at any time without a warning. Use it at own risk. + + Due to format limitations, it is only feasible to construct the tree for at most a few hundred to a few thousand sequences. If the tree is not needed, omitting this flag reduces processing time and memory consumption. + + Takes precedence over paths configured with `--output-all`, `--output-basename` and `--output-selection`. + + If the provided file path ends with one of the supported extensions: "gz", "bz2", "xz", "zst", then the file will be written compressed. Use "-" to write the uncompressed to standard output (stdout). + + If the required directory tree does not exist, it will be created. +* `-T`, `--output-tree ` — Path to output phylogenetic tree with input sequences placed onto it, in Auspice JSON V2 format. + + For file format description see: https://nextstrain.org/docs/bioinformatics/data-formats + + Due to format limitations, it is only feasible to construct the tree for at most a few hundred to a few thousand sequences. 
If the tree is not needed, omitting this flag reduces processing time and memory consumption. + + Takes precedence over paths configured with `--output-all`, `--output-basename` and `--output-selection`. + + If the provided file path ends with one of the supported extensions: "gz", "bz2", "xz", "zst", then the file will be written compressed. Use "-" to write the uncompressed to standard output (stdout). + + If the required directory tree does not exist, it will be created. * `--output-tree-nwk ` — Path to output phylogenetic tree with input sequences placed onto it, in Newick format (New Hampshire tree format) + For file format description see: https://en.wikipedia.org/wiki/Newick_format + + Takes precedence over paths configured with `--output-all`, `--output-basename` and `--output-selection`. + + If the provided file path ends with one of the supported extensions: "gz", "bz2", "xz", "zst", then the file will be written compressed. Use "-" to write the uncompressed to standard output (stdout). + + If the required directory tree does not exist, it will be created. + * `--include-reference ` — Whether to include aligned reference nucleotide sequence into output nucleotide sequence FASTA file and reference peptides into output peptide FASTA files @@ -138,12 +298,22 @@ For short help type: `nextclade -h`, for extended help type: `nextclade --help`. Possible values: `true`, `false` -* `--in-order ` — Emit output sequences in-order +* `--in-order ` — Emit output sequences in-order. + + With this flag the program will wait for results from the previous sequences to be written to the output files before writing the results of the next sequences, preserving the same order as in the input file. Due to variable sequence processing times, this might introduce unnecessary waiting times, but ensures that the resulting sequences are written in the same order as they occur in the inputs (except for sequences which have errors). 
By default, without this flag, processing might happen out of order, which is faster, due to the elimination of waiting, but might also lead to results written out of order - the order of results is not specified and depends on thread scheduling and processing times of individual sequences. + + This option is only relevant when `--jobs` is greater than 1 or is omitted. + + Note: the sequences which trigger errors during processing will be omitted from outputs, regardless of this flag. Possible values: `true`, `false` * `--replace-unknown ` — Replace unknown nucleotide characters with 'N' + By default, the sequences containing unknown nucleotide characters are skipped with a warning - they are not analyzed and not included into results. If this flag is provided, then before the alignment, all unknown characters are replaced with 'N'. This replacement allows to analyze these sequences. + + The following characters are considered known: '-', 'A', 'B', 'C', 'D', 'G', 'H', 'K', 'M', 'N', 'R', 'S', 'T', 'V', 'W', 'Y' + Possible values: `true`, `false` * `--without-greedy-tree-builder ` — Disable greedy tree builder algorithm @@ -151,7 +321,9 @@ For short help type: `nextclade -h`, for extended help type: `nextclade --help`. Possible values: `true`, `false` * `--masked-muts-weight ` -* `--min-length ` — Minimum length of nucleotide sequence to consider for alignment +* `--min-length ` — Minimum length of nucleotide sequence to consider for alignment. + + If a sequence is shorter than that, alignment will not be attempted and a warning will be emitted. When adjusting this parameter, note that alignment of short sequences can be unreliable. * `--penalty-gap-extend ` — Penalty for extending a gap in alignment. If zero, all gaps regardless of length incur the same penalty * `--penalty-gap-open ` — Penalty for opening of a gap in alignment. A higher penalty results in fewer gaps and more mismatches. 
Should be less than `--penalty-gap-open-in-frame` to avoid gaps in genes * `--penalty-gap-open-in-frame ` — As `--penalty-gap-open`, but for opening gaps at the beginning of a codon. Should be greater than `--penalty-gap-open` and less than `--penalty-gap-open-out-of-frame`, to avoid gaps in genes, but favor gaps that align with codons @@ -186,9 +358,6 @@ For short help type: `nextclade -h`, for extended help type: `nextclade --help`. -* `--aa-group-spacing ` -* `--aa-group-padding ` -* `--other ` * `-j`, `--jobs ` — Number of processing jobs. If not specified, all available CPU threads will be used @@ -218,20 +387,44 @@ For short help type: `nextclade -h`, for extended help type: `nextclade --help`. ###### **Options:** -* `-n`, `--name ` — Restrict list to datasets with this *exact* name -* `-s`, `--search ` — Search datasets by name or by reference +* `-n`, `--name ` — Restrict list to datasets with this *exact* name. + + Can be used to test if a dataset exists. + + Mutually exclusive with --search +* `-s`, `--search ` — Search datasets by name or by reference. + + Will only display datasets containing this substring in their name (path), or either of attributes: "name", "reference name", "reference accession". + + Mutually exclusive with --name * `-t`, `--tag ` — Restrict list to datasets with this exact version tag * `--include-incompatible` — Include dataset versions that are incompatible with this version of Nextclade CLI -* `--include-deprecated` — Include deprecated datasets -* `--no-experimental` — Exclude experimental datasets -* `--no-community` — Exclude community datasets and only show official datasets -* `--json` — Print output in JSON format +* `--include-deprecated` — Include deprecated datasets. + + Authors can mark a dataset as deprecated to express that the dataset will no longer be updated and/or supported. Reach out to dataset authors for concrete details. +* `--no-experimental` — Exclude experimental datasets. 
+ + Authors can mark a dataset as experimental when development of the dataset is still in progress, or if the dataset is incomplete or of lower quality than usual. Use at own risk. Reach out to dataset authors if interested in further development and stabilizing of a particular dataset, and consider contributing. +* `--no-community` — Exclude community datasets and only show official datasets. + + Community datasets are the datasets provided by the members of the broader Nextclade community. These datasets may vary in quality and completeness. Depending on authors' goals, these datasets may be created for specific purposes, rather than for general use. Nextclade team is unable to verify correctness of these datasets and does not provide support for them. For all questions regarding a concrete community dataset, please read its documentation and reach out to its authors. +* `--json` — Print output in JSON format. + + This is useful for automated processing. However, at this time, we cannot guarantee stability of the format. Use at own risk. * `--only-names` — Print only names of the datasets, without any other details -* `--server ` — Use custom dataset server +* `--server ` — Use custom dataset server. + + You can host your own dataset server, with one or more datasets, grouped into dataset collections, and use this server to provide datasets to users of Nextclade CLI and Nextclade Web. Refer to Nextclade dataset documentation for more details. + + Default value: `https://data.master.clades.nextstrain.org/v3` * `-x`, `--proxy ` — Pass all traffic over proxy server. HTTP, HTTPS, and SOCKS5 proxies are supported * `--proxy-user ` — Username for basic authentication on proxy server, if applicable. Only valid when `--proxy` is also supplied. `--proxy-user` and `--proxy-pass` must be either both specified or both omitted * `--proxy-pass ` — Password for basic authentication on proxy server, if applicable. Only valid when `--proxy` is also supplied. 
`--proxy-user` and `--proxy-pass` must be either both specified or both omitted -* `--extra-ca-certs ` — Path to extra CA certificates as a PEM bundle +* `--extra-ca-certs ` — Path to extra CA certificates as a PEM bundle. + + You can also provide the path to CA certificates in the environment variable `NEXTCLADE_EXTRA_CA_CERTS`. The argument takes precedence over the environment variable if both are provided. + + Default CA certificates are those obtained from the platform/OS-level trust store plus those from a baked-in copy of Mozilla's common CA trust store. You can override the certs obtained from the platform trust store by setting `SSL_CERT_FILE` or `SSL_CERT_DIR`. Filenames in the latter must be hashed in the style of OpenSSL's `c_rehash` utility. @@ -249,14 +442,32 @@ For short help type: `nextclade -h`, for extended help type: `nextclade --help`. ###### **Options:** * `-n`, `--name ` — Name of the dataset to download. Type `nextclade dataset list` to view available datasets -* `-t`, `--tag ` — Version tag of the dataset to download -* `--server ` — Use custom dataset server -* `-o`, `--output-dir ` — Path to directory to write dataset files to -* `-z`, `--output-zip ` — Path to resulting dataset zip file +* `-t`, `--tag ` — Version tag of the dataset to download. + + If this flag is not provided the latest version is downloaded. +* `--server ` — Use custom dataset server. + + You can host your own dataset server, with one or more datasets, grouped into dataset collections, and use this server to provide datasets to users of Nextclade CLI and Nextclade Web. Refer to Nextclade dataset documentation for more details. + + Default value: `https://data.master.clades.nextstrain.org/v3` +* `-o`, `--output-dir ` — Path to directory to write dataset files to. + + This flag is mutually exclusive with `--output-zip`, and provides the equivalent output, but in the form of a directory with files, instead of a compressed zip archive. 
+ + If the required directory tree does not exist, it will be created. +* `-z`, `--output-zip ` — Path to resulting dataset zip file. + + This flag is mutually exclusive with `--output-dir`, and provides the equivalent output, but in the form of compressed zip archive instead of a directory with files. + + If the required directory tree does not exist, it will be created. * `-x`, `--proxy ` — Pass all traffic over proxy server. HTTP, HTTPS, and SOCKS5 proxies are supported * `--proxy-user ` — Username for basic authentication on proxy server, if applicable. Only valid when `--proxy` is also supplied. `--proxy-user` and `--proxy-pass` must be either both specified or both omitted * `--proxy-pass ` — Password for basic authentication on proxy server, if applicable. Only valid when `--proxy` is also supplied. `--proxy-user` and `--proxy-pass` must be either both specified or both omitted -* `--extra-ca-certs ` — Path to extra CA certificates as a PEM bundle +* `--extra-ca-certs ` — Path to extra CA certificates as a PEM bundle. + + You can also provide the path to CA certificates in the environment variable `NEXTCLADE_EXTRA_CA_CERTS`. The argument takes precedence over the environment variable if both are provided. + + Default CA certificates are those obtained from the platform/OS-level trust store plus those from a baked-in copy of Mozilla's common CA trust store. You can override the certs obtained from the platform trust store by setting `SSL_CERT_FILE` or `SSL_CERT_DIR`. Filenames in the latter must be hashed in the style of OpenSSL's `c_rehash` utility. @@ -274,30 +485,70 @@ For short help type: `nextclade -h`, for extended help type: `nextclade --help`. * `` — Path to one or multiple FASTA files with input sequences + Supports the following compression formats: "gz", "bz2", "xz", "zst". If no files provided, the plain fasta input is read from standard input (stdin). 
+ + See: https://en.wikipedia.org/wiki/FASTA_format + ###### **Options:** -* `-m`, `--input-minimizer-index-json ` — Path to input minimizer index JSON file +* `-m`, `--input-minimizer-index-json ` — Path to input minimizer index JSON file. + + By default, the latest reference minimizer index is fetched from the dataset server (default or customized with `--server` argument). If this argument is provided, the algorithm skips fetching the default index and uses the index provided in the JSON file. + + Supports the following compression formats: "gz", "bz2", "xz", "zst". Use "-" to read uncompressed data from standard input (stdin). * `-O`, `--output-dir ` — Path to output directory -* `-o`, `--output-path ` — Template string for the file path to output sorted sequences. A separate file will be generated per dataset + + Sequences will be written in subdirectories: one subdirectory per dataset. Sequences inferred to be belonging to a particular dataset will be placed in the corresponding subdirectory. The subdirectory tree can be nested, depending on how dataset names are organized - dataset names can contain slashes, and they will be treated as path segment delimiters. + + If the required directory tree does not exist, it will be created. + + Mutually exclusive with `--output-path`. +* `-o`, `--output-path ` — Template string for the file path to output sorted sequences. A separate file will be generated per dataset. + + The string should contain template variable `{name}`, where the dataset name will be substituted. Note that if the `{name}` variable contains slashes, they will be interpreted as path segments and subdirectories will be created. + + Make sure you properly quote and/or escape the curly braces, so that your shell, programming language or pipeline manager does not attempt to substitute the variables. + + Mutually exclusive with `--output-dir`. 
+ + If the provided file path ends with one of the supported extensions: "gz", "bz2", "xz", "zst", then the file will be written compressed. If the required directory tree does not exist, it will be created. + + Example for bash shell: + + --output-path='outputs/{name}/sorted.fasta.gz' * `-r`, `--output-results-tsv ` — Path to output results TSV file + + If the provided file path ends with one of the supported extensions: "gz", "bz2", "xz", "zst", then the file will be written compressed. Use "-" to write uncompressed to standard output (stdout). If the required directory tree does not exist, it will be created. * `--min-score ` — Minimum value of the score being considered for a detection Default value: `0.1` * `--min-hits ` — Minimum number of the index hits required for a detection Default value: `5` -* `--max-score-gap ` — Maximum score difference between two adjacent dataset matches, after which the less fitting datasets are not considered +* `--max-score-gap ` — Maximum score difference between two adjacent dataset matches, after which the less fitting datasets are not considered. + + This argument will truncate the list of datasets considered for a detection, such that if there is a large enough difference in score ("gap") in the list, all datasets that are worse than the dataset before the gap are removed from consideration. This makes it possible, in situations where there are 2 or more groups of similar datasets, to filter out the groups that are worse than the best group. Default value: `0.2` * `--all-matches` — Whether to consider all datasets + By default, only the top matching dataset is considered. When this flag is provided, all datasets reaching the matching criteria are considered. + Default value: `false` * `-j`, `--jobs ` — Number of processing jobs. If not specified, all available CPU threads will be used -* `--server ` — Use custom dataset server +* `--server ` — Use custom dataset server.
+ + You can host your own dataset server, with one or more datasets, grouped into dataset collections, and use this server to provide datasets to users of Nextclade CLI and Nextclade Web. Refer to Nextclade dataset documentation for more details. + + Default value: `https://data.master.clades.nextstrain.org/v3` * `-x`, `--proxy ` — Pass all traffic over proxy server. HTTP, HTTPS, and SOCKS5 proxies are supported * `--proxy-user ` — Username for basic authentication on proxy server, if applicable. Only valid when `--proxy` is also supplied. `--proxy-user` and `--proxy-pass` must be either both specified or both omitted * `--proxy-pass ` — Password for basic authentication on proxy server, if applicable. Only valid when `--proxy` is also supplied. `--proxy-user` and `--proxy-pass` must be either both specified or both omitted -* `--extra-ca-certs ` — Path to extra CA certificates as a PEM bundle +* `--extra-ca-certs ` — Path to extra CA certificates as a PEM bundle. + + You can also provide the path to CA certificates in the environment variable `NEXTCLADE_EXTRA_CA_CERTS`. The argument takes precedence over the environment variable if both are provided. + + Default CA certificates are those obtained from the platform/OS-level trust store plus those from a baked-in copy of Mozilla's common CA trust store. You can override the certs obtained from the platform trust store by setting `SSL_CERT_FILE` or `SSL_CERT_DIR`. Filenames in the latter must be hashed in the style of OpenSSL's `c_rehash` utility. @@ -311,11 +562,15 @@ For short help type: `nextclade -h`, for extended help type: `nextclade --help`. ###### **Arguments:** -* `` — Genome annotation file in GFF3 format +* `` — Genome annotation file in GFF3 format. 
+ + Learn more about Generic Feature Format Version 3 (GFF3): https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md ###### **Options:** -* `-o`, `--output ` — Path to output JSON or YAML file +* `-o`, `--output ` — Path to output JSON or YAML file. + + The format is chosen based on file extension: ".json" or ".yaml". * `--feature-tree` — Present features in "feature tree" format. This format is a precursor of genome annotation format - it contains all genetic features, even the ones that Nextclade does not use, but also less information about each feature * `--json` — Print console output in JSON format, rather than human-readable table