-
Notifications
You must be signed in to change notification settings - Fork 440
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
3637da0
commit 6d07b87
Showing
2 changed files
with
51 additions
and
63 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -109,14 +109,14 @@ mmseqs easy-linclust | |
<option value="nucleotide">Nucleotide</option> | ||
</param> | ||
<when value="amino_acid"> | ||
<param name="alph_size_amino_acid" type="integer" min="2" max="5" value="5" label="Alphabet size" help=""/> | ||
<param argument="--alph-size" name="alph_size_amino_acid" type="integer" min="2" max="21" value="21" label="Alphabet size" help=""/> | ||
<param argument="--comp-bias-corr-scale" type="float" min="0" max="1" value="1" label="Scale composition bias correction" help=""/> | ||
<param name="kmer_per_seq_scale" type="float" min="0" value="0.000" label="Scale k-mer per sequence based on sequence length" help=""/> | ||
<param argument="--kmer-per-seq-scale" type="float" min="0" value="0.000" label="Scale k-mer per sequence based on sequence length" help=""/> | ||
</when> | ||
<when value="nucleotide"> | ||
<param name="alph_size_nucleotide" type="integer" min="2" max="21" value="21" label="Alphabet size" help=""/> | ||
<param name="zdrop" type="integer" min="0" value="40" label="Maximal allowed difference between score values before alignment is truncated" help=""/> | ||
<param name="kmer_per_seq_scale" type="float" min="0" value="0.200" label="Scale k-mer per sequence based on sequence length" help=""/> | ||
<param argument="--alph-size" name="alph_size_nucleotide" type="integer" min="2" max="5" value="5" label="Alphabet size" help=""/> | ||
<param argument="--zdrop" type="integer" min="0" value="40" label="Maximal allowed difference between score values before alignment is truncated" help=""/> | ||
<param argument="--kmer-per-seq-scale" type="float" min="0" value="0.200" label="Scale k-mer per sequence based on sequence length" help=""/> | ||
<param argument="--adjust-kmer-len" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Adjust k-mer length based on specificity" help=""/> | ||
</when> | ||
</conditional> | ||
|
@@ -129,10 +129,10 @@ mmseqs easy-linclust | |
<option value="4">Query seq. length has to be at least x% of target length</option> | ||
<option value="5">Short seq. needs to be at least x% of the other seq. length</option> | ||
</param> | ||
<param name="cov" type="float" min="0" value="0.800" label="List matches above this fraction of aligned (covered) residues" help="(-c)"/> | ||
<param argument="-c" name="cov" type="float" min="0" value="0.800" label="List matches above this fraction of aligned (covered) residues" help=""/> | ||
<section name="prefilter" title="Pre-filter"> | ||
<param argument="--add-self-matches" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Artificially add entries of queries with themselves (for clustering)" help=""/> | ||
<param name="kmer_length" type="integer" min="0" value="0" label="k-mer length" help="(0: automatically set to optimum)"/> | ||
<param argument="-k" name="kmer_length" type="integer" min="0" value="0" label="k-mer length" help="(0: automatically set to optimum)"/> | ||
<param argument="--mask" type="select" label="Mask sequences in k-mer stage" help=""> | ||
<option value="0">Without low complexity masking</option> | ||
<option value="1" selected="true">With low complexity masking</option> | ||
|
@@ -148,7 +148,7 @@ mmseqs easy-linclust | |
</param> | ||
</section> | ||
<section name="align" title="Align"> | ||
<param name="convertalis" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Add backtrace string" help="Convert to alignments with mmseqs convertalis module (-a)"/> | ||
<param argument="-a" name="convertalis" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Add backtrace string" help="Convert to alignments with mmseqs convertalis module"/> | ||
<param argument="--alignment-mode" type="select" label="Alignment mode : How to compute the alignment" help="" > | ||
<option value="0" selected="true">Automatic</option> | ||
<option value="1">Only score and end_pos</option> | ||
|
@@ -165,16 +165,16 @@ mmseqs easy-linclust | |
<option value="5">score only (output) cluster format</option> | ||
</param> | ||
<param argument="--wrapped-scoring" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Double the (nucleotide) query sequence during the scoring process" help="Allow wrapped diagonal scoring around end and start"/> | ||
<param name="evalue" type="float" min="0" value="1.000E-03" label="E-value threshold" help="List matches below this E-value (-e)"/> | ||
<param argument="-e" name="evalue" type="float" min="0" value="1.000E-03" label="E-value threshold" help="List matches below this E-value"/> | ||
<param argument="--min-aln-len" type="integer" min="0" value="0" label="Minimum alignment length" help=""/> | ||
<param argument="--seq-id-mode" type="select" label="Sequence identity mode" help="" > | ||
<option value="0" selected="true">Alignment length</option> | ||
<option value="1">Shorter</option> | ||
<option value="2">Longer sequence</option> | ||
</param> | ||
<param argument="--alt-ali" type="integer" min="0" value="0" label="Show up to this many alternative alignments" help=""/> | ||
<param argument="--max-rejected" type="integer" min="0" value="2147483647" label="Maximum rejected alignments before alignment calculation for a query is stopped" help=""/> | ||
<param argument="--max-accept" type="integer" min="0" value="2147483647" label="Maximum accepted alignments before alignment calculation for a query is stopped" help=""/> | ||
<param argument="--max-rejected" type="integer" min="0" value="2147483647" optional="true" label="Maximum rejected alignments before alignment calculation for a query is stopped" help=""/> | ||
<param argument="--max-accept" type="integer" min="0" value="2147483647" optional="true" label="Maximum accepted alignments before alignment calculation for a query is stopped" help=""/> | ||
<param argument="--score-bias" type="float" value="0" label="Score bias when computing Smith-Waterman alignment" help=""/> | ||
<param argument="--realign" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Compute more conservative, shorter alignments" help="Scores and E-values not changed"/> | ||
<param argument="--realign-score-bias" type="float" value="-0.200" label="Additional bias when computing realignment" help=""/> | ||
|
@@ -221,7 +221,7 @@ mmseqs easy-linclust | |
<param argument="--id-offset" type="integer" min="0" value="0" label="Numeric ids in index file are offset by this value" help=""/> | ||
</section> | ||
<section name="common" title="Common"> | ||
<param argument="--max-seq-len" type="integer" min="0" value="65535" label="Maximum sequence length" help=""/> | ||
<param argument="--max-seq-len" type="integer" min="0" value="65535" optional="true" label="Maximum sequence length" help=""/> | ||
</section> | ||
<section name="expert" title="Expert"> | ||
<param argument="--filter-hits" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Filter hits by seq.id. and coverage" help=""/> | ||
|
@@ -231,7 +231,7 @@ mmseqs easy-linclust | |
</param> | ||
</section> | ||
<section name="output_files" title="Selection of the output files"> | ||
<param name="output_selection" type="select" display="checkboxes" multiple="true" label="Output files selection"> | ||
<param name="output_selection" type="select" min="1" display="checkboxes" multiple="true" label="Output files selection"> | ||
<option value="file_rep_seq" selected="true">Representatives sequences in fasta</option> | ||
<option value="file_all_seq" selected="true">FASTA-like per cluster</option> | ||
<option value="file_cluster_tsv" selected="true">Adjecency list in TSV</option> | ||
|
@@ -261,7 +261,8 @@ mmseqs easy-linclust | |
<output name="output_all_seq" ftype="fasta"> | ||
<assert_contents> | ||
<has_text text="GAATAGCGGGACGCCAAGGGGCGGCCTTGCGTCCGCCCACGTGTGTGCTTGGCACGCGGGGCGTCCGCAAACCTTTGATCGGAACTTGCGATGGAGAAGCT"/> | ||
<has_size value="627000" delta="50000"/> | ||
<has_size value="627000" delta="20000"/> | ||
<has_n_lines n="14806" delta="500"/> | ||
</assert_contents> | ||
</output> | ||
<output name="output_cluster" ftype="tabular"> | ||
|
@@ -286,15 +287,8 @@ It can perform profile searches with the same sensitivity as PSI-BLAST at over 4 | |
MMseqs easy-linclust is useful for clustering entries from a FASTA/FASTQ file using the cascaded clustering algorithm. | ||
It offers an efficient clustering workflow that scales linearly with input size. It is similar to easy-cluster, but more suitable for handling very large datasets. | ||
https://github.com/soedinglab/MMseqs2 | ||
By Martin Steinegger <[email protected]> & Milot Mirdita <[email protected]> & Florian Breitwieser <[email protected]> & Eli Levy Karin <[email protected]> | ||
----- | ||
**References** | ||
- Steinegger M, Soding J: MMseqs2 enables sensitive protein sequence searching for the analysis of massive data sets. Nature Biotechnology, 35(11), 1026-1028 (2017) | ||
- Mirdita M, Steinegger M, Breitwieser F, Soding J, Levy Karin E: Fast and sensitive taxonomic assignment to metagenomic contigs. Bioinformatics, btab184 (2021) | ||
]]></help> | ||
<expand macro="citations"/> | ||
</tool> |
Oops, something went wrong.