diff --git a/tools/trimal/.shed.yml b/tools/trimal/.shed.yml new file mode 100644 index 00000000000..5bff86d663f --- /dev/null +++ b/tools/trimal/.shed.yml @@ -0,0 +1,9 @@ +categories: +- Phylogenetics +description: Tool for automated alignment trimming +homepage_url: https://trimal.readthedocs.io +long_description: trimAl is a tool for the automated removal of spurious sequences + or poorly aligned regions from a multiple sequence alignment. +name: trimal +owner: iuc +remote_repository_url: https://github.com/inab/trimal diff --git a/tools/trimal/README.md b/tools/trimal/README.md new file mode 100644 index 00000000000..27037fa01e8 --- /dev/null +++ b/tools/trimal/README.md @@ -0,0 +1,3 @@ +# trimAl + +[trimAl](https://github.com/inab/trimal), a tool for automated alignment trimming in large-scale phylogenetic analyses. diff --git a/tools/trimal/test-data/custom_trimmed_example.009.AA.html b/tools/trimal/test-data/custom_trimmed_example.009.AA.html new file mode 100644 index 00000000000..5091edc18a8 --- /dev/null +++ b/tools/trimal/test-data/custom_trimmed_example.009.AA.html @@ -0,0 +1,74 @@ + +
+ ++ Selected Sequences: 9 /Selected Residues: 63 + Deleted Sequences: 0 /Deleted Residues: 122 + + Gaps Scores: =0= <.001 <.050 <.100 <.150 <.200 <.250 <.350 <.500 <.750 <1.00 =1= + Similarity Scores: =0= <1e-6 <1e-5 <1e-4 <.001 <.010 <.100 <.250 <.500 <.750 <1.00 =1= + + 10 20 30 40 50 60 70 80 90 100 110 120 + =========+=========+=========+=========+=========+=========+=========+=========+=========+=========+=========+=========+ + Csa004271 ---------------------------------MYMAMGHFFDRDDVALKNISEYFKECSEEEREHANKMIEFHNKRGGTTTYFPIKAPGSFDPANFNTIKAMNCALALEVNVNKSLLAL + Xtr21234 ----MISQVRQNYSHDCEAAVNRMVNLEMYASYTYLSMSHYFDRDDVALHHVAEFFKEQSKEERECAEKLMKCQNKRGGRIVLQDIKKPERDEWG--STLDAMQTALDLEKHVNQALLDL + LcaH ----MSSQVRQNFHQDCEAAINRQINLELYASYVYLSMAYYFDRDDQALHNFAKFFRHQSHEEREHAEKLMKLQNQRGGRIFLQDVRKPDRDEWG--SGVEALECALQLEKSVNQSLLDL + Hsa167996 MTTASTSQVRQNYHQDSEAAINRQINLELYASYVYLSMSYYFDRDDVALKNFAKYFLHQSHEEREHAEKLMKLQNQRGGRIFLQDIKKPDCDDWE--SGLNAMECALHLEKNVNQSLLEL + Mmu024661 MTTASPSQVRQNYHQDAEAAINRQINLELYASYVYLSMSCYFDRDDVALKNFAKYFLHQSHEEREHAEKLMKLQNQRGGRIFLQDIKKPDRDDWE--SGLNAMECALHLEKSVNQSLLEL + Dre37936 ---METSQIRQNYVRDCEAAINKMINLELYAGYTYTSMAHYFKRDDVALPGFAKFFKKNSEEEREHAEKFMEFQNKRGGRIVLQDIKKPDRDVWG--NGLIAMQCALQLEKNVNQALLDL + LcaM ----MESQVRQNYHRDCEAAVNRMVNMEMFASYTYTSMAFYFSRDDVALPGFSHFFKENSDEEREHAEKLLSFQNKRGGHIFLQDIKKPERDEWG--SGLEAMQCALQLKKNVNQALLDL + Tru14292 ----MESQVRQNYHRDCEAAINKMINMELYASYTYTSMAFFFSRDDVALPGFAHFFKENSDEEREHAEKLLSFQNKRGGRIFLQDIKKPERDEWG--SGLEAMQCALQLEKKVNQALLDL + Ola20972 ----MESQVRQNYHRDCEAAINRMVNMELFASYTYTSMAFYFDRDDVALPGFSHFFKENSHEEKEHADKLLSFQNKRGGRIFLQDVKKPERDEWG--SGLEAMQCALQLEKNVNQALLDL + + Selected Cols: + + Gaps Scores: + Similarity Scores: + + 130 140 150 160 170 180 + =========+=========+=========+=========+=========+=========+===== + Csa004271 HE--TANGDPEFQDFIEANFLHEQVDAIKKLKDYITNLKLVG---TGLGEFLFDKHFKSS----- + Xtr21234 HNLATERKDPHICDFLESEHLDEQVKHMKKFGDHITNLKRLGVPQNGMGEYLFDKHSLS------ + LcaH HKLCSDHNDPHLCDFIETHYLDEQVKSIKELADWVTNLRRMGAPQNGMAEYLFDKHTLGKES--S + Hsa167996 HKLATDKNDPHLCDFIETHYLNEQVKAIKELGDHVTNLRKMGAPESGLAEYLFDKHTLGDSDNES + Mmu024661 HKLATDKNDPHLCDFIETYYLSEQVKSIKELGDHVTNLRKMGAPEAGMAEYLFDKHTLGHGD-ES + Dre37936 HKLATEMGDPHLCDFLETHYLNEQVEAIKKLGDHITNLSKMDAGNNRMAEYLFDKHTLDS----- + LcaM HKLASDHGDPHLCDFLETHYLNEQVEAIKKLGDYISNLSRMDAQKNKMAEYLFDKHSLGGKS--- + Tru14292 HKLASDHVDPHLCDFLESHYLNEQVEAIKKLGDYITNLSRMDAQNNKMAEYLFDKHTLGSKS--- + Ola20972 HKVASDHKDPHMCDFLETHYLNEQVESIKKIGDHITNLTRMDAHTNKMAEYLFDKHTLGSKS--- + + Selected Cols: + + Gaps Scores: + Similarity Scores: ++ + diff --git a/tools/trimal/test-data/custom_trimmed_example.009.AA.phy b/tools/trimal/test-data/custom_trimmed_example.009.AA.phy new file mode 100644 index 00000000000..bf5f288872a --- /dev/null +++ b/tools/trimal/test-data/custom_trimmed_example.009.AA.phy @@ -0,0 +1,11 @@ + 9 63 +Csa004271 --------------YMFRDDALFSEEEAKNRGGPAALLVNLLLHDPDFELEQVKDNLELFDKH +Xtr21234 SQRQNDEAANNEAYYMFRDDALFSEEEAKNRGGPAALLVNLLLHDPDFELEQVKDNLELFDKH +LcaH SQRQNDEAANNEAYYMFRDDALFSEEEAKNRGGPAALLVNLLLHDPDFELEQVKDNLELFDKH +Hsa167996 SQRQNDEAANNEAYYMFRDDALFSEEEAKNRGGPAALLVNLLLHDPDFELEQVKDNLELFDKH +Mmu024661 SQRQNDEAANNEAYYMFRDDALFSEEEAKNRGGPAALLVNLLLHDPDFELEQVKDNLELFDKH +Dre37936 SQRQNDEAANNEAYYMFRDDALFSEEEAKNRGGPAALLVNLLLHDPDFELEQVKDNLELFDKH +LcaM SQRQNDEAANNEAYYMFRDDALFSEEEAKNRGGPAALLVNLLLHDPDFELEQVKDNLELFDKH +Tru14292 SQRQNDEAANNEAYYMFRDDALFSEEEAKNRGGPAALLVNLLLHDPDFELEQVKDNLELFDKH +Ola20972 SQRQNDEAANNEAYYMFRDDALFSEEEAKNRGGPAALLVNLLLHDPDFELEQVKDNLELFDKH + diff --git a/tools/trimal/test-data/example.004.AA.fasta b/tools/trimal/test-data/example.004.AA.fasta new file mode 100644 index 00000000000..201d81fe00b --- /dev/null +++ b/tools/trimal/test-data/example.004.AA.fasta @@ -0,0 +1,18 @@ +>Sp8 +FPWNGLQIHMMGIII + +>Sp17 +FPWNGLQIHMMGIII + +>Sp10 +FPWNGLQIHMMGIII + +>Sp26 +FPWNGLQIHMMGIII + +>Sp33 +FPWNGLQIHMMGIII + +>Sp6 +FPWNGLQIHMMGIII + diff --git a/tools/trimal/test-data/example.009.AA.fasta b/tools/trimal/test-data/example.009.AA.fasta new file mode 100644 index 00000000000..a878bc2869e --- /dev/null +++ b/tools/trimal/test-data/example.009.AA.fasta @@ -0,0 +1,45 @@ +>Csa004271 +---------------------------------MYMAMGHFFDRDDVALKNISEYFKECS +EEEREHANKMIEFHNKRGGTTTYFPIKAPGSFDPANFNTIKAMNCALALEVNVNKSLLAL +HE--TANGDPEFQDFIEANFLHEQVDAIKKLKDYITNLKLVG---TGLGEFLFDKHFKSS +----- +>Xtr21234 +----MISQVRQNYSHDCEAAVNRMVNLEMYASYTYLSMSHYFDRDDVALHHVAEFFKEQS +KEERECAEKLMKCQNKRGGRIVLQDIKKPERDEWG--STLDAMQTALDLEKHVNQALLDL +HNLATERKDPHICDFLESEHLDEQVKHMKKFGDHITNLKRLGVPQNGMGEYLFDKHSLS- +----- +>LcaH +----MSSQVRQNFHQDCEAAINRQINLELYASYVYLSMAYYFDRDDQALHNFAKFFRHQS +HEEREHAEKLMKLQNQRGGRIFLQDVRKPDRDEWG--SGVEALECALQLEKSVNQSLLDL +HKLCSDHNDPHLCDFIETHYLDEQVKSIKELADWVTNLRRMGAPQNGMAEYLFDKHTLGK +ES--S +>Hsa167996 +MTTASTSQVRQNYHQDSEAAINRQINLELYASYVYLSMSYYFDRDDVALKNFAKYFLHQS +HEEREHAEKLMKLQNQRGGRIFLQDIKKPDCDDWE--SGLNAMECALHLEKNVNQSLLEL +HKLATDKNDPHLCDFIETHYLNEQVKAIKELGDHVTNLRKMGAPESGLAEYLFDKHTLGD +SDNES +>Mmu024661 +MTTASPSQVRQNYHQDAEAAINRQINLELYASYVYLSMSCYFDRDDVALKNFAKYFLHQS +HEEREHAEKLMKLQNQRGGRIFLQDIKKPDRDDWE--SGLNAMECALHLEKSVNQSLLEL +HKLATDKNDPHLCDFIETYYLSEQVKSIKELGDHVTNLRKMGAPEAGMAEYLFDKHTLGH +GD-ES +>Dre37936 +---METSQIRQNYVRDCEAAINKMINLELYAGYTYTSMAHYFKRDDVALPGFAKFFKKNS +EEEREHAEKFMEFQNKRGGRIVLQDIKKPDRDVWG--NGLIAMQCALQLEKNVNQALLDL +HKLATEMGDPHLCDFLETHYLNEQVEAIKKLGDHITNLSKMDAGNNRMAEYLFDKHTLDS +----- +>LcaM +----MESQVRQNYHRDCEAAVNRMVNMEMFASYTYTSMAFYFSRDDVALPGFSHFFKENS +DEEREHAEKLLSFQNKRGGHIFLQDIKKPERDEWG--SGLEAMQCALQLKKNVNQALLDL +HKLASDHGDPHLCDFLETHYLNEQVEAIKKLGDYISNLSRMDAQKNKMAEYLFDKHSLGG +KS--- +>Tru14292 +----MESQVRQNYHRDCEAAINKMINMELYASYTYTSMAFFFSRDDVALPGFAHFFKENS +DEEREHAEKLLSFQNKRGGRIFLQDIKKPERDEWG--SGLEAMQCALQLEKKVNQALLDL +HKLASDHVDPHLCDFLESHYLNEQVEAIKKLGDYITNLSRMDAQNNKMAEYLFDKHTLGS +KS--- +>Ola20972 +----MESQVRQNYHRDCEAAINRMVNMELFASYTYTSMAFYFDRDDVALPGFSHFFKENS +HEEKEHADKLLSFQNKRGGRIFLQDVKKPERDEWG--SGLEAMQCALQLEKNVNQALLDL +HKVASDHKDPHMCDFLETHYLNEQVESIKKIGDHITNLTRMDAHTNKMAEYLFDKHTLGS +KS--- \ No newline at end of file diff --git a/tools/trimal/test-data/trimmed_example.009.AA.html b/tools/trimal/test-data/trimmed_example.009.AA.html new file mode 100644 index 00000000000..ca5052d7871 --- /dev/null +++ b/tools/trimal/test-data/trimmed_example.009.AA.html @@ -0,0 +1,71 @@ + + + +
+ Selected Sequences: 9 /Selected Residues: 174 + Deleted Sequences: 0 /Deleted Residues: 11 + + Gaps Scores: =0= <.001 <.050 <.100 <.150 <.200 <.250 <.350 <.500 <.750 <1.00 =1= + + 10 20 30 40 50 60 70 80 90 100 110 120 + =========+=========+=========+=========+=========+=========+=========+=========+=========+=========+=========+=========+ + Csa004271 ---------------------------------MYMAMGHFFDRDDVALKNISEYFKECSEEEREHANKMIEFHNKRGGTTTYFPIKAPGSFDPANFNTIKAMNCALALEVNVNKSLLAL + Xtr21234 ----MISQVRQNYSHDCEAAVNRMVNLEMYASYTYLSMSHYFDRDDVALHHVAEFFKEQSKEERECAEKLMKCQNKRGGRIVLQDIKKPERDEWG--STLDAMQTALDLEKHVNQALLDL + LcaH ----MSSQVRQNFHQDCEAAINRQINLELYASYVYLSMAYYFDRDDQALHNFAKFFRHQSHEEREHAEKLMKLQNQRGGRIFLQDVRKPDRDEWG--SGVEALECALQLEKSVNQSLLDL + Hsa167996 MTTASTSQVRQNYHQDSEAAINRQINLELYASYVYLSMSYYFDRDDVALKNFAKYFLHQSHEEREHAEKLMKLQNQRGGRIFLQDIKKPDCDDWE--SGLNAMECALHLEKNVNQSLLEL + Mmu024661 MTTASPSQVRQNYHQDAEAAINRQINLELYASYVYLSMSCYFDRDDVALKNFAKYFLHQSHEEREHAEKLMKLQNQRGGRIFLQDIKKPDRDDWE--SGLNAMECALHLEKSVNQSLLEL + Dre37936 ---METSQIRQNYVRDCEAAINKMINLELYAGYTYTSMAHYFKRDDVALPGFAKFFKKNSEEEREHAEKFMEFQNKRGGRIVLQDIKKPDRDVWG--NGLIAMQCALQLEKNVNQALLDL + LcaM ----MESQVRQNYHRDCEAAVNRMVNMEMFASYTYTSMAFYFSRDDVALPGFSHFFKENSDEEREHAEKLLSFQNKRGGHIFLQDIKKPERDEWG--SGLEAMQCALQLKKNVNQALLDL + Tru14292 ----MESQVRQNYHRDCEAAINKMINMELYASYTYTSMAFFFSRDDVALPGFAHFFKENSDEEREHAEKLLSFQNKRGGRIFLQDIKKPERDEWG--SGLEAMQCALQLEKKVNQALLDL + Ola20972 ----MESQVRQNYHRDCEAAINRMVNMELFASYTYTSMAFYFDRDDVALPGFSHFFKENSHEEKEHADKLLSFQNKRGGRIFLQDVKKPERDEWG--SGLEAMQCALQLEKNVNQALLDL + + Selected Cols: + + Gaps Scores: + + 130 140 150 160 170 180 + =========+=========+=========+=========+=========+=========+===== + Csa004271 HE--TANGDPEFQDFIEANFLHEQVDAIKKLKDYITNLKLVG---TGLGEFLFDKHFKSS----- + Xtr21234 HNLATERKDPHICDFLESEHLDEQVKHMKKFGDHITNLKRLGVPQNGMGEYLFDKHSLS------ + LcaH HKLCSDHNDPHLCDFIETHYLDEQVKSIKELADWVTNLRRMGAPQNGMAEYLFDKHTLGKES--S + Hsa167996 HKLATDKNDPHLCDFIETHYLNEQVKAIKELGDHVTNLRKMGAPESGLAEYLFDKHTLGDSDNES + Mmu024661 HKLATDKNDPHLCDFIETYYLSEQVKSIKELGDHVTNLRKMGAPEAGMAEYLFDKHTLGHGD-ES + Dre37936 HKLATEMGDPHLCDFLETHYLNEQVEAIKKLGDHITNLSKMDAGNNRMAEYLFDKHTLDS----- + LcaM HKLASDHGDPHLCDFLETHYLNEQVEAIKKLGDYISNLSRMDAQKNKMAEYLFDKHSLGGKS--- + Tru14292 HKLASDHVDPHLCDFLESHYLNEQVEAIKKLGDYITNLSRMDAQNNKMAEYLFDKHTLGSKS--- + Ola20972 HKVASDHKDPHMCDFLETHYLNEQVESIKKIGDHITNLTRMDAHTNKMAEYLFDKHTLGSKS--- + + Selected Cols: + + Gaps Scores: ++ + diff --git a/tools/trimal/test-data/trimmed_example.009.AA.mega b/tools/trimal/test-data/trimmed_example.009.AA.mega new file mode 100644 index 00000000000..2ce931d48c5 --- /dev/null +++ b/tools/trimal/test-data/trimmed_example.009.AA.mega @@ -0,0 +1,58 @@ +#MEGA +!Title ./test-data/example.009.AA.fasta; +!Format DataType=protein NSeqs=9 Nsites=174 indel=- CodeTable=Standard; + +#Csa004271 +---------- ---------- ---------M YMAMGHFFDR DDVALKNISE +YFKECSEEER EHANKMIEFH NKRGGTTTYF PIKAPGSFDP ANTIKAMNCA +LALEVNVNKS LLALHE--TA NGDPEFQDFI EANFLHEQVD AIKKLKDYIT +NLKLVG---T GLGEFLFDKH FKSS + +#Xtr21234 +MISQVRQNYS HDCEAAVNRM VNLEMYASYT YLSMSHYFDR DDVALHHVAE +FFKEQSKEER ECAEKLMKCQ NKRGGRIVLQ DIKKPERDEW GSTLDAMQTA +LDLEKHVNQA LLDLHNLATE RKDPHICDFL ESEHLDEQVK HMKKFGDHIT +NLKRLGVPQN GMGEYLFDKH SLS- + +#LcaH +MSSQVRQNFH QDCEAAINRQ INLELYASYV YLSMAYYFDR DDQALHNFAK +FFRHQSHEER EHAEKLMKLQ NQRGGRIFLQ DVRKPDRDEW GSGVEALECA +LQLEKSVNQS LLDLHKLCSD HNDPHLCDFI ETHYLDEQVK SIKELADWVT +NLRRMGAPQN GMAEYLFDKH TLGK + +#Hsa167996 +STSQVRQNYH QDSEAAINRQ INLELYASYV YLSMSYYFDR DDVALKNFAK +YFLHQSHEER EHAEKLMKLQ NQRGGRIFLQ DIKKPDCDDW ESGLNAMECA +LHLEKNVNQS LLELHKLATD KNDPHLCDFI ETHYLNEQVK AIKELGDHVT +NLRKMGAPES GLAEYLFDKH TLGD + +#Mmu024661 +SPSQVRQNYH QDAEAAINRQ INLELYASYV YLSMSCYFDR DDVALKNFAK +YFLHQSHEER EHAEKLMKLQ NQRGGRIFLQ DIKKPDRDDW ESGLNAMECA +LHLEKSVNQS LLELHKLATD KNDPHLCDFI ETYYLSEQVK SIKELGDHVT +NLRKMGAPEA GMAEYLFDKH TLGH + +#Dre37936 +ETSQIRQNYV RDCEAAINKM INLELYAGYT YTSMAHYFKR DDVALPGFAK +FFKKNSEEER EHAEKFMEFQ NKRGGRIVLQ DIKKPDRDVW GNGLIAMQCA +LQLEKNVNQA LLDLHKLATE MGDPHLCDFL ETHYLNEQVE AIKKLGDHIT +NLSKMDAGNN RMAEYLFDKH TLDS + +#LcaM +MESQVRQNYH RDCEAAVNRM VNMEMFASYT YTSMAFYFSR DDVALPGFSH +FFKENSDEER EHAEKLLSFQ NKRGGHIFLQ DIKKPERDEW GSGLEAMQCA +LQLKKNVNQA LLDLHKLASD HGDPHLCDFL ETHYLNEQVE AIKKLGDYIS +NLSRMDAQKN KMAEYLFDKH SLGG + +#Tru14292 +MESQVRQNYH RDCEAAINKM INMELYASYT YTSMAFFFSR DDVALPGFAH +FFKENSDEER EHAEKLLSFQ NKRGGRIFLQ DIKKPERDEW GSGLEAMQCA +LQLEKKVNQA LLDLHKLASD HVDPHLCDFL ESHYLNEQVE AIKKLGDYIT +NLSRMDAQNN KMAEYLFDKH TLGS + +#Ola20972 +MESQVRQNYH RDCEAAINRM VNMELFASYT YTSMAFYFDR DDVALPGFSH +FFKENSHEEK EHADKLLSFQ NKRGGRIFLQ DVKKPERDEW GSGLEAMQCA +LQLEKNVNQA LLDLHKVASD HKDPHMCDFL ETHYLNEQVE SIKKIGDHIT +NLTRMDAHTN KMAEYLFDKH TLGS + diff --git a/tools/trimal/trimal.xml b/tools/trimal/trimal.xml new file mode 100644 index 00000000000..abffeaf4f37 --- /dev/null +++ b/tools/trimal/trimal.xml @@ -0,0 +1,131 @@ +