Skip to content

Commit

Permalink
v.1.3.3
Browse files Browse the repository at this point in the history
Add possibility to estimate gene trees using 'by exon' partitioned dataset. A *.part file is prepared when concatenating *.mafft alignments in script 4 and later used in RAxML gene tree building in script 6a.
  • Loading branch information
tomas-fer authored Feb 6, 2017
1 parent 5e1909e commit 5b804dd
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 9 deletions.
14 changes: 11 additions & 3 deletions HybPhyloMaker4_processpslx.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
# ********************************************************************************
# * HybPhyloMaker - Pipeline for Hyb-Seq data processing and tree building *
# * Script 04 - Process pslx files *
# * v.1.3.2 *
# * v.1.3.3 *
# * Tomas Fer, Dept. of Botany, Charles University, Prague, Czech Republic, 2016 *
# * [email protected] *
# * based on Weitemier et al. (2014), Applications in Plant Science 2(9): 1400042*
Expand Down Expand Up @@ -324,8 +324,10 @@ if [[ $cp =~ "no" ]]; then
#Copy script
if [[ $location == "1" ]]; then
cp -r $source/catfasta2phyml.pl .
cp -r $source/AMAS.py .
else
cp -r ../$source/catfasta2phyml.pl .
cp -r ../$source/AMAS.py .
fi
#Modify mafft file names (from, i.e., To_align_Assembly_10372_Contig_1_516.fasta.mafft to To_align_Assembly_10372_*mafft)
#(all files starting with "To_align_Assembly_10372_" will be merged)
Expand All @@ -347,8 +349,14 @@ if [[ $cp =~ "no" ]]; then
#transform fasta to phylip format, copy results from scratch to home
cat fileForLoop.txt | while read -r a b
do
perl catfasta2phyml.pl -f $a > $b.fasta
perl catfasta2phyml.pl $b.fasta > $b.phylip
#concatenate exons from the same locus
python3 AMAS.py concat -i $a -f fasta -d dna -u fasta -t ${b}.fasta >/dev/null
python3 AMAS.py concat -i $a -f fasta -d dna -u phylip -t ${b}.phylip >/dev/null
#modify and rename partitions.txt
sed -i.bak -e 's/^/DNA, /g' -e 's/_To_align//g' partitions.txt
mv partitions.txt ${b}.part
#perl catfasta2phyml.pl -f $a > $b.fasta
#perl catfasta2phyml.pl $b.fasta > $b.phylip
if [[ $location == "1" ]]; then
cp $b.* $path/$type/70concatenated_exon_alignments
else
Expand Down
32 changes: 26 additions & 6 deletions HybPhyloMaker6a_RAxML_for_selected.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
# ********************************************************************************
# * HybPhyloMaker - Pipeline for Hyb-Seq data processing and tree building *
# * Script 06a - RAxML gene tree building *
# * v.1.3.2 *
# * v.1.3.3 *
# * Tomas Fer, Dept. of Botany, Charles University, Prague, Czech Republic, 2015 *
# * [email protected] *
# ********************************************************************************
Expand Down Expand Up @@ -164,11 +164,14 @@ if [[ $location == "1" || $location == "2" ]]; then
echo 'SPECIESPRESENCE='"$SPECIESPRESENCE" >> ${group}.sh
echo 'type='"$type" >> ${group}.sh
echo 'location='"$location" >> ${group}.sh
echo 'genetreepart='"$genetreepart" >> ${group}.sh
echo 'raxmlpthreads='"$raxmlpthreads" >> ${group}.sh
echo 'cp '"$path"'/$type/71selected${MISSINGPERCENT}/'"$group"' .' >> ${group}.sh
echo 'cp '"$source"'/catfasta2phyml.pl .' >> ${group}.sh
echo 'for i in $(cat '"$group"')' >> ${group}.sh
echo 'do' >> ${group}.sh
echo ' cp '"$path"'/$type/71selected${MISSINGPERCENT}/deleted_above${MISSINGPERCENT}/${i}_modif${MISSINGPERCENT}.fas .' >> ${group}.sh
echo ' cp $path/$type/70concatenated_exon_alignments/${i}.part .' >> ${group}.sh
echo ' #Substitute '"'('"' by '"'_'"' and '"')'"' by nothing ('"'('"' and '"')'"' not allowed in RAxML)' >> ${group}.sh
echo ' sed -i '"'s/(/_/g'"' ${i}_modif${MISSINGPERCENT}.fas' >> ${group}.sh
echo ' sed -i '"'s/)//g'"' ${i}_modif${MISSINGPERCENT}.fas' >> ${group}.sh
Expand All @@ -179,13 +182,22 @@ if [[ $location == "1" || $location == "2" ]]; then
echo 'done' >> ${group}.sh
echo '#Make a list of all phylip files' >> ${group}.sh
echo 'ls *.phylip | cut -d"." -f1 > FileForRAxML.txt' >> ${group}.sh
echo 'for file in $(cat FileForRAxML.txt)' >> ${group}.sh
echo 'do' >> ${group}.sh
echo 'for file in $(cat FileForRAxML.txt); do' >> ${group}.sh
echo ' #modify name for partition file (remove '_modif${MISSINGPERCENT}')' >> ${group}.sh
echo ' filepart=$(sed "s/_modif${MISSINGPERCENT}//" <<< $file)' >> ${group}.sh
echo ' #RAxML with 100 rapid bootstrap' >> ${group}.sh
echo ' if [[ $location == "1" ]]; then' >> ${group}.sh
echo ' raxmlHPC-PTHREADS -T $TORQUE_RESC_TOTAL_PROCS -f a -s $file.phylip -n $file.result -m GTRCAT -p 1234 -x 1234 -N 100' >> ${group}.sh
echo ' if [[ $genetreepart == "yes" ]]; then' >> ${group}.sh
echo ' raxmlHPC-PTHREADS -T $TORQUE_RESC_TOTAL_PROCS -f a -s $file.phylip -q $filepart.part -n $file.result -m GTRCAT -p 1234 -x 1234 -N 100' >> ${group}.sh
echo ' else' >> ${group}.sh
echo ' raxmlHPC-PTHREADS -T $TORQUE_RESC_TOTAL_PROCS -f a -s $file.phylip -n $file.result -m GTRCAT -p 1234 -x 1234 -N 100' >> ${group}.sh
echo ' fi' >> ${group}.sh
echo ' elif [[ $location == "2" ]]; then' >> ${group}.sh
echo ' $raxmlpthreads -T $NSLOTS -f a -s $file.phylip -n $file.result -m GTRCAT -p 1234 -x 1234 -N 100' >> ${group}.sh
echo ' if [[ $genetreepart == "yes" ]]; then' >> ${group}.sh
echo ' $raxmlpthreads -T $NSLOTS -f a -s $file.phylip -q $filepart.part -n $file.result -m GTRCAT -p 1234 -x 1234 -N 100' >> ${group}.sh
echo ' else' >> ${group}.sh
echo ' $raxmlpthreads -T $NSLOTS -f a -s $file.phylip -n $file.result -m GTRCAT -p 1234 -x 1234 -N 100' >> ${group}.sh
echo ' fi' >> ${group}.sh
echo ' fi' >> ${group}.sh
echo ' cp *$file.result '"$path"'/$type/72trees'"${MISSINGPERCENT}_${SPECIESPRESENCE}"'/RAxML' >> ${group}.sh
echo 'done' >> ${group}.sh
Expand Down Expand Up @@ -216,6 +228,7 @@ else
for i in $(cat selected_genes_${MISSINGPERCENT}_${SPECIESPRESENCE}.txt)
do
cp $path/$type/71selected${MISSINGPERCENT}/deleted_above${MISSINGPERCENT}/${i}_modif${MISSINGPERCENT}.fas .
cp $path/$type/70concatenated_exon_alignments/${i}.part .
#Substitute '(' by '_' and ')' by nothing ('(' and ')' not allowed in RAxML)
sed -i.bak 's/(/_/g' ${i}_modif${MISSINGPERCENT}.fas
sed -i.bak 's/)//g' ${i}_modif${MISSINGPERCENT}.fas
Expand All @@ -232,7 +245,14 @@ else
calculating=$((calculating + 1))
echo -e "\nProcessing file: ${file}" >> raxml.log
echo -e "Processing file: ${file} ($calculating out of $numbertrees)"
$raxmlpthreads -T $numbcores -f a -s $file.fas -n $file.result -m GTRCAT -p 1234 -x 1234 -N 100 >> raxml.log
#modify name for partition file (remove '_modif${MISSINGPERCENT}'
filepart=$(sed "s/_modif${MISSINGPERCENT}//" <<< $file)
#run RAxML
if [[ $genetreepart == "yes" ]]; then
$raxmlpthreads -T $numbcores -f a -s $file.fas -q $filepart.part -n $file.result -m GTRCAT -p 1234 -x 1234 -N 100 >> raxml.log
else
$raxmlpthreads -T $numbcores -f a -s $file.fas -q -n $file.result -m GTRCAT -p 1234 -x 1234 -N 100 >> raxml.log
fi
cp *$file.result $path/$type/72trees${MISSINGPERCENT}_${SPECIESPRESENCE}/RAxML
done
#Copy raxml.log to home
Expand Down

0 comments on commit 5b804dd

Please sign in to comment.