Skip to content

Commit

Permalink
ls6_launcher added
Browse files Browse the repository at this point in the history
  • Loading branch information
z0on committed Feb 18, 2022
1 parent 5063c8e commit f0bfd54
Show file tree
Hide file tree
Showing 2 changed files with 477 additions and 16 deletions.
32 changes: 16 additions & 16 deletions automatic_RAD_pipeline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ $HOME/edirect/esearch -db sra -query $BioProject | efetch --format runinfo |cut
for A in `cat $BioProject.SRR`;do
echo "fastq-dump-orig.2.10.5 $A">>gets;
done
ls5_launcher_creator.py -j gets -n gets -a tagmap -e [email protected] -t 12:00:00 -w 24 -q normal
s2_launcher_creator.py -j gets -n gets -a tagmap -e [email protected] -t 12:00:00 -w 24 -q normal
getsjob=$(sbatch gets.slurm | grep "Submitted batch job" | perl -pe 's/\D//g')

Q= -log10(Perror)*10
Expand Down Expand Up @@ -120,25 +120,25 @@ export GENOME_REF=all_cc.fasta
for file in *.fastq; do
echo "cutadapt --format fastq -q 15,15 -a AGATCGGA -m $TagLen -l $TagLen -o ${file/.fastq/}.trim0 $file > ${file}_trimlog.txt && head -12000000 ${file/.fastq/}.trim0 > ${file/.fastq/}.trim && rm ${file/.fastq/}.trim0" >> trim;
done
ls5_launcher_creator.py -j trim -n trim -a tagmap -e [email protected] -t 1:00:00 -w 48 -q normal
s2_launcher_creator.py -j trim -n trim -a tagmap -e [email protected] -t 1:00:00 -w 48 -q normal
trimjob=$(sbatch trim.slurm | grep "Submitted batch job" | perl -pe 's/\D//g')

# converting fastq to fasta (for kmer analysis)
>f2f
for F in `ls *fastq`; do echo "paste - - - - < ${F/.fastq/}.trim | cut -f 1,2 | sed 's/^@/>/' | tr \"\t\" \"\n\" > ${F/.fastq/}.fasta" >>f2f ;done
ls5_launcher_creator.py -j f2f -n f2f -a tagmap -e [email protected] -t 0:05:00 -w 48 -q normal
s2_launcher_creator.py -j f2f -n f2f -a tagmap -e [email protected] -t 0:05:00 -w 48 -q normal
f2fjob=$(sbatch --dependency=afterok:$trimjob f2f.slurm | grep "Submitted batch job" | perl -pe 's/\D//g')

# analyzing kmers, removing singletons from each file (kmerer.pl)
>jelly
for F in `ls *fastq`; do echo "jellyfish count -m $TagLen -s 100M -t 1 -C ${F/.fastq/}.fasta -o ${F/.fastq/}.jf && jellyfish dump ${F/.fastq/}.jf | kmerer.pl - 2 > ${F/.fastq/}.kmers.fa">>jelly;done
ls5_launcher_creator.py -j jelly -n jelly -a tagmap -e [email protected] -t 0:10:00 -w 24 -q normal
s2_launcher_creator.py -j jelly -n jelly -a tagmap -e [email protected] -t 0:10:00 -w 24 -q normal
jellyjob=$(sbatch --dependency=afterok:$f2fjob jelly.slurm | grep "Submitted batch job" | perl -pe 's/\D//g')

# merging kmer lists and filtering kmers aiming to get major alleles
# clustering, concatenating into fake genome (10 chromosomes), and indexing it
echo "ls *kmers.fa > all.kf && mergeKmers.pl all.kf minDP=10 minInd=5 > kmers.tab && cat kmers.tab | shuf | awk '{print \">\"\$1\"\n\"\$2}' > all.fasta && cd-hit-est -i all.fasta -o all.clust -aL 1 -aS 1 -g 1 -c $MatchFrac -M 0 -T 0 && concatFasta.pl fasta=all.clust num=10 && bowtie2-build all_cc.fasta all_cc.fasta && samtools faidx all_cc.fasta" >mk
ls5_launcher_creator.py -j mk -n mk -a tagmap -e [email protected] -t 2:00:00 -w 1 -q normal
s2_launcher_creator.py -j mk -n mk -a tagmap -e [email protected] -t 2:00:00 -w 1 -q normal
mergejob=$(sbatch --dependency=afterok:$jellyjob mk.slurm | grep "Submitted batch job" | perl -pe 's/\D//g')
#mergejob=$(sbatch mk.slurm | grep "Submitted batch job" | perl -pe 's/\D//g')

Expand All @@ -148,7 +148,7 @@ for F in `ls *.fastq`; do
REF=all_cc.fasta
echo "bowtie2 --no-unal -x $REF -U ${F/.fastq/}.trim -S ${F/.fastq/}.sam && samtools sort -O bam -o ${F/.fastq/}.bam ${F/.fastq/}.sam && samtools index ${F/.fastq/}.bam">>maps
done
ls5_launcher_creator.py -j maps -n maps -a tagmap -e [email protected] -t 2:00:00 -w 24 -q normal
s2_launcher_creator.py -j maps -n maps -a tagmap -e [email protected] -t 2:00:00 -w 24 -q normal
mapsjob=$(sbatch --dependency=afterok:$mergejob maps.slurm | grep "Submitted batch job" | perl -pe 's/\D//g')

# quality assessment, removing bams with log(coverage)<3SD
Expand All @@ -158,7 +158,7 @@ FILTERSQ='-uniqueOnly 1 -remove_bads 1 -minMapQ 20 -minInd $MI'
TODOQ="-doQsDist 1 -doDepth 1 -doCounts 1 -dumpCounts 2"
echo 'export NIND=`cat bams | wc -l`; export MI=`echo "($NIND*$MinIndPerc+0.5)/1" | bc`' >calc1
echo "ls *.bam > bams && source calc1 && angsd -b bams -r chr1:1-1000000 -GL 1 $FILTERSQ $TODOQ -P 12 -out dd && Rscript ~/bin/plotQC.R prefix=dd">a0
ls5_launcher_creator.py -j a0 -n a0 -a tagmap -e [email protected] -t 2:00:00 -w 1 -q normal
s2_launcher_creator.py -j a0 -n a0 -a tagmap -e [email protected] -t 2:00:00 -w 1 -q normal
qjob=$(sbatch --dependency=afterok:$mapsjob a0.slurm | grep "Submitted batch job" | perl -pe 's/\D//g')

#------------ examine dd.pdf, decide on MinIndPerc and whether bams.qc is reasonable
Expand All @@ -173,7 +173,7 @@ FILTERS0='-minInd $MI -uniqueOnly 1 -remove_bads 1 -minMapQ 20 -minQ 20 -snp_pva
TODO0='-doMajorMinor 1 -doMaf 1 -doCounts 1 -makeMatrix 1 -doIBS 1 -doCov 1 -doPost 1 -doGlf 2'
echo 'export NIND=`cat bams.qc | wc -l`; export MI=`echo "($NIND*$MinIndPerc+0.5)/1" | bc`' >calc1
echo "source calc1 && angsd -b bams.qc -GL 1 $FILTERS0 $TODO0 -P 12 -out myresult && Rscript ~/bin/detect_clones.R bams.qc myresult.ibsMat 0.15">a1
ls5_launcher_creator.py -j a1 -n a1 -a tagmap -e [email protected] -t 2:00:00 -w 1 -q normal
s2_launcher_creator.py -j a1 -n a1 -a tagmap -e [email protected] -t 2:00:00 -w 1 -q normal
# a1job=$(sbatch --dependency=afterok:$qjob a1.slurm | grep "Submitted batch job" | perl -pe 's/\D//g')
a1job=$(sbatch a1.slurm | grep "Submitted batch job" | perl -pe 's/\D//g')

Expand All @@ -187,7 +187,7 @@ FILTERS1='-minInd $MI2 -uniqueOnly 1 -remove_bads 1 -minMapQ 20 -minQ 20 -snp_pv
TODO1='-doMajorMinor 1 -doMaf 1 -doCounts 1 -makeMatrix 1 -doIBS 1 -doCov 1 -doPost 1 -doGlf 2'
echo 'cat bams.nr | sort > bams.NR && mv bams.NR bams.nr && export NIND2=`cat bams.nr | wc -l`; export MI2=`echo "($NIND2*$MinIndPerc+0.5)/1" | bc`; export MAF=`echo "3/(2*$NIND2)" | bc -l`' >calc2
echo "source calc2 && angsd -b bams.nr -GL 1 $FILTERS1 $TODO1 -P 12 -out myresult2 && Rscript ~/bin/pcaStructure.R myresult2.ibsMat">a2
ls5_launcher_creator.py -j a2 -n a2 -a tagmap -e [email protected] -t 2:00:00 -w 1
s2_launcher_creator.py -j a2 -n a2 -a tagmap -e [email protected] -t 2:00:00 -w 1
-q normal
a2job=$(sbatch --dependency=afterok:$a1job a2.slurm | grep "Submitted batch job" | perl -pe 's/\D//g')
#a2job=$(sbatch a2.slurm | grep "Submitted batch job" | perl -pe 's/\D//g')
Expand All @@ -196,7 +196,7 @@ a2job=$(sbatch --dependency=afterok:$a1job a2.slurm | grep "Submitted batch job"

module load python2
echo 'python ~/pcangsd/pcangsd.py -beagle myresult2.beagle.gz -admix -o pcangsd -inbreed 2 -kinship -selection -threads 12' >adm
ls5_launcher_creator.py -j adm -n adm -a tagmap -e [email protected] -t 0:10:00 -w 1 -q normal
s2_launcher_creator.py -j adm -n adm -a tagmap -e [email protected] -t 0:10:00 -w 1 -q normal
admjob=$(sbatch --dependency=afterok:$a2job adm.slurm | grep "Submitted batch job" | perl -pe 's/\D//g')

# ------- genetic diversity stats based on all well-genotyped sites (including invariants)
Expand All @@ -209,21 +209,21 @@ FILTERS='-minInd $MI2 -uniqueOnly 1 -skipTriallelic 1 -minMapQ 20 -minQ 20 -doHW
TODO="-doSaf 1 -anc $REF -ref $GENOME_REF -doMajorMinor 1 -doMaf 1 -dosnpstat 1 -doPost 1 -doGlf 2"
echo 'export NIND2=`cat bams.nr | wc -l`; export MI2=`echo "($NIND2*$MinIndPerc+0.5)/1" | bc`' >calc2
echo "source calc2 && angsd -b bams.nr -r chr5 -GL 1 -P 12 $FILTERS $TODO -out chr5 && realSFS chr5.saf.idx -P 12 -fold 1 > chr5.sfs && realSFS saf2theta chr5.saf.idx -outname chr5 -sfs chr5.sfs -fold 1 && thetaStat do_stat chr5.thetas.idx -outnames chr5 && grep \"chr\" chr5.pestPG | awk '{ print \$4/\$14}' >piPerChrom">sfsj
ls5_launcher_creator.py -j sfsj -n sfsj -t 2:00:00 -e [email protected] -w 1 -a tagmap -q normal
s2_launcher_creator.py -j sfsj -n sfsj -t 2:00:00 -e [email protected] -w 1 -a tagmap -q normal
sfsjob=$(sbatch --dependency=afterok:$a1job sfsj.slurm | grep "Submitted batch job" | perl -pe 's/\D//g')
#sfsjob=$(sbatch sfsj.slurm | grep "Submitted batch job" | perl -pe 's/\D//g')


# individual heterozygosities (proportion of heterozygotes across SNPs that pass sfsjob filters)
echo "Rscript ~/bin/heterozygosity_beagle.R chr5.beagle.gz >indHets" >bg
ls5_launcher_creator.py -j bg -n bg -a tagmap -e [email protected] -t 12:00:00 -w 1 -q normal
s2_launcher_creator.py -j bg -n bg -a tagmap -e [email protected] -t 12:00:00 -w 1 -q normal
sbatch --dependency=afterok:$sfsjob bg.slurm
# heterozygosity_beagle.R script (by Nathaniel Pope) outputs *_zygosity.RData R data bundle containing AFS (rows) for each individual (columns). The proportion of heterozygotes is the second row. Individual heterozygosities are also printed out to STDOUT (in the case above, saved to text file indHets)

# relatedness with NgsRelate
echo 'export NIND2=`cat bams.nr | wc -l`; export NS=``zcat g3.mafs.gz | wc -l`' >calc3
echo 'source calc3 && zcat g3.mafs.gz | cut -f5 |sed 1d >freq && ngsRelate -g g3.glf.gz -n $NIND -f freq >g3.relatedness' >rel
ls5_launcher_creator.py -j rel -n rel -a tagmap -e [email protected] -t 2:00:00 -w 1 -q normal
s2_launcher_creator.py -j rel -n rel -a tagmap -e [email protected] -t 2:00:00 -w 1 -q normal
reljob=$(sbatch rel.slurm | grep "Submitted batch job" | perl -pe 's/\D//g')


Expand All @@ -243,13 +243,13 @@ FILTERS1='-minInd $MI2 -uniqueOnly 1 -remove_bads 1 -minMapQ 20 -minQ 20 -snp_pv
TODO1='-doMajorMinor 1 -doMaf 1 -doCounts 1 -doPost 1 -doGlf 3'
echo 'cat bams.nr | sort > bams.NR && mv bams.NR bams.nr && export NIND2=`cat bams.nr | wc -l`; export MI2=`echo "($NIND2*$MinIndPerc+0.5)/1" | bc`' >calc2
echo "source calc2 && angsd -b bams.nr -GL 1 $FILTERS1 $TODO1 -P 12 -out g3">g3
ls5_launcher_creator.py -j g3 -n g3 -a tagmap -e [email protected] -t 2:00:00 -w 1 -q normal
s2_launcher_creator.py -j g3 -n g3 -a tagmap -e [email protected] -t 2:00:00 -w 1 -q normal
g3job=$(sbatch g3.slurm | grep "Submitted batch job" | perl -pe 's/\D//g')


# individual inbreeding coeffs
echo 'zcat g3.glf.gz | ngsF --glf - --n_ind 44 --n_sites 909052 --out inbr' >nf
ls5_launcher_creator.py -j nf -n nf -a tagmap -e [email protected] -t 2:00:00 -w 1 -q normal
s2_launcher_creator.py -j nf -n nf -a tagmap -e [email protected] -t 2:00:00 -w 1 -q normal
ngsFjob=$(sbatch --dependency=afterok:$g3job nf.slurm | grep "Submitted batch job" | perl -pe 's/\D//g')
#ngsFjob=$(sbatch nf.slurm | grep "Submitted batch job" | perl -pe 's/\D//g')

Expand All @@ -258,7 +258,7 @@ zcat g3.mafs.gz | cut -f5 |sed 1d >freq
# relatedness with NgsRelate
echo 'export NIND2=`cat bams.nr | wc -l`; export NS=``zcat g3.mafs.gz | wc -l`' >calc3
echo 'source calc3 && zcat g3.mafs.gz | cut -f5 |sed 1d >freq && ngsRelate -g g3.glf.gz -n $NIND -f freq >g3.relatedness' >rel
ls5_launcher_creator.py -j rel -n rel -a tagmap -e [email protected] -t 2:00:00 -w 1 -q normal
s2_launcher_creator.py -j rel -n rel -a tagmap -e [email protected] -t 2:00:00 -w 1 -q normal
reljob=$(sbatch rel.slurm | grep "Submitted batch job" | perl -pe 's/\D//g')


Expand Down
Loading

0 comments on commit f0bfd54

Please sign in to comment.