-
Notifications
You must be signed in to change notification settings - Fork 20
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
477 additions
and
16 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -92,7 +92,7 @@ $HOME/edirect/esearch -db sra -query $BioProject | efetch --format runinfo |cut | |
for A in `cat $BioProject.SRR`;do | ||
echo "fastq-dump-orig.2.10.5 $A">>gets; | ||
done | ||
ls5_launcher_creator.py -j gets -n gets -a tagmap -e [email protected] -t 12:00:00 -w 24 -q normal | ||
s2_launcher_creator.py -j gets -n gets -a tagmap -e [email protected] -t 12:00:00 -w 24 -q normal | ||
getsjob=$(sbatch gets.slurm | grep "Submitted batch job" | perl -pe 's/\D//g') | ||
|
||
Q= -log10(Perror)*10 | ||
|
@@ -120,25 +120,25 @@ export GENOME_REF=all_cc.fasta | |
for file in *.fastq; do | ||
echo "cutadapt --format fastq -q 15,15 -a AGATCGGA -m $TagLen -l $TagLen -o ${file/.fastq/}.trim0 $file > ${file}_trimlog.txt && head -12000000 ${file/.fastq/}.trim0 > ${file/.fastq/}.trim && rm ${file/.fastq/}.trim0" >> trim; | ||
done | ||
ls5_launcher_creator.py -j trim -n trim -a tagmap -e [email protected] -t 1:00:00 -w 48 -q normal | ||
s2_launcher_creator.py -j trim -n trim -a tagmap -e [email protected] -t 1:00:00 -w 48 -q normal | ||
trimjob=$(sbatch trim.slurm | grep "Submitted batch job" | perl -pe 's/\D//g') | ||
|
||
# converting fastq to fasta (for kmer analysis) | ||
>f2f | ||
for F in `ls *fastq`; do echo "paste - - - - < ${F/.fastq/}.trim | cut -f 1,2 | sed 's/^@/>/' | tr \"\t\" \"\n\" > ${F/.fastq/}.fasta" >>f2f ;done | ||
ls5_launcher_creator.py -j f2f -n f2f -a tagmap -e [email protected] -t 0:05:00 -w 48 -q normal | ||
s2_launcher_creator.py -j f2f -n f2f -a tagmap -e [email protected] -t 0:05:00 -w 48 -q normal | ||
f2fjob=$(sbatch --dependency=afterok:$trimjob f2f.slurm | grep "Submitted batch job" | perl -pe 's/\D//g') | ||
|
||
# analyzing kmers, removing singletons from each file (kmerer.pl) | ||
>jelly | ||
for F in `ls *fastq`; do echo "jellyfish count -m $TagLen -s 100M -t 1 -C ${F/.fastq/}.fasta -o ${F/.fastq/}.jf && jellyfish dump ${F/.fastq/}.jf | kmerer.pl - 2 > ${F/.fastq/}.kmers.fa">>jelly;done | ||
ls5_launcher_creator.py -j jelly -n jelly -a tagmap -e [email protected] -t 0:10:00 -w 24 -q normal | ||
s2_launcher_creator.py -j jelly -n jelly -a tagmap -e [email protected] -t 0:10:00 -w 24 -q normal | ||
jellyjob=$(sbatch --dependency=afterok:$f2fjob jelly.slurm | grep "Submitted batch job" | perl -pe 's/\D//g') | ||
|
||
# merging kmer lists and filtering kmers aiming to get major alleles | ||
# clustering, concatenating into fake genome (10 chromosomes), and indexing it | ||
echo "ls *kmers.fa > all.kf && mergeKmers.pl all.kf minDP=10 minInd=5 > kmers.tab && cat kmers.tab | shuf | awk '{print \">\"\$1\"\n\"\$2}' > all.fasta && cd-hit-est -i all.fasta -o all.clust -aL 1 -aS 1 -g 1 -c $MatchFrac -M 0 -T 0 && concatFasta.pl fasta=all.clust num=10 && bowtie2-build all_cc.fasta all_cc.fasta && samtools faidx all_cc.fasta" >mk | ||
ls5_launcher_creator.py -j mk -n mk -a tagmap -e [email protected] -t 2:00:00 -w 1 -q normal | ||
s2_launcher_creator.py -j mk -n mk -a tagmap -e [email protected] -t 2:00:00 -w 1 -q normal | ||
mergejob=$(sbatch --dependency=afterok:$jellyjob mk.slurm | grep "Submitted batch job" | perl -pe 's/\D//g') | ||
#mergejob=$(sbatch mk.slurm | grep "Submitted batch job" | perl -pe 's/\D//g') | ||
|
||
|
@@ -148,7 +148,7 @@ for F in `ls *.fastq`; do | |
REF=all_cc.fasta | ||
echo "bowtie2 --no-unal -x $REF -U ${F/.fastq/}.trim -S ${F/.fastq/}.sam && samtools sort -O bam -o ${F/.fastq/}.bam ${F/.fastq/}.sam && samtools index ${F/.fastq/}.bam">>maps | ||
done | ||
ls5_launcher_creator.py -j maps -n maps -a tagmap -e [email protected] -t 2:00:00 -w 24 -q normal | ||
s2_launcher_creator.py -j maps -n maps -a tagmap -e [email protected] -t 2:00:00 -w 24 -q normal | ||
mapsjob=$(sbatch --dependency=afterok:$mergejob maps.slurm | grep "Submitted batch job" | perl -pe 's/\D//g') | ||
|
||
# quality assessment, removing bams with log(coverage)<3SD | ||
|
@@ -158,7 +158,7 @@ FILTERSQ='-uniqueOnly 1 -remove_bads 1 -minMapQ 20 -minInd $MI' | |
TODOQ="-doQsDist 1 -doDepth 1 -doCounts 1 -dumpCounts 2" | ||
echo 'export NIND=`cat bams | wc -l`; export MI=`echo "($NIND*$MinIndPerc+0.5)/1" | bc`' >calc1 | ||
echo "ls *.bam > bams && source calc1 && angsd -b bams -r chr1:1-1000000 -GL 1 $FILTERSQ $TODOQ -P 12 -out dd && Rscript ~/bin/plotQC.R prefix=dd">a0 | ||
ls5_launcher_creator.py -j a0 -n a0 -a tagmap -e [email protected] -t 2:00:00 -w 1 -q normal | ||
s2_launcher_creator.py -j a0 -n a0 -a tagmap -e [email protected] -t 2:00:00 -w 1 -q normal | ||
qjob=$(sbatch --dependency=afterok:$mapsjob a0.slurm | grep "Submitted batch job" | perl -pe 's/\D//g') | ||
|
||
#------------ examine dd.pdf, decide on MinIndPerc and whether bams.qc is reasonable | ||
|
@@ -173,7 +173,7 @@ FILTERS0='-minInd $MI -uniqueOnly 1 -remove_bads 1 -minMapQ 20 -minQ 20 -snp_pva | |
TODO0='-doMajorMinor 1 -doMaf 1 -doCounts 1 -makeMatrix 1 -doIBS 1 -doCov 1 -doPost 1 -doGlf 2' | ||
echo 'export NIND=`cat bams.qc | wc -l`; export MI=`echo "($NIND*$MinIndPerc+0.5)/1" | bc`' >calc1 | ||
echo "source calc1 && angsd -b bams.qc -GL 1 $FILTERS0 $TODO0 -P 12 -out myresult && Rscript ~/bin/detect_clones.R bams.qc myresult.ibsMat 0.15">a1 | ||
ls5_launcher_creator.py -j a1 -n a1 -a tagmap -e [email protected] -t 2:00:00 -w 1 -q normal | ||
s2_launcher_creator.py -j a1 -n a1 -a tagmap -e [email protected] -t 2:00:00 -w 1 -q normal | ||
# a1job=$(sbatch --dependency=afterok:$qjob a1.slurm | grep "Submitted batch job" | perl -pe 's/\D//g') | ||
a1job=$(sbatch a1.slurm | grep "Submitted batch job" | perl -pe 's/\D//g') | ||
|
||
|
@@ -187,7 +187,7 @@ FILTERS1='-minInd $MI2 -uniqueOnly 1 -remove_bads 1 -minMapQ 20 -minQ 20 -snp_pv | |
TODO1='-doMajorMinor 1 -doMaf 1 -doCounts 1 -makeMatrix 1 -doIBS 1 -doCov 1 -doPost 1 -doGlf 2' | ||
echo 'cat bams.nr | sort > bams.NR && mv bams.NR bams.nr && export NIND2=`cat bams.nr | wc -l`; export MI2=`echo "($NIND2*$MinIndPerc+0.5)/1" | bc`; export MAF=`echo "3/(2*$NIND2)" | bc -l`' >calc2 | ||
echo "source calc2 && angsd -b bams.nr -GL 1 $FILTERS1 $TODO1 -P 12 -out myresult2 && Rscript ~/bin/pcaStructure.R myresult2.ibsMat">a2 | ||
ls5_launcher_creator.py -j a2 -n a2 -a tagmap -e [email protected] -t 2:00:00 -w 1 | ||
s2_launcher_creator.py -j a2 -n a2 -a tagmap -e [email protected] -t 2:00:00 -w 1 | ||
-q normal | ||
a2job=$(sbatch --dependency=afterok:$a1job a2.slurm | grep "Submitted batch job" | perl -pe 's/\D//g') | ||
#a2job=$(sbatch a2.slurm | grep "Submitted batch job" | perl -pe 's/\D//g') | ||
|
@@ -196,7 +196,7 @@ a2job=$(sbatch --dependency=afterok:$a1job a2.slurm | grep "Submitted batch job" | |
|
||
module load python2 | ||
echo 'python ~/pcangsd/pcangsd.py -beagle myresult2.beagle.gz -admix -o pcangsd -inbreed 2 -kinship -selection -threads 12' >adm | ||
ls5_launcher_creator.py -j adm -n adm -a tagmap -e [email protected] -t 0:10:00 -w 1 -q normal | ||
s2_launcher_creator.py -j adm -n adm -a tagmap -e [email protected] -t 0:10:00 -w 1 -q normal | ||
admjob=$(sbatch --dependency=afterok:$a2job adm.slurm | grep "Submitted batch job" | perl -pe 's/\D//g') | ||
|
||
# ------- genetic diversity stats based on all well-genotyped sites (including invariants) | ||
|
@@ -209,21 +209,21 @@ FILTERS='-minInd $MI2 -uniqueOnly 1 -skipTriallelic 1 -minMapQ 20 -minQ 20 -doHW | |
TODO="-doSaf 1 -anc $REF -ref $GENOME_REF -doMajorMinor 1 -doMaf 1 -dosnpstat 1 -doPost 1 -doGlf 2" | ||
echo 'export NIND2=`cat bams.nr | wc -l`; export MI2=`echo "($NIND2*$MinIndPerc+0.5)/1" | bc`' >calc2 | ||
echo "source calc2 && angsd -b bams.nr -r chr5 -GL 1 -P 12 $FILTERS $TODO -out chr5 && realSFS chr5.saf.idx -P 12 -fold 1 > chr5.sfs && realSFS saf2theta chr5.saf.idx -outname chr5 -sfs chr5.sfs -fold 1 && thetaStat do_stat chr5.thetas.idx -outnames chr5 && grep \"chr\" chr5.pestPG | awk '{ print \$4/\$14}' >piPerChrom">sfsj | ||
ls5_launcher_creator.py -j sfsj -n sfsj -t 2:00:00 -e [email protected] -w 1 -a tagmap -q normal | ||
s2_launcher_creator.py -j sfsj -n sfsj -t 2:00:00 -e [email protected] -w 1 -a tagmap -q normal | ||
sfsjob=$(sbatch --dependency=afterok:$a1job sfsj.slurm | grep "Submitted batch job" | perl -pe 's/\D//g') | ||
#sfsjob=$(sbatch sfsj.slurm | grep "Submitted batch job" | perl -pe 's/\D//g') | ||
|
||
|
||
# individual heterozygosities (proportion of heterozygotes across SNPs that pass sfsjob filters) | ||
echo "Rscript ~/bin/heterozygosity_beagle.R chr5.beagle.gz >indHets" >bg | ||
ls5_launcher_creator.py -j bg -n bg -a tagmap -e [email protected] -t 12:00:00 -w 1 -q normal | ||
s2_launcher_creator.py -j bg -n bg -a tagmap -e [email protected] -t 12:00:00 -w 1 -q normal | ||
sbatch --dependency=afterok:$sfsjob bg.slurm | ||
# heterozygosity_beagle.R script (by Nathaniel Pope) outputs *_zygosity.RData R data bundle containing AFS (rows) for each individual (columns). The proportion of heterozygotes is the second row. Individual heterozygosities are also printed out to STDOUT (in the case above, saved to text file indHets) | ||
|
||
# relatedness with NgsRelate | ||
echo 'export NIND2=`cat bams.nr | wc -l`; export NS=``zcat g3.mafs.gz | wc -l`' >calc3 | ||
echo 'source calc3 && zcat g3.mafs.gz | cut -f5 |sed 1d >freq && ngsRelate -g g3.glf.gz -n $NIND -f freq >g3.relatedness' >rel | ||
ls5_launcher_creator.py -j rel -n rel -a tagmap -e [email protected] -t 2:00:00 -w 1 -q normal | ||
s2_launcher_creator.py -j rel -n rel -a tagmap -e [email protected] -t 2:00:00 -w 1 -q normal | ||
reljob=$(sbatch rel.slurm | grep "Submitted batch job" | perl -pe 's/\D//g') | ||
|
||
|
||
|
@@ -243,13 +243,13 @@ FILTERS1='-minInd $MI2 -uniqueOnly 1 -remove_bads 1 -minMapQ 20 -minQ 20 -snp_pv | |
TODO1='-doMajorMinor 1 -doMaf 1 -doCounts 1 -doPost 1 -doGlf 3' | ||
echo 'cat bams.nr | sort > bams.NR && mv bams.NR bams.nr && export NIND2=`cat bams.nr | wc -l`; export MI2=`echo "($NIND2*$MinIndPerc+0.5)/1" | bc`' >calc2 | ||
echo "source calc2 && angsd -b bams.nr -GL 1 $FILTERS1 $TODO1 -P 12 -out g3">g3 | ||
ls5_launcher_creator.py -j g3 -n g3 -a tagmap -e [email protected] -t 2:00:00 -w 1 -q normal | ||
s2_launcher_creator.py -j g3 -n g3 -a tagmap -e [email protected] -t 2:00:00 -w 1 -q normal | ||
g3job=$(sbatch g3.slurm | grep "Submitted batch job" | perl -pe 's/\D//g') | ||
|
||
|
||
# individual inbreeding coeffs | ||
echo 'zcat g3.glf.gz | ngsF --glf - --n_ind 44 --n_sites 909052 --out inbr' >nf | ||
ls5_launcher_creator.py -j nf -n nf -a tagmap -e [email protected] -t 2:00:00 -w 1 -q normal | ||
s2_launcher_creator.py -j nf -n nf -a tagmap -e [email protected] -t 2:00:00 -w 1 -q normal | ||
ngsFjob=$(sbatch --dependency=afterok:$g3job nf.slurm | grep "Submitted batch job" | perl -pe 's/\D//g') | ||
#ngsFjob=$(sbatch nf.slurm | grep "Submitted batch job" | perl -pe 's/\D//g') | ||
|
||
|
@@ -258,7 +258,7 @@ zcat g3.mafs.gz | cut -f5 |sed 1d >freq | |
# relatedness with NgsRelate | ||
echo 'export NIND2=`cat bams.nr | wc -l`; export NS=``zcat g3.mafs.gz | wc -l`' >calc3 | ||
echo 'source calc3 && zcat g3.mafs.gz | cut -f5 |sed 1d >freq && ngsRelate -g g3.glf.gz -n $NIND -f freq >g3.relatedness' >rel | ||
ls5_launcher_creator.py -j rel -n rel -a tagmap -e [email protected] -t 2:00:00 -w 1 -q normal | ||
s2_launcher_creator.py -j rel -n rel -a tagmap -e [email protected] -t 2:00:00 -w 1 -q normal | ||
reljob=$(sbatch rel.slurm | grep "Submitted batch job" | perl -pe 's/\D//g') | ||
|
||
|
||
|
Oops, something went wrong.