for i in *R1_001.fastq.gz;do echo -e "${i/_S*}\t`pwd`/$i\t`pwd`/${i/R1/R2}" >> samples.tab;done
for i in *fasta;do echo -e "${i/.fasta*}\tGENOME\t`pwd`/$i" >> samples.tab;done
conda deactivate
PERL5LIB="";
conda activate nullarbor-new
rm ~/miniconda3/envs/nullarbor-new/db/vfdb/sequences*
cp ~/miniconda3/envs/nullarbor-new/db/vfdb/vfdb_all/sequences* ~/miniconda3/envs/nullarbor-new/db/vfdb
PERL5LIB="";nullarbor.pl --cpus 50 --run --trim --mlst hparasuis --treebuilder iqtree_slow --taxoner centrifuge --name Hparasuis_analysis --ref ref.fasta --input samples.tab --outdir results
blastn -task blastn-short -query Hparasuis_sero_primers.fasta -db HPS.all_merged.fasta -out HPS.all_merged.blastn -outfmt "6 qseqid qlen qcovs stitle slen pident length mismatch gapopen qstart qend sstart send evalue bitscore" -max_target_seqs 5 -num_threads 50
sort -k3,3nr -k15,15nr HPS.all_merged.blastn | awk -F"\t" '!seen[$1]++' | less
alternate method
conda deactivate
PERL5LIB="";
conda activate GparasuisSero-env
for i in *fasta;do HpsuisSero.sh -i $i -x fasta -o . -s ${i/.fasta};done
blastn -task blastn-short -query vtaA_profile.fasta -db scaffolds.fasta -out vtaA_profile.blastn -outfmt "6 qseqid qlen qcovs stitle slen pident length mismatch gapopen qstart qend sstart send evalue bitscore" -max_target_seqs 5 -num_threads 50
sort -k3,3nr -k15,15nr vtaA_profile.blastn | awk -F"\t" '!seen[$1]++' | less
Or with complete vtaA sequences
blastn -task blastn-short -query vtaA-complete.fasta -db D22-004701-2-repeat.fasta -out HPS.all_merged.blastn -outfmt "6 qseqid qlen qcovs stitle slen pident length mismatch gapopen qstart qend sstart send evalue bitscore" -max_target_seqs 5 -num_threads 50
for i in *R1*fastq;do tanoti -P 50 -p 1 -r vtaA-complete.fasta -i $i ${i/R1/R2} -o ${i/_S*/.sam};done
for i in *sam;do SAM2CONSENSUS -i $i -o ${i/sam/fasta};done
for i in *R1_001.fastq.gz;do echo -e "${i/_S*}\t`pwd`/$i\t`pwd`/${i/R1/R2}" >> samples_"$( date +"%Y-%m-%d" )".tab;done
nullarbor.pl --cpus 50 --run --mlst ssuis --treebuilder iqtree_slow --taxoner centrifuge --name Ssuis_analysis-"$( date +"%Y-%m-%d" )" --ref Ssuis_ref.fa --input samples_"$( date +"%Y-%m-%d" )".tab --outdir Ssuis_analysis-"$( date +"%Y-%m-%d" )" --assembler spades
conda deactivate
PERL5LIB="";
conda activate ssuissero-env
for i in *.fasta;do SsuisSero.sh -s ${i/.fasta} -i $i -o . -x fasta;done
cat *tsv | grep -v Sample | less
or
git clone https://github.com/streplab/SsuisSerotyping_pipeline.git
conda activate srst2-env
for i in *L001_R1_001.fastq;do mv $i ${i/L001_R1_001.fastq/R1_001.fastq};done
for i in *L001_R2_001.fastq;do mv $i ${i/L001_R2_001.fastq/R2_001.fastq};done
./Ssuis_serotypingPipeline.pl --fastq_directory `pwd`/data --forward _R1_001 --reverse _R2_001 --ends pe
# column.names
wze-cps2C
cps2E
cps2F
epf
mrp
sly
pan-geneC
pan-geneD
pan-geneE
pan-geneH
abricate --summary *tab > summary.tab
vk_abricate_to_phandango.py -i summary.tab -o summary -c 70
cut -d"," -f1,$(head -n1 summary.csv | tr '\,' '\n' | grep -w -nf column.names | cut -f1 -d: | paste -sd,) summary.csv > virulence-genes.csv
conda deactivate
PERL5LIB="";
conda activate tormes-1.3.0
echo -e "Samples\tRead1\tRead2" >> samples_"$( date +"%Y-%m-%d" )".tab
for i in *R1_001.fastq.gz;do echo -e "${i/_S*}\t`pwd`/$i\t`pwd`/${i/R1/R2}" >> samples_"$( date +"%Y-%m-%d" )".tab;done
tormes --metadata samples_"$( date +"%Y-%m-%d" )".tab --output ecoli_TORMES-"$( date +"%Y-%m-%d" )" --custom_genes_db ecoli_virulence --threads 32 --min_len 36 --genera Escherichia
# Adhesin genes
Z2206
agn43
c3610
cah
cfaB
eaeH
eaeX
ecpD
ehaA
fim41
fimF
fimG
paa
papG
tia
tlra
upaG/ehaG
# Toxins
aec30
astA
eltA
eltB
estIa
astA
stb
stlv-iivb
stx2eB
vat
# Hemolysins
hlyA
hlyB
hlyC
hlyD
hlyE
# fed-genes
fedA
fedC
fedD
fedE
fedF
abricate --summary *tab > summary.tab
vk_abricate_to_phandango.py -i summary.tab -o summary -c 70
cut -d"," -f1,$(head -n1 summary.csv | tr '\,' '\n' | grep -w -nf column.names | cut -f1 -d: | paste -sd,) summary.csv > virulence-genes.csv
tormes --metadata samples.tab --output Salmonella_TORMES_2021 --threads 32 --min_len 36 --genera Salmonella
conda activate bactopia-new
for i in fastq/*fastq.gz;do mv $i ${i/_001};done
bactopia prepare fastq/ > fastqs.txt
bactopia --samples fastqs.txt -profile docker --datasets ./datasets --species "Staphylococcus aureus" --outdir Staphylococcus-aureus-out
bactopia --wf staphtyper --bactopia Staphylococcus-aureus-out -profile docker
bactopia --wf pangenome --bactopia Staphylococcus-aureus-out -profile docker
bactopia --wf mlst --bactopia Staphylococcus-aureus-out -profile docker
bactopia --wf abricate --bactopia Staphylococcus-aureus-out -profile docker
conda activate assembly
for i in *fasta;do echo -e "`pwd`/$i" >> query_list;done
for i in *fasta;do echo -e "`pwd`/$i" >> reference_list;done
fastANI --ql query_list --rl reference_list --matrix -o fastani-out
sed -i "s/\/.*\///g" fastani-out.matrix
for i in A6_S5_L001_R1_001.fastq.gz; do kraken2 --db /home/vsingh/softwares/minikrake2_db/minikraken_8GB_20200312/ $i --threads 30 --output `basename ${i%_R1_001.fastq.gz}_kraken`;done
git clone https://github.com/jenniferlu717/KrakenTools
for i in *fastq.gz;do python extract_kraken_reads.py -k ${i/L001*/L001_kraken} -s $i -t 544448 --include-children --report ${i/L001*/L001_R1_001_kraken2.report} -o ${i/R1/T544448_R1};done
for i in *gz;do spades.py --only-assembler -t 50 -m 500 -k 21,33,55,77,99,121 -s $i -o ${i/_R1*/.ASM.out};done
conda deactivate
PERL5LIB="";
conda activate tormes-1.3.0
echo -e "Samples\tRead1\tRead2" >> samples_"$( date +"%Y-%m-%d" )".tab
for i in *fasta;do echo -e "${i/.fasta*}\tGENOME\t`pwd`/$i" >> samples.tab;done
tormes --metadata samples_"$( date +"%Y-%m-%d" )".tab --output myco_TORMES-"$( date +"%Y-%m-%d" )" --threads 32
grabseqs sra ERR1777403
##convert vcf to simple 3 colums file, nano header SNP, Chr, Pos
more input.vcf | grep -v "#" | cut -f1,2,3 | awk '{ $3=$2; print $0}' | awk '{ $2=$1; print $0}' | awk '{ $1=$2__$3; print $0}' > input.txt
library(CMplot)
PBR <-read.table("input.txt", header = TRUE)
CMplot(PBR,plot.type="d",bin.size=1e6,col=c("darkgreen", "yellow", "red"),file="jpg",memo="",dpi=300)
snps<-read.table("SNPs.vcf",sep="\t",header=F,blank.lines.skip=TRUE,
comment.char = "#")
colnames(snps)<-c("chr","start","id","refallele","altallele","qual",
"filter","info","format")
summary(snps)
goodChrOrder <- c(paste("Supercontig_1.",c(1:8),sep=""),"MT_CBS_6936")
snps$chr <- factor(snps$chr,levels=goodChrOrder)
library(ggplot2)
snpDensity<-ggplot(snps) +
geom_histogram(aes(x=start),binwidth=1e4) + # pick a binwidth that is not too small
facet_wrap(~ chr,ncol=2) + # seperate plots for each chr, x-scales can differ from chr to chr
ggtitle("Density of SNPs") +
xlab("Position in the genome") +
ylab("SNP density") +
theme_bw() # I prefer the black and white theme
pdf("snp_density.pdf",800,1000)
print(snpDensity)
#dev.off()