fixed a bug with clinvar column

naumenko-sa · May 23, 2019 · 6b3ac62 · 6b3ac62
1 parent b23f025
commit 6b3ac62
Show file tree

Hide file tree

Showing 7 changed files with 13 additions and 13 deletions.
diff --git a/HISTORY.md b/HISTORY.md
@@ -1,3 +1,6 @@
+## 0.0.4 (23 May 2019)
+- fixed a bug in the clinvar column
+
 ## 0.0.3 (21 May 2019)
 - bug fixes, WES report generated for NA12878 looks ok
 

diff --git a/bcbio.pbs b/bcbio.pbs
@@ -11,6 +11,8 @@
 # experience is that the best combination for WES and SK HPC is 7cores/50G
 # wgs alignment: -v threads=40 -l mem=150G,vmem=150G, bigmem nodes=512G RAM + 64 cores, 2 nodes - crashes with a memory error, 40 threads is too much
 
+# STAR needs at least 30G of RAM
+
 #PBS -l walltime=240:00:00,nodes=1:ppn=7
 #PBS -joe .
 #PBS -d .

diff --git a/cre.bcbio.upgrade.sh b/cre.bcbio.upgrade.sh
@@ -34,7 +34,7 @@ which bcbio_nextgen.py
 ######################################################################
 # 4. Install indices
 # bcbio_nextgen.py upgrade -u skip --genomes GRCh37 --aligners bwa --cores 10
-bcbio_nextgen.py upgrade -u skip --genomes GRCh37 --aligners star --cores 10
+# bcbio_nextgen.py upgrade -u skip --genomes GRCh37 --aligners star --cores 10
 # bcbio_nextgen.py upgrade -u skip --genomes GRCh37 --aligners hisat2 --cores 10
 # bcbio_nextgen.py upgrade -u skip --genomes GRCh37 --aligners rtg --cores 10
 #########################################################################
@@ -70,7 +70,7 @@ bcbio_nextgen.py upgrade -u skip --genomes GRCh37 --aligners star --cores 10
 # bcbio_nextgen.py upgrade -u skip --genomes GRCh37 --datatarget dbnsfp
 
 # rnaseq
-bcbio_nextgen.py upgrade -u skip --genomes GRCh37 --datatarget rnaseq
+# bcbio_nextgen.py upgrade -u skip --genomes GRCh37 --datatarget rnaseq
 
 ######################################################################
 # fresh installation for Sam with human and mouse genome

diff --git a/cre.gemini2txt.vcf2db.sh b/cre.gemini2txt.vcf2db.sh
@@ -38,7 +38,7 @@ sQuery="select \
  dp as Depth,\
  qual as Quality,\
  gene as Gene,\
- clinvar_sig as Clinvar,\
+ clinvar_pathogenic as Clinvar,\
  ensembl_gene_id as Ensembl_gene_id,\
  transcript as Ensembl_transcript_id,\
  aa_length as AA_position,\

diff --git a/cre.vcf2cre.sh b/cre.vcf2cre.sh
@@ -25,6 +25,8 @@
 ##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">
 # gunzip -c 331606_S1.flt.nochr.vcf.gz | grep -v "^#" | grep PASS | sed s/":DPI:"/":DP:"/ | awk -F ':' '{print $0"\tDP="$9}' | awk -F "\t" '{print $1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$6"\t"$7"\t"$11";"$8"\t"$9"\t"$10}' >> 331606.vcf
 
+. /hpf/largeprojects/ccmbio/naumenko/tools/bcbio_1.1.5/.test_profile
+
 bname=`basename $original_vcf .vcf.gz`
 
 echo "###############################################"

diff --git a/cre.vcfanno.conf b/cre.vcfanno.conf
@@ -51,13 +51,6 @@ file="variation/clinvar.vcf.gz"
 fields=["CLNSIG"]
 names=["clinvar_pathogenic"]
 ops=["concat"]
-
-# convert 5 to 'pathogenic', 255 to 'unknown', etc.
-[[postannotation]]
-fields=["clinvar_pathogenic"]
-op="lua:clinvar_sig(clinvar_pathogenic)"
-name="clinvar_sig"
-type="String"
 
 #dbNSFP v3.4
 [[annotation]]

diff --git a/cre.vep.sh b/cre.vep.sh
@@ -20,6 +20,8 @@ then
  threads=5
 fi
 
+. /hpf/largeprojects/ccmbio/naumenko/tools/bcbio_1.1.5/.test_profile
+
 bname=`basename $vcf .vcf.gz`
 
 #find reference
@@ -38,14 +40,12 @@ echo "Threads:" $threads
 # --plugin SpliceRegion --sift b --polyphen b --hgvs --shift_hgvs 1 --merged \
 # | sed '/^#/! s/;;/;/g' | bgzip -c > $bname.vepeffects.vcf.gz
 
-unset PERL5LIB && export PATH=/hpf/largeprojects/ccmbio/naumenko/tools/bcbio_1.1.5/anaconda/bin:"$PATH" && \
- /hpf/largeprojects/ccmbio/naumenko/tools/bcbio_1.1.5/anaconda/bin/vep --vcf -o stdout \
+unset PERL5LIB && vep --vcf -o stdout \
  -i $vcf --fork $threads --species homo_sapiens --no_stats --cache --offline --dir ${reference}/vep --symbol --numbers --biotype --total_length \
  --canonical --gene_phenotype --ccds --uniprot --domains --regulatory --protein --tsl --appris --af --max_af --af_1kg --af_esp --af_gnomad --pubmed --variant_class \
  --allele_number \
  --fasta ${reference}/seq/GRCh37.fq.gz \
  --plugin LoF,human_ancestor_fa:${reference}/human_ancestor.fa.gz,loftee_path:$vep_reference \
- --plugin G2P,file:/hpf/largeprojects/ccmbio/naumenko/validation/test_bcbio_runs/WES/variation/G2P.csv \
  --plugin MaxEntScan,/hpf/largeprojects/ccmbio/naumenko/tools/bcbio_1.1.5/anaconda/share/maxentscan-0_2004.04.21-1 \
  --plugin SpliceRegion --sift b --polyphen b --hgvsg --hgvs --shift_hgvs 1 --merged \
  | sed '/^#/! s/;;/;/g' | bgzip -c > $bname.vepeffects.vcf.gz