Skip to content

Commit

Permalink
Merge pull request #58 from broadinstitute/dpark-dev
Browse files Browse the repository at this point in the history
tweak assembly refinement parameters and add iSNV notes
  • Loading branch information
dpark01 committed Jan 4, 2015
2 parents 3500bdb + d7ee8ef commit b07f157
Show file tree
Hide file tree
Showing 13 changed files with 5,862 additions and 7 deletions.
92 changes: 92 additions & 0 deletions old-scripts/iSNV/example_files/patients_all.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
patient
EBOV_2014_EM096
EBOV_2014_EM104
EBOV_2014_EM106
EBOV_2014_EM110
EBOV_2014_EM111
EBOV_2014_EM112
EBOV_2014_EM113
EBOV_2014_EM115
EBOV_2014_EM119
EBOV_2014_EM120
EBOV_2014_EM121
EBOV_2014_EM124.1
EBOV_2014_EM124.2
EBOV_2014_EM124.3
EBOV_2014_G3670
EBOV_2014_G3676.1
EBOV_2014_G3676.2
EBOV_2014_G3677.1
EBOV_2014_G3677.2
EBOV_2014_G3679
EBOV_2014_G3680
EBOV_2014_G3682
EBOV_2014_G3683
EBOV_2014_G3686
EBOV_2014_G3687
EBOV_2014_G3707
EBOV_2014_G3713.2
EBOV_2014_G3713.3
EBOV_2014_G3713.4
EBOV_2014_G3724
EBOV_2014_G3729
EBOV_2014_G3734.1
EBOV_2014_G3735.1
EBOV_2014_G3735.2
EBOV_2014_G3750.1
EBOV_2014_G3750.2
EBOV_2014_G3750.3
EBOV_2014_G3752
EBOV_2014_G3758
EBOV_2014_G3764
EBOV_2014_G3769.1
EBOV_2014_G3769.2
EBOV_2014_G3769.3
EBOV_2014_G3769.4
EBOV_2014_G3770.1
EBOV_2014_G3770.2
EBOV_2014_G3771
EBOV_2014_G3782
EBOV_2014_G3786
EBOV_2014_G3787
EBOV_2014_G3788
EBOV_2014_G3789.1
EBOV_2014_G3795
EBOV_2014_G3796
EBOV_2014_G3798
EBOV_2014_G3799
EBOV_2014_G3800
EBOV_2014_G3807
EBOV_2014_G3808
EBOV_2014_G3809
EBOV_2014_G3810.1
EBOV_2014_G3810.2
EBOV_2014_G3814
EBOV_2014_G3816
EBOV_2014_G3817
EBOV_2014_G3818
EBOV_2014_G3819
EBOV_2014_G3820
EBOV_2014_G3821
EBOV_2014_G3822
EBOV_2014_G3823
EBOV_2014_G3825.1
EBOV_2014_G3825.2
EBOV_2014_G3826
EBOV_2014_G3827
EBOV_2014_G3829
EBOV_2014_G3831
EBOV_2014_G3834
EBOV_2014_G3838
EBOV_2014_G3840
EBOV_2014_G3841
EBOV_2014_G3845
EBOV_2014_G3846
EBOV_2014_G3848
EBOV_2014_G3850
EBOV_2014_G3851
EBOV_2014_G3856.1
EBOV_2014_G3856.3
EBOV_2014_G3857
EBOV_2014_NM042.1
EBOV_2014_NM042.3
1,609 changes: 1,609 additions & 0 deletions old-scripts/iSNV/example_files/vphaser_iSNVs_combined_NXT1.txt

Large diffs are not rendered by default.

1,012 changes: 1,012 additions & 0 deletions old-scripts/iSNV/example_files/vphaser_iSNVs_combined_NXT1_freq.txt

Large diffs are not rendered by default.

Large diffs are not rendered by default.

1,617 changes: 1,617 additions & 0 deletions old-scripts/iSNV/example_files/vphaser_iSNVs_combined_NXT2.txt

Large diffs are not rendered by default.

77 changes: 77 additions & 0 deletions old-scripts/iSNV/iSNV-pipeline.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
Instructions for calling and validating iSNVs:

1. Calling of iSNVs using VPHASER V2

# PREREQUISITES
bash
# Prepend the bamtools shared-library directory to the loader search path.
# ${LD_LIBRARY_PATH:+:...} appends ":<old value>" only when the variable is
# already set and non-empty. An unconditional ":$LD_LIBRARY_PATH" would leave a
# trailing ":" when it is unset, and the dynamic loader treats that empty entry
# as the current working directory.
export LD_LIBRARY_PATH="/seq/viral/analysis/xyang/programs/Library/pezmaster31-bamtools-e235c55/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"

# CALCULATE SNPS WITH VPHASER V2
# Submits one bsub job per sample. Each job first creates the sample's output
# directory under _pileup/vp and, only if that mkdir succeeds, runs
# variant_caller on the sample's realigned BAM from _bams.
directory=/idi/sabeti-scratch/kandersen/analysis/140726_ebola3
in_directory=$directory/_bams
for sample in \
  EM096_r1 EM104_r1 EM106_r1 EM110_r1 EM111_r1 EM112_r1 EM113_r1 EM115_r1 \
  EM119_r1 EM120_r1 EM121_r1 EM124.1_r1 EM124.2_r1 EM124.3_r1 EM124.4_r1 \
  G3670.1_r1 G3676.1_r1 G3676.2_r1 G3677.1_r1 G3677.2_r1 G3679.1_r1 G3680.1_r1 \
  G3682.1_r1 G3683.1_r1 G3686.1_r1 G3687.1_r1 G3707_r1 G3713.2_r1 G3713.3_r1 G3713.4_r1 \
  G3724_r1 G3729_r1 G3734.1_r1 G3735.1_r1 G3735.2_r1 G3750.1_r1 G3750.2_r1 G3750.3_r1 \
  G3752_r1 G3758_r1 G3764_r1 G3765.2_r1 G3769.1_r1 G3769.2_r1 G3769.3_r1 G3769.4_r1 \
  G3770.1_r1 G3770.2_r1 G3771_r1 G3782_r1 G3786_r1 G3787_r1 G3788_r1 G3789.1_r1 \
  G3795_r1 G3796_r1 G3798_r1 G3799_r1 G3800_r1 G3805.1_r1 G3805.2_r1 G3807_r1 \
  G3808_r1 G3809_r1 G3810.1_r1 G3810.2_r1 G3814_r1 G3816_r1 G3817_r1 G3818_r1 \
  G3819_r1 G3820_r1 G3821_r1 G3822_r1 G3823_r1 G3825.1_r1 G3825.2_r1 G3826_r1 \
  G3827_r1 G3829_r1 G3831_r1 G3834_r1 G3838_r1 G3840_r1 G3841_r1 G3845_r1 \
  G3846_r1 G3848_r1 G3850_r1 G3851_r1 G3856.1_r1 G3856.3_r1 G3857_r1 \
  NM042.1_r1 NM042.2_r1 NM042.3_r1
do
  # The final argument is the command line executed by the submitted job; it
  # is expanded here, at submission time, so each job gets its own paths.
  bsub -n 4 -R "span[hosts=1]" -q week -R "rusage[mem=8]" \
    -o "$directory/_logs/$sample.log.bsub.txt" \
    -P sabeti_align \
    -J "$sample.vp1" \
    "mkdir $directory/_pileup/vp/$sample && /gsap/garage-viral/viral/analysis/xyang/programs/VariantCaller/bin/variant_caller -i $in_directory/$sample.realigned.bam -ig 6 -o $directory/_pileup/vp/$sample"
done

#output files from this step at: /idi/sabeti-data/kandersen/analysis/140726_ebola3/_pileup/vp
#example output file: “EBOV-EM096-SL-2014.var.raw.txt”


2. Remap iSNV calls to consistent reference coordinates

#remap all variant calls to same library
# For every raw call file in the current directory, derive the sample ID from
# the file name and run align2ref.pl against the common reference, using the
# sample's own "<ID>_r1.fasta" as -seq. Output is written with the prefix
# "<input file>.mapped".
for i in *.var.raw.txt; do
  # When nothing matches, the glob is left as the literal pattern; skip it
  # instead of handing a nonexistent file to align2ref.pl.
  [ -e "$i" ] || continue
  j=${i%%-SL-2014*.var.raw.txt}  # drop everything from "-SL-2014" onwards
  k=${j##EBOV-}                  # drop the leading "EBOV-"
  echo "$k"
  /seq/viral/analysis/xyang/scripts/align2ref.pl \
    -ref /idi/sabeti-data/rsealfon/viruses/ebolavirus/new_seqs/EBOV_SL_Guinea.fasta.aln.firstseq \
    -seq "/idi/sabeti-scratch/kandersen/analysis/140726_ebola3/_refs/${k}_r1.fasta" \
    -oprefix "$i.mapped" \
    -vc "$i"
done

#complete directory of remap script: /seq/viral/analysis/xyang/scripts/align2ref.pl


3. Filter iSNV calls

#filter
# Run filter_read_bias.pl on every remapped call file in the current directory,
# writing its stdout to "<input file>.filtered" and echoing each file name as
# it is processed.
for i in *var.raw.txt.mapped.ref.vc.txt; do
  # When nothing matches, the glob is left as the literal pattern. Without this
  # guard the redirection below would create a junk file literally named
  # "*var.raw.txt.mapped.ref.vc.txt.filtered", which step 4 would then rename.
  [ -e "$i" ] || continue
  perl ../vphaser2_calls/filter_read_bias.pl "$i" > "$i.filtered"
  echo "$i"
done

#the filtering parameters are hard-coded
#complete directory of filtering script: /idi/sabeti-data/rsealfon/viruses/ebolavirus/iSNV_combined/vphaser2_calls/filter_read_bias.pl
#script also attached


4. Remap the file names

#remap names
# Rename each filtered call file from
#   EBOV-<ID>-SL-2014.var.raw.txt.mapped.ref.vc.txt.filtered
# to
#   EBOV_2014_<ID>.var.raw.txt.mapped.ref.vc.txt.filtered
for i in *.filtered; do
  # When nothing matches, the glob is left as the literal pattern; skip it.
  [ -e "$i" ] || continue
  # Files already renamed by an earlier run also match *.filtered. Skip them,
  # otherwise they would receive a second "EBOV_2014_" prefix and a second
  # copy of the suffix.
  case "$i" in
    EBOV_2014_*) continue ;;
  esac
  j=${i%%-SL-2014.var.raw.txt.mapped.ref.vc.txt.filtered}  # drop the old suffix
  k=${j##EBOV-}                                            # drop the leading "EBOV-"
  mv -- "$i" "EBOV_2014_${k}.var.raw.txt.mapped.ref.vc.txt.filtered"
done


5. Combine all of the different samples sequenced in the same library prep

#navigate to folder containing all the .var.raw.txt.mapped.ref.vc.txt.filtered files from step 4
#run attached python script: concat_samples_NXT1.py

#repeat for the validation set, using concat_samples_NXT2.py (this is the same script as concat_samples_NXT1.py, but it saves the results under a different name)

# ‘patients_all.txt’ is just a file listing all the patient names in that sequencing batch

#output files (2) from this step: “vphaser_iSNVs_combined_NXT1.txt” and “vphaser_iSNVs_combined_NXT2.txt”



6. Apply frequency filter (code not included)

#apply only to original set (NXT1), not validation set (NXT2)
#remove all rows from “vphaser_iSNVs_combined_NXT1.txt” where the value in the “freq” column is <0.5

#output file from this step: “vphaser_iSNVs_combined_NXT1_freq.txt”


7. Validate calls from NXT1 with validation set (NXT2)

#run attached python script: nextera_validation
#argument 1 = “vphaser_iSNVs_combined_NXT1_freq.txt”
#argument 2 = “vphaser_iSNVs_combined_NXT2.txt”

#output file from this step: “vphaser_iSNVs_combined_NXT1_freq_validated.txt”
#final list of iSNV calls
Loading

0 comments on commit b07f157

Please sign in to comment.