-
Notifications
You must be signed in to change notification settings - Fork 67
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #58 from broadinstitute/dpark-dev
tweak assembly refinement parameters and add iSNV notes
- Loading branch information
Showing
13 changed files
with
5,862 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
patient | ||
EBOV_2014_EM096 | ||
EBOV_2014_EM104 | ||
EBOV_2014_EM106 | ||
EBOV_2014_EM110 | ||
EBOV_2014_EM111 | ||
EBOV_2014_EM112 | ||
EBOV_2014_EM113 | ||
EBOV_2014_EM115 | ||
EBOV_2014_EM119 | ||
EBOV_2014_EM120 | ||
EBOV_2014_EM121 | ||
EBOV_2014_EM124.1 | ||
EBOV_2014_EM124.2 | ||
EBOV_2014_EM124.3 | ||
EBOV_2014_G3670 | ||
EBOV_2014_G3676.1 | ||
EBOV_2014_G3676.2 | ||
EBOV_2014_G3677.1 | ||
EBOV_2014_G3677.2 | ||
EBOV_2014_G3679 | ||
EBOV_2014_G3680 | ||
EBOV_2014_G3682 | ||
EBOV_2014_G3683 | ||
EBOV_2014_G3686 | ||
EBOV_2014_G3687 | ||
EBOV_2014_G3707 | ||
EBOV_2014_G3713.2 | ||
EBOV_2014_G3713.3 | ||
EBOV_2014_G3713.4 | ||
EBOV_2014_G3724 | ||
EBOV_2014_G3729 | ||
EBOV_2014_G3734.1 | ||
EBOV_2014_G3735.1 | ||
EBOV_2014_G3735.2 | ||
EBOV_2014_G3750.1 | ||
EBOV_2014_G3750.2 | ||
EBOV_2014_G3750.3 | ||
EBOV_2014_G3752 | ||
EBOV_2014_G3758 | ||
EBOV_2014_G3764 | ||
EBOV_2014_G3769.1 | ||
EBOV_2014_G3769.2 | ||
EBOV_2014_G3769.3 | ||
EBOV_2014_G3769.4 | ||
EBOV_2014_G3770.1 | ||
EBOV_2014_G3770.2 | ||
EBOV_2014_G3771 | ||
EBOV_2014_G3782 | ||
EBOV_2014_G3786 | ||
EBOV_2014_G3787 | ||
EBOV_2014_G3788 | ||
EBOV_2014_G3789.1 | ||
EBOV_2014_G3795 | ||
EBOV_2014_G3796 | ||
EBOV_2014_G3798 | ||
EBOV_2014_G3799 | ||
EBOV_2014_G3800 | ||
EBOV_2014_G3807 | ||
EBOV_2014_G3808 | ||
EBOV_2014_G3809 | ||
EBOV_2014_G3810.1 | ||
EBOV_2014_G3810.2 | ||
EBOV_2014_G3814 | ||
EBOV_2014_G3816 | ||
EBOV_2014_G3817 | ||
EBOV_2014_G3818 | ||
EBOV_2014_G3819 | ||
EBOV_2014_G3820 | ||
EBOV_2014_G3821 | ||
EBOV_2014_G3822 | ||
EBOV_2014_G3823 | ||
EBOV_2014_G3825.1 | ||
EBOV_2014_G3825.2 | ||
EBOV_2014_G3826 | ||
EBOV_2014_G3827 | ||
EBOV_2014_G3829 | ||
EBOV_2014_G3831 | ||
EBOV_2014_G3834 | ||
EBOV_2014_G3838 | ||
EBOV_2014_G3840 | ||
EBOV_2014_G3841 | ||
EBOV_2014_G3845 | ||
EBOV_2014_G3846 | ||
EBOV_2014_G3848 | ||
EBOV_2014_G3850 | ||
EBOV_2014_G3851 | ||
EBOV_2014_G3856.1 | ||
EBOV_2014_G3856.3 | ||
EBOV_2014_G3857 | ||
EBOV_2014_NM042.1 | ||
EBOV_2014_NM042.3 |
1,609 changes: 1,609 additions & 0 deletions
1,609
old-scripts/iSNV/example_files/vphaser_iSNVs_combined_NXT1.txt
Large diffs are not rendered by default.
Oops, something went wrong.
1,012 changes: 1,012 additions & 0 deletions
1,012
old-scripts/iSNV/example_files/vphaser_iSNVs_combined_NXT1_freq.txt
Large diffs are not rendered by default.
Oops, something went wrong.
951 changes: 951 additions & 0 deletions
951
old-scripts/iSNV/example_files/vphaser_iSNVs_combined_NXT1_freq_validated.txt
Large diffs are not rendered by default.
Oops, something went wrong.
1,617 changes: 1,617 additions & 0 deletions
1,617
old-scripts/iSNV/example_files/vphaser_iSNVs_combined_NXT2.txt
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
Instructions for calling and validating iSNVs: | ||
|
||
1. Calling of iSNVs using VPHASER V2 | ||
|
||
# PREREQUISITES | ||
bash | ||
export LD_LIBRARY_PATH=/seq/viral/analysis/xyang/programs/Library/pezmaster31-bamtools-e235c55/lib:$LD_LIBRARY_PATH | ||
|
||
# CALCULATE SNPS WITH VPHASER V2 | ||
for sample in EM096_r1 EM104_r1 EM106_r1 EM110_r1 EM111_r1 EM112_r1 EM113_r1 EM115_r1 EM119_r1 EM120_r1 EM121_r1 EM124.1_r1 EM124.2_r1 EM124.3_r1 EM124.4_r1 G3670.1_r1 G3676.1_r1 G3676.2_r1 G3677.1_r1 G3677.2_r1 G3679.1_r1 G3680.1_r1 G3682.1_r1 G3683.1_r1 G3686.1_r1 G3687.1_r1 G3707_r1 G3713.2_r1 G3713.3_r1 G3713.4_r1 G3724_r1 G3729_r1 G3734.1_r1 G3735.1_r1 G3735.2_r1 G3750.1_r1 G3750.2_r1 G3750.3_r1 G3752_r1 G3758_r1 G3764_r1 G3765.2_r1 G3769.1_r1 G3769.2_r1 G3769.3_r1 G3769.4_r1 G3770.1_r1 G3770.2_r1 G3771_r1 G3782_r1 G3786_r1 G3787_r1 G3788_r1 G3789.1_r1 G3795_r1 G3796_r1 G3798_r1 G3799_r1 G3800_r1 G3805.1_r1 G3805.2_r1 G3807_r1 G3808_r1 G3809_r1 G3810.1_r1 G3810.2_r1 G3814_r1 G3816_r1 G3817_r1 G3818_r1 G3819_r1 G3820_r1 G3821_r1 G3822_r1 G3823_r1 G3825.1_r1 G3825.2_r1 G3826_r1 G3827_r1 G3829_r1 G3831_r1 G3834_r1 G3838_r1 G3840_r1 G3841_r1 G3845_r1 G3846_r1 G3848_r1 G3850_r1 G3851_r1 G3856.1_r1 G3856.3_r1 G3857_r1 NM042.1_r1 NM042.2_r1 NM042.3_r1 | ||
do | ||
for directory in /idi/sabeti-scratch/kandersen/analysis/140726_ebola3 | ||
do | ||
for in_directory in $directory/_bams | ||
do | ||
bsub -n 4 -R "span[hosts=1]" -q week -R "rusage[mem=8]" -o $directory/_logs/$sample.log.bsub.txt -P sabeti_align -J $sample.vp1 "mkdir $directory/_pileup/vp/$sample && /gsap/garage-viral/viral/analysis/xyang/programs/VariantCaller/bin/variant_caller -i $in_directory/$sample.realigned.bam -ig 6 -o $directory/_pileup/vp/$sample" | ||
done | ||
done | ||
done | ||
|
||
#output files from this step at: /idi/sabeti-data/kandersen/analysis/140726_ebola3/_pileup/vp | ||
#example output file: “EBOV-EM096-SL-2014.var.raw.txt” | ||
|
||
|
||
2. Remap iSNV calls to consistent reference coordinates | ||
|
||
#remap all variant calls to the same reference coordinates | ||
for i in *.var.raw.txt; do j=${i%%-SL-2014*.var.raw.txt}; k=${j##EBOV-}; echo $k; /seq/viral/analysis/xyang/scripts/align2ref.pl -ref /idi/sabeti-data/rsealfon/viruses/ebolavirus/new_seqs/EBOV_SL_Guinea.fasta.aln.firstseq -seq /idi/sabeti-scratch/kandersen/analysis/140726_ebola3/_refs/$k"_r1.fasta" -oprefix $i.mapped -vc $i; done | ||
|
||
#full path to the remap script: /seq/viral/analysis/xyang/scripts/align2ref.pl | ||
|
||
|
||
3. Filter iSNV calls | ||
|
||
#filter | ||
for i in *var.raw.txt.mapped.ref.vc.txt ;do perl ../vphaser2_calls/filter_read_bias.pl $i > $i.filtered; echo $i; done | ||
|
||
#the filtering parameters are hard-coded in filter_read_bias.pl | ||
#full path to the filtering script: /idi/sabeti-data/rsealfon/viruses/ebolavirus/iSNV_combined/vphaser2_calls/filter_read_bias.pl | ||
#script also attached | ||
|
||
|
||
4. Remap the file names | ||
|
||
#remap names | ||
for i in *.filtered; do j=${i%%-SL-2014.var.raw.txt.mapped.ref.vc.txt.filtered}; k=${j##EBOV-}; mv $i "EBOV_2014_"$k.var.raw.txt.mapped.ref.vc.txt.filtered; done | ||
|
||
|
||
5. Combine all of the different samples sequenced in the same library prep | ||
|
||
#navigate to folder containing all the .var.raw.txt.mapped.ref.vc.txt.filtered files from step 4 | ||
#run attached python script: concat_samples_NXT1.py | ||
|
||
#repeat for validation set, using concat_samples_NXT2.py (this is the same file as _NXT1.py but saves results with a different name) | ||
|
||
# ‘patients_all.txt’ is just a file listing all the patient names in that sequencing batch | ||
|
||
#output files (2) from this step: “vphaser_iSNVs_combined_NXT1.txt” and “vphaser_iSNVs_combined_NXT2.txt” | ||
|
||
|
||
|
||
6. Apply frequency filter (code not included) | ||
|
||
#apply only to original set (NXT1), not validation set (NXT2) | ||
#remove all rows from “vphaser_iSNVs_combined_NXT1.txt” where the value in the “freq” column is <0.5 | ||
|
||
#output file from this step: “vphaser_iSNVs_combined_NXT1_freq.txt” | ||
|
||
|
||
7. Validate calls from NXT1 with validation set (NXT2) | ||
|
||
#run attached python script: nextera_validation | ||
#argument 1 = “vphaser_iSNVs_combined_NXT1_freq.txt” | ||
#argument 2 = “vphaser_iSNVs_combined_NXT2.txt” | ||
|
||
#output file from this step: “vphaser_iSNVs_combined_NXT1_freq_validated.txt” | ||
#final list of iSNV calls |
Oops, something went wrong.