-
Notifications
You must be signed in to change notification settings - Fork 1
/
PRS_LDpred_genotyped_BCA.sh
executable file
·194 lines (171 loc) · 8.55 KB
/
PRS_LDpred_genotyped_BCA.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
#!/usr/bin/env bash
#To compute PRS on BCA samples using Validation_EA_genotyped.weight generated by PRS_LDpred_genotype_cambridge.sh
#Use genotyped data
#use Cambridge+BEACONcontrol as the validation, only work on EA
#BEACONcase+AMOS+other BEACONcontrol for discovery
#https://choishingwan.github.io/PRS-Tutorial/ldpred/
#https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4596916/
#https://biostat0903.github.io/DBSLMM/Scripts.html
plink=/fh/fast/stanford_j/Xiaoyu_Oct2020/Tools/plink-1.07-x86_64/plink1.9/plink
#
#step 1----
#remove ambiguous SNPs
#genotype A/B
#prefix=/fh/fast/dai_j/BEACON/BeagessCambridgeAmos/bca_filtered_19Feb2015
prefix=/fh/fast/dai_j/BEACON/BEACON_GRANT/result/bca_filtered_30Nov2018
prefix1=/fh/fast/dai_j/BEACON/BEACON_GRANT/result/bca_filtered_30Nov2018_flip
#ml Python #to use snpflip
snpflip=/fh/fast/dai_j/CancerGenomics/Tools/python3/snpflip/snpflip
reference=/fh/fast/stanford_j/Xiaoyu_Oct2020/Tools/reference/human_g1k_v37.fasta
# $snpflip -b $prefix.bim -f $reference -o $prefix1
# FILESIZE=$(stat -c%s ${prefix1}.reverse)
# #flip
# if [[ $FILESIZE -gt 0 ]];then
# $plink --bfile $prefix --flip ${prefix1}.reverse --make-bed --out ${prefix1} --memory 1000
# else
# cp $prefix.bed ${prefix1}.bed
# cp $prefix.bim ${prefix1}.bim
# cp $prefix.fam ${prefix1}.fam
# fi
# #remove variants on ambiguous strand
# FILESIZE1=$(stat -c%s ${prefix1}.ambiguous)
# if [[ $FILESIZE1 -gt 0 ]];then
# $plink --bfile $prefix1 --exclude ${prefix1}.ambiguous --make-bed --out ${prefix1}_noambiguous
# fi
#working on "validation" data
# prefix=/fh/fast/dai_j/BEACON/BEACON_GRANT/result/bca_filtered_30Nov2018_flip_noambiguous
# prefix1=/fh/fast/dai_j/BEACON/BEACON_GRANT/result/validation_filtered_30Nov2018_flip_noambiguous
# $plink --bfile $prefix \
# --keep ../result/validationsamples_plink.txt --make-bed --out $prefix1
#samples with extreme heterozygosity are typically removed prior to downstream analyses
#prefix=/fh/fast/dai_j/BEACON/BEACON_GRANT/result/validation_filtered_30Nov2018_flip_noambiguous
# prefix=/fh/fast/dai_j/BEACON/BEACON_GRANT/result/bca_filtered_30Nov2018_flip_noambiguous
# #QC
# $plink \
# --bfile ${prefix} \
# --maf 0.05 \
# --hwe 1e-6 \
# --geno 0.01 \
# --mind 0.01 \
# --write-snplist \
# --make-just-fam \
# --out ${prefix}_QC
#
# $plink \
# --bfile ${prefix} \
# --keep ${prefix}_QC.fam \
# --extract ${prefix}_QC.snplist \
# --indep-pairwise 200 50 0.25 \
# --out ${prefix}_QC
#
# $plink \
# --bfile ${prefix} \
# --extract ${prefix}_QC.prune.in \
# --keep ${prefix}_QC.fam \
# --het \
# --out ${prefix}_QC
#Individuals that have a first or second degree relative in the sample (π>0.125) can be removed with the following command:
#prefix="/fh/fast/dai_j/BEACON/BEACON_GRANT/result/validation_filtered_30Nov2018_flip_noambiguous"
prefix="/fh/fast/dai_j/BEACON/BEACON_GRANT/result/bca_filtered_30Nov2018_flip_noambiguous"
$plink \
--bfile $prefix \
--extract ${prefix}_QC.prune.in \
--keep ${prefix}_QC.valid.sample \
--rel-cutoff 0.125 \
--out ${prefix}_QC
prefix="/fh/fast/dai_j/BEACON/BEACON_GRANT/result/validation_filtered_30Nov2018_flip_noambiguous"
prefix1="/fh/fast/dai_j/BEACON/BEACON_GRANT/result/validation_filtered_30Nov2018_flip_noambiguous_QC"
$plink --bfile $prefix \
--keep ../result/validationsamples_plink.txt --make-bed --out $prefix1
#step 3----
$plink \
--bfile ${prefix}_QC \
--keep ${prefix}_QC.valid.sample \
--make-bed \
--out ${prefix1}
prefix=/fh/fast/dai_j/BEACON/BEACON_GRANT/result/validation_filtered_30Nov2018_flip_noambiguous_QC
$plink --bfile $prefix \
--keep ${prefix}.rel.id --make-bed --out $prefix
#make BE/EA/BEEA files using PRS_LDpred.R
#work on base data using PRS_LDpred.R
#install ldpred
#ml Python/3.6.6-foss-2018b
python -m venv env
source ./env/bin/activate
python -m pip install --user ldpred
#If you want to switch projects or otherwise leave your virtual environment, simply run:
#deactivate
#If you want to re-enter the virtual environment just follow the same instructions above about activating a virtual environment. There’s no need to re-create the virtual environment.
# #ldpred=/fh/fast/dai_j/CancerGenomics/Tools/python3/lib/python3.6/site-packages/LDpred-1.0.11-py3.6.egg/ldpred/LDpred_fast.py
# #N numer of samples in base data
# ldpred coord \
# --rs MarkerName \
# --A1 Allele1 \
# --A2 Allele2 \
# --pos pos \
# --chr chr \
# --pval P \
# --eff Effect \
# --ssf-format CUSTOM \
# --N 14229 \
# --ssf /fh/fast/dai_j/BEACON/BEACON_GRANT/result/Bonn_Oxford_Cambridge_metastat.txt.gz \
# --out /fh/fast/dai_j/BEACON/BEACON_GRANT/result/Beacon_BE.coord \
# --gf /fh/fast/dai_j/BEACON/BEACON_GRANT/result/Beacon_BE
#
# nsnp=$(wc -l /fh/fast/dai_j/BEACON/BEACON_GRANT/result/Beacon_BE.bim) #4937535 /3000=1645
# read -r -a tmp <<< "$nsnp"
# nldr=$((${tmp[0]}/3000))
# ldpred gibbs \
# --cf /fh/fast/dai_j/BEACON/BEACON_GRANT/result/Beacon_BE.coord \
# --ldr $nldr \
# --ldf /fh/fast/dai_j/BEACON/BEACON_GRANT/result/Beacon_BE.ld \
# --out /fh/fast/dai_j/BEACON/BEACON_GRANT/result/Beacon_BE.weight \
# --N 14229
#
# ldpred score \
# --gf /fh/fast/dai_j/BEACON/BEACON_GRANT/result/Beacon_BE.ldpred \
# --rf /fh/fast/dai_j/BEACON/BEACON_GRANT/result/Beacon_BE.weight \
# --out /fh/fast/dai_j/BEACON/BEACON_GRANT/result/Beacon_BE.score \
# --pf /fh/fast/dai_j/BEACON/BEACON_GRANT/result/Beacon_BE.pheno \
# --pf-format LSTANDARD
run_score_ldpred () {
target="$1" #/fh/fast/dai_j/BEACON/BEACON_GRANT/result/Beacon_BE
#N="$3" #sample size of base data
targetweight="$2"
#f is the probability that a marker is drawn from a Gaussian distribution, i.e., the fraction of causal markers.
echo "score"
ldpred score \
--gf $target \
--rf $targetweight.weight \
--cov-file $target.covariate \
--f 1 0.3 0.1 0.03 0.01 0.003 0.001 \
--out $target.score \
--pf $target.pheno \
--pf-format LSTANDARD
}
# run_ldpred /fh/fast/dai_j/BEACON/BEACON_GRANT/result/EA_Beacon_Bonn_metastat.txt.gz /fh/fast/dai_j/BEACON/BEACON_GRANT/result/CambAmos_EA_genotyped 8843 &> /fh/fast/dai_j/BEACON/BEACON_GRANT/result/LDpred_CambAmos_EA_genotyped.log &
# # run_ldpred /fh/fast/dai_j/BEACON/BEACON_GRANT/result/BEEA_Bonn_Cambridge_metastat.txt.gz /fh/fast/dai_j/BEACON/BEACON_GRANT/result/Beacon_BEEA_genotyped 11459 &> /fh/fast/dai_j/BEACON/BEACON_GRANT/result/LDpred_Beacon_BEEA_genotyped.log &
#
# #high quality SNPs
# cp /fh/fast/dai_j/BEACON/BEACON_GRANT/result/CambAmos_EA_genotyped.bim /fh/fast/dai_j/BEACON/BEACON_GRANT/result/CambAmos_EA_genotyped_info.bim
# cp /fh/fast/dai_j/BEACON/BEACON_GRANT/result/CambAmos_EA_genotyped.bed /fh/fast/dai_j/BEACON/BEACON_GRANT/result/CambAmos_EA_genotyped_info.bed
# cp /fh/fast/dai_j/BEACON/BEACON_GRANT/result/CambAmos_EA_genotyped.fam /fh/fast/dai_j/BEACON/BEACON_GRANT/result/CambAmos_EA_genotyped_info.fam
# cp /fh/fast/dai_j/BEACON/BEACON_GRANT/result/CambAmos_EA_genotyped.pheno /fh/fast/dai_j/BEACON/BEACON_GRANT/result/CambAmos_EA_genotyped_info.pheno
# cp /fh/fast/dai_j/BEACON/BEACON_GRANT/result/CambAmos_EA_genotyped.covariate /fh/fast/dai_j/BEACON/BEACON_GRANT/result/CambAmos_EA_genotyped_info.covariate
# run_ldpred /fh/fast/dai_j/BEACON/BEACON_GRANT/result/EA_Beacon_Bonn_info_metastat.txt.gz /fh/fast/dai_j/BEACON/BEACON_GRANT/result/CambAmos_EA_genotyped_info 8843 &> /fh/fast/dai_j/BEACON/BEACON_GRANT/result/LDpred_CambAmos_EA_genotyped_info1.log &
# copyfile(){
# local prefix="$1"
# local prefix1="$2"
# cp $prefix.bim $prefix1.bim
# cp $prefix.pheno $prefix1.pheno
# cp $prefix.covariate $prefix1.covariate
# cp $prefix.bed $prefix1.bed
# cp $prefix.fam $prefix1.fam
# echo "done"
# }
# prefix=/fh/fast/dai_j/BEACON/BEACON_GRANT/result/CambAmos_EA_genotyped
# prefix1=/fh/fast/dai_j/BEACON/BEACON_GRANT/result/CambAmos_EA_genotyped_noAmos
# copyfile $prefix $prefix1
# run_ldpred /fh/fast/dai_j/BEACON/BEACON_GRANT/result/EA_Beacon_Bonn_genotyped_noAmos_metastat.txt.gz /fh/fast/dai_j/BEACON/BEACON_GRANT/result/CambAmos_EA_genotyped_noAmos 8843 &> /fh/fast/dai_j/BEACON/BEACON_GRANT/result/LDpred_CambAmos_EA_genotyped_noAmos.log &
#run_ldpred /fh/fast/dai_j/BEACON/BEACON_GRANT/result/EA_Discovery_Bonn_genotyped_metastat.txt.gz /fh/fast/dai_j/BEACON/BEACON_GRANT/result/Validation_EA_genotyped 8724 &> /fh/fast/dai_j/BEACON/BEACON_GRANT/result/LDpred_Validation_EA_genotyped.log &
run_score_ldpred /fh/fast/dai_j/BEACON/BEACON_GRANT/result/BCA_EA_genotyped /fh/fast/dai_j/BEACON/BEACON_GRANT/result/Validation_EA_genotyped &> /fh/fast/dai_j/BEACON/BEACON_GRANT/result/LDpred_BCA_EA_genotyped.log &