PRS_LDpred_BCA.sh

#!/usr/bin/env bash
#To compute PRS on BCA samples using Validation_EA_genotyped.weight generated by PRS_LDpred_cambridge.sh
#only work on EA
#use Cambridge+BEACONcontrol as the validation, only work on EA
#BEACONcase+AMOS+other BEACONcontrol for discovery

#https://choishingwan.github.io/PRS-Tutorial/ldpred/
#https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4596916/
#https://biostat0903.github.io/DBSLMM/Scripts.html

plink=/fh/fast/stanford_j/Xiaoyu/Tools/plink-1.07-x86_64/plink1.9/plink

#use imputed cambamos data
#extract cambamos data, target data---
#step1--------------
# rm  ../result/BCA1000gnoambiguousmergelist.txt
# for chr in {1..22}
# do
# echo /fh/fast/dai_j/BEACON/BEACON_GRANT/data/imputation/bca_1000g/chr${chr}_filter_noambiguous  >> ../result/BCA1000gnoambiguousmergelist.txt
# done
# $plink --merge-list ../result/BCA1000gnoambiguousmergelist.txt --make-bed \
# --out /fh/fast/dai_j/BEACON/BEACON_GRANT/data/imputation/bca_1000g/bca_filter_noambiguous
# 
# #do GWAS on BEACON
# prefix=/fh/fast/dai_j/BEACON/BEACON_GRANT/data/imputation/bca_1000g/bca_filter_noambiguous
# $plink --bfile $prefix \
#   --keep ../result/GWAS/Discovery_EA_CO_selectedsamples_plink.txt --make-bed \
#   --out ../result/GWAS/Discovery_EA_CO_gwas
# #update_fam() in R
# prefix=/fh/fast/dai_j/BEACON/BEACON_GRANT/result/GWAS/Beacon_EA_CO_gwas
# $plink --bfile  $prefix --covar  ../result/GWAS/Beacon_EA_CO_selectedsamples_pheno_plink.txt \
#   --covar-name pc1,pc2,pc3,pc4,sex,age --logistic --beta --hide-covar --ci 0.95 --out $prefix
  
#working on validation data
# prefix=/fh/fast/dai_j/BEACON/BEACON_GRANT/data/imputation/bca_1000g/bca_filter_noambiguous
# prefix1=/fh/fast/dai_j/BEACON/BEACON_GRANT/data/imputation/bca_1000g/validation_filter_noambiguous
# $plink --bfile $prefix  \
#  --keep ../result/validationsamples_plink.txt --make-bed --out $prefix1
#  
# prefix=/fh/fast/dai_j/BEACON/BEACON_GRANT/data/imputation/bca_1000g/validation_filter_noambiguous
# #QC
# $plink \
#     --bfile ${prefix} \
#     --maf 0.05 \
#     --hwe 1e-6 \
#     --geno 0.01 \
#     --mind 0.01 \
#     --write-snplist \
#     --make-just-fam \
#     --out ${prefix}_QC
# 
# $plink \
#     --bfile ${prefix} \
#     --keep ${prefix}_QC.fam \
#     --extract ${prefix}_QC.snplist \
#     --indep-pairwise 200 50 0.25 \
#     --out ${prefix}_QC
# 
# $plink \
#     --bfile ${prefix} \
#     --extract ${prefix}_QC.prune.in \
#     --keep ${prefix}_QC.fam \
#     --het \
#     --out ${prefix}_QC
# 
# #Individuals that have a first or second degree relative in the sample (π>0.125) can be removed with the following command:
# prefix=/fh/fast/dai_j/BEACON/BEACON_GRANT/data/imputation/bca_1000g/validation_filter_noambiguous
# $plink \
#     --bfile $prefix \
#     --extract ${prefix}_QC.prune.in \
#     --keep ${prefix}_QC.valid.sample \
#     --rel-cutoff 0.125 \
#     --out ${prefix}_QC
#     
# prefix=/fh/fast/dai_j/BEACON/BEACON_GRANT/data/imputation/bca_1000g/validation_filter_noambiguous
# prefix1=/fh/fast/dai_j/BEACON/BEACON_GRANT/data/imputation/bca_1000g/validation_filter_noambiguous_QC
# $plink --bfile $prefix  \
#  --keep ../result/validationsamples_plink.txt --make-bed --out $prefix1
# 
# 
# $plink \
#     --bfile ${prefix1} \
#     --keep ${prefix1}.valid.sample \
#     --make-bed \
#     --out ${prefix1}
#     
# prefix=/fh/fast/dai_j/BEACON/BEACON_GRANT/data/imputation/bca_1000g/validation_filter_noambiguous_QC
# $plink --bfile $prefix  \
#  --keep ${prefix}.rel.id --make-bed --out $prefix
 
prefix=/fh/fast/dai_j/BEACON/BEACON_GRANT/data/imputation/bca_1000g/bca_filter_noambiguous
prefix1=/fh/fast/dai_j/BEACON/BEACON_GRANT/data/imputation/bca_1000g/bca_filter_noambiguous_NoNA
$plink \
    --bfile ${prefix} \
    --remove ../result/BCA_NAsamples_plink.txt \
    --make-bed \
    --out ${prefix1}
#step7---- 
#make BE/EA/BEEA files using PRS_LDpred.R
#work on base data using PRS_LDpred.R
#install ldpred
#ml Python
#python -m venv env
#source ./env/bin/activate
#python -m pip install ldpred
#If you want to switch projects or otherwise leave your virtual environment, simply run:
#deactivate


#A1:effective allele,--only-hm3
run_score_ldpred () {
    target="$1" #/fh/fast/dai_j/BEACON/BEACON_GRANT/result/Beacon_BE
    #N="$3" #sample size of base data
    targetweight="$2"
    
    #f is the probability that a marker is drawn from a Gaussian distribution, i.e., the fraction of causal markers.
    echo "score"
    ldpred score \
     --gf $target \
     --rf $targetweight.weight \
     --cov-file $target.covariate \
     --f 1 0.3 0.1 0.03 0.01 0.003 0.001  \
     --out $target.score \
     --pf $target.pheno \
     --pf-format LSTANDARD 
}


run_score_ldpred  /fh/fast/dai_j/BEACON/BEACON_GRANT/result/BCA_EA /fh/fast/dai_j/BEACON/BEACON_GRANT/result/Validation_EA   &> /fh/fast/dai_j/BEACON/BEACON_GRANT/result/LDpred_BCA_EA.log  &