From bd7903aec9b945ec8273964bc4ce8a6acfac8665 Mon Sep 17 00:00:00 2001 From: Yong Date: Wed, 31 May 2023 14:04:42 -0400 Subject: [PATCH] update_reference_sh --- assets/README.md | 6 +++--- assets/Reference/build_reference_index.sh | 7 +++---- assets/Reference/download_reference.sh | 16 ++++++++-------- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/assets/README.md b/assets/README.md index adea8f6..df38cd2 100644 --- a/assets/README.md +++ b/assets/README.md @@ -10,15 +10,15 @@ You can download reference genome, pre-build BWA index and annotated regions (e. ```bash ## eg: ./download_build_reference.sh hg38 /your/genome/data/path/hg38 -$ ./workflow/download_reference.sh [GENOME] [DEST_DIR] +$ ./assets/Reference/download_reference.sh [GENOME] [DEST_DIR] ``` * Build reference genomes index If your sequencing libraries come with spike-ins, you can build new aligner index after combining spike-in genome with human genome. The new index information will be appended to corresponding manifest file. ```bash -## eg: ./build_reference_index.sh hg38 ./data/BAC_F19K16_F24B22.fa hg38_BAC_F19K16_F24B22 /your/genome/data/path/hg38 -$ ./workflow/build_reference_index.sh [GENOME] [SPIKEIN_FA] [INDEX_PREFIX] [DEST_DIR] +## eg: ./assets/Reference/build_reference_index.sh hg38 ./data/BAC_F19K16_F24B22.fa hg38_BAC_F19K16_F24B22 /your/genome/data/path/hg38 +$ ./assets/Reference/build_reference_index.sh [GENOME] [SPIKEIN_FA] [INDEX_PREFIX] [DEST_DIR] ``` diff --git a/assets/Reference/build_reference_index.sh b/assets/Reference/build_reference_index.sh index 48189da..40c1012 100755 --- a/assets/Reference/build_reference_index.sh +++ b/assets/Reference/build_reference_index.sh @@ -1,9 +1,8 @@ #!/bin/bash ## the script will build BWA index for combined human and spike-in genomes. 
-## "Usage: ./build_reference_index.sh [GENOME] [SPIKEIN_FA] [INDEX_PREFIX] [DEST_DIR]" -## "Example: ./build_reference_index.sh hg38 ./data/BAC_F19K16_F24B22.fa hg38_BAC_F19K16_F24B22 /your/genome/data/path/hg38" -## "Example: ./build_reference_index.sh hg19 ./data/BAC_F19K16_F24B22.fa hg19_BAC_F19K16_F24B22 /cluster/projects/tcge/DB/cfmedip-seq-pepeline/hg19" +## "Usage: ./assets/Reference/build_reference_index.sh [GENOME] [SPIKEIN_FA] [INDEX_PREFIX] [DEST_DIR]" +## "Example: ./assets/Reference/build_reference_index.sh hg38 ./assets/Spike-in_genomes/BAC_F19K16_F24B22.fa hg38_BAC_F19K16_F24B22 /your/genome/data/path/hg38" ################# ## initilizaiton @@ -33,7 +32,7 @@ cat ${hg_fa} ${SPIKEIN_FA} > ${DEST_DIR}/${INDEX_PREFIX}.fa cd ${DEST_DIR} echo "=== Building bwa index for mereged genomes ..." -conda activate tcge-cfmedip-seq-pipeline +conda activate MEDIPIPE bwa index -a bwtsw ${INDEX_PREFIX}.fa diff --git a/assets/Reference/download_reference.sh b/assets/Reference/download_reference.sh index 65ae2b4..8c3e483 100755 --- a/assets/Reference/download_reference.sh +++ b/assets/Reference/download_reference.sh @@ -7,8 +7,8 @@ ## "A TSV file [DEST_DIR]/[GENOME].tsv will be generated. Use it for pipeline." ## "Supported genomes: hg19 and hg38"; Arabidopsis TAIR10 genome will be downloaded, ## as well as building bwa index for merged genomes. 
-## "Usage: ./download_build_reference.sh [GENOME] [DEST_DIR]" -## "Example: ./download_build_reference.sh hg38 /your/genome/data/path/hg38" +## "Usage: ./assets/Reference/download_reference.sh [GENOME] [DEST_DIR]" +## "Example: ./assets/Reference/download_reference.sh hg38 /your/genome/data/path/hg38" ################# @@ -47,7 +47,7 @@ if [[ "${GENOME}" == "hg38" ]]; then PROM="https://www.encodeproject.org/files/ENCFF140XLU/@@download/ENCFF140XLU.bed.gz" ENH="https://www.encodeproject.org/files/ENCFF212UAV/@@download/ENCFF212UAV.bed.gz" - REF_FA_TAIR10="https://www.arabidopsis.org/download_files/Genes/TAIR10_genome_release/TAIR10_chromosome_files/TAIR10_chr_all.fas" + #REF_FA_TAIR10="https://www.arabidopsis.org/download_files/Genes/TAIR10_genome_release/TAIR10_chromosome_files/TAIR10_chr_all.fas" fi @@ -68,7 +68,7 @@ if [[ "${GENOME}" == "hg19" ]]; then ENH="https://storage.googleapis.com/encode-pipeline-genome-data/hg19/ataqc/reg2map_honeybadger2_dnase_enh_p2.bed.gz" ## Arabidopsis - REF_FA_TAIR10="https://www.arabidopsis.org/download_files/Genes/TAIR10_genome_release/TAIR10_chromosome_files/TAIR10_chr_all.fas" + # REF_FA_TAIR10="https://www.arabidopsis.org/download_files/Genes/TAIR10_genome_release/TAIR10_chromosome_files/TAIR10_chr_all.fas" fi @@ -84,12 +84,12 @@ wget -c -O $(basename ${REF_MITO_FA}) ${REF_MITO_FA} wget -c -O $(basename ${CHRSZ}) ${CHRSZ} ## TAIR10 -wget -c -O $(basename ${REF_FA_TAIR10}) ${REF_FA_TAIR10} -sed -i -e 's/^>/>tair10_chr/' TAIR10_chr_all.fas -gzip TAIR10_chr_all.fas +#wget -c -O $(basename ${REF_FA_TAIR10}) ${REF_FA_TAIR10} +#sed -i -e 's/^>/>tair10_chr/' TAIR10_chr_all.fas +#gzip TAIR10_chr_all.fas ## combine genomes -cat $(basename ${REF_FA}) TAIR10_chr_all.fas.gz > ${GENOME}_tair10.fa.gz +# cat $(basename ${REF_FA}) TAIR10_chr_all.fas.gz > ${GENOME}_tair10.fa.gz ## annotated regions wget -N -c ${BLACKLIST}