Skip to content

Commit

Permalink
handle timeout errors on data download
Browse files Browse the repository at this point in the history
  • Loading branch information
ManuelTgn authored Oct 11, 2024
1 parent 0854455 commit 87ef7d4
Showing 1 changed file with 12 additions and 4 deletions.
16 changes: 12 additions & 4 deletions crisprme_auto_test_conda.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ mkdir -p $GENOMEDIR # create Genomes folder
cd $GENOMEDIR
# download chromosomes FASTA files
original_md5sum="a5aa5da14ccf3d259c4308f7b2c18cb0" # see https://hgdownload.soe.ucsc.edu/goldenPath/hg38/bigZips/md5sum.txt
wget -c https://hgdownload.soe.ucsc.edu/goldenPath/hg38/bigZips/hg38.chromFa.tar.gz
# Keep re-running wget until it exits with status 0. Per the wget manual,
# -T 15 sets the network timeout to 15 seconds and -c continues a
# partially-downloaded file instead of starting over.
# NOTE(review): the loop repeats on ANY non-zero wget exit status (not only
# timeouts) and has no retry limit, so a permanent failure would spin forever.
while true; do # retry download if caught timeout
wget -T 15 -c https://hgdownload.soe.ucsc.edu/goldenPath/hg38/bigZips/hg38.chromFa.tar.gz && break
done
chromsfasta="hg38.chromFa.tar.gz"
local_md5sum="$(md5sum $chromsfasta | cut -d ' ' -f 1)"
if [ "$original_md5sum" != "$local_md5sum" ]; then
Expand All @@ -35,7 +37,9 @@ cd $VCF1000G
for i in $(seq 1 22; echo "X");
do
original_md5sum="$(curl -sL ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000_genomes_project/release/20190312_biallelic_SNV_and_INDEL/ALL.chr${i}.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.vcf.gz | md5sum | cut -d ' ' -f 1)" # compute original md5sum
wget -c ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000_genomes_project/release/20190312_biallelic_SNV_and_INDEL/ALL.chr${i}.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.vcf.gz
# Keep re-running wget for the current chromosome's VCF (${i}) until it exits
# with status 0. Per the wget manual, -T 15 sets the network timeout to 15
# seconds and -c continues a partially-downloaded file.
# NOTE(review): the loop repeats on ANY non-zero wget exit status (not only
# timeouts) and has no retry limit, so a permanent failure would spin forever.
while true; do # retry download if caught timeout
wget -T 15 -c ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000_genomes_project/release/20190312_biallelic_SNV_and_INDEL/ALL.chr${i}.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.vcf.gz && break
done
local_md5sum="$(md5sum ALL.chr${i}.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.vcf.gz | cut -d ' ' -f 1)"
if [ "$original_md5sum" != "$local_md5sum" ]; then # check download consistency
echo "ERROR: unexpected failure while downloading ALL.chr${i}.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.vcf.gz"
Expand All @@ -51,7 +55,9 @@ cd $ANNOTATIONDIR
echo "Downloading ENCODE+GENCODE annotation data..."
original_md5sum="$(curl -sL https://www.dropbox.com/s/1n2f0qxdba7u3gb/encode%2Bgencode.hg38.bed.zip?dl=0 | md5sum | cut -d ' ' -f 1)"
encodegencode="encode+gencode.hg38.bed.zip"
wget -c -O $encodegencode https://www.dropbox.com/s/1n2f0qxdba7u3gb/encode%2Bgencode.hg38.bed.zip?dl=0
# Keep re-running wget until it exits with status 0, writing the archive to
# $encodegencode (-O). Per the wget manual, -T 15 sets the network timeout to
# 15 seconds and -c continues a partially-downloaded file.
# NOTE(review): the loop repeats on ANY non-zero wget exit status (not only
# timeouts) and has no retry limit, so a permanent failure would spin forever.
while true; do # retry download if caught timeout
wget -T 15 -c -O $encodegencode https://www.dropbox.com/s/1n2f0qxdba7u3gb/encode%2Bgencode.hg38.bed.zip?dl=0 && break
done
local_md5sum="$(md5sum $encodegencode | cut -d ' ' -f 1)"
if [ "$original_md5sum" != "$local_md5sum" ]; then
echo "ERROR: unexpected failure while downloading ${encodegencode}"
Expand All @@ -62,7 +68,9 @@ unzip $encodegencode
echo "Downloading GENCODE encoding sequences..."
original_md5sum="$(curl -sL https://www.dropbox.com/s/isqpkg113cr1xea/gencode.protein_coding.bed.zip?dl=0 | md5sum | cut -d ' ' -f 1)"
gencode="gencode.protein_coding.bed.zip"
wget -c -O $gencode https://www.dropbox.com/s/isqpkg113cr1xea/gencode.protein_coding.bed.zip?dl=0
# Keep re-running wget until it exits with status 0, writing the archive to
# $gencode (-O). Per the wget manual, -T 15 sets the network timeout to 15
# seconds and -c continues a partially-downloaded file.
# The trailing `&& break` is essential: without it `while true` never
# terminates, so wget is re-invoked forever even after a successful download
# and the md5sum check that follows is never reached.
# The URL is quoted because it contains `?`, which the shell would otherwise
# treat as a glob character.
while true; do # retry download if caught timeout
wget -T 15 -c -O "$gencode" "https://www.dropbox.com/s/isqpkg113cr1xea/gencode.protein_coding.bed.zip?dl=0" && break
done
local_md5sum="$(md5sum $gencode | cut -d ' ' -f 1)"
if [ "$original_md5sum" != "$local_md5sum" ]; then
echo "ERROR: unexpected failure while downloading ${gencode}"
Expand Down

0 comments on commit 87ef7d4

Please sign in to comment.