diff --git a/CHANGELOG.md b/CHANGELOG.md index 3e9f40fc..f088f0ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -131,6 +131,8 @@ - `bedtools_getfasta`: extract sequences from a FASTA file for each of the intervals defined in a BED/GFF/VCF file (PR #59). +* `fq_subsample`: Sample a subset of records from single or paired FASTQ files (PR #147). + ## MINOR CHANGES * Uniformize component metadata (PR #23). diff --git a/src/fq_subsample/config.vsh.yaml b/src/fq_subsample/config.vsh.yaml index ea07d342..6628b1be 100644 --- a/src/fq_subsample/config.vsh.yaml +++ b/src/fq_subsample/config.vsh.yaml @@ -23,12 +23,10 @@ argument_groups: - name: "--output_1" type: file direction: output - default: $id.read_1.subsampled.fastq description: Sampled read 1 fastq files. Output will be gzipped if ends in `.gz`. - name: "--output_2" type: file direction: output - default: $id.read_2.subsampled.fastq description: Sampled read 2 fastq files. Output will be gzipped if ends in `.gz`. - name: "Options" @@ -58,14 +56,14 @@ engines: setup: - type: docker env: - - TZ Europe/Brussels + - TZ=Europe/Brussels run: | ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone && \ apt-get update && \ apt-get install -y --no-install-recommends build-essential git-all curl && \ curl https://sh.rustup.rs -sSf | sh -s -- -y && \ . "$HOME/.cargo/env" && \ - git clone --depth 1 --branch v0.11.0 https://github.com/stjude-rust-labs/fq.git && \ + git clone --depth 1 --branch v0.12.0 https://github.com/stjude-rust-labs/fq.git && \ mv fq /usr/local/ && cd /usr/local/fq && \ cargo install --locked --path . && \ mv /usr/local/fq/target/release/fq /usr/local/bin/ diff --git a/src/fq_subsample/test.sh b/src/fq_subsample/test.sh index fd8545e8..1de48e95 100644 --- a/src/fq_subsample/test.sh +++ b/src/fq_subsample/test.sh @@ -4,8 +4,8 @@ echo ">>> Testing $meta_executable" echo ">>> Testing for paired-end reads" "$meta_executable" \ - --input_1 $meta_resources_dir/test_data/a.1.fastq \ - --input_2 $meta_resources_dir/test_data/a.1.fastq \ + --input_1 $meta_resources_dir/test_data/a.3.fastq.gz \ + --input_2 $meta_resources_dir/test_data/a.4.fastq.gz \ --record_count 3 \ --seed 1 \ --output_1 a.1.subsampled.fastq \ @@ -13,23 +13,23 @@ echo ">>> Testing for paired-end reads" echo ">> Checking if the correct files are present" [ ! -f "a.1.subsampled.fastq" ] && echo "Subsampled FASTQ file for read 1 is missing!" && exit 1 -[ ! -s "a.1.subsampled.fastq" ] && echo "Subsampled FASTQ file is empty!" && exit 1 +[ $(wc -l < a.1.subsampled.fastq) -ne 12 ] && echo "Subsampled FASTQ file for read 1 does not contain the expected number of records" && exit 1 [ ! -f "a.2.subsampled.fastq" ] && echo "Subsampled FASTQ file for read 2 is missing" && exit 1 -[ ! -s "a.2.subsampled.fastq" ] && echo "Subsampled FASTQ file is empty" && exit 1 +[ $(wc -l < a.2.subsampled.fastq) -ne 12 ] && echo "Subsampled FASTQ file for read 2 does not contain the expected number of records" && exit 1 rm a.1.subsampled.fastq a.2.subsampled.fastq echo ">>> Testing for single-end reads" "$meta_executable" \ - --input_1 $meta_resources_dir/test_data/a.1.fastq \ - --input_2 $meta_resources_dir/test_data/a.1.fastq \ + --input_1 $meta_resources_dir/test_data/a.3.fastq.gz \ --record_count 3 \ --seed 1 \ --output_1 a.1.subsampled.fastq + echo ">> Checking if the correct files are present" [ ! -f "a.1.subsampled.fastq" ] && echo "Subsampled FASTQ file is missing" && exit 1 -[ ! -s "a.1.subsampled.fastq" ] && echo "Subsampled FASTQ file is empty" && exit 1 +[ $(wc -l < a.1.subsampled.fastq) -ne 12 ] && echo "Subsampled FASTQ file does not contain the expected number of records" && exit 1 echo ">>> Tests finished successfully" exit 0 diff --git a/src/fq_subsample/test_data/a.1.fastq b/src/fq_subsample/test_data/a.1.fastq deleted file mode 100644 index 4cd6d866..00000000 --- a/src/fq_subsample/test_data/a.1.fastq +++ /dev/null @@ -1,21 +0,0 @@ -@1 -ACGGCAT -+ -!!!!!!! -@2 -TACGGCA -+ -!!!!!!! -@3 -ATACGGC -+ -!!!!!!! -@4 -CATACGG -+ -!!!!!!! -@5 -GCATACG -+ -!!!!!!! - diff --git a/src/fq_subsample/test_data/a.2.fastq b/src/fq_subsample/test_data/a.2.fastq deleted file mode 100644 index f9fa80de..00000000 --- a/src/fq_subsample/test_data/a.2.fastq +++ /dev/null @@ -1,20 +0,0 @@ -@1 -ACGGCAT -+ -!!!!!!! -@2 -TACGGCA -+ -!!!!!!! -@3 -ATACGGC -+ -!!!!!!! -@4 -CATACGG -+ -!!!!!!! -@5 -GCATACG -+ -!!!!!!! \ No newline at end of file diff --git a/src/fq_subsample/test_data/a.3.fastq.gz b/src/fq_subsample/test_data/a.3.fastq.gz new file mode 100644 index 00000000..3e38d06d Binary files /dev/null and b/src/fq_subsample/test_data/a.3.fastq.gz differ diff --git a/src/fq_subsample/test_data/a.4.fastq.gz b/src/fq_subsample/test_data/a.4.fastq.gz new file mode 100644 index 00000000..3164c614 Binary files /dev/null and b/src/fq_subsample/test_data/a.4.fastq.gz differ