forked from viash-hub/biobox
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' into samtools_view
- Loading branch information
Showing
32 changed files
with
4,584 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,159 @@ | ||
name: bcl_convert | ||
description: | | ||
Convert bcl files to fastq files using bcl-convert. | ||
Information about upgrading from bcl2fastq via | ||
https://emea.support.illumina.com/bulletins/2020/10/upgrading-from-bcl2fastq-to-bcl-convert.html | ||
and https://support.illumina.com/sequencing/sequencing_software/bcl-convert/compatibility.html | ||
argument_groups: | ||
- name: Input arguments | ||
arguments: | ||
- name: "--bcl_input_directory" | ||
alternatives: ["-i"] | ||
type: file | ||
required: true | ||
description: Input run directory | ||
example: bcl_dir | ||
- name: "--sample_sheet" | ||
alternatives: ["-s"] | ||
type: file | ||
description: Path to SampleSheet.csv file (default searched for in --bcl_input_directory) | ||
example: bcl_dir/sample_sheet.csv | ||
- name: --run_info | ||
type: file | ||
description: Path to RunInfo.xml file (default root of BCL input directory) | ||
example: bcl_dir/RunInfo.xml | ||
|
||
- name: Lane and tile settings | ||
arguments: | ||
- name: "--bcl_only_lane" | ||
type: integer | ||
description: Convert only specified lane number (default all lanes) | ||
example: 1 | ||
- name: --first_tile_only | ||
type: boolean | ||
description: Only convert first tile of input (for testing & debugging) | ||
example: true | ||
- name: --tiles | ||
type: string | ||
description: Process only a subset of tiles by a regular expression | ||
example: "s_[0-9]+_1" | ||
- name: --exclude_tiles | ||
type: string | ||
description: Exclude set of tiles by a regular expression | ||
example: "s_[0-9]+_1" | ||
|
||
- name: Resource arguments | ||
arguments: | ||
- name: --shared_thread_odirect_output | ||
type: boolean | ||
description: Use linux native asynchronous io (io_submit) for file output (Default=false) | ||
example: true | ||
- name: --bcl_num_parallel_tiles | ||
type: integer | ||
description: "# of tiles to process in parallel (default 1)" | ||
example: 1 | ||
- name: --bcl_num_conversion_threads | ||
type: integer | ||
description: "# of threads for conversion (per tile, default # cpu threads)" | ||
example: 1 | ||
- name: --bcl_num_compression_threads | ||
type: integer | ||
description: "# of threads for fastq.gz output compression (per tile, default # cpu threads, or HW+12)" | ||
example: 1 | ||
- name: --bcl_num_decompression_threads | ||
type: integer | ||
description: | ||
"# of threads for bcl/cbcl input decompression (per tile, default half # cpu threads, or HW+8). | ||
Only applies when preloading files" | ||
example: 1 | ||
|
||
- name: Run arguments | ||
arguments: | ||
- name: --bcl_only_matched_reads | ||
type: boolean | ||
description: For pure BCL conversion, do not output files for 'Undetermined' [unmatched] reads (output by default) | ||
example: true | ||
- name: --no_lane_splitting | ||
type: boolean | ||
description: Do not split FASTQ file by lane (false by default) | ||
example: true | ||
- name: --num_unknown_barcodes_reported | ||
type: integer | ||
description: "# of Top Unknown Barcodes to output (1000 by default)" | ||
example: 1000 | ||
- name: --bcl_validate_sample_sheet_only | ||
type: boolean | ||
description: Only validate RunInfo.xml & SampleSheet files (produce no FASTQ files) | ||
example: true | ||
- name: --strict_mode | ||
type: boolean | ||
description: Abort if any files are missing (false by default) | ||
example: true | ||
- name: --sample_name_column_enabled | ||
type: boolean | ||
description: Use sample sheet 'Sample_Name' column when naming fastq files & subdirectories | ||
example: true | ||
|
||
- name: Output arguments | ||
arguments: | ||
- name: "--output_directory" | ||
alternatives: ["-o"] | ||
type: file | ||
direction: output | ||
required: true | ||
description: Output directory containig fastq files | ||
example: fastq_dir | ||
- name: --bcl_sampleproject_subdirectories | ||
type: boolean | ||
description: Output to subdirectories based upon sample sheet 'Sample_Project' column | ||
example: true | ||
- name: --fastq_gzip_compression_level | ||
type: integer | ||
description: Set fastq output compression level 0-9 (default 1) | ||
example: 1 | ||
- name: "--reports" | ||
type: file | ||
direction: output | ||
required: false | ||
description: Reports directory | ||
example: reports_dir | ||
- name: "--logs" | ||
type: file | ||
direction: output | ||
required: false | ||
description: Reports directory | ||
example: logs_dir | ||
|
||
# bcl-convert arguments not taken into account | ||
# --force | ||
# --output-legacy-stats arg Also output stats in legacy (bcl2fastq2) format (false by default) | ||
  #   --no-sample-sheet arg                    Enable legacy no-sample-sheet operation (No demux or trimming. No settings supported. False by default, not recommended) | ||
|
||
# Script that implements the component (wraps the bcl-convert call). | ||
resources: | ||
  - type: bash_script | ||
    path: script.sh | ||
|
||
# Script used to test the component end to end. | ||
test_resources: | ||
  - type: bash_script | ||
    path: test.sh | ||
|
||
# Execution environment: a Debian image with bcl-convert installed from | ||
# Illumina's RPM package. | ||
engines: | ||
  - type: docker | ||
    image: debian:trixie-slim | ||
    # https://support.illumina.com/sequencing/sequencing_software/bcl-convert/downloads.html | ||
    setup: | ||
      # wget fetches the RPM and alien installs it (see the next step). | ||
      # NOTE(review): gdb, which, hostname and procps are presumably needed | ||
      # by bcl-convert or script.sh at runtime - confirm. | ||
      - type: apt | ||
        packages: [wget, gdb, which, hostname, alien, procps] | ||
      # Download the pinned bcl-convert 4.2.7 RPM, install it through alien, | ||
      # then remove the apt lists and the downloaded package. | ||
      - type: docker | ||
        run: | | ||
          wget https://s3.amazonaws.com/webdata.illumina.com/downloads/software/bcl-convert/bcl-convert-4.2.7-2.el8.x86_64.rpm -O /tmp/bcl-convert.rpm && \ | ||
          alien -i /tmp/bcl-convert.rpm && \ | ||
          rm -rf /var/lib/apt/lists/* && \ | ||
          rm /tmp/bcl-convert.rpm | ||
      # Record the installed bcl-convert version in /var/software_versions.txt. | ||
      # Only stderr of `bcl-convert -V` is piped into sed; stdout is discarded. | ||
      - type: docker | ||
        run: | | ||
          echo "bcl-convert: \"$(bcl-convert -V 2>&1 >/dev/null | sed -n '/Version/ s/^bcl-convert\ Version //p')\"" > /var/software_versions.txt | ||
# The component is built both as a standalone executable and as a Nextflow module. | ||
runners: | ||
  - type: executable | ||
  - type: nextflow |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
bcl-convert Version 00.000.000.4.2.7 | ||
Copyright (c) 2014-2022 Illumina, Inc. | ||
|
||
Run BCL Conversion (BCL directory to *.fastq.gz) | ||
bcl-convert --bcl-input-directory <BCL_ROOT_DIR> --output-directory <PATH> [options] | ||
|
||
Options: | ||
-h [ --help ] Print this help message | ||
-V [ --version ] Print the version and exit | ||
--output-directory arg Output BCL directory for BCL conversion (must be specified) | ||
-f [ --force ] Force: allow destination diretory to already exist | ||
--bcl-input-directory arg Input BCL directory for BCL conversion (must be specified) | ||
--sample-sheet arg Path to SampleSheet.csv file (default searched for in --bcl-input-directory) | ||
--bcl-only-lane arg Convert only specified lane number (default all lanes) | ||
--strict-mode arg Abort if any files are missing (false by default) | ||
--first-tile-only arg Only convert first tile of input (for testing & debugging) | ||
--tiles arg Process only a subset of tiles by a regular expression | ||
--exclude-tiles arg Exclude set of tiles by a regular expression | ||
--bcl-sampleproject-subdirectories arg Output to subdirectories based upon sample sheet 'Sample_Project' column | ||
--sample-name-column-enabled arg Use sample sheet 'Sample_Name' column when naming fastq files & subdirectories | ||
--fastq-gzip-compression-level arg Set fastq output compression level 0-9 (default 1) | ||
--shared-thread-odirect-output arg Use linux native asynchronous io (io_submit) for file output (Default=false) | ||
--bcl-num-parallel-tiles arg # of tiles to process in parallel (default 1) | ||
--bcl-num-conversion-threads arg # of threads for conversion (per tile, default # cpu threads) | ||
--bcl-num-compression-threads arg # of threads for fastq.gz output compression (per tile, default # cpu threads, | ||
or HW+12) | ||
--bcl-num-decompression-threads arg # of threads for bcl/cbcl input decompression (per tile, default half # cpu | ||
threads, or HW+8. Only applies when preloading files) | ||
--bcl-only-matched-reads arg For pure BCL conversion, do not output files for 'Undetermined' [unmatched] | ||
reads (output by default) | ||
--run-info arg Path to RunInfo.xml file (default root of BCL input directory) | ||
--no-lane-splitting arg Do not split FASTQ file by lane (false by default) | ||
--num-unknown-barcodes-reported arg # of Top Unknown Barcodes to output (1000 by default) | ||
--bcl-validate-sample-sheet-only arg Only validate RunInfo.xml & SampleSheet files (produce no FASTQ files) | ||
--output-legacy-stats arg Also output stats in legacy (bcl2fastq2) format (false by default) | ||
--no-sample-sheet arg Enable legacy no-sample-sheet operation (No demux or trimming. No settings | ||
supported. False by default, not recommended | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
#!/bin/bash | ||
|
||
set -eo pipefail | ||
|
||
# Run bcl-convert with the two mandatory directories plus every optional | ||
# setting that was supplied. | ||
# Each `${par_x:+ --flag "$par_x"}` expands to the flag and its value only | ||
# when par_x is set and non-empty, and to nothing otherwise, so unset | ||
# parameters fall back to bcl-convert's own defaults. | ||
# The par_* names mirror the component's argument names, with dashes in the | ||
# bcl-convert flags replaced by underscores. | ||
$(which bcl-convert) \ | ||
  --bcl-input-directory "$par_bcl_input_directory" \ | ||
  --output-directory "$par_output_directory" \ | ||
  ${par_sample_sheet:+ --sample-sheet "$par_sample_sheet"} \ | ||
  ${par_run_info:+ --run-info "$par_run_info"} \ | ||
  ${par_bcl_only_lane:+ --bcl-only-lane "$par_bcl_only_lane"} \ | ||
  ${par_first_tile_only:+ --first-tile-only "$par_first_tile_only"} \ | ||
  ${par_tiles:+ --tiles "$par_tiles"} \ | ||
  ${par_exclude_tiles:+ --exclude-tiles "$par_exclude_tiles"} \ | ||
  ${par_shared_thread_odirect_output:+ --shared-thread-odirect-output "$par_shared_thread_odirect_output"} \ | ||
  ${par_bcl_num_parallel_tiles:+ --bcl-num-parallel-tiles "$par_bcl_num_parallel_tiles"} \ | ||
  ${par_bcl_num_conversion_threads:+ --bcl-num-conversion-threads "$par_bcl_num_conversion_threads"} \ | ||
  ${par_bcl_num_compression_threads:+ --bcl-num-compression-threads "$par_bcl_num_compression_threads"} \ | ||
  ${par_bcl_num_decompression_threads:+ --bcl-num-decompression-threads "$par_bcl_num_decompression_threads"} \ | ||
  ${par_bcl_only_matched_reads:+ --bcl-only-matched-reads "$par_bcl_only_matched_reads"} \ | ||
  ${par_no_lane_splitting:+ --no-lane-splitting "$par_no_lane_splitting"} \ | ||
  ${par_num_unknown_barcodes_reported:+ --num-unknown-barcodes-reported "$par_num_unknown_barcodes_reported"} \ | ||
  ${par_bcl_validate_sample_sheet_only:+ --bcl-validate-sample-sheet-only "$par_bcl_validate_sample_sheet_only"} \ | ||
  ${par_strict_mode:+ --strict-mode "$par_strict_mode"} \ | ||
  ${par_sample_name_column_enabled:+ --sample-name-column-enabled "$par_sample_name_column_enabled"} \ | ||
  ${par_bcl_sampleproject_subdirectories:+ --bcl-sampleproject-subdirectories "$par_bcl_sampleproject_subdirectories"} \ | ||
  ${par_fastq_gzip_compression_level:+ --fastq-gzip-compression-level "$par_fastq_gzip_compression_level"} | ||
|
||
# Relocate the Reports folder only when a --reports destination was given. | ||
if [[ -n "${par_reports}" ]]; then | ||
  echo "Moving reports to their own location" | ||
  mv "${par_output_directory}/Reports" "${par_reports}" | ||
else | ||
  echo "Leaving reports alone" | ||
fi | ||
|
||
# Relocate the Logs folder only when a --logs destination was given. | ||
if [[ -n "${par_logs}" ]]; then | ||
  echo "Moving logs to their own location" | ||
  mv "${par_output_directory}/Logs" "${par_logs}" | ||
else | ||
  echo "Leaving logs alone" | ||
fi | ||
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
#!/bin/bash | ||
|
||
# Tests are sourced from: | ||
# https://www.10xgenomics.com/support/software/cell-ranger/latest/analysis/inputs/cr-direct-demultiplexing-bcl-convert | ||
# Test input files are fetched from: | ||
# https://cf.10xgenomics.com/supp/spatial-exp/demultiplexing/iseq-DI.tar.gz | ||
# https://cf.10xgenomics.com/supp/spatial-exp/demultiplexing/bcl_convert_samplesheet.csv | ||
|
||
set -eo pipefail | ||
|
||
# Download the 10x Genomics iSeq demo run and its sample sheet into a local | ||
# test_data directory, then unpack the archive and drop the tarball. | ||
echo ">> Fetching and preparing test data" | ||
data_src="https://cf.10xgenomics.com/supp/spatial-exp/demultiplexing/iseq-DI.tar.gz" | ||
sample_sheet_src="https://cf.10xgenomics.com/supp/spatial-exp/demultiplexing/bcl_convert_samplesheet.csv" | ||
test_data_dir="test_data" | ||
|
||
# -p keeps a rerun in the same working directory from failing on an | ||
# already existing test_data directory. | ||
mkdir -p "$test_data_dir" | ||
wget -q "$data_src" -O "$test_data_dir/data.tar.gz" | ||
wget -q "$sample_sheet_src" -O "$test_data_dir/sample_sheet.csv" | ||
tar xzf "$test_data_dir/data.tar.gz" -C "$test_data_dir" | ||
rm "$test_data_dir/data.tar.gz" | ||
|
||
# First scenario: convert with the sample sheet and route reports and logs | ||
# to their own directories, then check that every expected output exists. | ||
echo ">> Execute and verify output" | ||
|
||
$meta_executable \ | ||
  --bcl_input_directory "$test_data_dir/iseq-DI" \ | ||
  --sample_sheet "$test_data_dir/sample_sheet.csv" \ | ||
  --output_directory fastq \ | ||
  --reports reports \ | ||
  --logs logs | ||
|
||
echo ">>> Checking whether the output dir exists" | ||
if [[ ! -d fastq ]]; then | ||
  echo "Output dir could not be found!" | ||
  exit 1 | ||
fi | ||
|
||
echo ">>> Checking whether output fastq files are created" | ||
if [[ ! -f fastq/Undetermined_S0_L001_R1_001.fastq.gz ]]; then | ||
  echo "Output fastq files could not be found!" | ||
  exit 1 | ||
fi | ||
if [[ ! -f fastq/iseq-DI_S1_L001_R1_001.fastq.gz ]]; then | ||
  echo "Output fastq files could not be found!" | ||
  exit 1 | ||
fi | ||
|
||
echo ">>> Checking whether the report dir exists" | ||
if [[ ! -d reports ]]; then | ||
  echo "Reports dir could not be found!" | ||
  exit 1 | ||
fi | ||
|
||
echo ">>> Checking whether the log dir exists" | ||
if [[ ! -d logs ]]; then | ||
  echo "Logs dir could not be found!" | ||
  exit 1 | ||
fi | ||
|
||
# print final message | ||
echo ">>> Test finished successfully" | ||
|
||
# Second scenario: same input with extra options. Because | ||
# --bcl_only_matched_reads is true, no Undetermined FASTQ may be produced. | ||
echo ">> Execute with additional arguments and verify output" | ||
|
||
$meta_executable \ | ||
  --bcl_input_directory "$test_data_dir/iseq-DI" \ | ||
  --sample_sheet "$test_data_dir/sample_sheet.csv" \ | ||
  --output_directory fastq1 \ | ||
  --bcl_only_matched_reads true \ | ||
  --bcl_num_compression_threads 1 \ | ||
  --no_lane_splitting false \ | ||
  --fastq_gzip_compression_level 9 | ||
|
||
echo ">> Checking whether the output dir exists" | ||
if [[ ! -d fastq1 ]]; then | ||
  echo "Output dir could not be found!" | ||
  exit 1 | ||
fi | ||
|
||
echo ">> Checking whether output fastq files are created" | ||
if [[ -f fastq1/Undetermined_S0_L001_R1_001.fastq.gz ]]; then | ||
  echo "Undetermined should not be generated!" | ||
  exit 1 | ||
fi | ||
if [[ ! -f fastq1/iseq-DI_S1_L001_R1_001.fastq.gz ]]; then | ||
  echo "Output fastq files could not be found!" | ||
  exit 1 | ||
fi | ||
|
||
# print final message | ||
echo ">> Test finished successfully" | ||
|
||
# do not remove this | ||
# as otherwise your test might exit with a different exit code | ||
exit 0 |
Oops, something went wrong.