Skip to content

Commit

Permalink
Add bases2fastq (#167)
Browse files Browse the repository at this point in the history
  • Loading branch information
DriesSchaumont authored Nov 26, 2024
1 parent 065297b commit a13b57d
Show file tree
Hide file tree
Showing 5 changed files with 553 additions and 0 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@

* `sgedemux`: demultiplexing sequencing data generated on Singular Genomics' sequencing instruments (PR #166).

* `bases2fastq`: demultiplexing sequencing data generated by Element Biosciences instruments (PR #167).

## BUG FIXES

* `falco`: Fix a typo in the `--reverse_complement` argument (PR #157).
Expand Down
200 changes: 200 additions & 0 deletions src/bases2fastq/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
# Viash component that wraps the Element Biosciences `bases2fastq` command line tool.
name: bases2fastq
description: |
  Bases2Fastq demultiplexes sequencing data generated by Element Biosciences instruments and converts base calls into FASTQ files.
keywords: ["demultiplex", "fastq", "demux", "Element Biosciences"]
links:
  documentation: https://docs.elembio.io/docs/bases2fastq/introduction/
license: Proprietary
requirements:
  # Command the wrapped script calls; it has to be available at run time.
  commands: [bases2fastq]
authors:
# Author details are merged in from the shared author file; only the roles are
# specific to this component.
- __merge__: /src/_authors/dries_schaumont.yaml
  roles: [author, maintainer]

argument_groups:
# Locations that bases2fastq reads from.
- name: Input
  arguments:
  # Passed to bases2fastq as its first positional argument (ANALYSIS_DIRECTORY).
  - name: --analysis_directory
    description: Location of analysis directory
    type: file
    required: true
    example: input/
  # Optional replacement for the RunManifest.csv inside the analysis directory.
  - name: --run_manifest
    alternatives: ["-r"]
    description: Location of run manifest to use instead of default RunManifest.csv found in analysis directory
    type: file
    required: false

# Locations that the component writes to. script.sh lets bases2fastq write into
# a temporary directory and then distributes its content over these outputs.
- name: Output
  arguments:
  # Receives the content of the `Samples` folder created by bases2fastq.
  - name: --output_directory
    alternatives: ["-o"]
    description: Location to save output fastqs
    type: file
    direction: output
    required: true
    example: fastq_dir
  # When this argument is omitted, script.sh passes --skip-qc-report to bases2fastq.
  - name: --report
    description: Output location for the HTML report
    type: file
    direction: output
    required: false
  # Receives everything that is left in the temporary directory once the FASTQ
  # files and the HTML report have been moved out.
  - name: --logs
    description: Directory containing log files
    type: file
    direction: output
    required: false
    example: logs_dir
# Options forwarded to bases2fastq. Argument names use underscores; script.sh
# translates them to the dash-separated flags that bases2fastq expects.
- name: Arguments
  arguments:
  - name: --chemistry_version
    type: string
    required: false
    description: Run parameters override, chemistry version.
  - name: "--demux_only"
    alternatives: [-d]
    type: boolean_true
    description: |
      Generate demux files and indexing stats without generating FASTQ
  - name: "--detect_adapters"
    type: boolean_true
    description: |
      Detect adapters sequences, overriding any sequences present in run manifest.
  - name: "--error_on_missing"
    type: boolean_true
    description: |
      Terminate execution for a missing file (by default, missing files are
      skipped and execution continues). Also set by --strict.
  - name: "--exclude_tile"
    alternatives: [-e]
    multiple: true
    type: string
    description: |
      Regex matching tile names to exclude. This flag can be specified multiple times. (e.g. L1.*C0[23]S.)
  - name: "--filter_mask"
    type: string
    description: |
      Run parameters override, custom pass filter mask.
  - name: "--flowcell_id"
    type: string
    description: |
      Run parameters override, flowcell ID.
  - name: "--force_index_orientation"
    type: boolean_true
    description: |
      Do not attempt to find orientation for I1/I2 reads (reverse complement).
      Use orientation given in run manifest.
  - name: "--group_fastq"
    type: boolean_true
    description: |
      Group all FASTQ/stats/metrics for a project are in the project folder.
  - name: "--i1_cycles"
    type: integer
    min: 1
    description: |
      Run parameters override, I1 cycles.
  - name: "--i2_cycles"
    type: integer
    min: 1
    description: |
      Run parameters override, I2 cycles
  - name: "--include_tile"
    alternatives: [-i]
    type: string
    multiple: true
    description: |
      Regex matching tile names to include. This flag
      can be specified multiple times. (e.g. L1.*C0[23]S.)
  - name: "--kit_configuration"
    type: string
    description: |
      Run parameters override, kit configuration.
  - name: "--legacy_fastq"
    type: boolean_true
    description: |
      Legacy naming for FASTQ files (e.g. SampleName_S1_L001_R1_001.fastq.gz)
  - name: "--log_level"
    type: string
    alternatives: [-l]
    choices: ["DEBUG", "INFO", "WARNING", "ERROR"]
    description: |
      Severity level for logging.
    example: INFO
  - name: "--no_error_on_invalid"
    type: boolean_true
    description: |
      Skip invalid files and continue execution. Overridden by --strict options
  - name: "--no_projects"
    type: boolean_true
    description: |
      Disable project directories
  - name: "--num_unassigned"
    type: integer
    min: 0
    max: 1000
    example: 30
    description: |
      Max Number of unassigned sequences to report.
  - name: "--preparation_workflow"
    type: string
    description: |
      Run parameters override, preparation workflow.
  - name: --qc_only
    type: boolean_true
    description: |
      Quickly generate run stats for single tile without generating FASTQ.
      Use --include_tile/--exclude_tile to define custom tile set.
  - name: --r1_cycles
    type: integer
    min: 1
    description: |
      Run parameters override, R1 cycles.
  - name: --r2_cycles
    type: integer
    min: 1
    description: |
      Run parameters override, R2 cycles.
  - name: "--split_lanes"
    type: boolean_true
    description: |
      Split FASTQ files by lane.
  - name: --strict
    # Short form as listed in the bases2fastq usage statement (`--strict, -s`).
    alternatives: [-s]
    type: boolean_true
    description: |
      In strict mode any invalid or missing input file will terminate execution
      (overrides no_error_on_invalid and sets --error_on_missing)
  # Options from the bases2fastq usage statement that are not exposed here:
  #   --help, -h                  Display this usage statement
  #   --input-remote, NAME        Rclone remote name for remote ANALYSIS_DIRECTORY
  #   --output-remote, NAME       Rclone remote name for remote OUTPUT_DIRECTORY
  #   --settings SELECTION        Run manifest settings override. This option may be specified multiple times.
  #   --version, -v               Display bases2fastq version
  # Options that script.sh sets itself instead of exposing them as arguments:
  #   --num-threads, -p NUMBER    Taken from `meta_cpus` when it is set
  #   --skip-qc-report            Passed whenever --report is not provided

# Script that implements the component.
resources:
- path: script.sh
  type: bash_script

# Script that tests the component.
test_resources:
- path: test.sh
  type: bash_script

engines:
- type: docker
  image: elembio/bases2fastq:2.1.0
  setup:
  # `tree` is called by script.sh to list the bases2fastq output.
  - type: apt
    packages: [procps, tree]
  # Store the third space-separated field of `bases2fastq --version` in the image.
  - type: docker
    run: |
      echo "bases2fastq: $(bases2fastq --version | cut -d' ' -f3)" > /var/software_versions.txt
  # Extra package for the test setup only.
  test_setup:
  - type: apt
    packages: curl

runners:
- type: executable
- type: nextflow
40 changes: 40 additions & 0 deletions src/bases2fastq/help.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@

Usage: bases2fastq [OPTIONS] ANALYSIS_DIRECTORY OUTPUT_DIRECTORY

positional arguments:
ANALYSIS_DIRECTORY Location of analysis directory
OUTPUT_DIRECTORY Location to save output

optional arguments:
--chemistry-version VERSION Run parameters override, chemistry version.
--demux-only, -d Generate demux files and indexing stats without generating FASTQ
--detect-adapters Detect adapters sequences, overriding any sequences present in run manifest.
--error-on-missing Terminate execution for a missing file (by default, missing files are skipped and execution continues). Also set by --strict.
--exclude-tile, -e SELECTION Regex matching tile names to exclude. This flag can be specified multiple times. (e.g. L1.*C0[23]S.)
--filter-mask MASK Run parameters override, custom pass filter mask.
--flowcell-id FLOWCELL_ID Run parameters override, flowcell ID.
--force-index-orientation Do not attempt to find orientation for I1/I2 reads (reverse complement). Use orientation given in run manifest.
--group-fastq Group all FASTQ/stats/metrics for a project are in the project folder (default false)
--help, -h Display this usage statement
--i1-cycles NUM_CYCLES Run parameters override, I1 cycles.
--i2-cycles NUM_CYCLES Run parameters override, I2 cycles.
--include-tile, -i SELECTION Regex matching tile names to include. This flag can be specified multiple times. (e.g. L1.*C0[23]S.)
--input-remote, NAME Rclone remote name for remote ANALYSIS_DIRECTORY
--kit-configuration KIT_CONFIG Run parameters override, kit configuration.
--legacy-fastq Legacy naming for FASTQ files (e.g. SampleName_S1_L001_R1_001.fastq.gz)
--log-level, -l LEVEL Severity level for logging. i.e. DEBUG, INFO, WARNING, ERROR (default INFO)
--no-error-on-invalid Skip invalid files and continue execution (by default, execution is terminated for an invalid file). Overridden by --strict options.
--no-projects Disable project directories (default false)
--num-threads, -p NUMBER Number of threads (default 1)
--num-unassigned NUMBER Max Number of unassigned sequences to report. Must be <= 1000 (default 30)
--output-remote, NAME Rclone remote name for remote OUTPUT_DIRECTORY
--preparation-workflow WORKFLOW Run parameters override, preparation workflow.
--qc-only Quickly generate run stats for single tile without generating FASTQ. Use --include-tile/--exclude-tile to define custom tile set.
--r1-cycles NUM_CYCLES Run parameters override, R1 cycles.
--r2-cycles NUM_CYCLES Run parameters override, R2 cycles.
--run-manifest, -r PATH Location of run manifest to use instead of default RunManifest.csv found in analysis directory
--settings SELECTION Run manifest settings override. This option may be specified multiple times.
--skip-qc-report SELECTION Do not generate HTML QC report.
--split-lanes Split FASTQ files by lane
--strict, -s In strict mode any invalid or missing input file will terminate execution (overrides no-error-on-invalid and sets --error-on-missing)
--version, -v Display bases2fastq version
111 changes: 111 additions & 0 deletions src/bases2fastq/script.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
#!/bin/bash

## VIASH START
## VIASH END

# Exit on error, including a failure in any stage of a pipeline
set -eo pipefail

# Names of the variables that hold on/off flags. A flag whose value is the
# string "false" is unset below, so that the `${par_x:+--flag}` expansions used
# to build the bases2fastq command line only emit the flag when it is enabled.
unset_if_false=(
    par_demux_only
    par_detect_adapters
    par_error_on_missing
    par_group_fastq
    par_legacy_fastq
    par_no_error_on_invalid
    par_no_projects
    par_qc_only
    par_split_lanes
    par_strict
    par_force_index_orientation
)

for par in "${unset_if_false[@]}"; do
    # Indirect expansion: look up the value of the variable whose name is in $par
    test_val="${!par}"
    if [[ "$test_val" == "false" ]]; then
        unset "$par"
    fi
done

# NOTE: --preparation-workflow is bugged in bases2fastq

# Build the bases2fastq command line.
# `${var:+words}` expands to nothing when the variable is unset or empty, so
# only options that were actually provided are forwarded. The expansions are
# left unquoted on purpose: an empty expansion has to disappear instead of
# turning into an empty argument, while the quotes around the value inside the
# expansion keep that value together as a single word.
args=(
    ${par_demux_only:+--demux-only}
    ${par_detect_adapters:+--detect-adapters}
    ${par_error_on_missing:+--error-on-missing}
    ${par_group_fastq:+--group-fastq}
    ${par_legacy_fastq:+--legacy-fastq}
    ${par_no_error_on_invalid:+--no-error-on-invalid}
    ${par_no_projects:+--no-projects}
    ${par_qc_only:+--qc-only}
    ${par_split_lanes:+--split-lanes}
    ${par_strict:+--strict}
    ${par_force_index_orientation:+--force-index-orientation}
    ${par_chemistry_version:+--chemistry-version "$par_chemistry_version"}
    ${par_filter_mask:+--filter-mask "$par_filter_mask"}
    ${par_flowcell_id:+--flowcell-id "$par_flowcell_id"}
    ${par_i1_cycles:+--i1-cycles "$par_i1_cycles"}
    ${par_i2_cycles:+--i2-cycles "$par_i2_cycles"}
    ${par_r1_cycles:+--r1-cycles "$par_r1_cycles"}
    ${par_r2_cycles:+--r2-cycles "$par_r2_cycles"}
    ${par_kit_configuration:+--kit-configuration "$par_kit_configuration"}
    ${par_log_level:+--log-level "$par_log_level"}
    ${par_num_unassigned:+--num-unassigned "$par_num_unassigned"}
    ${par_preparation_workflow:+--preparation-workflow "$par_preparation_workflow"}
    ${meta_cpus:+--num-threads "$meta_cpus"}
    ${par_run_manifest:+--run-manifest "$par_run_manifest"}
)

# Create arrays for inputs that contain multiple arguments
# (the values are provided as one ';'-separated string)
IFS=";" read -ra exclude_tile <<< "$par_exclude_tile"
IFS=";" read -ra include_tile <<< "$par_include_tile"

# No report location requested: do not let bases2fastq generate the HTML report
if [ -z "$par_report" ]; then
    args+=( --skip-qc-report )
fi

# bases2fastq takes one --exclude-tile/--include-tile flag per regex
for arg_value in "${exclude_tile[@]}"; do
    args+=( "--exclude-tile" "$arg_value" )
done

for arg_value in "${include_tile[@]}"; do
    args+=( "--include-tile" "$arg_value" )
done

echo "> Creating temporary directory."
# create temporary directory and clean up on exit
# bases2fastq writes all of its output here; the results are moved to their
# final locations afterwards.
TMPDIR=$(mktemp -d "$meta_temp_dir/$meta_name-XXXXXX")
echo "> Created $TMPDIR"
function clean_up {
    if [[ -d "$TMPDIR" ]]; then
        rm -rf "$TMPDIR"
    fi
}
trap clean_up EXIT

# Positional arguments come last: ANALYSIS_DIRECTORY followed by OUTPUT_DIRECTORY
args+=( "$par_analysis_directory" "$TMPDIR" )
echo "> Running bases2fastq with arguments: ${args[*]}"
# Quote the expansion so that every array element is passed as exactly one
# argument, also when a path or value contains whitespace or glob characters.
bases2fastq "${args[@]}"
echo "> Done running bases2fastq"

echo "> Output folder:"
tree "$TMPDIR"

echo "> Moving FASTQ files into final output directory"
mkdir -p "$par_output_directory/"
# Only call mv when the glob matches something: an unmatched glob would be
# passed to mv literally and fail with a confusing error message.
if compgen -G "$TMPDIR/Samples/*" > /dev/null; then
    mv --target-directory="$par_output_directory" "$TMPDIR"/Samples/*
elif [[ "$par_demux_only" == "true" || "$par_qc_only" == "true" ]]; then
    # These modes are documented as not generating FASTQ files.
    # NOTE(review): whether bases2fastq creates a Samples folder in these modes
    # was not verified; this branch only matters when it does not.
    echo "> No files found in $TMPDIR/Samples, nothing to move (--demux_only or --qc_only was requested)"
else
    echo "> ERROR: bases2fastq did not create any files in $TMPDIR/Samples" >&2
    exit 1
fi

# Remove the emptied Samples folder so that it does not end up among the logs.
# Best effort: rmdir refuses to remove a folder that still has content.
if [ -d "$TMPDIR/Samples" ]; then
    rmdir "$TMPDIR/Samples" 2> /dev/null || true
fi

if [ -n "$par_report" ]; then
    echo "> Moving HTML report to the output ($par_report)"
    mv "$TMPDIR"/*.html "$par_report"
else
    echo " > Leaving reports alone"
fi

# Logs is everything else
if [ -n "$par_logs" ]; then
    mkdir -p "$par_logs"
    echo "> Moving logs to their own location ($par_logs)"
    mv "$TMPDIR/"* "$par_logs/"
else
    echo "> Not moving logs"
fi



Loading

0 comments on commit a13b57d

Please sign in to comment.