FEAT: Add falco and multiqc. (#8)
* FEAT: Add falco.

* Fix output and adjust formatting

* Add test for permissions

* FEAT: Add multiqc.

* Cleanup code.

* Update file parsing

* Refactor into different workflows

* Remove debugging view()

* Add test for multiqc

* Autodetect SampleSheet from input directory

* Fix InterOp statistics.
DriesSchaumont authored May 14, 2024
1 parent 4cfd291 commit fb98dc5
Showing 12 changed files with 376 additions and 20 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -3,4 +3,5 @@ testData

# Nextflow related files
.nextflow
.nextflow.log*
work
4 changes: 3 additions & 1 deletion src/config/tests.config
@@ -1,5 +1,6 @@
profiles {

process.container = 'nextflow/bash:latest'
profiles {
// detect tempdir
tempDir = java.nio.file.Paths.get(
System.getenv('NXF_TEMP') ?:
@@ -26,6 +27,7 @@ profiles {
}

docker {
docker.fixOwnership = true
docker.enabled = true
// docker.userEmulation = true
singularity.enabled = false
38 changes: 38 additions & 0 deletions src/dataflow/combine_samples/config.vsh.yaml
@@ -0,0 +1,38 @@
name: combine_samples
namespace: dataflow
description: Combine fastq files from across samples into one event with a list of fastq files per orientation.
argument_groups:
- name: Input arguments
arguments:
- name: "--id"
description: "ID of the new event"
type: string
required: true
- name: --forward_input
type: file
required: true
- name: --reverse_input
type: file
required: false
- name: Output arguments
arguments:
- name: --output_forward
type: file
direction: output
multiple: true
required: true
- name: --output_reverse
type: file
direction: output
multiple: true
required: false
resources:
- type: nextflow_script
path: main.nf
entrypoint: run_wf

runners:
- type: nextflow

engines:
- type: native
28 changes: 28 additions & 0 deletions src/dataflow/combine_samples/main.nf
@@ -0,0 +1,28 @@
workflow run_wf {
take:
input_ch

main:
output_ch = input_ch
| map { id, state ->
def newEvent = [state.id, state + ["_meta": ["join_id": id]]]
newEvent
}
| groupTuple(by: 0, sort: "hash")
| map {run_id, states ->
// Gather the following state for all samples
def forward_fastqs = states.collect{it.forward_input}
def reverse_fastqs = states.collect{it.reverse_input}.findAll{it != null}

def resultState = [
"output_forward": forward_fastqs,
"output_reverse": reverse_fastqs,
// The join ID is the same across all samples from the same run
"_meta": ["join_id": states[0]._meta.join_id]
]
return [run_id, resultState]
}

emit:
output_ch
}
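
For illustration only (not part of the commit), the reshaping this workflow performs can be sketched in plain Groovy, outside of Nextflow. The run id and file names below are hypothetical and the _meta bookkeeping is skipped:

// Sketch: events sharing the same target id are grouped into one event
// carrying a list of fastq paths per orientation.
def events = [
    ["run1", [forward_input: "sampleA_R1.fastq.gz", reverse_input: "sampleA_R2.fastq.gz"]],
    ["run1", [forward_input: "sampleB_R1.fastq.gz", reverse_input: null]],
]

def combined = events
    .groupBy { id, state -> id }
    .collect { id, grouped ->
        def states = grouped.collect { it[1] }
        [id, [
            output_forward: states.collect { it.forward_input },
            output_reverse: states.collect { it.reverse_input }.findAll { it != null },
        ]]
    }

assert combined == [["run1", [
    output_forward: ["sampleA_R1.fastq.gz", "sampleB_R1.fastq.gz"],
    output_reverse: ["sampleA_R2.fastq.gz"],
]]]
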
36 changes: 36 additions & 0 deletions src/dataflow/gather_fastqs_and_validate/config.vsh.yaml
@@ -0,0 +1,36 @@
name: gather_fastqs_and_validate
namespace: dataflow
description: |
From a directory containing fastq files, gather the files per sample
and validate according to the contents of the sample sheet.
argument_groups:
- name: Input arguments
arguments:
- name: --input
description: Directory containing .fastq files
type: file
required: true
- name: --sample_sheet
description: Sample sheet
type: file
required: true
- name: Output arguments
arguments:
- name: --fastq_forward
type: file
direction: output
required: true
- name: "--fastq_reverse"
type: file
direction: output
required: false
resources:
- type: nextflow_script
path: main.nf
entrypoint: run_wf

runners:
- type: nextflow

engines:
- type: native
73 changes: 73 additions & 0 deletions src/dataflow/gather_fastqs_and_validate/main.nf
@@ -0,0 +1,73 @@
workflow run_wf {
take:
input_ch

main:
output_ch = input_ch
// Gather input files from BCL convert output folder
| flatMap { id, state ->
println "Processing sample sheet: $state.sample_sheet"
def sample_sheet = state.sample_sheet
def start_parsing = false
def sample_id_column_index = null
def samples = ["Undetermined"]
def original_id = id

// Parse sample sheet for sample IDs
csv_lines = sample_sheet.splitCsv(header: false, sep: ',')
csv_lines.any { csv_items ->
if (csv_items.isEmpty()) {
return
}
def possible_header = csv_items[0]
def header = possible_header.find(/\[(.*)\]/){fullmatch, header_name -> header_name}
if (header) {
if (start_parsing) {
// Stop parsing when encountering the next header
return true
}
if (header == "Data") {
start_parsing = true
}
}
if (start_parsing) {
if ( !sample_id_column_index ) {
sample_id_column_index = csv_items.findIndexValues{it == "Sample_ID"}
assert sample_id_column_index != -1:
"Could not find column 'Sample_ID' in sample sheet!"
return
}
samples += csv_items[sample_id_column_index]
}
}
println "Looking for fastq files in ${state.input}."
def allfastqs = state.input.listFiles().findAll{it.isFile() && it.name ==~ /^.+\.fastq\.gz$/}
println "Found ${allfastqs.size()} fastq files, matching them to the following samples: ${samples}."
processed_samples = samples.collect { sample_id ->
def forward_regex = ~/^${sample_id}_S(\d+)_(L(\d+)_)?R1_(\d+)\.fastq\.gz$/
def reverse_regex = ~/^${sample_id}_S(\d+)_(L(\d+)_)?R2_(\d+)\.fastq\.gz$/
def forward_fastq = state.input.listFiles().findAll{it.isFile() && it.name ==~ forward_regex}
def reverse_fastq = state.input.listFiles().findAll{it.isFile() && it.name ==~ reverse_regex}
assert forward_fastq : "No forward fastq files were found for sample ${sample_id}"
assert forward_fastq.size() < 2:
"Found multiple forward fastq files corresponding to sample ${sample_id}: ${forward_fastq}"
assert reverse_fastq.size() < 2:
"Found multiple reverse fastq files corresponding to sample ${sample_id}: ${reverse_fastq}."
assert !forward_fastq.isEmpty():
"Expected a forward fastq file to have been created correspondig to sample ${sample_id}."
// TODO: if one sample had reverse reads, the others must as well.
reverse_fastq = !reverse_fastq.isEmpty() ? reverse_fastq[0] : null
def fastqs_state = [
"fastq_forward": forward_fastq[0],
"fastq_reverse": reverse_fastq,
"_meta": [ "join_id": original_id ],
]
[sample_id, fastqs_state]
}
println "Finished processing sample sheet."
return processed_samples
}

emit:
output_ch
}
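
As a quick illustration of the filename pattern used above (not part of the commit), this plain-Groovy snippet shows which bcl-convert style names a hypothetical sample would match:

// Sketch only: hypothetical file names checked against the same pattern
// as in the workflow above.
def sample_id = "sampleA"
def forward_regex = ~/^${sample_id}_S(\d+)_(L(\d+)_)?R1_(\d+)\.fastq\.gz$/

assert "sampleA_S1_L001_R1_001.fastq.gz" ==~ forward_regex     // lane-split output
assert "sampleA_S1_R1_001.fastq.gz" ==~ forward_regex          // output without lane splitting
assert !("sampleA_S1_L001_R2_001.fastq.gz" ==~ forward_regex)  // reverse read, handled by reverse_regex
assert !("sampleAB_S1_L001_R1_001.fastq.gz" ==~ forward_regex) // different sample id
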
30 changes: 26 additions & 4 deletions src/demultiplex/config.vsh.yaml
@@ -8,16 +8,30 @@ argument_groups:
type: file
required: true
- name: --sample_sheet
description: Sample sheet
description: |
Sample sheet as input for BCL Convert. If not specified,
will try to autodetect the sample sheet in the input directory.
type: file
required: true
required: false
- name: Output arguments
arguments:
- name: --output
description: Directory to write fastq data to
type: file
direction: output
required: true
- name: "--output_falco"
description: Directory to write falco output to
type: file
direction: output
required: false
default: "$id/falco"
- name: "--output_multiqc"
description: Path to write the MultiQC report to
type: file
direction: output
required: false
default: "$id/multiqc_report.html"
resources:
- type: nextflow_script
path: main.nf
@@ -31,14 +45,22 @@ test_resources:
dependencies:
- name: io/untar
repository: local
- name: dataflow/gather_fastqs_and_validate
repository: local
- name: io/interop_summary_to_csv
repository: local
- name: dataflow/combine_samples
repository: local
- name: bcl_convert
repository: bb

- name: falco
repository: bb
- name: multiqc
repository: bb
repositories:
- name: bb
type: vsh
repo: viash-hub/biobase
tag: main

runners:
- type: nextflow
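
A hedged usage sketch (not part of the commit) of how the new QC outputs could be overridden when running the built demultiplex workflow. The main-script path, run folder and output locations are assumptions based on the usual viash target layout, and the viash-generated workflow may require additional parameters such as a publish directory:

nextflow run . \
  -main-script target/nextflow/demultiplex/main.nf \
  -profile docker \
  -c src/config/tests.config \
  --input testData/run_folder \
  --output demultiplexed \
  --output_falco demultiplexed/falco \
  --output_multiqc demultiplexed/multiqc_report.html

The sample sheet is omitted here because, after this change, it is autodetected from the input directory when --sample_sheet is not given.
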
5 changes: 3 additions & 2 deletions src/demultiplex/integration_tests.sh
@@ -6,10 +6,11 @@ REPO_ROOT=$(git rev-parse --show-toplevel)
# ensure that the command below is run from the root of the repository
cd "$REPO_ROOT"

viash ns build -q 'untar|demultiplex' --setup cb
viash ns build --setup cb

nextflow run . \
-main-script src/demultiplex/test.nf \
-profile docker,no_publish \
-entry test_wf \
-c src/config/tests.config
-c src/config/tests.config \
-resume