Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FEAT: Add falco and multiqc. #8

Merged
merged 14 commits into from
May 14, 2024
Merged
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ testData

# Nextflow related files
.nextflow
.nextflow.log*
work
4 changes: 3 additions & 1 deletion src/config/tests.config
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
profiles {

process.container = 'nextflow/bash:latest'
profiles {
// detect tempdir
tempDir = java.nio.file.Paths.get(
System.getenv('NXF_TEMP') ?:
Expand All @@ -26,6 +27,7 @@ profiles {
}

docker {
docker.fixOwnership = true
docker.enabled = true
// docker.userEmulation = true
singularity.enabled = false
Expand Down
38 changes: 38 additions & 0 deletions src/dataflow/combine_samples/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Viash component configuration for the dataflow/combine_samples component.
# Declares the component's arguments and registers main.nf as a Nextflow
# workflow whose entrypoint is `run_wf`.
name: combine_samples
namespace: dataflow
description: Combine fastq files from across samples into one event with a list of fastq files per orientation.
argument_groups:
  - name: Input arguments
    arguments:
      - name: "--id"
        description: "ID of the new event"
        type: string
        required: true
      # Forward-orientation fastq file for one sample (one file per incoming event).
      - name: --forward_input
        type: file
        required: true
      # Reverse-orientation fastq; optional to support single-end runs.
      - name: --reverse_input
        type: file
        required: false
  - name: Output arguments
    arguments:
      # `multiple: true` — the combined event carries a list of fastq files.
      - name: --output_forward
        type: file
        direction: output
        multiple: true
        required: true
      - name: --output_reverse
        type: file
        direction: output
        multiple: true
        required: false
resources:
  - type: nextflow_script
    path: main.nf
    entrypoint: run_wf

runners:
  - type: nextflow

engines:
  # Pure-dataflow component: no container needed, runs on the native engine.
  - type: native
28 changes: 28 additions & 0 deletions src/dataflow/combine_samples/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
workflow run_wf {
  take:
    input_ch

  main:
    // Re-key each incoming event by the requested new id (state.id), keeping
    // the original event id in _meta.join_id so downstream joins still work.
    keyed_ch = input_ch
      | map { event_id, event_state ->
        [event_state.id, event_state + ["_meta": ["join_id": event_id]]]
      }

    output_ch = keyed_ch
      // Gather all events sharing the same new id; sort by hash for a
      // deterministic group order.
      | groupTuple(by: 0, sort: "hash")
      | map { group_id, grouped_states ->
        // Collect the per-sample fastqs into one list per orientation;
        // samples without reverse reads contribute no entry.
        def forwards = grouped_states.collect{ state -> state.forward_input }
        def reverses = grouped_states
          .collect{ state -> state.reverse_input }
          .findAll{ fastq -> fastq != null }
        def combined_state = [
          "output_forward": forwards,
          "output_reverse": reverses,
          // The join ID is the same across all samples from the same run.
          "_meta": ["join_id": grouped_states[0]._meta.join_id]
        ]
        [group_id, combined_state]
      }

  emit:
    output_ch
}
36 changes: 36 additions & 0 deletions src/dataflow/gather_fastqs_and_validate/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Viash component configuration for dataflow/gather_fastqs_and_validate.
# Declares the component's arguments and registers main.nf as a Nextflow
# workflow whose entrypoint is `run_wf`.
name: gather_fastqs_and_validate
namespace: dataflow
description: |
  From a directory containing fastq files, gather the files per sample
  and validate according to the contents of the sample sheet.
argument_groups:
  - name: Input arguments
    arguments:
      # Typically the output directory of BCL Convert.
      - name: --input
        description: Directory containing .fastq files
        type: file
        required: true
      # Used to derive the expected sample IDs (from the [Data] section).
      - name: --sample_sheet
        description: Sample sheet
        type: file
        required: true
  - name: Output arguments
    arguments:
      - name: --fastq_forward
        type: file
        direction: output
        required: true
      # Optional to support single-end sequencing runs.
      - name: "--fastq_reverse"
        type: file
        direction: output
        required: false
resources:
  - type: nextflow_script
    path: main.nf
    entrypoint: run_wf

runners:
  - type: nextflow

engines:
  # Pure-dataflow component: no container needed, runs on the native engine.
  - type: native
73 changes: 73 additions & 0 deletions src/dataflow/gather_fastqs_and_validate/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
workflow run_wf {
  take:
    input_ch

  main:
    output_ch = input_ch
      // For each BCL convert output folder: parse the sample sheet for sample
      // IDs, then emit one event per sample with its forward/reverse fastqs.
      | flatMap { id, state ->
        println "Processing sample sheet: $state.sample_sheet"
        def sample_sheet = state.sample_sheet
        def start_parsing = false
        def sample_id_column_index = null
        def samples = ["Undetermined"]
        def original_id = id

        // Parse the [Data] section of the sample sheet for sample IDs.
        // `def` keeps these closure-local (flatMap may run concurrently).
        def csv_lines = sample_sheet.splitCsv(header: false, sep: ',')
        csv_lines.any { csv_items ->
          if (csv_items.isEmpty()) {
            return
          }
          def possible_header = csv_items[0]
          def header = possible_header.find(/\[(.*)\]/){fullmatch, header_name -> header_name}
          if (header) {
            if (start_parsing) {
              // Stop parsing when encountering the next section header.
              return true
            }
            if (header == "Data") {
              start_parsing = true
            }
          }
          if (start_parsing) {
            // The first row after [Data] is the column header row.
            // Compare against null explicitly: the Sample_ID column may be at
            // index 0, which is falsy in Groovy. findIndexOf (not
            // findIndexValues, which returns a list and breaks the != -1
            // check) returns -1 when the column is genuinely missing.
            if ( sample_id_column_index == null ) {
              sample_id_column_index = csv_items.findIndexOf{it == "Sample_ID"}
              assert sample_id_column_index != -1:
                "Could not find column 'Sample_ID' in sample sheet!"
              return
            }
            samples += csv_items[sample_id_column_index]
          }
        }
        println "Looking for fastq files in ${state.input}."
        // List the directory once and reuse it for all per-sample matching.
        def fastq_files = state.input.listFiles().findAll{it.isFile()}
        def allfastqs = fastq_files.findAll{it.name ==~ /^.+\.fastq\.gz$/}
        println "Found ${allfastqs.size()} fastq files, matching them to the following samples: ${samples}."
        def processed_samples = samples.collect { sample_id ->
          // BCL Convert naming: <Sample>_S<num>[_L<lane>]_R<1|2>_<chunk>.fastq.gz
          def forward_regex = ~/^${sample_id}_S(\d+)_(L(\d+)_)?R1_(\d+)\.fastq\.gz$/
          def reverse_regex = ~/^${sample_id}_S(\d+)_(L(\d+)_)?R2_(\d+)\.fastq\.gz$/
          def forward_fastq = fastq_files.findAll{it.name ==~ forward_regex}
          def reverse_fastq = fastq_files.findAll{it.name ==~ reverse_regex}
          assert !forward_fastq.isEmpty():
            "Expected a forward fastq file to have been created corresponding to sample ${sample_id}."
          assert forward_fastq.size() < 2:
            "Found multiple forward fastq files corresponding to sample ${sample_id}: ${forward_fastq}"
          assert reverse_fastq.size() < 2:
            "Found multiple reverse fastq files corresponding to sample ${sample_id}: ${reverse_fastq}."
          // TODO: if one sample had reverse reads, the others must as well.
          reverse_fastq = !reverse_fastq.isEmpty() ? reverse_fastq[0] : null
          def fastqs_state = [
            "fastq_forward": forward_fastq[0],
            "fastq_reverse": reverse_fastq,
            // Preserve the original event id so downstream joins can match.
            "_meta": [ "join_id": original_id ],
          ]
          [sample_id, fastqs_state]
        }
        println "Finished processing sample sheet."
        return processed_samples
      }

  emit:
    output_ch
}
30 changes: 26 additions & 4 deletions src/demultiplex/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,30 @@ argument_groups:
type: file
required: true
- name: --sample_sheet
description: Sample sheet
description: |
Sample sheet as input for BCL Convert. If not specified,
will try to autodetect the sample sheet in the input directory
type: file
required: true
required: false
- name: Output arguments
arguments:
- name: --output
description: Directory to write fastq data to
type: file
direction: output
required: true
- name: "--output_falco"
description: Directory to write falco output to
type: file
direction: output
required: false
default: "$id/falco"
- name: "--output_multiqc"
      description: Directory to write MultiQC output to
type: file
direction: output
required: false
default: "$id/multiqc_report.html"
resources:
- type: nextflow_script
path: main.nf
Expand All @@ -31,14 +45,22 @@ test_resources:
dependencies:
- name: io/untar
repository: local
- name: dataflow/gather_fastqs_and_validate
repository: local
- name: io/interop_summary_to_csv
repository: local
- name: dataflow/combine_samples
repository: local
- name: bcl_convert
repository: bb

- name: falco
repository: bb
- name: multiqc
repository: bb
repositories:
- name: bb
type: vsh
repo: viash-hub/biobase
tag: main

runners:
- type: nextflow
Expand Down
5 changes: 3 additions & 2 deletions src/demultiplex/integration_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@ REPO_ROOT=$(git rev-parse --show-toplevel)
# ensure that the command below is run from the root of the repository
cd "$REPO_ROOT"

viash ns build -q 'untar|demultiplex' --setup cb
viash ns build --setup cb

nextflow run . \
-main-script src/demultiplex/test.nf \
-profile docker,no_publish \
-entry test_wf \
-c src/config/tests.config
-c src/config/tests.config \
-resume
Loading