diff --git a/CHANGELOG.md b/CHANGELOG.md index 3bb38b9..5d3f40b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,54 @@ +# demultiplex v0.3.3 + +## Breaking change + +- The `runner` defines the output differently now: + + - The last part of the `--input` path is expected to be the run ID and this run ID is used to create the output directory. + - If the input is `file.tar.gz` instead of a directory, the `file` part is used as the run ID. + +- The output structure is then as follows: + + ``` + $publish_dir/<run_id>/<date_time>_demultiplex_<version>/ + ``` + + For instance: + + ``` + $publish_dir + └── 200624_A00834_0183_BHMTFYDRXX + └── 20241217_051404_demultiplex_v1.2 + ├── fastq + │   ├── Sample1_S1_L001_R1_001.fastq.gz + │   ├── Sample23_S3_L001_R1_001.fastq.gz + │   ├── SampleA_S2_L001_R1_001.fastq.gz + │   ├── Undetermined_S0_L001_R1_001.fastq.gz + │   └── sampletest_S4_L001_R1_001.fastq.gz + └── qc + ├── fastqc + │   ├── Sample1_S1_L001_R1_001.fastq.gz_fastqc_data.txt + │   ├── Sample1_S1_L001_R1_001.fastq.gz_fastqc_report.html + │   ├── Sample1_S1_L001_R1_001.fastq.gz_summary.txt + │   ├── Sample23_S3_L001_R1_001.fastq.gz_fastqc_data.txt + │   ├── Sample23_S3_L001_R1_001.fastq.gz_fastqc_report.html + │   ├── Sample23_S3_L001_R1_001.fastq.gz_summary.txt + │   ├── SampleA_S2_L001_R1_001.fastq.gz_fastqc_data.txt + │   ├── SampleA_S2_L001_R1_001.fastq.gz_fastqc_report.html + │   ├── SampleA_S2_L001_R1_001.fastq.gz_summary.txt + │   ├── Undetermined_S0_L001_R1_001.fastq.gz_fastqc_data.txt + │   ├── Undetermined_S0_L001_R1_001.fastq.gz_fastqc_report.html + │   ├── Undetermined_S0_L001_R1_001.fastq.gz_summary.txt + │   ├── sampletest_S4_L001_R1_001.fastq.gz_fastqc_data.txt + │   ├── sampletest_S4_L001_R1_001.fastq.gz_fastqc_report.html + │   └── sampletest_S4_L001_R1_001.fastq.gz_summary.txt + └── multiqc_report.html + + ``` + +- This logic can be avoided by providing the flag `--plain_output`. 
+ + # demultiplex v0.3.2 # Bug fixes diff --git a/src/runner/config.vsh.yaml b/src/runner/config.vsh.yaml index 50e4a1c..020be85 100644 --- a/src/runner/config.vsh.yaml +++ b/src/runner/config.vsh.yaml @@ -4,7 +4,10 @@ argument_groups: - name: Input arguments arguments: - name: --input - description: Base directory of the form `s3://Sequencing///` + description: | + Base directory of the canonical form `s3://<bucket>/<path>/<RunID>/`. + A tarball (.tar.gz, .tgz, .tar) containing run information can be provided in which + case the RunID is set to the name of the tarball without the extension. type: file required: true - name: --run_information @@ -27,15 +30,10 @@ argument_groups: required. - name: Annotation flags arguments: - - name: --add_date_time - description: | - Add date and time to the output directory name. This is useful - when running the same pipeline multiple times on the same input - directory. - type: boolean_true - - name: --add_workflow_id + - name: --plain_output description: | - Add a workflow identifier to the output directory name. + Flag to indicate that the output should be stored directly under $publish_dir rather than + under a subdirectory structure runID/<date_time>_demultiplex_<version>/. type: boolean_true - name: Output arguments arguments: diff --git a/src/runner/main.nf b/src/runner/main.nf index 5a92905..ef78f71 100644 --- a/src/runner/main.nf +++ b/src/runner/main.nf @@ -9,6 +9,15 @@ workflow run_wf { main: output_ch = input_ch + // Extract the ID from the input. + // If the input is a tarball, strip the suffix. + | map{ id, state -> + def id_with_suffix = state.input.getFileName().toString() + [ + id, + state + [ run_id: id_with_suffix - ~/\.(tar.gz|tgz|tar)$/ ] + ] + } | demultiplex.run( fromState: [ "input": "input", @@ -24,14 +33,15 @@ workflow run_wf { ) | publish.run( fromState: { id, state -> - def id1 = (params.add_date_time) ? "${id}_${date}" : id - def id2 = (params.add_workflow_id) ? 
"${id1}_demultiplex_${version}" : id1 + println(state.plain_output) + def id1 = (state.plain_output) ? id : "${state.run_id}/${date}" + def id2 = (state.plain_output) ? id : "${id1}_demultiplex_${version}" - def fastq_output_1 = (id == "run") ? state.fastq_output : "${id2}/" + state.fastq_output - def falco_output_1 = (id == "run") ? state.falco_output : "${id2}/" + state.falco_output - def multiqc_output_1 = (id == "run") ? state.multiqc_output : "${id2}/" + state.multiqc_output + def fastq_output_1 = (id2 == "run") ? state.fastq_output : "${id2}/" + state.fastq_output + def falco_output_1 = (id2 == "run") ? state.falco_output : "${id2}/" + state.falco_output + def multiqc_output_1 = (id2 == "run") ? state.multiqc_output : "${id2}/" + state.multiqc_output - if (id == "run") { + if (id2 == "run") { println("Publising to ${params.publish_dir}") } else { println("Publising to ${params.publish_dir}/${id2}")