Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
julibeg committed Feb 17, 2023
1 parent 6985e44 commit 7afb2ea
Show file tree
Hide file tree
Showing 99 changed files with 35 additions and 1,703 deletions.
71 changes: 0 additions & 71 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,77 +4,6 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [unreleased]
### Added
- GitHub issues template.
- Return of metadata with fastqingress.
- Check of number of samples and barcoded directories.
- Example of how to use the metadata from `fastqingress`.
- Implemented `--version`
- `fastcat_extra_args` option to `fastq_ingress` to pass arbitrary arguments to `fastcat` (defaults to empty string).
- `fastcat_stats` option to `fastq_ingress` to force generation of `fastcat` stats even when the input is only a single file (default is false).
### Changed
- `fastq_ingress` now returns `[metamap, path-to-fastcat-seqs, path-to-fastcat-stats | null]`.
- Bumped base container to v0.2.0.
- Use groovy script to ping after workflow has run.
- Removed sanitize fastq option.
- fastq_ingress now removes unclassified read folders by default.
- Workflow name and version is now more prominently displayed on start
### Fixed
- Output argument in Fastqingress homogenised.
- Sanitize fastq intermittent null object error.
- Add `*.pyc` and `*.pyo` ignores to wf-template .gitignore
### Note
- Bumped version to `v4` to align versioning with Launcher v4

## [v0.2.0]
### Added
- default process label parameter
- Added `params.wf.example_cmd` list to populate `--help`
### Changed
- Update WorkflowMain.groovy to provide better `--help`

## [v0.1.0]
### Changed
- `sample_name` to `sample_id` throughout to match MinKNOW samplesheet.
### Added
- Singularity profile included in base config.
- Numerous other changes that have been lost to the mists of time.

## [v0.0.7]
### Added
- Fastqingress module for common handling of (possibly
multiplexed) inputs.
- Optimized container size through removal of various
conda cruft.
### Changed
- Use mamba by default for building conda environments.
- Cut down README to items specific to workflow.
### Fixed
- Incorrect specification of conda environment file in Nextflow config.

## [v0.0.6]
### Changed
- Explicitly install into base conda env.

## [v0.0.5]
### Added
- Software versioning report example.

## [v0.0.4]
### Changed
- Version bump to test CI.

## [v0.0.3]
### Changed
- Moved all CI to templates.
- Use canned aplanat report components.

## [v0.0.2]
### Added
- CI release checks.
- Create pre-releases in CI from dev branch.

## [v0.0.1]

First release.
6 changes: 3 additions & 3 deletions lib/fastqingress.nf
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ def watch_path(Map margs) {


process mv_or_pigz {
label "wftemplate"
label "wfamplicon"
cpus params.threads
input:
tuple val(meta), path(input)
Expand All @@ -190,7 +190,7 @@ process mv_or_pigz {


process fastcat {
label "wftemplate"
label "wfamplicon"
cpus params.threads
input:
tuple val(meta), path(input)
Expand Down Expand Up @@ -408,7 +408,7 @@ def get_sample_sheet(Path sample_sheet) {
* @return: string (optional)
*/
process validate_sample_sheet {
label "wftemplate"
label "wfamplicon"
input: path csv
output: stdout
"""
Expand Down
111 changes: 32 additions & 79 deletions main.nf
Original file line number Diff line number Diff line change
@@ -1,24 +1,16 @@
#!/usr/bin/env nextflow

// Developer notes
//
// This template workflow provides a basic structure to copy in order
// to create a new workflow. Current recommended practices are:
// i) create a simple command-line interface.
// ii) include an abstract workflow scope named "pipeline" to be used
// in a module fashion
// iii) a second concrete, but anonymous, workflow scope to be used
// as an entry point when using this workflow in isolation.

import groovy.json.JsonBuilder
nextflow.enable.dsl = 2

include { fastq_ingress } from './lib/fastqingress'
include { clusterReads } from './subworkflows/clustering/vsearch'
include { draftAssembly } from './subworkflows/assembly/flye'

OPTIONAL_FILE = file("$projectDir/data/OPTIONAL_FILE")

process getVersions {
label "wftemplate"
label "wfamplicon"
cpus 1
output:
path "versions.txt"
Expand All @@ -31,7 +23,7 @@ process getVersions {


process getParams {
label "wftemplate"
label "wfamplicon"
cpus 1
output:
path "params.json"
Expand All @@ -44,38 +36,14 @@ process getParams {
}


// Builds the HTML report by calling `workflow-glue report`.
//
// Inputs:
//   metadata        - value that is serialised to JSON and written to `metadata.json`
//   per_read_stats  - stats file; when its name equals that of OPTIONAL_FILE it is
//                     treated as "not provided" and no `--stats` argument is passed
//   "versions/*"    - files staged into a `versions` directory (passed via `--versions`)
//   "params.json"   - file staged as `params.json` (passed via `--params`)
// Output:
//   any file matching `wf-template-*.html` (the script writes `wf-template-report.html`)
//
// NOTE(review): the JSON string is interpolated between single quotes in the `echo`
// command below; a single quote inside any metadata value would presumably break the
// shell command -- confirm whether metadata values can contain quotes.
process makeReport {
label "wftemplate"
input:
val metadata
path per_read_stats
path "versions/*"
path "params.json"
output:
path "wf-template-*.html"
script:
String report_name = "wf-template-report.html"
// pretty-printed JSON representation of the `metadata` input
String metadata = new JsonBuilder(metadata).toPrettyString()
// only pass `--stats` when a real stats file (i.e. not the OPTIONAL_FILE placeholder)
// was supplied
String stats_args = \
(per_read_stats.name == OPTIONAL_FILE.name) ? "" : "--stats $per_read_stats"
"""
echo '${metadata}' > metadata.json
workflow-glue report $report_name \
--versions versions \
$stats_args \
--params params.json \
--metadata metadata.json
"""
}


// See https://github.com/nextflow-io/nextflow/issues/1636. This is the only way to
// publish files from a workflow whilst decoupling the publish from the process steps.
// The process takes a tuple containing the filename and the name of a sub-directory to
// put the file into. If the latter is `null`, puts it into the top-level directory.
process output {
// publish inputs to output directory
label "wftemplate"
label "wfamplicon"
publishDir (
params.out_dir,
mode: "copy",
Expand All @@ -89,52 +57,31 @@ process output {
"""
}

// Creates a new directory named after the sample alias (`meta["alias"]`) and moves the
// fastcat results into it.
//
// Input:
//   tuple of (meta, concat_seqs, fastcat_stats); `fastcat_stats` may be the
//   OPTIONAL_FILE placeholder, in which case only `concat_seqs` is moved
// Output:
//   everything matching `*` in the task directory
process collect_fastq_ingress_results_in_dir {
label "wftemplate"
input:
tuple val(meta), path(concat_seqs), path(fastcat_stats)
output:
path "*"
script:
String outdir = meta["alias"]
// replace the placeholder with an empty string so that it is dropped from the `mv`
// command below
String fastcat_stats = \
(fastcat_stats.name == OPTIONAL_FILE.name) ? "" : fastcat_stats
"""
mkdir $outdir
mv $concat_seqs $fastcat_stats $outdir
"""
}

// workflow module
workflow pipeline {
take:
reads
main:
per_read_stats = reads.map {
it[2] ? it[2].resolve('per-read-stats.tsv') : null
}
| collectFile ( keepHeader: true )
| ifEmpty ( OPTIONAL_FILE )
software_versions = getVersions()
workflow_params = getParams()
metadata = reads.map { it[0] }.toList()
report = makeReport(
metadata, per_read_stats, software_versions.collect(), workflow_params

// the reads have already been filtered by `fastcat` --> cluster next
clustering = clusterReads(
reads.map {it[0..1]},
params.min_cluster_size,
)
reads
| map { [it[0], it[1], it[2] ?: OPTIONAL_FILE ] }
| collect_fastq_ingress_results_in_dir


emit:
fastq_ingress_results = collect_fastq_ingress_results_in_dir.out
report
workflow_params
// TODO: use something more useful as telemetry
telemetry = workflow_params
}


params.min_read_length = 300
params.max_read_length = 3600
params.min_read_qual = 8
params.min_cluster_size = 0.2

// entrypoint workflow
WorkflowMain.initialise(workflow, params, log)
workflow {
Expand All @@ -143,23 +90,29 @@ workflow {
Pinguscript.ping_post(workflow, "start", "none", params.out_dir, params)
}

ArrayList fastcat_extra_args = []
if (params.min_read_length) { fastcat_extra_args << "-a $params.min_read_length" }
if (params.max_read_length) { fastcat_extra_args << "-b $params.max_read_length" }
if (params.min_read_qual) { fastcat_extra_args << "-q $params.min_read_qual" }

samples = fastq_ingress([
"input":params.fastq,
"sample":params.sample,
"sample_sheet":params.sample_sheet,
"analyse_unclassified":params.analyse_unclassified,
"fastcat_stats": params.wf.fastcat_stats,
"fastcat_extra_args": ""])
"fastcat_stats": false,
"fastcat_extra_args": fastcat_extra_args.join(" ")])

// looks like this is the most robust way to check if a `param` coming from the
// command line is a number
if (
params.min_cluster_size instanceof String ||
!params.min_cluster_size.toString().isNumber()
) {
error "`--min_cluster_size` must be a float or integer."
}

pipeline(samples)
pipeline.out.fastq_ingress_results
| map { [it, "fastq_ingress_results"] }
| concat (
pipeline.out.report.concat(pipeline.out.workflow_params)
| map { [it, null] }
)
| output
}

if (params.disable_ping == false) {
Expand Down
61 changes: 0 additions & 61 deletions test/run_fastq_ingress_test.sh

This file was deleted.

Loading

0 comments on commit 7afb2ea

Please sign in to comment.