diff --git a/.github/workflows/pytest-workflow.yml b/.github/workflows/pytest-workflow.yml index 8d9be513a4e..f4fb56e55f5 100644 --- a/.github/workflows/pytest-workflow.yml +++ b/.github/workflows/pytest-workflow.yml @@ -81,6 +81,10 @@ jobs: tags: merquryfk/merquryfk - profile: "conda" tags: merquryfk/ploidyplot + - profile: "conda" + tags: universc + - profile: "singularity" + tags: universc - profile: "conda" tags: subworkflows/vcf_annotate_ensemblvep env: diff --git a/modules/nf-core/universc/CITATION.cff b/modules/nf-core/universc/CITATION.cff new file mode 100644 index 00000000000..b00957d151b --- /dev/null +++ b/modules/nf-core/universc/CITATION.cff @@ -0,0 +1,51 @@ +cff-version: 1.2.0 +message: "If you use this software, please cite it as below." +authors: + - given-names: "S. Thomas" + family-names: "Kelly" + email: "tom.kelly@riken.jp" + affiliation: "Center for Integrative Medical Sciences, RIKEN, Suehiro-cho-1-7-22, Tsurumi Ward, Yokohama, Japan" + orcid: "https://orcid.org/0000-0003-3904-6690" + - family-names: "Battenberg" + given-names: "Kai" + email: "kai.battenberg@riken.jp" + affiliation: "Center for Sustainable Resource Science, RIKEN, Suehiro-cho-1-7-22, Tsurumi Ward, Yokohama, Japan" + orcid: "https://orcid.org/0000-0001-7517-2657" +version: 1.2.5.1 +doi: 10.1101/2021.01.19.427209 +date-released: 2021-02-14 +url: "https://github.com/minoda-lab/universc" +preferred-citation: + type: article + authors: + - given-names: "S. 
Thomas" + family-names: "Kelly" + email: "tom.kelly@riken.jp" + affiliation: "Center for Integrative Medical Sciences, RIKEN, Suehiro-cho-1-7-22, Tsurumi Ward, Yokohama, Japan" + orcid: "https://orcid.org/0000-0003-3904-6690" + - family-names: "Battenberg" + given-names: "Kai" + email: "kai.battenberg@riken.jp" + affiliation: "Center for Sustainable Resource Science, RIKEN, Suehiro-cho-1-7-22, Tsurumi Ward, Yokohama, Japan" + orcid: "https://orcid.org/0000-0001-7517-2657" + - family-names: "Hetherington" + given-names: "Nicola A." + affiliation: "Center for Integrative Medical Sciences, RIKEN, Suehiro-cho-1-7-22, Tsurumi Ward, Yokohama, Japan" + orcid: "https://orcid.org/0000-0001-8802-2906" + - family-names: "Hayashi" + given-names: "Makoto" + affiliation: "Center for Sustainable Resource Science, RIKEN, Suehiro-cho-1-7-22, Tsurumi Ward, Yokohama, Japan" + orcid: "https://orcid.org/0000-0001-6389-4265" + - given-names: "Aki" + family-names: "Minoda" + email: "akiko.minoda@riken.jp" + affiliation: "Center for Integrative Medical Sciences, RIKEN, Suehiro-cho-1-7-22, Tsurumi Ward, Yokohama, Japan" + orcid: "https://orcid.org/0000-0002-2927-5791" + doi: "10.1101/2021.01.19.427209" + title: "UniverSC: a flexible cross-platform single-cell data processing pipeline" + year: "2021" + journal: "bioRxiv" + start: 2021.01.19.427209 + volume: + issue: + month: 1 diff --git a/modules/nf-core/universc/CITATION.md b/modules/nf-core/universc/CITATION.md new file mode 100644 index 00000000000..4f420bb8f00 --- /dev/null +++ b/modules/nf-core/universc/CITATION.md @@ -0,0 +1,37 @@ +### Citation + +A submission to a journal and bioRxiv is in progress. Please cite these when +they are available. Currently, the package can be cited +as follows: + +Kelly, S.T., Battenberg, K., Hetherington, N.A., Hayashi, M., and Minoda, A. (2021) +UniverSC: a flexible cross-platform single-cell data processing pipeline. 
+bioRxiv 2021.01.19.427209; doi: [https://doi.org/10.1101/2021.01.19.427209](https://doi.org/10.1101/2021.01.19.427209) +package version 1.2.5.1. [https://github.com/minoda-lab/universc](https://github.com/minoda-lab/universc) + +``` +@article {Kelly2021.01.19.427209, + author = {Kelly, S. Thomas and Battenberg, Kai and Hetherington, Nicola A. and Hayashi, Makoto and Minoda, Aki}, + title = {{UniverSC}: a flexible cross-platform single-cell data processing pipeline}, + elocation-id = {2021.01.19.427209}, + year = {2021}, + doi = {10.1101/2021.01.19.427209}, + publisher = {Cold Spring Harbor Laboratory}, + abstract = {Single-cell RNA-sequencing analysis to quantify RNA molecules in individual cells has become popular owing to the large amount of information one can obtain from each experiment. We have developed UniverSC (https://github.com/minoda-lab/universc), a universal single-cell processing tool that supports any UMI-based platform. Our command-line tool enables consistent and comprehensive integration, comparison, and evaluation across data generated from a wide range of platforms. Competing Interest Statement: The authors have declared no competing interest.}, + eprint = {https://www.biorxiv.org/content/early/2021/01/19/2021.01.19.427209.full.pdf}, + journal = {{bioRxiv}}, + note = {package version 1.2.5.1}, + URL = {https://github.com/minoda-lab/universc}, +} + +``` + +``` +@Manual{, + title = {{UniverSC}: a flexible cross-platform single-cell data processing pipeline}, + author = {S. Thomas Kelly and Kai Battenberg and Nicola A. 
Hetherington and Makoto Hayashi and Aki Minoda}, + year = {2021}, + note = {package version 1.2.5.1}, + url = {https://github.com/minoda-lab/universc}, + } +``` diff --git a/modules/nf-core/universc/README.md b/modules/nf-core/universc/README.md new file mode 100644 index 00000000000..8b6f61446c0 --- /dev/null +++ b/modules/nf-core/universc/README.md @@ -0,0 +1,116 @@ +# UniverSC + +## Single-cell processing across technologies + +UniverSC is an open-source single-cell pipeline that runs across platforms on various technologies. + +## Maintainers + +Tom Kelly (RIKEN, IMS) + +Kai Battenberg (RIKEN CSRS/IMS) + +Contact: tom.kelly[at]riken.jp, kai.battenberg[at]riken.jp + +## Implementation + +This container runs Cell Ranger v3.0.2 installed from source under the MIT License on GitHub with +modifications for compatibility with updated dependencies. All software is installed from +open-source repositories and available for reuse. + +It is _not_ subject to the 10X Genomics End User License Agreement (EULA). +This version allows running Cell Ranger v3.0.2 on data generated from any experimental platform +without restrictions. However, updating to newer versions of Cell Ranger subject to the +10X EULA is not possible without the agreement of 10X Genomics. + +To comply with licensing and respect 10X Genomics Trademarks, the 10X Genomics logo +has been removed from HTML reports, the tool has been renamed, and proprietary +closed-source tools to build Cloupe files are disabled. + +It is still sufficient to generate summary reports and count matrices compatible with +single-cell analysis tools available for 10X Genomics and Cell Ranger output format +in Python and R packages. + +## Usage + +### Generating References + +The Cell Ranger modules can be used to generate reference indexes to run UniverSC. +Note that UniverSC requires the Open Source version v3.0.2 of Cell Ranger included +in the nfcore/universc Docker image. 
The same module parameters can be run provided +that the container is changed in process configurations (modify nextflow.config). + +``` +process { + +... + withName: CELLRANGER_MKGTF { + container = "nfcore/universc:1.2.5.1" + } + withName: CELLRANGER_MKREF{ + container = "nfcore/universc:1.2.5.1" + } +... +} +``` + +This will generate a compatible index for UniverSC using the same version of the +STAR aligner and a permissive software license without an EULA. + +### Container settings + +The cellranger install directory must have write permissions to run UniverSC. +To run in docker or podman use the `--user root` option in container parameters +and for singularity use the `--writable` parameter. + +These are set as default in universc/main.nf: + +``` + container "nfcore/universc:1.2.5.1" + if (workflow.containerEngine == 'docker'){ + containerOptions = "--privileged" + } + if (workflow.containerEngine == 'podman'){ + containerOptions = "--runtime /usr/bin/crun --userns=keep-id --user root --systemd=always" + } + if (workflow.containerEngine == 'singularity'){ + containerOptions = "--writable" + } +``` + +Select the container engine with `nextflow run ... -profile docker` or set the environment variable +as one of the following before running nextflow. + +``` +export PROFILE="docker" +export PROFILE="podman" +export PROFILE="singularity" +``` + +Note that due to dependencies installed in a docker image, it is not possible to use conda environments. + +## Disclaimer + +We are third party developers not affiliated with 10X Genomics or any other vendor of +single-cell technologies. We are releasing this code under an open-source license which calls Cell Ranger +as an external dependency. + +## Licensing + +This package is provided open-source under a GPL-3 license. This means that you are free to use and +modify this code provided that modified versions also contain this license. 
+ +## Updating the package + +The tomkellygenetics/universc: container is automatically updated with tomkellygenetics/universc:latest. + +A stable release is mirrored at nfcore/universc:1.2.5.1 and will be updated as needed. + +To build an updated container use the Dockerfile provided here: + +[https://github.com/minoda-lab/universc/blob/master/Dockerfile](https://github.com/minoda-lab/universc/blob/master/Dockerfile) + +Note that this uses a custom base image which is built with an open-source implementation of +Cell Ranger v3.0.2 under the MIT License and relies on Python 2. The build file can be found here: + +[https://github.com/TomKellyGenetics/cellranger_clean/blob/master/Dockerfile](https://github.com/TomKellyGenetics/cellranger_clean/blob/master/Dockerfile) diff --git a/modules/nf-core/universc/main.nf b/modules/nf-core/universc/main.nf new file mode 100644 index 00000000000..a23cb05b2b7 --- /dev/null +++ b/modules/nf-core/universc/main.nf @@ -0,0 +1,80 @@ +// Run UniverSC on the reads of one sample against a staged reference; emits everything written to sample-<id>/outs/ plus versions.yml +process UNIVERSC { + tag "$meta.id" + label 'process_medium' + + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + exit 1, "UNIVERSC module does not support Conda. Please use Docker / Singularity / Podman instead." 
+ } + container "nfcore/universc:1.2.5.1" + // Container options differ per engine; the module README notes that the Cell Ranger install directory must be writable + if (workflow.containerEngine == 'docker'){ + containerOptions = "--privileged" + } + if ( workflow.containerEngine == 'podman'){ + containerOptions = "--runtime crun --userns=keep-id --systemd=always" + } + if (workflow.containerEngine == 'singularity'){ + containerOptions = "-B /var/tmp --writable-tmpfs" + params.singularity_autoMounts = true + } + + input: + tuple val(meta), path(reads) + path reference + + + output: + tuple val(meta), path("sample-${meta.id}/outs/*"), emit: outs + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + // meta.samples must be a collection (presumably sample names): duplicates are dropped and the rest comma-joined for --description + def sample_arg = meta.samples.unique().join(",") + def reference_name = reference.name + // single-end input is passed via --file, paired-end input via -R1/-R2 + def input_reads = meta.single_end ? "--file $reads" : "-R1 ${reads[0]} -R2 ${reads[1]}" + """ + universc \\ + --id 'sample-${meta.id}' \\ + ${input_reads} \\ + --technology '${meta.technology}' \\ + --chemistry '${meta.chemistry}' \\ + --reference ${reference_name} \\ + --description '${sample_arg}' \\ + --jobmode "local" \\ + --localcores ${task.cpus} \\ + --localmem ${task.memory.toGiga()} \\ + --per-cell-data \\ + $args 1> _log 2> _err + + # keep the task script (.command.sh, written by Nextflow into the work directory) and the logs with the outputs; history expansion (!!) is unavailable in a non-interactive shell + cp .command.sh 
sample-${meta.id}/outs/_invocation + cp _log sample-${meta.id}/outs/_log + cp _err sample-${meta.id}/outs/_err + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cellranger: \$(echo \$(cellranger count --version 2>&1 | head -n 2 | tail -n 1 | sed 's/^.* //g' | sed 's/(//g' | sed 's/)//g' )) + universc: \$(echo \$(bash /universc/launch_universc.sh --version | grep version | grep universc | sed 's/^.* //g' )) + END_VERSIONS + """ + + + stub: + """ + mkdir -p "sample-${meta.id}/outs/" + touch sample-${meta.id}/outs/fake_file.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cellranger: \$(echo \$(cellranger count --version 2>&1 | head -n 2 | tail -n 1 | sed 's/^.* //g' | sed 's/(//g' | sed 's/)//g' )) + universc: \$(echo \$(bash /universc/launch_universc.sh --version | grep version | grep universc | sed 's/^.* //g' )) + END_VERSIONS + """ +} diff --git a/modules/nf-core/universc/meta.yml b/modules/nf-core/universc/meta.yml new file mode 100644 index 00000000000..7f5436fc031 --- /dev/null +++ b/modules/nf-core/universc/meta.yml @@ -0,0 +1,42 @@ +name: "universc" +description: Module to run UniverSC, an open-source pipeline to demultiplex and process single-cell RNA-Seq data +keywords: + - demultiplex + - align + - single-cell + - scRNA-Seq + - count + - umi +tools: + - "universc": + description: "UniverSC: a flexible cross-platform single-cell data processing pipeline" + homepage: "https://hub.docker.com/r/tomkellygenetics/universc" + documentation: "https://raw.githubusercontent.com/minoda-lab/universc/master/man/launch_universc.sh" + tool_dev_url: "https://github.com/minoda-lab/universc" + doi: "https://doi.org/10.1101/2021.01.19.427209" + licence: ["GPL-3.0-or-later"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false, technology:'10x', chemistry:'SC3Pv3', samples:['test'] ] + - reads: + type: file + description: FASTQ or FASTQ.GZ file, list of 2 files for paired-end data + pattern: "*.{fastq,fq,fastq.gz,fq.gz}" + +output: + - outs: + type: file + description: Files containing the outputs of Cell Ranger + pattern: "sample-${meta.id}/outs/*" + - versions: + type: file + description: File containing software version + pattern: "versions.yml" + +authors: + - "@kbattenb" + - "@tomkellygenetics" diff --git a/tests/config/nextflow.config b/tests/config/nextflow.config index 849a17b8c41..d8477d8f2dc 100644 --- a/tests/config/nextflow.config +++ b/tests/config/nextflow.config @@ -19,6 +19,10 @@ if ("$PROFILE" == "singularity") { } else if ("$PROFILE" == "mamba") { conda.enabled = true conda.useMamba = true +} else if ("$PROFILE" == "podman") { + podman.enabled = true + podman.userEmulation = true + podman.runOptions = "--runtime crun --platform linux/x86_64 --systemd=always" } else { docker.enabled = true docker.userEmulation = true diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 82164876f20..88a0962902a 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -3236,6 +3236,10 @@ unicycler: - modules/nf-core/unicycler/** - tests/modules/nf-core/unicycler/** +universc: + - modules/nf-core/universc/** + - tests/modules/nf-core/universc/** + untar: - modules/nf-core/untar/** - tests/modules/nf-core/untar/** diff --git a/tests/modules/nf-core/universc/main.nf b/tests/modules/nf-core/universc/main.nf new file mode 100644 index 00000000000..72020bbbcde --- /dev/null +++ b/tests/modules/nf-core/universc/main.nf @@ -0,0 +1,35 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { CELLRANGER_MKGTF } from '../../../../modules/nf-core/cellranger/mkgtf/main.nf' +include { CELLRANGER_MKREF } from '../../../../modules/nf-core/cellranger/mkref/main.nf' +include { UNIVERSC } from '../../../../modules/nf-core/universc/main.nf' + +workflow 
test_universc_10x { + + // paired-end 10x test reads; the meta map carries the technology, chemistry and samples keys that UNIVERSC reads + input = [ [ id:'123', technology:'10x', chemistry:'SC3Pv3', single_end:false, strandedness:'forward', samples: ["test_10x"] ], // meta map + [ file(params.test_data['homo_sapiens']['illumina']['test_10x_1_fastq_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_10x_2_fastq_gz'], checkIfExists: true) + ] + ] + + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + gtf = file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true) + reference_name = "homo_sapiens_chr22_reference" + + // build the Cell Ranger reference (filtered GTF, then mkref) that is passed to UNIVERSC + CELLRANGER_MKGTF ( gtf ) + + CELLRANGER_MKREF ( + fasta, + CELLRANGER_MKGTF.out.gtf, + reference_name + ) + + UNIVERSC ( + input, + CELLRANGER_MKREF.out.reference + ) +} diff --git a/tests/modules/nf-core/universc/nextflow.config b/tests/modules/nf-core/universc/nextflow.config new file mode 100644 index 00000000000..98b8d475da4 --- /dev/null +++ b/tests/modules/nf-core/universc/nextflow.config @@ -0,0 +1,17 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: UNIVERSC { + ext.args = '' + container = "nfcore/universc:1.2.5.1" + } + + withName: CELLRANGER_MKGTF { + container = "nfcore/universc:1.2.5.1" + } + withName: CELLRANGER_MKREF{ + container = "nfcore/universc:1.2.5.1" + } + +} diff --git a/tests/modules/nf-core/universc/test.yml b/tests/modules/nf-core/universc/test.yml new file mode 100644 index 00000000000..a23430968fe --- /dev/null +++ b/tests/modules/nf-core/universc/test.yml @@ -0,0 +1,43 @@ +- name: universc test_universc_10x + command: nextflow run ./tests/modules/nf-core/universc -entry test_universc_10x -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/universc/nextflow.config + tags: + - universc + files: + - path: output/cellranger/genome.filtered.gtf + md5sum: 9dc1a510bbe4438cfc2a74423bf2b2bd + - path: 
output/cellranger/homo_sapiens_chr22_reference/fasta/genome.fa + md5sum: f315020d899597c1b57e5fe9f60f4c3e + - path: output/cellranger/homo_sapiens_chr22_reference/fasta/genome.fa.fai + md5sum: 3520cd30e1b100e55f578db9c855f685 + - path: output/cellranger/homo_sapiens_chr22_reference/reference.json + md5sum: 1f7bf05454cc908bf70cd232ae70b02d + - path: output/cellranger/homo_sapiens_chr22_reference/star/Genome + md5sum: 22102926fadf5890e905ca71b2da3f35 + - path: output/cellranger/homo_sapiens_chr22_reference/star/SA + md5sum: bcf3e1a855783105150b46c905465333 + - path: output/cellranger/homo_sapiens_chr22_reference/star/SAindex + md5sum: b93fb07d342e6c32a00ebc4311c0ad38 + - path: output/cellranger/homo_sapiens_chr22_reference/star/chrLength.txt + md5sum: c81f40f27e72606d7d07097c1d56a5b5 + - path: output/cellranger/homo_sapiens_chr22_reference/star/chrName.txt + md5sum: 5ae68a67b70976ee95342a7451cb5af1 + - path: output/cellranger/homo_sapiens_chr22_reference/star/chrNameLength.txt + md5sum: b190587cae0531f3cf25552d8aa674db + - path: output/cellranger/homo_sapiens_chr22_reference/star/chrStart.txt + md5sum: bc73df776dd3d5bb9cfcbcba60880519 + - path: output/cellranger/homo_sapiens_chr22_reference/star/exonGeTrInfo.tab + md5sum: d04497f69d6ef889efd4d34fe63edcc4 + - path: output/cellranger/homo_sapiens_chr22_reference/star/exonInfo.tab + md5sum: 0d560290fab688b7268d88d5494bf9fe + - path: output/cellranger/homo_sapiens_chr22_reference/star/geneInfo.tab + md5sum: a20c70b081f5d83649c48ebbd951cb77 + - path: output/cellranger/homo_sapiens_chr22_reference/star/genomeParameters.txt + contains: ["genomeGenerate"] + - path: output/cellranger/homo_sapiens_chr22_reference/star/transcriptInfo.tab + md5sum: 6fa11b4d34f4680a1c23dbcea2e050d5 + - path: output/universc/sample-123/outs/basic_stats.txt + md5sum: 6ce8341506150f0b1add1800b0a11cdd + - path: output/universc/sample-123/outs/metrics_summary.csv + md5sum: edfa1a0dc666c38f9740167c045fb3c8 + - path: 
output/universc/sample-123/outs/web_summary.html + contains: [" sample-123"]