diff --git a/CHANGELOG.md b/CHANGELOG.md index b42791db..a9d9a7e7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## v2.6.0dev - [date] -- [[#91](https://github.com/nf-core/scrnaseq/issues/91)] - Change from pytests to nf-test +- Change from pytests to nf-test ([#291](https://github.com/nf-core/scrnaseq/pull/291)) +- Update template to v2.13.1 ([#309](https://github.com/nf-core/scrnaseq/pull/309)) +- Update to kallisto|bustools v0.28.2 ([#294](https://github.com/nf-core/scrnaseq/pull/294)) ## v2.5.1 diff --git a/modules/local/gene_map.nf b/modules/local/gene_map.nf deleted file mode 100644 index 9fd29e0a..00000000 --- a/modules/local/gene_map.nf +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Reformat design file and check validity - */ -process GENE_MAP { - tag "$gtf" - label 'process_low' - - conda "conda-forge::python=3.8.3" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/python:3.8.3' : - 'biocontainers/python:3.8.3' }" - - input: - path gtf - - output: - path "transcripts_to_genes.txt" , emit: gene_map - - when: - task.ext.when == null || task.ext.when - - script: - if("${gtf}".endsWith('.gz')){ - name = "${gtf.baseName}" - unzip = "gunzip -f ${gtf}" - } else { - unzip = "" - name = "${gtf}" - } - """ - $unzip - cat $name | t2g.py --use_version > transcripts_to_genes.txt - """ -} diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index 84d98608..c991b695 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -27,7 +27,7 @@ process MTX_TO_H5AD { if (params.aligner == 'kallisto') { mtx_matrix = "*count/counts_unfiltered/*.mtx" barcodes_tsv = "*count/counts_unfiltered/*.barcodes.txt" - features_tsv = "*count/counts_unfiltered/*.genes.txt" + features_tsv = "*count/counts_unfiltered/*.genes.names.txt" } else if (params.aligner == 'alevin') { mtx_matrix = "*_alevin_results/af_quant/alevin/quants_mat.mtx" barcodes_tsv = "*_alevin_results/af_quant/alevin/quants_mat_rows.txt" @@ -54,13 +54,13 @@ process MTX_TO_H5AD { else if (params.aligner == 'kallisto' && params.kb_workflow != 'standard') """ # convert file types - for input_type in spliced unspliced ; do + for input_type in nascent ambiguous mature ; do mtx_to_h5ad.py \\ --aligner ${params.aligner} \\ --sample ${meta.id} \\ - --input *count/counts_unfiltered/\${input_type}.mtx \\ - --barcode *count/counts_unfiltered/\${input_type}.barcodes.txt \\ - --feature *count/counts_unfiltered/\${input_type}.genes.txt \\ + --input *count/counts_unfiltered/cells_x_genes.\${input_type}.mtx \\ + --barcode $barcodes_tsv \\ + --feature $features_tsv \\ --txp2gene ${txp2gene} \\ --star_index ${star_index} \\ --out ${meta.id}/${meta.id}_\${input_type}_matrix.h5ad ; diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf index d83575a4..82ee63cd 100644 --- a/modules/local/mtx_to_seurat.nf +++ 
b/modules/local/mtx_to_seurat.nf @@ -26,7 +26,7 @@ process MTX_TO_SEURAT { } else if (params.aligner == "kallisto") { matrix = "*count/counts_unfiltered/*.mtx" barcodes = "*count/counts_unfiltered/*.barcodes.txt" - features = "*count/counts_unfiltered/*.genes.txt" + features = "*count/counts_unfiltered/*.genes.names.txt" } else if (params.aligner == "alevin") { matrix = "*_alevin_results/af_quant/alevin/quants_mat.mtx" barcodes = "*_alevin_results/af_quant/alevin/quants_mat_rows.txt" @@ -43,11 +43,11 @@ process MTX_TO_SEURAT { if (params.aligner == 'kallisto' && params.kb_workflow != 'standard') """ # convert file types - for input_type in spliced unspliced ; do + for input_type in nascent ambiguous mature ; do mtx_to_seurat.R \\ - *count/counts_unfiltered/\${input_type}.mtx \\ - *count/counts_unfiltered/\${input_type}.barcodes.txt \\ - *count/counts_unfiltered/\${input_type}.genes.txt \\ + *count/counts_unfiltered/cells_x_genes.\${input_type}.mtx \\ + $barcodes \\ + $features \\ ${meta.id}/${meta.id}_\${input_type}_matrix.rds \\ ${aligner} done diff --git a/nextflow.config b/nextflow.config index 3fe5c47f..54bd4ab3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -19,17 +19,18 @@ params { // reference files genome = null transcript_fasta = null + txp2gene = null // salmon alevin parameters (simpleaf) simpleaf_rlen = 91 barcode_whitelist = null - txp2gene = null salmon_index = null - // kallist bustools parameters - kallisto_gene_map = null + // kallisto bustools parameters kallisto_index = null kb_workflow = "standard" + kb_t1c = null + kb_t2c = null // STARsolo parameters star_index = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 649b9b18..23f6e9b5 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -214,26 +214,35 @@ "type": "object", "description": "Params related to Kallisto/BUS tool", "default": "", - "fa_icon": "fas fa-fish", + "fa_icon": "fas fa-rainbow", "properties": { - "kallisto_gene_map": { - "type": "string", - 
"description": "Specify a Kallisto gene mapping file here. If you don't, this will be automatically created in the Kallisto workflow when specifying a valid `--gtf` file.", - "fa_icon": "fas fa-fish" - }, "kallisto_index": { "type": "string", "description": "Specify a path to the precomputed Kallisto index.", - "fa_icon": "fas fa-fish", + "fa_icon": "fas fa-rainbow", + "format": "file-path", + "exists": true + }, + "kb_t1c": { + "type": "string", + "description": "Specify a path to the cDNA transcripts-to-capture file.", + "fa_icon": "fas fa-rainbow", + "format": "file-path", + "exists": true + }, + "kb_t2c": { + "type": "string", + "description": "Specify a path to the intron transcripts-to-capture file.", + "fa_icon": "fas fa-rainbow", "format": "file-path", "exists": true }, "kb_workflow": { "type": "string", "default": "standard", - "description": "Type of workflow. Use `lamanno` for RNA velocity based on La Manno et al. 2018 logic. Use `nucleus` for RNA velocity on single-nucleus RNA-seq reads. Use `kite` for feature barcoding. Use `kite: 10xFB` for 10x Genomics Feature Barcoding technology. (default: standard)", - "fa_icon": "fas fa-fish", - "enum": ["standard", "lamanno", "nucleus", "kite", "kite: 10xFB"] + "description": "Type of workflow. Use `nac` for an index type that can quantify nascent and mature RNA. Use `lamanno` for RNA velocity based on La Manno et al. 2018 logic. 
(default: standard)", + "fa_icon": "fas fa-rainbow", + "enum": ["standard", "lamanno", "nac"] } } }, diff --git a/subworkflows/local/kallisto_bustools.nf b/subworkflows/local/kallisto_bustools.nf index 3210e47a..b6549094 100644 --- a/subworkflows/local/kallisto_bustools.nf +++ b/subworkflows/local/kallisto_bustools.nf @@ -1,5 +1,4 @@ /* -- IMPORT LOCAL MODULES/SUBWORKFLOWS -- */ -include { GENE_MAP } from '../../modules/local/gene_map' include {KALLISTOBUSTOOLS_COUNT } from '../../modules/nf-core/kallistobustools/count/main' /* -- IMPORT NF-CORE MODULES/SUBWORKFLOWS -- */ @@ -14,6 +13,8 @@ workflow KALLISTO_BUSTOOLS { gtf kallisto_index txp2gene + t1c + t2c protocol kb_workflow ch_fastq @@ -21,26 +22,13 @@ workflow KALLISTO_BUSTOOLS { main: ch_versions = Channel.empty() - assert kallisto_index || (genome_fasta && gtf): + assert (txp2gene && kallisto_index) || (genome_fasta && gtf): "Must provide a genome fasta file ('--fasta') and a gtf file ('--gtf') if no index is given!" - assert txp2gene || gtf: - "Must provide either a GTF file ('--gtf') or kallisto gene map ('--kallisto_gene_map') to align with kallisto bustools!" 
- - /* - * Generate Kallisto Gene Map if not supplied and index is given - * If no index is given, the gene map will be generated in the 'kb ref' step - */ - if (!txp2gene && kallisto_index) { - GENE_MAP( gtf ) - txp2gene = GENE_MAP.out.gene_map - ch_versions = ch_versions.mix(GENE_MAP.out.versions) - } - /* - * Generate kallisto index + * Generate kallisto index and t2g if not already present */ - if (!kallisto_index) { + if (!(txp2gene && kallisto_index)) { KALLISTOBUSTOOLS_REF( genome_fasta, gtf, kb_workflow ) txp2gene = KALLISTOBUSTOOLS_REF.out.t2g.collect() kallisto_index = KALLISTOBUSTOOLS_REF.out.index.collect() @@ -58,7 +46,8 @@ workflow KALLISTO_BUSTOOLS { txp2gene, t1c, t2c, - protocol + protocol, + kb_workflow ) ch_versions = ch_versions.mix(KALLISTOBUSTOOLS_COUNT.out.versions) @@ -66,7 +55,7 @@ workflow KALLISTO_BUSTOOLS { emit: ch_versions counts = KALLISTOBUSTOOLS_COUNT.out.count - txp2gene = txp2gene.collect() + txp2gene } diff --git a/tests/.nf-test.log b/tests/.nf-test.log new file mode 100644 index 00000000..8251fc75 --- /dev/null +++ b/tests/.nf-test.log @@ -0,0 +1,21 @@ +Feb-27 21:54:09.971 [main] INFO com.askimed.nf.test.App - nf-test 0.8.4 +Feb-27 21:54:09.988 [main] INFO com.askimed.nf.test.App - Arguments: [test, tests/main_pipeline_kallisto.test, --update-snapshot] +Feb-27 21:54:10.670 [main] INFO com.askimed.nf.test.App - Nextflow Version: 23.10.1 +Feb-27 21:54:10.674 [main] WARN com.askimed.nf.test.commands.RunTestsCommand - No nf-test config file found. +Feb-27 21:54:10.674 [main] INFO com.askimed.nf.test.commands.RunTestsCommand - Detected 1 test files. +Feb-27 21:54:10.676 [main] ERROR com.askimed.nf.test.commands.RunTestsCommand - Running tests failed. +java.lang.Exception: Test file '/home/ec2-user/scrnaseq/tests/tests/main_pipeline_kallisto.test' not found. 
+ at com.askimed.nf.test.core.TestExecutionEngine.parse(TestExecutionEngine.java:116) + at com.askimed.nf.test.core.TestExecutionEngine.execute(TestExecutionEngine.java:159) + at com.askimed.nf.test.commands.RunTestsCommand.execute(RunTestsCommand.java:184) + at com.askimed.nf.test.commands.AbstractCommand.call(AbstractCommand.java:43) + at com.askimed.nf.test.commands.AbstractCommand.call(AbstractCommand.java:18) + at picocli.CommandLine.executeUserObject(CommandLine.java:1953) + at picocli.CommandLine.access$1300(CommandLine.java:145) + at picocli.CommandLine$RunLast.executeUserObjectOfLastSubcommandWithSameParent(CommandLine.java:2352) + at picocli.CommandLine$RunLast.handle(CommandLine.java:2346) + at picocli.CommandLine$RunLast.handle(CommandLine.java:2311) + at picocli.CommandLine$AbstractParseResultHandler.execute(CommandLine.java:2179) + at picocli.CommandLine.execute(CommandLine.java:2078) + at com.askimed.nf.test.App.run(App.java:44) + at com.askimed.nf.test.App.main(App.java:51) diff --git a/tests/main_pipeline_kallisto.test.snap b/tests/main_pipeline_kallisto.test.snap index e57b77af..1eb15749 100644 --- a/tests/main_pipeline_kallisto.test.snap +++ b/tests/main_pipeline_kallisto.test.snap @@ -20,15 +20,15 @@ "name": "workflow", "success": true }, - "cells_x_genes.barcodes.txt:md5,18be561873e435d4587f6b3f95a0e301", + "cells_x_genes.barcodes.txt:md5,72d78bb1c1ee7cb174520b30f695aa48", "cells_x_genes.genes.txt:md5,acd9d00120f52031974b2add3e7521b6", - "cells_x_genes.mtx:md5,37d2cd8c712f9c70463e87485bf6cd36", - "cells_x_genes.barcodes.txt:md5,488437e1f5477243697efb93366e5676", + "cells_x_genes.mtx:md5,894d60da192e3788de11fa8fc1fa711d", + "cells_x_genes.barcodes.txt:md5,a8cf7ea4b2d075296a94bf066a64b7a4", "cells_x_genes.genes.txt:md5,acd9d00120f52031974b2add3e7521b6", - "cells_x_genes.mtx:md5,af90e05b404490f6cb133ab7f62949f8", - "Sample_X_matrix.rds:md5,f0e43f69403f4b2e7704065421592ad0", - "Sample_Y_matrix.rds:md5,61809156e64dbdaf254cbc1c3456588e" + 
"cells_x_genes.mtx:md5,abd83de117204d0a77df3c92d00cc025", + "Sample_X_matrix.rds:md5,0938f4189b7a7fd1030abfcee798741c", + "Sample_Y_matrix.rds:md5,93c12abe283ab37c5f37e5cd3cb25302" ], - "timestamp": "2024-01-23T12:19:47.921508953" + "timestamp": "2024-02-27T12:19:47.921508953" } } diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index f2a0ba3f..64d75390 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -43,6 +43,12 @@ workflow SCRNASEQ { ch_barcode_whitelist = [] } + //kallisto params + ch_kallisto_index = params.kallisto_index ? file(params.kallisto_index) : [] + kb_workflow = params.kb_workflow + kb_t1c = params.kb_t1c ? file(params.kb_t1c) : [] + kb_t2c = params.kb_t2c ? file(params.kb_t2c) : [] + // samplesheet - this is passed to the MTX conversion functions to add metadata to the // AnnData objects. ch_input = file(params.input) @@ -83,6 +89,8 @@ workflow SCRNASEQ { ch_filter_gtf, ch_kallisto_index, ch_txp2gene, + kb_t1c, + kb_t2c, protocol_config['protocol'], kb_workflow, ch_fastq