diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 32fb5b8..fb18a85 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -44,3 +44,32 @@ jobs:
           # Remember that you can parallelise this by using strategy.matrix
         run: |
           nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results
+
+  test_fasta:
+    name: Run pipeline with test data using fasta files in the samplesheet
+    # Only run on push if this is the nf-core dev branch (merged PRs)
+    if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/reportho') }}"
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        NXF_VER:
+          - "23.04.0"
+          - "latest-everything"
+    steps:
+      - name: Check out pipeline code
+        uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4
+
+      - name: Install Nextflow
+        uses: nf-core/setup-nextflow@v2
+        with:
+          version: "${{ matrix.NXF_VER }}"
+
+      - name: Disk space cleanup
+        uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
+
+      - name: Run pipeline with test data
+        # TODO nf-core: You can customise CI pipeline run tests as required
+        # For example: adding multiple test runs with different parameters
+        # Remember that you can parallelise this by using strategy.matrix
+        run: |
+          nextflow run ${GITHUB_WORKSPACE} -profile test_fasta,docker --outdir ./results
diff --git a/README.md b/README.md
index f363652..63e8fac 100644
--- a/README.md
+++ b/README.md
@@ -44,21 +44,20 @@ Steps that follow can be skipped with `--skip_downstream` in batch analysis.
 
 First, prepare a samplesheet with your input data that looks as follows:
 
-`samplesheet.csv`:
-
-```csv
-id,query
+```csv title="samplesheet_fasta.csv"
+id,fasta
 BicD2,data/bicd2.fasta
 ```
 
-or:
+or, if you know the UniProt ID of the protein, you can provide it directly:
 
-```csv
+```csv title="samplesheet.csv"
 id,query
 BicD2,Q8TD16
 ```
 
-If using the latter format, you must set `--uniprot_query` to true.
+> [!NOTE]
+> If you provide both a FASTA file and a UniProt ID, only the UniProt ID will be used.
 
 Now, you can run the pipeline using:
diff --git a/assets/samplesheet_fasta.csv b/assets/samplesheet_fasta.csv
new file mode 100644
index 0000000..9cdb0c6
--- /dev/null
+++ b/assets/samplesheet_fasta.csv
@@ -0,0 +1,3 @@
+id,fasta
+ste2,https://raw.githubusercontent.com/nf-core/test-datasets/reportho/testdata/sequences/ste2.fa
+ste3,https://raw.githubusercontent.com/nf-core/test-datasets/reportho/testdata/sequences/ste3.fa
diff --git a/assets/schema_input.json b/assets/schema_input.json
index d80499c..55dd337 100644
--- a/assets/schema_input.json
+++ b/assets/schema_input.json
@@ -17,8 +17,22 @@
                 "type": "string",
                 "pattern": "^\\S+$",
                 "errorMessage": "A query must be provided"
+            },
+            "fasta": {
+                "type": "string",
+                "format": "file-path",
+                "exists": true,
+                "pattern": "^\\S+\\.fa(sta)?$",
+                "errorMessage": "Fasta file must be provided, cannot contain spaces and must have extension '.fa' or '.fasta'"
             }
         },
-        "required": ["id", "query"]
-    }
+        "anyOf": [
+            {
+                "required": ["id", "query"]
+            },
+            {
+                "required": ["id", "fasta"]
+            }
+        ]
+    }
 }
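Under the new `anyOf` rule, a row passes validation when it carries an `id` plus at least one of `query` or `fasta`, so a single samplesheet can mix both query types. A hypothetical sheet the schema would accept (file name and paths are illustrative, not shipped with the pipeline):

```csv title="samplesheet_mixed.csv"
id,query,fasta
BicD2,Q8TD16,
ste2,,data/ste2.fa
```

Rows that fill in both columns are also valid; per the README note above, the UniProt ID then takes precedence.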
diff --git a/bin/fetch_oma_by_sequence.py b/bin/fetch_oma_by_sequence.py
index 70f719b..c30a084 100755
--- a/bin/fetch_oma_by_sequence.py
+++ b/bin/fetch_oma_by_sequence.py
@@ -30,9 +30,9 @@ def main() -> None:
 
     # Find the main isoform
     for it in json["targets"]:
-      if it["is_main_isoform"]:
-          entry = it
-          break
+        if it["is_main_isoform"]:
+            entry = it
+            break
 
     # Write exact match status
     if json["identified_by"] == "exact match":
diff --git a/conf/test.config b/conf/test.config
index 2a67104..7de21c0 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -23,7 +23,6 @@ params {
     input = 'https://raw.githubusercontent.com/nf-core/test-datasets/reportho/testdata/samplesheet/samplesheet.csv'
 
     // Other parameters
-    uniprot_query    = true
     skip_eggnog      = true
     min_score        = 3
     skip_iqtree      = true
diff --git a/conf/test_fasta.config b/conf/test_fasta.config
new file mode 100644
index 0000000..e9b009f
--- /dev/null
+++ b/conf/test_fasta.config
@@ -0,0 +1,31 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running minimal tests
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a fast and simple pipeline test.
+
+    Use as follows:
+        nextflow run nf-core/reportho -profile test_fasta,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+params {
+    config_profile_name        = 'Test profile'
+    config_profile_description = 'Minimal test dataset to check pipeline function'
+
+    // Limit resources so that this can run on GitHub Actions
+    max_cpus   = 2
+    max_memory = '6.GB'
+    max_time   = '6.h'
+
+    // Input data
+    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/reportho/testdata/samplesheet/samplesheet_fasta.csv'
+
+    // Other parameters
+    skip_eggnog      = true
+    min_score        = 3
+    skip_iqtree      = true
+    fastme_bootstrap = 0
+}
+
diff --git a/conf/test_full.config b/conf/test_full.config
index 2f59347..68c6bb4 100644
--- a/conf/test_full.config
+++ b/conf/test_full.config
@@ -18,7 +18,6 @@ params {
     input = 'https://raw.githubusercontent.com/nf-core/test-datasets/reportho/testdata/samplesheet/samplesheet.csv'
 
     // Other parameters
-    uniprot_query     = true
     eggnog_path       = 'http://eggnog5.embl.de/download/eggnog_5.0/per_tax_level/1/1_members.tsv.gz'
     eggnog_idmap_path = "http://eggnog5.embl.de/download/eggnog_5.0/id_mappings/uniprot/latest.Eukaryota.tsv.gz"
     min_score         = 3
diff --git a/docs/usage.md b/docs/usage.md
index 33eaba7..1b1ce30 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -18,28 +18,29 @@ You will need to create a samplesheet with information about the samples you wou
 
 ### Full samplesheet
 
-The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 2 columns to match those defined in the table below.
+The samplesheet can have as many columns as you desire; however, the first two columns must match those defined in the table below.
 
-A final samplesheet file may look something like the one below, with `--uniprot_query` enabled:
+A final samplesheet file may look something like the one below:
 
 ```csv title="samplesheet.csv"
 id,query
 BicD2,Q8TD16
 ```
 
-or the one below, otherwise:
+or the one below, if you provide the sequence of the protein in FASTA format:
 
 ```csv title="samplesheet.csv"
-id,query
+id,fasta
 BicD2,/home/myuser/data/bicd2.fa
 ```
 
-| Column  | Description                                                                                                                                                         |
-| ------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `id`    | User-defined identifier. It is used to identify output files for the protein. Can be anything descriptive, as long as it does not contain spaces.                   |
-| `query` | The query of the user-specified type. If `--uniprot_query` is `true`, it should be a valid Uniprot accession. Otherwise, it should be a valid path to a FASTA file. |
+| Column  | Description                                                                                                                                        |
+| ------- | -------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `id`    | User-defined identifier. It is used to identify output files for the protein. Can be anything descriptive, as long as it does not contain spaces.   |
+| `query` | The query protein, given as a valid UniProt accession.                                                                                               |
+| `fasta` | The query protein, given as a path to a FASTA file with extension `.fa` or `.fasta`.                                                                 |
 
-An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline.
+An [example UniProt samplesheet](../assets/samplesheet.csv) and an [example FASTA samplesheet](../assets/samplesheet_fasta.csv) have been provided with the pipeline.
 
 ## Running the pipeline
diff --git a/lib/nfcore_external_java_deps.jar b/lib/nfcore_external_java_deps.jar
new file mode 100644
index 0000000..e69de29
diff --git a/main.nf b/main.nf
index a7e69c2..cb1dfd0 100644
--- a/main.nf
+++ b/main.nf
@@ -33,7 +33,8 @@ include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_repo
 
 workflow NFCORE_REPORTHO {
 
     take:
-    samplesheet // channel: samplesheet read in from --input
+    samplesheet_query // channel: samplesheet read in from --input with query
+    samplesheet_fasta // channel: samplesheet read in from --input with fasta
 
     main:
@@ -41,7 +42,8 @@ workflow NFCORE_REPORTHO {
     // WORKFLOW: Run pipeline
     //
     REPORTHO (
-        samplesheet
+        samplesheet_query,
+        samplesheet_fasta,
     )
 
     emit:
@@ -75,7 +77,8 @@ workflow {
     // WORKFLOW: Run main workflow
     //
     NFCORE_REPORTHO (
-        PIPELINE_INITIALISATION.out.samplesheet
+        PIPELINE_INITIALISATION.out.samplesheet_query,
+        PIPELINE_INITIALISATION.out.samplesheet_fasta,
     )
 
     //
diff --git a/modules/local/dump_params.nf b/modules/local/dump_params.nf
index de9747b..2b4712d 100644
--- a/modules/local/dump_params.nf
+++ b/modules/local/dump_params.nf
@@ -8,7 +8,6 @@ process DUMP_PARAMS {
 
     input:
     tuple val(meta), path(exact)
-    val uniprot_query
    val use_structures
    val use_centroid
    val min_score
@@ -26,7 +25,6 @@ process DUMP_PARAMS {
 
    """
    cat <<- END_PARAMS > params.yml
    id: ${meta.id}
-    uniprot_query: ${uniprot_query}
    exact_match: \$(cat $exact)
    use_structures: ${use_structures}
diff --git a/modules/local/fetch_sequences_online.nf b/modules/local/fetch_sequences_online.nf
index eec8581..b95be8f 100644
--- a/modules/local/fetch_sequences_online.nf
+++ b/modules/local/fetch_sequences_online.nf
@@ -20,8 +20,8 @@ process FETCH_SEQUENCES_ONLINE {
    task.ext.when == null || task.ext.when
 
    script:
-    prefix = task.ext.prefix ?: meta.id
-    add_query = params.uniprot_query ? "" : "cat $query_fasta >> ${prefix}_orthologs.fa"
+    def prefix = task.ext.prefix ?: meta.id
+    def add_query = query_fasta == [] ? "" : "cat $query_fasta >> ${prefix}_orthologs.fa"
    """
    fetch_sequences.py $ids $prefix > ${prefix}_orthologs.fa
    $add_query
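The module now decides per sample whether to append the query sequence, keyed on the `query_fasta` slot (an empty list `[]` for UniProt-ID queries) rather than the removed global `--uniprot_query` flag. A minimal sketch of this optional-input idiom, with a hypothetical process name not taken from the pipeline:

```nextflow
// Hypothetical sketch: an empty list in the fasta slot is falsy in Groovy,
// so add_query collapses to an empty string and the extra cat command is
// simply omitted for UniProt-ID queries.
process APPEND_QUERY {
    input:
    tuple val(meta), path(orthologs), path(query_fasta)

    output:
    tuple val(meta), path("${meta.id}_with_query.fa")

    script:
    def add_query = query_fasta ? "cat ${query_fasta} >> ${meta.id}_with_query.fa" : ""
    """
    cat ${orthologs} > ${meta.id}_with_query.fa
    ${add_query}
    """
}
```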
diff --git a/nextflow.config b/nextflow.config
index 8ed611c..805568f 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -10,7 +10,6 @@ params {
 
     // Input options
     input                      = null
-    uniprot_query              = false
 
     // MultiQC options
     multiqc_config             = null
@@ -200,8 +199,9 @@ profiles {
        executor.cpus   = 4
        executor.memory = 8.GB
    }
-    test      { includeConfig 'conf/test.config'      }
-    test_full { includeConfig 'conf/test_full.config' }
+    test       { includeConfig 'conf/test.config'       }
+    test_fasta { includeConfig 'conf/test_fasta.config' }
+    test_full  { includeConfig 'conf/test_full.config'  }
 }
 
 // Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile
@@ -214,7 +214,7 @@ singularity.registry = 'quay.io'
 
 // Nextflow plugins
 plugins {
-    id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet
+    id 'nf-schema@2.0.0' // Validation of pipeline parameters and creation of an input channel from a sample sheet
 }
 
 // Export these variables to prevent local Python/R libraries from conflicting with those in the container
diff --git a/nextflow_schema.json b/nextflow_schema.json
index cc60f32..4c22f00 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -23,12 +23,6 @@
                    "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/reportho/usage#samplesheet-input).",
                    "fa_icon": "fas fa-file-csv"
                },
-                "uniprot_query": {
-                    "type": "boolean",
-                    "description": "The input contains a Uniprot ID as query.",
-                    "help_text": "If the input file contains a Uniprot ID as query, set this parameter to `true`.",
-                    "fa_icon": "fas fa-database"
-                },
                "outdir": {
                    "type": "string",
                    "format": "directory-path",
diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf
index 46c78b4..2459c65 100644
--- a/subworkflows/local/align.nf
+++ b/subworkflows/local/align.nf
@@ -25,9 +25,7 @@ workflow ALIGN {
        ch_for_filter
    )
 
-    ch_versions
-        .mix(FILTER_FASTA.out.versions)
-        .set { ch_versions }
+    ch_versions = ch_versions.mix(FILTER_FASTA.out.versions)
 
    CREATE_TCOFFEETEMPLATE(
        ch_pdb
@@ -52,9 +50,8 @@ workflow ALIGN {
        TCOFFEE_3DALIGN.out.alignment
            .set { ch_alignment }
 
-        ch_versions
-            .mix(TCOFFEE_3DALIGN.out.versions)
-            .set { ch_versions }
+        ch_versions = ch_versions.mix(TCOFFEE_3DALIGN.out.versions)
+
    } else {
 
        TCOFFEE_ALIGN (
@@ -67,9 +64,7 @@ workflow ALIGN {
        TCOFFEE_ALIGN.out.alignment
            .set { ch_alignment }
 
-        ch_versions
-            .mix(TCOFFEE_ALIGN.out.versions)
-            .set { ch_versions }
+        ch_versions = ch_versions.mix(TCOFFEE_ALIGN.out.versions)
    }
 
    emit:
diff --git a/subworkflows/local/fetch_sequences.nf b/subworkflows/local/fetch_sequences.nf
index bb03048..0c441dd 100644
--- a/subworkflows/local/fetch_sequences.nf
+++ b/subworkflows/local/fetch_sequences.nf
@@ -2,12 +2,14 @@ include { FETCH_SEQUENCES_ONLINE } from "../../modules/local/fetch_sequences_onl
 
 workflow FETCH_SEQUENCES {
    take:
-    ch_idlist
-    ch_query_fasta
+    ch_id_list
+    ch_query
 
    main:
+    ch_id_list
+        .join(ch_query)
+        .set { ch_input }
 
-    ch_input = params.uniprot_query ? ch_idlist.map { it -> [it[0], it[1], []]} : ch_idlist.join(ch_query_fasta)
 
    FETCH_SEQUENCES_ONLINE (
        ch_input
    )
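Because every query now travels with a fasta slot (an empty list for UniProt IDs), the subworkflow can join unconditionally instead of switching on `params.uniprot_query`. A sketch of the expected `join` behaviour, with illustrative channel contents rather than real pipeline data:

```nextflow
// Illustrative only: both channels are keyed by the same meta map, so
// join pairs them up and emits [ meta, ids, fasta-or-empty ].
ch_id_list = Channel.of( [ [id: 'BicD2'], 'bicd2_ids.txt' ] )
ch_query   = Channel.of( [ [id: 'BicD2'], [] ] )   // UniProt query: empty fasta slot

ch_id_list
    .join(ch_query)   // matches on the first tuple element by default
    .view()           // -> [ [id:'BicD2'], 'bicd2_ids.txt', [] ]
```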
diff --git a/subworkflows/local/get_orthologs.nf b/subworkflows/local/get_orthologs.nf
index 287c112..6634aaf 100644
--- a/subworkflows/local/get_orthologs.nf
+++ b/subworkflows/local/get_orthologs.nf
@@ -21,44 +21,31 @@ include { CSVTK_CONCAT as MERGE_STATS } from "../../modules/nf-core/csvtk/conca
 
 workflow GET_ORTHOLOGS {
    take:
-    ch_samplesheet
+    ch_samplesheet_query
+    ch_samplesheet_fasta
 
    main:
-
    ch_versions = Channel.empty()
-    ch_queryid = params.uniprot_query ? ch_samplesheet.map { it[1] } : ch_samplesheet.map { it[0].id }
    ch_orthogroups = Channel.empty()
 
    // Preprocessing - find the ID and taxid of the query sequences
+    ch_samplesheet_fasta
+        .map { it -> [it[0], file(it[1])] }
+        .set { ch_fasta }
 
-    if (!params.uniprot_query) {
-        ch_samplesheet
-            .map { it -> [it[0], file(it[1])] }
-            .set { ch_inputfile }
-
-
-        IDENTIFY_SEQ_ONLINE (
-            ch_inputfile
-        )
+    IDENTIFY_SEQ_ONLINE (
+        ch_fasta
+    )
 
-        IDENTIFY_SEQ_ONLINE.out.seqinfo
-            .set { ch_query }
+    ch_query    = IDENTIFY_SEQ_ONLINE.out.seqinfo
+    ch_versions = ch_versions.mix(IDENTIFY_SEQ_ONLINE.out.versions)
 
-        ch_versions
-            .mix(IDENTIFY_SEQ_ONLINE.out.versions)
-            .set { ch_versions }
-    } else {
-        WRITE_SEQINFO (
-            ch_samplesheet
-        )
-
-        WRITE_SEQINFO.out.seqinfo
-            .set { ch_query }
+    WRITE_SEQINFO (
+        ch_samplesheet_query
+    )
 
-        ch_versions
-            .mix(WRITE_SEQINFO.out.versions)
-            .set { ch_versions }
-    }
+    ch_query    = IDENTIFY_SEQ_ONLINE.out.seqinfo.mix(WRITE_SEQINFO.out.seqinfo)
+    ch_versions = ch_versions.mix(WRITE_SEQINFO.out.versions)
 
    // Ortholog fetching
@@ -77,10 +64,9 @@ workflow GET_ORTHOLOGS {
            .mix(FETCH_OMA_GROUP_LOCAL.out.oma_group)
            .set { ch_orthogroups }
 
-        ch_versions
-            .mix(FETCH_OMA_GROUP_LOCAL.out.versions)
-            .set { ch_versions }
-    } else {
+        ch_versions = ch_versions.mix(FETCH_OMA_GROUP_LOCAL.out.versions)
+    }
+    else {
        FETCH_OMA_GROUP_ONLINE (
            ch_query
        )
@@ -89,9 +75,7 @@ workflow GET_ORTHOLOGS {
            .mix(FETCH_OMA_GROUP_ONLINE.out.oma_group)
            .set { ch_orthogroups }
 
-        ch_versions
-            .mix(FETCH_OMA_GROUP_ONLINE.out.versions)
-            .set { ch_versions }
+        ch_versions = ch_versions.mix(FETCH_OMA_GROUP_ONLINE.out.versions)
    }
    // Panther
    if (params.local_databases) {
@@ -104,9 +88,7 @@ workflow GET_ORTHOLOGS {
            .mix(FETCH_PANTHER_GROUP_LOCAL.out.panther_group)
            .set { ch_orthogroups }
 
-        ch_versions
-            .mix(FETCH_PANTHER_GROUP_LOCAL.out.versions)
-            .set { ch_versions }
+        ch_versions = ch_versions.mix(FETCH_PANTHER_GROUP_LOCAL.out.versions)
    } else {
        FETCH_PANTHER_GROUP_ONLINE (
            ch_query
@@ -116,9 +98,7 @@ workflow GET_ORTHOLOGS {
            .mix(FETCH_PANTHER_GROUP_ONLINE.out.panther_group)
            .set { ch_orthogroups }
 
-        ch_versions
-            .mix(FETCH_PANTHER_GROUP_ONLINE.out.versions)
-            .set { ch_versions }
+        ch_versions = ch_versions.mix(FETCH_PANTHER_GROUP_ONLINE.out.versions)
    }
    // OrthoInspector
    FETCH_INSPECTOR_GROUP_ONLINE (
        ch_query,
        params.orthoinspector_version
@@ -130,9 +110,7 @@ workflow GET_ORTHOLOGS {
        .mix(FETCH_INSPECTOR_GROUP_ONLINE.out.inspector_group)
        .set { ch_orthogroups }
 
-    ch_versions
-        .mix(FETCH_INSPECTOR_GROUP_ONLINE.out.versions)
-        .set { ch_versions }
+    ch_versions = ch_versions.mix(FETCH_INSPECTOR_GROUP_ONLINE.out.versions)
 
    FETCH_EGGNOG_GROUP_LOCAL (
        ch_query,
@@ -144,9 +122,7 @@ workflow GET_ORTHOLOGS {
        .mix(FETCH_EGGNOG_GROUP_LOCAL.out.eggnog_group)
        .set { ch_orthogroups }
 
-    ch_versions
-        .mix(FETCH_EGGNOG_GROUP_LOCAL.out.versions)
-        .set { ch_versions }
+    ch_versions = ch_versions.mix(FETCH_EGGNOG_GROUP_LOCAL.out.versions)
    } else {
        // online/local separation is used
        // local only
@@ -164,9 +140,7 @@ workflow GET_ORTHOLOGS {
            .mix(FETCH_OMA_GROUP_LOCAL.out.oma_group)
            .set { ch_orthogroups }
 
-            ch_versions
-                .mix(FETCH_OMA_GROUP_LOCAL.out.versions)
-                .set { ch_versions }
+            ch_versions = ch_versions.mix(FETCH_OMA_GROUP_LOCAL.out.versions)
        }
 
        if (!params.skip_panther) {
@@ -179,9 +153,7 @@ workflow GET_ORTHOLOGS {
            .mix(FETCH_PANTHER_GROUP_LOCAL.out.panther_group)
            .set { ch_orthogroups }
 
-            ch_versions
-                .mix(FETCH_PANTHER_GROUP_LOCAL.out.versions)
-                .set { ch_versions }
+            ch_versions = ch_versions.mix(FETCH_PANTHER_GROUP_LOCAL.out.versions)
        }
 
        if(!params.skip_eggnog) {
@@ -195,10 +167,7 @@ workflow GET_ORTHOLOGS {
            .mix(FETCH_EGGNOG_GROUP_LOCAL.out.eggnog_group)
            .set { ch_orthogroups }
 
-            ch_versions
-                .mix(FETCH_EGGNOG_GROUP_LOCAL.out.versions)
-                .set { ch_versions }
-
+            ch_versions = ch_versions.mix(FETCH_EGGNOG_GROUP_LOCAL.out.versions)
        }
    } else {
        // online only
@@ -211,10 +180,7 @@ workflow GET_ORTHOLOGS {
            .mix(FETCH_OMA_GROUP_ONLINE.out.oma_group)
            .set { ch_orthogroups }
 
-            ch_versions
-                .mix(FETCH_OMA_GROUP_ONLINE.out.versions)
-                .set { ch_versions }
-
+            ch_versions = ch_versions.mix(FETCH_OMA_GROUP_ONLINE.out.versions)
        }
        if (!params.skip_panther) {
            FETCH_PANTHER_GROUP_ONLINE (
@@ -225,9 +191,7 @@ workflow GET_ORTHOLOGS {
            .mix(FETCH_PANTHER_GROUP_ONLINE.out.panther_group)
            .set { ch_orthogroups }
 
-            ch_versions
-                .mix(FETCH_PANTHER_GROUP_ONLINE.out.versions)
-                .set { ch_versions }
+            ch_versions = ch_versions.mix(FETCH_PANTHER_GROUP_ONLINE.out.versions)
        }
        if (!params.skip_orthoinspector) {
            FETCH_INSPECTOR_GROUP_ONLINE (
@@ -239,9 +203,7 @@ workflow GET_ORTHOLOGS {
            .mix(FETCH_INSPECTOR_GROUP_ONLINE.out.inspector_group)
            .set { ch_orthogroups }
 
-            ch_versions
-                .mix(FETCH_INSPECTOR_GROUP_ONLINE.out.versions)
-                .set { ch_versions }
+            ch_versions = ch_versions.mix(FETCH_INSPECTOR_GROUP_ONLINE.out.versions)
        }
    }
 }
@@ -252,9 +214,7 @@ workflow GET_ORTHOLOGS {
        ch_orthogroups.groupTuple()
    )
 
-    ch_versions
-        .mix(MERGE_CSV.out.versions)
-        .set { ch_versions }
+    ch_versions = ch_versions.mix(MERGE_CSV.out.versions)
 
    // Scoring and filtering
@@ -262,9 +222,7 @@ workflow GET_ORTHOLOGS {
        MERGE_CSV.out.csv
    )
 
-    ch_versions
-        .mix(MAKE_SCORE_TABLE.out.versions)
-        .set { ch_versions }
+    ch_versions = ch_versions.mix(MAKE_SCORE_TABLE.out.versions)
 
    ch_forfilter = MAKE_SCORE_TABLE.out.score_table
        .combine(ch_query, by: 0)
@@ -276,9 +234,7 @@ workflow GET_ORTHOLOGS {
        params.min_score
    )
 
-    ch_versions
-        .mix(FILTER_HITS.out.versions)
-        .set { ch_versions }
+    ch_versions = ch_versions.mix(FILTER_HITS.out.versions)
 
    // Plotting
@@ -295,9 +251,7 @@ workflow GET_ORTHOLOGS {
        ch_vennplot = PLOT_ORTHOLOGS.out.venn
        ch_jaccardplot = PLOT_ORTHOLOGS.out.jaccard
 
-        ch_versions
-            .mix(PLOT_ORTHOLOGS.out.versions)
-            .set { ch_versions }
+        ch_versions = ch_versions.mix(PLOT_ORTHOLOGS.out.versions)
    }
 
    // Hits
@@ -306,9 +260,7 @@ workflow GET_ORTHOLOGS {
        MERGE_CSV.out.csv
    )
 
-    ch_versions
-        .mix(MAKE_HITS_TABLE.out.versions)
-        .set { ch_versions }
+    ch_versions = ch_versions.mix(MAKE_HITS_TABLE.out.versions)
 
    ch_hits = MAKE_HITS_TABLE.out.hits_table
        .collect { it[1] }
@@ -320,9 +272,7 @@ workflow GET_ORTHOLOGS {
        "csv"
    )
 
-    ch_versions
-        .mix(MERGE_HITS.out.versions)
-        .set { ch_versions }
+    ch_versions = ch_versions.mix(MERGE_HITS.out.versions)
 
    // Stats
@@ -330,17 +280,13 @@ workflow GET_ORTHOLOGS {
        MAKE_SCORE_TABLE.out.score_table
    )
 
-    ch_versions
-        .mix(MAKE_STATS.out.versions)
-        .set { ch_versions }
+    ch_versions = ch_versions.mix(MAKE_STATS.out.versions)
 
    STATS2CSV(
        MAKE_STATS.out.stats
    )
 
-    ch_versions
-        .mix(STATS2CSV.out.versions)
-        .set { ch_versions }
+    ch_versions = ch_versions.mix(STATS2CSV.out.versions)
 
    ch_stats = STATS2CSV.out.csv
        .collect { it[1] }
@@ -352,9 +298,7 @@ workflow GET_ORTHOLOGS {
        "csv"
    )
 
-    ch_versions
-        .mix(MERGE_STATS.out.versions)
-        .set { ch_versions }
+    ch_versions = ch_versions.mix(MERGE_STATS.out.versions)
 
    ch_versions
        .collectFile(name: "get_orthologs_versions.yml", sort: true, newLine: true)
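Both preprocessing branches now run unconditionally on their own slice of the samplesheet, and their `seqinfo` outputs are merged with `mix` into a single `ch_query` consumed by every fetcher above. A sketch of that merge, with illustrative tuples in place of real outputs:

```nextflow
// Illustrative only: the two branches emit the same tuple shape,
// so mixing them yields one uniform channel of query metadata.
ch_from_fasta   = Channel.of( [ [id: 'ste2'],  'ste2_seqinfo.csv'  ] )   // IDENTIFY_SEQ_ONLINE side
ch_from_uniprot = Channel.of( [ [id: 'BicD2'], 'bicd2_seqinfo.csv' ] )   // WRITE_SEQINFO side

ch_query = ch_from_fasta.mix(ch_from_uniprot)
ch_query.view()   // emits both tuples; ordering is not guaranteed
```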
diff --git a/subworkflows/local/make_trees.nf b/subworkflows/local/make_trees.nf
index b4743a0..6f60967 100644
--- a/subworkflows/local/make_trees.nf
+++ b/subworkflows/local/make_trees.nf
@@ -24,9 +24,7 @@ workflow MAKE_TREES {
 
        ch_mltree = IQTREE.out.phylogeny
 
-        ch_versions
-            .mix(IQTREE.out.versions)
-            .set { ch_versions }
+        ch_versions = ch_versions.mix(IQTREE.out.versions)
 
        ch_mlplot = ch_alignment.map { [it[0], []] }
 
@@ -38,9 +36,7 @@ workflow MAKE_TREES {
 
            ch_mlplot = PLOT_IQTREE.out.plot
 
-            ch_versions
-                .mix(PLOT_IQTREE.out.versions)
-                .set { ch_versions }
+            ch_versions = ch_versions.mix(PLOT_IQTREE.out.versions)
        }
    }
 
@@ -50,9 +46,7 @@ workflow MAKE_TREES {
        ch_alignment
    )
 
-    ch_versions
-        .mix(CONVERT_PHYLIP.out.versions)
-        .set { ch_versions }
+    ch_versions = ch_versions.mix(CONVERT_PHYLIP.out.versions)
 
    FASTME (
        CONVERT_PHYLIP.out.phylip.map { [it[0], it[1], []] }
@@ -60,9 +54,7 @@ workflow MAKE_TREES {
 
    ch_metree = FASTME.out.nwk
 
-    ch_versions
-        .mix(FASTME.out.versions)
-        .set { ch_versions }
+    ch_versions = ch_versions.mix(FASTME.out.versions)
 
    ch_meplot = ch_alignment.map { [it[0], []] }
 
@@ -74,9 +66,7 @@ workflow MAKE_TREES {
 
        ch_meplot = PLOT_FASTME.out.plot
 
-        ch_versions
-            .mix(PLOT_FASTME.out.versions)
-            .set { ch_versions }
+        ch_versions = ch_versions.mix(PLOT_FASTME.out.versions)
    }
 }
diff --git a/subworkflows/local/report.nf b/subworkflows/local/report.nf
index 47e061d..b3c60ed 100644
--- a/subworkflows/local/report.nf
+++ b/subworkflows/local/report.nf
@@ -5,7 +5,6 @@ include { CONVERT_FASTA } from "../../modules/local/convert_fasta"
 
 workflow REPORT {
    take:
-    uniprot_query
    use_structures
    use_centroid
    min_score
@@ -52,7 +51,6 @@ workflow REPORT {
 
    DUMP_PARAMS(
        ch_seqinfo.map { [it[0], it[3]] },
-        params.uniprot_query,
        params.use_structures,
        params.use_centroid,
        params.min_score,
@@ -66,9 +64,7 @@ workflow REPORT {
 
        ch_fasta = CONVERT_FASTA.out.fasta
 
-        ch_versions
-            .mix(CONVERT_FASTA.out.versions)
-            .set { ch_versions }
+        ch_versions = ch_versions.mix(CONVERT_FASTA.out.versions)
    }
 
    ch_forreport = ch_seqinfo
@@ -91,9 +87,7 @@ workflow REPORT {
        ch_forreport
    )
 
-    ch_versions
-        .mix(MAKE_REPORT.out.versions)
-        .set { ch_versions }
+    ch_versions = ch_versions.mix(MAKE_REPORT.out.versions)
 
    emit:
    versions = ch_versions
diff --git a/subworkflows/local/utils_nfcore_reportho_pipeline/main.nf b/subworkflows/local/utils_nfcore_reportho_pipeline/main.nf
index 877995f..44dc7eb 100644
--- a/subworkflows/local/utils_nfcore_reportho_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_reportho_pipeline/main.nf
@@ -74,19 +74,23 @@ workflow PIPELINE_INITIALISATION {
    )
 
    //
-    // Create channel from input file provided through params.input
+    // Create channel from input file provided through params.input and check for query
    //
 
    Channel
        .fromSamplesheet("input")
-        .map {
-            id, query ->
-                [ id, query ]
+        .branch {
+            id, query, fasta ->
+                query: query != []
+                    return [ id, query ]
+                fasta: query == []
+                    return [ id, fasta ]
        }
        .set { ch_samplesheet }
 
    emit:
-    samplesheet = ch_samplesheet
-    versions    = ch_versions
+    samplesheet_query = ch_samplesheet.query
+    samplesheet_fasta = ch_samplesheet.fasta
+    versions          = ch_versions
 }
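`PIPELINE_INITIALISATION` now splits each validated samplesheet row on whether the `query` column was filled in, so the rest of the pipeline receives two cleanly typed channels. A sketch of the `branch` semantics with illustrative rows (the `[]` placeholder is what an empty samplesheet cell arrives as, matching the conditions the diff tests against):

```nextflow
// Illustrative only: rows with a query go to .query, rows with only a
// fasta go to .fasta; each branch reshapes the tuple it emits.
Channel
    .of(
        [ [id: 'BicD2'], 'Q8TD16', []        ],   // row with a UniProt query
        [ [id: 'ste2'],  [],       'ste2.fa' ]    // row with only a fasta
    )
    .branch { id, query, fasta ->
        query: query != []
            return [ id, query ]
        fasta: query == []
            return [ id, fasta ]
    }
    .set { ch_samplesheet }

ch_samplesheet.query.view()   // -> [ [id:'BicD2'], 'Q8TD16' ]
ch_samplesheet.fasta.view()   // -> [ [id:'ste2'], 'ste2.fa' ]
```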
diff --git a/workflows/reportho.nf b/workflows/reportho.nf
index 098aea7..8ac73f5 100644
--- a/workflows/reportho.nf
+++ b/workflows/reportho.nf
@@ -27,22 +27,22 @@ include { REPORT } from '../subworkflows/local/report'
 
 workflow REPORTHO {
 
    take:
-    ch_samplesheet // channel: samplesheet read in from --input
+    ch_samplesheet_query // channel: samplesheet query
+    ch_samplesheet_fasta // channel: samplesheet fasta
 
    main:
 
    ch_versions = Channel.empty()
    ch_multiqc_files = Channel.empty()
-
-    ch_query_fasta = params.uniprot_query ? ch_samplesheet.map { [it[0], []] } : ch_samplesheet.map { [it[0], file(it[1])] }
+    ch_fasta_query = ch_samplesheet_query.map { [it[0], []] }.mix(ch_samplesheet_fasta.map { [it[0], file(it[1])] })
 
    GET_ORTHOLOGS (
-        ch_samplesheet
+        ch_samplesheet_query,
+        ch_samplesheet_fasta
    )
 
-    ch_versions
-        .mix(GET_ORTHOLOGS.out.versions)
-        .set { ch_versions }
+    ch_versions = ch_versions.mix(GET_ORTHOLOGS.out.versions)
 
+    ch_samplesheet = ch_samplesheet_query.mix(ch_samplesheet_fasta)
    ch_multiqc_files = ch_multiqc_files.mix(GET_ORTHOLOGS.out.aggregated_stats.map {it[1]})
    ch_multiqc_files = ch_multiqc_files.mix(GET_ORTHOLOGS.out.aggregated_hits.map {it[1]})
@@ -58,16 +58,14 @@ workflow REPORTHO {
    if (!params.skip_downstream) {
        FETCH_SEQUENCES (
            GET_ORTHOLOGS.out.orthologs,
-            ch_query_fasta
+            ch_fasta_query
        )
 
        ch_seqhits = FETCH_SEQUENCES.out.hits
 
        ch_seqmisses = FETCH_SEQUENCES.out.misses
 
-        ch_versions
-            .mix(FETCH_SEQUENCES.out.versions)
-            .set { ch_versions }
+        ch_versions = ch_versions.mix(FETCH_SEQUENCES.out.versions)
 
        if (params.use_structures) {
            FETCH_STRUCTURES (
@@ -78,9 +76,7 @@ workflow REPORTHO {
 
            ch_strmisses = FETCH_STRUCTURES.out.misses
 
-            ch_versions
-                .mix(FETCH_STRUCTURES.out.versions)
-                .set { ch_versions }
+            ch_versions = ch_versions.mix(FETCH_STRUCTURES.out.versions)
        }
 
        ch_structures = params.use_structures ? FETCH_STRUCTURES.out.structures : Channel.empty()
@@ -92,9 +88,7 @@ workflow REPORTHO {
 
        ch_alignment = ALIGN.out.alignment
 
-        ch_versions
-            .mix(ALIGN.out.versions)
-            .set { ch_versions }
+        ch_versions = ch_versions.mix(ALIGN.out.versions)
 
        MAKE_TREES (
            ALIGN.out.alignment
@@ -103,14 +97,11 @@ workflow REPORTHO {
        ch_iqtree = MAKE_TREES.out.mlplot
        ch_fastme = MAKE_TREES.out.meplot
 
-        ch_versions
-            .mix(MAKE_TREES.out.versions)
-            .set { ch_versions }
+        ch_versions = ch_versions.mix(MAKE_TREES.out.versions)
    }
 
    if(!params.skip_report) {
        REPORT (
-            params.uniprot_query,
            params.use_structures,
            params.use_centroid,
            params.min_score,
@@ -133,9 +124,7 @@ workflow REPORTHO {
            ch_fastme
        )
 
-        ch_versions
-            .mix(REPORT.out.versions)
-            .set { ch_versions }
+        ch_versions = ch_versions.mix(REPORT.out.versions)
    }
 
    //
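For downstream sequence fetching, both row types are normalised to the shape `[ meta, fasta-or-empty ]` and recombined, which is what `ch_fasta_query` carries into `FETCH_SEQUENCES`. A sketch with illustrative values:

```nextflow
// Illustrative only: UniProt rows get an empty placeholder in the fasta
// slot, while FASTA rows get a resolved file object, so every sample
// presents the same tuple shape downstream.
ch_samplesheet_query = Channel.of( [ [id: 'BicD2'], 'Q8TD16'  ] )
ch_samplesheet_fasta = Channel.of( [ [id: 'ste2'],  'ste2.fa' ] )

ch_fasta_query = ch_samplesheet_query.map { [ it[0], [] ] }
    .mix( ch_samplesheet_fasta.map { [ it[0], file(it[1]) ] } )

ch_fasta_query.view()   // -> [ [id:'BicD2'], [] ] and [ [id:'ste2'], .../ste2.fa ]
```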