diff --git a/assets/NO_FILE_INTERSECT_COUNT b/assets/NO_FILE_INTERSECT_COUNT
new file mode 100644
index 00000000..e69de29b
diff --git a/modules/local/ancestry/ancestry_analysis.nf b/modules/local/ancestry/ancestry_analysis.nf
index ab9ce799..8e0d3f64 100644
--- a/modules/local/ancestry/ancestry_analysis.nf
+++ b/modules/local/ancestry/ancestry_analysis.nf
@@ -21,7 +21,6 @@ process ANCESTRY_ANALYSIS {
     script:
     """
-    # TODO: --ref_pcs is a horrible hack to select the first duplicate
     ancestry_analysis -d $meta.target_id \
         -r reference \
         --psam $ref_psam \
diff --git a/modules/local/ancestry/bootstrap/make_database.nf b/modules/local/ancestry/bootstrap/make_database.nf
index fdc1e7f9..8b9e7209 100644
--- a/modules/local/ancestry/bootstrap/make_database.nf
+++ b/modules/local/ancestry/bootstrap/make_database.nf
@@ -3,7 +3,7 @@ process MAKE_DATABASE {
     label 'process_low'
     label 'zstd' // controls conda, docker, + singularity options
 
-    storeDir "${workDir.resolve()}/reference"
+    storeDir workDir / "reference"
 
     conda "${task.ext.conda}"
diff --git a/modules/local/ancestry/extract_database.nf b/modules/local/ancestry/extract_database.nf
index 5ae7184b..f20118cd 100644
--- a/modules/local/ancestry/extract_database.nf
+++ b/modules/local/ancestry/extract_database.nf
@@ -3,7 +3,7 @@ process EXTRACT_DATABASE {
    label 'process_low'
    label 'zstd' // controls conda, docker, + singularity options
 
-    storeDir "${workDir.resolve()}/ref_extracted/"
+    storeDir workDir / "ancestry" / "ref_extracted"
 
    conda "${task.ext.conda}"
diff --git a/modules/local/ancestry/intersect_variants.nf b/modules/local/ancestry/intersect_variants.nf
index 91dbb32d..d57f8fa8 100644
--- a/modules/local/ancestry/intersect_variants.nf
+++ b/modules/local/ancestry/intersect_variants.nf
@@ -4,8 +4,9 @@ process INTERSECT_VARIANTS {
     label 'zstd' // controls conda, docker, + singularity options
     tag "$meta.id chromosome $meta.chrom"
 
-    def baseDir = ( params.genotypes_cache ? "$params.genotypes_cache" : "${workDir.resolve()}" )
-    storeDir "${baseDir}/intersected/${params.target_build}/$meta.id/$meta.chrom"
+
+    cachedir = params.genotypes_cache ? file(params.genotypes_cache) : workDir
+    storeDir cachedir / "ancestry" / "intersected"
 
     conda "${task.ext.conda}"
 
@@ -19,7 +20,7 @@ process INTERSECT_VARIANTS {
         path(ref_geno), path(ref_pheno), path(ref_variants)
 
     output:
-    tuple val(id), path("${meta.id}_${meta.chrom}_matched.txt.gz"), emit: intersection
+    tuple val(id), path("${output}.txt.gz"), emit: intersection
     path "intersect_counts_*.txt", emit: intersect_count
     path "versions.yml", emit: versions
 
@@ -27,6 +28,7 @@
     def mem_mb = task.memory.toMega() // plink is greedy
     def file_format = meta.is_pfile ? 'pvar' : 'bim'
     id = meta.subMap('id', 'build', 'n_chrom', 'chrom')
+    output = "${meta.id}_${meta.chrom}_matched"
     """
     intersect_variants.sh <(zstdcat $ref_variants) \
         <(zstdcat $variants) \
@@ -37,7 +39,7 @@
         echo "ERROR: No variants in intersection"
         exit 1
     else
-        mv matched_variants.txt ${meta.id}_${meta.chrom}_matched.txt
+        mv matched_variants.txt ${output}.txt
         gzip *_variants.txt *_matched.txt
     fi
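Most of the storeDir changes in this patch use the same idiom: workDir is a java.nio.file.Path, and Nextflow overloads the / operator on paths to mean resolve(), so the string interpolation around workDir.resolve() can be dropped. A minimal sketch of the equivalence (directory names are illustrative):

    // Nextflow script context: workDir is a Path; file() turns a string into one
    def cachedir = params.genotypes_cache ? file(params.genotypes_cache) : workDir

    // '/' on a Path is shorthand for resolve(), so these build the same Path
    def a = cachedir / "genomes" / "relabelled"
    def b = cachedir.resolve("genomes").resolve("relabelled")
    assert a == b

Keeping everything a Path also avoids small string-building artefacts like the trailing slash in the old ref_extracted/ directory.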
"$params.genotypes_cache" : "${workDir.resolve()}" ) - storeDir "${baseDir}/ancestry/fraposa/${params.target_build}/${ref_geno.baseName}/${targetmeta.id}/" + + cachedir = params.genotypes_cache ? file(params.genotypes_cache) : workDir + storeDir cachedir / "ancestry" / "fraposa_pca" conda "${task.ext.conda}" @@ -20,9 +21,10 @@ process FRAPOSA_PCA { tuple val(targetmeta), path(target_geno) output: - path "*.{dat,pcs}", emit: pca + path "${output}*.{dat,pcs}", emit: pca path "versions.yml", emit: versions + output = "${params.target_build}_${meta.id}_${meta.chrom}" script: """ fraposa ${ref_geno.baseName} \ diff --git a/modules/local/ancestry/oadp/fraposa_project.nf b/modules/local/ancestry/oadp/fraposa_project.nf index b06aa39e..0d66b2ce 100644 --- a/modules/local/ancestry/oadp/fraposa_project.nf +++ b/modules/local/ancestry/oadp/fraposa_project.nf @@ -5,8 +5,8 @@ process FRAPOSA_PROJECT { tag "${target_geno.baseName.tokenize('_')[1]}" - def baseDir = ( params.genotypes_cache ? "$params.genotypes_cache" : "${workDir.resolve()}" ) - storeDir "${baseDir}/ancestry/fraposa/${params.target_build}/${target_geno.baseName}/${split_fam.baseName}" + cachedir = params.genotypes_cache ? file(params.genotypes_cache) : workDir + storeDir cachedir / "ancestry" / "fraposa" / "project" conda "${task.ext.conda}" @@ -21,12 +21,13 @@ process FRAPOSA_PROJECT { path(pca) output: - tuple val(oadp_meta), path("GRCh3?_${target_id}_*.pcs"), emit: pca + tuple val(oadp_meta), path("${output}.pcs"), emit: pca path "versions.yml", emit: versions script: target_id = target_geno.baseName.tokenize('_')[1] oadp_meta = ['target_id':target_id] + output = "${params.target_build}_${target_id}_${split_fam}" """ fraposa ${ref_geno.baseName} \ --method $params.projection_method \ diff --git a/modules/local/ancestry/oadp/intersect_thinned.nf b/modules/local/ancestry/oadp/intersect_thinned.nf index 29de52a4..9ca9694d 100644 --- a/modules/local/ancestry/oadp/intersect_thinned.nf +++ b/modules/local/ancestry/oadp/intersect_thinned.nf @@ -11,7 +11,8 @@ process INTERSECT_THINNED { label 'plink2' // controls conda, docker, + singularity options tag "$meta.id" - storeDir "${workDir.resolve()}/ancestry/thinned_intersection/${params.target_build}/${meta.id}" + + storeDir workDir / "ancestry" / "thinned_intersections" conda "${task.ext.conda}" @@ -24,10 +25,10 @@ process INTERSECT_THINNED { tuple val(meta), path(matched), path(pruned), val(geno_meta), path(genomes) output: - path("*_thinned.txt.gz"), emit: match_thinned - tuple val(geno_meta), path("*_extracted.pgen"), emit: geno - tuple val(geno_meta), path("*_extracted.pvar.gz"), emit: variants - tuple val(geno_meta), path("*_extracted.psam"), emit: pheno + path("${thin_output}.txt.gz"), emit: match_thinned + tuple val(geno_meta), path("${output}.pgen"), emit: geno + tuple val(geno_meta), path("${output}.pvar.gz"), emit: variants + tuple val(geno_meta), path("${output}.psam"), emit: pheno path "versions.yml" , emit: versions script: @@ -37,6 +38,8 @@ process INTERSECT_THINNED { // input options def input = (geno_meta.is_pfile) ? 
diff --git a/modules/local/ancestry/oadp/intersect_thinned.nf b/modules/local/ancestry/oadp/intersect_thinned.nf
index 29de52a4..9ca9694d 100644
--- a/modules/local/ancestry/oadp/intersect_thinned.nf
+++ b/modules/local/ancestry/oadp/intersect_thinned.nf
@@ -11,7 +11,8 @@ process INTERSECT_THINNED {
     label 'plink2' // controls conda, docker, + singularity options
     tag "$meta.id"
 
-    storeDir "${workDir.resolve()}/ancestry/thinned_intersection/${params.target_build}/${meta.id}"
+
+    storeDir workDir / "ancestry" / "thinned_intersections"
 
     conda "${task.ext.conda}"
 
@@ -24,10 +25,10 @@ process INTERSECT_THINNED {
     tuple val(meta), path(matched), path(pruned), val(geno_meta), path(genomes)
 
     output:
-    path("*_thinned.txt.gz"), emit: match_thinned
-    tuple val(geno_meta), path("*_extracted.pgen"), emit: geno
-    tuple val(geno_meta), path("*_extracted.pvar.gz"), emit: variants
-    tuple val(geno_meta), path("*_extracted.psam"), emit: pheno
+    path("${thin_output}.txt.gz"), emit: match_thinned
+    tuple val(geno_meta), path("${output}.pgen"), emit: geno
+    tuple val(geno_meta), path("${output}.pvar.gz"), emit: variants
+    tuple val(geno_meta), path("${output}.psam"), emit: pheno
     path "versions.yml" , emit: versions
 
     script:
@@ -37,6 +38,8 @@
 
     // input options
     def input = (geno_meta.is_pfile) ? '--pfile' : '--bfile'
+    output = "${params.target_build}_${meta.id}_ALL_extracted"
+    thin_output = "${meta.id}_ALL_matched_thinned"
 
     """
     # 1) intersect thinned variants --------------------------------------------
diff --git a/modules/local/ancestry/oadp/plink2_makebed.nf b/modules/local/ancestry/oadp/plink2_makebed.nf
index 508f698c..66a641f8 100644
--- a/modules/local/ancestry/oadp/plink2_makebed.nf
+++ b/modules/local/ancestry/oadp/plink2_makebed.nf
@@ -5,7 +5,8 @@ process PLINK2_MAKEBED {
     label "plink2" // controls conda, docker, + singularity options
     tag "$meta.id chromosome"
 
-    storeDir "${workDir.resolve()}/ancestry/bed/${geno.baseName}/"
+
+    storeDir workDir / "ancestry" / "bed"
 
     conda "${task.ext.conda}"
 
@@ -19,10 +20,10 @@ process PLINK2_MAKEBED {
     tuple val(meta), path(geno), path(pheno), path(variants), path(pruned)
 
     output:
-    tuple val(meta), path("*.bed"), emit: geno
-    tuple val(meta), path("*.bim"), emit: variants
-    tuple val(meta), path("*.fam"), emit: pheno
-    tuple val(meta), path("splitfam*"), emit: splits, optional: true
+    tuple val(meta), path("${output}.bed"), emit: geno
+    tuple val(meta), path("${output}.bim"), emit: variants
+    tuple val(meta), path("${output}.fam"), emit: pheno
+    tuple val(meta), path("${split_output}*"), emit: splits, optional: true
     path "versions.yml" , emit: versions
 
     script:
@@ -33,7 +34,8 @@ process PLINK2_MAKEBED {
     def extract = pruned.name != 'NO_FILE' ? "--extract $pruned" : ''
     def extracted = pruned.name != 'NO_FILE' ? "_extracted" : ''
     def prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}_" : "${meta.id}_"
-
+    output = "${params.target_build}_${prefix}${meta.chrom}${extracted}"
+    split_output = "${meta.id}_splitfam"
     """
     # use explicit flag because pfile prefix might be different
     plink2 \
@@ -45,11 +47,11 @@ process PLINK2_MAKEBED {
         --pvar $variants \
         --make-bed \
         $extract \
-        --out ${params.target_build}_${prefix}${meta.chrom}${extracted}
+        --out ${output}
 
     if [ $meta.id != 'reference' ]
     then
-        split -l 50000 <(grep -v '#' $pheno) splitfam
+        split -l 50000 <(grep -v '#' $pheno) ${split_output}
     fi
 
     cat <<-END_VERSIONS > versions.yml
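PLINK2_MAKEBED also shows the consuming half of the NO_FILE convention used across this patch: an optional input is always staged as a real file, and the process decides what to do by testing the staged file's name. A minimal sketch of that branch (flags and names are illustrative):

    process EXAMPLE {
        input:
        tuple val(meta), path(geno), path(pruned) // 'pruned' may be the NO_FILE placeholder

        script:
        // only build the optional flag when a real file was provided
        def extract = pruned.name != 'NO_FILE' ? "--extract $pruned" : ''
        """
        plink2 --bfile ${geno.baseName} $extract --make-bed --out ${meta.id}
        """
    }

The producing half of the convention is centralised in workflows/pgsc_calc.nf at the end of this patch.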
"${meta.id}${task.ext.suffix}_" : "${meta.id}_" - + output = "${params.target_build}_${prefix}${meta.chrom}_oriented" """ plink2 \ --threads $task.cpus \ @@ -41,7 +42,7 @@ process PLINK2_ORIENT { --bim $variants \ --a1-allele $ref_variants 5 2 \ --make-bed \ - --out ${params.target_build}_${prefix}${meta.chrom}_oriented + --out $output cat <<-END_VERSIONS > versions.yml ${task.process.tokenize(':').last()}: diff --git a/modules/local/ancestry/relabel_ids.nf b/modules/local/ancestry/relabel_ids.nf index 792f277e..b4751904 100644 --- a/modules/local/ancestry/relabel_ids.nf +++ b/modules/local/ancestry/relabel_ids.nf @@ -4,7 +4,9 @@ process RELABEL_IDS { label 'pgscatalog_utils' // controls conda, docker, + singularity options tag "$meta.id $meta.effect_type $target_format" - storeDir { refgeno.name != 'NO_FILE' ? "${workDir.resolve()}/ancestry/relabel/${refgeno.baseName}/${meta.id}/" : false } + + cachedir = workDir / "ancestry" / "relabel" + storeDir { refgeno.name != 'NO_FILE' ? cachedir : false } conda "${task.ext.conda}" diff --git a/modules/local/match_combine.nf b/modules/local/match_combine.nf index 4aa1871a..7acd223f 100644 --- a/modules/local/match_combine.nf +++ b/modules/local/match_combine.nf @@ -6,7 +6,6 @@ process MATCH_COMBINE { // first element of tag must be sampleset tag "$meta.id" - scratch (workflow.containerEngine == 'singularity') conda "${task.ext.conda}" diff --git a/modules/local/match_variants.nf b/modules/local/match_variants.nf index 4c43523a..9883841b 100644 --- a/modules/local/match_variants.nf +++ b/modules/local/match_variants.nf @@ -5,7 +5,6 @@ process MATCH_VARIANTS { // first element of tag must be sampleset tag "$meta.id chromosome $meta.chrom" - scratch (workflow.containerEngine == 'singularity') errorStrategy 'finish' conda "${task.ext.conda}" diff --git a/modules/local/plink2_relabelbim.nf b/modules/local/plink2_relabelbim.nf index 5c9fc4bf..ad2e63fa 100644 --- a/modules/local/plink2_relabelbim.nf +++ b/modules/local/plink2_relabelbim.nf @@ -5,8 +5,10 @@ process PLINK2_RELABELBIM { label "plink2" // controls conda, docker, + singularity options tag "$meta.id chromosome $meta.chrom" - storeDir ( params.genotypes_cache ? "$params.genotypes_cache/${meta.id}/${meta.build}/${meta.chrom}" : - "${workDir.resolve()}/genomes/${meta.id}/${meta.build}/${meta.chrom}") + + cachedir = params.genotypes_cache ? file(params.genotypes_cache) : workDir + storeDir cachedir / "genomes" / "relabelled" + conda "${task.ext.conda}" container "${ workflow.containerEngine == 'singularity' && @@ -19,10 +21,10 @@ process PLINK2_RELABELBIM { tuple val(meta), path(geno), path(variants), path(pheno) output: - tuple val(meta), path("${meta.build}_*.bed"), emit: geno - tuple val(meta), path("${meta.build}_*.zst"), emit: variants - tuple val(meta), path("${meta.build}_*.fam"), emit: pheno - tuple val(meta), path("*.vmiss.gz"), emit: vmiss + tuple val(meta), path("${output}.bed"), emit: geno + tuple val(meta), path("${output}.pvar.zst"), emit: variants + tuple val(meta), path("${output}.fam"), emit: pheno + tuple val(meta), path("${output}.vmiss.gz"), emit: vmiss path "versions.yml" , emit: versions when: @@ -36,7 +38,8 @@ process PLINK2_RELABELBIM { def mem_mb = task.memory.toMega() // plink is greedy // if dropping multiallelic variants, set a generic ID that won't match def set_ma_missing = params.keep_multiallelic ? 
diff --git a/modules/local/plink2_relabelbim.nf b/modules/local/plink2_relabelbim.nf
index 5c9fc4bf..ad2e63fa 100644
--- a/modules/local/plink2_relabelbim.nf
+++ b/modules/local/plink2_relabelbim.nf
@@ -5,8 +5,10 @@ process PLINK2_RELABELBIM {
     label "plink2" // controls conda, docker, + singularity options
     tag "$meta.id chromosome $meta.chrom"
 
-    storeDir ( params.genotypes_cache ? "$params.genotypes_cache/${meta.id}/${meta.build}/${meta.chrom}" :
-        "${workDir.resolve()}/genomes/${meta.id}/${meta.build}/${meta.chrom}")
+
+    cachedir = params.genotypes_cache ? file(params.genotypes_cache) : workDir
+    storeDir cachedir / "genomes" / "relabelled"
+
     conda "${task.ext.conda}"
 
     container "${ workflow.containerEngine == 'singularity' &&
@@ -19,10 +21,10 @@ process PLINK2_RELABELBIM {
     tuple val(meta), path(geno), path(variants), path(pheno)
 
     output:
-    tuple val(meta), path("${meta.build}_*.bed"), emit: geno
-    tuple val(meta), path("${meta.build}_*.zst"), emit: variants
-    tuple val(meta), path("${meta.build}_*.fam"), emit: pheno
-    tuple val(meta), path("*.vmiss.gz"), emit: vmiss
+    tuple val(meta), path("${output}.bed"), emit: geno
+    tuple val(meta), path("${output}.bim.zst"), emit: variants
+    tuple val(meta), path("${output}.fam"), emit: pheno
+    tuple val(meta), path("${output}.vmiss.gz"), emit: vmiss
     path "versions.yml" , emit: versions
 
     when:
@@ -36,7 +38,8 @@ process PLINK2_RELABELBIM {
     def mem_mb = task.memory.toMega() // plink is greedy
    // if dropping multiallelic variants, set a generic ID that won't match
    def set_ma_missing = params.keep_multiallelic ? '' : '--var-id-multi @:#'
-
+    // no 'def': the output block reads this variable, and def would limit it to the script block
+    output = "${meta.build}_${prefix}_${meta.chrom}"
     """
     plink2 \\
         --threads $task.cpus \\
@@ -47,12 +50,12 @@ process PLINK2_RELABELBIM {
         --memory $mem_mb \\
         --missing vcols=fmissdosage,fmiss \\
         $set_ma_missing \\
         --bfile ${geno.baseName} $compressed \\
         --make-just-bim zs \\
-        --out ${meta.build}_${prefix}_${meta.chrom}
+        --out ${output}
 
     # cross platform (mac, linux) method of preserving symlinks
-    cp -a $geno ${meta.build}_${prefix}_${meta.chrom}.bed
-    cp -a $pheno ${meta.build}_${prefix}_${meta.chrom}.fam
-    gzip *.vmiss
+    cp -a $geno ${output}.bed
+    cp -a $pheno ${output}.fam
+    gzip ${output}.vmiss
 
     cat <<-END_VERSIONS > versions.yml
     ${task.process.tokenize(':').last()}:
diff --git a/modules/local/plink2_relabelpvar.nf b/modules/local/plink2_relabelpvar.nf
index 7aac2135..22fc16c8 100644
--- a/modules/local/plink2_relabelpvar.nf
+++ b/modules/local/plink2_relabelpvar.nf
@@ -6,8 +6,8 @@ process PLINK2_RELABELPVAR {
 
     tag "$meta.id chromosome $meta.chrom"
 
-    storeDir ( params.genotypes_cache ? "$params.genotypes_cache/${meta.id}/${meta.build}/${meta.chrom}" :
-        "${workDir.resolve()}/genomes/${meta.id}/${meta.build}/${meta.chrom}")
+    cachedir = params.genotypes_cache ? file(params.genotypes_cache) : workDir
+    storeDir cachedir / "genomes" / "relabelled"
 
     conda "${task.ext.conda}"
 
@@ -21,10 +21,10 @@ process PLINK2_RELABELPVAR {
     tuple val(meta), path(geno), path(pheno), path(variants)
 
     output:
-    tuple val(meta), path("${meta.build}_*.pgen"), emit: geno
-    tuple val(meta), path("${meta.build}_*.pvar.zst") , emit: variants
-    tuple val(meta), path("${meta.build}_*.psam"), emit: pheno
-    tuple val(meta), path("*.vmiss.gz"), emit: vmiss
+    tuple val(meta), path("${output}.pgen"), emit: geno
+    tuple val(meta), path("${output}.pvar.zst") , emit: variants
+    tuple val(meta), path("${output}.psam"), emit: pheno
+    tuple val(meta), path("${output}.vmiss.gz"), emit: vmiss
     path "versions.yml" , emit: versions
 
     when:
@@ -38,7 +38,8 @@ process PLINK2_RELABELPVAR {
     def mem_mb = task.memory.toMega() // plink is greedy
     // if dropping multiallelic variants, set a generic ID that won't match
     def set_ma_missing = params.keep_multiallelic ? '' : '--var-id-multi @:#'
-
+    // no 'def': the output block reads this variable, and def would limit it to the script block
+    output = "${meta.build}_${prefix}_${meta.chrom}"
     """
     plink2 \\
         --threads $task.cpus \\
@@ -49,13 +50,13 @@ process PLINK2_RELABELPVAR {
         $set_ma_missing \\
         --pfile ${geno.baseName} $compressed \\
         --make-just-pvar zs \\
-        --out ${meta.build}_${prefix}_${meta.chrom}
+        --out $output
 
     # cross platform (mac, linux) method of preserving symlinks
-    cp -a $geno ${meta.build}_${prefix}_${meta.chrom}.pgen
-    cp -a $pheno ${meta.build}_${prefix}_${meta.chrom}.psam
-
-    gzip *.vmiss
+    cp -a $geno ${output}.pgen
+    cp -a $pheno ${output}.psam
+
+    gzip ${output}.vmiss
 
     cat <<-END_VERSIONS > versions.yml
     ${task.process.tokenize(':').last()}:
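The cp -a calls above deserve a note: plink2 only rewrites the variant file (--make-just-bim / --make-just-pvar), so the untouched genotype and sample files are renamed to the new prefix rather than regenerated. Staged inputs are normally symlinks into other task directories, and -a copies the link itself instead of dereferencing it (GNU cp expands -a to -dR --preserve=all; BSD/macOS cp expands it to -pPR). An illustrative shell session, assuming hypothetical file names:

    $ ls -l cineca_22.bed
    lrwxrwxrwx ... cineca_22.bed -> /work/3f/a1.../cineca_22.bed
    $ cp -a cineca_22.bed GRCh37_cineca_22.bed
    $ ls -l GRCh37_cineca_22.bed
    lrwxrwxrwx ... GRCh37_cineca_22.bed -> /work/3f/a1.../cineca_22.bed

So renaming a multi-gigabyte .bed or .pgen costs no extra disk space or copy time.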
diff --git a/modules/local/plink2_vcf.nf b/modules/local/plink2_vcf.nf
index f2986b3b..13fd5a2b 100644
--- a/modules/local/plink2_vcf.nf
+++ b/modules/local/plink2_vcf.nf
@@ -6,8 +6,8 @@ process PLINK2_VCF {
 
     tag "$meta.id chromosome $meta.chrom"
 
-    storeDir ( params.genotypes_cache ? "$params.genotypes_cache/${meta.id}/${meta.build}/${meta.chrom}" :
-        "${workDir.resolve()}/genomes/${meta.id}/${meta.build}/${meta.chrom}")
+    cachedir = params.genotypes_cache ? file(params.genotypes_cache) : workDir
+    storeDir cachedir / "genomes" / "recoded"
 
     conda "${task.ext.conda}"
 
@@ -20,10 +20,10 @@ process PLINK2_VCF {
     tuple val(meta), path(vcf)
 
     output:
-    tuple val(newmeta), path("${meta.build}_*.pgen"), emit: pgen
-    tuple val(newmeta), path("${meta.build}_*.psam"), emit: psam
-    tuple val(newmeta), path("${meta.build}_*.zst") , emit: pvar
-    tuple val(newmeta), path("${meta.build}_*.vmiss.gz"), emit: vmiss
+    tuple val(newmeta), path("${output}.pgen"), emit: pgen
+    tuple val(newmeta), path("${output}.psam"), emit: psam
+    tuple val(newmeta), path("${output}.pvar.zst") , emit: pvar
+    tuple val(newmeta), path("${output}.vmiss.gz"), emit: vmiss
     path "versions.yml" , emit: versions
 
     script:
@@ -36,7 +36,8 @@ process PLINK2_VCF {
     def chrom_filter = meta.chrom == "ALL" ? "--chr 1-22, X, Y, XY" : "--chr ${meta.chrom}" // filter to canonical/stated chromosome
     newmeta = meta.clone() // copy hashmap for updating...
     newmeta.is_pfile = true // now it's converted to a pfile :)
-
+    // no 'def': the output block reads this variable, and def would limit it to the script block
+    output = "${meta.build}_${prefix}_${meta.chrom}_vcf"
     """
     plink2 \\
         --threads $task.cpus \\
@@ -48,9 +49,9 @@ process PLINK2_VCF {
         --vcf $vcf $dosage_options \\
         --allow-extra-chr $chrom_filter \\
         --make-pgen vzs \\
-        --out ${meta.build}_${prefix}_${meta.chrom}_vcf
+        --out ${output}
 
-    gzip *.vmiss
+    gzip ${output}.vmiss
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
diff --git a/modules/local/score_report.nf b/modules/local/score_report.nf
index 0e1a3545..8adc6164 100644
--- a/modules/local/score_report.nf
+++ b/modules/local/score_report.nf
@@ -16,6 +16,7 @@ process SCORE_REPORT {
     tuple val(meta), path(scorefile), path(score_log), path(match_summary), path(ancestry)
     path intersect_count
     val reference_panel_name
+    path report_path
 
     output:
     // includeInputs to correctly use $meta.id in publishDir path
@@ -25,7 +26,7 @@ process SCORE_REPORT {
     // for testing ancestry workflow
     path "pop_summary.csv", optional: true
     // normal outputs
-    path "*.html", emit: report
+    path "report.html", emit: report
     path "versions.yml", emit: versions
 
     script:
@@ -42,11 +43,12 @@ process SCORE_REPORT {
     echo "keep_ambiguous : $params.keep_ambiguous" >> params.txt
     echo "min_overlap : $params.min_overlap" >> params.txt
 
-    quarto render report.qmd -M "self-contained:true" \
+    quarto render $report_path -M "self-contained:true" \
         -P score_path:$scorefile \
         -P sampleset:$meta.id \
        -P run_ancestry:$run_ancestry \
-        -P reference_panel_name:$reference_panel_name
+        -P reference_panel_name:$reference_panel_name \
+        -o report.html
 
     cat <<-END_VERSIONS > versions.yml
     ${task.process.tokenize(':').last()}:
diff --git a/subworkflows/local/ancestry/ancestry_project.nf b/subworkflows/local/ancestry/ancestry_project.nf
index 2a5c7d77..5514e757 100644
--- a/subworkflows/local/ancestry/ancestry_project.nf
+++ b/subworkflows/local/ancestry/ancestry_project.nf
@@ -179,7 +179,7 @@ workflow ANCESTRY_PROJECT {
             .filter{ it instanceof Path && it.getName().contains('ALL') }
             .set { ch_ref_relabelled_variants }
 
-        target_extract = Channel.of(file('NO_FILE')) // optional input for PLINK2_MAKEBED
+        target_extract = Channel.of(file(projectDir / "assets" / "NO_FILE")) // optional input for PLINK2_MAKEBED
 
         // [meta, pgen, psam, relabelled pvar, optional_input]
         INTERSECT_THINNED.out.geno
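SCORE_REPORT now receives its Quarto template as a path input instead of assuming report.qmd is already present, presumably so the template is staged into the task directory like any other input and the process no longer depends on reading $projectDir at runtime. The added -o flag pins the rendered file name, so the path "report.html" output declaration always matches. What the command looks like inside a task work directory (parameter values are illustrative):

    quarto render report.qmd -M "self-contained:true" \
        -P score_path:aggregated_scores.txt.gz \
        -P sampleset:cineca \
        -P run_ancestry:false \
        -P reference_panel_name:1000G \
        -o report.html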
diff --git a/subworkflows/local/apply_score.nf b/subworkflows/local/apply_score.nf
index 9f9081fa..1ea03d3e 100644
--- a/subworkflows/local/apply_score.nf
+++ b/subworkflows/local/apply_score.nf
@@ -63,7 +63,7 @@ workflow APPLY_SCORE {
             .set { ch_scorefile_relabel_input }
 
         // relabel scoring file ids to match reference format
-        RELABEL_SCOREFILE_IDS ( ch_scorefile_relabel_input, Channel.value([[:], file("$projectDir/assets/NO_FILE")]) )
+        RELABEL_SCOREFILE_IDS ( ch_scorefile_relabel_input, Channel.value([[:], file(projectDir / "assets" / "NO_FILE", checkIfExists: true)]) )
 
         RELABEL_SCOREFILE_IDS.out.relabelled
             .transpose()
@@ -85,7 +85,7 @@ workflow APPLY_SCORE {
             .set { ch_afreq }
 
         // map afreq IDs from reference -> target
-        RELABEL_AFREQ_IDS ( ch_afreq, Channel.value([[:], file("$projectDir/assets/NO_FILE")]) )
+        RELABEL_AFREQ_IDS ( ch_afreq, Channel.value([[:], file(projectDir / "assets" / "NO_FILE", checkIfExists: true)]) )
 
         ref_afreq = RELABEL_AFREQ_IDS.out.relabelled
     }
@@ -154,7 +154,7 @@ def annotate_scorefiles(ArrayList scorefiles) {
         // dominant, 1 recessive). scorefile looks like:
        // variant ID | effect allele | weight 1 | ... | weight_n
        // one weight is mandatory, extra weight columns are optional
-        scoremeta.n_scores = count_scores(it.last())
+        scoremeta.n_scores = count_scores(it.last().newInputStream())
 
         // file name structure: {dataset}_{chr}_{effect}_{split}.scorefile -
         // {dataset} is only used to disambiguate files, not for scoremeta
@@ -193,10 +193,10 @@ def annotate_genomic(ArrayList target) {
     return [meta, paths]
 }
 
-def count_scores(Path f) {
+def count_scores(InputStream f) {
     // count number of calculated scores in a gzipped plink .scorefile
     // try-with-resources block automatically closes streams
-    try (buffered = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(f.toFile()))))) {
+    try (buffered = new BufferedReader(new InputStreamReader(new GZIPInputStream(f)))) {
         n_extra_cols = 2 // ID, effect_allele
         n_scores = buffered.readLine().split("\t").length - n_extra_cols
         assert n_scores > 0 : "Counting scores failed, please check scoring file"
diff --git a/subworkflows/local/report.nf b/subworkflows/local/report.nf
index 9bce0c96..c1fe7953 100644
--- a/subworkflows/local/report.nf
+++ b/subworkflows/local/report.nf
@@ -68,7 +68,7 @@ workflow REPORT {
         // make NO_FILE for each sampleset to join correctly later
         ancestry_results = ancestry_results.mix(
             ch_scores.map {it[0]} // unique samplesets
-                .combine(Channel.fromPath('NO_FILE'))
+                .combine(Channel.fromPath(file(projectDir / "assets" / "NO_FILE", checkIfExists: true)))
         )
     }
 
@@ -82,7 +82,10 @@ workflow REPORT {
         .combine(log_scorefiles) // all samplesets have the same scorefile metadata
         .set { ch_report_input }
 
-    SCORE_REPORT( ch_report_input, intersect_count, reference_panel_name )
+    Channel.fromPath(file(projectDir / "assets" / "report" / "report.qmd", checkIfExists: true))
+        .set { report_path }
+
+    SCORE_REPORT( ch_report_input, intersect_count, reference_panel_name, report_path )
     ch_versions = ch_versions.mix(SCORE_REPORT.out.versions)
 
     // if this workflow runs, the report must be written
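The count_scores() change in apply_score.nf above is small but load-bearing. Path.toFile() only works for paths on the default (local) filesystem, so the old new FileInputStream(f.toFile()) would throw once scorefiles live somewhere remote; newInputStream() obtains the stream from the Path's own filesystem provider instead, and the caller now passes it.last().newInputStream(). A sketch of the stream-based header count (the file name is illustrative):

    import java.util.zip.GZIPInputStream

    def scorefile = file('cineca_22_additive_0.scorefile.gz') // any Path, local or remote
    try (reader = new BufferedReader(new InputStreamReader(new GZIPInputStream(scorefile.newInputStream())))) {
        def n_scores = reader.readLine().split("\t").length - 2 // minus ID and effect_allele columns
        assert n_scores > 0
    }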
diff --git a/workflows/pgsc_calc.nf b/workflows/pgsc_calc.nf
index d2d5ca92..89ece78a 100644
--- a/workflows/pgsc_calc.nf
+++ b/workflows/pgsc_calc.nf
@@ -145,6 +145,12 @@ if (params.platform) {
 workflow PGSCCALC {
     ch_versions = Channel.empty()
 
+    // some workflows require an optional input
+    // let's make one, and reuse it where possible
+    // see https://nextflow-io.github.io/patterns/optional-input/ which explains this odd implementation pattern
+    // these dummy files need to exist for cloud executors to work OK
+    optional_input = file(projectDir / "assets" / "NO_FILE", checkIfExists: true)
+
     //
     // SUBWORKFLOW: Create reference database for ancestry inference
     //
@@ -193,7 +199,7 @@ workflow PGSCCALC {
     // flatten the score channel
     ch_scorefiles = ch_scores.collect()
     // chain files are optional input
-    Channel.fromPath("$projectDir/assets/NO_FILE", checkIfExists: false).set { chain_files }
+    Channel.fromPath(optional_input).set { chain_files }
     if (params.hg19_chain && params.hg38_chain) {
         Channel.fromPath(params.hg19_chain, checkIfExists: true)
             .mix(Channel.fromPath(params.hg38_chain, checkIfExists: true))
@@ -230,9 +236,13 @@ workflow PGSCCALC {
     // SUBWORKFLOW: Run ancestry projection
     //
 
-    // reference allelic frequencies are optional inputs to scoring subworkflow
-    ref_afreq = Channel.fromPath(file('NO_FILE'))
-    intersect_count = Channel.fromPath(file('NO_FILE_INTERSECT_COUNT'))
+    // this process has two optional inputs:
+    //   - reference allelic frequencies
+    //   - intersect counts
+    // optional inputs need different names to prevent collisions during stage in
+    optional_intersect_count = file(projectDir / "assets" / "NO_FILE_INTERSECT_COUNT", checkIfExists: true)
+    ref_afreq = Channel.value([[:], optional_input])
+    intersect_count = Channel.fromPath(optional_intersect_count, checkIfExists: true)
 
     if (run_ancestry_assign) {
         intersection = Channel.empty()
@@ -268,7 +278,7 @@ workflow PGSCCALC {
         // intersected variants ( across ref & target ) are an optional input
         intersection = ANCESTRY_PROJECT.out.intersection
     } else {
-        dummy_input = Channel.of(file('NO_FILE')) // dummy file that doesn't exist
+        dummy_input = Channel.of(optional_input) // dummy placeholder file (empty, but it must exist for cloud executors)
         // associate each sampleset with the dummy file
         MAKE_COMPATIBLE.out.geno.map {
             meta = it[0].clone()
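Taken together, the NO_FILE changes standardise on the optional-input pattern linked in the comment above: placeholders are empty but real, version-controlled files, because cloud executors refuse to stage paths that don't exist. That is also why assets/NO_FILE_INTERSECT_COUNT is created at the top of this patch. A condensed sketch of the producing side:

    // real empty files shipped in assets/, validated at pipeline start
    optional_input           = file(projectDir / "assets" / "NO_FILE", checkIfExists: true)
    optional_intersect_count = file(projectDir / "assets" / "NO_FILE_INTERSECT_COUNT", checkIfExists: true)

    // the distinct file names are deliberate: two placeholders staged into the
    // same task directory would collide during stage-in
    ref_afreq       = Channel.value([[:], optional_input])
    intersect_count = Channel.fromPath(optional_intersect_count)

Consumers then branch on the staged name (pruned.name != 'NO_FILE'), as sketched after PLINK2_MAKEBED above.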