PGScatalog · nebfield · Feb 28, 2024 · Feb 26, 2024 · Feb 27, 2024 · Feb 28, 2024
diff --git a/assets/NO_FILE_INTERSECT_COUNT b/assets/NO_FILE_INTERSECT_COUNT
diff --git a/modules/local/ancestry/ancestry_analysis.nf b/modules/local/ancestry/ancestry_analysis.nf
@@ -21,7 +21,6 @@ process ANCESTRY_ANALYSIS {
 
     script:
     """
-    # TODO: --ref_pcs is a horrible hack to select the first duplicate
     ancestry_analysis -d $meta.target_id \
         -r reference \
         --psam $ref_psam \

diff --git a/modules/local/ancestry/bootstrap/make_database.nf b/modules/local/ancestry/bootstrap/make_database.nf
@@ -3,7 +3,7 @@ process MAKE_DATABASE {
     label 'process_low'
     label 'zstd' // controls conda, docker, + singularity options
 
-    storeDir "${workDir.resolve()}/reference"
+    storeDir workDir / "reference"
 
     conda "${task.ext.conda}"
 

diff --git a/modules/local/ancestry/extract_database.nf b/modules/local/ancestry/extract_database.nf
@@ -3,7 +3,7 @@ process EXTRACT_DATABASE {
     label 'process_low'
     label 'zstd' // controls conda, docker, + singularity options
 
-    storeDir "${workDir.resolve()}/ref_extracted/"
+    storeDir workDir / "ancestry" / "ref_extracted"
 
     conda "${task.ext.conda}"
 

diff --git a/modules/local/ancestry/intersect_variants.nf b/modules/local/ancestry/intersect_variants.nf
@@ -4,8 +4,9 @@ process INTERSECT_VARIANTS {
     label 'zstd' // controls conda, docker, + singularity options
 
     tag "$meta.id chromosome $meta.chrom"
-    def baseDir = ( params.genotypes_cache ? "$params.genotypes_cache" : "${workDir.resolve()}" )
-    storeDir "${baseDir}/intersected/${params.target_build}/$meta.id/$meta.chrom"
+
+    cachedir = params.genotypes_cache ? file(params.genotypes_cache) : workDir
+    storeDir cachedir / "ancestry" / "intersected"
 
     conda "${task.ext.conda}"
 
@@ -19,14 +20,15 @@ process INTERSECT_VARIANTS {
         path(ref_geno), path(ref_pheno), path(ref_variants)
 
     output:
-    tuple val(id), path("${meta.id}_${meta.chrom}_matched.txt.gz"), emit: intersection
+    tuple val(id), path("${output}.txt.gz"), emit: intersection
     path "intersect_counts_*.txt", emit: intersect_count
     path "versions.yml", emit: versions
 
     script:
     def mem_mb = task.memory.toMega() // plink is greedy
     def file_format = meta.is_pfile ? 'pvar' : 'bim'
     id = meta.subMap('id', 'build', 'n_chrom', 'chrom')
+    output = "${meta.id}_${meta.chrom}_matched"
     """
     intersect_variants.sh <(zstdcat $ref_variants) \
         <(zstdcat $variants) \
@@ -37,7 +39,7 @@ process INTERSECT_VARIANTS {
         echo "ERROR: No variants in intersection"
         exit 1
     else
-        mv matched_variants.txt ${meta.id}_${meta.chrom}_matched.txt
+        mv matched_variants.txt ${output}.txt
         gzip *_variants.txt *_matched.txt
     fi
 

diff --git a/modules/local/ancestry/oadp/fraposa_pca.nf b/modules/local/ancestry/oadp/fraposa_pca.nf
@@ -5,8 +5,9 @@ process FRAPOSA_PCA {
 
     tag "reference"
     // permanently derive a PCA for each reference - sampleset combination
-    def baseDir = ( params.genotypes_cache ? "$params.genotypes_cache" : "${workDir.resolve()}" )
-    storeDir "${baseDir}/ancestry/fraposa/${params.target_build}/${ref_geno.baseName}/${targetmeta.id}/"
+
+    cachedir = params.genotypes_cache ? file(params.genotypes_cache) : workDir
+    storeDir cachedir / "ancestry" / "fraposa_pca"
 
     conda "${task.ext.conda}"
 
@@ -20,9 +21,10 @@ process FRAPOSA_PCA {
     tuple val(targetmeta), path(target_geno)
 
     output:
-    path "*.{dat,pcs}", emit: pca
+    path "${output}*.{dat,pcs}", emit: pca
     path "versions.yml", emit: versions
 
     script:
+    output = "${params.target_build}_${meta.id}_${meta.chrom}"
     """
     fraposa ${ref_geno.baseName} \

diff --git a/modules/local/ancestry/oadp/fraposa_project.nf b/modules/local/ancestry/oadp/fraposa_project.nf
@@ -5,8 +5,8 @@ process FRAPOSA_PROJECT {
 
     tag "${target_geno.baseName.tokenize('_')[1]}"
 
-    def baseDir = ( params.genotypes_cache ? "$params.genotypes_cache" : "${workDir.resolve()}" )
-    storeDir "${baseDir}/ancestry/fraposa/${params.target_build}/${target_geno.baseName}/${split_fam.baseName}"
+    cachedir = params.genotypes_cache ? file(params.genotypes_cache) : workDir
+    storeDir cachedir / "ancestry" / "fraposa" / "project"
 
     conda "${task.ext.conda}"
 
@@ -21,12 +21,13 @@ process FRAPOSA_PROJECT {
         path(pca)
 
     output:
-    tuple val(oadp_meta), path("GRCh3?_${target_id}_*.pcs"), emit: pca
+    tuple val(oadp_meta), path("${output}.pcs"), emit: pca
     path "versions.yml", emit: versions
 
     script:
     target_id = target_geno.baseName.tokenize('_')[1]
     oadp_meta = ['target_id':target_id]
+    output = "${params.target_build}_${target_id}_${split_fam}"
     """
     fraposa ${ref_geno.baseName} \
         --method $params.projection_method \

diff --git a/modules/local/ancestry/oadp/intersect_thinned.nf b/modules/local/ancestry/oadp/intersect_thinned.nf
@@ -11,7 +11,8 @@ process INTERSECT_THINNED {
     label 'plink2' // controls conda, docker, + singularity options
 
     tag "$meta.id"
-    storeDir "${workDir.resolve()}/ancestry/thinned_intersection/${params.target_build}/${meta.id}"
+
+    storeDir workDir / "ancestry" / "thinned_intersections"
 
     conda "${task.ext.conda}"
 
@@ -24,10 +25,10 @@ process INTERSECT_THINNED {
     tuple val(meta), path(matched), path(pruned), val(geno_meta), path(genomes)
 
     output:
-    path("*_thinned.txt.gz"), emit: match_thinned
-    tuple val(geno_meta), path("*_extracted.pgen"), emit: geno
-    tuple val(geno_meta), path("*_extracted.pvar.gz"), emit: variants
-    tuple val(geno_meta), path("*_extracted.psam"), emit: pheno
+    path("${thin_output}.txt.gz"), emit: match_thinned
+    tuple val(geno_meta), path("${output}.pgen"), emit: geno
+    tuple val(geno_meta), path("${output}.pvar.gz"), emit: variants
+    tuple val(geno_meta), path("${output}.psam"), emit: pheno
     path "versions.yml"           , emit: versions
 
     script:
@@ -37,6 +38,8 @@ process INTERSECT_THINNED {
     // input options
     def input = (geno_meta.is_pfile) ? '--pfile' : '--bfile'
 
+    output = "${params.target_build}_${meta.id}_ALL_extracted"
+    thin_output = "${meta.id}_ALL_matched_thinned"
     """
     # 1) intersect thinned variants --------------------------------------------
 

diff --git a/modules/local/ancestry/oadp/plink2_makebed.nf b/modules/local/ancestry/oadp/plink2_makebed.nf
@@ -5,7 +5,8 @@ process PLINK2_MAKEBED {
     label "plink2" // controls conda, docker, + singularity options
 
     tag "$meta.id chromosome"
-    storeDir "${workDir.resolve()}/ancestry/bed/${geno.baseName}/"
+
+    storeDir workDir / "ancestry" / "bed"
 
     conda "${task.ext.conda}"
 
@@ -19,10 +20,10 @@ process PLINK2_MAKEBED {
     tuple val(meta), path(geno), path(pheno), path(variants), path(pruned)
 
     output:
-    tuple val(meta), path("*.bed"), emit: geno
-    tuple val(meta), path("*.bim"), emit: variants
-    tuple val(meta), path("*.fam"), emit: pheno
-    tuple val(meta), path("splitfam*"), emit: splits, optional: true
+    tuple val(meta), path("${output}.bed"), emit: geno
+    tuple val(meta), path("${output}.bim"), emit: variants
+    tuple val(meta), path("${output}.fam"), emit: pheno
+    tuple val(meta), path("${split_output}*"), emit: splits, optional: true
     path "versions.yml"           , emit: versions
 
     script:
@@ -33,7 +34,8 @@ process PLINK2_MAKEBED {
     def extract = pruned.name != 'NO_FILE' ? "--extract $pruned" : ''
     def extracted = pruned.name != 'NO_FILE' ? "_extracted" : ''
     def prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}_" : "${meta.id}_"
-
+    output = "${params.target_build}_${prefix}${meta.chrom}${extracted}"
+    split_output = "${meta.id}_splitfam"
     """
     # use explicit flag because pfile prefix might be different
     plink2 \
@@ -45,11 +47,11 @@ process PLINK2_MAKEBED {
         --pvar $variants \
         --make-bed \
         $extract \
-        --out ${params.target_build}_${prefix}${meta.chrom}${extracted}
+        --out ${output}
 
     if [ $meta.id != 'reference' ]
     then
-        split -l 50000 <(grep -v '#' $pheno) splitfam
+        split -l 50000 <(grep -v '#' $pheno) ${split_output}
     fi
 
     cat <<-END_VERSIONS > versions.yml

diff --git a/modules/local/ancestry/oadp/plink2_orient.nf b/modules/local/ancestry/oadp/plink2_orient.nf
@@ -5,7 +5,8 @@ process PLINK2_ORIENT {
     label "plink2" // controls conda, docker, + singularity options
 
     tag "$meta.id"
-    storeDir "${workDir.resolve()}/ancestry/oriented/${geno.baseName}/"
+
+    storeDir workDir / "ancestry" / "oriented"
 
     conda "${task.ext.conda}"
 
@@ -19,9 +20,9 @@ process PLINK2_ORIENT {
     tuple val(meta), path(geno), path(pheno), path(variants), path(ref_variants)
 
     output:
-    tuple val(meta), path("*.bed"), emit: geno
-    tuple val(meta), path("*.bim"), emit: variants
-    tuple val(meta), path("*.fam"), emit: pheno
+    tuple val(meta), path("${output}.bed"), emit: geno
+    tuple val(meta), path("${output}.bim"), emit: variants
+    tuple val(meta), path("${output}.fam"), emit: pheno
     path "versions.yml"           , emit: versions
 
     script:
@@ -30,7 +31,7 @@ process PLINK2_ORIENT {
 
     // output options
     def prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}_" : "${meta.id}_"
-
+    output = "${params.target_build}_${prefix}${meta.chrom}_oriented"
     """
     plink2 \
         --threads $task.cpus \
@@ -41,7 +42,7 @@ process PLINK2_ORIENT {
         --bim $variants \
         --a1-allele $ref_variants 5 2 \
         --make-bed \
-        --out ${params.target_build}_${prefix}${meta.chrom}_oriented
+        --out $output
 
     cat <<-END_VERSIONS > versions.yml
     ${task.process.tokenize(':').last()}:

diff --git a/modules/local/ancestry/relabel_ids.nf b/modules/local/ancestry/relabel_ids.nf
@@ -4,7 +4,9 @@ process RELABEL_IDS {
     label 'pgscatalog_utils' // controls conda, docker, + singularity options
 
     tag "$meta.id $meta.effect_type $target_format"
-    storeDir { refgeno.name != 'NO_FILE' ?  "${workDir.resolve()}/ancestry/relabel/${refgeno.baseName}/${meta.id}/" : false }
+
+    cachedir = workDir / "ancestry" / "relabel"
+    storeDir { refgeno.name != 'NO_FILE' ? cachedir : false }
 
     conda "${task.ext.conda}"
 

diff --git a/modules/local/match_combine.nf b/modules/local/match_combine.nf
@@ -6,7 +6,6 @@ process MATCH_COMBINE {
 
     // first element of tag must be sampleset
     tag "$meta.id"
-    scratch (workflow.containerEngine == 'singularity')
 
     conda "${task.ext.conda}"
 

diff --git a/modules/local/match_variants.nf b/modules/local/match_variants.nf
@@ -5,7 +5,6 @@ process MATCH_VARIANTS {
 
     // first element of tag must be sampleset
     tag "$meta.id chromosome $meta.chrom"
-    scratch (workflow.containerEngine == 'singularity')
     errorStrategy 'finish'
 
     conda "${task.ext.conda}"

diff --git a/modules/local/plink2_relabelbim.nf b/modules/local/plink2_relabelbim.nf
@@ -5,8 +5,10 @@ process PLINK2_RELABELBIM {
     label "plink2" // controls conda, docker, + singularity options
 
     tag "$meta.id chromosome $meta.chrom"
-    storeDir ( params.genotypes_cache ? "$params.genotypes_cache/${meta.id}/${meta.build}/${meta.chrom}" :
-              "${workDir.resolve()}/genomes/${meta.id}/${meta.build}/${meta.chrom}")
+
+    cachedir = params.genotypes_cache ? file(params.genotypes_cache) : workDir
+    storeDir cachedir / "genomes" / "relabelled"
+
     conda "${task.ext.conda}"
 
     container "${ workflow.containerEngine == 'singularity' &&
@@ -19,10 +21,10 @@ process PLINK2_RELABELBIM {
     tuple val(meta), path(geno), path(variants), path(pheno)
 
     output:
-    tuple val(meta), path("${meta.build}_*.bed"), emit: geno
-    tuple val(meta), path("${meta.build}_*.zst"), emit: variants
-    tuple val(meta), path("${meta.build}_*.fam"), emit: pheno
-    tuple val(meta), path("*.vmiss.gz"), emit: vmiss
+    tuple val(meta), path("${output}.bed"), emit: geno
+    tuple val(meta), path("${output}.bim.zst"), emit: variants
+    tuple val(meta), path("${output}.fam"), emit: pheno
+    tuple val(meta), path("${output}.vmiss.gz"), emit: vmiss
     path "versions.yml"           , emit: versions
 
     when:
@@ -36,7 +38,8 @@ process PLINK2_RELABELBIM {
     def mem_mb = task.memory.toMega() // plink is greedy
     // if dropping multiallelic variants, set a generic ID that won't match
     def set_ma_missing = params.keep_multiallelic ? '' : '--var-id-multi @:#'
-
+    // def limits scope to process block, so don't use it
+    output = "${meta.build}_${prefix}_${meta.chrom}"
     """
     plink2 \\
         --threads $task.cpus \\
@@ -47,12 +50,12 @@ process PLINK2_RELABELBIM {
         $set_ma_missing \\
         --bfile ${geno.baseName} $compressed \\
         --make-just-bim zs \\
-        --out ${meta.build}_${prefix}_${meta.chrom}
+        --out ${output}
 
     # cross platform (mac, linux) method of preserving symlinks
-    cp -a $geno ${meta.build}_${prefix}_${meta.chrom}.bed
-    cp -a $pheno ${meta.build}_${prefix}_${meta.chrom}.fam
-    gzip *.vmiss
+    cp -a $geno ${output}.bed
+    cp -a $pheno ${output}.fam
+    gzip ${output}.vmiss
 
     cat <<-END_VERSIONS > versions.yml
     ${task.process.tokenize(':').last()}:

diff --git a/modules/local/plink2_relabelpvar.nf b/modules/local/plink2_relabelpvar.nf
@@ -6,8 +6,8 @@ process PLINK2_RELABELPVAR {
 
     tag "$meta.id chromosome $meta.chrom"
 
-    storeDir ( params.genotypes_cache ? "$params.genotypes_cache/${meta.id}/${meta.build}/${meta.chrom}" :
-              "${workDir.resolve()}/genomes/${meta.id}/${meta.build}/${meta.chrom}")
+    cachedir = params.genotypes_cache ? file(params.genotypes_cache) : workDir
+    storeDir cachedir / "genomes" / "relabelled"
 
     conda "${task.ext.conda}"
 
@@ -21,10 +21,10 @@ process PLINK2_RELABELPVAR {
     tuple val(meta), path(geno), path(pheno), path(variants)
 
     output:
-    tuple val(meta), path("${meta.build}_*.pgen"), emit: geno
-    tuple val(meta), path("${meta.build}_*.pvar.zst") , emit: variants
-    tuple val(meta), path("${meta.build}_*.psam"), emit: pheno
-    tuple val(meta), path("*.vmiss.gz"), emit: vmiss
+    tuple val(meta), path("${output}.pgen"), emit: geno
+    tuple val(meta), path("${output}.pvar.zst") , emit: variants
+    tuple val(meta), path("${output}.psam"), emit: pheno
+    tuple val(meta), path("${output}.vmiss.gz"), emit: vmiss
     path "versions.yml"            , emit: versions
 
     when:
@@ -38,7 +38,8 @@ process PLINK2_RELABELPVAR {
     def mem_mb = task.memory.toMega() // plink is greedy
     // if dropping multiallelic variants, set a generic ID that won't match
     def set_ma_missing = params.keep_multiallelic ? '' : '--var-id-multi @:#'
-
+    // def limits scope to process block, so don't use it
+    output = "${meta.build}_${prefix}_${meta.chrom}"
     """
     plink2 \\
         --threads $task.cpus \\
@@ -49,13 +50,13 @@ process PLINK2_RELABELPVAR {
         $set_ma_missing \\
         --pfile ${geno.baseName} $compressed \\
         --make-just-pvar zs \\
-        --out ${meta.build}_${prefix}_${meta.chrom}
+        --out $output
 
     # cross platform (mac, linux) method of preserving symlinks
-    cp -a $geno ${meta.build}_${prefix}_${meta.chrom}.pgen
-    cp -a $pheno ${meta.build}_${prefix}_${meta.chrom}.psam
+    cp -a $geno ${output}.pgen
+    cp -a $pheno ${output}.psam
 
-    gzip *.vmiss
+    gzip ${output}.vmiss
 
     cat <<-END_VERSIONS > versions.yml
     ${task.process.tokenize(':').last()}: