Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove storeDir semantic structure #256

Merged
merged 4 commits into from
Feb 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file added assets/NO_FILE_INTERSECT_COUNT
Empty file.
1 change: 0 additions & 1 deletion modules/local/ancestry/ancestry_analysis.nf
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ process ANCESTRY_ANALYSIS {

script:
"""
# TODO: --ref_pcs is a horrible hack to select the first duplicate
ancestry_analysis -d $meta.target_id \
-r reference \
--psam $ref_psam \
Expand Down
2 changes: 1 addition & 1 deletion modules/local/ancestry/bootstrap/make_database.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ process MAKE_DATABASE {
label 'process_low'
label 'zstd' // controls conda, docker, + singularity options

storeDir "${workDir.resolve()}/reference"
storeDir workDir / "reference"

conda "${task.ext.conda}"

Expand Down
2 changes: 1 addition & 1 deletion modules/local/ancestry/extract_database.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ process EXTRACT_DATABASE {
label 'process_low'
label 'zstd' // controls conda, docker, + singularity options

storeDir "${workDir.resolve()}/ref_extracted/"
storeDir workDir / "ancestry" / "ref_extracted"

conda "${task.ext.conda}"

Expand Down
10 changes: 6 additions & 4 deletions modules/local/ancestry/intersect_variants.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ process INTERSECT_VARIANTS {
label 'zstd' // controls conda, docker, + singularity options

tag "$meta.id chromosome $meta.chrom"
def baseDir = ( params.genotypes_cache ? "$params.genotypes_cache" : "${workDir.resolve()}" )
storeDir "${baseDir}/intersected/${params.target_build}/$meta.id/$meta.chrom"

cachedir = params.genotypes_cache ? file(params.genotypes_cache) : workDir
storeDir cachedir / "ancestry" / "intersected"

conda "${task.ext.conda}"

Expand All @@ -19,14 +20,15 @@ process INTERSECT_VARIANTS {
path(ref_geno), path(ref_pheno), path(ref_variants)

output:
tuple val(id), path("${meta.id}_${meta.chrom}_matched.txt.gz"), emit: intersection
tuple val(id), path("${output}.txt.gz"), emit: intersection
path "intersect_counts_*.txt", emit: intersect_count
path "versions.yml", emit: versions

script:
def mem_mb = task.memory.toMega() // plink is greedy
def file_format = meta.is_pfile ? 'pvar' : 'bim'
id = meta.subMap('id', 'build', 'n_chrom', 'chrom')
output = "${meta.id}_${meta.chrom}_matched"
"""
intersect_variants.sh <(zstdcat $ref_variants) \
<(zstdcat $variants) \
Expand All @@ -37,7 +39,7 @@ process INTERSECT_VARIANTS {
echo "ERROR: No variants in intersection"
exit 1
else
mv matched_variants.txt ${meta.id}_${meta.chrom}_matched.txt
mv matched_variants.txt ${output}.txt
gzip *_variants.txt *_matched.txt
fi

Expand Down
8 changes: 5 additions & 3 deletions modules/local/ancestry/oadp/fraposa_pca.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@ process FRAPOSA_PCA {

tag "reference"
// permanently derive a PCA for each reference - sampleset combination
def baseDir = ( params.genotypes_cache ? "$params.genotypes_cache" : "${workDir.resolve()}" )
storeDir "${baseDir}/ancestry/fraposa/${params.target_build}/${ref_geno.baseName}/${targetmeta.id}/"

cachedir = params.genotypes_cache ? file(params.genotypes_cache) : workDir
storeDir cachedir / "ancestry" / "fraposa_pca"

conda "${task.ext.conda}"

Expand All @@ -20,9 +21,10 @@ process FRAPOSA_PCA {
tuple val(targetmeta), path(target_geno)

output:
path "*.{dat,pcs}", emit: pca
path "${output}*.{dat,pcs}", emit: pca
path "versions.yml", emit: versions

output = "${params.target_build}_${meta.id}_${meta.chrom}"
script:
"""
fraposa ${ref_geno.baseName} \
Expand Down
7 changes: 4 additions & 3 deletions modules/local/ancestry/oadp/fraposa_project.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ process FRAPOSA_PROJECT {

tag "${target_geno.baseName.tokenize('_')[1]}"

def baseDir = ( params.genotypes_cache ? "$params.genotypes_cache" : "${workDir.resolve()}" )
storeDir "${baseDir}/ancestry/fraposa/${params.target_build}/${target_geno.baseName}/${split_fam.baseName}"
cachedir = params.genotypes_cache ? file(params.genotypes_cache) : workDir
storeDir cachedir / "ancestry" / "fraposa" / "project"

conda "${task.ext.conda}"

Expand All @@ -21,12 +21,13 @@ process FRAPOSA_PROJECT {
path(pca)

output:
tuple val(oadp_meta), path("GRCh3?_${target_id}_*.pcs"), emit: pca
tuple val(oadp_meta), path("${output}.pcs"), emit: pca
path "versions.yml", emit: versions

script:
target_id = target_geno.baseName.tokenize('_')[1]
oadp_meta = ['target_id':target_id]
output = "${params.target_build}_${target_id}_${split_fam}"
"""
fraposa ${ref_geno.baseName} \
--method $params.projection_method \
Expand Down
13 changes: 8 additions & 5 deletions modules/local/ancestry/oadp/intersect_thinned.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ process INTERSECT_THINNED {
label 'plink2' // controls conda, docker, + singularity options

tag "$meta.id"
storeDir "${workDir.resolve()}/ancestry/thinned_intersection/${params.target_build}/${meta.id}"

storeDir workDir / "ancestry" / "thinned_intersections"

conda "${task.ext.conda}"

Expand All @@ -24,10 +25,10 @@ process INTERSECT_THINNED {
tuple val(meta), path(matched), path(pruned), val(geno_meta), path(genomes)

output:
path("*_thinned.txt.gz"), emit: match_thinned
tuple val(geno_meta), path("*_extracted.pgen"), emit: geno
tuple val(geno_meta), path("*_extracted.pvar.gz"), emit: variants
tuple val(geno_meta), path("*_extracted.psam"), emit: pheno
path("${thin_output}.txt.gz"), emit: match_thinned
tuple val(geno_meta), path("${output}.pgen"), emit: geno
tuple val(geno_meta), path("${output}.pvar.gz"), emit: variants
tuple val(geno_meta), path("${output}.psam"), emit: pheno
path "versions.yml" , emit: versions

script:
Expand All @@ -37,6 +38,8 @@ process INTERSECT_THINNED {
// input options
def input = (geno_meta.is_pfile) ? '--pfile' : '--bfile'

output = "${params.target_build}_${meta.id}_ALL_extracted"
thin_output = "${meta.id}_ALL_matched_thinned"
"""
# 1) intersect thinned variants --------------------------------------------

Expand Down
18 changes: 10 additions & 8 deletions modules/local/ancestry/oadp/plink2_makebed.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ process PLINK2_MAKEBED {
label "plink2" // controls conda, docker, + singularity options

tag "$meta.id chromosome"
storeDir "${workDir.resolve()}/ancestry/bed/${geno.baseName}/"

storeDir workDir / "ancestry" / "bed"

conda "${task.ext.conda}"

Expand All @@ -19,10 +20,10 @@ process PLINK2_MAKEBED {
tuple val(meta), path(geno), path(pheno), path(variants), path(pruned)

output:
tuple val(meta), path("*.bed"), emit: geno
tuple val(meta), path("*.bim"), emit: variants
tuple val(meta), path("*.fam"), emit: pheno
tuple val(meta), path("splitfam*"), emit: splits, optional: true
tuple val(meta), path("${output}.bed"), emit: geno
tuple val(meta), path("${output}.bim"), emit: variants
tuple val(meta), path("${output}.fam"), emit: pheno
tuple val(meta), path("${split_output}*"), emit: splits, optional: true
path "versions.yml" , emit: versions

script:
Expand All @@ -33,7 +34,8 @@ process PLINK2_MAKEBED {
def extract = pruned.name != 'NO_FILE' ? "--extract $pruned" : ''
def extracted = pruned.name != 'NO_FILE' ? "_extracted" : ''
def prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}_" : "${meta.id}_"

output = "${params.target_build}_${prefix}${meta.chrom}${extracted}"
split_output = "${meta.id}_splitfam"
"""
# use explicit flag because pfile prefix might be different
plink2 \
Expand All @@ -45,11 +47,11 @@ process PLINK2_MAKEBED {
--pvar $variants \
--make-bed \
$extract \
--out ${params.target_build}_${prefix}${meta.chrom}${extracted}
--out ${output}

if [ $meta.id != 'reference' ]
then
split -l 50000 <(grep -v '#' $pheno) splitfam
split -l 50000 <(grep -v '#' $pheno) ${split_output}
fi

cat <<-END_VERSIONS > versions.yml
Expand Down
13 changes: 7 additions & 6 deletions modules/local/ancestry/oadp/plink2_orient.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ process PLINK2_ORIENT {
label "plink2" // controls conda, docker, + singularity options

tag "$meta.id"
storeDir "${workDir.resolve()}/ancestry/oriented/${geno.baseName}/"

storeDir = workDir / "ancestry" / "oriented"

conda "${task.ext.conda}"

Expand All @@ -19,9 +20,9 @@ process PLINK2_ORIENT {
tuple val(meta), path(geno), path(pheno), path(variants), path(ref_variants)

output:
tuple val(meta), path("*.bed"), emit: geno
tuple val(meta), path("*.bim"), emit: variants
tuple val(meta), path("*.fam"), emit: pheno
tuple val(meta), path("${output}.bed"), emit: geno
tuple val(meta), path("${output}.bim"), emit: variants
tuple val(meta), path("${output}.fam"), emit: pheno
path "versions.yml" , emit: versions

script:
Expand All @@ -30,7 +31,7 @@ process PLINK2_ORIENT {

// output options
def prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}_" : "${meta.id}_"

output = "${params.target_build}_${prefix}${meta.chrom}_oriented"
"""
plink2 \
--threads $task.cpus \
Expand All @@ -41,7 +42,7 @@ process PLINK2_ORIENT {
--bim $variants \
--a1-allele $ref_variants 5 2 \
--make-bed \
--out ${params.target_build}_${prefix}${meta.chrom}_oriented
--out $output

cat <<-END_VERSIONS > versions.yml
${task.process.tokenize(':').last()}:
Expand Down
4 changes: 3 additions & 1 deletion modules/local/ancestry/relabel_ids.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ process RELABEL_IDS {
label 'pgscatalog_utils' // controls conda, docker, + singularity options

tag "$meta.id $meta.effect_type $target_format"
storeDir { refgeno.name != 'NO_FILE' ? "${workDir.resolve()}/ancestry/relabel/${refgeno.baseName}/${meta.id}/" : false }

cachedir = workDir / "ancestry" / "relabel"
storeDir { refgeno.name != 'NO_FILE' ? cachedir : false }

conda "${task.ext.conda}"

Expand Down
1 change: 0 additions & 1 deletion modules/local/match_combine.nf
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ process MATCH_COMBINE {

// first element of tag must be sampleset
tag "$meta.id"
scratch (workflow.containerEngine == 'singularity')

conda "${task.ext.conda}"

Expand Down
1 change: 0 additions & 1 deletion modules/local/match_variants.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ process MATCH_VARIANTS {

// first element of tag must be sampleset
tag "$meta.id chromosome $meta.chrom"
scratch (workflow.containerEngine == 'singularity')
errorStrategy 'finish'

conda "${task.ext.conda}"
Expand Down
25 changes: 14 additions & 11 deletions modules/local/plink2_relabelbim.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@ process PLINK2_RELABELBIM {
label "plink2" // controls conda, docker, + singularity options

tag "$meta.id chromosome $meta.chrom"
storeDir ( params.genotypes_cache ? "$params.genotypes_cache/${meta.id}/${meta.build}/${meta.chrom}" :
"${workDir.resolve()}/genomes/${meta.id}/${meta.build}/${meta.chrom}")

cachedir = params.genotypes_cache ? file(params.genotypes_cache) : workDir
storeDir cachedir / "genomes" / "relabelled"

conda "${task.ext.conda}"

container "${ workflow.containerEngine == 'singularity' &&
Expand All @@ -19,10 +21,10 @@ process PLINK2_RELABELBIM {
tuple val(meta), path(geno), path(variants), path(pheno)

output:
tuple val(meta), path("${meta.build}_*.bed"), emit: geno
tuple val(meta), path("${meta.build}_*.zst"), emit: variants
tuple val(meta), path("${meta.build}_*.fam"), emit: pheno
tuple val(meta), path("*.vmiss.gz"), emit: vmiss
tuple val(meta), path("${output}.bed"), emit: geno
tuple val(meta), path("${output}.pvar.zst"), emit: variants
tuple val(meta), path("${output}.fam"), emit: pheno
tuple val(meta), path("${output}.vmiss.gz"), emit: vmiss
path "versions.yml" , emit: versions

when:
Expand All @@ -36,7 +38,8 @@ process PLINK2_RELABELBIM {
def mem_mb = task.memory.toMega() // plink is greedy
// if dropping multiallelic variants, set a generic ID that won't match
def set_ma_missing = params.keep_multiallelic ? '' : '--var-id-multi @:#'

// assign without def: a def-declared variable is local to the script block and would not be visible to the output: declarations
output = "${meta.build}_${prefix}_${meta.chrom}"
"""
plink2 \\
--threads $task.cpus \\
Expand All @@ -47,12 +50,12 @@ process PLINK2_RELABELBIM {
$set_ma_missing \\
--bfile ${geno.baseName} $compressed \\
--make-just-bim zs \\
--out ${meta.build}_${prefix}_${meta.chrom}
--out ${output}

# cross platform (mac, linux) method of preserving symlinks
cp -a $geno ${meta.build}_${prefix}_${meta.chrom}.bed
cp -a $pheno ${meta.build}_${prefix}_${meta.chrom}.fam
gzip *.vmiss
cp -a $geno ${output}.bed
cp -a $pheno ${output}.fam
gzip ${output}.vmiss

cat <<-END_VERSIONS > versions.yml
${task.process.tokenize(':').last()}:
Expand Down
23 changes: 12 additions & 11 deletions modules/local/plink2_relabelpvar.nf
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ process PLINK2_RELABELPVAR {

tag "$meta.id chromosome $meta.chrom"

storeDir ( params.genotypes_cache ? "$params.genotypes_cache/${meta.id}/${meta.build}/${meta.chrom}" :
"${workDir.resolve()}/genomes/${meta.id}/${meta.build}/${meta.chrom}")
cachedir = params.genotypes_cache ? file(params.genotypes_cache) : workDir
storeDir cachedir / "genomes" / "relabelled"

conda "${task.ext.conda}"

Expand All @@ -21,10 +21,10 @@ process PLINK2_RELABELPVAR {
tuple val(meta), path(geno), path(pheno), path(variants)

output:
tuple val(meta), path("${meta.build}_*.pgen"), emit: geno
tuple val(meta), path("${meta.build}_*.pvar.zst") , emit: variants
tuple val(meta), path("${meta.build}_*.psam"), emit: pheno
tuple val(meta), path("*.vmiss.gz"), emit: vmiss
tuple val(meta), path("${output}.pgen"), emit: geno
tuple val(meta), path("${output}.pvar.zst") , emit: variants
tuple val(meta), path("${output}.psam"), emit: pheno
tuple val(meta), path("${output}.vmiss.gz"), emit: vmiss
path "versions.yml" , emit: versions

when:
Expand All @@ -38,7 +38,8 @@ process PLINK2_RELABELPVAR {
def mem_mb = task.memory.toMega() // plink is greedy
// if dropping multiallelic variants, set a generic ID that won't match
def set_ma_missing = params.keep_multiallelic ? '' : '--var-id-multi @:#'

// assign without def: a def-declared variable is local to the script block and would not be visible to the output: declarations
output = "${meta.build}_${prefix}_${meta.chrom}"
"""
plink2 \\
--threads $task.cpus \\
Expand All @@ -49,13 +50,13 @@ process PLINK2_RELABELPVAR {
$set_ma_missing \\
--pfile ${geno.baseName} $compressed \\
--make-just-pvar zs \\
--out ${meta.build}_${prefix}_${meta.chrom}
--out $output

# cross platform (mac, linux) method of preserving symlinks
cp -a $geno ${meta.build}_${prefix}_${meta.chrom}.pgen
cp -a $pheno ${meta.build}_${prefix}_${meta.chrom}.psam
cp -a $geno ${output}.pgen
cp -a $pheno ${output}.psam

gzip *.vmiss
gzip ${output}.vmiss

cat <<-END_VERSIONS > versions.yml
${task.process.tokenize(':').last()}:
Expand Down
Loading