Skip to content

Commit

Permalink
New option to build bismark index. New UPPMAX test script. Docker tes…
Browse files Browse the repository at this point in the history
…ts with and without bismark index.
  • Loading branch information
ewels committed Jun 25, 2017
1 parent 73072b5 commit e2bfd12
Show file tree
Hide file tree
Showing 5 changed files with 122 additions and 40 deletions.
4 changes: 3 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,6 @@ env:
- s=docker_test
- s=docker_test_bwameth

script: "./${s}.sh"
script:
- "./${s}.sh"
- "./${s}.sh true" # Run again, building reference genome
87 changes: 64 additions & 23 deletions bismark.nf
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,12 @@ vim: syntax=groovy
version = 0.1

// Configurable variables
params.name = false
params.project = false
params.email = false
params.genome = false
params.bismark_index = params.genome ? params.genomes[ params.genome ].bismark ?: false : false
params.fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false
params.saveReference = false
params.saveTrimmed = false
params.saveAlignedIntermediates = false
Expand All @@ -46,13 +48,29 @@ params.numMismatches = 0.6

// Validate inputs
if( params.bismark_index ){
bismark_index = file(params.bismark_index)
if( !bismark_index.exists() ) exit 1, "Bismark index not found: ${params.bismark_index}"
} else {
exit 1, "No reference genome specified! Please use --genome or --bismark_index"
bismark_index = Channel
.fromPath(params.bismark_index)
.ifEmpty { exit 1, "Bismark index not found: ${params.bismark_index}" }
}
else if ( params.fasta ){
fasta = file(params.fasta)
if( !fasta.exists() ) exit 1, "Fasta file not found: ${params.fasta}"
}
else {
exit 1, "No reference genome specified! Please use --genome, --bismark_index or --fasta"
}
multiqc_config = file(params.multiqc_config)

// Validate inputs
if( workflow.profile == 'standard' && !params.project ) exit 1, "No UPPMAX project ID found! Use --project"

// Decide which run name to report.
// If workflow.runName matches the two-lowercase-words-joined-by-underscore
// pattern (presumably a name Nextflow generated itself), use --name
// (params.name, which defaults to false); otherwise the user passed -name,
// so use workflow.runName directly. This handles both -name and --name.
custom_runName = (workflow.runName ==~ /[a-z]+_[a-z]+/) ? params.name : workflow.runName

params.rrbs = false
params.pbat = false
params.single_cell = false
Expand Down Expand Up @@ -105,22 +123,12 @@ log.info "=================================================="
log.info " NGI-MethylSeq : Bisulfite-Seq Best Practice v${version}"
log.info "=================================================="
def summary = [:]
summary['Run Name'] = custom_runName ?: workflow.runName
summary['Reads'] = params.reads
summary['Data Type'] = params.singleEnd ? 'Single-End' : 'Paired-End'
summary['Genome'] = params.genome
summary['Bismark Index'] = params.bismark_index
summary['Current home'] = "$HOME"
summary['Current user'] = "$USER"
summary['Current path'] = "$PWD"
summary['Working dir'] = workflow.workDir
summary['Output dir'] = params.outdir
summary['Script dir'] = workflow.projectDir
summary['Deduplication'] = params.nodedup ? 'No' : 'Yes'
summary['Save Trimmed'] = params.saveTrimmed
summary['Save Unmapped'] = params.unmapped ? 'Yes' : 'No'
summary['Save Intermeds'] = params.saveAlignedIntermediates
summary['Directional Mode'] = params.non_directional ? 'No' : 'Yes'
summary['All C Contexts'] = params.comprehensive ? 'Yes' : 'No'
if(params.bismark_index) summary['Bismark Index'] = params.bismark_index
else if(params.fasta) summary['Fasta Ref'] = params.fasta
if(params.rrbs) summary['RRBS Mode'] = 'On'
if(params.relaxMismatches) summary['Mismatch Func'] = "L,0,-${params.numMismatches} (Bismark default = L,0,-0.2)"
if(params.notrim) summary['Trimming Step'] = 'Skipped'
Expand All @@ -129,18 +137,51 @@ if(params.single_cell) summary['Trim Profile'] = 'Single Cell'
if(params.epignome) summary['Trim Profile'] = 'Epignome'
if(params.accel) summary['Trim Profile'] = 'Accel'
if(params.cegx) summary['Trim Profile'] = 'CEGX'
if(params.clip_r1 > 0) summary['Trim R1'] = params.clip_r1
if(params.clip_r2 > 0) summary['Trim R2'] = params.clip_r2
if(params.three_prime_clip_r1 > 0) summary["Trim 3' R1"] = params.three_prime_clip_r1
if(params.three_prime_clip_r2 > 0) summary["Trim 3' R2"] = params.three_prime_clip_r2
summary['Trim R1'] = params.clip_r1
summary['Trim R2'] = params.clip_r2
summary["Trim 3' R1"] = params.three_prime_clip_r1
summary["Trim 3' R2"] = params.three_prime_clip_r2
summary['Deduplication'] = params.nodedup ? 'No' : 'Yes'
summary['Save Reference'] = params.saveReference ? 'Yes' : 'No'
summary['Save Trimmed'] = params.saveTrimmed ? 'Yes' : 'No'
summary['Save Unmapped'] = params.unmapped ? 'Yes' : 'No'
summary['Save Intermeds'] = params.saveAlignedIntermediates ? 'Yes' : 'No'
summary['Directional Mode'] = params.non_directional ? 'No' : 'Yes'
summary['All C Contexts'] = params.comprehensive ? 'Yes' : 'No'
summary['Current home'] = "$HOME"
summary['Current user'] = "$USER"
summary['Current path'] = "$PWD"
summary['Working dir'] = workflow.workDir
summary['Output dir'] = params.outdir
summary['Script dir'] = workflow.projectDir
summary['Config Profile'] = (workflow.profile == 'standard' ? 'UPPMAX' : workflow.profile)
if(params.project) summary['UPPMAX Project'] = params.project
if(params.email) summary['E-mail Address'] = params.email
log.info summary.collect { k,v -> "${k.padRight(18)}: $v" }.join("\n")
log.info "========================================="

// Validate inputs
if( workflow.profile == 'standard' && !params.project ) exit 1, "No UPPMAX project ID found! Use --project"

/*
* PREPROCESSING - Build Bismark index
*/
// Only declare this process when no pre-built Bismark index was supplied.
// 'fasta' is assigned by the input validation only when --bismark_index is
// absent and --fasta (or the genome's fasta entry) is set.
if(!params.bismark_index && fasta){
process makeBismarkIndex {
// Label the task with the FASTA file it is working on.
tag fasta
// Publish the result with mode 'copy'. With --saveReference the target is
// "${params.outdir}/reference_genome"; without it saveAs yields null.
// NOTE(review): per the Nextflow publishDir docs a null saveAs result means
// the file is not published - confirm for the Nextflow version in use.
publishDir path: { params.saveReference ? "${params.outdir}/reference_genome" : params.outdir },
saveAs: { params.saveReference ? it : null }, mode: 'copy'

// The reference FASTA validated earlier in the script.
input:
file fasta from fasta

// "Bisulfite_Genome" is sent into the bismark_index channel.
// NOTE(review): presumably this is the directory that
// bismark_genome_preparation creates; check whether the downstream aligner
// also needs the FASTA alongside it.
output:
file "Bisulfite_Genome" into bismark_index

// Run the Bismark genome preparation on the task's current directory.
script:
"""
bismark_genome_preparation ./
"""
}
}


/*
Expand Down
11 changes: 4 additions & 7 deletions docs/installation.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,20 +82,18 @@ process {
```

### Reference Genomes
The NGI-MethylSeq pipeline needs a reference genome for alignment and annotation. If not already available, start by downloading the relevant reference, for example from [illumina iGenomes](https://support.illumina.com/sequencing/sequencing_software/igenome.html).
The NGI-MethylSeq pipeline needs a reference genome for read alignment. Support for many common genomes is built in when running on UPPMAX or AWS, via [Illumina iGenomes](https://support.illumina.com/sequencing/sequencing_software/igenome.html).

> NB: The below paragraph is a lie. You currently need a Bismark reference. Integrated builds from Fasta files coming soon...
If you don't want to use the Illumina iGenomes, you can supply either a Bismark reference or a FASTA file, using `--bismark_index` or `--fasta` to specify the path. If a Bismark reference is specified, the pipeline won't have to generate it and will finish quite a bit faster. If a FASTA file is supplied, the Bismark reference will be built when the pipeline starts. Use the command line option `--saveReference` to keep the generated reference so that it can be added to your config and used again in the future.

The minimal requirements are a FASTA file. If a Bismark reference is specified, the pipeline won't have to generate it and will be finished quite a bit faster. Use the command line option `--saveReference` to keep the generated references so that they can be added to your config and used again in the future.

A reference genome path can be specified on the command line each time you run with `--bismark_index` or `--fasta`. Alternatively, add the paths to the config under a relevant id and just specify this id with `--genome ID` when you run the pipeline _(this can also be set as a default in your config)_:
Alternatively, you can add the paths to your Nextflow config under a relevant id and just specify this id with `--genome ID` when you run the pipeline:

```groovy
params {
genomes {
'YOUR-ID' {
bismark = '<PATH TO BISMARK REF>/BismarkIndex'
fasta = '<PATH TO FASTA FILE>/genome.fa'
fasta = '<PATH TO FASTA FILE>/genome.fa' // used if above is not specified
}
'OTHER-GENOME' {
// [..]
Expand All @@ -106,7 +104,6 @@ params {
}
```


### Software Requirements
To run the pipeline, several software packages are required. How you satisfy these requirements is essentially up to you and depends on your system.

Expand Down
19 changes: 10 additions & 9 deletions tests/docker_test.sh
Original file line number Diff line number Diff line change
@@ -1,13 +1,5 @@
#!/usr/bin/env bash

script_path="../bismark.nf"
if [ -z $1]
then
echo "No argument given, going to try to run ../bismark.nf"
else
script_path=$1
fi

data_path="/tmp"
if [ -d "./test_data" ]
then
Expand All @@ -32,7 +24,16 @@ else
echo "Done"
fi

cmd="nextflow run $script_path -resume -profile testing --bismark_index ${data_dir}/references/BismarkIndex/ --singleEnd --reads \"${data_dir}/*.fastq.gz\""
# Choose which reference input to hand to the pipeline.
# Passing any argument (Travis runs "./<script>.sh true") asks the pipeline to
# build the Bismark index from the FASTA file; with no argument the pre-built
# index from the test data set is used.
# "$1" is quoted and "]" is a separate word so the test is well-formed whether
# or not an argument was given.
if [ -n "$1" ]
then
    buildrefs="--fasta ${data_dir}/references/WholeGenomeFasta/genome.fa"
else
    buildrefs="--bismark_index ${data_dir}/references/BismarkIndex/"
fi

run_name="Test MethylSeq Run: "$(date +%s)

cmd="nextflow run ../bismark.nf -resume -name \"$run_name\" -profile testing $buildrefs --singleEnd --reads \"${data_dir}/*.fastq.gz\""
echo "Starting nextflow... Command:"
echo $cmd
echo "-----"
Expand Down
41 changes: 41 additions & 0 deletions tests/uppmax_test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/usr/bin/env bash
#
# Run the Bismark pipeline against the small NGI bisulfite test set on UPPMAX.
#
# Usage: uppmax_test.sh [PIPELINE_SCRIPT]
#   PIPELINE_SCRIPT  Nextflow script to run (default: ../bismark.nf)
#
# Test data is looked for in ./test_data if that directory exists, otherwise
# in $SNIC_NOBACKUP. The test set is downloaded and unpacked there on first use.

# Which pipeline script to run.
script_path="../bismark.nf"
if [ -z "$1" ]
then
    echo "No argument given, going to try to run ${script_path}"
else
    script_path=$1
fi

# Where the test data lives (or will be downloaded to).
data_path=$SNIC_NOBACKUP
if [ -d "./test_data" ]
then
    data_path="./test_data"
    echo "Found data directory in current working directory, using ./test_data/"
fi

# Without this guard an unset $SNIC_NOBACKUP would make the script download
# and unpack into the filesystem root.
if [ -z "$data_path" ]
then
    echo >&2 "No test data location: \$SNIC_NOBACKUP is not set and ./test_data does not exist. Aborting."
    exit 1
fi

# Check that the required tools are available before doing any work.
curl --version >/dev/null 2>&1 || { echo >&2 "I require curl, but it's not installed. Aborting."; exit 1; }
tar --version >/dev/null 2>&1 || { echo >&2 "I require tar, but it's not installed. Aborting."; exit 1; }
nextflow -v >/dev/null 2>&1 || { echo >&2 "I require nextflow, but it's not installed. If you hava Java, run 'curl -fsSL get.nextflow.io | bash'. If not, install Java."; exit 1; }

# Fetch and unpack the test set unless it is already present.
data_dir=${data_path}/ngi-bisulfite_test_set
if [ -d "$data_dir" ]
then
    echo "Found existing test set, using $data_dir"
else
    echo "Downloading test set..."
    # --fail makes curl return non-zero on an HTTP error instead of saving the
    # error page as if it were the archive.
    curl --fail https://export.uppmax.uu.se/b2013064/test-data/ngi-bisulfite_test_set.tar.bz2 > "${data_path}/ngi-bisulfite_test_set.tar.bz2" \
        || { echo >&2 "Download of the test set failed. Aborting."; exit 1; }
    echo "Unpacking test set..."
    tar xvjf "${data_path}/ngi-bisulfite_test_set.tar.bz2" -C "${data_path}" \
        || { echo >&2 "Unpacking the test set failed. Aborting."; exit 1; }
    echo "Done"
fi

# Timestamped run name so every test run is distinguishable.
run_name="Test MethylSeq Run: "$(date +%s)

# The command is built as a string and run through eval so that the quoted
# run name and the quoted --reads glob reach nextflow as single arguments.
cmd="nextflow run $script_path -resume -name \"$run_name\" -profile devel --bismark_index ${data_dir}/references/BismarkIndex/ --singleEnd --reads \"${data_dir}/*.fastq.gz\""
echo "Starting nextflow... Command:"
echo "$cmd"
echo "-----"
eval "$cmd"

0 comments on commit e2bfd12

Please sign in to comment.