Skip to content

Commit

Permalink
Merge pull request #4 from IARCbioinfo/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
nalcala authored Aug 4, 2020
2 parents 83de666 + c9291c6 commit 6697e6d
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 16 deletions.
7 changes: 4 additions & 3 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,10 @@ jobs:
- run: cd ~ && git clone -b v2.3 https://github.com/iarcbioinfo/RNAseq-nf.git
- run: cd ; nextflow run RNAseq-nf/ -profile docker --input_folder ~/data_test/FASTQ/ --output_folder BAM_realigned --ref_folder ~/data_test/REF --gtf ~/data_test/REF/TP53_small.gtf --bed ~/data_test/BED/TP53_small.bed --cpu 2 --mem 4
- run: cd ; nextflow run ~/project/ --help
- run: cd ; docker pull trinityctat/starfusion:1.8.1
- run: cd ; nextflow run ~/project/ -with-docker trinityctat/starfusion:1.8.1 --input_folder ~/data_test/FASTQ/ --output_folder RNAseq-fusion-out --CTAT_folder ~/data_test/REF/ctat_genome_lib_build_dir_TP53/ --fastq_ext fastq.gz --cpu 2 --mem 4 -with-dag dag.html
- run: cd ; nextflow run ~/project/ -with-docker trinityctat/starfusion:1.8.1 --input_folder ~/data_test/FASTQ/ --output_folder RNAseq-fusion-out --CTAT_folder ~/data_test/REF/ctat_genome_lib_build_dir_TP53/ --fastq_ext fastq.gz --cpu 2 --mem 4 -resume -with-dag dag.png
- run: cd ; docker pull trinityctat/starfusion:1.9.0
- run: cd ; nextflow run ~/project/ -with-docker trinityctat/starfusion:1.9.0 --input_folder ~/data_test/FASTQ/ --output_folder RNAseq-fusion-out --CTAT_folder ~/data_test/REF/ctat_genome_lib_build_dir_TP53/ --fastq_ext fastq.gz --cpu 2 --mem 4 -with-dag dag.html
- run: cd ; nextflow run ~/project/ -with-docker trinityctat/starfusion:1.9.0 --input_folder ~/data_test/FASTQ/ --output_folder RNAseq-fusion-out --CTAT_folder ~/data_test/REF/ctat_genome_lib_build_dir_TP53/ --fastq_ext fastq.gz --cpu 2 --mem 4 -resume -with-dag dag.png
- run: cd ; echo -e 'SM\tpair1\tpair2\tjunction\nNA06984\tdata_test/FASTQ/NA06984_T_1.fastq.gz\tdata_test/FASTQ/NA06984_T_2.fastq.gz\tnone\nNA06984_2RG\tdata_test/FASTQ/NA06984_T_RG1_1.fastq.gz\tdata_test/FASTQ/NA06984_T_RG1_2.fastq.gz\tnone\nNA06984_2RG\tdata_test/FASTQ/NA06984_T_RG2_1.fastq.gz\tdata_test/FASTQ/NA06984_T_RG2_2.fastq.gz\tnone' > input.txt ; nextflow run ~/project/ -with-docker trinityctat/starfusion:1.9.0 --input_file input.txt --output_folder RNAseq-fusion-out --CTAT_folder ~/data_test/REF/ctat_genome_lib_build_dir_TP53/ --fastq_ext fastq.gz --cpu 2 --mem 4
- run: cd ; cp ~/dag.* ~/project/.
- add_ssh_keys:
fingerprints:
Expand Down
8 changes: 5 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,10 @@ In addition, STAR-Fusion requires a [CTAT bundle](https://data.broadinstitute.or
| --CTAT_folder |. | Folder with STAR-Fusion bundle (CTAT) |



* #### Optional
| Name | Default value | Description |
|-----------|---------------|-----------------|
| --input_file | NULL | Input file (tab-separated) with 4 columns: SM (sample name), pair1 (path to fastq pair 1), pair2 (path to fastq pair 2), and junction (path to junction file) |
| --output_folder | results_fusion | Output folder |
| --fastq_ext | fq.gz | Extension of fastq files |
| --suffix1 | _1 | Suffix of 1st element of fastq files pair |
Expand All @@ -49,6 +49,7 @@ In addition, STAR-Fusion requires a [CTAT bundle](https://data.broadinstitute.or
| --cpu | 2 | Number of CPUs used by STAR-Fusion |
| --mem | 2 | Size of memory used for mapping (in GB)|

Note: the input_file mode allows specifying multiple fastq files for a given sample; these are merged during the alignment phase.

* #### Flags

Expand All @@ -59,10 +60,11 @@ Flags are special parameters without value.
| --junctions | Option to use STAR junction files already generated |
| --help | Display help |

Note: when the --junctions option is not used, the junction column of the input file is ignored.

## Usage
```
nextflow run iarcbioinfo/RNAseq-fusion-nf -r v1.0 -profile singularity --input_folder input --CTAT_folder CTAT --output_folder output
nextflow run iarcbioinfo/RNAseq-fusion-nf -r v1.1 -profile singularity --input_folder input --CTAT_folder CTAT --output_folder output
```

To run the pipeline without singularity just remove "-profile singularity"; you can also directly download a singularity image at https://data.broadinstitute.org/Trinity/CTAT_SINGULARITY/STAR-Fusion/ using the command `singularity pull https://data.broadinstitute.org/Trinity/CTAT_SINGULARITY/STAR-Fusion/star-fusion.v1.9.0.simg`. Alternatively, one can run the pipeline using a docker container (-profile docker) or the conda recipe containing all required dependencies (-profile conda).
Expand All @@ -83,7 +85,7 @@ To run the pipeline without singularity just remove "-profile singularity"; you

| Name | Email | Description |
|-----------|---------------|-----------------|
| Nicolas Alcala | alcalan@fellows.iarc.fr | Developer to contact for support |
| Nicolas Alcala | [email protected] | Developer to contact for support |

## References
Haas, B. J., Dobin, A., Li, B., Stransky, N., Pochet, N., & Regev, A. (2019). Accuracy assessment of fusion transcript detection via read-mapping and de novo fusion transcript assembly-based methods. Genome biology, 20(1), 213.
Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@ channels:
- conda-forge
- defaults
dependencies:
- star-fusion=1.8.1
- star-fusion=1.9.0
- python=3.6
4 changes: 2 additions & 2 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@ profiles {
}
docker {
docker.enabled = true
process.container = 'trinityctat/starfusion:1.8.1'
process.container = 'trinityctat/starfusion:1.9.0'
}
singularity {
singularity.enabled = true
singularity.autoMounts = true
process.container = 'docker://trinityctat/starfusion:1.8.1'
process.container = 'docker://trinityctat/starfusion:1.9.0'
pullTimeout = "200 min"
}
}
Expand Down
33 changes: 26 additions & 7 deletions rnaseq-fusion.nf
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

params.CTAT_folder = '.'
params.input_folder = '.'
params.input_file = null
params.output_folder= "results_fusion"
params.mem = 2
params.cpu = 2
Expand All @@ -31,7 +32,7 @@ params.help = null

log.info ""
log.info "--------------------------------------------------------"
log.info " rnaseq-fusion-nf v1.0: nextflow pipeline to run STAR-fusion "
log.info " rnaseq-fusion-nf v1.1: nextflow pipeline to run STAR-fusion "
log.info "--------------------------------------------------------"
log.info "Copyright (C) IARC/WHO"
log.info "This program comes with ABSOLUTELY NO WARRANTY; for details see LICENSE"
Expand All @@ -42,16 +43,19 @@ log.info ""

if (params.help) {
log.info "--------------------------------------------------------"
log.info " USAGE nextflow run rnaseq-fusion-nf --input_folder fastq/ --CTAT_folder GRCh38_CTAT/ "
log.info " USAGE "
log.info "--------------------------------------------------------"
log.info ""
log.info "nextflow run iarcbioinfo/rnaseq-transcript-nf [-with-docker] [OPTIONS]"
log.info "nextflow run IARCbioinfo/RNAseq-fusion-nf --input_folder fastq/ --CTAT_folder GRCh38_CTAT/ [-with-docker] [OPTIONS]"
log.info ""
log.info "Mandatory arguments:"
log.info ' --input_folder FOLDER Folder containing fastq files and STAR junction files.'
log.info ' --CTAT_folder FOLDER Folder with STAR-Fusion bundle (CTAT).'
log.info ""
log.info "Optional arguments:"
log.info ' --input_file STRING Input file (comma-separated) with 4 columns:'
log.info ' SM(sample name), pair1 (path to fastq pair 1), '
log.info ' pair2 (path to fastq pair 2), and junction (path to junction file).'
log.info ' --output_folder STRING Output folder (default: results_fusion).'
log.info ' --fastq_ext STRING Extension of fastq files (default: fq.gz).'
log.info ' --suffix1 STRING Suffix of 1st element of fastq files pair (default: _1).'
Expand All @@ -62,12 +66,13 @@ if (params.help) {
log.info ' --mem INTEGER Size of memory used for mapping (in GB) (default: 2).'
log.info ""
log.info "Flags:"
log.info "--junctions Option to use STAR junction files (default: null)."
log.info " --junctions Option to use STAR junction files (default: null)."
log.info ""
exit 0
} else {
/* Software information */
log.info "input_folder = ${params.input_folder}"
log.info "input_file = ${params.input_file}"
log.info "cpu = ${params.cpu}"
log.info "mem = ${params.mem}"
log.info "output_folder = ${params.output_folder}"
Expand All @@ -83,6 +88,13 @@ if (params.help) {


// Gather paired fastq files
if(params.input_file){
input_triplet = Channel.fromPath("${params.input_file}")
.splitCsv(header: true, sep: '\t', strip: true)
.map { row -> [row.SM , file(row.pair1), file(row.pair2), file(row.junction) ] }
.groupTuple(by: 0)
.map { row -> [row[0] , row[1], row[2], row[3][0] ] }
}else{
readPairs = Channel.fromFilePairs(params.input_folder +"/*{${params.suffix1},${params.suffix2}}" +'.'+ params.fastq_ext)
.map { row -> [ row[0], row[1][0], row[1][1] ] }

Expand All @@ -103,9 +115,9 @@ if (params.help) {
}else{
println "Do not gather STAR junction files; STAR will be used for alignment"
input_triplet = readPairs.map { pairs -> [ pairs[0],pairs[1], pairs[2], 'NO_FILE' ] }
}
}


process STAR_Fusion {
cpus params.cpu
memory params.mem+'G'
Expand All @@ -127,7 +139,14 @@ process STAR_Fusion {
}else{
SF_junction=" "
}
input_txt="${file_tag}\t${pair1[0]}\t${pair2[0]}"
if(pair1 instanceof List) {
for( i = 1; i < pair1.size(); i++){
input_txt=input_txt+"\n${file_tag}\t${pair1[i]}\t${pair2[i]}"
}
}
'''
!{params.starfusion_path} --genome_lib_dir $PWD/!{CTAT_folder} !{SF_junction} --left_fq !{pair1} --right_fq !{pair2} --output_dir . --FusionInspector validate --denovo_reconstruct --examine_coding_effect --CPU !{params.cpu}
echo '!{input_txt}' > input.txt
!{params.starfusion_path} --genome_lib_dir $PWD/!{CTAT_folder} !{SF_junction} --samples_file input.txt --output_dir . --FusionInspector validate --denovo_reconstruct --examine_coding_effect --CPU !{params.cpu}
'''
}
}

0 comments on commit 6697e6d

Please sign in to comment.