From cd1e9d0384bdef24c8bc203cd85ed80d0dce5b3f Mon Sep 17 00:00:00 2001
From: David Jones <drj@sanger.ac.uk>
Date: Tue, 9 Aug 2022 14:15:25 +0100
Subject: [PATCH 1/8] Initial nextflow commit

---
 nextflow/main.nf         | 470 +++++++++++++++++++++++++++++++++++++++
 nextflow/nextflow.config |  76 +++++++
 2 files changed, 546 insertions(+)
 create mode 100644 nextflow/main.nf
 create mode 100644 nextflow/nextflow.config
diff --git a/nextflow/main.nf b/nextflow/main.nf
new file mode 100644
index 0000000..7247c61
--- /dev/null
+++ b/nextflow/main.nf
@@ -0,0 +1,470 @@
+/*
+ * Copyright (c) 2014-2022 Genome Research Ltd
+ *
+ * Author: CASM/Cancer IT <cgphelp@sanger.ac.uk>
+ *
+ * This file is part of CaVEMan.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * 1. The usage of a range of years within a copyright statement contained within
+ * this distribution should be interpreted as being equivalent to a list of years
+ * including the first and last year specified and all consecutive years between
+ * them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007-
+ * 2009, 2011-2012’ should be interpreted as being identical to a statement that
+ * reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright
+ * statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being
+ * identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008,
+ * 2009, 2010, 2011, 2012’.
+ */
+
+nextflow.enable.dsl=2
+
+def helpMessage() {
+    log.info """
+    Usage:
+      nextflow run https://github.com/cancerit/CaVEMan -entry <entry-point> --help
+    Available entry points:
+      - caveman
+      - caveman_np
+    """.stripIndent()
+}
+
+def helpCavemanMessage() {
+    log.info """
+    Usage:
+      nextflow run https://github.com/cancerit/CaVEMan -entry caveman --help
+  Required parameters:
+    --outdir        Folder to output result to.
+    --genomefa      Path to reference index genome file *.fa.fai[.gz]
+    --mutBam        Tumour BAM/CRAM file (co-located index and bas files)
+    --controlBam    Normal BAM/CRAM file (co-located index and bas files)
+    --ignoreContigs Location of tsv ignore regions file
+
+
+
+    --simrep      Full path to tabix indexed simple/satellite repeats.
+    --filter      VCF filter rules file (see FlagVcf.pl for details)
+    --genes       Full path to tabix indexed coding gene footprints.
+    --unmatched   Full path to tabix indexed gff3 of unmatched normal panel
+                   - see pindel_np_from_vcf.pl
+  Optional
+    --seqtype    Sequencing protocol, expect all input to match [WGS]
+    --assembly   Name of assembly in use
+                  -  when not available in BAM header SQ line.
+    --species    Species
+                  -  when not available in BAM header SQ line.
+    --exclude    Exclude this list of ref sequences from processing, wildcard '%'
+                  - comma separated, e.g. NC_007605,hs37d5,GL%
+    --badloci    Tabix indexed BED file of locations to not accept as anchors
+                  - e.g. hi-seq depth from UCSC
+    --skipgerm   Don't output events with more evidence in normal BAM.
+    --softfil    VCF filter rules to be indicated in INFO field as soft flags
+    --limit      When defined with '-cpus' internally thread concurrent processes.
+                  - requires '-p', specifically for pindel/pin2vcf steps
+    --debug      Don't cleanup workarea on completion.
+    --apid       Analysis process ID (numeric) - for cgpAnalysisProc header info
+                  - not necessary for external use
+    """.stripIndent()
+}
+
+def get_chr_count (genome, ignorecontigs) {
+  echo "$genome\t$ignorecontigs"
+}
+
+
+process setup {
+
+  input:
+    //setup
+    tuple path('genome.fa'), path('genome.fa.fai')
+    tuple path('mt.bam'), path('mt.bam.bai')
+    tuple path('wt.bam'), path('wt.bam.bai')
+    path ('ign.file')
+
+    //Optional setup - or empty file and blanket CN
+    val normcn
+    val tumcn
+    //tumcn
+    val configF
+    val algBeanF
+    path ('results')
+    val splitList
+    val includeSW
+    val includeSE
+    val includeDups
+
+  output:
+    //setup outputs
+    path "${configF}", emit: configFile
+    path "${algBeanF}", emit: algBeanFile
+
+  log.info """\
+  CaVEMan:setup
+  """.stripIndent()
+
+  script:
+    def applySW = includeSW != "NO_SW" ? "-w $includeSW" : ''
+    def applySE = includeSE != "NO_SE" ? "-z $includeSE" : ''
+    def applyDup = includeDups != "NO_DUPS" ? "-u $includeDups" : ''
+    def applyNCN = normcn != "NO_NCN" ? "-j $normcn" : ''
+    def applyTCN = tumcn != "NO_TCN" ? "-e $tumcn" : ''
+    """ 
+    caveman setup \
+    ${applySW} \
+    ${applySE} \
+    ${applyDup} \
+    ${applyNCN} \
+    ${applyTCN} \
+    -l $splitList \
+    -a $algBeanF \
+    -c $configF \
+    -t mt.bam \
+    -n wt.bam \
+    -r genome.fa.fai \
+    -g ign.file \
+    -f results \
+    """
+}
+
+process split {
+
+  input:
+    path configFile
+    tuple path('genome.fa'), path('genome.fa.fai')
+    tuple path('mt.bam'), path('mt.bam.bai')
+    tuple path('wt.bam'), path('wt.bam.bai')
+    path ('ign.file')
+    each index
+    val maxSplitReadNum
+    val splitList
+
+  output:
+    path "${splitList}*": emit splitFiles
+
+  log.info """\
+  CaVEMan:split
+  """.stripIndent()
+
+  script:
+    """
+    caveman split \
+    -f $configFile \
+    -i $index \
+    -e $maxSplitReadNum \
+    """
+}
+
+process split_concat {
+  input:
+    path 'splitList.*'
+    path splitList
+
+  output:
+    path $splitList, emit: mergedSplitList
+
+  log.info """\
+  CaVEMan:split_concat
+  """.stripIndent()
+
+  script:
+  """
+  cat splitList.* > $splitList
+  """
+}
+
+process mstep {
+  input:
+    path configFile
+    tuple path('genome.fa'), path('genome.fa.fai')
+    tuple path('mt.bam'), path('mt.bam.bai')
+    tuple path('wt.bam'), path('wt.bam.bai')
+    path ('ign.file')
+    each index
+    val mstepSplitSize
+    val mstepMinBaseQual
+    val splitList
+    path ('results')
+
+  output:
+
+  log.info """\
+  CaVEMan:mstep
+  """.stripIndent()
+
+  script:
+    """
+    caveman mstep \
+    -i $index \
+    -f $configFile \
+    -a $mstepSplitSize \
+    -m $mstepMinBaseQual \
+    """
+
+}
+//    -i jobindex [-f path] [-c int] [-m int] [-e int] 
+//   split -i %d -f %s -e %d
+//   caveman split -i jobindex [-f path] [-m int] [-e int] 
+
+// -i  --index [int]                 Job index (e.g. from $LSB_JOBINDEX)
+
+// Optional
+// -f  --config-file [file]          Path to the config file produced by setup [default:'caveman.cfg.ini'].
+// -e  --read-count [int]            Guide for maximum read count in a section [default:350000]
+// -h    help                          Display this usage information.
+/*}
+
+process split_concat {
+	// # uncoverable subroutine
+	// my $options = shift;
+	// my $tmp = $options->{'tmp'};
+	// my $out = $options->{'out_file'};
+	// my $target = $options->{'target_files'};
+	// return 1 if PCAP::Threaded::success_exists(File::Spec->catdir($tmp, 'progress'), 0);
+	// my $command = sprintf('cat %s > %s',$target,$out);
+	// PCAP::Threaded::external_process_handler(File::Spec->catdir($tmp, 'logs'), $command, 0);
+
+	// return PCAP::Threaded::touch_success(File::Spec->catdir($tmp, 'progress'), 0);
+}
+
+process mstep {
+// mstep -i %d -f %s
+// caveman mstep -i jobindex [-f file] [-m int] [-a int]
+
+// -i  --index [int]                 Job index.
+
+// Optional
+// -f  --config-file [file]          Path to the config file produced by setup [default: 'caveman.cfg.ini'].
+// -a  --split_size [int]            Size of section to retrieve at a time from bam file. Allows memory footprint tuning [default:50000].
+// -m  --min-base-qual [int]         Minimum base quality for inclusion of a read position in analysis [default:11]
+// -h	help                          Display this usage information.
+
+}
+
+process merge {
+// merge -c %s -p %s -f %s
+// caveman merge [-f file] [-c file] [-p file] 
+
+// Optional
+// -f  --config-file file               Path to the config file produced by setup. [default: 'caveman.cfg.ini']
+
+// -c  --covariate-file filename        Location to write merged covariate array [default: covs_arr]
+// -p  --probabilities-file filename    Location to write probability array [default: probs_arr]
+// -h	help                             Display this usage information.
+}
+
+process estep {
+// estep -i %d -k %f -g %s -o %s -v %s -w %s -f %s -l %s -r %s
+// const my $CAVEMAN_ESTEP_MUT_PRIOR_EXT => q{ -c %s};
+// const my $CAVEMAN_ESTEP_SNP_PRIOR_EXT => q{ -d %s};
+// const my $CAVEMAN_ESTEP_NPLATFORM_EXT => q{ -P %s};
+// const my $CAVEMAN_ESTEP_TPLATFORM_EXT => q{ -T %s};
+// caveman estep -i jobindex [-f file] [-m int] [-k float] [-b float] [-p float] [-q float] [-x int] [-y int] [-c float] [-d float] [-a int]
+
+// -i  --index [int]                                Job index (e.g. from $LSB_JOBINDEX)
+
+// Optional
+// -f  --config-file [file]                         Path to the config file produced by setup. [default:'caveman.cfg.ini']
+// -m  --min-base-qual [int]                        Minimum base quality for inclusion of a read position [default:11]
+// -c  --prior-mut-probability [float]              Prior somatic probability [default:0.000006]
+// -d  --prior-snp-probability [float]              Prior germline mutant probability [default:0.000100]
+// -k  --normal-contamination [float]               Normal contamination of tumour [default:0.100000]
+// -b  --reference-bias [float]                     Reference bias [default:0.950000]
+// -p  --mut-probability-cutoff [float]             Minimum probability call for a somatic mutant position to be output [default:0.800000]
+// -q  --snp-probability-cutoff [float]             Minimum probability call for a germline mutant position to be output [default:0.950000]
+// -x  --min-tum-coverage [int]                     Minimum tumour coverage for analysis of a position [default:1]
+// -y  --min-norm-coverage [int]                    Minimum normal coverage for analysis of a position [default:1]
+// -a  --split-size [int]                           Size of section to retrieve at a time from bam file. Allows memory footprint tuning [default:50000].
+// -s  --debug                                      Adds an extra output to a debug file. Every base analysed has an output
+// -g  --cov-file [file]                            File location of the covariate array. [default:'covs_arr']
+// -o  --prob-file [file]                           File location of the prob array. [default:'probs_arr']
+// -v  --species-assembly [string]                  Species assembly (eg 37/GRCh37), required if bam header SQ lines do not contain AS and SP information.
+// -w  --species [string]                           Species name (eg Human), required if bam header SQ lines do not contain AS and SP information.
+// -n  --normal-copy-number [int]                   Copy number to use when filling gaps in the normal copy number file [default:2].
+// -t  --tumour-copy-number [int]                   Copy number to use when filling gaps in the tumour copy number file [default:2].
+// -l  --normal-protocol [string]                   Normal protocol. Ideally this should match -r but not checked (WGS|WXS|RNA|RNA-Seq|AMPLICON|TARGETED) [default:WGS].
+// -r  --tumour-protocol [string]                   Tumour protocol. Ideally this should match -l but not checked (WGS|WXS|RNA|RNA-Seq|AMPLICON|TARGETED) [default:WGS].
+// -P  --normal-platform [string]                   Normal platform. Overrides the values retrieved from bam header.
+// -T  --tumour-platform [string]                   Tumour platform. Overrides the values retrieved from bam header.
+// -M  --max-copy-number [int]                      Maximum copy number permitted. If exceeded the copy number for the offending region will be set to this value. [default:10].
+// -h	help                                         Display this usage information.
+
+}
+
+process merge_results {
+// mergeCavemanResults -s %s -o %s -f %s
+// Usage:
+//     mergeCavemanResults [options] [file(s)...]
+
+//     Required parameters: --output -o File to output result to. --splitlist
+//     -s File containing list of split section --file-match -f ls style
+//     pattern match of files to be merged. e.g. 
+//     --help -h Brief help message.
+}
+
+process add_ids {
+//   cgpAppendIdsToVcf.pl -i %s -o %s 
+//   cgpAppendIdsToVcf.pl --help
+// Usage
+
+// Usage:
+//     cgpAppendIdsToVcf.pl [-h] -i this.vcf -o this_with_ids.vcf
+
+//       General Options:
+
+//       --help      (-h)       Brief documentation
+
+//             --file      (-i)       The file to append IDs to.
+
+//             --outFile   (-o)       The output filename
+
+//             Optional parameters:
+
+//             --idstart   (-g)       Will set a sequential id generator to the given integer value. If not present will assign UUIDs to each variant.
+
+//             --version   (-v)       Prints version information.
+
+//       Examples:
+
+//         cgpAppendIdsToVcf.pl -f this.vcf -o this_with_ids.vcf -g 1
+}
+
+process flag {
+  cgpFlagCaVEMan.pl -i %s -o %s -s %s -m %s -n %s -b %s -g %s -umv %s -ref %s -t %s -sa %s
+}
+*/
+
+// Print help if no workflow specified
+workflow { 
+    if ( params.help ) {
+        helpMessage()
+        exit 0
+    }
+}
+
+workflow caveman {
+  //wf for running caveman
+  //Help message
+  if ( params.help ) {
+    helpCavemanMessage()
+    exit 0
+  }
+  log.info """\
+    CaVEMan:caveman - NF Pipeline
+    ------------------------------
+    //setup
+    genomefa: ${params.genomefa}
+    mutBam: ${params.mutBam}
+    controlBam: ${params.controlBam}
+    ignoreContigs: ${params.ignoreContigs}
+  """.stripIndent()
+
+  main:
+    genome = tuple file(params.genomefa), file("${params.genomefa}.fai")
+    mt = tuple file(params.mutBam), file("${params.mutBam}.bai")
+    wt = tuple file(params.controlBam), file("${params.controlBam}.bai")
+    ign_file = file(params.ignoreContigs)
+    results = file(params.resultsDir)
+    
+    subCaVEMan(
+      //setup
+      genome,
+      mt,
+      wt,
+      ign_file,
+
+      //Optional setup - or empty file and blanket CN
+      params.normcn,
+      params.tumcn,
+      params.configF,
+      params.algBeanF,
+      results,
+      params.splitList,
+      params.includeSW,
+      params.includeSE,
+      params.includeDups,
+
+      params.maxSplitReadNum
+    )
+}
+
+workflow subCaVEMan {
+  //sub workflow, params and help in entrypoint wf
+  take:
+    //setup
+    genome
+    mt
+    wt
+    ign_file
+
+    //Optional setup - or empty file and blanket CN
+    normcn
+    tumcn
+    configF
+    algBeanF
+    results
+    splitList
+    includeSW
+    includeSE
+    includeDups
+
+    //Optional spliut 
+    maxSplitReadNum
+
+  main:
+    setup(
+      genome,
+      mt,
+      wt,
+      ign_file,
+      normcn,
+      tumcn,
+      configF,
+      algBeanF,
+      results,
+      splitList,
+      includeSW,
+      includeSE,
+      includeDups
+    )
+    // contigs(
+    //   genome
+    // )
+    Channel.of(1..file("${params.genomefa}.fai").countLines()).set({contig_index})
+    split(
+      setup.out.configFile,
+      genome,
+      mt,
+      wt,
+      ign_file,
+      contig_index,
+      maxSplitReadNum,
+      splitList
+    ) | collect | 
+    split_concat(
+
+    )
+    mstep(
+      setup.out.configFile,
+      genome,
+      mt,
+      wt,
+      ign_file,
+      split.out.splitFiles,
+      mstepSplitSize,
+      mstepMinBaseQual,
+      splitList,
+      results
+    )
+}
diff --git a/nextflow/nextflow.config b/nextflow/nextflow.config
new file mode 100644
index 0000000..5e85dff
--- /dev/null
+++ b/nextflow/nextflow.config
@@ -0,0 +1,76 @@
+params {
+  
+  //Generic - help etc
+  help = false
+
+  //Required
+  genomefa = null
+  mutBam = null
+  controlBam = null
+  ignoreContigs = null
+
+  //Optional setup - or empty file and blanket CN
+  normcn = "NO_NCN"
+  tumcn = "NO_TCN"
+  configF = "caveman.cfg.ini"
+  algBeanF = "alg_bean"
+  resultsDir = "./results"
+  splitList = "splitList"
+  includeSW = "NO_SW"
+  includeSE = "NO_SE"
+  includeDups = "NO_DUPS"
+
+  //Optional split 
+  maxSplitReadNum = 350000
+
+  //Optional mstep
+  mstepSplitSize = 50000
+  mstepMinBaseQual = 11
+
+
+}
+
+/*
+process {
+
+  //container = "quay.io/wtsicgp/cgpcavemanwrapper:1.18.1"
+
+  withName: 'setup' {
+    cpus = 1
+    memory = 250 MB
+  }
+
+  withName: 'split' {
+
+  }
+
+  withName: 'split_concat' {
+
+  }
+
+  withName: 'mstep' {
+
+  }
+
+  withName: 'merge' {
+
+  }
+
+  withName: 'estep' {
+
+  }
+
+  withName: 'merge_results' {
+
+  }
+
+  withName: 'add_ids' {
+
+  }
+
+  withName: 'flag' {
+    
+  }
+
+}
+*/

From 47ec458fe703bd80d6fbfc98ea955df22a3b0e1a Mon Sep 17 00:00:00 2001
From: David Jones <drj@sanger.ac.uk>
Date: Tue, 9 Aug 2022 14:15:42 +0100
Subject: [PATCH 2/8] Add nextflow to gitignore

---
 .gitignore | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.gitignore b/.gitignore
index 9c9e155..b801e6c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,5 @@
 setup.log
 /tests/*.dSYM
 /install
+.vstags
+.nextflow

From a28711814ace74b2f146affe07a03dfc5332a196 Mon Sep 17 00:00:00 2001
From: David Jones <drj@sanger.ac.uk>
Date: Tue, 9 Aug 2022 14:17:59 +0100
Subject: [PATCH 3/8] Remove CWD from config file. Unused commandline args in
 split

---
 src/config_file_access.c | 6 +++---
 src/split.c              | 4 +---
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/config_file_access.c b/src/config_file_access.c
index c56b270..2c46e59 100644
--- a/src/config_file_access.c
+++ b/src/config_file_access.c
@@ -159,9 +159,9 @@ int config_file_access_write_config_file(FILE *file, char *tum_bam_file, char *n
 	check_mem(curr_wd);
 	char *chk = getcwd(curr_wd,size);
 	check(chk!=NULL,"Error retrieving cwd for config file.");
-	int res = fprintf(file,"%s=%s\n",CWD,curr_wd);
-	check(res>=0,"Error writing cwd to config file.");
-	res = fprintf(file,"%s=%s\n",MUT_TUM,tum_bam_file);
+	//int res = fprintf(file,"%s=%s\n",CWD,curr_wd);
+	//check(res>=0,"Error writing cwd to config file.");
+	int res = fprintf(file,"%s=%s\n",MUT_TUM,tum_bam_file);
 	check(res>=0,"Error writing mutant bam file loc to config file.");
 	res = fprintf(file,"%s=%s\n",NORM_TUM,norm_bam_file);
 	check(res>=0,"Error writing normal bam file loc to config file.");
diff --git a/src/split.c b/src/split.c
index 1257fc7..536b270 100644
--- a/src/split.c
+++ b/src/split.c
@@ -69,7 +69,7 @@ static char tum_cn_loc[512];
 static int idx = 0;
 
 void split_print_usage (int exit_code){
-    printf ("Usage: caveman split -i jobindex [-f path] [-c int] [-m int] [-e int] \n\n");
+    printf ("Usage: caveman split -i jobindex [-f path] [-e int] \n\n");
     printf("-i  --index [int]                 Job index (e.g. from $LSB_JOBINDEX)\n\n");
     printf("Optional\n");
     printf("-f  --config-file [file]          Path to the config file produced by setup [default:'%s'].\n",config_file);
@@ -82,8 +82,6 @@ int split_setup_options(int argc, char *argv[]){
     const struct option long_opts[] =
     {
         {"config-file", required_argument, 0, 'f'},
-        {"increment", required_argument, 0, 'c'},
-        {"max-read-count",required_argument , 0, 'm'},
         {"read-count", required_argument, 0, 'e'},
         {"index", required_argument, 0, 'i'},
         {"help", no_argument, 0, 'h'},

From e78f7ca8fe64ace100d276aaf0388b7f3212121d Mon Sep 17 00:00:00 2001
From: David Jones <drj@sanger.ac.uk>
Date: Tue, 9 Aug 2022 14:18:48 +0100
Subject: [PATCH 4/8] Initial nextflow commit

---
 nextflow/main.nf | 38 +-------------------------------------
 1 file changed, 1 insertion(+), 37 deletions(-)

diff --git a/nextflow/main.nf b/nextflow/main.nf
index 7247c61..5b8715a 100644
--- a/nextflow/main.nf
+++ b/nextflow/main.nf
@@ -213,44 +213,8 @@ process mstep {
     """
 
 }
-//    -i jobindex [-f path] [-c int] [-m int] [-e int] 
-//   split -i %d -f %s -e %d
-//   caveman split -i jobindex [-f path] [-m int] [-e int] 
 
-// -i  --index [int]                 Job index (e.g. from $LSB_JOBINDEX)
-
-// Optional
-// -f  --config-file [file]          Path to the config file produced by setup [default:'caveman.cfg.ini'].
-// -e  --read-count [int]            Guide for maximum read count in a section [default:350000]
-// -h    help                          Display this usage information.
-/*}
-
-process split_concat {
-	// # uncoverable subroutine
-	// my $options = shift;
-	// my $tmp = $options->{'tmp'};
-	// my $out = $options->{'out_file'};
-	// my $target = $options->{'target_files'};
-	// return 1 if PCAP::Threaded::success_exists(File::Spec->catdir($tmp, 'progress'), 0);
-	// my $command = sprintf('cat %s > %s',$target,$out);
-	// PCAP::Threaded::external_process_handler(File::Spec->catdir($tmp, 'logs'), $command, 0);
-
-	// return PCAP::Threaded::touch_success(File::Spec->catdir($tmp, 'progress'), 0);
-}
-
-process mstep {
-// mstep -i %d -f %s
-// caveman mstep -i jobindex [-f file] [-m int] [-a int]
-
-// -i  --index [int]                 Job index.
-
-// Optional
-// -f  --config-file [file]          Path to the config file produced by setup [default: 'caveman.cfg.ini'].
-// -a  --split_size [int]            Size of section to retrieve at a time from bam file. Allows memory footprint tuning [default:50000].
-// -m  --min-base-qual [int]         Minimum base quality for inclusion of a read position in analysis [default:11]
-// -h	help                          Display this usage information.
-
-}
+/*
 
 process merge {
 // merge -c %s -p %s -f %s

From 9e3b3c12fff5c5bdb8fd07c45a128e3095edeeb6 Mon Sep 17 00:00:00 2001
From: David Jones <drj@sanger.ac.uk>
Date: Wed, 17 Aug 2022 11:38:51 +0100
Subject: [PATCH 5/8] nf working up to beginngin of mstep

---
 nextflow/main.nf                 | 299 ++++++-------------------------
 nextflow/modules/caveman-core.nf | 237 ++++++++++++++++++++++++
 nextflow/nextflow.config         |   1 +
 3 files changed, 290 insertions(+), 247 deletions(-)
 create mode 100644 nextflow/modules/caveman-core.nf

diff --git a/nextflow/main.nf b/nextflow/main.nf
index 5b8715a..a49aaf2 100644
--- a/nextflow/main.nf
+++ b/nextflow/main.nf
@@ -79,236 +79,25 @@ def helpCavemanMessage() {
     """.stripIndent()
 }
 
-def get_chr_count (genome, ignorecontigs) {
-  echo "$genome\t$ignorecontigs"
-}
-
-
-process setup {
-
-  input:
-    //setup
-    tuple path('genome.fa'), path('genome.fa.fai')
-    tuple path('mt.bam'), path('mt.bam.bai')
-    tuple path('wt.bam'), path('wt.bam.bai')
-    path ('ign.file')
-
-    //Optional setup - or empty file and blanket CN
-    val normcn
-    val tumcn
-    //tumcn
-    val configF
-    val algBeanF
-    path ('results')
-    val splitList
-    val includeSW
-    val includeSE
-    val includeDups
-
-  output:
-    //setup outputs
-    path "${configF}", emit: configFile
-    path "${algBeanF}", emit: algBeanFile
-
-  log.info """\
-  CaVEMan:setup
-  """.stripIndent()
-
-  script:
-    def applySW = includeSW != "NO_SW" ? "-w $includeSW" : ''
-    def applySE = includeSE != "NO_SE" ? "-z $includeSE" : ''
-    def applyDup = includeDups != "NO_DUPS" ? "-u $includeDups" : ''
-    def applyNCN = normcn != "NO_NCN" ? "-j $normcn" : ''
-    def applyTCN = tumcn != "NO_TCN" ? "-e $tumcn" : ''
-    """ 
-    caveman setup \
-    ${applySW} \
-    ${applySE} \
-    ${applyDup} \
-    ${applyNCN} \
-    ${applyTCN} \
-    -l $splitList \
-    -a $algBeanF \
-    -c $configF \
-    -t mt.bam \
-    -n wt.bam \
-    -r genome.fa.fai \
-    -g ign.file \
-    -f results \
-    """
-}
-
-process split {
-
-  input:
-    path configFile
-    tuple path('genome.fa'), path('genome.fa.fai')
-    tuple path('mt.bam'), path('mt.bam.bai')
-    tuple path('wt.bam'), path('wt.bam.bai')
-    path ('ign.file')
-    each index
-    val maxSplitReadNum
-    val splitList
-
-  output:
-    path "${splitList}*": emit splitFiles
-
-  log.info """\
-  CaVEMan:split
-  """.stripIndent()
-
-  script:
-    """
-    caveman split \
-    -f $configFile \
-    -i $index \
-    -e $maxSplitReadNum \
-    """
-}
-
-process split_concat {
-  input:
-    path 'splitList.*'
-    path splitList
-
-  output:
-    path $splitList, emit: mergedSplitList
-
-  log.info """\
-  CaVEMan:split_concat
-  """.stripIndent()
-
-  script:
-  """
-  cat splitList.* > $splitList
-  """
-}
-
-process mstep {
-  input:
-    path configFile
-    tuple path('genome.fa'), path('genome.fa.fai')
-    tuple path('mt.bam'), path('mt.bam.bai')
-    tuple path('wt.bam'), path('wt.bam.bai')
-    path ('ign.file')
-    each index
-    val mstepSplitSize
-    val mstepMinBaseQual
-    val splitList
-    path ('results')
-
-  output:
-
-  log.info """\
-  CaVEMan:mstep
-  """.stripIndent()
-
-  script:
-    """
-    caveman mstep \
-    -i $index \
-    -f $configFile \
-    -a $mstepSplitSize \
-    -m $mstepMinBaseQual \
-    """
-
-}
-
-/*
-
-process merge {
-// merge -c %s -p %s -f %s
-// caveman merge [-f file] [-c file] [-p file] 
-
-// Optional
-// -f  --config-file file               Path to the config file produced by setup. [default: 'caveman.cfg.ini']
-
-// -c  --covariate-file filename        Location to write merged covariate array [default: covs_arr]
-// -p  --probabilities-file filename    Location to write probability array [default: probs_arr]
-// -h	help                             Display this usage information.
-}
-
-process estep {
-// estep -i %d -k %f -g %s -o %s -v %s -w %s -f %s -l %s -r %s
-// const my $CAVEMAN_ESTEP_MUT_PRIOR_EXT => q{ -c %s};
-// const my $CAVEMAN_ESTEP_SNP_PRIOR_EXT => q{ -d %s};
-// const my $CAVEMAN_ESTEP_NPLATFORM_EXT => q{ -P %s};
-// const my $CAVEMAN_ESTEP_TPLATFORM_EXT => q{ -T %s};
-// caveman estep -i jobindex [-f file] [-m int] [-k float] [-b float] [-p float] [-q float] [-x int] [-y int] [-c float] [-d float] [-a int]
-
-// -i  --index [int]                                Job index (e.g. from $LSB_JOBINDEX)
-
-// Optional
-// -f  --config-file [file]                         Path to the config file produced by setup. [default:'caveman.cfg.ini']
-// -m  --min-base-qual [int]                        Minimum base quality for inclusion of a read position [default:11]
-// -c  --prior-mut-probability [float]              Prior somatic probability [default:0.000006]
-// -d  --prior-snp-probability [float]              Prior germline mutant probability [default:0.000100]
-// -k  --normal-contamination [float]               Normal contamination of tumour [default:0.100000]
-// -b  --reference-bias [float]                     Reference bias [default:0.950000]
-// -p  --mut-probability-cutoff [float]             Minimum probability call for a somatic mutant position to be output [default:0.800000]
-// -q  --snp-probability-cutoff [float]             Minimum probability call for a germline mutant position to be output [default:0.950000]
-// -x  --min-tum-coverage [int]                     Minimum tumour coverage for analysis of a position [default:1]
-// -y  --min-norm-coverage [int]                    Minimum normal coverage for analysis of a position [default:1]
-// -a  --split-size [int]                           Size of section to retrieve at a time from bam file. Allows memory footprint tuning [default:50000].
-// -s  --debug                                      Adds an extra output to a debug file. Every base analysed has an output
-// -g  --cov-file [file]                            File location of the covariate array. [default:'covs_arr']
-// -o  --prob-file [file]                           File location of the prob array. [default:'probs_arr']
-// -v  --species-assembly [string]                  Species assembly (eg 37/GRCh37), required if bam header SQ lines do not contain AS and SP information.
-// -w  --species [string]                           Species name (eg Human), required if bam header SQ lines do not contain AS and SP information.
-// -n  --normal-copy-number [int]                   Copy number to use when filling gaps in the normal copy number file [default:2].
-// -t  --tumour-copy-number [int]                   Copy number to use when filling gaps in the tumour copy number file [default:2].
-// -l  --normal-protocol [string]                   Normal protocol. Ideally this should match -r but not checked (WGS|WXS|RNA|RNA-Seq|AMPLICON|TARGETED) [default:WGS].
-// -r  --tumour-protocol [string]                   Tumour protocol. Ideally this should match -l but not checked (WGS|WXS|RNA|RNA-Seq|AMPLICON|TARGETED) [default:WGS].
-// -P  --normal-platform [string]                   Normal platform. Overrides the values retrieved from bam header.
-// -T  --tumour-platform [string]                   Tumour platform. Overrides the values retrieved from bam header.
-// -M  --max-copy-number [int]                      Maximum copy number permitted. If exceeded the copy number for the offending region will be set to this value. [default:10].
-// -h	help                                         Display this usage information.
-
-}
-
-process merge_results {
-// mergeCavemanResults -s %s -o %s -f %s
-// Usage:
-//     mergeCavemanResults [options] [file(s)...]
-
-//     Required parameters: --output -o File to output result to. --splitlist
-//     -s File containing list of split section --file-match -f ls style
-//     pattern match of files to be merged. e.g. 
-//     --help -h Brief help message.
-}
-
-process add_ids {
-//   cgpAppendIdsToVcf.pl -i %s -o %s 
-//   cgpAppendIdsToVcf.pl --help
-// Usage
-
-// Usage:
-//     cgpAppendIdsToVcf.pl [-h] -i this.vcf -o this_with_ids.vcf
-
-//       General Options:
-
-//       --help      (-h)       Brief documentation
-
-//             --file      (-i)       The file to append IDs to.
-
-//             --outFile   (-o)       The output filename
-
-//             Optional parameters:
-
-//             --idstart   (-g)       Will set a sequential id generator to the given integer value. If not present will assign UUIDs to each variant.
-
-//             --version   (-v)       Prints version information.
-
-//       Examples:
-
-//         cgpAppendIdsToVcf.pl -f this.vcf -o this_with_ids.vcf -g 1
+def getUsableContigIndices(genomeFai, ignoreContigs) {
+  fai_lines = file(genomeFai).readLines()
+  def contig_fai = []
+  for (fai_line in fai_lines){
+    contig_fai.push(fai_line.split("\\s+")[0])
+  }
+  contig_fai = contig_fai.reverse()
+  def ignore_contigs_list  = file(ignoreContigs).readLines()
+  def unique_fai = (contig_fai + ignore_contigs_list ) - contig_fai.intersect( ignore_contigs_list )
+  def indices = [] 
+  for(contig in unique_fai){
+    indices.add((contig_fai.findIndexOf { "$it" == "$contig" })+1)
+  }
+  return indices
 }
 
-process flag {
-  cgpFlagCaVEMan.pl -i %s -o %s -s %s -m %s -n %s -b %s -g %s -umv %s -ref %s -t %s -sa %s
-}
-*/
+include { setup; split; split_concat; mstep } from './modules/caveman-core.nf'
 
+//mstep; merge; estep; combine;
 // Print help if no workflow specified
 workflow { 
     if ( params.help ) {
@@ -332,14 +121,17 @@ workflow caveman {
     mutBam: ${params.mutBam}
     controlBam: ${params.controlBam}
     ignoreContigs: ${params.ignoreContigs}
+    ignoreRegions: ${params.ignoreRegions}
   """.stripIndent()
 
   main:
     genome = tuple file(params.genomefa), file("${params.genomefa}.fai")
     mt = tuple file(params.mutBam), file("${params.mutBam}.bai")
     wt = tuple file(params.controlBam), file("${params.controlBam}.bai")
-    ign_file = file(params.ignoreContigs)
+    ign_file = file(params.ignoreRegions)
     results = file(params.resultsDir)
+    splitF = file(params.splitList)
+    ignoreContigFile = file(params.ignoreContigs)
     
     subCaVEMan(
       //setup
@@ -347,19 +139,22 @@ workflow caveman {
       mt,
       wt,
       ign_file,
-
+      ignoreContigFile,
       //Optional setup - or empty file and blanket CN
       params.normcn,
       params.tumcn,
       params.configF,
       params.algBeanF,
       results,
-      params.splitList,
+      splitF,
       params.includeSW,
       params.includeSE,
       params.includeDups,
 
-      params.maxSplitReadNum
+      params.maxSplitReadNum,
+
+      params.mstepSplitSize,
+      params.mstepMinBaseQual
     )
 }
 
@@ -371,14 +166,14 @@ workflow subCaVEMan {
     mt
     wt
     ign_file
-
+    ignoreContigFile
     //Optional setup - or empty file and blanket CN
     normcn
     tumcn
     configF
     algBeanF
     results
-    splitList
+    splitF
     includeSW
     includeSE
     includeDups
@@ -386,6 +181,10 @@ workflow subCaVEMan {
     //Optional spliut 
     maxSplitReadNum
 
+    //Optional mstep
+    mstepSplitSize
+    mstepMinBaseQual
+
   main:
     setup(
       genome,
@@ -394,18 +193,16 @@ workflow subCaVEMan {
       ign_file,
       normcn,
       tumcn,
-      configF,
-      algBeanF,
-      results,
-      splitList,
+      // configF,
+      // algBeanF,
+      // results,
+      // splitF,
       includeSW,
       includeSE,
       includeDups
     )
-    // contigs(
-    //   genome
-    // )
-    Channel.of(1..file("${params.genomefa}.fai").countLines()).set({contig_index})
+    //Only want the indices of usable contigs - subtract contents of the ignoreContigsFile from the genome.fa.fa file
+    Channel.fromList(getUsableContigIndices("${params.genomefa}.fai", ignoreContigFile)).set({contig_index})
     split(
       setup.out.configFile,
       genome,
@@ -414,21 +211,29 @@ workflow subCaVEMan {
       ign_file,
       contig_index,
       maxSplitReadNum,
-      splitList
-    ) | collect | 
+    )
+    //Concatenate split files
     split_concat(
-
+      split.out.splitFiles.collect(),
     )
+    Channel.of(1..file("$baseDir/splitList").countLines()).set({mstep_index})
     mstep(
       setup.out.configFile,
+      setup.out.algBeanFile, 
+      split_concat.out.splitList,
       genome,
       mt,
       wt,
       ign_file,
-      split.out.splitFiles,
+      mstep_index,
       mstepSplitSize,
       mstepMinBaseQual,
-      splitList,
-      results
+      split.out.rposFiles.collect()
     )
+    // merge(
+    //   setup.out.configFile,
+    //   setup.out.algBeanFile, 
+    //   split_concat.out.splitList,
+    //   mstep.mstepResults.collect()
+    // )
 }
diff --git a/nextflow/modules/caveman-core.nf b/nextflow/modules/caveman-core.nf
new file mode 100644
index 0000000..4470105
--- /dev/null
+++ b/nextflow/modules/caveman-core.nf
@@ -0,0 +1,237 @@
+def get_chr_count (genome, ignorecontigs) {
+  echo "$genome\t$ignorecontigs"
+}
+
+
+process setup {
+
+  input:
+    //setup
+    tuple path('genome.fa'), path('genome.fa.fai')
+    tuple path('mt.bam'), path('mt.bam.bai')
+    tuple path('wt.bam'), path('wt.bam.bai')
+    file ('ign.file')
+
+    //Optional setup - or empty file and blanket CN
+    val normcn
+    val tumcn
+    //tumcn
+    // file ('caveman.cfg.ini')
+    // file ('alg_bean')
+    // path ('results')
+    // file ('splitList')
+    val includeSW
+    val includeSE
+    val includeDups
+
+  publishDir "$baseDir", mode: 'copy', pattern: 'caveman.cfg.ini', overwrite: true
+  publishDir "$baseDir", mode: 'copy', pattern: 'alg_bean', overwrite: true
+
+  output:
+    //setup outputs
+    path "caveman.cfg.ini", emit: configFile
+    path "alg_bean", emit: algBeanFile
+
+  script:
+    def applySW = includeSW != "NO_SW" ? "-w $includeSW" : ''
+    def applySE = includeSE != "NO_SE" ? "-z $includeSE" : ''
+    def applyDup = includeDups != "NO_DUPS" ? "-u $includeDups" : ''
+    def applyNCN = normcn != "NO_NCN" ? "-j $normcn" : ''
+    def applyTCN = tumcn != "NO_TCN" ? "-e $tumcn" : ''
+    """ 
+    caveman setup \
+    ${applySW} \
+    ${applySE} \
+    ${applyDup} \
+    ${applyNCN} \
+    ${applyTCN} \
+    -l splitList \
+    -a alg_bean \
+    -c caveman.cfg.ini \
+    -t mt.bam \
+    -n wt.bam \
+    -r genome.fa.fai \
+    -g ign.file \
+    -f results \
+    """
+}
+
+process split {
+
+  input:
+    path 'caveman.cfg.ini'
+    tuple path('genome.fa'), path('genome.fa.fai')
+    tuple path('mt.bam'), path('mt.bam.bai')
+    tuple path('wt.bam'), path('wt.bam.bai')
+    path ('ign.file')
+    each index
+    val maxSplitReadNum
+
+  publishDir "$baseDir/splits/", mode: 'symlink', pattern: 'splitList.*', overwrite: true
+
+  output:
+    path "splitList.*", emit: splitFiles
+    path "caveman.cfg.ini", emit: configFile
+    path "readpos.*", optional: true, emit: rposFiles
+
+  script:
+    """
+    caveman split \
+    -f caveman.cfg.ini \
+    -i $index \
+    -e $maxSplitReadNum \
+    """
+}
+
+process split_concat {
+  input:
+    path splitFileList
+
+  publishDir "$baseDir", mode: 'copy', pattern: 'splitList', overwrite: true
+
+  output:
+    path 'splitList', emit: splitList
+
+  script:
+  """
+  for splitFile in ${splitFileList}
+  do
+    cat \$splitFile >> splitList
+  done
+  """
+}
+
+process mstep {
+  input:
+    path 'caveman.cfg.ini'
+    path 'alg_bean'
+    path 'splitList'
+    tuple path('genome.fa'), path('genome.fa.fai')
+    tuple path('mt.bam'), path('mt.bam.bai')
+    tuple path('wt.bam'), path('wt.bam.bai')
+    path ('ign.file')
+    each index
+    val mstepSplitSize
+    val mstepMinBaseQual
+    path rposFiles
+
+  publishDir "$baseDir/results/", mode: 'symlink', pattern: 'results/*/*.covs', overwrite: true
+
+  output:
+    path "results/*/*.covs", emit: mstepResults
+
+  script:
+    """
+    caveman mstep \
+    -i $index \
+    -f caveman.cfg.ini \
+    -a $mstepSplitSize \
+    -m $mstepMinBaseQual
+    """
+
+}
+
+// process merge{
+//   input:
+//     path 'caveman.cfg.ini'
+//     path 'alg_bean'
+//     path 'splitList'
+//     path 'results'
+
+//   output:
+//     path 'covs_arr', emit: covArrFile
+//     path 'probs_arr', emit: probsArrFile
+
+//   publishDir "$baseDir/", mode: 'copy', pattern: 'covs_arr', overwrite: true
+//   publishDir "$baseDir/", mode: 'copy', pattern: 'probs_arr', overwrite: true
+
+//   script:
+//   """
+//   caveman merge \
+//   -c caveman.cfg.ini \
+//   -p probs_arr \
+//   -f covs_arr
+//   """
+// }
+
+/*
+process estep {
+// estep -i %d -k %f -g %s -o %s -v %s -w %s -f %s -l %s -r %s
+// const my $CAVEMAN_ESTEP_MUT_PRIOR_EXT => q{ -c %s};
+// const my $CAVEMAN_ESTEP_SNP_PRIOR_EXT => q{ -d %s};
+// const my $CAVEMAN_ESTEP_NPLATFORM_EXT => q{ -P %s};
+// const my $CAVEMAN_ESTEP_TPLATFORM_EXT => q{ -T %s};
+// caveman estep -i jobindex [-f file] [-m int] [-k float] [-b float] [-p float] [-q float] [-x int] [-y int] [-c float] [-d float] [-a int]
+
+// -i  --index [int]                                Job index (e.g. from $LSB_JOBINDEX)
+
+// Optional
+// -f  --config-file [file]                         Path to the config file produced by setup. [default:'caveman.cfg.ini']
+// -m  --min-base-qual [int]                        Minimum base quality for inclusion of a read position [default:11]
+// -c  --prior-mut-probability [float]              Prior somatic probability [default:0.000006]
+// -d  --prior-snp-probability [float]              Prior germline mutant probability [default:0.000100]
+// -k  --normal-contamination [float]               Normal contamination of tumour [default:0.100000]
+// -b  --reference-bias [float]                     Reference bias [default:0.950000]
+// -p  --mut-probability-cutoff [float]             Minimum probability call for a somatic mutant position to be output [default:0.800000]
+// -q  --snp-probability-cutoff [float]             Minimum probability call for a germline mutant position to be output [default:0.950000]
+// -x  --min-tum-coverage [int]                     Minimum tumour coverage for analysis of a position [default:1]
+// -y  --min-norm-coverage [int]                    Minimum normal coverage for analysis of a position [default:1]
+// -a  --split-size [int]                           Size of section to retrieve at a time from bam file. Allows memory footprint tuning [default:50000].
+// -s  --debug                                      Adds an extra output to a debug file. Every base analysed has an output
+// -g  --cov-file [file]                            File location of the covariate array. [default:'covs_arr']
+// -o  --prob-file [file]                           File location of the prob array. [default:'probs_arr']
+// -v  --species-assembly [string]                  Species assembly (eg 37/GRCh37), required if bam header SQ lines do not contain AS and SP information.
+// -w  --species [string]                           Species name (eg Human), required if bam header SQ lines do not contain AS and SP information.
+// -n  --normal-copy-number [int]                   Copy number to use when filling gaps in the normal copy number file [default:2].
+// -t  --tumour-copy-number [int]                   Copy number to use when filling gaps in the tumour copy number file [default:2].
+// -l  --normal-protocol [string]                   Normal protocol. Ideally this should match -r but not checked (WGS|WXS|RNA|RNA-Seq|AMPLICON|TARGETED) [default:WGS].
+// -r  --tumour-protocol [string]                   Tumour protocol. Ideally this should match -l but not checked (WGS|WXS|RNA|RNA-Seq|AMPLICON|TARGETED) [default:WGS].
+// -P  --normal-platform [string]                   Normal platform. Overrides the values retrieved from bam header.
+// -T  --tumour-platform [string]                   Tumour platform. Overrides the values retrieved from bam header.
+// -M  --max-copy-number [int]                      Maximum copy number permitted. If exceeded the copy number for the offending region will be set to this value. [default:10].
+// -h	help                                         Display this usage information.
+
+}
+
+process merge_results {
+// mergeCavemanResults -s %s -o %s -f %s
+// Usage:
+//     mergeCavemanResults [options] [file(s)...]
+
+//     Required parameters: --output -o File to output result to. --splitlist
+//     -s File containing list of split section --file-match -f ls style
+//     pattern match of files to be merged. e.g. 
+//     --help -h Brief help message.
+}
+
+process add_ids {
+//   cgpAppendIdsToVcf.pl -i %s -o %s 
+//   cgpAppendIdsToVcf.pl --help
+// Usage
+
+// Usage:
+//     cgpAppendIdsToVcf.pl [-h] -i this.vcf -o this_with_ids.vcf
+
+//       General Options:
+
+//       --help      (-h)       Brief documentation
+
+//             --file      (-i)       The file to append IDs to.
+
+//             --outFile   (-o)       The output filename
+
+//             Optional parameters:
+
+//             --idstart   (-g)       Will set a sequential id generator to the given integer value. If not present will assign UUIDs to each variant.
+
+//             --version   (-v)       Prints version information.
+
+//       Examples:
+
+//         cgpAppendIdsToVcf.pl -f this.vcf -o this_with_ids.vcf -g 1
+}
+
+process flag {
+  cgpFlagCaVEMan.pl -i %s -o %s -s %s -m %s -n %s -b %s -g %s -umv %s -ref %s -t %s -sa %s
+}
+*/
diff --git a/nextflow/nextflow.config b/nextflow/nextflow.config
index 5e85dff..f2fe13b 100644
--- a/nextflow/nextflow.config
+++ b/nextflow/nextflow.config
@@ -8,6 +8,7 @@ params {
   mutBam = null
   controlBam = null
   ignoreContigs = null
+  ignoreRegions = null
 
   //Optional setup - or empty file and blanket CN
   normcn = "NO_NCN"

From a756daf3789ce2f4b7ad5950f3407089e449922e Mon Sep 17 00:00:00 2001
From: David Jones <drj@sanger.ac.uk>
Date: Wed, 17 Aug 2022 12:39:52 +0100
Subject: [PATCH 6/8] mstep working

---
 .gitignore                       | 1 +
 nextflow/modules/caveman-core.nf | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index b801e6c..45b43e1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,3 +8,4 @@ setup.log
 /install
 .vstags
 .nextflow
+nextflow/.nextflow*
diff --git a/nextflow/modules/caveman-core.nf b/nextflow/modules/caveman-core.nf
index 4470105..b224e01 100644
--- a/nextflow/modules/caveman-core.nf
+++ b/nextflow/modules/caveman-core.nf
@@ -115,7 +115,7 @@ process mstep {
     val mstepMinBaseQual
     path rposFiles
 
-  publishDir "$baseDir/results/", mode: 'symlink', pattern: 'results/*/*.covs', overwrite: true
+  publishDir "$baseDir/", mode: 'symlink', pattern: 'results/*/*.covs', overwrite: true
 
   output:
     path "results/*/*.covs", emit: mstepResults

From 11c07aa62c07339b87fcbb1eeeb70fa3255a485c Mon Sep 17 00:00:00 2001
From: David Jones <drj@sanger.ac.uk>
Date: Fri, 9 Sep 2022 14:35:45 +0100
Subject: [PATCH 7/8] nf working up to merge

---
 nextflow/main.nf                 | 138 ++++++++++++++--
 nextflow/modules/caveman-core.nf | 261 ++++++++++++++++++-------------
 nextflow/nextflow.config         |  22 ++-
 3 files changed, 297 insertions(+), 124 deletions(-)

diff --git a/nextflow/main.nf b/nextflow/main.nf
index a49aaf2..a3d912e 100644
--- a/nextflow/main.nf
+++ b/nextflow/main.nf
@@ -51,6 +51,9 @@ def helpCavemanMessage() {
     --mutBam        Tumour BAM/CRAM file (co-located index and bas files)
     --controlBam    Normal BAM/CRAM file (co-located index and bas files)
     --ignoreContigs Location of tsv ignore regions file
+    --species       Species name to be output in VCF
+    --assembly      Species assembly to be output in VCF
+    --analysisName
 
 
 
@@ -76,6 +79,21 @@ def helpCavemanMessage() {
     --debug      Don't cleanup workarea on completion.
     --apid       Analysis process ID (numeric) - for cgpAnalysisProc header info
                   - not necessary for external use
+
+      priorMutProb = 0.000006
+  priorSnpProb = 0.000100
+  normalContamination = 0.100000
+  refBias = 0.950000
+  mutProbCutoff = 0.800000
+  snpProbCutoff = 0.950000
+  minTumCvg = 1
+  minNormCvg = 1
+  normProtocol = "WGS"
+  tumProtocol = "WGS"
+  normCNFill = 2
+  tumCNFill = 2
+  normSeqPlatform = "NO_PLAT"
+  tumSeqPlatform = "NO_PLAT"
     """.stripIndent()
 }
 
@@ -95,7 +113,7 @@ def getUsableContigIndices(genomeFai, ignoreContigs) {
   return indices
 }
 
-include { setup; split; split_concat; mstep } from './modules/caveman-core.nf'
+include { setup; split; split_concat; generate_mstep_indices; mstep; merge; estep; merge_results; add_ids; } from './modules/caveman-core.nf'
 
 //mstep; merge; estep; combine;
 // Print help if no workflow specified
@@ -117,11 +135,14 @@ workflow caveman {
     CaVEMan:caveman - NF Pipeline
     ------------------------------
     //setup
+    analysisName: ${params.analysisName}
     genomefa: ${params.genomefa}
     mutBam: ${params.mutBam}
     controlBam: ${params.controlBam}
     ignoreContigs: ${params.ignoreContigs}
     ignoreRegions: ${params.ignoreRegions}
+    species: ${params.species}
+    assembly: ${params.assembly}
   """.stripIndent()
 
   main:
@@ -140,6 +161,8 @@ workflow caveman {
       wt,
       ign_file,
       ignoreContigFile,
+      params.species,
+      params.assembly,
       //Optional setup - or empty file and blanket CN
       params.normcn,
       params.tumcn,
@@ -154,7 +177,26 @@ workflow caveman {
       params.maxSplitReadNum,
 
       params.mstepSplitSize,
-      params.mstepMinBaseQual
+      params.minBaseQual,
+
+      //Optional estep
+      params.priorMutProb,
+      params.priorSnpProb,
+      params.normalContamination,
+      params.refBias,
+      params.mutProbCutoff,
+      params.snpProbCutoff,
+      params.minTumCvg,
+      params.minNormCvg,
+      params.normProtocol,
+      params.tumProtocol,
+      params.normCNFill,
+      params.tumCNFill,
+      params.normSeqPlatform,
+      params.tumSeqPlatform,
+
+      //Analysis name for output files
+      params.analysisName
     )
 }
 
@@ -167,6 +209,8 @@ workflow subCaVEMan {
     wt
     ign_file
     ignoreContigFile
+    species
+    assembly
     //Optional setup - or empty file and blanket CN
     normcn
     tumcn
@@ -177,13 +221,28 @@ workflow subCaVEMan {
     includeSW
     includeSE
     includeDups
-
     //Optional spliut 
     maxSplitReadNum
-
     //Optional mstep
     mstepSplitSize
-    mstepMinBaseQual
+    minBaseQual
+    //Optional estep
+    priorMutProb
+    priorSnpProb
+    normalContamination
+    refBias
+    mutProbCutoff
+    snpProbCutoff
+    minTumCvg
+    minNormCvg
+    normProtocol
+    tumProtocol
+    normCNFill
+    tumCNFill
+    normSeqPlatform
+    tumSeqPlatform
+    //Output files naming
+    analysisName
 
   main:
     setup(
@@ -210,13 +269,15 @@ workflow subCaVEMan {
       wt,
       ign_file,
       contig_index,
-      maxSplitReadNum,
+      maxSplitReadNum
     )
     //Concatenate split files
     split_concat(
-      split.out.splitFiles.collect(),
+      split.out.splitFiles.collect()
+    )
+    generate_mstep_indices(
+      split_concat.out.splitList
     )
-    Channel.of(1..file("$baseDir/splitList").countLines()).set({mstep_index})
     mstep(
       setup.out.configFile,
       setup.out.algBeanFile, 
@@ -225,15 +286,58 @@ workflow subCaVEMan {
       mt,
       wt,
       ign_file,
-      mstep_index,
       mstepSplitSize,
-      mstepMinBaseQual,
-      split.out.rposFiles.collect()
+      minBaseQual,
+      split.out.rposFiles.collect(),
+      generate_mstep_indices.out.mstep_index.splitText()
+    )
+    merge(
+      setup.out.configFile,
+      setup.out.algBeanFile,
+      split_concat.out.splitList,
+      mstep.out.mstepResults.collect(),
+      "$launchDir/results"
+    )
+    estep(
+      setup.out.configFile,
+      merge.out.covArrFile,
+      merge.out.probsArrFile,
+      setup.out.algBeanFile, 
+      split_concat.out.splitList,
+      "$launchDir/results",
+      genome,
+      mt,
+      wt,
+      ign_file,
+      split.out.rposFiles.collect(),
+      minBaseQual,
+      species,
+      assembly,
+      priorMutProb,
+      priorSnpProb,
+      normalContamination,
+      refBias,
+      mutProbCutoff,
+      snpProbCutoff,
+      minTumCvg,
+      minNormCvg,
+      normProtocol,
+      tumProtocol,
+      normCNFill,
+      tumCNFill,
+      normSeqPlatform,
+      tumSeqPlatform,
+      generate_mstep_indices.out.mstep_index.splitText()
+    )
+    merge_results(
+      "$launchDir/results",
+      split_concat.out.splitList,
+      estep.out.estep_idx.collect(),
+      analysisName
+    )
+    add_ids(
+      merge_results.out.mergedMutsVCF,
+      merge_results.out.mergedSnpVCF,
+      analysisName
     )
-    // merge(
-    //   setup.out.configFile,
-    //   setup.out.algBeanFile, 
-    //   split_concat.out.splitList,
-    //   mstep.mstepResults.collect()
-    // )
 }
diff --git a/nextflow/modules/caveman-core.nf b/nextflow/modules/caveman-core.nf
index b224e01..df05b5d 100644
--- a/nextflow/modules/caveman-core.nf
+++ b/nextflow/modules/caveman-core.nf
@@ -2,7 +2,6 @@ def get_chr_count (genome, ignorecontigs) {
   echo "$genome\t$ignorecontigs"
 }
 
-
 process setup {
 
   input:
@@ -15,17 +14,12 @@ process setup {
     //Optional setup - or empty file and blanket CN
     val normcn
     val tumcn
-    //tumcn
-    // file ('caveman.cfg.ini')
-    // file ('alg_bean')
-    // path ('results')
-    // file ('splitList')
     val includeSW
     val includeSE
     val includeDups
 
-  publishDir "$baseDir", mode: 'copy', pattern: 'caveman.cfg.ini', overwrite: true
-  publishDir "$baseDir", mode: 'copy', pattern: 'alg_bean', overwrite: true
+  publishDir "$launchDir", mode: 'copy', pattern: 'caveman.cfg.ini', overwrite: true
+  publishDir "$launchDir", mode: 'copy', pattern: 'alg_bean', overwrite: true
 
   output:
     //setup outputs
@@ -67,7 +61,7 @@ process split {
     each index
     val maxSplitReadNum
 
-  publishDir "$baseDir/splits/", mode: 'symlink', pattern: 'splitList.*', overwrite: true
+  publishDir "$launchDir/splits/", mode: 'symlink', pattern: 'splitList.*', overwrite: true
 
   output:
     path "splitList.*", emit: splitFiles
@@ -87,18 +81,31 @@ process split_concat {
   input:
     path splitFileList
 
-  publishDir "$baseDir", mode: 'copy', pattern: 'splitList', overwrite: true
+  publishDir "$launchDir/", mode: 'copy', pattern: 'splitList.tmp', overwrite: true
+
+  output:
+    path "splitList", emit: splitList
+
+  script:
+    """
+    for splitFile in ${splitFileList}
+    do
+      cat \$splitFile >> splitList
+    done
+    """
+}
+
+process generate_mstep_indices {
+  input:
+    path 'fileForLineCount'
 
   output:
-    path 'splitList', emit: splitList
+    stdout emit: mstep_index
 
   script:
-  """
-  for splitFile in ${splitFileList}
-  do
-    cat \$splitFile >> splitList
-  done
-  """
+    """
+    sed -n '=' fileForLineCount
+    """
 }
 
 process mstep {
@@ -110,12 +117,12 @@ process mstep {
     tuple path('mt.bam'), path('mt.bam.bai')
     tuple path('wt.bam'), path('wt.bam.bai')
     path ('ign.file')
-    each index
     val mstepSplitSize
-    val mstepMinBaseQual
+    val minBaseQual
     path rposFiles
+    each index
 
-  publishDir "$baseDir/", mode: 'symlink', pattern: 'results/*/*.covs', overwrite: true
+  publishDir "$launchDir/", mode: 'symlink', pattern: 'results/*/*.covs', overwrite: true
 
   output:
     path "results/*/*.covs", emit: mstepResults
@@ -123,114 +130,158 @@ process mstep {
   script:
     """
     caveman mstep \
-    -i $index \
     -f caveman.cfg.ini \
     -a $mstepSplitSize \
-    -m $mstepMinBaseQual
+    -m $minBaseQual \
+    -i $index
     """
 
 }
 
-// process merge{
-//   input:
-//     path 'caveman.cfg.ini'
-//     path 'alg_bean'
-//     path 'splitList'
-//     path 'results'
-
-//   output:
-//     path 'covs_arr', emit: covArrFile
-//     path 'probs_arr', emit: probsArrFile
-
-//   publishDir "$baseDir/", mode: 'copy', pattern: 'covs_arr', overwrite: true
-//   publishDir "$baseDir/", mode: 'copy', pattern: 'probs_arr', overwrite: true
-
-//   script:
-//   """
-//   caveman merge \
-//   -c caveman.cfg.ini \
-//   -p probs_arr \
-//   -f covs_arr
-//   """
-// }
+process merge {
+  input:
+    path 'caveman.cfg.ini'
+    path 'alg_bean'
+    path 'splitList'
+    path 'mstepResults'
+    path 'results'
+
+  output:
+    path 'covs_arr', emit: covArrFile
+    path 'probs_arr', emit: probsArrFile
 
-/*
-process estep {
-// estep -i %d -k %f -g %s -o %s -v %s -w %s -f %s -l %s -r %s
-// const my $CAVEMAN_ESTEP_MUT_PRIOR_EXT => q{ -c %s};
-// const my $CAVEMAN_ESTEP_SNP_PRIOR_EXT => q{ -d %s};
-// const my $CAVEMAN_ESTEP_NPLATFORM_EXT => q{ -P %s};
-// const my $CAVEMAN_ESTEP_TPLATFORM_EXT => q{ -T %s};
-// caveman estep -i jobindex [-f file] [-m int] [-k float] [-b float] [-p float] [-q float] [-x int] [-y int] [-c float] [-d float] [-a int]
-
-// -i  --index [int]                                Job index (e.g. from $LSB_JOBINDEX)
-
-// Optional
-// -f  --config-file [file]                         Path to the config file produced by setup. [default:'caveman.cfg.ini']
-// -m  --min-base-qual [int]                        Minimum base quality for inclusion of a read position [default:11]
-// -c  --prior-mut-probability [float]              Prior somatic probability [default:0.000006]
-// -d  --prior-snp-probability [float]              Prior germline mutant probability [default:0.000100]
-// -k  --normal-contamination [float]               Normal contamination of tumour [default:0.100000]
-// -b  --reference-bias [float]                     Reference bias [default:0.950000]
-// -p  --mut-probability-cutoff [float]             Minimum probability call for a somatic mutant position to be output [default:0.800000]
-// -q  --snp-probability-cutoff [float]             Minimum probability call for a germline mutant position to be output [default:0.950000]
-// -x  --min-tum-coverage [int]                     Minimum tumour coverage for analysis of a position [default:1]
-// -y  --min-norm-coverage [int]                    Minimum normal coverage for analysis of a position [default:1]
-// -a  --split-size [int]                           Size of section to retrieve at a time from bam file. Allows memory footprint tuning [default:50000].
-// -s  --debug                                      Adds an extra output to a debug file. Every base analysed has an output
-// -g  --cov-file [file]                            File location of the covariate array. [default:'covs_arr']
-// -o  --prob-file [file]                           File location of the prob array. [default:'probs_arr']
-// -v  --species-assembly [string]                  Species assembly (eg 37/GRCh37), required if bam header SQ lines do not contain AS and SP information.
-// -w  --species [string]                           Species name (eg Human), required if bam header SQ lines do not contain AS and SP information.
-// -n  --normal-copy-number [int]                   Copy number to use when filling gaps in the normal copy number file [default:2].
-// -t  --tumour-copy-number [int]                   Copy number to use when filling gaps in the tumour copy number file [default:2].
-// -l  --normal-protocol [string]                   Normal protocol. Ideally this should match -r but not checked (WGS|WXS|RNA|RNA-Seq|AMPLICON|TARGETED) [default:WGS].
-// -r  --tumour-protocol [string]                   Tumour protocol. Ideally this should match -l but not checked (WGS|WXS|RNA|RNA-Seq|AMPLICON|TARGETED) [default:WGS].
-// -P  --normal-platform [string]                   Normal platform. Overrides the values retrieved from bam header.
-// -T  --tumour-platform [string]                   Tumour platform. Overrides the values retrieved from bam header.
-// -M  --max-copy-number [int]                      Maximum copy number permitted. If exceeded the copy number for the offending region will be set to this value. [default:10].
-// -h	help                                         Display this usage information.
+  publishDir "$launchDir/", mode: 'copy', pattern: 'covs_arr', overwrite: true
+  publishDir "$launchDir/", mode: 'copy', pattern: 'probs_arr', overwrite: true
 
+  script:
+    """
+    caveman merge \
+    -c covs_arr \
+    -p probs_arr \
+    -f caveman.cfg.ini
+    """
 }
 
-process merge_results {
-// mergeCavemanResults -s %s -o %s -f %s
-// Usage:
-//     mergeCavemanResults [options] [file(s)...]
-
-//     Required parameters: --output -o File to output result to. --splitlist
-//     -s File containing list of split section --file-match -f ls style
-//     pattern match of files to be merged. e.g. 
-//     --help -h Brief help message.
-}
+process estep {
 
-process add_ids {
-//   cgpAppendIdsToVcf.pl -i %s -o %s 
-//   cgpAppendIdsToVcf.pl --help
-// Usage
+  input:
+    path 'caveman.cfg.ini'
+    path 'covs_arr'
+    path 'probs_arr'
+    path 'alg_bean'
+    path 'splitList'
+    path 'results'
+    tuple path('genome.fa'), path('genome.fa.fai')
+    tuple path('mt.bam'), path('mt.bam.bai')
+    tuple path('wt.bam'), path('wt.bam.bai')
+    path ('ign.file')
+    path rposFiles
+    val minBQ
+    val spec
+    val ass
+    val priorMutProb
+    val priorSnpProb
+    val normalContamination
+    val refBias
+    val mutProbCutoff
+    val snpProbCutoff
+    val minTumCvg
+    val minNormCvg
+    val normProtocol
+    val tumProtocol
+    val normCNFill
+    val tumCNFill
+    val normSeqPlatform
+    val tumSeqPlatform
+    each index
+
+  output:
+    path "results/*/*.vcf.gz", emit: estepVCFs
+    path "results/*/*.bed", emit: estepNoAnalysis
+    val index, emit: estep_idx
+
+  publishDir "$launchDir/", mode: 'symlink', pattern: 'results/*/*.vcf.gz', overwrite: true
+  publishDir "$launchDir/", mode: 'symlink', pattern: 'results/*/*.bed', overwrite: true
+
+  script:
+    def applyNormPlat = normSeqPlatform != "NO_PLAT" ? "-P $normSeqPlatform" : ''
+    def applyTumPlat = tumSeqPlatform != "NO_PLAT" ? "-T $tumSeqPlatform" : ''
+    """
+    caveman estep \
+    -f caveman.cfg.ini \
+    -g covs_arr \
+    -o probs_arr \
+    -m $minBQ \
+    -v $ass \
+    -w $spec \
+    -c $priorMutProb \
+    -d $priorSnpProb \
+    -k $normalContamination \
+    -b $refBias \
+    -p $mutProbCutoff \
+    -q $snpProbCutoff \
+    -x $minTumCvg \
+    -y $minNormCvg \
+    -l $normProtocol \
+    -r $tumProtocol \
+    -n $normCNFill \
+    -t $tumCNFill \
+    ${applyNormPlat} \
+    ${applyTumPlat} \
+    -i $index
+    """
 
-// Usage:
-//     cgpAppendIdsToVcf.pl [-h] -i this.vcf -o this_with_ids.vcf
+}
 
-//       General Options:
+process merge_results {
+  input:
+    path 'results'
+    path 'splitList'
+    val indexes
+    val analysisName
 
-//       --help      (-h)       Brief documentation
+  output:
+    path "${analysisName}.muts.raw.vcf", emit: mergedMutsVCF
+    path "${analysisName}.snps.raw.vcf", emit: mergedSnpVCF
+    path "${analysisName}.no_analysis.bed.gz", emit: mergedNoAnalysis
 
-//             --file      (-i)       The file to append IDs to.
+  publishDir "$launchDir/", mode: 'copy', pattern: '*.no_analysis.bed.gz*', overwrite: true
 
-//             --outFile   (-o)       The output filename
+  script:
+    """
+    mergeCavemanResults -s splitList -o ${analysisName}.muts.raw.vcf -f results/%/%.muts.vcf.gz && 
+    mergeCavemanResults -s splitList -o ${analysisName}.snps.raw.vcf -f results/%/%.snps.vcf.gz && \
+    mergeCavemanResults -s splitList -o ${analysisName}.no_analysis.bed -f results/%/%.no_analysis.bed && \
+    bgzip ${analysisName}.no_analysis.bed && tabix -p bed ${analysisName}.no_analysis.bed.gz\
+    """
 
-//             Optional parameters:
+}
 
-//             --idstart   (-g)       Will set a sequential id generator to the given integer value. If not present will assign UUIDs to each variant.
+process add_ids {
+  input:
+    path 'muts.raw.vcf'
+    path 'snp.raw.vcf'
+    val analysisName
 
-//             --version   (-v)       Prints version information.
+  output:
+    path "${analysisName}.muts.vcf.gz", emit: mutsVCFIIDs
+    path "${analysisName}.muts.vcf.gz.tbi", emit: mutsVCFIIDsTbx
+    path "${analysisName}.snps.vcf.gz", emit: snpsVCFIIDs
+    path "${analysisName}.snps.vcf.gz.tbi", emit: snpsVCFIIDsTbx
 
-//       Examples:
+  publishDir "$launchDir/", mode: 'copy', pattern: '*.snps.vcf.gz*', overwrite: true
+  publishDir "$launchDir/", mode: 'copy', pattern: '*.muts.vcf.gz*', overwrite: true
 
-//         cgpAppendIdsToVcf.pl -f this.vcf -o this_with_ids.vcf -g 1
+  script:
+    """
+    cgpAppendIdsToVcf.pl -i muts.raw.vcf -o ${analysisName}.muts.vcf && \
+    cgpAppendIdsToVcf.pl -i snp.raw.vcf -o ${analysisName}.snps.vcf && \
+    bgzip ${analysisName}.muts.vcf && bgzip ${analysisName}.snps.vcf &&
+    tabix -p vcf ${analysisName}.muts.vcf.gz && tabix -p vcf ${analysisName}.snps.vcf.gz
+    """
 }
 
+/*
 process flag {
   cgpFlagCaVEMan.pl -i %s -o %s -s %s -m %s -n %s -b %s -g %s -umv %s -ref %s -t %s -sa %s
 }
diff --git a/nextflow/nextflow.config b/nextflow/nextflow.config
index f2fe13b..fdccdd8 100644
--- a/nextflow/nextflow.config
+++ b/nextflow/nextflow.config
@@ -9,6 +9,9 @@ params {
   controlBam = null
   ignoreContigs = null
   ignoreRegions = null
+  species = null
+  assembly = null
+  analysisName = null
 
   //Optional setup - or empty file and blanket CN
   normcn = "NO_NCN"
@@ -26,8 +29,23 @@ params {
 
   //Optional mstep
   mstepSplitSize = 50000
-  mstepMinBaseQual = 11
-
+  minBaseQual = 11
+
+  //Optional estep
+  priorMutProb = 0.000006
+  priorSnpProb = 0.000100
+  normalContamination = 0.100000
+  refBias = 0.950000
+  mutProbCutoff = 0.800000
+  snpProbCutoff = 0.950000
+  minTumCvg = 1
+  minNormCvg = 1
+  normProtocol = "WGS"
+  tumProtocol = "WGS"
+  normCNFill = 2
+  tumCNFill = 2
+  normSeqPlatform = "NO_PLAT"
+  tumSeqPlatform = "NO_PLAT"
 
 }
 

From 612ff3d773c4ccb9e1db0eb9cb5df5ae2341abfe Mon Sep 17 00:00:00 2001
From: David Jones <drj@sanger.ac.uk>
Date: Mon, 12 Sep 2022 13:36:25 +0100
Subject: [PATCH 8/8] Working nf upto merge results and add IDs. Includes setup
 for lsf

---
 nextflow/nextflow.config | 67 +++++++++++++++++++++++++++++++---------
 1 file changed, 53 insertions(+), 14 deletions(-)

diff --git a/nextflow/nextflow.config b/nextflow/nextflow.config
index fdccdd8..d2e49d0 100644
--- a/nextflow/nextflow.config
+++ b/nextflow/nextflow.config
@@ -43,53 +43,92 @@ params {
   normProtocol = "WGS"
   tumProtocol = "WGS"
   normCNFill = 2
-  tumCNFill = 2
+  tumCNFill = 5
   normSeqPlatform = "NO_PLAT"
   tumSeqPlatform = "NO_PLAT"
 
 }
 
-/*
 process {
 
-  //container = "quay.io/wtsicgp/cgpcavemanwrapper:1.18.1"
-
   withName: 'setup' {
     cpus = 1
-    memory = 250 MB
+    memory = '250 MB'
+    queue = 'small'
   }
 
   withName: 'split' {
-
+    cpus = 1
+    memory = '1500 MB'
+    queue = 'normal'
   }
 
   withName: 'split_concat' {
+    cpus = 1
+    memory = '2 GB'
+    queue = 'small'
+  }
 
+  withName: 'generate_mstep_indices' {
+    cpus = 1
+    memory = '500 MB'
+    queue = 'small'
   }
 
   withName: 'mstep' {
-
+    cpus = 1
+    memory = '8 GB'
+    queue = 'normal'
   }
 
   withName: 'merge' {
-
+    cpus = 1
+    memory = '5 GB'
+    queue = 'small'
   }
 
   withName: 'estep' {
-
+    cpus = 1
+    memory = '8 GB'
+    queue = 'normal'
   }
 
   withName: 'merge_results' {
-
+    cpus = 1
+    memory = '5 GB'
+    queue = 'small'
   }
 
   withName: 'add_ids' {
-
+    cpus = 1
+    memory = '2 GB'
+    queue = 'small'
   }
-
+/*
   withName: 'flag' {
     
-  }
+  }*/
+}
 
+// this will only work if there is a docker/singularity image available
+profiles {
+    local {
+        process.executor = 'local'
+    }
+    lsf {
+        process.executor = 'lsf'
+        executor.perJobMemLimit = true
+    }
+    docker {
+        docker.enabled         = true
+        docker.userEmulation   = true
+        singularity.enabled    = false
+    }
+    singularity {
+        singularity.enabled    = true
+        singularity.autoMounts = true
+        singularity.runOptions = '--cleanenv'
+        docker.enabled         = false
+        process.container = 'quay.io/wtsicgp/cgpcavemanwrapper:1.18.3'
+    }
 }
-*/