From 3ced867dbe26df7fb83ff96e87dda74b85d7f3a6 Mon Sep 17 00:00:00 2001 From: Natalia Garcia Date: Fri, 13 Sep 2024 22:19:43 +0000 Subject: [PATCH] template updates --- .pre-commit-config.yaml | 2 +- CHANGELOG.md | 4 ++ README.md | 2 +- bin/workflow_glue/check_bam_headers_in_dir.py | 8 ++- bin/workflow_glue/check_xam_index.py | 12 ++-- bin/workflow_glue/report.py | 11 ++-- docs/04_install_and_run.md | 2 +- lib/common.nf | 1 + lib/ingress.nf | 61 +++++++++++++------ nextflow.config | 6 +- 10 files changed, 72 insertions(+), 37 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 87626df..e950f47 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,7 +8,7 @@ repos: always_run: true pass_filenames: false additional_dependencies: - - epi2melabs==0.0.56 + - epi2melabs==0.0.57 - id: build_models name: build_models entry: datamodel-codegen --strict-nullable --base-class workflow_glue.results_schema_helpers.BaseModel --use-schema-description --disable-timestamp --input results_schema.yml --input-file-type openapi --output bin/workflow_glue/results_schema.py diff --git a/CHANGELOG.md b/CHANGELOG.md index ba130f7..c3ea2f7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [v1.1.4] +### Changed +- Updated Ezcharts to v0.11.2. + ## [v1.1.3] ### Fixed - Automated basecaller detection failing in some circumstances. diff --git a/README.md b/README.md index 3207d51..24a8c4c 100644 --- a/README.md +++ b/README.md @@ -64,7 +64,7 @@ therefore Nextflow will need to be installed before attempting to run the workflow. The workflow can currently be run using either -[Docker](https://www.docker.com/products/docker-desktop +[Docker](https://www.docker.com/products/docker-desktop) or [Singularity](https://docs.sylabs.io/guides/3.0/user-guide/index.html) to provide isolation of the required software. Both methods are automated out-of-the-box provided diff --git a/bin/workflow_glue/check_bam_headers_in_dir.py b/bin/workflow_glue/check_bam_headers_in_dir.py index 44e689b..199e056 100755 --- a/bin/workflow_glue/check_bam_headers_in_dir.py +++ b/bin/workflow_glue/check_bam_headers_in_dir.py @@ -29,7 +29,13 @@ def main(args): for xam_file in target_files: # get the `@SQ` and `@HD` lines in the header with pysam.AlignmentFile(xam_file, check_sq=False) as f: - sq_lines = f.header.get("SQ") + # compare only the SN/LN/M5 elements of SQ to avoid labelling XAM with + # same reference but different SQ.UR as mixed_header (see CW-4842) + sq_lines = [{ + "SN": sq["SN"], + "LN": sq["LN"], + "M5": sq.get("M5"), + } for sq in f.header.get("SQ", [])] hd_lines = f.header.get("HD") # Check if it is sorted. # When there is more than one BAM, merging/sorting diff --git a/bin/workflow_glue/check_xam_index.py b/bin/workflow_glue/check_xam_index.py index 3beae14..f9f631e 100755 --- a/bin/workflow_glue/check_xam_index.py +++ b/bin/workflow_glue/check_xam_index.py @@ -14,12 +14,12 @@ def validate_xam_index(xam_file): Invalid indexes will fail the call with a ValueError: ValueError: fetch called on bamfile without index """ - alignments = pysam.AlignmentFile(xam_file, check_sq=False) - try: - alignments.fetch() - has_valid_index = True - except ValueError: - has_valid_index = False + with pysam.AlignmentFile(xam_file, check_sq=False) as alignments: + try: + alignments.fetch() + has_valid_index = True + except ValueError: + has_valid_index = False return has_valid_index diff --git a/bin/workflow_glue/report.py b/bin/workflow_glue/report.py index b518564..c7b4b33 100755 --- a/bin/workflow_glue/report.py +++ b/bin/workflow_glue/report.py @@ -75,7 +75,7 @@ def argparser(): def main(args): """Run the entry point.""" - logger = get_named_logger("Report") + logger = get_named_logger("report") # in case there was a sample sheet, read it so that we can show the metadata in the # per-sample summary table @@ -502,12 +502,9 @@ def populate_report(report, metadata, all_datasets, ref_fasta, downsampling_size palette=palette, ) plt.title = {"text": "Coverage along amplicon"} - plt.xAxis.max = depth_df["pos"].max() - plt.xAxis.name = "Position along amplicon" - plt.yAxis.name = "Sequencing depth" - for s in plt.series: - # only show the line and no circles - s.showSymbol = False + plt._fig.x_range.end = depth_df["pos"].max() + plt._fig.xaxis.axis_label = "Position along amplicon" + plt._fig.yaxis.axis_label = "Sequencing depth" EZChart(plt, "epi2melabs") # add variant tables (skip if there were no VCFs) diff --git a/docs/04_install_and_run.md b/docs/04_install_and_run.md index 3c2fe46..d3962ea 100644 --- a/docs/04_install_and_run.md +++ b/docs/04_install_and_run.md @@ -9,7 +9,7 @@ therefore Nextflow will need to be installed before attempting to run the workflow. The workflow can currently be run using either -[Docker](https://www.docker.com/products/docker-desktop +[Docker](https://www.docker.com/products/docker-desktop) or [Singularity](https://docs.sylabs.io/guides/3.0/user-guide/index.html) to provide isolation of the required software. Both methods are automated out-of-the-box provided diff --git a/lib/common.nf b/lib/common.nf index 2a31d49..3a8568d 100644 --- a/lib/common.nf +++ b/lib/common.nf @@ -15,6 +15,7 @@ process getParams { } process configure_igv { + publishDir "${params.out_dir}/", mode: 'copy', pattern: 'igv.json', enabled: params.containsKey("igv") && params.igv label "wf_common" cpus 1 memory "2 GB" diff --git a/lib/ingress.nf b/lib/ingress.nf index 6d14a83..2931357 100644 --- a/lib/ingress.nf +++ b/lib/ingress.nf @@ -197,15 +197,15 @@ def fastq_ingress(Map arguments) .map { meta, files, stats -> // new `arity: '1..*'` would be nice here files = files instanceof List ? files : [files] - new_keys = [ + def new_keys = [ "group_key": groupKey(meta["alias"], files.size()), "n_fastq": files.size()] - grp_index = (0.. - new_keys = [ + def new_keys = [ "group_index": "${meta["alias"]}_${grp_i}"] [meta + new_keys, files, stats] } @@ -279,17 +279,19 @@ def xam_ingress(Map arguments) // sorted, the index will be used. meta, paths -> boolean is_array = paths instanceof ArrayList - String xai_fn + String src_xam + String src_xai // Using `.uri` or `.Uri()` leads to S3 paths to be prefixed with `s3:///` // instead of `s3://`, causing the workflow to not find the index file. // `.toUriString()` returns the correct path. if (!is_array){ + src_xam = paths.toUriString() def xai = file(paths.toUriString() + ".bai") if (xai.exists()){ - xai_fn = xai.toUriString() + src_xai = xai.toUriString() } } - [meta + [xai_fn: xai_fn], paths] + [meta + [src_xam: src_xam, src_xai: src_xai], paths] } | checkBamHeaders | map { meta, paths, is_unaligned_env, mixed_headers_env, is_sorted_env -> @@ -331,9 +333,9 @@ def xam_ingress(Map arguments) // - between 1 and `N_OPEN_FILES_LIMIT` aligned files no_files: n_files == 0 indexed: \ - n_files == 1 && (meta["is_unaligned"] || meta["is_sorted"]) && meta["xai_fn"] - to_index: - n_files == 1 && (meta["is_unaligned"] || meta["is_sorted"]) && !meta["xai_fn"] + n_files == 1 && (meta["is_unaligned"] || meta["is_sorted"]) && meta["src_xai"] + to_index: \ + n_files == 1 && (meta["is_unaligned"] || meta["is_sorted"]) && !meta["src_xai"] to_catsort: \ (n_files == 1) || (n_files > N_OPEN_FILES_LIMIT) || meta["is_unaligned"] to_merge: true @@ -358,20 +360,20 @@ def xam_ingress(Map arguments) .map { meta, files, stats -> // new `arity: '1..*'` would be nice here files = files instanceof List ? files : [files] - new_keys = [ + def new_keys = [ "group_key": groupKey(meta["alias"], files.size()), "n_fastq": files.size()] - grp_index = (0.. - new_keys = [ + def new_keys = [ "group_index": "${meta["alias"]}_${grp_i}"] [meta + new_keys, files, stats] } .map { meta, path, stats -> - [meta.findAll { it.key !in ['xai_fn', 'is_sorted'] }, path, stats] + [meta.findAll { it.key !in ['is_sorted', 'src_xam', 'src_xai'] }, path, stats] } // add number of reads, run IDs, and basecall models to meta @@ -388,10 +390,18 @@ def xam_ingress(Map arguments) | sortBam | groupTuple | mergeBams + | map{ + meta, bam, bai -> + [meta + [src_xam: null, src_xai: null], bam, bai] + } // now handle samples with too many files for `samtools merge` ch_catsorted = ch_result.to_catsort | catSortBams + | map{ + meta, bam, bai -> + [meta + [src_xam: null, src_xai: null], bam, bai] + } // Validate the index of the input BAM. // If the input BAM index is invalid, regenerate it. @@ -399,7 +409,7 @@ def xam_ingress(Map arguments) ch_to_validate = ch_result.indexed | map{ meta, paths -> - bai = paths && meta.xai_fn ? file(meta.xai_fn) : null + def bai = paths && meta.src_xai ? file(meta.src_xai) : null [meta, paths, bai] } | branch { @@ -429,6 +439,10 @@ def xam_ingress(Map arguments) ch_indexed = ch_result.to_index | mix( ch_validated.invalid_idx ) | samtools_index + | map{ + meta, bam, bai -> + [meta + [src_xai: null], bam, bai] + } // Add extra null for the missing index to input.missing // as well as the missing metadata. @@ -439,7 +453,7 @@ def xam_ingress(Map arguments) ) | map{ meta, paths -> - [meta + [xai_fn: null, is_sorted: false], paths, null] + [meta + [src_xam: null, src_xai: null, is_sorted: false], paths, null] } // Combine all possible inputs @@ -480,7 +494,7 @@ def xam_ingress(Map arguments) } // Remove metadata that are unnecessary downstream: - // meta.xai_fn: not needed, as it will be part of the channel as a file + // meta.src_xai: not needed, as it will be part of the channel as a file // meta.is_sorted: if data are aligned, they will also be sorted/indexed // // The output meta can contain the following flags: @@ -498,7 +512,7 @@ def xam_ingress(Map arguments) ch_result | map{ meta, bam, bai, stats -> - [meta.findAll { it.key !in ['xai_fn', 'is_sorted'] }, [bam, bai], stats] + [meta.findAll { it.key !in ['is_sorted'] }, [bam, bai], stats] }, "xam" ) @@ -508,6 +522,19 @@ def xam_ingress(Map arguments) | map{ it.flatten() } + // Final check to ensure that src_xam/src_xai is not an s3 + // path. If so, drop it. We check src_xam also for src_xai + // as, the latter is irrelevant if the former is in s3. + | map{ + meta, bam, bai, stats -> + def xam = meta.src_xam + def xai = meta.src_xai + if (meta.src_xam){ + xam = meta.src_xam.startsWith('s3://') ? null : meta.src_xam + xai = meta.src_xam.startsWith('s3://') ? null : meta.src_xai + } + [ meta + [src_xam: xam, src_xai: xai], bam, bai, stats ] + } return ch_result } diff --git a/nextflow.config b/nextflow.config index ffd5378..0c888b9 100644 --- a/nextflow.config +++ b/nextflow.config @@ -62,8 +62,8 @@ params { "--fastq 'wf-amplicon-demo/fastq'", "--reference 'wf-amplicon-demo/reference.fa'" ] - common_sha = "shad399cf22079b5b153920ac39ee40095a677933f1" - container_sha = "sha79b5ac4013195fec9809f2b71ef55b87bf9c7f16" + common_sha = "shad28e55140f75a68f59bbecc74e880aeab16ab158" + container_sha = "sha0ba67476938520e6f132759780d0a0e902925c59" container_sha_medaka = "sha3486abaab0d3b90351617eb8622acf2028edb154" agent = null } @@ -76,7 +76,7 @@ manifest { description = 'Amplicon workflow' mainScript = 'main.nf' nextflowVersion = '>=23.04.2' - version = 'v1.1.3' + version = 'v1.1.4' } epi2melabs {