From cc0ea1fdebea913d49b51a14fa00b091daa19705 Mon Sep 17 00:00:00 2001 From: Richard de Borja Date: Wed, 4 May 2022 11:35:12 -0400 Subject: [PATCH 1/4] updated to support Pangolin 4 and processing the lineage report --- README.md | 14 ++++++++++++++ VERSION.txt | 2 +- workflow/envs/environment.yml | 8 +++----- workflow/rules/analysis.smk | 5 +++-- workflow/rules/common.smk | 9 +++++++++ 5 files changed, 30 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 01a740a..1201c69 100644 --- a/README.md +++ b/README.md @@ -226,6 +226,20 @@ Variant annotation output can be found in `qc_annotation` and the recurrent amino acid change heatmap can be found in `plots/_aa_mutation_heatmap.pdf`. +## Pangolin Version 4 +Pangolin version 4 included several changes which required updates +to the `ncov-tools` environment. By default, `ncov-tools` will run pangolin +4 and will require changes to `ncov-parser` version 1.9 to parse the output +and populate the summary QC file. + +Backward compatibility with Pangolin 3 is available and will require the following +parameter addition in the `config.yaml` file: +``` +pangolin_version: "3" +``` +Note that only the major version ("3" or "4") needs to be specified, not the full version number. + + ## Credit and Acknowledgements * The tree-with-SNPs plot was inspired by a plot shared by Mads Albertsen. 
diff --git a/VERSION.txt b/VERSION.txt index c009739..da0694b 100644 --- a/VERSION.txt +++ b/VERSION.txt @@ -1 +1 @@ -v.1.8.0 +v.1.9.0 diff --git a/workflow/envs/environment.yml b/workflow/envs/environment.yml index 0bfc987..a846207 100644 --- a/workflow/envs/environment.yml +++ b/workflow/envs/environment.yml @@ -1,4 +1,4 @@ -name: ncov-qc +name: ncov-qc-rdb channels: - conda-forge - bioconda @@ -32,10 +32,8 @@ dependencies: - dendropy>=4.4.0 - pyvcf - ncov-parser - - git+https://github.com/hCoV-2019/lineages.git - - git+https://github.com/hCoV-2019/pangolin.git - - git+https://github.com/cov-lineages/pangoLEARN.git + - git+https://github.com/cov-lineages/pangolin.git + - git+https://github.com/cov-lineages/pangolin-data.git - git+https://github.com/cov-lineages/constellations.git - git+https://github.com/cov-lineages/scorpio.git - - git+https://github.com/cov-lineages/pango-designation.git - git+https://github.com/jts/ncov-watch.git diff --git a/workflow/rules/analysis.smk b/workflow/rules/analysis.smk index 851e9e8..cacecc8 100644 --- a/workflow/rules/analysis.smk +++ b/workflow/rules/analysis.smk @@ -152,7 +152,8 @@ rule make_sample_qc_summary: py_script="get_qc.py", metadata_opt=get_qc_summary_metadata_opt, platform_opt=get_platform_opt, - run_name_opt=get_run_name_opt + run_name_opt=get_run_name_opt, + pangolin_version_opt=get_pangolin_version_opt shell: "{params.py_script} --alleles {input.alleles} \ --coverage {input.samplecoverage} \ @@ -162,7 +163,7 @@ rule make_sample_qc_summary: --sample {wildcards.sample} \ --lineage {input.lineagereport} \ --aa_table {input.aa_table} \ - --mutations {input.watch} \ + --mutations {input.watch} {params.pangolin_version_opt} \ --run_name {params.run_name_opt} > {output}" # merge the per-sample summary files into the run-level report diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index e0f3810..3bd9ddd 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -235,6 +235,15 @@ def 
get_annotated_variants(wildcards): def get_all_masked_consensus(wildcards): return ["masked_fasta/{sample}.masked_consensus.fasta".format(sample=s) for s in get_sample_names()] +def get_pangolin_version_opt(wildcards): + if "pangolin_version" in config: + return "--pangolin_ver %s" % (config['pangolin_version']) + else: + return "" + + + + # generate the amplicon-level bed file from the input primer bed rule make_amplicon_bed: From 151b083b6bffeebf7dc55bdecd7c1a9462fc78f3 Mon Sep 17 00:00:00 2001 From: Richard de Borja Date: Wed, 4 May 2022 11:38:40 -0400 Subject: [PATCH 2/4] fixed conda env name typo --- workflow/envs/environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/envs/environment.yml b/workflow/envs/environment.yml index a846207..0ceaacc 100644 --- a/workflow/envs/environment.yml +++ b/workflow/envs/environment.yml @@ -1,4 +1,4 @@ -name: ncov-qc-rdb +name: ncov-qc channels: - conda-forge - bioconda From 482f6ebc3931255a46bcad0014d1df7617e7d45b Mon Sep 17 00:00:00 2001 From: Richard de Borja Date: Thu, 5 May 2022 23:19:16 -0400 Subject: [PATCH 3/4] fixed pyvcf to pyvcf3 to fix Docker issue --- workflow/envs/environment.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflow/envs/environment.yml b/workflow/envs/environment.yml index 0ceaacc..a940187 100644 --- a/workflow/envs/environment.yml +++ b/workflow/envs/environment.yml @@ -1,4 +1,4 @@ -name: ncov-qc +name: ncov-qc-pyvcf3 channels: - conda-forge - bioconda @@ -30,7 +30,7 @@ dependencies: - usher - pip: - dendropy>=4.4.0 - - pyvcf + - pyvcf3 - ncov-parser - git+https://github.com/cov-lineages/pangolin.git - git+https://github.com/cov-lineages/pangolin-data.git From 85ea203a007acf2f626a9b88fc3710dec0ba8aa9 Mon Sep 17 00:00:00 2001 From: Richard de Borja Date: Thu, 5 May 2022 23:44:50 -0400 Subject: [PATCH 4/4] fixed conda environment name --- workflow/envs/environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/workflow/envs/environment.yml b/workflow/envs/environment.yml index a940187..52ec21e 100644 --- a/workflow/envs/environment.yml +++ b/workflow/envs/environment.yml @@ -1,4 +1,4 @@ -name: ncov-qc-pyvcf3 +name: ncov-qc channels: - conda-forge - bioconda