Merge pull request #1164 from nextstrain/remove-run-pangolin

Remove "run_pangolin" config option
nextstrain · Dec 5, 2024 · 52edcc2 · 52edcc2
2 parents 61e5cf4 + bd4d994
commit 52edcc2
Show file tree

Hide file tree

Showing 5 changed files with 4 additions and 93 deletions.
diff --git a/docs/src/reference/change_log.md b/docs/src/reference/change_log.md
@@ -5,6 +5,10 @@ We also use this change log to document new features that maintain backward comp
 
 ## New features since last version update
 
+## v15 (5 December 2024)
+
+- Remove `run_pangolin` configuration option from the workflow, since the pangoLEARN tool that this enabled has been deprecated. [PR 1164](https://github.com/nextstrain/ncov/pull/1164)
+
 ## v14 (23 October 2024)
 
 - 23 October 2024: Update workflow to use Nextclade v3. This includes the removal of the unused mutation summary script and the rules that expected Nextclade v2 outputs. Dropping the mutation summary rules removed the need for the full alignment rule `align` to produce the insertions and translations outputs, so those outputs have been removed. The `build_align` rule no longer produces a separate `insertions.tsv` since insertions are now included in the `nextclade_qc.tsv`. [PR 1160](https://github.com/nextstrain/ncov/pull/1160)

diff --git a/docs/src/reference/workflow-config-file.rst b/docs/src/reference/workflow-config-file.rst
@@ -728,13 +728,6 @@ crowding_penalty
 
 .. _title-1:
 
-run_pangolin
-------------
-
--  type: boolean
--  description: Enable annotation of Pangolin lineages for a given build's subsampled sequences.
--  default: ``false``
-
 .. _workflow-config-mask:
 
 mask

diff --git a/scripts/make_pangolin_node_data.py b/scripts/make_pangolin_node_data.py
diff --git a/workflow/envs/nextstrain.yaml b/workflow/envs/nextstrain.yaml
@@ -8,6 +8,4 @@ dependencies:
   - epiweeks=2.1.2
   - iqtree=2.2.0.3
   - nextclade=3.9.0
-  - pangolin=3.1.20
-  - pangolearn=2022.01.20
   - python>=3.8*
diff --git a/workflow/snakemake_rules/main_workflow.smk b/workflow/snakemake_rules/main_workflow.smk
@@ -702,55 +702,6 @@ rule filter:
             --output-log {output.filter_log} 2>&1 | tee {log};
         """
 
-if "run_pangolin" in config and config["run_pangolin"]:
-    rule run_pangolin:
-        message:
-            """
-            Running pangolin to assign lineage labels to samples. Includes putative lineage definitions by default.
-            Please remember to update your installation of pangolin regularly to ensure the most up-to-date classifications.
-            """
-        input:
-            alignment = "results/{build_name}/aligned.fasta",
-        output:
-            lineages = "results/{build_name}/pangolineages.csv",
-        params:
-            outdir = "results/{build_name}",
-            csv_outfile = "pangolineages.csv",
-            node_data_outfile = "pangolineages.json"
-        log:
-            "logs/pangolin_{build_name}.txt"
-        conda: config["conda_environment"]
-        threads: 1
-        resources:
-            mem_mb=3000
-        benchmark:
-            "benchmarks/pangolineages_{build_name}.txt"
-        shell: ## once pangolin fully supports threads, add `--threads {threads}` to the below (existing pango cli param)
-            r"""
-            pangolin {input.alignment}\
-                --outdir {params.outdir} \
-                --outfile {params.csv_outfile} 2>&1 | tee {log}\
-            """
-
-    rule make_pangolin_node_data:
-        input:
-            lineages = rules.run_pangolin.output.lineages
-        output:
-            node_data = "results/{build_name}/pangolineages.json"
-        log:
-            "logs/pangolin_export_{build_name}.txt"
-        conda: config["conda_environment"]
-        resources:
-            mem_mb=3000
-        benchmark:
-            "benchmarks/make_pangolin_node_data_{build_name}.txt"
-        shell:
-            r"""
-            python3 scripts/make_pangolin_node_data.py \
-            --pangolineages {input.lineages} \
-            --node_data_outfile {output.node_data} 2>&1 | tee {log}\
-            """
-
 # TODO: This will probably not work for build names like "country_usa" where we need to know the country is "USA".
 rule adjust_metadata_regions:
     message:
@@ -1308,9 +1259,6 @@ def _get_node_data_by_wildcards(wildcards):
         rules.calculate_epiweeks.output.node_data,
     ]
 
-    if "run_pangolin" in config and config["run_pangolin"]:
-        inputs.append(rules.make_pangolin_node_data.output.node_data)
-
     # Convert input files from wildcard strings to real file names.
     inputs = [input_file.format(**wildcards_dict) for input_file in inputs]