From ee51953de0d2d1b2f378ad8d450eef97c1e1afe5 Mon Sep 17 00:00:00 2001 From: ivan-aksamentov Date: Mon, 7 Oct 2024 10:37:58 +0200 Subject: [PATCH 1/7] add debug printouts to investigate clade mismatch against nearest node --- .../nextclade/src/run/nextclade_run_one.rs | 36 ++++++ packages/nextclade/src/tree/split_muts.rs | 3 +- packages/nextclade/src/tree/tree_builder.rs | 103 ++++++++++++++++++ 3 files changed, 141 insertions(+), 1 deletion(-) diff --git a/packages/nextclade/src/run/nextclade_run_one.rs b/packages/nextclade/src/run/nextclade_run_one.rs index 386467511..25d731dfe 100644 --- a/packages/nextclade/src/run/nextclade_run_one.rs +++ b/packages/nextclade/src/run/nextclade_run_one.rs @@ -35,6 +35,7 @@ use crate::analyze::virus_properties::PhenotypeData; use crate::coord::coord_map_global::CoordMapGlobal; use crate::coord::range::AaRefRange; use crate::graph::node::GraphNodeKey; +use crate::io::json::{json_stringify, JsonPretty}; use crate::qc::qc_run::qc_run; use crate::run::nextclade_wasm::{AnalysisOutput, Nextclade}; use crate::translate::aa_alignment_ranges::{gather_aa_alignment_ranges, GatherAaAlignmentRangesResult}; @@ -46,6 +47,7 @@ use crate::tree::tree_find_nearest_node::graph_find_nearest_nodes; use crate::types::outputs::{NextcladeOutputs, PeptideWarning, PhenotypeValue}; use eyre::Report; use itertools::Itertools; +use serde_json::json; use std::collections::{BTreeMap, HashSet}; #[derive(Default)] @@ -290,6 +292,16 @@ pub fn nextclade_run_one( let clade = nearest_node.clade(); + { + let nearest_node_clade = &clade; + println!( + "nearest | {:>5} | {:>15} | {:} |", + nearest_node_id, + nearest_node_clade.as_deref().unwrap_or(""), + nearest_node_name + ); + } + let clade_node_attr_descs = graph.data.meta.clade_node_attr_descs(); let clade_node_attrs = nearest_node.get_clade_node_attrs(clade_node_attr_descs); @@ -362,6 +374,30 @@ pub fn nextclade_run_one( .collect_vec() }); + { + let nuc_subs = substitutions.iter().map(ToString::to_string).collect_vec(); + + let private_nuc_subs = private_nuc_mutations + .private_substitutions + .iter() + .map(ToString::to_string) + .collect_vec(); + + println!( + "{}", + json_stringify( + &json!({ + "seq": { + "name": &seq_name, + "nuc_subs": nuc_subs, + "private_nuc_subs": private_nuc_subs, + } + }), + JsonPretty(true) + )? + ); + } + NextcladeResultWithGraph { clade, private_nuc_mutations, diff --git a/packages/nextclade/src/tree/split_muts.rs b/packages/nextclade/src/tree/split_muts.rs index b12204688..b6a9034e5 100644 --- a/packages/nextclade/src/tree/split_muts.rs +++ b/packages/nextclade/src/tree/split_muts.rs @@ -8,8 +8,9 @@ use eyre::{Report, WrapErr}; use itertools::{chain, Itertools}; use std::collections::BTreeMap; use std::fmt::Display; +use serde::{Deserialize, Serialize}; -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Serialize, Deserialize)] pub struct SplitMutsResult { pub left: BranchMutations, pub shared: BranchMutations, diff --git a/packages/nextclade/src/tree/tree_builder.rs b/packages/nextclade/src/tree/tree_builder.rs index 3b3cf6799..f9a50fd9d 100644 --- a/packages/nextclade/src/tree/tree_builder.rs +++ b/packages/nextclade/src/tree/tree_builder.rs @@ -6,6 +6,7 @@ use crate::analyze::nuc_del::NucDel; use crate::analyze::nuc_sub::NucSub; use crate::coord::range::NucRefGlobalRange; use crate::graph::node::{GraphNodeKey, Node}; +use crate::io::json::{json_stringify, JsonPretty}; use crate::tree::params::TreeBuilderParams; use crate::tree::split_muts::{difference_of_muts, split_muts, union_of_muts, SplitMutsResult}; use crate::tree::tree::{AuspiceGraph, AuspiceGraphEdgePayload, AuspiceGraphNodePayload, TreeBranchAttrsLabels}; @@ -15,6 +16,7 @@ use crate::types::outputs::NextcladeOutputs; use crate::utils::collections::concat_to_vec; use eyre::{Report, WrapErr}; use itertools::Itertools; +use serde_json::json; use std::collections::BTreeMap; pub fn graph_attach_new_nodes_in_place( @@ -89,6 +91,19 @@ pub fn graph_attach_new_node_in_place( finetune_nearest_node(graph, result.nearest_node_id, &mutations_seq)? }; + { + let finetuned_node_id = nearest_node_key; + let finetuned_node = graph.get_node(finetuned_node_id)?; + let finetuned_node_name = &finetuned_node.payload().name; + let finetuned_node_clade = &finetuned_node.payload().clade(); + println!( + "finetuned | {:>5} | {:>15} | {:} |", + finetuned_node_id, + finetuned_node_clade.as_deref().unwrap_or(""), + finetuned_node_name + ); + } + // add the new node at the fine-tuned position while accounting for shared mutations // on the branch leading to the nearest node. knit_into_graph(graph, nearest_node_key, result, &private_mutations, ref_seq_len, params)?; @@ -111,6 +126,7 @@ pub fn finetune_nearest_node( let mut best_node = graph.get_node(nearest_node_key)?; let mut private_mutations = seq_private_mutations.clone(); + let mut i = 0; loop { // Check how many mutations are shared with the branch leading to the current_best_node or any of its children let (candidate_node, candidate_split, shared_muts_score) = @@ -121,6 +137,76 @@ pub fn finetune_nearest_node( ) })?; + { + let best_key = &best_node.key(); + let best_name = &best_node.payload().name; + let best_clade = &best_node.payload().clade().unwrap(); + let best_div = &best_node.payload().node_attrs.div.unwrap(); + let best_nuc_muts = &best_node.payload().branch_attrs.mutations["nuc"]; + + let candidate_key = &candidate_node.key(); + let candidate_name = &candidate_node.payload().name; + let candidate_clade = &candidate_node.payload().clade().unwrap(); + let candidate_div = &candidate_node.payload().node_attrs.div.unwrap(); + let candidate_nuc_muts = &candidate_node.payload().branch_attrs.mutations["nuc"]; + + let left = &candidate_split + .left + .nuc_muts + .iter() + .map(ToString::to_string) + .collect_vec(); + + let shared = &candidate_split + .shared + .nuc_muts + .iter() + .map(ToString::to_string) + .collect_vec(); + + let right = &candidate_split + .right + .nuc_muts + .iter() + .map(ToString::to_string) + .collect_vec(); + + let private_muts = private_mutations.nuc_muts.iter().map(ToString::to_string).collect_vec(); + + println!("\n------------------------------------------"); + println!("finetune_nearest_node: iteration {i}"); + println!( + "{}", + json_stringify( + &json!({ + "best": { + "key": best_key, + "name": best_name, + "clade": best_clade, + "div": best_div, + "nuc_muts": best_nuc_muts + }, + "candidate": { + "key": candidate_key, + "name": candidate_name, + "clade": candidate_clade, + "div": candidate_div, + "nuc_muts": candidate_nuc_muts + }, + "comparison": { + "left": left, + "shared": shared, + "right": right, + "private_muts": &private_muts, + "shared_muts_score": &shared_muts_score + } + }), + JsonPretty(true) + )? + ); + println!("\n\n"); + } + // Check if the new candidate node is better than the current best let left_muts_score = score_nuc_muts(&candidate_split.left.nuc_muts, masked_ranges); match find_better_node_maybe(graph, best_node, candidate_node, shared_muts_score, left_muts_score) { @@ -135,6 +221,8 @@ pub fn finetune_nearest_node( best_node.payload().name ) })?; + + i += 1; } Ok((best_node.key(), private_mutations)) @@ -240,6 +328,21 @@ pub fn attach_to_internal_node( result: &NextcladeOutputs, divergence_new_node: f64, ) -> Result<(), Report> { + { + { + let attachment_node_id = nearest_node_id; + let attachment_node = graph.get_node(attachment_node_id)?; + let attachment_node_name = &attachment_node.payload().name; + let attachment_node_clade = &attachment_node.payload().clade(); + println!( + "attachment | {:>5} | {:>15} | {:} |", + attachment_node_id, + attachment_node_clade.as_deref().unwrap_or(""), + attachment_node_name + ); + } + } + //generated auspice payload for new node let mut new_graph_node = create_new_auspice_node(result, new_private_mutations, divergence_new_node); new_graph_node.tmp.private_mutations = new_private_mutations.clone(); From 4581a12275a4bc35599dd5fcf545cee0048c27a7 Mon Sep 17 00:00:00 2001 From: ivan-aksamentov Date: Mon, 7 Oct 2024 17:54:26 +0200 Subject: [PATCH 2/7] fix: vote for split node's clade to prevent mismatch After the query node placement is adjusted during the [greedy tree building](https://docs.nextstrain.org/projects/nextclade/en/stable/user/algorithm/03-phylogenetic-placement.html#tree-building), sometimes the branch needs to be split and a new internal node inserted. Currently we copy the clade of this internal node from the attachment target node. However, this is not always correct and can lead to mismatch between clade of the query node and of the new internal node. Here I add a voting mechanism (simply a mode) between clades involved: of the parent, target and query nodes. --- packages/nextclade/src/tree/tree_builder.rs | 26 ++++++++++++++++++++- packages/nextclade/src/utils/mod.rs | 1 + packages/nextclade/src/utils/stats.rs | 13 +++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 packages/nextclade/src/utils/stats.rs diff --git a/packages/nextclade/src/tree/tree_builder.rs b/packages/nextclade/src/tree/tree_builder.rs index f9a50fd9d..1d05bad84 100644 --- a/packages/nextclade/src/tree/tree_builder.rs +++ b/packages/nextclade/src/tree/tree_builder.rs @@ -9,11 +9,14 @@ use crate::graph::node::{GraphNodeKey, Node}; use crate::io::json::{json_stringify, JsonPretty}; use crate::tree::params::TreeBuilderParams; use crate::tree::split_muts::{difference_of_muts, split_muts, union_of_muts, SplitMutsResult}; -use crate::tree::tree::{AuspiceGraph, AuspiceGraphEdgePayload, AuspiceGraphNodePayload, TreeBranchAttrsLabels}; +use crate::tree::tree::{ + AuspiceGraph, AuspiceGraphEdgePayload, AuspiceGraphNodePayload, TreeBranchAttrsLabels, TreeNodeAttr, +}; use crate::tree::tree_attach_new_nodes::create_new_auspice_node; use crate::tree::tree_preprocess::add_auspice_metadata_in_place; use crate::types::outputs::NextcladeOutputs; use crate::utils::collections::concat_to_vec; +use crate::utils::stats::mode; use eyre::{Report, WrapErr}; use itertools::Itertools; use serde_json::json; @@ -474,6 +477,9 @@ pub fn knit_into_graph( } set_branch_attrs_aa_labels(&mut new_internal_node); + // Vote for the most plausible clade + new_internal_node.node_attrs.clade_membership = vote_for_clade(graph, target_node, result); + new_internal_node.name = { let qry_name = &result.seq_name; let qry_index = &result.index; @@ -536,3 +542,21 @@ fn set_branch_attrs_aa_labels(node: &mut AuspiceGraphNodePayload) { }); } } + +// Vote for the most plausible clade for the new internal node +fn vote_for_clade( + graph: &AuspiceGraph, + target_node: &Node, + result: &NextcladeOutputs, +) -> Option { + let query_clade = &result.clade; + + let parent_node = &graph.parent_of(target_node); + let parent_clade = &parent_node.and_then(|node| node.payload().clade()); + // let sibling_clades = graph.iter_children_of(&parent_node).map(|child| child.payload().clade()); + + let target_clade = &target_node.payload().clade(); + + let possible_clades = [parent_clade, query_clade, target_clade].into_iter().flatten(); // exclude None + mode(possible_clades).map(|c| TreeNodeAttr::new(c)) +} diff --git a/packages/nextclade/src/utils/mod.rs b/packages/nextclade/src/utils/mod.rs index 1b6f01f18..276f73aa8 100644 --- a/packages/nextclade/src/utils/mod.rs +++ b/packages/nextclade/src/utils/mod.rs @@ -10,6 +10,7 @@ pub mod info; pub mod map; pub mod num; pub mod option; +pub mod stats; pub mod string; pub mod vec2d; pub mod wraparound; diff --git a/packages/nextclade/src/utils/stats.rs b/packages/nextclade/src/utils/stats.rs new file mode 100644 index 000000000..35b82db0c --- /dev/null +++ b/packages/nextclade/src/utils/stats.rs @@ -0,0 +1,13 @@ +use itertools::Itertools; +use std::hash::Hash; + +/// Calculate mode (the most frequently occurring element) of an iterator. +/// In case of a tie, the first occurrence is returned. Returns `None` if the iterator is empty. +pub fn mode(items: impl IntoIterator) -> Option { + items + .into_iter() + .counts() + .into_iter() + .max_by_key(|&(_, count)| count) + .map(|(item, _)| item) +} From 4e7974bd96ee7468e56eb2369f027353a9be6560 Mon Sep 17 00:00:00 2001 From: ivan-aksamentov Date: Mon, 7 Oct 2024 20:06:50 +0200 Subject: [PATCH 3/7] fix: move label from target to internal node if target clade is selected --- packages/nextclade/src/tree/tree_builder.rs | 31 ++++++++++++++++----- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/packages/nextclade/src/tree/tree_builder.rs b/packages/nextclade/src/tree/tree_builder.rs index 1d05bad84..aff0d9438 100644 --- a/packages/nextclade/src/tree/tree_builder.rs +++ b/packages/nextclade/src/tree/tree_builder.rs @@ -467,7 +467,7 @@ pub fn knit_into_graph( // generate new internal node // add private mutations, divergence, name and branch attrs to new internal node let new_internal_node = { - let mut new_internal_node: AuspiceGraphNodePayload = target_node_auspice.clone(); + let mut new_internal_node: AuspiceGraphNodePayload = target_node_auspice.to_owned(); new_internal_node.tmp.private_mutations = muts_common_branch; new_internal_node.node_attrs.div = Some(divergence_middle_node); new_internal_node.branch_attrs.mutations = @@ -477,9 +477,6 @@ pub fn knit_into_graph( } set_branch_attrs_aa_labels(&mut new_internal_node); - // Vote for the most plausible clade - new_internal_node.node_attrs.clade_membership = vote_for_clade(graph, target_node, result); - new_internal_node.name = { let qry_name = &result.seq_name; let qry_index = &result.index; @@ -487,6 +484,23 @@ pub fn knit_into_graph( format!("nextclade__copy_of_{target_name}_for_placement_of_{qry_name}_#{qry_index}") }; + // Vote for the most plausible clade + let (clade, same_as_target) = vote_for_clade(graph, target_node, result); + new_internal_node.node_attrs.clade_membership = clade.as_deref().map(TreeNodeAttr::new); + + // If the target clade is selected, then move the clade label from target node to the internal node + if same_as_target { + let target_node = graph.get_node_mut(target_key)?; + if let Some(target_labels) = &mut target_node.payload_mut().branch_attrs.labels { + target_labels.clade = None; + new_internal_node + .branch_attrs + .labels + .get_or_insert(TreeBranchAttrsLabels::default()) + .clade = clade; + } + } + new_internal_node }; @@ -548,15 +562,18 @@ fn vote_for_clade( graph: &AuspiceGraph, target_node: &Node, result: &NextcladeOutputs, -) -> Option { +) -> (Option, bool) { let query_clade = &result.clade; let parent_node = &graph.parent_of(target_node); let parent_clade = &parent_node.and_then(|node| node.payload().clade()); - // let sibling_clades = graph.iter_children_of(&parent_node).map(|child| child.payload().clade()); let target_clade = &target_node.payload().clade(); let possible_clades = [parent_clade, query_clade, target_clade].into_iter().flatten(); // exclude None - mode(possible_clades).map(|c| TreeNodeAttr::new(c)) + let clade = mode(possible_clades).cloned(); + + let same_as_target = target_clade.is_some() && target_clade == &clade; + + (clade, same_as_target) } From 5ac6c709926d2bcb73daab31f1bebfe45a6e5c1d Mon Sep 17 00:00:00 2001 From: ivan-aksamentov Date: Mon, 7 Oct 2024 20:31:48 +0200 Subject: [PATCH 4/7] fix: only move label when the clade actually changes --- packages/nextclade/src/tree/tree_builder.rs | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/packages/nextclade/src/tree/tree_builder.rs b/packages/nextclade/src/tree/tree_builder.rs index aff0d9438..c626531c7 100644 --- a/packages/nextclade/src/tree/tree_builder.rs +++ b/packages/nextclade/src/tree/tree_builder.rs @@ -485,11 +485,11 @@ pub fn knit_into_graph( }; // Vote for the most plausible clade - let (clade, same_as_target) = vote_for_clade(graph, target_node, result); + let (clade, should_relabel) = vote_for_clade(graph, target_node, result); new_internal_node.node_attrs.clade_membership = clade.as_deref().map(TreeNodeAttr::new); - // If the target clade is selected, then move the clade label from target node to the internal node - if same_as_target { + // If decided, then move the clade label from target node to the internal node + if should_relabel { let target_node = graph.get_node_mut(target_key)?; if let Some(target_labels) = &mut target_node.payload_mut().branch_attrs.labels { target_labels.clade = None; @@ -573,7 +573,11 @@ fn vote_for_clade( let possible_clades = [parent_clade, query_clade, target_clade].into_iter().flatten(); // exclude None let clade = mode(possible_clades).cloned(); - let same_as_target = target_clade.is_some() && target_clade == &clade; + // We will need to change branch label if both: + // - clade transition happens from parent to the new node + // AND + // - when the target node's clade wins the vote + let should_relabel = (parent_clade != &clade) && (target_clade.is_some() && target_clade == &clade); - (clade, same_as_target) + (clade, should_relabel) } From f8b860bb11c4f407df4fa4acf9a8f0e38c6535e4 Mon Sep 17 00:00:00 2001 From: ivan-aksamentov Date: Fri, 18 Oct 2024 04:54:07 +0200 Subject: [PATCH 5/7] Revert "add debug printouts to investigate clade mismatch against nearest node" This reverts commit ee51953de0d2d1b2f378ad8d450eef97c1e1afe5. --- .../nextclade/src/run/nextclade_run_one.rs | 36 ------ packages/nextclade/src/tree/split_muts.rs | 3 +- packages/nextclade/src/tree/tree_builder.rs | 103 ------------------ 3 files changed, 1 insertion(+), 141 deletions(-) diff --git a/packages/nextclade/src/run/nextclade_run_one.rs b/packages/nextclade/src/run/nextclade_run_one.rs index 25d731dfe..386467511 100644 --- a/packages/nextclade/src/run/nextclade_run_one.rs +++ b/packages/nextclade/src/run/nextclade_run_one.rs @@ -35,7 +35,6 @@ use crate::analyze::virus_properties::PhenotypeData; use crate::coord::coord_map_global::CoordMapGlobal; use crate::coord::range::AaRefRange; use crate::graph::node::GraphNodeKey; -use crate::io::json::{json_stringify, JsonPretty}; use crate::qc::qc_run::qc_run; use crate::run::nextclade_wasm::{AnalysisOutput, Nextclade}; use crate::translate::aa_alignment_ranges::{gather_aa_alignment_ranges, GatherAaAlignmentRangesResult}; @@ -47,7 +46,6 @@ use crate::tree::tree_find_nearest_node::graph_find_nearest_nodes; use crate::types::outputs::{NextcladeOutputs, PeptideWarning, PhenotypeValue}; use eyre::Report; use itertools::Itertools; -use serde_json::json; use std::collections::{BTreeMap, HashSet}; #[derive(Default)] @@ -292,16 +290,6 @@ pub fn nextclade_run_one( let clade = nearest_node.clade(); - { - let nearest_node_clade = &clade; - println!( - "nearest | {:>5} | {:>15} | {:} |", - nearest_node_id, - nearest_node_clade.as_deref().unwrap_or(""), - nearest_node_name - ); - } - let clade_node_attr_descs = graph.data.meta.clade_node_attr_descs(); let clade_node_attrs = nearest_node.get_clade_node_attrs(clade_node_attr_descs); @@ -374,30 +362,6 @@ pub fn nextclade_run_one( .collect_vec() }); - { - let nuc_subs = substitutions.iter().map(ToString::to_string).collect_vec(); - - let private_nuc_subs = private_nuc_mutations - .private_substitutions - .iter() - .map(ToString::to_string) - .collect_vec(); - - println!( - "{}", - json_stringify( - &json!({ - "seq": { - "name": &seq_name, - "nuc_subs": nuc_subs, - "private_nuc_subs": private_nuc_subs, - } - }), - JsonPretty(true) - )? - ); - } - NextcladeResultWithGraph { clade, private_nuc_mutations, diff --git a/packages/nextclade/src/tree/split_muts.rs b/packages/nextclade/src/tree/split_muts.rs index b6a9034e5..b12204688 100644 --- a/packages/nextclade/src/tree/split_muts.rs +++ b/packages/nextclade/src/tree/split_muts.rs @@ -8,9 +8,8 @@ use eyre::{Report, WrapErr}; use itertools::{chain, Itertools}; use std::collections::BTreeMap; use std::fmt::Display; -use serde::{Deserialize, Serialize}; -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone)] pub struct SplitMutsResult { pub left: BranchMutations, pub shared: BranchMutations, diff --git a/packages/nextclade/src/tree/tree_builder.rs b/packages/nextclade/src/tree/tree_builder.rs index c626531c7..a291c3275 100644 --- a/packages/nextclade/src/tree/tree_builder.rs +++ b/packages/nextclade/src/tree/tree_builder.rs @@ -6,7 +6,6 @@ use crate::analyze::nuc_del::NucDel; use crate::analyze::nuc_sub::NucSub; use crate::coord::range::NucRefGlobalRange; use crate::graph::node::{GraphNodeKey, Node}; -use crate::io::json::{json_stringify, JsonPretty}; use crate::tree::params::TreeBuilderParams; use crate::tree::split_muts::{difference_of_muts, split_muts, union_of_muts, SplitMutsResult}; use crate::tree::tree::{ @@ -19,7 +18,6 @@ use crate::utils::collections::concat_to_vec; use crate::utils::stats::mode; use eyre::{Report, WrapErr}; use itertools::Itertools; -use serde_json::json; use std::collections::BTreeMap; pub fn graph_attach_new_nodes_in_place( @@ -94,19 +92,6 @@ pub fn graph_attach_new_node_in_place( finetune_nearest_node(graph, result.nearest_node_id, &mutations_seq)? }; - { - let finetuned_node_id = nearest_node_key; - let finetuned_node = graph.get_node(finetuned_node_id)?; - let finetuned_node_name = &finetuned_node.payload().name; - let finetuned_node_clade = &finetuned_node.payload().clade(); - println!( - "finetuned | {:>5} | {:>15} | {:} |", - finetuned_node_id, - finetuned_node_clade.as_deref().unwrap_or(""), - finetuned_node_name - ); - } - // add the new node at the fine-tuned position while accounting for shared mutations // on the branch leading to the nearest node. knit_into_graph(graph, nearest_node_key, result, &private_mutations, ref_seq_len, params)?; @@ -129,7 +114,6 @@ pub fn finetune_nearest_node( let mut best_node = graph.get_node(nearest_node_key)?; let mut private_mutations = seq_private_mutations.clone(); - let mut i = 0; loop { // Check how many mutations are shared with the branch leading to the current_best_node or any of its children let (candidate_node, candidate_split, shared_muts_score) = @@ -140,76 +124,6 @@ pub fn finetune_nearest_node( ) })?; - { - let best_key = &best_node.key(); - let best_name = &best_node.payload().name; - let best_clade = &best_node.payload().clade().unwrap(); - let best_div = &best_node.payload().node_attrs.div.unwrap(); - let best_nuc_muts = &best_node.payload().branch_attrs.mutations["nuc"]; - - let candidate_key = &candidate_node.key(); - let candidate_name = &candidate_node.payload().name; - let candidate_clade = &candidate_node.payload().clade().unwrap(); - let candidate_div = &candidate_node.payload().node_attrs.div.unwrap(); - let candidate_nuc_muts = &candidate_node.payload().branch_attrs.mutations["nuc"]; - - let left = &candidate_split - .left - .nuc_muts - .iter() - .map(ToString::to_string) - .collect_vec(); - - let shared = &candidate_split - .shared - .nuc_muts - .iter() - .map(ToString::to_string) - .collect_vec(); - - let right = &candidate_split - .right - .nuc_muts - .iter() - .map(ToString::to_string) - .collect_vec(); - - let private_muts = private_mutations.nuc_muts.iter().map(ToString::to_string).collect_vec(); - - println!("\n------------------------------------------"); - println!("finetune_nearest_node: iteration {i}"); - println!( - "{}", - json_stringify( - &json!({ - "best": { - "key": best_key, - "name": best_name, - "clade": best_clade, - "div": best_div, - "nuc_muts": best_nuc_muts - }, - "candidate": { - "key": candidate_key, - "name": candidate_name, - "clade": candidate_clade, - "div": candidate_div, - "nuc_muts": candidate_nuc_muts - }, - "comparison": { - "left": left, - "shared": shared, - "right": right, - "private_muts": &private_muts, - "shared_muts_score": &shared_muts_score - } - }), - JsonPretty(true) - )? - ); - println!("\n\n"); - } - // Check if the new candidate node is better than the current best let left_muts_score = score_nuc_muts(&candidate_split.left.nuc_muts, masked_ranges); match find_better_node_maybe(graph, best_node, candidate_node, shared_muts_score, left_muts_score) { @@ -224,8 +138,6 @@ pub fn finetune_nearest_node( best_node.payload().name ) })?; - - i += 1; } Ok((best_node.key(), private_mutations)) @@ -331,21 +243,6 @@ pub fn attach_to_internal_node( result: &NextcladeOutputs, divergence_new_node: f64, ) -> Result<(), Report> { - { - { - let attachment_node_id = nearest_node_id; - let attachment_node = graph.get_node(attachment_node_id)?; - let attachment_node_name = &attachment_node.payload().name; - let attachment_node_clade = &attachment_node.payload().clade(); - println!( - "attachment | {:>5} | {:>15} | {:} |", - attachment_node_id, - attachment_node_clade.as_deref().unwrap_or(""), - attachment_node_name - ); - } - } - //generated auspice payload for new node let mut new_graph_node = create_new_auspice_node(result, new_private_mutations, divergence_new_node); new_graph_node.tmp.private_mutations = new_private_mutations.clone(); From 592c887341bcf0af8a0728a5f1199e3d66e03f10 Mon Sep 17 00:00:00 2001 From: ivan-aksamentov Date: Fri, 18 Oct 2024 17:35:07 +0200 Subject: [PATCH 6/7] feat: vote on clade-like attributes too This just extends the voting to not only clade_membership clade, but to other clade-like attributes which might be present on the tree (in `.meta.extensions.nextclade`) --- packages/nextclade/src/tree/tree.rs | 12 ++++++++ packages/nextclade/src/tree/tree_builder.rs | 32 ++++++++++++++++++++- 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/packages/nextclade/src/tree/tree.rs b/packages/nextclade/src/tree/tree.rs index fc6676c7e..8330bd8f9 100644 --- a/packages/nextclade/src/tree/tree.rs +++ b/packages/nextclade/src/tree/tree.rs @@ -282,6 +282,11 @@ impl AuspiceGraphNodePayload { .and_then(|val| val.as_str()) } + /// Sets clade-like node attribute + pub fn set_clade_node_attr(&mut self, key: impl AsRef, value: String) { + self.node_attrs.other[key.as_ref()] = serde_json::Value::String(value); + } + /// Extracts clade-like node attributes, given a list of key descriptions pub fn get_clade_node_attrs(&self, clade_node_attr_descs: &[CladeNodeAttrKeyDesc]) -> BTreeMap { clade_node_attr_descs @@ -292,6 +297,13 @@ impl AuspiceGraphNodePayload { }) .collect() } + + /// Sets clade-like node attributes. Inserts if key does not exist and overwrites existing. + pub fn set_clade_node_attrs(&mut self, attrs: BTreeMap) { + for (key, val) in attrs { + self.set_clade_node_attr(key, val); + } + } } impl GraphNode for AuspiceGraphNodePayload {} diff --git a/packages/nextclade/src/tree/tree_builder.rs b/packages/nextclade/src/tree/tree_builder.rs index a291c3275..6c8e0179a 100644 --- a/packages/nextclade/src/tree/tree_builder.rs +++ b/packages/nextclade/src/tree/tree_builder.rs @@ -17,7 +17,7 @@ use crate::types::outputs::NextcladeOutputs; use crate::utils::collections::concat_to_vec; use crate::utils::stats::mode; use eyre::{Report, WrapErr}; -use itertools::Itertools; +use itertools::{chain, Itertools}; use std::collections::BTreeMap; pub fn graph_attach_new_nodes_in_place( @@ -385,6 +385,10 @@ pub fn knit_into_graph( let (clade, should_relabel) = vote_for_clade(graph, target_node, result); new_internal_node.node_attrs.clade_membership = clade.as_deref().map(TreeNodeAttr::new); + // Vote for the most plausible clade-like attrs + let clade_attrs = vote_for_clade_like_attrs(graph, target_node, result); + new_internal_node.set_clade_node_attrs(clade_attrs); + // If decided, then move the clade label from target node to the internal node if should_relabel { let target_node = graph.get_node_mut(target_key)?; @@ -478,3 +482,29 @@ fn vote_for_clade( (clade, should_relabel) } + +// Vote for the most plausible clade-like attribute values, for the new internal node +fn vote_for_clade_like_attrs( + graph: &AuspiceGraph, + target_node: &Node, + result: &NextcladeOutputs, +) -> BTreeMap { + let attr_descs = graph.data.meta.clade_node_attr_descs(); + + let query_attrs: &BTreeMap = &result.custom_node_attributes; + + let parent_node = &graph.parent_of(target_node); + let parent_attrs: &BTreeMap = &parent_node + .map(|node| node.payload().get_clade_node_attrs(attr_descs)) + .unwrap_or_default(); + + let target_attrs: &BTreeMap = &target_node.payload().get_clade_node_attrs(attr_descs); + + chain!(query_attrs.iter(), parent_attrs.iter(), target_attrs.iter()) + .into_group_map() + .into_iter() + .filter_map(|(key, grouped_values)| { + mode(grouped_values.into_iter().cloned()).map(|most_common| (key.clone(), most_common)) + }) + .collect() +} From fc282b34af24a53784aaf4af3bda075fe68d0f4b Mon Sep 17 00:00:00 2001 From: ivan-aksamentov Date: Tue, 29 Oct 2024 09:22:12 +0100 Subject: [PATCH 7/7] fix: set node clade-like attrs correctly It was missing the `.value` --- packages/nextclade/src/tree/tree.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/nextclade/src/tree/tree.rs b/packages/nextclade/src/tree/tree.rs index 8330bd8f9..ba45891ff 100644 --- a/packages/nextclade/src/tree/tree.rs +++ b/packages/nextclade/src/tree/tree.rs @@ -18,6 +18,7 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::collections::BTreeMap; use std::path::Path; use std::slice::Iter; +use serde_json::json; use traversal::{Bft, DftPost, DftPre}; use validator::Validate; @@ -283,8 +284,8 @@ impl AuspiceGraphNodePayload { } /// Sets clade-like node attribute - pub fn set_clade_node_attr(&mut self, key: impl AsRef, value: String) { - self.node_attrs.other[key.as_ref()] = serde_json::Value::String(value); + pub fn set_clade_node_attr(&mut self, key: impl AsRef, value: impl AsRef) { + self.node_attrs.other[key.as_ref()] = json!({ "value": value.as_ref() }); } /// Extracts clade-like node attributes, given a list of key descriptions