Skip to content

Commit

Permalink
fix: vote for split node's clade to prevent mismatch
Browse files Browse the repository at this point in the history
After the query node placement is adjusted during the [greedy tree building](https://docs.nextstrain.org/projects/nextclade/en/stable/user/algorithm/03-phylogenetic-placement.html#tree-building), sometimes the branch needs to be split and a new internal node inserted.

Currently, we copy the clade of this internal node from the attachment target node. However, this is not always correct and can lead to a mismatch between the clade of the query node and the clade of the new internal node.

Here I add a voting mechanism (simply a mode) over the clades involved: those of the parent, target and query nodes.
  • Loading branch information
ivan-aksamentov committed Oct 7, 2024
1 parent ee51953 commit 4581a12
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 1 deletion.
26 changes: 25 additions & 1 deletion packages/nextclade/src/tree/tree_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,14 @@ use crate::graph::node::{GraphNodeKey, Node};
use crate::io::json::{json_stringify, JsonPretty};
use crate::tree::params::TreeBuilderParams;
use crate::tree::split_muts::{difference_of_muts, split_muts, union_of_muts, SplitMutsResult};
use crate::tree::tree::{AuspiceGraph, AuspiceGraphEdgePayload, AuspiceGraphNodePayload, TreeBranchAttrsLabels};
use crate::tree::tree::{
AuspiceGraph, AuspiceGraphEdgePayload, AuspiceGraphNodePayload, TreeBranchAttrsLabels, TreeNodeAttr,
};
use crate::tree::tree_attach_new_nodes::create_new_auspice_node;
use crate::tree::tree_preprocess::add_auspice_metadata_in_place;
use crate::types::outputs::NextcladeOutputs;
use crate::utils::collections::concat_to_vec;
use crate::utils::stats::mode;
use eyre::{Report, WrapErr};
use itertools::Itertools;
use serde_json::json;
Expand Down Expand Up @@ -474,6 +477,9 @@ pub fn knit_into_graph(
}
set_branch_attrs_aa_labels(&mut new_internal_node);

// Vote for the most plausible clade
new_internal_node.node_attrs.clade_membership = vote_for_clade(graph, target_node, result);

new_internal_node.name = {
let qry_name = &result.seq_name;
let qry_index = &result.index;
Expand Down Expand Up @@ -536,3 +542,21 @@ fn set_branch_attrs_aa_labels(node: &mut AuspiceGraphNodePayload) {
});
}
}

// Pick the clade for a newly inserted internal node by majority vote.
//
// Up to three clades take part, in this order: the clade of the parent of `target_node`,
// the clade of the query (`result.clade`) and the clade of `target_node` itself.
// Missing clades do not vote. The winner is whatever `mode` returns for the remaining
// candidates; if no candidate has a clade, the result is `None`.
fn vote_for_clade(
  graph: &AuspiceGraph,
  target_node: &Node<AuspiceGraphNodePayload>,
  result: &NextcladeOutputs,
) -> Option<TreeNodeAttr> {
  let clade_of_parent = graph
    .parent_of(target_node)
    .and_then(|parent| parent.payload().clade());
  let clade_of_target = target_node.payload().clade();

  // The order of candidates is kept as parent, query, target.
  let candidates = [&clade_of_parent, &result.clade, &clade_of_target];

  // `flatten` skips the candidates that have no clade.
  let winner = mode(candidates.into_iter().flatten());
  winner.map(|clade| TreeNodeAttr::new(clade))
}
1 change: 1 addition & 0 deletions packages/nextclade/src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ pub mod info;
pub mod map;
pub mod num;
pub mod option;
pub mod stats;
pub mod string;
pub mod vec2d;
pub mod wraparound;
Expand Down
13 changes: 13 additions & 0 deletions packages/nextclade/src/utils/stats.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
use itertools::Itertools;
use std::hash::Hash;

/// Calculate mode (the most frequently occurring element) of an iterator.
///
/// In case of a tie, the element whose first occurrence comes earliest in the input is
/// returned, so the result is deterministic. Returns `None` if the iterator is empty.
///
/// The `Clone` bound is not required by the implementation; it is kept so that the
/// signature stays unchanged for existing callers.
pub fn mode<T: Hash + Eq + Clone>(items: impl IntoIterator<Item = T>) -> Option<T> {
  // For every distinct item: (number of occurrences, index of its first occurrence).
  let mut stats: std::collections::HashMap<T, (usize, usize)> = std::collections::HashMap::new();
  for (index, item) in items.into_iter().enumerate() {
    stats.entry(item).or_insert((0, index)).0 += 1;
  }

  // Hash map iteration order is arbitrary, so ties on the count must be broken explicitly.
  // First-occurrence indices are unique, which makes the comparison key unique as well:
  // the highest count wins and, among equal counts, the smallest first index wins.
  stats
    .into_iter()
    .max_by_key(|&(_, (count, first_index))| (count, std::cmp::Reverse(first_index)))
    .map(|(item, _)| item)
}

0 comments on commit 4581a12

Please sign in to comment.