From 9fa2b6d87d29a9a029935dd32287471322b1a24e Mon Sep 17 00:00:00 2001 From: chris Mitchell Date: Tue, 15 Oct 2024 12:15:27 -0400 Subject: [PATCH 1/4] Lineage poc --- migrations/core/01-initial/up.sql | 7 ++ src/models/block_group.rs | 110 ++++++++++++++++++++++++++++++ src/updates/vcf.rs | 35 ++++++++++ 3 files changed, 152 insertions(+) diff --git a/migrations/core/01-initial/up.sql b/migrations/core/01-initial/up.sql index 2d5c592..7d359d3 100644 --- a/migrations/core/01-initial/up.sql +++ b/migrations/core/01-initial/up.sql @@ -38,6 +38,13 @@ CREATE TABLE block_group ( CREATE UNIQUE INDEX block_group_uidx ON block_group(collection_name, sample_name, name) WHERE sample_name is not null; CREATE UNIQUE INDEX block_group_null_sample_uidx ON block_group(collection_name, name) WHERE sample_name is null; +CREATE TABLE block_group_tree ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + parent_id INTEGER NOT NULL, + child_id INTEGER NOT NULL +) STRICT; +CREATE UNIQUE INDEX block_group_tree_uidx ON block_group_tree(parent_id, child_id); + CREATE TABLE path ( id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, block_group_id INTEGER NOT NULL, diff --git a/src/models/block_group.rs b/src/models/block_group.rs index d118bcb..ad303be 100644 --- a/src/models/block_group.rs +++ b/src/models/block_group.rs @@ -1,6 +1,8 @@ use std::collections::{HashMap, HashSet}; use intervaltree::IntervalTree; +use petgraph::graphmap::DiGraphMap; +use petgraph::visit::{depth_first_search, Dfs, DfsEvent, IntoEdgesDirected, Reversed}; use petgraph::Direction; use rusqlite::{params_from_iter, types::Value as SQLValue, Connection}; use serde::{Deserialize, Serialize}; @@ -159,6 +161,68 @@ impl BlockGroup { objs } + pub fn add_relation(conn: &Connection, source_block_group_id: i64, target_block_group_id: i64) { + let query = "INSERT OR IGNORE INTO block_group_tree (parent_id, child_id) values (?1, ?2)"; + let mut stmt = conn.prepare(query).unwrap(); + stmt.execute((source_block_group_id, target_block_group_id)) + .unwrap(); + } + + pub fn get_children(conn: &Connection, block_group_id: i64) -> Vec { + let query = "select child_id from block_group_tree where parent_id = ?1;"; + let mut stmt = conn.prepare(query).unwrap(); + let mut children = vec![]; + for row in stmt.query_map((block_group_id,), |row| row.get(0)).unwrap() { + children.push(row.unwrap()); + } + children + } + + pub fn get_ancestors(conn: &Connection, block_group_id: i64) -> Vec> { + let query = "WITH RECURSIVE ancestors(parent_id, child_id, depth) AS ( \ + VALUES(?1, NULL, 0) UNION \ + select bgt.parent_id, bgt.child_id, ancestors.depth+1 from block_group_tree bgt join ancestors ON bgt.child_id=ancestors.parent_id \ + ) SELECT parent_id, child_id, depth from ancestors where child_id is not null ORDER BY depth, parent_id DESC;"; + let mut stmt = conn.prepare(query).unwrap(); + + let mut graph: DiGraphMap = DiGraphMap::new(); + + for row in stmt + .query_map((block_group_id,), |row| { + Ok((row.get(0)?, row.get(1)?, row.get(2)?)) + }) + .unwrap() + { + let (parent_id, child_id, depth): (i64, i64, i64) = row.unwrap(); + graph.add_node(parent_id); + graph.add_node(child_id); + graph.add_edge(parent_id, child_id, ()); + } + + let rev = Reversed(&graph); + let mut paths = vec![]; + let mut current_path = vec![]; + depth_first_search(&rev, Some(block_group_id), |event| { + if let DfsEvent::TreeEdge(u, v) = event { + current_path.push(v); + if rev.edges_directed(v, Direction::Outgoing).next().is_none() { + paths.push(current_path.clone()); + current_path.clear(); + } + } else if let DfsEvent::CrossForwardEdge(u, v) = event { + if u != block_group_id { + current_path.push(u); + } + if rev.edges_directed(v, Direction::Outgoing).next().is_none() { + current_path.push(v); + paths.push(current_path.clone()); + current_path.clear(); + } + } + }); + paths + } + pub fn get_by_id(conn: &Connection, id: i64) -> BlockGroup { let query = "SELECT * FROM block_group WHERE id = ?1"; let mut stmt = conn.prepare(query).unwrap(); @@ -198,6 +262,8 @@ impl BlockGroup { .collect::>(); Path::create(conn, &path.name, target_block_group_id, &edge_ids); } + + BlockGroup::add_relation(conn, source_block_group_id, target_block_group_id); } pub fn get_or_create_sample_block_group( @@ -270,6 +336,14 @@ impl BlockGroup { } } + pub fn get_graph(conn: &Connection, block_group_id: i64) -> DiGraphMap { + let mut edges = BlockGroupEdge::edges_for_block_group(conn, block_group_id); + let (blocks, boundary_edges) = Edge::blocks_from_edges(conn, &edges); + edges.extend(boundary_edges); + let (graph, _) = Edge::build_graph(&edges, &blocks); + graph + } + pub fn get_all_sequences(conn: &Connection, block_group_id: i64) -> HashSet { let mut edges = BlockGroupEdge::edges_for_block_group(conn, block_group_id); let (blocks, boundary_edges) = Edge::blocks_from_edges(conn, &edges); @@ -1311,4 +1385,40 @@ mod tests { ]) ); } + + #[test] + fn test_adds_relation_on_clone() { + let conn = &get_connection(None); + let (block_group_id, path) = setup_block_group(conn); + let new_bg = BlockGroup::create(conn, "test", None, "test2"); + let new_bg_id = new_bg.id; + BlockGroup::clone(conn, block_group_id, new_bg_id); + assert_eq!( + BlockGroup::get_children(conn, block_group_id), + vec![new_bg_id] + ); + } + + #[test] + fn test_finds_all_ancestors() { + let conn = &get_connection("rec.db"); + let collection = Collection::create(conn, "test"); + let bg1 = BlockGroup::create(conn, "test", None, "test1"); + let bg2 = BlockGroup::create(conn, "test", None, "test2"); + let bg3 = BlockGroup::create(conn, "test", None, "test3"); + let bg4 = BlockGroup::create(conn, "test", None, "test4"); + BlockGroup::add_relation(conn, bg1.id, bg2.id); + BlockGroup::add_relation(conn, bg2.id, bg3.id); + BlockGroup::add_relation(conn, bg3.id, bg4.id); + BlockGroup::add_relation(conn, bg1.id, bg4.id); + BlockGroup::add_relation(conn, bg1.id, bg3.id); + assert_eq!( + BlockGroup::get_ancestors(conn, bg4.id), + vec![ + vec![bg3.id, bg2.id, bg1.id], + vec![bg3.id, bg1.id], + vec![bg1.id] + ] + ); + } } diff --git a/src/updates/vcf.rs b/src/updates/vcf.rs index 9247856..21ee6cd 100644 --- a/src/updates/vcf.rs +++ b/src/updates/vcf.rs @@ -411,6 +411,41 @@ mod tests { ); } + #[test] + fn test_diff_graph() { + setup_gen_dir(); + let mut vcf_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + vcf_path.push("fixtures/simple.vcf"); + let mut fasta_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + fasta_path.push("fixtures/simple.fa"); + let conn = &get_connection(None); + let db_uuid = metadata::get_db_uuid(conn); + let op_conn = &get_operation_connection(None); + setup_db(op_conn, &db_uuid); + + let collection = "test".to_string(); + + import_fasta( + &fasta_path.to_str().unwrap().to_string(), + &collection, + false, + conn, + op_conn, + ); + update_with_vcf( + &vcf_path.to_str().unwrap().to_string(), + &collection, + "".to_string(), + "".to_string(), + conn, + op_conn, + ); + let bg1 = BlockGroup::get_graph(conn, 1); + let bg2 = BlockGroup::get_graph(conn, 2); + println!("1 {bg1:?}"); + println!("2 {bg2:?}"); + } + #[test] fn test_update_fasta_with_vcf_custom_genotype() { setup_gen_dir(); From 7f4e16194958693b317bb8647f7d97a151c6167e Mon Sep 17 00:00:00 2001 From: chris Mitchell Date: Wed, 16 Oct 2024 13:16:38 -0400 Subject: [PATCH 2/4] Remove unused test --- src/updates/vcf.rs | 35 ----------------------------------- 1 file changed, 35 deletions(-) diff --git a/src/updates/vcf.rs b/src/updates/vcf.rs index 21ee6cd..9247856 100644 --- a/src/updates/vcf.rs +++ b/src/updates/vcf.rs @@ -411,41 +411,6 @@ mod tests { ); } - #[test] - fn test_diff_graph() { - setup_gen_dir(); - let mut vcf_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); - vcf_path.push("fixtures/simple.vcf"); - let mut fasta_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); - fasta_path.push("fixtures/simple.fa"); - let conn = &get_connection(None); - let db_uuid = metadata::get_db_uuid(conn); - let op_conn = &get_operation_connection(None); - setup_db(op_conn, &db_uuid); - - let collection = "test".to_string(); - - import_fasta( - &fasta_path.to_str().unwrap().to_string(), - &collection, - false, - conn, - op_conn, - ); - update_with_vcf( - &vcf_path.to_str().unwrap().to_string(), - &collection, - "".to_string(), - "".to_string(), - conn, - op_conn, - ); - let bg1 = BlockGroup::get_graph(conn, 1); - let bg2 = BlockGroup::get_graph(conn, 2); - println!("1 {bg1:?}"); - println!("2 {bg2:?}"); - } - #[test] fn test_update_fasta_with_vcf_custom_genotype() { setup_gen_dir(); From 5a9ce5323b7904b1ac4e8a4e39ca910d9b6e59b5 Mon Sep 17 00:00:00 2001 From: chris Mitchell Date: Wed, 16 Oct 2024 13:19:17 -0400 Subject: [PATCH 3/4] parent lookup --- src/models/block_group.rs | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/models/block_group.rs b/src/models/block_group.rs index ad303be..5458018 100644 --- a/src/models/block_group.rs +++ b/src/models/block_group.rs @@ -178,6 +178,16 @@ impl BlockGroup { children } + pub fn get_parents(conn: &Connection, block_group_id: i64) -> Vec { + let query = "select parent_id from block_group_tree where child_id = ?1;"; + let mut stmt = conn.prepare(query).unwrap(); + let mut ids = vec![]; + for row in stmt.query_map((block_group_id,), |row| row.get(0)).unwrap() { + ids.push(row.unwrap()); + } + ids + } + pub fn get_ancestors(conn: &Connection, block_group_id: i64) -> Vec> { let query = "WITH RECURSIVE ancestors(parent_id, child_id, depth) AS ( \ VALUES(?1, NULL, 0) UNION \ @@ -1397,11 +1407,15 @@ mod tests { BlockGroup::get_children(conn, block_group_id), vec![new_bg_id] ); + assert_eq!( + BlockGroup::get_parents(conn, new_bg_id), + vec![block_group_id] + ); } #[test] fn test_finds_all_ancestors() { - let conn = &get_connection("rec.db"); + let conn = &get_connection(None); let collection = Collection::create(conn, "test"); let bg1 = BlockGroup::create(conn, "test", None, "test1"); let bg2 = BlockGroup::create(conn, "test", None, "test2"); From 6654f3dc9195515d2bac7c297912aabd3e5080ca Mon Sep 17 00:00:00 2001 From: chris Mitchell Date: Thu, 17 Oct 2024 09:03:19 -0400 Subject: [PATCH 4/4] Rename --- migrations/core/01-initial/up.sql | 4 ++-- src/models/block_group.rs | 9 +++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/migrations/core/01-initial/up.sql b/migrations/core/01-initial/up.sql index 7d359d3..8848b1d 100644 --- a/migrations/core/01-initial/up.sql +++ b/migrations/core/01-initial/up.sql @@ -38,12 +38,12 @@ CREATE TABLE block_group ( CREATE UNIQUE INDEX block_group_uidx ON block_group(collection_name, sample_name, name) WHERE sample_name is not null; CREATE UNIQUE INDEX block_group_null_sample_uidx ON block_group(collection_name, name) WHERE sample_name is null; -CREATE TABLE block_group_tree ( +CREATE TABLE block_group_lineage ( id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, parent_id INTEGER NOT NULL, child_id INTEGER NOT NULL ) STRICT; -CREATE UNIQUE INDEX block_group_tree_uidx ON block_group_tree(parent_id, child_id); +CREATE UNIQUE INDEX block_group_lineage_uidx ON block_group_lineage(parent_id, child_id); CREATE TABLE path ( id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, diff --git a/src/models/block_group.rs b/src/models/block_group.rs index 5458018..68edc67 100644 --- a/src/models/block_group.rs +++ b/src/models/block_group.rs @@ -162,14 +162,15 @@ impl BlockGroup { } pub fn add_relation(conn: &Connection, source_block_group_id: i64, target_block_group_id: i64) { - let query = "INSERT OR IGNORE INTO block_group_tree (parent_id, child_id) values (?1, ?2)"; + let query = + "INSERT OR IGNORE INTO block_group_lineage (parent_id, child_id) values (?1, ?2)"; let mut stmt = conn.prepare(query).unwrap(); stmt.execute((source_block_group_id, target_block_group_id)) .unwrap(); } pub fn get_children(conn: &Connection, block_group_id: i64) -> Vec { - let query = "select child_id from block_group_tree where parent_id = ?1;"; + let query = "select child_id from block_group_lineage where parent_id = ?1;"; let mut stmt = conn.prepare(query).unwrap(); let mut children = vec![]; for row in stmt.query_map((block_group_id,), |row| row.get(0)).unwrap() { @@ -179,7 +180,7 @@ impl BlockGroup { } pub fn get_parents(conn: &Connection, block_group_id: i64) -> Vec { - let query = "select parent_id from block_group_tree where child_id = ?1;"; + let query = "select parent_id from block_group_lineage where child_id = ?1;"; let mut stmt = conn.prepare(query).unwrap(); let mut ids = vec![]; for row in stmt.query_map((block_group_id,), |row| row.get(0)).unwrap() { @@ -191,7 +192,7 @@ impl BlockGroup { pub fn get_ancestors(conn: &Connection, block_group_id: i64) -> Vec> { let query = "WITH RECURSIVE ancestors(parent_id, child_id, depth) AS ( \ VALUES(?1, NULL, 0) UNION \ - select bgt.parent_id, bgt.child_id, ancestors.depth+1 from block_group_tree bgt join ancestors ON bgt.child_id=ancestors.parent_id \ + select bgt.parent_id, bgt.child_id, ancestors.depth+1 from block_group_lineage bgt join ancestors ON bgt.child_id=ancestors.parent_id \ ) SELECT parent_id, child_id, depth from ancestors where child_id is not null ORDER BY depth, parent_id DESC;"; let mut stmt = conn.prepare(query).unwrap();