diff --git a/migrations/01-initial/up.sql b/migrations/01-initial/up.sql index 567bf21..1863b07 100644 --- a/migrations/01-initial/up.sql +++ b/migrations/01-initial/up.sql @@ -56,13 +56,13 @@ CREATE TABLE path ( ); CREATE UNIQUE INDEX path_uidx ON path(block_group_id, name); -CREATE TABLE path_edges ( +CREATE TABLE path_blocks ( id INTEGER PRIMARY KEY NOT NULL, path_id INTEGER NOT NULL, - source_edge_id INTEGER, - target_edge_id INTEGER, - FOREIGN KEY(source_edge_id) REFERENCES edges(id), - FOREIGN KEY(target_edge_id) REFERENCES edges(id), + source_block_id INTEGER, + target_block_id INTEGER, + FOREIGN KEY(source_block_id) REFERENCES block(id), + FOREIGN KEY(target_block_id) REFERENCES block(id), FOREIGN KEY(path_id) REFERENCES path(id) ); -CREATE UNIQUE INDEX path_edge_uidx ON path_edges(path_id, source_edge_id, target_edge_id); \ No newline at end of file +CREATE UNIQUE INDEX path_blocks_uidx ON path_blocks(path_id, source_block_id, target_block_id); \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 74da838..bc277f5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -80,14 +80,9 @@ fn import_fasta(fasta: &String, name: &String, shallow: bool, conn: &mut Connect (sequence.len() as i32), &"1".to_string(), ); - let edge_1 = Edge::create(conn, None, Some(block.id), 0, 0); - let edge_2 = Edge::create(conn, Some(block.id), None, 0, 0); - Path::create( - conn, - record.id(), - block_group.id, - vec![edge_1.id, edge_2.id], - ); + Edge::create(conn, None, Some(block.id), 0, 0); + Edge::create(conn, Some(block.id), None, 0, 0); + Path::create(conn, record.id(), block_group.id, vec![block.id]); } println!("Created it"); } else { diff --git a/src/models.rs b/src/models.rs index a62f0e9..b8f57de 100644 --- a/src/models.rs +++ b/src/models.rs @@ -17,7 +17,7 @@ pub mod sequence; use crate::models; use crate::models::block::Block; use crate::models::edge::Edge; -use crate::models::path::{all_simple_paths, Path, PathEdge}; +use crate::models::path::{all_simple_paths, Path, PathBlock}; use crate::models::sequence::Sequence; #[derive(Debug)] @@ -158,10 +158,8 @@ impl BlockGroup { .join(", "); let mut it = stmt.query([block_keys]).unwrap(); let mut row = it.next().unwrap(); - let mut edge_map = HashMap::new(); while row.is_some() { let edge = row.unwrap(); - let edge_id: i32 = edge.get(0).unwrap(); let source_id: Option = edge.get(1).unwrap(); let target_id: Option = edge.get(2).unwrap(); let chrom_index = edge.get(3).unwrap(); @@ -198,7 +196,6 @@ impl BlockGroup { } else { panic!("no source and target specified."); } - edge_map.insert(edge_id, new_edge.id); row = it.next().unwrap(); } @@ -210,11 +207,11 @@ impl BlockGroup { ); for path in existing_paths { - let mut new_edges = vec![]; - for edge in path.edges { - new_edges.push(*edge_map.get(&edge).unwrap()); + let mut new_blocks = vec![]; + for block in path.blocks { + new_blocks.push(*block_map.get(&block).unwrap()); } - Path::create(conn, &path.name, target_block_group_id, new_edges); + Path::create(conn, &path.name, target_block_group_id, new_blocks); } } @@ -356,7 +353,7 @@ impl BlockGroup { // that means we have an edge with the chromosome index, that connects our start/end coordinates with the new block id let path = Path::get(conn, path_id); - let graph = PathEdge::edges_to_graph(conn, path.id); + let graph = PathBlock::blocks_to_graph(conn, path.id); println!("{path:?} {graph:?}"); let query = format!("SELECT id, sequence_hash, block_group_id, start, end, strand from block where id in ({block_ids})", block_ids = graph.nodes().map(|k| format!("{k}")).collect::>().join(",")); let mut stmt = conn.prepare(&query).unwrap(); @@ -380,21 +377,12 @@ impl BlockGroup { row = it.next().unwrap(); } // TODO: probably don't need the graph, just get vector of source_ids. - let mut start_node = -1; - let start_edge = Edge::get(conn, path.edges[0]); - if let Some(value) = start_edge.source_id { - start_node = value - } else if let Some(value) = start_edge.target_id { - start_node = value - } - let mut dfs = Dfs::new(&graph, start_node as u32); let mut path_start = 0; let mut path_end = 0; let mut new_edges = vec![]; let mut previous_block: Option<&Block> = None; - println!("{blocks:?}"); - while let Some(nx) = dfs.next(&graph) { - let block = blocks.get(&(nx as i32)).unwrap(); + for block_id in &path.blocks { + let block = blocks.get(block_id).unwrap(); let block_length = (block.end - block.start); path_end += block_length; @@ -406,9 +394,14 @@ impl BlockGroup { // our range is fully contained w/in the block // |----block------| // |----range---| - let (left_block, right_block) = - Block::split(conn, block, start - path_start, chromosome_index, phased) - .unwrap(); + let (left_block, right_block) = Block::split( + conn, + block, + block.start + start - path_start, + chromosome_index, + phased, + ) + .unwrap(); Block::delete(conn, block.id); // let left_block = Block::create( // conn, @@ -435,9 +428,14 @@ impl BlockGroup { // our range is overlapping the end of the block // |----block---| // |----range---| - let (left_block, right_block) = - Block::split(conn, block, start - path_start, chromosome_index, phased) - .unwrap(); + let (left_block, right_block) = Block::split( + conn, + block, + block.start + start - path_start, + chromosome_index, + phased, + ) + .unwrap(); Block::delete(conn, block.id); // let left_block = Block::create( // conn, @@ -457,8 +455,14 @@ impl BlockGroup { // our range is overlapping the beginning of the block // |----block---| // |----range---| - let (left_block, right_block) = - Block::split(conn, block, end - path_start, chromosome_index, phased).unwrap(); + let (left_block, right_block) = Block::split( + conn, + block, + block.start + end - path_start, + chromosome_index, + phased, + ) + .unwrap(); Block::delete(conn, block.id); // let right_block = Block::create( // conn, @@ -493,8 +497,6 @@ impl BlockGroup { previous_block = Some(block); } - println!("change is {path:?} {graph:?} {blocks:?} {new_edges:?}"); - for new_edge in new_edges { Edge::create(conn, new_edge.0, new_edge.1, chromosome_index, phased); } @@ -536,7 +538,7 @@ mod tests { conn, "chr1", block_group.id, - vec![edge_0.id, edge_1.id, edge_2.id, edge_3.id, edge_4.id], + vec![a_block.id, t_block.id, c_block.id, g_block.id], ); (block_group.id, path.id) } @@ -587,7 +589,8 @@ mod tests { HashSet::from_iter(vec![ "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(), "AAAAAAANNNNTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(), - "AAAAAAAAAATTTTTTTTTGGGGGGGGG".to_string() + "AAAAAAAAAATTTTTTTTTGGGGGGGGG".to_string(), + "AAAAAAANNNNTTTTGGGGGGGGG".to_string(), ]) ) } diff --git a/src/models/block.rs b/src/models/block.rs index e140a53..bc93891 100644 --- a/src/models/block.rs +++ b/src/models/block.rs @@ -2,6 +2,7 @@ use crate::models::Path; use rusqlite::{params_from_iter, types::Value, Connection}; use crate::models::edge::{Edge, UpdatedEdge}; +use crate::models::path::PathBlock; #[derive(Debug)] pub struct Block { @@ -131,7 +132,12 @@ impl Block { phased: i32, ) -> Option<(Block, Block)> { if coordinate < block.start || coordinate >= block.end { - println!("Coordinate is out of block bounds"); + println!( + "Coordinate {coordinate} is out of block {block_id} bounds ({start}, {end})", + start = block.start, + end = block.end, + block_id = block.id + ); return None; } let new_left_block = Block::create( @@ -183,6 +189,41 @@ impl Block { Edge::bulk_update(conn, replacement_edges); + // replace paths using this block + let impacted_path_blocks = PathBlock::query( + conn, + "select * from path_blocks where source_block_id = ?1 OR target_block_id = ?1", + vec![Value::from(block.id)], + ); + + for path_block in impacted_path_blocks { + let path_id = path_block.path_id; + PathBlock::create( + conn, + path_id, + Some(new_left_block.id), + Some(new_right_block.id), + ); + if let Some(source_block_id) = path_block.source_block_id { + if source_block_id == block.id { + PathBlock::update( + conn, + "update path_blocks set source_block_id = ?2 where id = ?1", + vec![Value::from(path_block.id), Value::from(new_right_block.id)], + ); + } + } + if let Some(target_block_id) = path_block.target_block_id { + if target_block_id == block.id { + PathBlock::update( + conn, + "update path_blocks set target_block_id = ?2 where id = ?1", + vec![Value::from(path_block.id), Value::from(new_left_block.id)], + ); + } + } + } + // TODO: Delete existing block? -- leave to caller atm Some((new_left_block, new_right_block)) diff --git a/src/models/path.rs b/src/models/path.rs index f5fb0ae..b1d06fe 100644 --- a/src/models/path.rs +++ b/src/models/path.rs @@ -1,3 +1,4 @@ +use crate::models::edge::Edge; use petgraph::graphmap::DiGraphMap; use petgraph::prelude::Dfs; use petgraph::visit::{IntoNeighborsDirected, NodeCount}; @@ -12,11 +13,11 @@ pub struct Path { pub id: i32, pub name: String, pub block_group_id: i32, - pub edges: Vec, + pub blocks: Vec, } impl Path { - pub fn create(conn: &Connection, name: &str, block_group_id: i32, edges: Vec) -> Path { + pub fn create(conn: &Connection, name: &str, block_group_id: i32, blocks: Vec) -> Path { let query = "INSERT INTO path (name, block_group_id) VALUES (?1, ?2) RETURNING (id)"; let mut stmt = conn.prepare(query).unwrap(); let mut rows = stmt @@ -25,18 +26,18 @@ impl Path { id: row.get(0)?, name: name.to_string(), block_group_id, - edges: edges.clone(), + blocks: blocks.clone(), }) }) .unwrap(); let path = rows.next().unwrap().unwrap(); - for (index, edge) in edges.iter().enumerate() { - let next_edge = edges.get(index + 1); - if let Some(v) = next_edge { - PathEdge::create(conn, path.id, Some(*edge), Some(*v)); + for (index, block) in blocks.iter().enumerate() { + let next_block = blocks.get(index + 1); + if let Some(v) = next_block { + PathBlock::create(conn, path.id, Some(*block), Some(*v)); } else { - PathEdge::create(conn, path.id, Some(*edge), None); + PathBlock::create(conn, path.id, Some(*block), None); } } @@ -52,7 +53,7 @@ impl Path { id: row.get(0)?, block_group_id: row.get(1)?, name: row.get(2)?, - edges: PathEdge::get_edges(conn, path_id), + blocks: PathBlock::get_blocks(conn, path_id), }) }) .unwrap(); @@ -68,7 +69,7 @@ impl Path { id: path_id, block_group_id: row.get(1)?, name: row.get(2)?, - edges: PathEdge::get_edges(conn, path_id), + blocks: PathBlock::get_blocks(conn, path_id), }) }) .unwrap(); @@ -81,109 +82,142 @@ impl Path { } #[derive(Debug)] -pub struct PathEdge { +pub struct PathBlock { pub id: i32, pub path_id: i32, - pub source_edge_id: Option, - pub target_edge_id: Option, + pub source_block_id: Option, + pub target_block_id: Option, } -impl PathEdge { +impl PathBlock { pub fn create( conn: &Connection, path_id: i32, - source_edge_id: Option, - target_edge_id: Option, - ) -> PathEdge { + source_block_id: Option, + target_block_id: Option, + ) -> PathBlock { let query = - "INSERT INTO path_edges (path_id, source_edge_id, target_edge_id) VALUES (?1, ?2, ?3) RETURNING (id)"; + "INSERT INTO path_blocks (path_id, source_block_id, target_block_id) VALUES (?1, ?2, ?3) RETURNING (id)"; let mut stmt = conn.prepare(query).unwrap(); let mut rows = stmt - .query_map((path_id, source_edge_id, target_edge_id), |row| { - Ok(PathEdge { + .query_map((path_id, source_block_id, target_block_id), |row| { + Ok(PathBlock { id: row.get(0)?, path_id, - source_edge_id, - target_edge_id, + source_block_id, + target_block_id, }) }) .unwrap(); - rows.next().unwrap().unwrap() + match rows.next().unwrap() { + Ok(res) => res, + Err(rusqlite::Error::SqliteFailure(err, details)) => { + if err.code == rusqlite::ErrorCode::ConstraintViolation { + println!("{err:?} {details:?}"); + let mut query; + let mut placeholders = vec![path_id]; + if let Some(s) = source_block_id { + if let Some(t) = target_block_id { + query = "SELECT id from path_blocks where path_id = ?1 AND source_block_id = ?2 AND target_block_id = ?3;"; + placeholders.push(s); + placeholders.push(t); + } else { + query = "SELECT id from path_blocks where path_id = ?1 AND source_block_id = ?2 AND target_block_id is null;"; + placeholders.push(s); + } + } else if let Some(t) = target_block_id { + query = "SELECT id from path_blocks where path_id = ?1 AND source_block_id is null AND target_block_id = ?2;"; + placeholders.push(t); + } else { + panic!("No block ids passed"); + } + println!("{query} {placeholders:?}"); + PathBlock { + id: conn + .query_row(query, params_from_iter(&placeholders), |row| row.get(0)) + .unwrap(), + path_id, + source_block_id, + target_block_id, + } + } else { + panic!("something bad happened querying the database") + } + } + Err(_) => { + panic!("something bad happened querying the database") + } + } } - pub fn get_edges(conn: &Connection, path_id: i32) -> Vec { - let mut edges = vec![]; - let query = "SELECT source_edge_id, target_edge_id from path_edges where path_id = ?1;"; - let mut stmt = conn.prepare_cached(query).unwrap(); + pub fn query(conn: &Connection, query: &str, placeholders: Vec) -> Vec { + let mut stmt = conn.prepare(query).unwrap(); let mut rows = stmt - .query_map((path_id,), |row| { - let source_id: Option = row.get(0).unwrap(); - let target_id: Option = row.get(1).unwrap(); - Ok((source_id, target_id)) + .query_map(params_from_iter(placeholders), |row| { + Ok(PathBlock { + id: row.get(0)?, + path_id: row.get(1)?, + source_block_id: row.get(2)?, + target_block_id: row.get(3)?, + }) }) .unwrap(); - let mut edge_graph = DiGraphMap::new(); + let mut objs = vec![]; for row in rows { - let (source, target) = row.unwrap(); - if let Some(v) = source { - edge_graph.add_node(v); - } - if let Some(v) = target { - edge_graph.add_node(v); - } - if let Some(source_v) = source { - if let Some(target_v) = target { - edge_graph.add_edge(source_v, target_v, ()); - } - } + objs.push(row.unwrap()); } - let mut start_edge = None; - for node in edge_graph.nodes() { - let has_incoming = edge_graph - .neighbors_directed(node, Direction::Incoming) - .next(); + objs + } + + pub fn update(conn: &Connection, query: &str, placeholders: Vec) { + let mut stmt = conn.prepare(query).unwrap(); + stmt.execute(params_from_iter(placeholders)).unwrap(); + } + + pub fn get_blocks(conn: &Connection, path_id: i32) -> Vec { + let mut blocks = vec![]; + let graph = PathBlock::blocks_to_graph(conn, path_id); + let mut start_node = None; + for node in graph.nodes() { + let has_incoming = graph.neighbors_directed(node, Direction::Incoming).next(); if has_incoming.is_none() { - start_edge = Some(node); + start_node = Some(node); break; } } - if start_edge.is_none() { - panic!("No starting edge found in path {path_id}"); + if start_node.is_none() { + panic!("No starting block found in path {path_id}"); } - let mut dfs = Dfs::new(&edge_graph, start_edge.unwrap()); - while let Some(nx) = dfs.next(&edge_graph) { - edges.push(nx as i32); + let mut dfs = Dfs::new(&graph, start_node.unwrap()); + while let Some(nx) = dfs.next(&graph) { + blocks.push(nx as i32); } - edges + blocks } - pub fn edges_to_graph(conn: &Connection, path_id: i32) -> DiGraphMap<(u32), ()> { - let edges = PathEdge::get_edges(conn, path_id); - let edge_str = (*edges) - .iter() - .map(|v| format!("{v}")) - .collect::>() - .join(","); - let query = format!("SELECT source_id, target_id from edges where id IN ({edge_str});"); - let mut stmt = conn.prepare(&query).unwrap(); + pub fn blocks_to_graph(conn: &Connection, path_id: i32) -> DiGraphMap<(u32), ()> { + let query = "SELECT source_block_id, target_block_id from path_blocks where path_id = ?1;"; + let mut stmt = conn.prepare_cached(query).unwrap(); let mut rows = stmt - .query_map([], |row| { + .query_map((path_id,), |row| { let source_id: Option = row.get(0).unwrap(); let target_id: Option = row.get(1).unwrap(); Ok((source_id, target_id)) }) .unwrap(); let mut graph = DiGraphMap::new(); - for edge in rows { - let (source, target) = edge.unwrap(); - if let Some(source_value) = source { - graph.add_node(source_value); - if let Some(target_value) = target { - graph.add_edge(source_value, target_value, ()); - } + for row in rows { + let (source, target) = row.unwrap(); + if let Some(v) = source { + graph.add_node(v); } - if let Some(target_value) = target { - graph.add_node(target_value); + if let Some(v) = target { + graph.add_node(v); + } + if let Some(source_v) = source { + if let Some(target_v) = target { + graph.add_edge(source_v, target_v, ()); + } } } graph