Skip to content

Commit

Permalink
Merge pull request #12 from ginkgobioworks/convert-path-edge-to-block
Browse files — browse the repository at this point in the history
Convert path edges to path block
  • Loading branch information
Chris7 authored Aug 5, 2024
2 parents f6ec6dd + 55395ed commit e5b8996
Show file tree
Hide file tree
Showing 5 changed files with 195 additions and 122 deletions.
12 changes: 6 additions & 6 deletions migrations/01-initial/up.sql
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,13 @@ CREATE TABLE path (
);
CREATE UNIQUE INDEX path_uidx ON path(block_group_id, name);

CREATE TABLE path_edges (
CREATE TABLE path_blocks (
id INTEGER PRIMARY KEY NOT NULL,
path_id INTEGER NOT NULL,
source_edge_id INTEGER,
target_edge_id INTEGER,
FOREIGN KEY(source_edge_id) REFERENCES edges(id),
FOREIGN KEY(target_edge_id) REFERENCES edges(id),
source_block_id INTEGER,
target_block_id INTEGER,
FOREIGN KEY(source_block_id) REFERENCES block(id),
FOREIGN KEY(target_block_id) REFERENCES block(id),
FOREIGN KEY(path_id) REFERENCES path(id)
);
CREATE UNIQUE INDEX path_edge_uidx ON path_edges(path_id, source_edge_id, target_edge_id);
CREATE UNIQUE INDEX path_blocks_uidx ON path_blocks(path_id, source_block_id, target_block_id);
11 changes: 3 additions & 8 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,14 +80,9 @@ fn import_fasta(fasta: &String, name: &String, shallow: bool, conn: &mut Connect
(sequence.len() as i32),
&"1".to_string(),
);
let edge_1 = Edge::create(conn, None, Some(block.id), 0, 0);
let edge_2 = Edge::create(conn, Some(block.id), None, 0, 0);
Path::create(
conn,
record.id(),
block_group.id,
vec![edge_1.id, edge_2.id],
);
Edge::create(conn, None, Some(block.id), 0, 0);
Edge::create(conn, Some(block.id), None, 0, 0);
Path::create(conn, record.id(), block_group.id, vec![block.id]);
}
println!("Created it");
} else {
Expand Down
67 changes: 35 additions & 32 deletions src/models.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ pub mod sequence;
use crate::models;
use crate::models::block::Block;
use crate::models::edge::Edge;
use crate::models::path::{all_simple_paths, Path, PathEdge};
use crate::models::path::{all_simple_paths, Path, PathBlock};
use crate::models::sequence::Sequence;

#[derive(Debug)]
Expand Down Expand Up @@ -158,10 +158,8 @@ impl BlockGroup {
.join(", ");
let mut it = stmt.query([block_keys]).unwrap();
let mut row = it.next().unwrap();
let mut edge_map = HashMap::new();
while row.is_some() {
let edge = row.unwrap();
let edge_id: i32 = edge.get(0).unwrap();
let source_id: Option<i32> = edge.get(1).unwrap();
let target_id: Option<i32> = edge.get(2).unwrap();
let chrom_index = edge.get(3).unwrap();
Expand Down Expand Up @@ -198,7 +196,6 @@ impl BlockGroup {
} else {
panic!("no source and target specified.");
}
edge_map.insert(edge_id, new_edge.id);

row = it.next().unwrap();
}
Expand All @@ -210,11 +207,11 @@ impl BlockGroup {
);

for path in existing_paths {
let mut new_edges = vec![];
for edge in path.edges {
new_edges.push(*edge_map.get(&edge).unwrap());
let mut new_blocks = vec![];
for block in path.blocks {
new_blocks.push(*block_map.get(&block).unwrap());
}
Path::create(conn, &path.name, target_block_group_id, new_edges);
Path::create(conn, &path.name, target_block_group_id, new_blocks);
}
}

Expand Down Expand Up @@ -356,7 +353,7 @@ impl BlockGroup {
// that means we have an edge with the chromosome index, that connects our start/end coordinates with the new block id

let path = Path::get(conn, path_id);
let graph = PathEdge::edges_to_graph(conn, path.id);
let graph = PathBlock::blocks_to_graph(conn, path.id);
println!("{path:?} {graph:?}");
let query = format!("SELECT id, sequence_hash, block_group_id, start, end, strand from block where id in ({block_ids})", block_ids = graph.nodes().map(|k| format!("{k}")).collect::<Vec<_>>().join(","));
let mut stmt = conn.prepare(&query).unwrap();
Expand All @@ -380,21 +377,12 @@ impl BlockGroup {
row = it.next().unwrap();
}
// TODO: probably don't need the graph, just get vector of source_ids.
let mut start_node = -1;
let start_edge = Edge::get(conn, path.edges[0]);
if let Some(value) = start_edge.source_id {
start_node = value
} else if let Some(value) = start_edge.target_id {
start_node = value
}
let mut dfs = Dfs::new(&graph, start_node as u32);
let mut path_start = 0;
let mut path_end = 0;
let mut new_edges = vec![];
let mut previous_block: Option<&Block> = None;
println!("{blocks:?}");
while let Some(nx) = dfs.next(&graph) {
let block = blocks.get(&(nx as i32)).unwrap();
for block_id in &path.blocks {
let block = blocks.get(block_id).unwrap();
let block_length = (block.end - block.start);
path_end += block_length;

Expand All @@ -406,9 +394,14 @@ impl BlockGroup {
// our range is fully contained w/in the block
// |----block------|
// |----range---|
let (left_block, right_block) =
Block::split(conn, block, start - path_start, chromosome_index, phased)
.unwrap();
let (left_block, right_block) = Block::split(
conn,
block,
block.start + start - path_start,
chromosome_index,
phased,
)
.unwrap();
Block::delete(conn, block.id);
// let left_block = Block::create(
// conn,
Expand All @@ -435,9 +428,14 @@ impl BlockGroup {
// our range is overlapping the end of the block
// |----block---|
// |----range---|
let (left_block, right_block) =
Block::split(conn, block, start - path_start, chromosome_index, phased)
.unwrap();
let (left_block, right_block) = Block::split(
conn,
block,
block.start + start - path_start,
chromosome_index,
phased,
)
.unwrap();
Block::delete(conn, block.id);
// let left_block = Block::create(
// conn,
Expand All @@ -457,8 +455,14 @@ impl BlockGroup {
// our range is overlapping the beginning of the block
// |----block---|
// |----range---|
let (left_block, right_block) =
Block::split(conn, block, end - path_start, chromosome_index, phased).unwrap();
let (left_block, right_block) = Block::split(
conn,
block,
block.start + end - path_start,
chromosome_index,
phased,
)
.unwrap();
Block::delete(conn, block.id);
// let right_block = Block::create(
// conn,
Expand Down Expand Up @@ -493,8 +497,6 @@ impl BlockGroup {
previous_block = Some(block);
}

println!("change is {path:?} {graph:?} {blocks:?} {new_edges:?}");

for new_edge in new_edges {
Edge::create(conn, new_edge.0, new_edge.1, chromosome_index, phased);
}
Expand Down Expand Up @@ -536,7 +538,7 @@ mod tests {
conn,
"chr1",
block_group.id,
vec![edge_0.id, edge_1.id, edge_2.id, edge_3.id, edge_4.id],
vec![a_block.id, t_block.id, c_block.id, g_block.id],
);
(block_group.id, path.id)
}
Expand Down Expand Up @@ -587,7 +589,8 @@ mod tests {
HashSet::from_iter(vec![
"AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
"AAAAAAANNNNTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
"AAAAAAAAAATTTTTTTTTGGGGGGGGG".to_string()
"AAAAAAAAAATTTTTTTTTGGGGGGGGG".to_string(),
"AAAAAAANNNNTTTTGGGGGGGGG".to_string(),
])
)
}
Expand Down
43 changes: 42 additions & 1 deletion src/models/block.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use crate::models::Path;
use rusqlite::{params_from_iter, types::Value, Connection};

use crate::models::edge::{Edge, UpdatedEdge};
use crate::models::path::PathBlock;

#[derive(Debug)]
pub struct Block {
Expand Down Expand Up @@ -131,7 +132,12 @@ impl Block {
phased: i32,
) -> Option<(Block, Block)> {
if coordinate < block.start || coordinate >= block.end {
println!("Coordinate is out of block bounds");
println!(
"Coordinate {coordinate} is out of block {block_id} bounds ({start}, {end})",
start = block.start,
end = block.end,
block_id = block.id
);
return None;
}
let new_left_block = Block::create(
Expand Down Expand Up @@ -183,6 +189,41 @@ impl Block {

Edge::bulk_update(conn, replacement_edges);

// replace paths using this block
let impacted_path_blocks = PathBlock::query(
conn,
"select * from path_blocks where source_block_id = ?1 OR target_block_id = ?1",
vec![Value::from(block.id)],
);

for path_block in impacted_path_blocks {
let path_id = path_block.path_id;
PathBlock::create(
conn,
path_id,
Some(new_left_block.id),
Some(new_right_block.id),
);
if let Some(source_block_id) = path_block.source_block_id {
if source_block_id == block.id {
PathBlock::update(
conn,
"update path_blocks set source_block_id = ?2 where id = ?1",
vec![Value::from(path_block.id), Value::from(new_right_block.id)],
);
}
}
if let Some(target_block_id) = path_block.target_block_id {
if target_block_id == block.id {
PathBlock::update(
conn,
"update path_blocks set target_block_id = ?2 where id = ?1",
vec![Value::from(path_block.id), Value::from(new_left_block.id)],
);
}
}
}

// TODO: Delete existing block? -- leave to caller atm

Some((new_left_block, new_right_block))
Expand Down
Loading

0 comments on commit e5b8996

Please sign in to comment.