Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Non-destructive block splitting #11

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# our test dbs
*.db-*
*.db

# Generated by Cargo
# will have compiled files and executables
Expand Down
46 changes: 46 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

196 changes: 196 additions & 0 deletions src/models/block.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,13 +133,17 @@ impl Block {
phased: i32,
) -> Option<(Block, Block)> {
if coordinate < block.start || coordinate >= block.end {
<<<<<<< HEAD
println!(
"Coordinate {coordinate} is out of block {block_id} bounds ({start}, {end})",
start = block.start,
end = block.end,
block_id = block.id
);
return None;
=======
panic!("Coordinate {coordinate} is out of block bounds");
>>>>>>> 451892c (Add non-destructive block splitting)
}
let new_left_block = Block::create(
conn,
Expand Down Expand Up @@ -251,12 +255,69 @@ impl Block {
objs
}

<<<<<<< HEAD
pub fn get_sequence(conn: &Connection, block_id: i32) -> (String, String) {
let mut stmt = conn.prepare_cached("select substr(sequence.sequence, block.start + 1, block.end - block.start) as sequence, block.strand from sequence left join block on (block.sequence_hash = sequence.hash) where block.id = ?1").unwrap();
let mut rows = stmt
.query_map((block_id,), |row| Ok((row.get(0)?, row.get(1)?)))
.unwrap();
rows.next().unwrap().unwrap()
=======
pub fn create_prefix_block(conn: &Connection, block: Block, coordinate: i32) -> Block {
if coordinate < block.start || coordinate >= block.end {
panic!("Coordinate {coordinate} is out of block bounds");
}
let prefix_block = Block::create(
conn,
&block.sequence_hash,
block.block_group_id,
block.start,
coordinate,
&block.strand,
);

let edges_into = Block::edges_into(conn, block.id);

for edge in edges_into.iter() {
Edge::create(
conn,
edge.source_id,
Some(prefix_block.id),
edge.chromosome_index,
edge.phased,
);
}

prefix_block
}

pub fn create_suffix_block(conn: &Connection, block: Block, coordinate: i32) -> Block {
if coordinate < block.start || coordinate >= block.end {
panic!("Coordinate {coordinate} is out of block bounds");
}
let suffix_block = Block::create(
conn,
&block.sequence_hash,
block.block_group_id,
coordinate,
block.end,
&block.strand,
);

let edges_out_of = Block::edges_out_of(conn, block.id);

for edge in edges_out_of.iter() {
Edge::create(
conn,
Some(suffix_block.id),
edge.target_id,
edge.chromosome_index,
edge.phased,
);
}

suffix_block
>>>>>>> 451892c (Add non-destructive block splitting)
}
}

Expand Down Expand Up @@ -405,6 +466,7 @@ mod tests {
}

#[test]
#[should_panic]
fn test_split_block_bad_coordinate() {
let conn = &mut get_connection();
Collection::create(conn, "test collection");
Expand All @@ -420,6 +482,7 @@ mod tests {
}

#[test]
<<<<<<< HEAD
fn get_sequence() {
let conn = get_connection();
let sequence = "AAATTTCCCGGG".to_string();
Expand All @@ -437,5 +500,138 @@ mod tests {
Block::get_sequence(&conn, block.id),
("AAATTTCCC".to_string(), "1".to_string())
);
=======
fn test_create_prefix_block() {
let conn = &mut get_connection();
Collection::create(conn, &"test collection".to_string());
let block_group = BlockGroup::create(
conn,
&"test collection".to_string(),
None,
&"test block group".to_string(),
);
let sequence1_hash =
Sequence::create(conn, "DNA".to_string(), &"ATCGATCG".to_string(), true);
let block1 = Block::create(
conn,
&sequence1_hash,
block_group.id,
0,
8,
&"+".to_string(),
);
let sequence2_hash =
Sequence::create(conn, "DNA".to_string(), &"AAAAAAAA".to_string(), true);
let block2 = Block::create(
conn,
&sequence2_hash,
block_group.id,
1,
8,
&"+".to_string(),
);
let sequence3_hash =
Sequence::create(conn, "DNA".to_string(), &"CCCCCCCC".to_string(), true);
let block3 = Block::create(
conn,
&sequence3_hash,
block_group.id,
1,
8,
&"+".to_string(),
);
let sequence4_hash =
Sequence::create(conn, "DNA".to_string(), &"GGGGGGGG".to_string(), true);
let block4 = Block::create(
conn,
&sequence4_hash,
block_group.id,
1,
8,
&"+".to_string(),
);
let edge1 = Edge::create(conn, Some(block1.id), Some(block3.id), 0, 0);
let edge2 = Edge::create(conn, Some(block2.id), Some(block3.id), 0, 0);
let edge3 = Edge::create(conn, Some(block3.id), Some(block4.id), 0, 0);

let prefix_block = Block::create_prefix_block(conn, block3, 4);

let edges_into_prefix_block = Block::edges_into(conn, prefix_block.id);
assert_eq!(edges_into_prefix_block.len(), 2);

let mut actual_previous_block_ids = HashSet::new();
actual_previous_block_ids.insert(edges_into_prefix_block[0].source_id.unwrap());
actual_previous_block_ids.insert(edges_into_prefix_block[1].source_id.unwrap());
let mut expected_previous_block_ids = HashSet::new();
expected_previous_block_ids.insert(block1.id);
expected_previous_block_ids.insert(block2.id);
assert_eq!(actual_previous_block_ids, expected_previous_block_ids);

let edges_out_of_prefix_block = Block::edges_out_of(conn, prefix_block.id);
assert_eq!(edges_out_of_prefix_block.len(), 0);
}
#[test]
fn test_create_suffix_block() {
let conn = &mut get_connection();
Collection::create(conn, &"test collection".to_string());
let block_group = BlockGroup::create(
conn,
&"test collection".to_string(),
None,
&"test block group".to_string(),
);
let sequence1_hash =
Sequence::create(conn, "DNA".to_string(), &"ATCGATCG".to_string(), true);
let block1 = Block::create(
conn,
&sequence1_hash,
block_group.id,
0,
8,
&"+".to_string(),
);
let sequence2_hash =
Sequence::create(conn, "DNA".to_string(), &"AAAAAAAA".to_string(), true);
let block2 = Block::create(
conn,
&sequence2_hash,
block_group.id,
1,
8,
&"+".to_string(),
);
let sequence3_hash =
Sequence::create(conn, "DNA".to_string(), &"CCCCCCCC".to_string(), true);
let block3 = Block::create(
conn,
&sequence3_hash,
block_group.id,
1,
8,
&"+".to_string(),
);
let sequence4_hash =
Sequence::create(conn, "DNA".to_string(), &"GGGGGGGG".to_string(), true);
let block4 = Block::create(
conn,
&sequence4_hash,
block_group.id,
1,
8,
&"+".to_string(),
);
let edge1 = Edge::create(conn, Some(block1.id), Some(block3.id), 0, 0);
let edge2 = Edge::create(conn, Some(block2.id), Some(block3.id), 0, 0);
let edge3 = Edge::create(conn, Some(block3.id), Some(block4.id), 0, 0);

let suffix_block = Block::create_suffix_block(conn, block3, 4);

let edges_into_suffix_block = Block::edges_into(conn, suffix_block.id);
assert_eq!(edges_into_suffix_block.len(), 0);

let edges_out_of_suffix_block = Block::edges_out_of(conn, suffix_block.id);
assert_eq!(edges_out_of_suffix_block.len(), 1);
assert_eq!(edges_out_of_suffix_block[0].target_id.unwrap(), block4.id);
>>>>>>> 451892c (Add non-destructive block splitting)
}
}
Loading