Skip to content

Commit

Permalink
Allow specifying parent sample in vcf update
Browse files Browse the repository at this point in the history
  • Loading branch information
Chris7 committed Oct 30, 2024
1 parent 5507725 commit c52c83d
Show file tree
Hide file tree
Showing 8 changed files with 135 additions and 4 deletions.
9 changes: 9 additions & 0 deletions fixtures/simple_iterative_engineering_1.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
##fileformat=VCFv4.1
##filedate=Tue Sep 4 13:12:57 2018
##reference=simple.fa
##contig=<ID=m123,length=34>
##phasing=none
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT f1
m123 3 . CGA CA 1611.92 . GT 1
m123 16 . GAT G 1611.92 . GT 1
10 changes: 10 additions & 0 deletions fixtures/simple_iterative_engineering_2.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
##fileformat=VCFv4.1
##filedate=Tue Sep 4 13:12:57 2018
##reference=simple.fa
##contig=<ID=m123,length=34>
##phasing=none
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT f2
m123 6 . T C 1611.92 . GT 1
m123 18 . GG GAATCAG 1611.92 . GT 1
m123 28 . GA G 1611.92 . GT 1
9 changes: 9 additions & 0 deletions fixtures/simple_iterative_engineering_3.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
##fileformat=VCFv4.1
##filedate=Tue Sep 4 13:12:57 2018
##reference=simple.fa
##contig=<ID=m123,length=34>
##phasing=none
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT f3
m123 5 . A G 1611.92 . GT 1
m123 16 . CGGAA CA 1611.92 . GT 1
4 changes: 3 additions & 1 deletion src/exports/gfa.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,9 @@ pub fn export_gfa(
let boundary_edges = Edge::boundary_edges_from_sequences(&blocks);
edges.extend(boundary_edges.clone());

let (graph, edges_by_node_pair) = Edge::build_graph(&edges, &blocks);
let (mut graph, edges_by_node_pair) = Edge::build_graph(&edges, &blocks);

BlockGroup::prune_graph(&mut graph);

let file = File::create(filename).unwrap();
let mut writer = BufWriter::new(file);
Expand Down
7 changes: 6 additions & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@ enum Commands {
/// If no sample is provided, enter the sample to associate variants to
#[arg(short, long)]
sample: Option<String>,
/// Use the given sample as the parent sample for changes.
#[arg(long)]
parent_sample: Option<String>,
/// A CSV with combinatorial library information
#[arg(short, long)]
library: Option<String>,
Expand Down Expand Up @@ -237,6 +240,7 @@ fn main() {
path_name,
start,
end,
parent_sample,
}) => {
conn.execute("BEGIN TRANSACTION", []).unwrap();
let name = &name.clone().unwrap_or_else(|| {
Expand All @@ -256,12 +260,13 @@ fn main() {
);
} else if let Some(vcf_path) = vcf {
update_with_vcf(
&vcf.clone().unwrap(),
vcf_path,
name,
genotype.clone().unwrap_or("".to_string()),
sample.clone().unwrap_or("".to_string()),
&conn,
&operation_conn,
parent_sample.as_deref(),
);
} else {
panic!("Unknown file type provided for update.");
Expand Down
58 changes: 58 additions & 0 deletions src/models/block_group.rs
Original file line number Diff line number Diff line change
Expand Up @@ -690,6 +690,64 @@ impl BlockGroup {
new_edges
}

/// Applies a batch of path changes by bulk-creating the edges they produce.
///
/// The work happens in three passes:
///
/// 1. For every change, the edges returned by `BlockGroup::set_up_new_bg_edges`
///    are accumulated per block group and, when the change carries a
///    `path_accession`, also per `(path, accession)` pair. The interval tree
///    for a block group is built once via `BlockGroup::intervaltree_for` and
///    reused for every later change in the same block group; it is not
///    rebuilt during the batch.
/// 2. For every block group, the accumulated edges are created with
///    `Edge::bulk_create` and attached with `BlockGroupEdge::bulk_create`.
/// 3. For every `(path, accession)` pair, if `Accession::get` finds a row with
///    that name and path id a notice is printed and nothing is written;
///    otherwise the accession edges, the accession, and its accession path
///    are created.
///
/// # Panics
///
/// Panics with "Accession could not be created." if `Accession::create`
/// returns an error.
pub fn insert_bg_changes(conn: &Connection, changes: &Vec<PathChange>) {
    let mut new_edges_by_block_group = HashMap::<i64, Vec<EdgeData>>::new();
    let mut new_accession_edges: HashMap<_, Vec<EdgeData>> = HashMap::new();
    let mut tree_map = HashMap::new();

    for change in changes {
        let tree = tree_map
            .entry(change.block_group_id)
            .or_insert_with(|| BlockGroup::intervaltree_for(conn, change.block_group_id, true));
        let new_edges = BlockGroup::set_up_new_bg_edges(change, tree);

        // Only accession-tagged changes need a second copy of the edges, so
        // clone for that map first and move the vector into the other one.
        if let Some(accession) = &change.path_accession {
            new_accession_edges
                .entry((&change.path, accession))
                .or_default()
                .extend(new_edges.iter().cloned());
        }
        new_edges_by_block_group
            .entry(change.block_group_id)
            .or_default()
            .extend(new_edges);
    }

    for (block_group_id, new_edges) in new_edges_by_block_group {
        let edge_ids = Edge::bulk_create(conn, &new_edges);
        BlockGroupEdge::bulk_create(conn, block_group_id, &edge_ids);
    }

    for ((path, accession_name), path_edges) in new_accession_edges {
        // NOTE(review): every `Err` from `Accession::get` is treated as
        // "no such accession"; confirm that it cannot also signal a database
        // failure that should be surfaced instead.
        match Accession::get(
            conn,
            "select * from accession where name = ?1 AND path_id = ?2",
            vec![
                SQLValue::from(accession_name.clone()),
                SQLValue::from(path.id),
            ],
        ) {
            Ok(_) => {
                println!("accession already exists, consider a better matching algorithm to determine if this is an error.");
            }
            Err(_) => {
                let acc_edges = AccessionEdge::bulk_create(
                    conn,
                    &path_edges.iter().map(AccessionEdgeData::from).collect(),
                );
                let acc = Accession::create(conn, accession_name, path.id, None)
                    .expect("Accession could not be created.");
                AccessionPath::create(conn, acc.id, &acc_edges);
            }
        }
    }
}

#[allow(clippy::ptr_arg)]
#[allow(clippy::needless_late_init)]
pub fn insert_bg_change(
Expand Down
18 changes: 18 additions & 0 deletions src/operation_management.rs
Original file line number Diff line number Diff line change
Expand Up @@ -974,6 +974,7 @@ mod tests {
"".to_string(),
conn,
operation_conn,
None,
);
let edge_count = Edge::query(conn, "select * from edges", vec![]).len();
let node_count = Node::query(conn, "select * from nodes", vec![]).len();
Expand Down Expand Up @@ -1068,6 +1069,7 @@ mod tests {
"".to_string(),
conn,
operation_conn,
None,
);

let foo_bg_id = BlockGroup::get_id(conn, &collection, Some("foo"), "m123");
Expand Down Expand Up @@ -1107,6 +1109,7 @@ mod tests {
"".to_string(),
conn,
operation_conn,
None,
);

let foo_bg_id = BlockGroup::get_id(conn, &collection, Some("foo"), "m123");
Expand Down Expand Up @@ -1212,6 +1215,7 @@ mod tests {
"".to_string(),
conn,
operation_conn,
None,
);
let edge_count = Edge::query(conn, "select * from edges", vec![]).len();
let node_count = Node::query(conn, "select * from nodes", vec![]).len();
Expand Down Expand Up @@ -1254,6 +1258,7 @@ mod tests {
"".to_string(),
conn,
operation_conn,
None,
);
let edge_count = Edge::query(conn, "select * from edges", vec![]).len();
let node_count = Node::query(conn, "select * from nodes", vec![]).len();
Expand Down Expand Up @@ -1313,6 +1318,7 @@ mod tests {
"".to_string(),
conn,
operation_conn,
None,
);
update_with_vcf(
&vcf_path.to_str().unwrap().to_string(),
Expand All @@ -1321,6 +1327,7 @@ mod tests {
"".to_string(),
conn,
operation_conn,
None,
);
update_with_vcf(
&vcf_path.to_str().unwrap().to_string(),
Expand All @@ -1329,6 +1336,7 @@ mod tests {
"".to_string(),
conn,
operation_conn,
None,
);
update_with_vcf(
&vcf_path.to_str().unwrap().to_string(),
Expand All @@ -1337,6 +1345,7 @@ mod tests {
"".to_string(),
conn,
operation_conn,
None,
);

let branch_id = OperationState::get_current_branch(operation_conn, &db_uuid).unwrap();
Expand Down Expand Up @@ -1398,6 +1407,7 @@ mod tests {
"".to_string(),
conn,
operation_conn,
None,
);

let branch_a = Branch::create(operation_conn, &db_uuid, "branch-a");
Expand All @@ -1409,6 +1419,7 @@ mod tests {
"".to_string(),
conn,
operation_conn,
None,
);
update_with_vcf(
&vcf_path.to_str().unwrap().to_string(),
Expand All @@ -1417,6 +1428,7 @@ mod tests {
"".to_string(),
conn,
operation_conn,
None,
);
update_with_vcf(
&vcf_path.to_str().unwrap().to_string(),
Expand All @@ -1425,6 +1437,7 @@ mod tests {
"".to_string(),
conn,
operation_conn,
None,
);
OperationState::set_branch(operation_conn, &db_uuid, "main");
OperationState::set_operation(operation_conn, &db_uuid, 2);
Expand All @@ -1435,6 +1448,7 @@ mod tests {
"".to_string(),
conn,
operation_conn,
None,
);
update_with_vcf(
&vcf_path.to_str().unwrap().to_string(),
Expand All @@ -1443,6 +1457,7 @@ mod tests {
"".to_string(),
conn,
operation_conn,
None,
);
update_with_vcf(
&vcf_path.to_str().unwrap().to_string(),
Expand All @@ -1451,6 +1466,7 @@ mod tests {
"".to_string(),
conn,
operation_conn,
None,
);
OperationState::set_branch(operation_conn, &db_uuid, "branch-a");
OperationState::set_operation(operation_conn, &db_uuid, 5);
Expand All @@ -1463,6 +1479,7 @@ mod tests {
"".to_string(),
conn,
operation_conn,
None,
);
OperationState::set_branch(operation_conn, &db_uuid, "branch-a");
OperationState::set_operation(operation_conn, &db_uuid, 5);
Expand All @@ -1473,6 +1490,7 @@ mod tests {
"".to_string(),
conn,
operation_conn,
None,
);

assert_eq!(
Expand Down
Loading

0 comments on commit c52c83d

Please sign in to comment.