From 1d9cf3c784416daefcfb80baca34ec004c442c43 Mon Sep 17 00:00:00 2001 From: hofer Date: Tue, 1 Oct 2024 14:24:27 -0400 Subject: [PATCH] Use 64 bits --- src/exports/gfa.rs | 24 ++--- src/imports/fasta.rs | 4 +- src/imports/gfa.rs | 20 ++-- src/lib.rs | 8 +- src/main.rs | 6 +- src/models/block_group.rs | 42 ++++---- src/models/block_group_edge.rs | 12 +-- src/models/edge.rs | 82 +++++++-------- src/models/node.rs | 27 +++-- src/models/operations.rs | 96 +++++++++--------- src/models/path.rs | 46 ++++----- src/models/path_edge.rs | 24 ++--- src/models/sequence.rs | 20 ++-- src/operation_management.rs | 179 ++++++++++++++++----------------- src/test_helpers.rs | 2 +- src/updates/vcf.rs | 40 ++++---- 16 files changed, 309 insertions(+), 323 deletions(-) diff --git a/src/exports/gfa.rs b/src/exports/gfa.rs index 3be4de7..a736fca 100644 --- a/src/exports/gfa.rs +++ b/src/exports/gfa.rs @@ -55,7 +55,7 @@ pub fn export_gfa(conn: &Connection, collection_name: &str, filename: &PathBuf) fn write_segments( writer: &mut BufWriter, blocks: &Vec, - terminal_block_ids: &HashSet, + terminal_block_ids: &HashSet, ) { for block in blocks { if terminal_block_ids.contains(&block.id) { @@ -78,9 +78,9 @@ fn segment_line(sequence: &str, index: usize) -> String { fn write_links( writer: &mut BufWriter, - graph: &DiGraphMap, - edges_by_node_pair: &HashMap<(i32, i32), Edge>, - terminal_block_ids: &HashSet, + graph: &DiGraphMap, + edges_by_node_pair: &HashMap<(i64, i64), Edge>, + terminal_block_ids: &HashSet, ) { for (source, target, ()) in graph.all_edges() { if terminal_block_ids.contains(&source) || terminal_block_ids.contains(&target) { @@ -101,9 +101,9 @@ fn write_links( } fn link_line( - source_index: i32, + source_index: i64, source_strand: Strand, - target_index: i32, + target_index: i64, target_strand: Strand, ) -> String { format!( @@ -123,9 +123,9 @@ fn link_line( fn nodes_for_edges( edge1: &Edge, edge2: &Edge, - blocks_by_node_and_start: &HashMap<(i32, i32), GroupBlock>, - blocks_by_node_and_end: &HashMap<(i32, i32), GroupBlock>, -) -> Vec { + blocks_by_node_and_start: &HashMap<(i64, i64), GroupBlock>, + blocks_by_node_and_end: &HashMap<(i64, i64), GroupBlock>, +) -> Vec { let mut current_block = blocks_by_node_and_start .get(&(edge1.target_node_id, edge1.target_coordinate)) .unwrap(); @@ -158,11 +158,11 @@ fn write_paths( let blocks_by_node_and_start = blocks .iter() .map(|block| ((block.node_id, block.start), block.clone())) - .collect::>(); + .collect::>(); let blocks_by_node_and_end = blocks .iter() .map(|block| ((block.node_id, block.end), block.clone())) - .collect::>(); + .collect::>(); for path in paths { let edges_for_path = edges_by_path_id.get(&path.id).unwrap(); @@ -187,7 +187,7 @@ fn write_paths( } } -fn path_line(path_name: &str, node_ids: &[i32], node_strands: &[Strand]) -> String { +fn path_line(path_name: &str, node_ids: &[i64], node_strands: &[Strand]) -> String { let nodes = node_ids .iter() .zip(node_strands.iter()) diff --git a/src/imports/fasta.rs b/src/imports/fasta.rs index 486a909..68d10a6 100644 --- a/src/imports/fasta.rs +++ b/src/imports/fasta.rs @@ -48,7 +48,7 @@ pub fn import_fasta( name: name.to_string(), } }; - let mut summary: HashMap = HashMap::new(); + let mut summary: HashMap = HashMap::new(); for result in reader.records() { let record = result.expect("Error during fasta record parsing"); @@ -56,7 +56,7 @@ pub fn import_fasta( .unwrap() .to_string(); let name = String::from_utf8(record.name().to_vec()).unwrap(); - let sequence_length = record.sequence().len() as i32; + let sequence_length = record.sequence().len() as i64; let seq = if shallow { Sequence::new() .sequence_type("DNA") diff --git a/src/imports/gfa.rs b/src/imports/gfa.rs index aaba44c..c8ef8f6 100644 --- a/src/imports/gfa.rs +++ b/src/imports/gfa.rs @@ -27,7 +27,7 @@ pub fn import_gfa(gfa_path: &FilePath, collection_name: &str, conn: &Connection) let block_group = BlockGroup::create(conn, collection_name, None, ""); let gfa: Gfa = Gfa::parse_gfa_file(gfa_path.to_str().unwrap()); let mut sequences_by_segment_id: HashMap = HashMap::new(); - let mut node_ids_by_segment_id: HashMap = HashMap::new(); + let mut node_ids_by_segment_id: HashMap = HashMap::new(); for segment in &gfa.segments { let input_sequence = segment.sequence.get_string(&gfa.sequence); @@ -198,10 +198,10 @@ pub fn import_gfa(gfa_path: &FilePath, collection_name: &str, conn: &Connection) } fn edge_data_from_fields( - source_node_id: i32, - source_coordinate: i32, + source_node_id: i64, + source_coordinate: i64, source_strand: Strand, - target_node_id: i32, + target_node_id: i64, target_strand: Strand, ) -> EdgeData { EdgeData { @@ -246,7 +246,7 @@ mod tests { let result = Path::sequence(conn, path); assert_eq!(result, "ATGGCATATTCGCAGCT"); - let node_count = Node::query(conn, "select * from nodes", vec![]).len() as i32; + let node_count = Node::query(conn, "select * from nodes", vec![]).len() as i64; assert_eq!(node_count, 6); } @@ -265,7 +265,7 @@ mod tests { HashSet::from_iter(vec!["AAAATTTTGGGGCCCC".to_string()]) ); - let node_count = Node::query(conn, "select * from nodes", vec![]).len() as i32; + let node_count = Node::query(conn, "select * from nodes", vec![]).len() as i64; assert_eq!(node_count, 6); } @@ -291,7 +291,7 @@ mod tests { let result = Path::sequence(conn, path); assert_eq!(result, "ACCTACAAATTCAAAC"); - let node_count = Node::query(conn, "select * from nodes", vec![]).len() as i32; + let node_count = Node::query(conn, "select * from nodes", vec![]).len() as i64; assert_eq!(node_count, 6); } @@ -317,7 +317,7 @@ mod tests { let result = Path::sequence(conn, path); assert_eq!(result, "TATGCCAGCTGCGAATA"); - let node_count = Node::query(conn, "select * from nodes", vec![]).len() as i32; + let node_count = Node::query(conn, "select * from nodes", vec![]).len() as i64; assert_eq!(node_count, 6); } @@ -422,7 +422,7 @@ mod tests { assert_eq!(all_sequences.len(), 1024); assert_eq!(all_sequences, expected_sequences); - let node_count = Node::query(conn, "select * from nodes", vec![]).len() as i32; + let node_count = Node::query(conn, "select * from nodes", vec![]).len() as i64; assert_eq!(node_count, 28); } @@ -452,7 +452,7 @@ mod tests { let all_sequences = BlockGroup::get_all_sequences(conn, block_group_id); assert_eq!(all_sequences, HashSet::from_iter(vec!["AA".to_string()])); - let node_count = Node::query(conn, "select * from nodes", vec![]).len() as i32; + let node_count = Node::query(conn, "select * from nodes", vec![]).len() as i64; assert_eq!(node_count, 4); } } diff --git a/src/lib.rs b/src/lib.rs index 63861bd..5850481 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -39,7 +39,7 @@ pub fn calculate_hash(t: &str) -> String { } pub struct Genotype { - pub allele: i32, + pub allele: i64, pub phasing: Phasing, } @@ -66,7 +66,7 @@ pub fn parse_genotype(gt: &str) -> Vec> { genotypes.push(None); } else { genotypes.push(Some(Genotype { - allele: allele.parse::().unwrap(), + allele: allele.parse::().unwrap(), phasing: phase, })); } @@ -76,7 +76,7 @@ pub fn parse_genotype(gt: &str) -> Vec> { genotypes } -pub fn get_overlap(a: i32, b: i32, x: i32, y: i32) -> (bool, bool, bool) { +pub fn get_overlap(a: i64, b: i64, x: i64, y: i64) -> (bool, bool, bool) { let contains_start = a <= x && x < b; let contains_end = a <= y && y < b; let overlap = a < y && x < b; @@ -99,7 +99,7 @@ mod tests { #[test] fn it_queries() { let conn = get_connection(None); - let sequence_count: i32 = conn + let sequence_count: i64 = conn .query_row( "SELECT count(*) from sequence where hash = 'foo'", [], diff --git a/src/main.rs b/src/main.rs index 0b1891f..538524e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -95,12 +95,12 @@ enum Commands { branch: Option, /// The operation id to move to #[clap(index = 1)] - id: Option, + id: Option, }, Reset { /// The operation id to reset to #[clap(index = 1)] - id: i32, + id: i64, }, /// View operations carried out against a database Operations { @@ -111,7 +111,7 @@ enum Commands { Apply { /// The operation id to apply #[clap(index = 1)] - id: i32, + id: i64, }, Export { /// The name of the collection to export diff --git a/src/models/block_group.rs b/src/models/block_group.rs index 8434248..85c84a9 100644 --- a/src/models/block_group.rs +++ b/src/models/block_group.rs @@ -15,7 +15,7 @@ use crate::models::strand::Strand; #[derive(Debug, Deserialize, Serialize)] pub struct BlockGroup { - pub id: i32, + pub id: i64, pub collection_name: String, pub sample_name: Option, pub name: String, @@ -30,18 +30,18 @@ pub struct BlockGroupData<'a> { #[derive(Clone, Debug)] pub struct PathChange { - pub block_group_id: i32, + pub block_group_id: i64, pub path: Path, - pub start: i32, - pub end: i32, + pub start: i64, + pub end: i64, pub block: PathBlock, - pub chromosome_index: i32, - pub phased: i32, + pub chromosome_index: i64, + pub phased: i64, } pub struct PathCache<'a> { pub cache: HashMap, - pub intervaltree_cache: HashMap>, + pub intervaltree_cache: HashMap>, pub conn: &'a Connection, } @@ -49,12 +49,12 @@ impl PathCache<'_> { pub fn new(conn: &Connection) -> PathCache { PathCache { cache: HashMap::::new(), - intervaltree_cache: HashMap::>::new(), + intervaltree_cache: HashMap::>::new(), conn, } } - pub fn lookup(path_cache: &mut PathCache, block_group_id: i32, name: String) -> Path { + pub fn lookup(path_cache: &mut PathCache, block_group_id: i64, name: String) -> Path { let path_key = PathData { name: name.clone(), block_group_id, @@ -80,7 +80,7 @@ impl PathCache<'_> { pub fn get_intervaltree<'a>( path_cache: &'a PathCache<'a>, path: &'a Path, - ) -> Option<&'a IntervalTree> { + ) -> Option<&'a IntervalTree> { path_cache.intervaltree_cache.get(path) } } @@ -159,7 +159,7 @@ impl BlockGroup { objs } - pub fn clone(conn: &Connection, source_block_group_id: i32, target_block_group_id: i32) { + pub fn clone(conn: &Connection, source_block_group_id: i64, target_block_group_id: i64) { let existing_paths = Path::get_paths( conn, "SELECT * from path where block_group_id = ?1", @@ -169,14 +169,14 @@ impl BlockGroup { let edge_ids = BlockGroupEdge::edges_for_block_group(conn, source_block_group_id) .iter() .map(|edge| edge.id) - .collect::>(); + .collect::>(); BlockGroupEdge::bulk_create(conn, target_block_group_id, &edge_ids); for path in existing_paths { let edge_ids = PathEdge::edges_for_path(conn, path.id) .into_iter() .map(|edge| edge.id) - .collect::>(); + .collect::>(); Path::create(conn, &path.name, target_block_group_id, &edge_ids); } } @@ -186,8 +186,8 @@ impl BlockGroup { collection_name: &str, sample_name: &str, group_name: &str, - ) -> i32 { - let mut bg_id : i32 = match conn.query_row( + ) -> i64 { + let mut bg_id : i64 = match conn.query_row( "select id from block_group where collection_name = ?1 AND sample_name = ?2 AND name = ?3", (collection_name, sample_name, group_name), |row| row.get(0), @@ -227,7 +227,7 @@ impl BlockGroup { collection_name: &str, sample_name: Option<&str>, group_name: &str, - ) -> i32 { + ) -> i64 { let result = if sample_name.is_some() { conn.query_row( "select id from block_group where collection_name = ?1 AND sample_name = ?2 AND name = ?3", @@ -251,7 +251,7 @@ impl BlockGroup { } } - pub fn get_all_sequences(conn: &Connection, block_group_id: i32) -> HashSet { + pub fn get_all_sequences(conn: &Connection, block_group_id: i64) -> HashSet { let mut edges = BlockGroupEdge::edges_for_block_group(conn, block_group_id); let (blocks, boundary_edges) = Edge::blocks_from_edges(conn, &edges); edges.extend(boundary_edges.clone()); @@ -274,7 +274,7 @@ impl BlockGroup { .clone() .into_iter() .map(|block| (block.id, block)) - .collect::>(); + .collect::>(); let mut sequences = HashSet::::new(); for start_node in start_nodes { @@ -303,7 +303,7 @@ impl BlockGroup { } pub fn insert_changes(conn: &Connection, changes: &Vec, cache: &PathCache) { - let mut new_edges_by_block_group = HashMap::>::new(); + let mut new_edges_by_block_group = HashMap::>::new(); for change in changes { let tree = PathCache::get_intervaltree(cache, &change.path).unwrap(); let new_edges = BlockGroup::set_up_new_edges(change, tree); @@ -324,7 +324,7 @@ impl BlockGroup { pub fn insert_change( conn: &Connection, change: &PathChange, - tree: &IntervalTree, + tree: &IntervalTree, ) { let new_edges = BlockGroup::set_up_new_edges(change, tree); let edge_ids = Edge::bulk_create(conn, new_edges); @@ -333,7 +333,7 @@ impl BlockGroup { pub fn set_up_new_edges( change: &PathChange, - tree: &IntervalTree, + tree: &IntervalTree, ) -> Vec { let start_blocks: Vec<&PathBlock> = tree.query_point(change.start).map(|x| &x.value).collect(); diff --git a/src/models/block_group_edge.rs b/src/models/block_group_edge.rs index dd1bc63..d2761ff 100644 --- a/src/models/block_group_edge.rs +++ b/src/models/block_group_edge.rs @@ -4,13 +4,13 @@ use rusqlite::{params_from_iter, Connection}; #[derive(Clone, Debug)] pub struct BlockGroupEdge { - pub id: i32, - pub block_group_id: i32, - pub edge_id: i32, + pub id: i64, + pub block_group_id: i64, + pub edge_id: i64, } impl BlockGroupEdge { - pub fn bulk_create(conn: &Connection, block_group_id: i32, edge_ids: &[i32]) { + pub fn bulk_create(conn: &Connection, block_group_id: i64, edge_ids: &[i64]) { for chunk in edge_ids.chunks(100000) { let mut rows_to_insert = vec![]; for edge_id in chunk { @@ -28,7 +28,7 @@ impl BlockGroupEdge { } } - pub fn edges_for_block_group(conn: &Connection, block_group_id: i32) -> Vec { + pub fn edges_for_block_group(conn: &Connection, block_group_id: i64) -> Vec { let query = format!( "select * from block_group_edges where block_group_id = {};", block_group_id @@ -37,7 +37,7 @@ impl BlockGroupEdge { let edge_ids = block_group_edges .into_iter() .map(|block_group_edge| block_group_edge.edge_id) - .collect::>(); + .collect::>(); Edge::bulk_load(conn, &edge_ids) } diff --git a/src/models/edge.rs b/src/models/edge.rs index dba0ecc..29a5696 100644 --- a/src/models/edge.rs +++ b/src/models/edge.rs @@ -12,27 +12,27 @@ use crate::models::strand::Strand; #[derive(Clone, Debug, Eq, Hash, PartialEq, Deserialize, Serialize)] pub struct Edge { - pub id: i32, - pub source_node_id: i32, - pub source_coordinate: i32, + pub id: i64, + pub source_node_id: i64, + pub source_coordinate: i64, pub source_strand: Strand, - pub target_node_id: i32, - pub target_coordinate: i32, + pub target_node_id: i64, + pub target_coordinate: i64, pub target_strand: Strand, - pub chromosome_index: i32, - pub phased: i32, + pub chromosome_index: i64, + pub phased: i64, } #[derive(Clone, Debug, Eq, Hash, PartialEq)] pub struct EdgeData { - pub source_node_id: i32, - pub source_coordinate: i32, + pub source_node_id: i64, + pub source_coordinate: i64, pub source_strand: Strand, - pub target_node_id: i32, - pub target_coordinate: i32, + pub target_node_id: i64, + pub target_coordinate: i64, pub target_strand: Strand, - pub chromosome_index: i32, - pub phased: i32, + pub chromosome_index: i64, + pub phased: i64, } impl From<&Edge> for EdgeData { @@ -52,31 +52,31 @@ impl From<&Edge> for EdgeData { #[derive(Eq, Hash, PartialEq)] pub struct BlockKey { - pub node_id: i32, - pub coordinate: i32, + pub node_id: i64, + pub coordinate: i64, } #[derive(Clone, Debug)] pub struct GroupBlock { - pub id: i32, - pub node_id: i32, + pub id: i64, + pub node_id: i64, pub sequence: String, - pub start: i32, - pub end: i32, + pub start: i64, + pub end: i64, } impl Edge { #[allow(clippy::too_many_arguments)] pub fn create( conn: &Connection, - source_node_id: i32, - source_coordinate: i32, + source_node_id: i64, + source_coordinate: i64, source_strand: Strand, - target_node_id: i32, - target_coordinate: i32, + target_node_id: i64, + target_coordinate: i64, target_strand: Strand, - chromosome_index: i32, - phased: i32, + chromosome_index: i64, + phased: i64, ) -> Edge { let query = "INSERT INTO edges (source_node_id, source_coordinate, source_strand, target_node_id, target_coordinate, target_strand, chromosome_index, phased) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8) RETURNING *"; let id_query = "select id from edges where and source_node_id = ?1 and source_coordinate = ?2 and source_strand = ?3 and target_node_id = ?4 and target_coordinate = ?5 and target_strand = ?6 and chromosome_index = ?7 and phased = ?8"; @@ -146,7 +146,7 @@ impl Edge { }) } - pub fn bulk_load(conn: &Connection, edge_ids: &[i32]) -> Vec { + pub fn bulk_load(conn: &Connection, edge_ids: &[i64]) -> Vec { let formatted_edge_ids = edge_ids .iter() .map(|edge_id| edge_id.to_string()) @@ -168,9 +168,9 @@ impl Edge { edges } - pub fn bulk_create(conn: &Connection, edges: Vec) -> Vec { + pub fn bulk_create(conn: &Connection, edges: Vec) -> Vec { let mut edge_rows = vec![]; - let mut edge_map: HashMap = HashMap::new(); + let mut edge_map: HashMap = HashMap::new(); for edge in &edges { let source_strand = format!("\"{0}\"", edge.source_strand); let target_strand = format!("\"{0}\"", edge.target_strand); @@ -239,7 +239,7 @@ impl Edge { edges .iter() .map(|edge| *edge_map.get(edge).unwrap()) - .collect::>() + .collect::>() } pub fn to_data(edge: Edge) -> EdgeData { @@ -258,8 +258,8 @@ impl Edge { fn get_block_boundaries( source_edges: Option<&Vec<&Edge>>, target_edges: Option<&Vec<&Edge>>, - sequence_length: i32, - ) -> Vec { + sequence_length: i64, + ) -> Vec { let mut block_boundary_coordinates = HashSet::new(); if let Some(actual_source_edges) = source_edges { for source_edge in actual_source_edges { @@ -283,13 +283,13 @@ impl Edge { block_boundary_coordinates .into_iter() .sorted_by(|c1, c2| Ord::cmp(&c1, &c2)) - .collect::>() + .collect::>() } pub fn blocks_from_edges(conn: &Connection, edges: &Vec) -> (Vec, Vec) { let mut node_ids = HashSet::new(); - let mut edges_by_source_node_id: HashMap> = HashMap::new(); - let mut edges_by_target_node_id: HashMap> = HashMap::new(); + let mut edges_by_source_node_id: HashMap> = HashMap::new(); + let mut edges_by_target_node_id: HashMap> = HashMap::new(); for edge in edges { if edge.source_node_id != PATH_START_NODE_ID { node_ids.insert(edge.source_node_id); @@ -308,7 +308,7 @@ impl Edge { } let sequences_by_node_id = - Node::get_sequences_by_node_ids(conn, node_ids.into_iter().collect::>()); + Node::get_sequences_by_node_ids(conn, node_ids.into_iter().collect::>()); let mut blocks = vec![]; let mut block_index = 0; @@ -412,7 +412,7 @@ impl Edge { pub fn build_graph( edges: &Vec, blocks: &Vec, - ) -> (DiGraphMap, HashMap<(i32, i32), Edge>) { + ) -> (DiGraphMap, HashMap<(i64, i64), Edge>) { let blocks_by_start = blocks .clone() .into_iter() @@ -425,7 +425,7 @@ impl Edge { block.id, ) }) - .collect::>(); + .collect::>(); let blocks_by_end = blocks .clone() .into_iter() @@ -438,9 +438,9 @@ impl Edge { block.id, ) }) - .collect::>(); + .collect::>(); - let mut graph: DiGraphMap = DiGraphMap::new(); + let mut graph: DiGraphMap = DiGraphMap::new(); let mut edges_by_node_pair = HashMap::new(); for block in blocks { graph.add_node(block.id); @@ -529,7 +529,7 @@ mod tests { let edges_by_source_node_id = edges .into_iter() .map(|edge| (edge.source_node_id, edge)) - .collect::>(); + .collect::>(); let edge_result1 = edges_by_source_node_id.get(&PATH_START_NODE_ID).unwrap(); assert_eq!(edge_result1.source_coordinate, -1); @@ -690,7 +690,7 @@ mod tests { let edges_by_source_node_id = edges .into_iter() .map(|edge| (edge.source_node_id, edge)) - .collect::>(); + .collect::>(); let edge_result1 = edges_by_source_node_id.get(&PATH_START_NODE_ID).unwrap(); diff --git a/src/models/node.rs b/src/models/node.rs index 1e27f7b..5709cf7 100644 --- a/src/models/node.rs +++ b/src/models/node.rs @@ -4,17 +4,17 @@ use std::collections::HashMap; use crate::models::sequence::Sequence; -pub const PATH_START_NODE_ID: i32 = 1; -pub const PATH_END_NODE_ID: i32 = 2; +pub const PATH_START_NODE_ID: i64 = 1; +pub const PATH_END_NODE_ID: i64 = 2; #[derive(Clone, Debug, Deserialize, Serialize)] pub struct Node { - pub id: i32, + pub id: i64, pub sequence_hash: String, } impl Node { - pub fn create(conn: &Connection, sequence_hash: &str) -> i32 { + pub fn create(conn: &Connection, sequence_hash: &str) -> i64 { let insert_statement = format!( "INSERT INTO nodes (sequence_hash) VALUES ('{}') RETURNING (id);", sequence_hash @@ -57,7 +57,7 @@ impl Node { objs } - pub fn get_nodes(conn: &Connection, node_ids: Vec) -> Vec { + pub fn get_nodes(conn: &Connection, node_ids: Vec) -> Vec { let mut nodes: Vec = vec![]; for chunk in node_ids.chunks(1000) { nodes.extend(Node::query( @@ -66,10 +66,7 @@ impl Node { "SELECT * FROM nodes WHERE id IN ({})", chunk.iter().map(|_| "?").collect::>().join(", ") ), - chunk - .iter() - .map(|id| SQLValue::Integer(*id as i64)) - .collect(), + chunk.iter().map(|id| SQLValue::Integer(*id)).collect(), )) } nodes @@ -77,13 +74,13 @@ impl Node { pub fn get_sequences_by_node_ids( conn: &Connection, - node_ids: Vec, - ) -> HashMap { - let nodes = Node::get_nodes(conn, node_ids.into_iter().collect::>()); + node_ids: Vec, + ) -> HashMap { + let nodes = Node::get_nodes(conn, node_ids.into_iter().collect::>()); let sequence_hashes_by_node_id = nodes .iter() .map(|node| (node.id, node.sequence_hash.clone())) - .collect::>(); + .collect::>(); let sequences_by_hash = Sequence::sequences_by_hash( conn, sequence_hashes_by_node_id @@ -100,10 +97,10 @@ impl Node { sequences_by_hash.get(&sequence_hash).unwrap().clone(), ) }) - .collect::>() + .collect::>() } - pub fn is_terminal(node_id: i32) -> bool { + pub fn is_terminal(node_id: i64) -> bool { node_id == PATH_START_NODE_ID || node_id == PATH_END_NODE_ID } } diff --git a/src/models/operations.rs b/src/models/operations.rs index 2e5cba4..639145b 100644 --- a/src/models/operations.rs +++ b/src/models/operations.rs @@ -10,13 +10,13 @@ use std::string::ToString; #[derive(Clone, Debug)] pub struct Operation { - pub id: i32, + pub id: i64, pub db_uuid: String, - pub parent_id: Option, - pub branch_id: i32, + pub parent_id: Option, + pub branch_id: i64, pub collection_name: Option, pub change_type: String, - pub change_id: i32, + pub change_id: i64, } impl Operation { @@ -25,7 +25,7 @@ impl Operation { db_uuid: &str, collection_name: impl Into>, change_type: &str, - change_id: i32, + change_id: i64, ) -> Operation { let collection_name = collection_name.into(); let current_op = OperationState::get_operation(conn, db_uuid); @@ -37,7 +37,7 @@ impl Operation { // of operations. We ensure there is no child operation in this branch of the current operation. if let Some(op_id) = current_op { - let count: i32 = conn + let count: i64 = conn .query_row( "select count(*) from operation where branch_id = ?1 AND parent_id = ?2 AND id not in (select operation_id from branch_masked_operations where branch_id = ?1);", (current_branch_id, op_id), @@ -80,7 +80,7 @@ impl Operation { operation } - pub fn get_upstream(conn: &Connection, operation_id: i32) -> Vec { + pub fn get_upstream(conn: &Connection, operation_id: i64) -> Vec { let query = "WITH RECURSIVE operations(operation_id) AS ( \ select ?1 UNION \ select parent_id from operation join operations ON id=operation_id \ @@ -89,11 +89,11 @@ impl Operation { stmt.query_map((operation_id,), |row| row.get(0)) .unwrap() .map(|id| id.unwrap()) - .collect::>() + .collect::>() } - pub fn get_operation_graph(conn: &Connection) -> DiGraphMap { - let mut graph: DiGraphMap = DiGraphMap::new(); + pub fn get_operation_graph(conn: &Connection) -> DiGraphMap { + let mut graph: DiGraphMap = DiGraphMap::new(); let operations = Operation::query(conn, "select * from operation;", vec![]); for op in operations.iter() { graph.add_node(op.id); @@ -107,11 +107,11 @@ impl Operation { pub fn get_path_between( conn: &Connection, - source_id: i32, - target_id: i32, - ) -> Vec<(i32, Direction, i32)> { + source_id: i64, + target_id: i64, + ) -> Vec<(i64, Direction, i64)> { let directed_graph = Operation::get_operation_graph(conn); - let mut undirected_graph: UnGraphMap = Default::default(); + let mut undirected_graph: UnGraphMap = Default::default(); for node in directed_graph.nodes() { undirected_graph.add_node(node); @@ -119,7 +119,7 @@ impl Operation { for (source, target, _weight) in directed_graph.all_edges() { undirected_graph.add_edge(source, target, ()); } - let mut patch_path: Vec<(i32, Direction, i32)> = vec![]; + let mut patch_path: Vec<(i64, Direction, i64)> = vec![]; for path in all_simple_paths(&undirected_graph, source_id, target_id) { let mut last_node = 0; for node in path { @@ -187,7 +187,7 @@ impl Operation { rows.next().unwrap().unwrap() } - pub fn get_by_id(conn: &Connection, op_id: i32) -> Operation { + pub fn get_by_id(conn: &Connection, op_id: i64) -> Operation { Operation::get( conn, "select * from operation where id = ?1", @@ -197,7 +197,7 @@ impl Operation { } pub struct FileAddition { - pub id: i32, + pub id: i64, pub file_path: String, pub file_type: FileTypes, } @@ -228,13 +228,13 @@ impl FileAddition { #[derive(Clone, Debug)] pub struct OperationSummary { - pub id: i32, - pub operation_id: i32, + pub id: i64, + pub operation_id: i64, pub summary: String, } impl OperationSummary { - pub fn create(conn: &Connection, operation_id: i32, summary: &str) -> OperationSummary { + pub fn create(conn: &Connection, operation_id: i64, summary: &str) -> OperationSummary { let query = "INSERT INTO operation_summary (operation_id, summary) VALUES (?1, ?2) RETURNING (id)"; let mut stmt = conn.prepare(query).unwrap(); @@ -277,11 +277,11 @@ impl OperationSummary { #[derive(Clone, Debug)] pub struct Branch { - pub id: i32, + pub id: i64, pub db_uuid: String, pub name: String, - pub start_operation_id: Option, - pub current_operation_id: Option, + pub start_operation_id: Option, + pub current_operation_id: Option, } impl Branch { @@ -370,7 +370,7 @@ impl Branch { branch } - pub fn get_by_id(conn: &Connection, branch_id: i32) -> Option { + pub fn get_by_id(conn: &Connection, branch_id: i64) -> Option { let mut branch: Option = None; for result in Branch::query( conn, @@ -384,7 +384,7 @@ impl Branch { branch } - pub fn set_current_operation(conn: &Connection, branch_id: i32, operation_id: i32) { + pub fn set_current_operation(conn: &Connection, branch_id: i64, operation_id: i64) { conn.execute( "UPDATE branch set current_operation_id = ?2 where id = ?1", (branch_id, operation_id), @@ -392,7 +392,7 @@ impl Branch { .unwrap(); } - pub fn get_operations(conn: &Connection, branch_id: i32) -> Vec { + pub fn get_operations(conn: &Connection, branch_id: i64) -> Vec { let branch = Branch::get_by_id(conn, branch_id) .unwrap_or_else(|| panic!("No branch with id {branch_id}.")); let mut graph = Operation::get_operation_graph(conn); @@ -411,7 +411,7 @@ impl Branch { operations.insert(0, Operation::get_by_id(conn, ancestor)); } - let mut branch_operations: HashSet = HashSet::from_iter( + let mut branch_operations: HashSet = HashSet::from_iter( Operation::query( conn, "select * from operation where branch_id = ?1;", @@ -419,9 +419,9 @@ impl Branch { ) .iter() .map(|op| op.id) - .collect::>(), + .collect::>(), ); - branch_operations.extend(operations.iter().map(|op| op.id).collect::>()); + branch_operations.extend(operations.iter().map(|op| op.id).collect::>()); // remove all nodes not in our branch operations. We do this here because upstream operations // may be created in a different branch_id but shared with this branch. @@ -444,11 +444,11 @@ impl Branch { operations } - pub fn mask_operation(conn: &Connection, branch_id: i32, operation_id: i32) { + pub fn mask_operation(conn: &Connection, branch_id: i64, operation_id: i64) { conn.execute("INSERT OR IGNORE into branch_masked_operations (branch_id, operation_id) values (?1, ?2);", (branch_id, operation_id)).unwrap(); } - pub fn get_masked_operations(conn: &Connection, branch_id: i32) -> Vec { + pub fn get_masked_operations(conn: &Connection, branch_id: i64) -> Vec { let mut stmt = conn .prepare("select operation_id from branch_masked_operations where branch_id = ?1") .unwrap(); @@ -456,14 +456,14 @@ impl Branch { stmt.query_map((branch_id,), |row| row.get(0)) .unwrap() .map(|res| res.unwrap()) - .collect::>() + .collect::>() } } pub struct OperationState {} impl OperationState { - pub fn set_operation(conn: &Connection, db_uuid: &str, op_id: i32) { + pub fn set_operation(conn: &Connection, db_uuid: &str, op_id: i64) { let mut stmt = conn .prepare( "INSERT INTO operation_state (db_uuid, operation_id) @@ -478,8 +478,8 @@ impl OperationState { Branch::set_current_operation(conn, branch_id, op_id); } - pub fn get_operation(conn: &Connection, db_uuid: &str) -> Option { - let mut id: Option = None; + pub fn get_operation(conn: &Connection, db_uuid: &str) -> Option { + let mut id: Option = None; let mut stmt = conn .prepare("SELECT operation_id from operation_state where db_uuid = ?1;") .unwrap(); @@ -517,8 +517,8 @@ impl OperationState { } } - pub fn get_current_branch(conn: &Connection, db_uuid: &str) -> Option { - let mut id: Option = None; + pub fn get_current_branch(conn: &Connection, db_uuid: &str) -> Option { + let mut id: Option = None; let mut stmt = conn .prepare("SELECT branch_id from operation_state where db_uuid = ?1;") .unwrap(); @@ -695,31 +695,31 @@ mod tests { let ops = Branch::get_operations(op_conn, branch_2_midpoint_1.id) .iter() .map(|f| f.id) - .collect::>(); + .collect::>(); assert_eq!(ops, vec![1, 6, 7, 12, 13]); let ops = Branch::get_operations(op_conn, branch_1.id) .iter() .map(|f| f.id) - .collect::>(); + .collect::>(); assert_eq!(ops, vec![1, 2, 3]); let ops = Branch::get_operations(op_conn, branch_2.id) .iter() .map(|f| f.id) - .collect::>(); + .collect::>(); assert_eq!(ops, vec![1, 6, 7, 8]); let ops = Branch::get_operations(op_conn, branch_1_sub_1.id) .iter() .map(|f| f.id) - .collect::>(); + .collect::>(); assert_eq!(ops, vec![1, 2, 3, 4, 5]); let ops = Branch::get_operations(op_conn, branch_2_sub_1.id) .iter() .map(|f| f.id) - .collect::>(); + .collect::>(); assert_eq!(ops, vec![1, 6, 7, 8, 9, 10, 11]); } @@ -741,7 +741,7 @@ mod tests { // branch-1 \-> 4 -> 5 // branch-2 \-> 6 - let mut expected_graph: DiGraphMap = DiGraphMap::new(); + let mut expected_graph: DiGraphMap = DiGraphMap::new(); expected_graph.add_edge(1, 2, ()); expected_graph.add_edge(2, 3, ()); expected_graph.add_edge(3, 4, ()); @@ -809,18 +809,18 @@ mod tests { let graph = Operation::get_operation_graph(op_conn); assert_eq!( - graph.nodes().collect::>(), - expected_graph.nodes().collect::>() + graph.nodes().collect::>(), + expected_graph.nodes().collect::>() ); assert_eq!( graph .all_edges() .map(|(src, dest, _)| (src, dest)) - .collect::>(), + .collect::>(), expected_graph .all_edges() .map(|(src, dest, _)| (src, dest)) - .collect::>() + .collect::>() ); } @@ -1102,7 +1102,7 @@ mod tests { ) .iter() .map(|op| op.id) - .collect::>(), + .collect::>(), vec![1, 2, 5] ); } diff --git a/src/models/path.rs b/src/models/path.rs index c067974..04b8070 100644 --- a/src/models/path.rs +++ b/src/models/path.rs @@ -16,15 +16,15 @@ use crate::models::{ #[derive(Clone, Debug, Eq, Hash, PartialEq, Deserialize, Serialize)] pub struct Path { - pub id: i32, - pub block_group_id: i32, + pub id: i64, + pub block_group_id: i64, pub name: String, } #[derive(Clone, Debug, Eq, Hash, PartialEq)] pub struct PathData { pub name: String, - pub block_group_id: i32, + pub block_group_id: i64, } // interesting gist here: https://gist.github.com/mbhall88/cd900add6335c96127efea0e0f6a9f48, see if we @@ -62,18 +62,18 @@ pub fn revcomp(seq: &str) -> String { #[derive(Clone, Debug)] pub struct PathBlock { - pub id: i32, - pub node_id: i32, + pub id: i64, + pub node_id: i64, pub block_sequence: String, - pub sequence_start: i32, - pub sequence_end: i32, - pub path_start: i32, - pub path_end: i32, + pub sequence_start: i64, + pub sequence_end: i64, + pub path_start: i64, + pub path_end: i64, pub strand: Strand, } impl Path { - pub fn create(conn: &Connection, name: &str, block_group_id: i32, edge_ids: &[i32]) -> Path { + pub fn create(conn: &Connection, name: &str, block_group_id: i64, edge_ids: &[i64]) -> Path { // TODO: Should we do something if edge_ids don't match here? Suppose we have a path // for a block group with edges 1,2,3. And then the same path is added again with edges // 5,6,7, should this be an error? Should we just keep adding edges? @@ -124,7 +124,7 @@ impl Path { path } - pub fn get(conn: &Connection, path_id: i32) -> Path { + pub fn get(conn: &Connection, path_id: i64) -> Path { let query = "SELECT id, block_group_id, name from path where id = ?1;"; let mut stmt = conn.prepare(query).unwrap(); let mut rows = stmt @@ -173,12 +173,12 @@ impl Path { } pub fn edge_pairs_to_block( - block_id: i32, + block_id: i64, path: &Path, into: Edge, out_of: Edge, - sequences_by_node_id: &HashMap, - current_path_length: i32, + sequences_by_node_id: &HashMap, + current_path_length: i64, ) -> PathBlock { if into.target_node_id != out_of.source_node_id { panic!( @@ -235,13 +235,13 @@ impl Path { } let sequences_by_node_id = Node::get_sequences_by_node_ids( conn, - sequence_node_ids.into_iter().collect::>(), + sequence_node_ids.into_iter().collect::>(), ); let mut blocks = vec![]; let mut path_length = 0; - // NOTE: Adding a "start block" for the dedicated start sequence with a range from i32::MIN + // NOTE: Adding a "start block" for the dedicated start sequence with a range from i64::MIN // to 0 makes interval tree lookups work better. If the point being looked up is -1 (or // below), it will return this block. blocks.push(PathBlock { @@ -250,26 +250,26 @@ impl Path { block_sequence: "".to_string(), sequence_start: 0, sequence_end: 0, - path_start: i32::MIN + 1, + path_start: i64::MIN + 1, path_end: 0, strand: Strand::Forward, }); for (index, (into, out_of)) in edges.into_iter().tuple_windows().enumerate() { let block = Path::edge_pairs_to_block( - index as i32, + index as i64, path, into, out_of, &sequences_by_node_id, path_length, ); - path_length += block.block_sequence.len() as i32; + path_length += block.block_sequence.len() as i64; blocks.push(block); } // NOTE: Adding an "end block" for the dedicated end sequence with a range from the path - // length to i32::MAX makes interval tree lookups work better. If the point being looked up + // length to i64::MAX makes interval tree lookups work better. If the point being looked up // is the path length (or higher), it will return this block. blocks.push(PathBlock { id: -2, @@ -278,16 +278,16 @@ impl Path { sequence_start: 0, sequence_end: 0, path_start: path_length, - path_end: i32::MAX - 1, + path_end: i64::MAX - 1, strand: Strand::Forward, }); blocks } - pub fn intervaltree_for(conn: &Connection, path: &Path) -> IntervalTree { + pub fn intervaltree_for(conn: &Connection, path: &Path) -> IntervalTree { let blocks = Path::blocks_for(conn, path); - let tree: IntervalTree = blocks + let tree: IntervalTree = blocks .into_iter() .map(|block| (block.path_start..block.path_end, block)) .collect(); diff --git a/src/models/path_edge.rs b/src/models/path_edge.rs index 750d9b0..ad168b8 100644 --- a/src/models/path_edge.rs +++ b/src/models/path_edge.rs @@ -7,14 +7,14 @@ use crate::models::edge::Edge; #[derive(Clone, Debug)] pub struct PathEdge { - pub id: i32, - pub path_id: i32, - pub index_in_path: i32, - pub edge_id: i32, + pub id: i64, + pub path_id: i64, + pub index_in_path: i64, + pub edge_id: i64, } impl PathEdge { - pub fn create(conn: &Connection, path_id: i32, index_in_path: i32, edge_id: i32) -> PathEdge { + pub fn create(conn: &Connection, path_id: i64, index_in_path: i64, edge_id: i64) -> PathEdge { let query = "INSERT INTO path_edges (path_id, index_in_path, edge_id) VALUES (?1, ?2, ?3) RETURNING (id)"; let mut stmt = conn.prepare(query).unwrap(); @@ -74,7 +74,7 @@ impl PathEdge { objs } - pub fn edges_for_path(conn: &Connection, path_id: i32) -> Vec { + pub fn edges_for_path(conn: &Connection, path_id: i64) -> Vec { let path_edges = PathEdge::query( conn, "select * from path_edges where path_id = ?1 order by index_in_path ASC", @@ -83,19 +83,19 @@ impl PathEdge { let edge_ids = path_edges .into_iter() .map(|path_edge| path_edge.edge_id) - .collect::>(); + .collect::>(); let edges = Edge::bulk_load(conn, &edge_ids); let edges_by_id = edges .into_iter() .map(|edge| (edge.id, edge)) - .collect::>(); + .collect::>(); edge_ids .into_iter() .map(|edge_id| edges_by_id[&edge_id].clone()) .collect::>() } - pub fn edges_for_paths(conn: &Connection, path_ids: Vec) -> HashMap> { + pub fn edges_for_paths(conn: &Connection, path_ids: Vec) -> HashMap> { let placeholder_string = path_ids.iter().map(|_| "?").join(","); let path_edges = PathEdge::query( conn, @@ -113,12 +113,12 @@ impl PathEdge { .clone() .into_iter() .map(|path_edge| path_edge.edge_id) - .collect::>(); + .collect::>(); let edges = Edge::bulk_load(conn, &edge_ids); let edges_by_id = edges .into_iter() .map(|edge| (edge.id, edge)) - .collect::>(); + .collect::>(); let path_edges_by_path_id = path_edges .into_iter() .map(|path_edge| (path_edge.path_id, path_edge.edge_id)) @@ -134,7 +134,7 @@ impl PathEdge { .collect::>(), ) }) - .collect::>>() + .collect::>>() } } diff --git a/src/models/sequence.rs b/src/models/sequence.rs index bec32ee..20c2447 100644 --- a/src/models/sequence.rs +++ b/src/models/sequence.rs @@ -17,7 +17,7 @@ pub struct Sequence { // these 2 fields are only relevant when the sequence is stored externally pub name: String, pub file_path: String, - pub length: i32, + pub length: i64, // indicates whether the sequence is stored externally, a quick flag instead of having to // check sequence or file_path and do the logic in function calls. pub external_sequence: bool, @@ -29,7 +29,7 @@ pub struct NewSequence<'a> { sequence: Option<&'a str>, name: Option<&'a str>, file_path: Option<&'a str>, - length: Option, + length: Option, shallow: bool, } @@ -64,7 +64,7 @@ impl<'a> NewSequence<'a> { pub fn sequence(mut self, sequence: &'a str) -> Self { self.sequence = Some(sequence); - self.length = Some(sequence.len() as i32); + self.length = Some(sequence.len() as i64); self } @@ -81,7 +81,7 @@ impl<'a> NewSequence<'a> { self } - pub fn length(mut self, length: i32) -> Self { + pub fn length(mut self, length: i64) -> Self { self.length = Some(length); self } @@ -136,7 +136,7 @@ impl<'a> NewSequence<'a> { } if self.length.is_none() { if let Some(v) = self.sequence { - length = v.len() as i32; + length = v.len() as i64; } else { // TODO: if name/path specified, grab length automatically panic!("Sequence length must be specified."); @@ -283,13 +283,13 @@ impl Sequence { pub fn get_sequence( &self, - start: impl Into>, - end: impl Into>, + start: impl Into>, + end: impl Into>, ) -> String { // todo: handle circles - let start: Option = start.into(); - let end: Option = end.into(); + let start: Option = start.into(); + let end: Option = end.into(); let start = start.unwrap_or(0) as usize; let end = end.unwrap_or(self.length) as usize; if self.external_sequence { @@ -303,7 +303,7 @@ impl Sequence { ); } } - if start == 0 && end as i32 == self.length { + if start == 0 && end as i64 == self.length { return self.sequence.clone(); } self.sequence[start..end].to_string() diff --git a/src/operation_management.rs b/src/operation_management.rs index f141b7d..7b443bd 100644 --- a/src/operation_management.rs +++ b/src/operation_management.rs @@ -99,20 +99,18 @@ pub fn get_changeset_dependencies(conn: &Connection, mut changes: &[u8]) -> Vec< created_sequences.insert(hash.to_string()); } "block_group" => { - let bg_pk = item.new_value(pk_column).unwrap().as_i64().unwrap() as i32; + let bg_pk = item.new_value(pk_column).unwrap().as_i64().unwrap(); created_block_groups.insert(bg_pk); } "path" => { - created_paths - .insert(item.new_value(pk_column).unwrap().as_i64().unwrap() as i32); - let bg_id = item.new_value(1).unwrap().as_i64().unwrap() as i32; + created_paths.insert(item.new_value(pk_column).unwrap().as_i64().unwrap()); + let bg_id = item.new_value(1).unwrap().as_i64().unwrap(); if !created_block_groups.contains(&bg_id) { previous_block_groups.insert(bg_id); } } "nodes" => { - created_nodes - .insert(item.new_value(pk_column).unwrap().as_i64().unwrap() as i32); + created_nodes.insert(item.new_value(pk_column).unwrap().as_i64().unwrap()); let sequence_hash = str::from_utf8(item.new_value(1).unwrap().as_bytes().unwrap()) .unwrap() @@ -122,9 +120,9 @@ pub fn get_changeset_dependencies(conn: &Connection, mut changes: &[u8]) -> Vec< } } "edges" => { - let edge_pk = item.new_value(pk_column).unwrap().as_i64().unwrap() as i32; - let source_node_id = item.new_value(1).unwrap().as_i64().unwrap() as i32; - let target_node_id = item.new_value(4).unwrap().as_i64().unwrap() as i32; + let edge_pk = item.new_value(pk_column).unwrap().as_i64().unwrap(); + let source_node_id = item.new_value(1).unwrap().as_i64().unwrap(); + let target_node_id = item.new_value(4).unwrap().as_i64().unwrap(); created_edges.insert(edge_pk); let nodes = Node::get_nodes(conn, vec![source_node_id, target_node_id]); if !created_nodes.contains(&source_node_id) { @@ -135,8 +133,8 @@ pub fn get_changeset_dependencies(conn: &Connection, mut changes: &[u8]) -> Vec< } } "path_edges" => { - let path_id = item.new_value(1).unwrap().as_i64().unwrap() as i32; - let edge_id = item.new_value(3).unwrap().as_i64().unwrap() as i32; + let path_id = item.new_value(1).unwrap().as_i64().unwrap(); + let edge_id = item.new_value(3).unwrap().as_i64().unwrap(); if !created_paths.contains(&path_id) { previous_paths.insert(path_id); } @@ -146,8 +144,8 @@ pub fn get_changeset_dependencies(conn: &Connection, mut changes: &[u8]) -> Vec< } "block_group_edges" => { // make sure blockgroup_map has blockgroups for bg ids made in external changes. - let bg_id = item.new_value(1).unwrap().as_i64().unwrap() as i32; - let edge_id = item.new_value(2).unwrap().as_i64().unwrap() as i32; + let bg_id = item.new_value(1).unwrap().as_i64().unwrap(); + let edge_id = item.new_value(2).unwrap().as_i64().unwrap(); if !created_edges.contains(&edge_id) { previous_edges.insert(edge_id); } @@ -272,10 +270,10 @@ pub fn apply_changeset(conn: &Connection, operation: &Operation) { let input: &mut dyn Read = &mut contents.as_slice(); let mut iter = ChangesetIter::start_strm(&input).unwrap(); - let mut blockgroup_map: HashMap = HashMap::new(); - let mut edge_map: HashMap = HashMap::new(); - let mut node_map: HashMap = HashMap::new(); - let mut path_edges: HashMap> = HashMap::new(); + let mut blockgroup_map: HashMap = HashMap::new(); + let mut edge_map: HashMap = HashMap::new(); + let mut node_map: HashMap = HashMap::new(); + let mut path_edges: HashMap> = HashMap::new(); let mut insert_paths = vec![]; let mut insert_block_group_edges = vec![]; @@ -313,11 +311,11 @@ pub fn apply_changeset(conn: &Connection, operation: &Operation) { .file_path( str::from_utf8(item.new_value(4).unwrap().as_bytes().unwrap()).unwrap(), ) - .length(item.new_value(5).unwrap().as_i64().unwrap() as i32) + .length(item.new_value(5).unwrap().as_i64().unwrap()) .save(conn); } "block_group" => { - let bg_pk = item.new_value(pk_column).unwrap().as_i64().unwrap() as i32; + let bg_pk = item.new_value(pk_column).unwrap().as_i64().unwrap(); if let Some(v) = dep_bg_map.get(&bg_pk) { blockgroup_map.insert(bg_pk, *v); } else { @@ -338,15 +336,15 @@ pub fn apply_changeset(conn: &Connection, operation: &Operation) { "path" => { // defer path creation until edges are made insert_paths.push(Path { - id: item.new_value(pk_column).unwrap().as_i64().unwrap() as i32, - block_group_id: item.new_value(1).unwrap().as_i64().unwrap() as i32, + id: item.new_value(pk_column).unwrap().as_i64().unwrap(), + block_group_id: item.new_value(1).unwrap().as_i64().unwrap(), name: str::from_utf8(item.new_value(2).unwrap().as_bytes().unwrap()) .unwrap() .to_string(), }); } "nodes" => { - let node_pk = item.new_value(pk_column).unwrap().as_i64().unwrap() as i32; + let node_pk = item.new_value(pk_column).unwrap().as_i64().unwrap(); node_map.insert( node_pk, str::from_utf8(item.new_value(1).unwrap().as_bytes().unwrap()) @@ -355,28 +353,28 @@ pub fn apply_changeset(conn: &Connection, operation: &Operation) { ); } "edges" => { - let edge_pk = item.new_value(pk_column).unwrap().as_i64().unwrap() as i32; + let edge_pk = item.new_value(pk_column).unwrap().as_i64().unwrap(); edge_map.insert( edge_pk, EdgeData { - source_node_id: item.new_value(1).unwrap().as_i64().unwrap() as i32, - source_coordinate: item.new_value(2).unwrap().as_i64().unwrap() as i32, + source_node_id: item.new_value(1).unwrap().as_i64().unwrap(), + source_coordinate: item.new_value(2).unwrap().as_i64().unwrap(), source_strand: Strand::column_result(item.new_value(3).unwrap()) .unwrap(), - target_node_id: item.new_value(4).unwrap().as_i64().unwrap() as i32, - target_coordinate: item.new_value(5).unwrap().as_i64().unwrap() as i32, + target_node_id: item.new_value(4).unwrap().as_i64().unwrap(), + target_coordinate: item.new_value(5).unwrap().as_i64().unwrap(), target_strand: Strand::column_result(item.new_value(6).unwrap()) .unwrap(), - chromosome_index: item.new_value(7).unwrap().as_i64().unwrap() as i32, - phased: item.new_value(8).unwrap().as_i64().unwrap() as i32, + chromosome_index: item.new_value(7).unwrap().as_i64().unwrap(), + phased: item.new_value(8).unwrap().as_i64().unwrap(), }, ); } "path_edges" => { - let path_id = item.new_value(1).unwrap().as_i64().unwrap() as i32; - let path_index = item.new_value(2).unwrap().as_i64().unwrap() as i32; + let path_id = item.new_value(1).unwrap().as_i64().unwrap(); + let path_index = item.new_value(2).unwrap().as_i64().unwrap(); // the edge_id here may not be valid and in this database may have a different pk - let edge_id = item.new_value(3).unwrap().as_i64().unwrap() as i32; + let edge_id = item.new_value(3).unwrap().as_i64().unwrap(); path_edges .entry(path_id) .or_default() @@ -384,8 +382,8 @@ pub fn apply_changeset(conn: &Connection, operation: &Operation) { } "block_group_edges" => { // make sure blockgroup_map has blockgroups for bg ids made in external changes. - let bg_id = item.new_value(1).unwrap().as_i64().unwrap() as i32; - let edge_id = item.new_value(2).unwrap().as_i64().unwrap() as i32; + let bg_id = item.new_value(1).unwrap().as_i64().unwrap(); + let edge_id = item.new_value(2).unwrap().as_i64().unwrap(); insert_block_group_edges.push((bg_id, edge_id)); } "collection" => { @@ -402,7 +400,7 @@ pub fn apply_changeset(conn: &Connection, operation: &Operation) { } } - let mut node_id_map: HashMap = HashMap::new(); + let mut node_id_map: HashMap = HashMap::new(); for (node_id, sequence_hash) in node_map { let new_node_id = Node::create(conn, &sequence_hash); node_id_map.insert(node_id, new_node_id); @@ -439,7 +437,7 @@ pub fn apply_changeset(conn: &Connection, operation: &Operation) { .keys() .copied() .sorted() - .collect::>(); + .collect::>(); let created_edges = Edge::bulk_create( conn, sorted_edge_ids @@ -447,7 +445,7 @@ pub fn apply_changeset(conn: &Connection, operation: &Operation) { .map(|id| updated_edge_map[id].clone()) .collect::>(), ); - let mut edge_id_map: HashMap = HashMap::new(); + let mut edge_id_map: HashMap = HashMap::new(); for (index, edge_id) in created_edges.iter().enumerate() { edge_id_map.insert(sorted_edge_ids[index], *edge_id); } @@ -468,7 +466,7 @@ pub fn apply_changeset(conn: &Connection, operation: &Operation) { Path::create(conn, &path.name, path.block_group_id, &sorted_edges); } - let mut block_group_edges: HashMap> = HashMap::new(); + let mut block_group_edges: HashMap> = HashMap::new(); for (bg_id, edge_id) in insert_block_group_edges { let bg_id = *dep_bg_map @@ -507,11 +505,11 @@ pub fn revert_changeset(conn: &Connection, operation: &Operation) { conn.pragma_update(None, "foreign_keys", "1").unwrap(); } -pub fn reset(conn: &Connection, operation_conn: &Connection, db_uuid: &str, op_id: i32) { +pub fn reset(conn: &Connection, operation_conn: &Connection, db_uuid: &str, op_id: i64) { let current_op = OperationState::get_operation(operation_conn, db_uuid).unwrap(); let current_branch_id = OperationState::get_current_branch(operation_conn, db_uuid).unwrap(); let current_branch = Branch::get_by_id(operation_conn, current_branch_id).unwrap(); - let branch_operations: Vec = Branch::get_operations(operation_conn, current_branch_id) + let branch_operations: Vec = Branch::get_operations(operation_conn, current_branch_id) .iter() .map(|b| b.id) .collect(); @@ -549,7 +547,7 @@ pub fn reset(conn: &Connection, operation_conn: &Connection, db_uuid: &str, op_i OperationState::set_operation(operation_conn, db_uuid, op_id); } -pub fn apply(conn: &Connection, operation_conn: &Connection, db_uuid: &str, op_id: i32) { +pub fn apply(conn: &Connection, operation_conn: &Connection, db_uuid: &str, op_id: i64) { let mut session = session::Session::new(conn).unwrap(); attach_session(&mut session); let change = FileAddition::create(operation_conn, &format!("{op_id}.cs"), FileTypes::Changeset); @@ -620,7 +618,7 @@ pub fn checkout( operation_conn: &Connection, db_uuid: &str, branch_name: &Option, - operation_id: Option, + operation_id: Option, ) { let mut dest_op_id = operation_id.unwrap_or(0); if let Some(name) = branch_name { @@ -754,11 +752,10 @@ mod tests { conn, operation_conn, ); - let edge_count = Edge::query(conn, "select * from edges", vec![]).len() as i32; - let node_count = Node::query(conn, "select * from nodes", vec![]).len() as i32; - let sample_count = Sample::query(conn, "select * from sample", vec![]).len() as i32; - let op_count = - Operation::query(operation_conn, "select * from operation", vec![]).len() as i32; + let edge_count = Edge::query(conn, "select * from edges", vec![]).len(); + let node_count = Node::query(conn, "select * from nodes", vec![]).len(); + let sample_count = Sample::query(conn, "select * from sample", vec![]).len(); + let op_count = Operation::query(operation_conn, "select * from operation", vec![]).len(); assert_eq!(edge_count, 2); assert_eq!(node_count, 3); assert_eq!(sample_count, 0); @@ -771,11 +768,10 @@ mod tests { conn, operation_conn, ); - let edge_count = Edge::query(conn, "select * from edges", vec![]).len() as i32; - let node_count = Node::query(conn, "select * from nodes", vec![]).len() as i32; - let sample_count = Sample::query(conn, "select * from sample", vec![]).len() as i32; - let op_count = - Operation::query(operation_conn, "select * from operation", vec![]).len() as i32; + let edge_count = Edge::query(conn, "select * from edges", vec![]).len(); + let node_count = Node::query(conn, "select * from nodes", vec![]).len(); + let sample_count = Sample::query(conn, "select * from sample", vec![]).len(); + let op_count = Operation::query(operation_conn, "select * from operation", vec![]).len(); // NOTE: The edge count is 14 because of the following: // * 1 edge from the source node to the node created by the fasta import // * 1 edge from the node created by the fasta import to the sink node @@ -803,11 +799,10 @@ mod tests { ), ); - let edge_count = Edge::query(conn, "select * from edges", vec![]).len() as i32; - let node_count = Node::query(conn, "select * from nodes", vec![]).len() as i32; - let sample_count = Sample::query(conn, "select * from sample", vec![]).len() as i32; - let op_count = - Operation::query(operation_conn, "select * from operation", vec![]).len() as i32; + let edge_count = Edge::query(conn, "select * from edges", vec![]).len(); + let node_count = Node::query(conn, "select * from nodes", vec![]).len(); + let sample_count = Sample::query(conn, "select * from sample", vec![]).len(); + let op_count = Operation::query(operation_conn, "select * from operation", vec![]).len(); assert_eq!(edge_count, 2); assert_eq!(node_count, 3); assert_eq!(sample_count, 0); @@ -820,11 +815,10 @@ mod tests { OperationState::get_operation(operation_conn, &db_uuid).unwrap(), ), ); - let edge_count = Edge::query(conn, "select * from edges", vec![]).len() as i32; - let node_count = Node::query(conn, "select * from nodes", vec![]).len() as i32; - let sample_count = Sample::query(conn, "select * from sample", vec![]).len() as i32; - let op_count = - Operation::query(operation_conn, "select * from operation", vec![]).len() as i32; + let edge_count = Edge::query(conn, "select * from edges", vec![]).len(); + let node_count = Node::query(conn, "select * from nodes", vec![]).len(); + let sample_count = Sample::query(conn, "select * from sample", vec![]).len(); + let op_count = Operation::query(operation_conn, "select * from operation", vec![]).len(); assert_eq!(edge_count, 14); assert_eq!(node_count, 9); assert_eq!(sample_count, 3); @@ -977,11 +971,10 @@ mod tests { conn, operation_conn, ); - let edge_count = Edge::query(conn, "select * from edges", vec![]).len() as i32; - let node_count = Node::query(conn, "select * from nodes", vec![]).len() as i32; - let sample_count = Sample::query(conn, "select * from sample", vec![]).len() as i32; - let op_count = - Operation::query(operation_conn, "select * from operation", vec![]).len() as i32; + let edge_count = Edge::query(conn, "select * from edges", vec![]).len(); + let node_count = Node::query(conn, "select * from nodes", vec![]).len(); + let sample_count = Sample::query(conn, "select * from sample", vec![]).len(); + let op_count = Operation::query(operation_conn, "select * from operation", vec![]).len(); assert_eq!(edge_count, 2); assert_eq!(node_count, 3); assert_eq!(sample_count, 0); @@ -1005,11 +998,10 @@ mod tests { conn, operation_conn, ); - let edge_count = Edge::query(conn, "select * from edges", vec![]).len() as i32; - let node_count = Node::query(conn, "select * from nodes", vec![]).len() as i32; - let sample_count = Sample::query(conn, "select * from sample", vec![]).len() as i32; - let op_count = - Operation::query(operation_conn, "select * from operation", vec![]).len() as i32; + let edge_count = Edge::query(conn, "select * from edges", vec![]).len(); + let node_count = Node::query(conn, "select * from nodes", vec![]).len(); + let sample_count = Sample::query(conn, "select * from sample", vec![]).len(); + let op_count = Operation::query(operation_conn, "select * from operation", vec![]).len(); assert_eq!(edge_count, 14); assert_eq!(node_count, 9); assert_eq!(sample_count, 3); @@ -1030,11 +1022,10 @@ mod tests { ); // ensure branch 1 operations have been undone - let edge_count = Edge::query(conn, "select * from edges", vec![]).len() as i32; - let node_count = Node::query(conn, "select * from nodes", vec![]).len() as i32; - let sample_count = Sample::query(conn, "select * from sample", vec![]).len() as i32; - let op_count = - Operation::query(operation_conn, "select * from operation", vec![]).len() as i32; + let edge_count = Edge::query(conn, "select * from edges", vec![]).len(); + let node_count = Node::query(conn, "select * from nodes", vec![]).len(); + let sample_count = Sample::query(conn, "select * from sample", vec![]).len(); + let op_count = Operation::query(operation_conn, "select * from operation", vec![]).len(); assert_eq!(edge_count, 2); assert_eq!(node_count, 3); assert_eq!(sample_count, 0); @@ -1049,11 +1040,10 @@ mod tests { conn, operation_conn, ); - let edge_count = Edge::query(conn, "select * from edges", vec![]).len() as i32; - let node_count = Node::query(conn, "select * from nodes", vec![]).len() as i32; - let sample_count = Sample::query(conn, "select * from sample", vec![]).len() as i32; - let op_count = - Operation::query(operation_conn, "select * from operation", vec![]).len() as i32; + let edge_count = Edge::query(conn, "select * from edges", vec![]).len(); + let node_count = Node::query(conn, "select * from nodes", vec![]).len(); + let sample_count = Sample::query(conn, "select * from sample", vec![]).len(); + let op_count = Operation::query(operation_conn, "select * from operation", vec![]).len(); assert_eq!(edge_count, 6); assert_eq!(node_count, 5); assert_eq!(sample_count, 1); @@ -1072,11 +1062,10 @@ mod tests { branch_1.id ); - let edge_count = Edge::query(conn, "select * from edges", vec![]).len() as i32; - let node_count = Node::query(conn, "select * from nodes", vec![]).len() as i32; - let sample_count = Sample::query(conn, "select * from sample", vec![]).len() as i32; - let op_count = - Operation::query(operation_conn, "select * from operation", vec![]).len() as i32; + let edge_count = Edge::query(conn, "select * from edges", vec![]).len(); + let node_count = Node::query(conn, "select * from nodes", vec![]).len(); + let sample_count = Sample::query(conn, "select * from sample", vec![]).len(); + let op_count = Operation::query(operation_conn, "select * from operation", vec![]).len(); assert_eq!(edge_count, 14); assert_eq!(node_count, 9); assert_eq!(sample_count, 3); @@ -1142,7 +1131,7 @@ mod tests { Branch::get_operations(operation_conn, branch_id) .iter() .map(|op| op.id) - .collect::>(), + .collect::>(), vec![1, 2, 3, 4, 5] ); @@ -1155,7 +1144,7 @@ mod tests { Branch::get_operations(operation_conn, branch_id) .iter() .map(|op| op.id) - .collect::>(), + .collect::>(), vec![1, 2] ); } @@ -1275,21 +1264,21 @@ mod tests { Branch::get_operations(operation_conn, main_branch.id) .iter() .map(|op| op.id) - .collect::>(), + .collect::>(), vec![1, 2, 6, 7, 8] ); assert_eq!( Branch::get_operations(operation_conn, branch_a.id) .iter() .map(|op| op.id) - .collect::>(), + .collect::>(), vec![1, 2, 3, 4, 5, 10] ); assert_eq!( Branch::get_operations(operation_conn, branch_b.id) .iter() .map(|op| op.id) - .collect::>(), + .collect::>(), vec![1, 2, 3, 4, 5, 9] ); reset(conn, operation_conn, &db_uuid, 2); @@ -1301,21 +1290,21 @@ mod tests { Branch::get_operations(operation_conn, main_branch.id) .iter() .map(|op| op.id) - .collect::>(), + .collect::>(), vec![1, 2, 6, 7, 8] ); assert_eq!( Branch::get_operations(operation_conn, branch_a.id) .iter() .map(|op| op.id) - .collect::>(), + .collect::>(), vec![1, 2] ); assert_eq!( Branch::get_operations(operation_conn, branch_b.id) .iter() .map(|op| op.id) - .collect::>(), + .collect::>(), vec![1, 2, 3, 4, 5, 9] ); } diff --git a/src/test_helpers.rs b/src/test_helpers.rs index 79b9684..858d65e 100644 --- a/src/test_helpers.rs +++ b/src/test_helpers.rs @@ -58,7 +58,7 @@ pub fn setup_gen_dir() { get_or_create_gen_dir(); } -pub fn setup_block_group(conn: &Connection) -> (i32, Path) { +pub fn setup_block_group(conn: &Connection) -> (i64, Path) { let a_seq = Sequence::new() .sequence_type("DNA") .sequence("AAAAAAAAAA") diff --git a/src/updates/vcf.rs b/src/updates/vcf.rs index ae53fee..0cf3dc6 100644 --- a/src/updates/vcf.rs +++ b/src/updates/vcf.rs @@ -24,14 +24,14 @@ use rusqlite::{session, Connection}; #[derive(Debug)] struct BlockGroupCache<'a> { - pub cache: HashMap, i32>, + pub cache: HashMap, i64>, pub conn: &'a Connection, } impl<'a> BlockGroupCache<'_> { pub fn new(conn: &Connection) -> BlockGroupCache { BlockGroupCache { - cache: HashMap::::new(), + cache: HashMap::::new(), conn, } } @@ -41,7 +41,7 @@ impl<'a> BlockGroupCache<'_> { collection_name: &'a str, sample_name: &'a str, name: String, - ) -> i32 { + ) -> i64 { let block_group_key = BlockGroupData { collection_name, sample_name: Some(sample_name), @@ -113,15 +113,15 @@ impl<'a> SequenceCache<'_> { #[allow(clippy::too_many_arguments)] fn prepare_change( - sample_bg_id: i32, + sample_bg_id: i64, sample_path: &Path, - ref_start: i32, - ref_end: i32, - chromosome_index: i32, - phased: i32, + ref_start: i64, + ref_end: i64, + chromosome_index: i64, + phased: i64, block_sequence: String, - sequence_length: i32, - node_id: i32, + sequence_length: i64, + node_id: i64, ) -> PathChange { // TODO: new sequence may not be real and be or some sort. Handle these. let new_block = PathBlock { @@ -146,12 +146,12 @@ fn prepare_change( } struct VcfEntry<'a> { - block_group_id: i32, + block_group_id: i64, sample_name: String, path: Path, alt_seq: &'a str, - chromosome_index: i32, - phased: i32, + chromosome_index: i64, + phased: i64, } pub fn update_with_vcf( @@ -231,7 +231,7 @@ pub fn update_with_vcf( path: sample_path.clone(), sample_name: fixed_sample.clone(), alt_seq, - chromosome_index: chromosome_index as i32, + chromosome_index: chromosome_index as i64, phased, }); } @@ -266,7 +266,7 @@ pub fn update_with_vcf( path: sample_path.clone(), sample_name: sample_names[sample_index].clone(), alt_seq, - chromosome_index: chromosome_index as i32, + chromosome_index: chromosome_index as i64, phased, }); } @@ -285,12 +285,12 @@ pub fn update_with_vcf( let change = prepare_change( vcf_entry.block_group_id, &vcf_entry.path, - ref_start as i32, - ref_end as i32, + ref_start as i64, + ref_end as i64, vcf_entry.chromosome_index, vcf_entry.phased, sequence_string.clone(), - sequence_string.len() as i32, + sequence_string.len() as i64, node_id, ); changes @@ -299,14 +299,14 @@ pub fn update_with_vcf( .push(change); } } - let mut summary: HashMap> = HashMap::new(); + let mut summary: HashMap> = HashMap::new(); for ((path, sample_name), path_changes) in changes { BlockGroup::insert_changes(conn, &path_changes, &path_cache); summary .entry(sample_name) .or_default() .entry(path.name) - .or_insert(path_changes.len() as i32); + .or_insert(path_changes.len() as i64); } let mut summary_str = "".to_string(); for (sample_name, sample_changes) in summary.iter() {