Merge pull request #59 from ginkgobioworks/64-bit-upgrade
Use 64 bits
dkhofer authored Oct 2, 2024
2 parents 90b00cc + 1d9cf3c commit 1555734
Showing 16 changed files with 309 additions and 323 deletions.
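The diffs below widen integer types from i32 to i64 throughout: database ids, sequence coordinates, node counts, and CLI operation ids. A minimal sketch of why the wider type matters for genome-scale coordinates (the 3.1 Gb figure below is illustrative, not taken from this PR):

```rust
fn main() {
    // i32 tops out at 2_147_483_647, i.e. roughly 2.1 Gb of coordinate space.
    // A position on the far side of a large genome no longer fits.
    let coordinate: i64 = 3_100_000_000; // illustrative position beyond i32::MAX
    assert!(coordinate > i64::from(i32::MAX));
    println!("{coordinate} fits in an i64 but would overflow an i32");
}
```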
24 changes: 12 additions & 12 deletions src/exports/gfa.rs
@@ -55,7 +55,7 @@ pub fn export_gfa(conn: &Connection, collection_name: &str, filename: &PathBuf)
fn write_segments(
writer: &mut BufWriter<File>,
blocks: &Vec<GroupBlock>,
- terminal_block_ids: &HashSet<i32>,
+ terminal_block_ids: &HashSet<i64>,
) {
for block in blocks {
if terminal_block_ids.contains(&block.id) {
@@ -78,9 +78,9 @@ fn segment_line(sequence: &str, index: usize) -> String {

fn write_links(
writer: &mut BufWriter<File>,
- graph: &DiGraphMap<i32, ()>,
- edges_by_node_pair: &HashMap<(i32, i32), Edge>,
- terminal_block_ids: &HashSet<i32>,
+ graph: &DiGraphMap<i64, ()>,
+ edges_by_node_pair: &HashMap<(i64, i64), Edge>,
+ terminal_block_ids: &HashSet<i64>,
) {
for (source, target, ()) in graph.all_edges() {
if terminal_block_ids.contains(&source) || terminal_block_ids.contains(&target) {
@@ -101,9 +101,9 @@ fn write_links(
}

fn link_line(
- source_index: i32,
+ source_index: i64,
source_strand: Strand,
- target_index: i32,
+ target_index: i64,
target_strand: Strand,
) -> String {
format!(
@@ -123,9 +123,9 @@ fn link_line(
fn nodes_for_edges(
edge1: &Edge,
edge2: &Edge,
- blocks_by_node_and_start: &HashMap<(i32, i32), GroupBlock>,
- blocks_by_node_and_end: &HashMap<(i32, i32), GroupBlock>,
- ) -> Vec<i32> {
+ blocks_by_node_and_start: &HashMap<(i64, i64), GroupBlock>,
+ blocks_by_node_and_end: &HashMap<(i64, i64), GroupBlock>,
+ ) -> Vec<i64> {
let mut current_block = blocks_by_node_and_start
.get(&(edge1.target_node_id, edge1.target_coordinate))
.unwrap();
@@ -158,11 +158,11 @@ fn write_paths(
let blocks_by_node_and_start = blocks
.iter()
.map(|block| ((block.node_id, block.start), block.clone()))
- .collect::<HashMap<(i32, i32), GroupBlock>>();
+ .collect::<HashMap<(i64, i64), GroupBlock>>();
let blocks_by_node_and_end = blocks
.iter()
.map(|block| ((block.node_id, block.end), block.clone()))
- .collect::<HashMap<(i32, i32), GroupBlock>>();
+ .collect::<HashMap<(i64, i64), GroupBlock>>();

for path in paths {
let edges_for_path = edges_by_path_id.get(&path.id).unwrap();
@@ -187,7 +187,7 @@ fn write_paths(
}
}

- fn path_line(path_name: &str, node_ids: &[i32], node_strands: &[Strand]) -> String {
+ fn path_line(path_name: &str, node_ids: &[i64], node_strands: &[Strand]) -> String {
let nodes = node_ids
.iter()
.zip(node_strands.iter())
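For context, the writers above emit GFA records keyed by the now-i64 block indices. A hedged sketch of what segment and link lines look like, following the GFA v1 field layout; the real segment_line and link_line bodies are collapsed in this diff, the helper names below are illustrative, and the real code passes strands as a Strand enum rather than a char:

```rust
// Illustrative only: approximates the records write_segments/write_links produce.
fn sketch_segment_line(index: i64, sequence: &str) -> String {
    // S  <name>  <sequence>
    format!("S\t{index}\t{sequence}\n")
}

fn sketch_link_line(source: i64, source_strand: char, target: i64, target_strand: char) -> String {
    // L  <from>  <fromOrient>  <to>  <toOrient>  <overlap>  ("*" = unspecified overlap)
    format!("L\t{source}\t{source_strand}\t{target}\t{target_strand}\t*\n")
}

fn main() {
    print!("{}", sketch_segment_line(1, "ATGGCATATTCGCAGCT"));
    print!("{}", sketch_link_line(1, '+', 2, '+'));
}
```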
4 changes: 2 additions & 2 deletions src/imports/fasta.rs
@@ -48,15 +48,15 @@ pub fn import_fasta(
name: name.to_string(),
}
};
- let mut summary: HashMap<String, i32> = HashMap::new();
+ let mut summary: HashMap<String, i64> = HashMap::new();

for result in reader.records() {
let record = result.expect("Error during fasta record parsing");
let sequence = str::from_utf8(record.sequence().as_ref())
.unwrap()
.to_string();
let name = String::from_utf8(record.name().to_vec()).unwrap();
- let sequence_length = record.sequence().len() as i32;
+ let sequence_length = record.sequence().len() as i64;
let seq = if shallow {
Sequence::new()
.sequence_type("DNA")
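The summary map in import_fasta is now HashMap<String, i64>, and the length cast widens from the usize returned by len(). What the import records per name is not visible in this hunk, so the accumulation below is only one plausible reading; the tally helper is hypothetical and the real code works inline while iterating reader.records():

```rust
use std::collections::HashMap;

// Hypothetical helper mirroring the `summary: HashMap<String, i64>` shown above.
fn tally_lengths(records: &[(&str, usize)]) -> HashMap<String, i64> {
    let mut summary: HashMap<String, i64> = HashMap::new();
    for (name, len) in records {
        // usize -> i64 is the same widening cast as `record.sequence().len() as i64`.
        *summary.entry(name.to_string()).or_insert(0) += *len as i64;
    }
    summary
}

fn main() {
    let summary = tally_lengths(&[("chr1", 17), ("chr2", 16)]);
    assert_eq!(summary["chr1"], 17);
}
```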
20 changes: 10 additions & 10 deletions src/imports/gfa.rs
@@ -27,7 +27,7 @@ pub fn import_gfa(gfa_path: &FilePath, collection_name: &str, conn: &Connection)
let block_group = BlockGroup::create(conn, collection_name, None, "");
let gfa: Gfa<u64, (), ()> = Gfa::parse_gfa_file(gfa_path.to_str().unwrap());
let mut sequences_by_segment_id: HashMap<u64, Sequence> = HashMap::new();
- let mut node_ids_by_segment_id: HashMap<u64, i32> = HashMap::new();
+ let mut node_ids_by_segment_id: HashMap<u64, i64> = HashMap::new();

for segment in &gfa.segments {
let input_sequence = segment.sequence.get_string(&gfa.sequence);
@@ -198,10 +198,10 @@ pub fn import_gfa(gfa_path: &FilePath, collection_name: &str, conn: &Connection)
}

fn edge_data_from_fields(
- source_node_id: i32,
- source_coordinate: i32,
+ source_node_id: i64,
+ source_coordinate: i64,
source_strand: Strand,
- target_node_id: i32,
+ target_node_id: i64,
target_strand: Strand,
) -> EdgeData {
EdgeData {
@@ -246,7 +246,7 @@ mod tests {
let result = Path::sequence(conn, path);
assert_eq!(result, "ATGGCATATTCGCAGCT");

- let node_count = Node::query(conn, "select * from nodes", vec![]).len() as i32;
+ let node_count = Node::query(conn, "select * from nodes", vec![]).len() as i64;
assert_eq!(node_count, 6);
}

@@ -265,7 +265,7 @@ mod tests {
HashSet::from_iter(vec!["AAAATTTTGGGGCCCC".to_string()])
);

- let node_count = Node::query(conn, "select * from nodes", vec![]).len() as i32;
+ let node_count = Node::query(conn, "select * from nodes", vec![]).len() as i64;
assert_eq!(node_count, 6);
}

@@ -291,7 +291,7 @@ mod tests {
let result = Path::sequence(conn, path);
assert_eq!(result, "ACCTACAAATTCAAAC");

- let node_count = Node::query(conn, "select * from nodes", vec![]).len() as i32;
+ let node_count = Node::query(conn, "select * from nodes", vec![]).len() as i64;
assert_eq!(node_count, 6);
}

@@ -317,7 +317,7 @@ mod tests {
let result = Path::sequence(conn, path);
assert_eq!(result, "TATGCCAGCTGCGAATA");

- let node_count = Node::query(conn, "select * from nodes", vec![]).len() as i32;
+ let node_count = Node::query(conn, "select * from nodes", vec![]).len() as i64;
assert_eq!(node_count, 6);
}

@@ -422,7 +422,7 @@ mod tests {
assert_eq!(all_sequences.len(), 1024);
assert_eq!(all_sequences, expected_sequences);

- let node_count = Node::query(conn, "select * from nodes", vec![]).len() as i32;
+ let node_count = Node::query(conn, "select * from nodes", vec![]).len() as i64;
assert_eq!(node_count, 28);
}

@@ -452,7 +452,7 @@ mod tests {
let all_sequences = BlockGroup::get_all_sequences(conn, block_group_id);
assert_eq!(all_sequences, HashSet::from_iter(vec!["AA".to_string()]));

- let node_count = Node::query(conn, "select * from nodes", vec![]).len() as i32;
+ let node_count = Node::query(conn, "select * from nodes", vec![]).len() as i64;
assert_eq!(node_count, 4);
}
}
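Note the split in this file: the GFA parser keys segments by u64, while database node ids are now i64 (node_ids_by_segment_id: HashMap<u64, i64>). A small stand-alone sketch of that bookkeeping; the helper name is hypothetical and the real import fills the map inline:

```rust
use std::collections::HashMap;

// Hypothetical helper: remember which database node id backs a GFA segment.
fn record_node_id(map: &mut HashMap<u64, i64>, segment_id: u64, node_id: i64) {
    map.insert(segment_id, node_id);
}

fn main() {
    // Segment ids arrive from the GFA parser as u64; node ids are i64 database ids.
    let mut node_ids_by_segment_id: HashMap<u64, i64> = HashMap::new();
    record_node_id(&mut node_ids_by_segment_id, 1, 42);
    assert_eq!(node_ids_by_segment_id[&1], 42);
}
```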
8 changes: 4 additions & 4 deletions src/lib.rs
@@ -39,7 +39,7 @@ pub fn calculate_hash(t: &str) -> String {
}

pub struct Genotype {
- pub allele: i32,
+ pub allele: i64,
pub phasing: Phasing,
}

@@ -66,7 +66,7 @@ pub fn parse_genotype(gt: &str) -> Vec<Option<Genotype>> {
genotypes.push(None);
} else {
genotypes.push(Some(Genotype {
- allele: allele.parse::<i32>().unwrap(),
+ allele: allele.parse::<i64>().unwrap(),
phasing: phase,
}));
}
@@ -76,7 +76,7 @@ pub fn parse_genotype(gt: &str) -> Vec<Option<Genotype>> {
genotypes
}

- pub fn get_overlap(a: i32, b: i32, x: i32, y: i32) -> (bool, bool, bool) {
+ pub fn get_overlap(a: i64, b: i64, x: i64, y: i64) -> (bool, bool, bool) {
let contains_start = a <= x && x < b;
let contains_end = a <= y && y < b;
let overlap = a < y && x < b;
@@ -99,7 +99,7 @@ mod tests {
#[test]
fn it_queries() {
let conn = get_connection(None);
- let sequence_count: i32 = conn
+ let sequence_count: i64 = conn
.query_row(
"SELECT count(*) from sequence where hash = 'foo'",
[],
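The visible body of get_overlap makes the interval semantics easy to check. Reproduced below as a stand-alone sketch with a usage example, assuming the three flags are returned in the order (contains_start, contains_end, overlap); the return statement itself is outside the visible hunk:

```rust
// Mirror of the visible get_overlap body, now on i64 coordinates.
fn get_overlap(a: i64, b: i64, x: i64, y: i64) -> (bool, bool, bool) {
    let contains_start = a <= x && x < b; // does [a, b) contain x?
    let contains_end = a <= y && y < b;   // does [a, b) contain y?
    let overlap = a < y && x < b;         // do the two ranges intersect at all?
    (contains_start, contains_end, overlap)
}

fn main() {
    // [0, 10) vs [5, 15): contains the start, not the end, and they overlap.
    assert_eq!(get_overlap(0, 10, 5, 15), (true, false, true));
    // Disjoint ranges set none of the flags.
    assert_eq!(get_overlap(0, 10, 20, 30), (false, false, false));
}
```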
6 changes: 3 additions & 3 deletions src/main.rs
@@ -95,12 +95,12 @@ enum Commands {
branch: Option<String>,
/// The operation id to move to
#[clap(index = 1)]
- id: Option<i32>,
+ id: Option<i64>,
},
Reset {
/// The operation id to reset to
#[clap(index = 1)]
- id: i32,
+ id: i64,
},
/// View operations carried out against a database
Operations {
@@ -111,7 +111,7 @@ enum Commands {
Apply {
/// The operation id to apply
#[clap(index = 1)]
- id: i32,
+ id: i64,
},
Export {
/// The name of the collection to export
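The CLI operation ids become i64 as well. A minimal sketch of how a positional i64 argument is declared with clap's derive API, mirroring the attribute style shown in the diff; the stand-alone struct is illustrative, since the real fields live inside the Commands enum:

```rust
use clap::Parser;

/// Illustrative stand-alone version of the Reset command's argument.
#[derive(Parser)]
struct ResetArgs {
    /// The operation id to reset to
    #[clap(index = 1)]
    id: i64,
}

fn main() {
    // e.g. `reset 3` parses "3" into an i64 via clap's built-in value parser.
    let args = ResetArgs::parse();
    println!("resetting to operation {}", args.id);
}
```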
42 changes: 21 additions & 21 deletions src/models/block_group.rs
@@ -15,7 +15,7 @@ use crate::models::strand::Strand;

#[derive(Debug, Deserialize, Serialize)]
pub struct BlockGroup {
- pub id: i32,
+ pub id: i64,
pub collection_name: String,
pub sample_name: Option<String>,
pub name: String,
@@ -30,31 +30,31 @@ pub struct BlockGroupData<'a> {

#[derive(Clone, Debug)]
pub struct PathChange {
- pub block_group_id: i32,
+ pub block_group_id: i64,
pub path: Path,
- pub start: i32,
- pub end: i32,
+ pub start: i64,
+ pub end: i64,
pub block: PathBlock,
- pub chromosome_index: i32,
- pub phased: i32,
+ pub chromosome_index: i64,
+ pub phased: i64,
}

pub struct PathCache<'a> {
pub cache: HashMap<PathData, Path>,
- pub intervaltree_cache: HashMap<Path, IntervalTree<i32, PathBlock>>,
+ pub intervaltree_cache: HashMap<Path, IntervalTree<i64, PathBlock>>,
pub conn: &'a Connection,
}

impl PathCache<'_> {
pub fn new(conn: &Connection) -> PathCache {
PathCache {
cache: HashMap::<PathData, Path>::new(),
- intervaltree_cache: HashMap::<Path, IntervalTree<i32, PathBlock>>::new(),
+ intervaltree_cache: HashMap::<Path, IntervalTree<i64, PathBlock>>::new(),
conn,
}
}

- pub fn lookup(path_cache: &mut PathCache, block_group_id: i32, name: String) -> Path {
+ pub fn lookup(path_cache: &mut PathCache, block_group_id: i64, name: String) -> Path {
let path_key = PathData {
name: name.clone(),
block_group_id,
@@ -80,7 +80,7 @@ impl PathCache<'_> {
pub fn get_intervaltree<'a>(
path_cache: &'a PathCache<'a>,
path: &'a Path,
- ) -> Option<&'a IntervalTree<i32, PathBlock>> {
+ ) -> Option<&'a IntervalTree<i64, PathBlock>> {
path_cache.intervaltree_cache.get(path)
}
}
@@ -159,7 +159,7 @@ impl BlockGroup {
objs
}

- pub fn clone(conn: &Connection, source_block_group_id: i32, target_block_group_id: i32) {
+ pub fn clone(conn: &Connection, source_block_group_id: i64, target_block_group_id: i64) {
let existing_paths = Path::get_paths(
conn,
"SELECT * from path where block_group_id = ?1",
@@ -169,14 +169,14 @@ impl BlockGroup {
let edge_ids = BlockGroupEdge::edges_for_block_group(conn, source_block_group_id)
.iter()
.map(|edge| edge.id)
- .collect::<Vec<i32>>();
+ .collect::<Vec<i64>>();
BlockGroupEdge::bulk_create(conn, target_block_group_id, &edge_ids);

for path in existing_paths {
let edge_ids = PathEdge::edges_for_path(conn, path.id)
.into_iter()
.map(|edge| edge.id)
- .collect::<Vec<i32>>();
+ .collect::<Vec<i64>>();
Path::create(conn, &path.name, target_block_group_id, &edge_ids);
}
}
Expand All @@ -186,8 +186,8 @@ impl BlockGroup {
collection_name: &str,
sample_name: &str,
group_name: &str,
- ) -> i32 {
- let mut bg_id : i32 = match conn.query_row(
+ ) -> i64 {
+ let mut bg_id : i64 = match conn.query_row(
"select id from block_group where collection_name = ?1 AND sample_name = ?2 AND name = ?3",
(collection_name, sample_name, group_name),
|row| row.get(0),
@@ -227,7 +227,7 @@ impl BlockGroup {
collection_name: &str,
sample_name: Option<&str>,
group_name: &str,
- ) -> i32 {
+ ) -> i64 {
let result = if sample_name.is_some() {
conn.query_row(
"select id from block_group where collection_name = ?1 AND sample_name = ?2 AND name = ?3",
@@ -251,7 +251,7 @@ impl BlockGroup {
}
}

- pub fn get_all_sequences(conn: &Connection, block_group_id: i32) -> HashSet<String> {
+ pub fn get_all_sequences(conn: &Connection, block_group_id: i64) -> HashSet<String> {
let mut edges = BlockGroupEdge::edges_for_block_group(conn, block_group_id);
let (blocks, boundary_edges) = Edge::blocks_from_edges(conn, &edges);
edges.extend(boundary_edges.clone());
@@ -274,7 +274,7 @@ impl BlockGroup {
.clone()
.into_iter()
.map(|block| (block.id, block))
- .collect::<HashMap<i32, GroupBlock>>();
+ .collect::<HashMap<i64, GroupBlock>>();
let mut sequences = HashSet::<String>::new();

for start_node in start_nodes {
@@ -303,7 +303,7 @@ impl BlockGroup {
}

pub fn insert_changes(conn: &Connection, changes: &Vec<PathChange>, cache: &PathCache) {
- let mut new_edges_by_block_group = HashMap::<i32, Vec<EdgeData>>::new();
+ let mut new_edges_by_block_group = HashMap::<i64, Vec<EdgeData>>::new();
for change in changes {
let tree = PathCache::get_intervaltree(cache, &change.path).unwrap();
let new_edges = BlockGroup::set_up_new_edges(change, tree);
@@ -324,7 +324,7 @@ impl BlockGroup {
pub fn insert_change(
conn: &Connection,
change: &PathChange,
- tree: &IntervalTree<i32, PathBlock>,
+ tree: &IntervalTree<i64, PathBlock>,
) {
let new_edges = BlockGroup::set_up_new_edges(change, tree);
let edge_ids = Edge::bulk_create(conn, new_edges);
@@ -333,7 +333,7 @@ impl BlockGroup {

pub fn set_up_new_edges(
change: &PathChange,
- tree: &IntervalTree<i32, PathBlock>,
+ tree: &IntervalTree<i64, PathBlock>,
) -> Vec<EdgeData> {
let start_blocks: Vec<&PathBlock> =
tree.query_point(change.start).map(|x| &x.value).collect();
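PathCache's interval trees are now keyed by i64 coordinates. A hedged sketch of the query_point lookup pattern used by insert_change and set_up_new_edges, assuming the intervaltree crate, whose elements expose public range and value fields (consistent with the `.map(|x| &x.value)` call above); if the project uses a different interval tree type, the construction details will differ:

```rust
use intervaltree::{Element, IntervalTree};

fn main() {
    // Build a small tree keyed by i64 coordinates; the &str values stand in for PathBlocks.
    let tree: IntervalTree<i64, &str> = [(0_i64..10, "block A"), (10_i64..25, "block B")]
        .into_iter()
        .map(|(range, value)| Element { range, value })
        .collect();

    // query_point yields every element whose range contains the coordinate,
    // the same shape as tree.query_point(change.start) in the code above.
    let hits: Vec<&str> = tree.query_point(12).map(|e| e.value).collect();
    assert_eq!(hits, vec!["block B"]);
}
```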
(10 of the 16 changed files are not shown above.)
