From ca972d8d2074d81f76a36bf1e3b77f9939463748 Mon Sep 17 00:00:00 2001 From: chris Mitchell Date: Thu, 19 Sep 2024 12:06:48 -0400 Subject: [PATCH] Composite fk example --- migrations/core/01-initial/up.sql | 56 ++++++++----------------------- src/exports/gfa.rs | 2 +- src/imports/fasta.rs | 2 +- src/imports/gfa.rs | 2 +- src/models/block_group.rs | 36 ++++++++++---------- src/models/block_group_edge.rs | 3 +- src/models/edge.rs | 29 +++++++++------- src/models/path.rs | 8 ++--- src/models/path_edge.rs | 6 ++-- 9 files changed, 60 insertions(+), 84 deletions(-) diff --git a/migrations/core/01-initial/up.sql b/migrations/core/01-initial/up.sql index 7e5b6be..f9f17cb 100644 --- a/migrations/core/01-initial/up.sql +++ b/migrations/core/01-initial/up.sql @@ -20,49 +20,22 @@ CREATE TABLE sequence ( ) STRICT; CREATE TABLE block_group ( - id INTEGER PRIMARY KEY NOT NULL, collection_name TEXT NOT NULL, - sample_name TEXT, + sample_name TEXT NOT NULL, name TEXT NOT NULL, FOREIGN KEY(collection_name) REFERENCES collection(name), - FOREIGN KEY(sample_name) REFERENCES sample(name) + FOREIGN KEY(sample_name) REFERENCES sample(name), + PRIMARY KEY(collection_name, sample_name, name) ) STRICT; -CREATE UNIQUE INDEX block_group_uidx ON block_group(collection_name, sample_name, name) WHERE sample_name is not null; -CREATE UNIQUE INDEX block_group_null_sample_uidx ON block_group(collection_name, name) WHERE sample_name is null; CREATE TABLE path ( - id INTEGER PRIMARY KEY NOT NULL, block_group_id INTEGER NOT NULL, name TEXT NOT NULL, - FOREIGN KEY(block_group_id) REFERENCES block_group(id) -) STRICT; -CREATE UNIQUE INDEX path_uidx ON path(block_group_id, name); - --- an operation from a vcf can impact multiple paths and samples, so operation is not faceted on that -CREATE TABLE operation ( - id INTEGER PRIMARY KEY NOT NULL, - parent_id INTEGER, - collection_name TEXT NOT NULL, - change_type TEXT NOT NULL, - change_id INTEGER NOT NULL, - FOREIGN KEY(parent_id) REFERENCES operation(id) -) STRICT; - -CREATE TABLE file_addition ( - id INTEGER PRIMARY KEY NOT NULL, - file_path TEXT NOT NULL, - file_type TEXT NOT NULL -) STRICT; - -CREATE TABLE operation_summary ( - id INTEGER PRIMARY KEY NOT NULL, - operation_id INTEGER NOT NULL, - summary TEXT NOT NULL, - FOREIGN KEY(operation_id) REFERENCES operation(id) +-- FOREIGN KEY(block_group_id) REFERENCES block_group(rowid), + PRIMARY KEY(block_group_id, name) ) STRICT; CREATE TABLE edges ( - id INTEGER PRIMARY KEY NOT NULL, source_hash TEXT NOT NULL, source_coordinate INTEGER NOT NULL, source_strand TEXT NOT NULL, @@ -73,28 +46,26 @@ CREATE TABLE edges ( phased INTEGER NOT NULL, FOREIGN KEY(source_hash) REFERENCES sequence(hash), FOREIGN KEY(target_hash) REFERENCES sequence(hash), - constraint chk_phased check (phased in (0, 1)) + constraint chk_phased check (phased in (0, 1)), + PRIMARY KEY(source_hash, source_coordinate, source_strand, target_hash, target_coordinate, target_strand, chromosome_index, phased) ) STRICT; -CREATE UNIQUE INDEX edge_uidx ON edges(source_hash, source_coordinate, source_strand, target_hash, target_coordinate, target_strand, chromosome_index, phased); CREATE TABLE path_edges ( - id INTEGER PRIMARY KEY NOT NULL, path_id INTEGER NOT NULL, index_in_path INTEGER NOT NULL, edge_id INTEGER NOT NULL, - FOREIGN KEY(edge_id) REFERENCES edges(id), - FOREIGN KEY(path_id) REFERENCES path(id) +-- FOREIGN KEY(edge_id) REFERENCES edges(rowid), +-- FOREIGN KEY(path_id) REFERENCES path(rowid), + PRIMARY KEY(path_id, edge_id) ) STRICT; -CREATE UNIQUE INDEX path_edges_uidx ON path_edges(path_id, edge_id); CREATE TABLE block_group_edges ( - id INTEGER PRIMARY KEY NOT NULL, block_group_id INTEGER NOT NULL, edge_id INTEGER NOT NULL, - FOREIGN KEY(block_group_id) REFERENCES block_group(id), - FOREIGN KEY(edge_id) REFERENCES edges(id) +-- FOREIGN KEY(block_group_id) REFERENCES block_group(rowid), +-- FOREIGN KEY(edge_id) REFERENCES edges(rowid), + PRIMARY KEY(block_group_id, edge_id) ) STRICT; -CREATE UNIQUE INDEX block_group_edges_uidx ON block_group_edges(block_group_id, edge_id); INSERT INTO sequence (hash, sequence_type, sequence, name, file_path, "length") values ("start-node-yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy", "OTHER", "start-node-yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy", "", "", 64), ("end-node-zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", "OTHER", "end-node-zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", "", "", 64); INSERT INTO gen_metadata (db_uuid) values (lower( @@ -104,3 +75,4 @@ INSERT INTO gen_metadata (db_uuid) values (lower( substr(hex(randomblob(2)), 2) || '-' || hex(randomblob(6)) )); +INSERT INTO sample (name) values (""); \ No newline at end of file diff --git a/src/exports/gfa.rs b/src/exports/gfa.rs index e0bf431..0f98e46 100644 --- a/src/exports/gfa.rs +++ b/src/exports/gfa.rs @@ -113,7 +113,7 @@ mod tests { let collection_name = "test collection"; let collection = Collection::create(&conn, collection_name); - let block_group = BlockGroup::create(&conn, collection_name, None, "test block group"); + let block_group = BlockGroup::create(&conn, collection_name, "", "test block group"); let sequence1 = Sequence::new() .sequence_type("DNA") .sequence("AAAA") diff --git a/src/imports/fasta.rs b/src/imports/fasta.rs index efc1269..f06e086 100644 --- a/src/imports/fasta.rs +++ b/src/imports/fasta.rs @@ -58,7 +58,7 @@ pub fn import_fasta( .sequence(&sequence) .save(conn) }; - let block_group = BlockGroup::create(conn, &collection.name, None, &name); + let block_group = BlockGroup::create(conn, &collection.name, "", &name); let edge_into = Edge::create( conn, Sequence::PATH_START_HASH.to_string(), diff --git a/src/imports/gfa.rs b/src/imports/gfa.rs index 82a84cf..8af1fc3 100644 --- a/src/imports/gfa.rs +++ b/src/imports/gfa.rs @@ -25,7 +25,7 @@ fn bool_to_strand(direction: bool) -> Strand { pub fn import_gfa(gfa_path: &FilePath, collection_name: &str, conn: &Connection) { Collection::create(conn, collection_name); - let block_group = BlockGroup::create(conn, collection_name, None, ""); + let block_group = BlockGroup::create(conn, collection_name, "", ""); let gfa: Gfa = Gfa::parse_gfa_file(gfa_path.to_str().unwrap()); let mut sequences_by_segment_id: HashMap = HashMap::new(); diff --git a/src/models/block_group.rs b/src/models/block_group.rs index ccd98eb..ee94c70 100644 --- a/src/models/block_group.rs +++ b/src/models/block_group.rs @@ -16,7 +16,7 @@ use crate::models::strand::Strand; pub struct BlockGroup { pub id: i32, pub collection_name: String, - pub sample_name: Option, + pub sample_name: String, pub name: String, } @@ -88,17 +88,17 @@ impl BlockGroup { pub fn create( conn: &Connection, collection_name: &str, - sample_name: Option<&str>, + sample_name: &str, name: &str, ) -> BlockGroup { - let query = "INSERT INTO block_group (collection_name, sample_name, name) VALUES (?1, ?2, ?3) RETURNING *"; + let query = "INSERT INTO block_group (collection_name, sample_name, name) VALUES (?1, ?2, ?3) RETURNING (rowid)"; let mut stmt = conn.prepare(query).unwrap(); match stmt.query_row((collection_name, sample_name, name), |row| { Ok(BlockGroup { id: row.get(0)?, - collection_name: row.get(1)?, - sample_name: row.get(2)?, - name: row.get(3)?, + collection_name: collection_name.to_string(), + sample_name: sample_name.to_string(), + name: name.to_string(), }) }) { Ok(res) => res, @@ -108,20 +108,20 @@ impl BlockGroup { BlockGroup { id: conn .query_row( - "select id from block_group where collection_name = ?1 and sample_name is null and name = ?2", + "select rowid from block_group where collection_name = ?1 and sample_name = \"\" and name = ?2", (collection_name, name), |row| row.get(0), ) .unwrap(), collection_name: collection_name.to_string(), - sample_name: sample_name.map(|s| s.to_string()), + sample_name: sample_name.to_string(), name: name.to_string() } } else { panic!("something bad happened querying the database") } } - Err(_) => { + Err(err) => { panic!("something bad happened querying the database") } } @@ -175,7 +175,7 @@ impl BlockGroup { group_name: &str, ) -> i32 { let mut bg_id : i32 = match conn.query_row( - "select id from block_group where collection_name = ?1 AND sample_name = ?2 AND name = ?3", + "select rowid from block_group where collection_name = ?1 AND sample_name = ?2 AND name = ?3", (collection_name, sample_name, group_name), |row| row.get(0), ) { @@ -190,7 +190,7 @@ impl BlockGroup { } else { // use the base reference group if it exists bg_id = match conn.query_row( - "select id from block_group where collection_name = ?1 AND sample_name IS null AND name = ?2", + "select rowid from block_group where collection_name = ?1 AND sample_name = \"\" AND name = ?2", (collection_name, group_name), |row| row.get(0), ) { @@ -201,7 +201,7 @@ impl BlockGroup { } } } - let new_bg_id = BlockGroup::create(conn, collection_name, Some(sample_name), group_name); + let new_bg_id = BlockGroup::create(conn, collection_name, sample_name, group_name); // clone parent blocks/edges/path BlockGroup::clone(conn, bg_id, new_bg_id.id); @@ -223,7 +223,7 @@ impl BlockGroup { ) } else { conn.query_row( - "select id from block_group where collection_name = ?1 AND sample_name IS NULL AND name = ?2", + "select id from block_group where collection_name = ?1 AND sample_name = \"\" AND name = ?2", (collection_name, group_name.clone()), |row| row.get(0), ) @@ -437,7 +437,7 @@ mod tests { .sequence("GGGGGGGGGG") .save(conn); let _collection = Collection::create(conn, "test"); - let block_group = BlockGroup::create(conn, "test", None, "hg19"); + let block_group = BlockGroup::create(conn, "test", "", "hg19"); let edge0 = Edge::create( conn, Sequence::PATH_START_HASH.to_string(), @@ -511,14 +511,14 @@ mod tests { fn test_blockgroup_create() { let conn = &get_connection(None); Collection::create(conn, "test"); - let bg1 = BlockGroup::create(conn, "test", None, "hg19"); + let bg1 = BlockGroup::create(conn, "test", "", "hg19"); assert_eq!(bg1.collection_name, "test"); assert_eq!(bg1.name, "hg19"); Sample::create(conn, "sample"); - let bg2 = BlockGroup::create(conn, "test", Some("sample"), "hg19"); + let bg2 = BlockGroup::create(conn, "test", "sample", "hg19"); assert_eq!(bg2.collection_name, "test"); assert_eq!(bg2.name, "hg19"); - assert_eq!(bg2.sample_name, Some("sample".to_string())); + assert_eq!(bg2.sample_name, "sample".to_string()); assert_ne!(bg1.id, bg2.id); } @@ -526,7 +526,7 @@ mod tests { fn test_blockgroup_clone() { let conn = &get_connection(None); Collection::create(conn, "test"); - let bg1 = BlockGroup::create(conn, "test", None, "hg19"); + let bg1 = BlockGroup::create(conn, "test", "", "hg19"); assert_eq!(bg1.collection_name, "test"); assert_eq!(bg1.name, "hg19"); Sample::create(conn, "sample"); diff --git a/src/models/block_group_edge.rs b/src/models/block_group_edge.rs index ad5e522..9d560f9 100644 --- a/src/models/block_group_edge.rs +++ b/src/models/block_group_edge.rs @@ -25,13 +25,14 @@ impl BlockGroupEdge { "INSERT OR IGNORE INTO block_group_edges (block_group_id, edge_id) VALUES {0};", formatted_rows_to_insert ); + println!("{insert_statement:?}"); let _ = conn.execute(&insert_statement, ()); } } pub fn edges_for_block_group(conn: &Connection, block_group_id: i32) -> Vec { let query = format!( - "select * from block_group_edges where block_group_id = {};", + "select rowid as id, * from block_group_edges where block_group_id = {};", block_group_id ); let block_group_edges = BlockGroupEdge::query(conn, &query, vec![]); diff --git a/src/models/edge.rs b/src/models/edge.rs index 6e8c6c7..4313064 100644 --- a/src/models/edge.rs +++ b/src/models/edge.rs @@ -60,7 +60,7 @@ impl Edge { chromosome_index: i32, phased: i32, ) -> Edge { - let query = "INSERT INTO edges (source_hash, source_coordinate, source_strand, target_hash, target_coordinate, target_strand, chromosome_index, phased) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8) RETURNING *"; + let query = "INSERT INTO edges (source_hash, source_coordinate, source_strand, target_hash, target_coordinate, target_strand, chromosome_index, phased) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8) RETURNING (rowid)"; let id_query = "select id from edges where source_hash = ?1 and source_coordinate = ?2 and source_strand = ?3 and target_hash = ?4 and target_coordinate = ?5 and target_strand = ?6 and chromosome_index = ?7 and phased = ?8"; let placeholders: Vec = vec![ source_hash.clone().into(), @@ -77,17 +77,20 @@ impl Edge { match stmt.query_row(params_from_iter(&placeholders), |row| { Ok(Edge { id: row.get(0)?, - source_hash: row.get(1)?, - source_coordinate: row.get(2)?, - source_strand: row.get(3)?, - target_hash: row.get(4)?, - target_coordinate: row.get(5)?, - target_strand: row.get(6)?, - chromosome_index: row.get(7)?, - phased: row.get(8)?, + source_hash: source_hash.clone(), + source_coordinate, + source_strand, + target_hash: target_hash.clone(), + target_coordinate, + target_strand, + chromosome_index, + phased, }) }) { - Ok(edge) => edge, + Ok(edge) => { + println!("made edge {edge:?}"); + edge + } Err(rusqlite::Error::SqliteFailure(err, details)) => { if err.code == rusqlite::ErrorCode::ConstraintViolation { println!("{err:?} {details:?}"); @@ -120,7 +123,7 @@ impl Edge { .map(|edge_id| edge_id.to_string()) .collect::>() .join(","); - let query = format!("select id, source_hash, source_coordinate, source_strand, target_hash, target_coordinate, target_strand, chromosome_index, phased from edges where id in ({});", formatted_edge_ids); + let query = format!("select rowid as id, source_hash, source_coordinate, source_strand, target_hash, target_coordinate, target_strand, chromosome_index, phased from edges where rowid in ({});", formatted_edge_ids); Edge::query(conn, &query, vec![]) } @@ -170,7 +173,7 @@ impl Edge { } let formatted_edge_rows = edge_rows.join(", "); - let select_statement = format!("SELECT * FROM edges WHERE (source_hash, source_coordinate, source_strand, target_hash, target_coordinate, target_strand, chromosome_index, phased) in ({0});", formatted_edge_rows); + let select_statement = format!("SELECT rowid as id, * FROM edges WHERE (source_hash, source_coordinate, source_strand, target_hash, target_coordinate, target_strand, chromosome_index, phased) in ({0});", formatted_edge_rows); let existing_edges = Edge::query(conn, &select_statement, vec![]); let mut existing_edge_ids: Vec = existing_edges .clone() @@ -215,7 +218,7 @@ impl Edge { for chunk in edge_rows_to_insert.chunks(100000) { let formatted_edge_rows_to_insert = chunk.join(", "); - let insert_statement = format!("INSERT INTO edges (source_hash, source_coordinate, source_strand, target_hash, target_coordinate, target_strand, chromosome_index, phased) VALUES {0} RETURNING (id);", formatted_edge_rows_to_insert); + let insert_statement = format!("INSERT INTO edges (source_hash, source_coordinate, source_strand, target_hash, target_coordinate, target_strand, chromosome_index, phased) VALUES {0} RETURNING (rowid);", formatted_edge_rows_to_insert); let mut stmt = conn.prepare(&insert_statement).unwrap(); let rows = stmt.query_map([], |row| row.get(0)).unwrap(); let mut edge_ids: Vec = vec![]; diff --git a/src/models/path.rs b/src/models/path.rs index 5932ed6..a7a4f53 100644 --- a/src/models/path.rs +++ b/src/models/path.rs @@ -65,7 +65,7 @@ pub struct NewBlock { impl Path { pub fn create(conn: &Connection, name: &str, block_group_id: i32, edge_ids: &[i32]) -> Path { - let query = "INSERT INTO path (name, block_group_id) VALUES (?1, ?2) RETURNING (id)"; + let query = "INSERT INTO path (name, block_group_id) VALUES (?1, ?2) RETURNING (rowid)"; let mut stmt = conn.prepare(query).unwrap(); let mut rows = stmt .query_map((name, block_group_id), |row| { @@ -263,7 +263,7 @@ mod tests { fn test_gets_sequence() { let conn = &mut get_connection(None); Collection::create(conn, "test collection"); - let block_group = BlockGroup::create(conn, "test collection", None, "test block group"); + let block_group = BlockGroup::create(conn, "test collection", "", "test block group"); let sequence1 = Sequence::new() .sequence_type("DNA") .sequence("ATCGATCG") @@ -349,7 +349,7 @@ mod tests { fn test_gets_sequence_with_rc() { let conn = &mut get_connection(None); Collection::create(conn, "test collection"); - let block_group = BlockGroup::create(conn, "test collection", None, "test block group"); + let block_group = BlockGroup::create(conn, "test collection", "", "test block group"); let sequence1 = Sequence::new() .sequence_type("DNA") .sequence("ATCGATCG") @@ -442,7 +442,7 @@ mod tests { fn test_intervaltree() { let conn = &mut get_connection(None); Collection::create(conn, "test collection"); - let block_group = BlockGroup::create(conn, "test collection", None, "test block group"); + let block_group = BlockGroup::create(conn, "test collection", "", "test block group"); let sequence1 = Sequence::new() .sequence_type("DNA") .sequence("ATCGATCG") diff --git a/src/models/path_edge.rs b/src/models/path_edge.rs index a7a67a5..6c8b0cf 100644 --- a/src/models/path_edge.rs +++ b/src/models/path_edge.rs @@ -14,7 +14,7 @@ pub struct PathEdge { impl PathEdge { pub fn create(conn: &Connection, path_id: i32, index_in_path: i32, edge_id: i32) -> PathEdge { let query = - "INSERT INTO path_edges (path_id, index_in_path, edge_id) VALUES (?1, ?2, ?3) RETURNING (id)"; + "INSERT INTO path_edges (path_id, index_in_path, edge_id) VALUES (?1, ?2, ?3) RETURNING (rowid)"; let mut stmt = conn.prepare(query).unwrap(); let mut rows = stmt .query_map((path_id, index_in_path, edge_id), |row| { @@ -32,7 +32,7 @@ impl PathEdge { if err.code == rusqlite::ErrorCode::ConstraintViolation { println!("{err:?} {details:?}"); let mut placeholders = vec![path_id]; - let query = "SELECT id from path_edges where path_id = ?1 AND edge_id = ?2;"; + let query = "SELECT rowid from path_edges where path_id = ?1 AND edge_id = ?2;"; placeholders.push(edge_id); println!("{query} {placeholders:?}"); PathEdge { @@ -75,7 +75,7 @@ impl PathEdge { pub fn edges_for(conn: &Connection, path_id: i32) -> Vec { let path_edges = PathEdge::query( conn, - "select * from path_edges where path_id = ?1 order by index_in_path ASC", + "select rowid as id, * from path_edges where path_id = ?1 order by index_in_path ASC", vec![Value::from(path_id)], ); let edge_ids = path_edges