Skip to content

Commit

Permalink
Composite fk example
Browse files Browse the repository at this point in the history
  • Loading branch information
Chris7 committed Sep 19, 2024
1 parent 2116ba6 commit ca972d8
Show file tree
Hide file tree
Showing 9 changed files with 60 additions and 84 deletions.
56 changes: 14 additions & 42 deletions migrations/core/01-initial/up.sql
Original file line number Diff line number Diff line change
Expand Up @@ -20,49 +20,22 @@ CREATE TABLE sequence (
) STRICT;

CREATE TABLE block_group (
id INTEGER PRIMARY KEY NOT NULL,
collection_name TEXT NOT NULL,
sample_name TEXT,
sample_name TEXT NOT NULL,
name TEXT NOT NULL,
FOREIGN KEY(collection_name) REFERENCES collection(name),
FOREIGN KEY(sample_name) REFERENCES sample(name)
FOREIGN KEY(sample_name) REFERENCES sample(name),
PRIMARY KEY(collection_name, sample_name, name)
) STRICT;
CREATE UNIQUE INDEX block_group_uidx ON block_group(collection_name, sample_name, name) WHERE sample_name is not null;
CREATE UNIQUE INDEX block_group_null_sample_uidx ON block_group(collection_name, name) WHERE sample_name is null;

CREATE TABLE path (
id INTEGER PRIMARY KEY NOT NULL,
block_group_id INTEGER NOT NULL,
name TEXT NOT NULL,
FOREIGN KEY(block_group_id) REFERENCES block_group(id)
) STRICT;
CREATE UNIQUE INDEX path_uidx ON path(block_group_id, name);

-- An operation from a VCF can impact multiple paths and samples, so operations are not faceted by path or sample.
CREATE TABLE operation (
id INTEGER PRIMARY KEY NOT NULL,
parent_id INTEGER,
collection_name TEXT NOT NULL,
change_type TEXT NOT NULL,
change_id INTEGER NOT NULL,
FOREIGN KEY(parent_id) REFERENCES operation(id)
) STRICT;

CREATE TABLE file_addition (
id INTEGER PRIMARY KEY NOT NULL,
file_path TEXT NOT NULL,
file_type TEXT NOT NULL
) STRICT;

CREATE TABLE operation_summary (
id INTEGER PRIMARY KEY NOT NULL,
operation_id INTEGER NOT NULL,
summary TEXT NOT NULL,
FOREIGN KEY(operation_id) REFERENCES operation(id)
-- FOREIGN KEY(block_group_id) REFERENCES block_group(rowid),
PRIMARY KEY(block_group_id, name)
) STRICT;

CREATE TABLE edges (
id INTEGER PRIMARY KEY NOT NULL,
source_hash TEXT NOT NULL,
source_coordinate INTEGER NOT NULL,
source_strand TEXT NOT NULL,
Expand All @@ -73,28 +46,26 @@ CREATE TABLE edges (
phased INTEGER NOT NULL,
FOREIGN KEY(source_hash) REFERENCES sequence(hash),
FOREIGN KEY(target_hash) REFERENCES sequence(hash),
constraint chk_phased check (phased in (0, 1))
constraint chk_phased check (phased in (0, 1)),
PRIMARY KEY(source_hash, source_coordinate, source_strand, target_hash, target_coordinate, target_strand, chromosome_index, phased)
) STRICT;
CREATE UNIQUE INDEX edge_uidx ON edges(source_hash, source_coordinate, source_strand, target_hash, target_coordinate, target_strand, chromosome_index, phased);

CREATE TABLE path_edges (
id INTEGER PRIMARY KEY NOT NULL,
path_id INTEGER NOT NULL,
index_in_path INTEGER NOT NULL,
edge_id INTEGER NOT NULL,
FOREIGN KEY(edge_id) REFERENCES edges(id),
FOREIGN KEY(path_id) REFERENCES path(id)
-- FOREIGN KEY(edge_id) REFERENCES edges(rowid),
-- FOREIGN KEY(path_id) REFERENCES path(rowid),
PRIMARY KEY(path_id, edge_id)
) STRICT;
CREATE UNIQUE INDEX path_edges_uidx ON path_edges(path_id, edge_id);

CREATE TABLE block_group_edges (
id INTEGER PRIMARY KEY NOT NULL,
block_group_id INTEGER NOT NULL,
edge_id INTEGER NOT NULL,
FOREIGN KEY(block_group_id) REFERENCES block_group(id),
FOREIGN KEY(edge_id) REFERENCES edges(id)
-- FOREIGN KEY(block_group_id) REFERENCES block_group(rowid),
-- FOREIGN KEY(edge_id) REFERENCES edges(rowid),
PRIMARY KEY(block_group_id, edge_id)
) STRICT;
CREATE UNIQUE INDEX block_group_edges_uidx ON block_group_edges(block_group_id, edge_id);

INSERT INTO sequence (hash, sequence_type, sequence, name, file_path, "length") values ("start-node-yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy", "OTHER", "start-node-yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy", "", "", 64), ("end-node-zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", "OTHER", "end-node-zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", "", "", 64);
INSERT INTO gen_metadata (db_uuid) values (lower(
Expand All @@ -104,3 +75,4 @@ INSERT INTO gen_metadata (db_uuid) values (lower(
substr(hex(randomblob(2)), 2) || '-' ||
hex(randomblob(6))
));
INSERT INTO sample (name) values ("");
2 changes: 1 addition & 1 deletion src/exports/gfa.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ mod tests {

let collection_name = "test collection";
let collection = Collection::create(&conn, collection_name);
let block_group = BlockGroup::create(&conn, collection_name, None, "test block group");
let block_group = BlockGroup::create(&conn, collection_name, "", "test block group");
let sequence1 = Sequence::new()
.sequence_type("DNA")
.sequence("AAAA")
Expand Down
2 changes: 1 addition & 1 deletion src/imports/fasta.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ pub fn import_fasta(
.sequence(&sequence)
.save(conn)
};
let block_group = BlockGroup::create(conn, &collection.name, None, &name);
let block_group = BlockGroup::create(conn, &collection.name, "", &name);
let edge_into = Edge::create(
conn,
Sequence::PATH_START_HASH.to_string(),
Expand Down
2 changes: 1 addition & 1 deletion src/imports/gfa.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ fn bool_to_strand(direction: bool) -> Strand {

pub fn import_gfa(gfa_path: &FilePath, collection_name: &str, conn: &Connection) {
Collection::create(conn, collection_name);
let block_group = BlockGroup::create(conn, collection_name, None, "");
let block_group = BlockGroup::create(conn, collection_name, "", "");
let gfa: Gfa<u64, (), ()> = Gfa::parse_gfa_file(gfa_path.to_str().unwrap());
let mut sequences_by_segment_id: HashMap<u64, Sequence> = HashMap::new();

Expand Down
36 changes: 18 additions & 18 deletions src/models/block_group.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use crate::models::strand::Strand;
pub struct BlockGroup {
pub id: i32,
pub collection_name: String,
pub sample_name: Option<String>,
pub sample_name: String,
pub name: String,
}

Expand Down Expand Up @@ -88,17 +88,17 @@ impl BlockGroup {
pub fn create(
conn: &Connection,
collection_name: &str,
sample_name: Option<&str>,
sample_name: &str,
name: &str,
) -> BlockGroup {
let query = "INSERT INTO block_group (collection_name, sample_name, name) VALUES (?1, ?2, ?3) RETURNING *";
let query = "INSERT INTO block_group (collection_name, sample_name, name) VALUES (?1, ?2, ?3) RETURNING (rowid)";
let mut stmt = conn.prepare(query).unwrap();
match stmt.query_row((collection_name, sample_name, name), |row| {
Ok(BlockGroup {
id: row.get(0)?,
collection_name: row.get(1)?,
sample_name: row.get(2)?,
name: row.get(3)?,
collection_name: collection_name.to_string(),
sample_name: sample_name.to_string(),
name: name.to_string(),
})
}) {
Ok(res) => res,
Expand All @@ -108,20 +108,20 @@ impl BlockGroup {
BlockGroup {
id: conn
.query_row(
"select id from block_group where collection_name = ?1 and sample_name is null and name = ?2",
"select rowid from block_group where collection_name = ?1 and sample_name = \"\" and name = ?2",
(collection_name, name),
|row| row.get(0),
)
.unwrap(),
collection_name: collection_name.to_string(),
sample_name: sample_name.map(|s| s.to_string()),
sample_name: sample_name.to_string(),
name: name.to_string()
}
} else {
panic!("something bad happened querying the database")
}
}
Err(_) => {
Err(err) => {
panic!("something bad happened querying the database")
}
}
Expand Down Expand Up @@ -175,7 +175,7 @@ impl BlockGroup {
group_name: &str,
) -> i32 {
let mut bg_id : i32 = match conn.query_row(
"select id from block_group where collection_name = ?1 AND sample_name = ?2 AND name = ?3",
"select rowid from block_group where collection_name = ?1 AND sample_name = ?2 AND name = ?3",
(collection_name, sample_name, group_name),
|row| row.get(0),
) {
Expand All @@ -190,7 +190,7 @@ impl BlockGroup {
} else {
// use the base reference group if it exists
bg_id = match conn.query_row(
"select id from block_group where collection_name = ?1 AND sample_name IS null AND name = ?2",
"select rowid from block_group where collection_name = ?1 AND sample_name = \"\" AND name = ?2",
(collection_name, group_name),
|row| row.get(0),
) {
Expand All @@ -201,7 +201,7 @@ impl BlockGroup {
}
}
}
let new_bg_id = BlockGroup::create(conn, collection_name, Some(sample_name), group_name);
let new_bg_id = BlockGroup::create(conn, collection_name, sample_name, group_name);

// clone parent blocks/edges/path
BlockGroup::clone(conn, bg_id, new_bg_id.id);
Expand All @@ -223,7 +223,7 @@ impl BlockGroup {
)
} else {
conn.query_row(
"select id from block_group where collection_name = ?1 AND sample_name IS NULL AND name = ?2",
"select id from block_group where collection_name = ?1 AND sample_name = \"\" AND name = ?2",
(collection_name, group_name.clone()),
|row| row.get(0),
)
Expand Down Expand Up @@ -437,7 +437,7 @@ mod tests {
.sequence("GGGGGGGGGG")
.save(conn);
let _collection = Collection::create(conn, "test");
let block_group = BlockGroup::create(conn, "test", None, "hg19");
let block_group = BlockGroup::create(conn, "test", "", "hg19");
let edge0 = Edge::create(
conn,
Sequence::PATH_START_HASH.to_string(),
Expand Down Expand Up @@ -511,22 +511,22 @@ mod tests {
fn test_blockgroup_create() {
let conn = &get_connection(None);
Collection::create(conn, "test");
let bg1 = BlockGroup::create(conn, "test", None, "hg19");
let bg1 = BlockGroup::create(conn, "test", "", "hg19");
assert_eq!(bg1.collection_name, "test");
assert_eq!(bg1.name, "hg19");
Sample::create(conn, "sample");
let bg2 = BlockGroup::create(conn, "test", Some("sample"), "hg19");
let bg2 = BlockGroup::create(conn, "test", "sample", "hg19");
assert_eq!(bg2.collection_name, "test");
assert_eq!(bg2.name, "hg19");
assert_eq!(bg2.sample_name, Some("sample".to_string()));
assert_eq!(bg2.sample_name, "sample".to_string());
assert_ne!(bg1.id, bg2.id);
}

#[test]
fn test_blockgroup_clone() {
let conn = &get_connection(None);
Collection::create(conn, "test");
let bg1 = BlockGroup::create(conn, "test", None, "hg19");
let bg1 = BlockGroup::create(conn, "test", "", "hg19");
assert_eq!(bg1.collection_name, "test");
assert_eq!(bg1.name, "hg19");
Sample::create(conn, "sample");
Expand Down
3 changes: 2 additions & 1 deletion src/models/block_group_edge.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,14 @@ impl BlockGroupEdge {
"INSERT OR IGNORE INTO block_group_edges (block_group_id, edge_id) VALUES {0};",
formatted_rows_to_insert
);
println!("{insert_statement:?}");
let _ = conn.execute(&insert_statement, ());
}
}

pub fn edges_for_block_group(conn: &Connection, block_group_id: i32) -> Vec<Edge> {
let query = format!(
"select * from block_group_edges where block_group_id = {};",
"select rowid as id, * from block_group_edges where block_group_id = {};",
block_group_id
);
let block_group_edges = BlockGroupEdge::query(conn, &query, vec![]);
Expand Down
29 changes: 16 additions & 13 deletions src/models/edge.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ impl Edge {
chromosome_index: i32,
phased: i32,
) -> Edge {
let query = "INSERT INTO edges (source_hash, source_coordinate, source_strand, target_hash, target_coordinate, target_strand, chromosome_index, phased) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8) RETURNING *";
let query = "INSERT INTO edges (source_hash, source_coordinate, source_strand, target_hash, target_coordinate, target_strand, chromosome_index, phased) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8) RETURNING (rowid)";
let id_query = "select id from edges where source_hash = ?1 and source_coordinate = ?2 and source_strand = ?3 and target_hash = ?4 and target_coordinate = ?5 and target_strand = ?6 and chromosome_index = ?7 and phased = ?8";
let placeholders: Vec<Value> = vec![
source_hash.clone().into(),
Expand All @@ -77,17 +77,20 @@ impl Edge {
match stmt.query_row(params_from_iter(&placeholders), |row| {
Ok(Edge {
id: row.get(0)?,
source_hash: row.get(1)?,
source_coordinate: row.get(2)?,
source_strand: row.get(3)?,
target_hash: row.get(4)?,
target_coordinate: row.get(5)?,
target_strand: row.get(6)?,
chromosome_index: row.get(7)?,
phased: row.get(8)?,
source_hash: source_hash.clone(),
source_coordinate,
source_strand,
target_hash: target_hash.clone(),
target_coordinate,
target_strand,
chromosome_index,
phased,
})
}) {
Ok(edge) => edge,
Ok(edge) => {
println!("made edge {edge:?}");
edge
}
Err(rusqlite::Error::SqliteFailure(err, details)) => {
if err.code == rusqlite::ErrorCode::ConstraintViolation {
println!("{err:?} {details:?}");
Expand Down Expand Up @@ -120,7 +123,7 @@ impl Edge {
.map(|edge_id| edge_id.to_string())
.collect::<Vec<_>>()
.join(",");
let query = format!("select id, source_hash, source_coordinate, source_strand, target_hash, target_coordinate, target_strand, chromosome_index, phased from edges where id in ({});", formatted_edge_ids);
let query = format!("select rowid as id, source_hash, source_coordinate, source_strand, target_hash, target_coordinate, target_strand, chromosome_index, phased from edges where rowid in ({});", formatted_edge_ids);
Edge::query(conn, &query, vec![])
}

Expand Down Expand Up @@ -170,7 +173,7 @@ impl Edge {
}
let formatted_edge_rows = edge_rows.join(", ");

let select_statement = format!("SELECT * FROM edges WHERE (source_hash, source_coordinate, source_strand, target_hash, target_coordinate, target_strand, chromosome_index, phased) in ({0});", formatted_edge_rows);
let select_statement = format!("SELECT rowid as id, * FROM edges WHERE (source_hash, source_coordinate, source_strand, target_hash, target_coordinate, target_strand, chromosome_index, phased) in ({0});", formatted_edge_rows);
let existing_edges = Edge::query(conn, &select_statement, vec![]);
let mut existing_edge_ids: Vec<i32> = existing_edges
.clone()
Expand Down Expand Up @@ -215,7 +218,7 @@ impl Edge {
for chunk in edge_rows_to_insert.chunks(100000) {
let formatted_edge_rows_to_insert = chunk.join(", ");

let insert_statement = format!("INSERT INTO edges (source_hash, source_coordinate, source_strand, target_hash, target_coordinate, target_strand, chromosome_index, phased) VALUES {0} RETURNING (id);", formatted_edge_rows_to_insert);
let insert_statement = format!("INSERT INTO edges (source_hash, source_coordinate, source_strand, target_hash, target_coordinate, target_strand, chromosome_index, phased) VALUES {0} RETURNING (rowid);", formatted_edge_rows_to_insert);
let mut stmt = conn.prepare(&insert_statement).unwrap();
let rows = stmt.query_map([], |row| row.get(0)).unwrap();
let mut edge_ids: Vec<i32> = vec![];
Expand Down
8 changes: 4 additions & 4 deletions src/models/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ pub struct NewBlock {

impl Path {
pub fn create(conn: &Connection, name: &str, block_group_id: i32, edge_ids: &[i32]) -> Path {
let query = "INSERT INTO path (name, block_group_id) VALUES (?1, ?2) RETURNING (id)";
let query = "INSERT INTO path (name, block_group_id) VALUES (?1, ?2) RETURNING (rowid)";
let mut stmt = conn.prepare(query).unwrap();
let mut rows = stmt
.query_map((name, block_group_id), |row| {
Expand Down Expand Up @@ -263,7 +263,7 @@ mod tests {
fn test_gets_sequence() {
let conn = &mut get_connection(None);
Collection::create(conn, "test collection");
let block_group = BlockGroup::create(conn, "test collection", None, "test block group");
let block_group = BlockGroup::create(conn, "test collection", "", "test block group");
let sequence1 = Sequence::new()
.sequence_type("DNA")
.sequence("ATCGATCG")
Expand Down Expand Up @@ -349,7 +349,7 @@ mod tests {
fn test_gets_sequence_with_rc() {
let conn = &mut get_connection(None);
Collection::create(conn, "test collection");
let block_group = BlockGroup::create(conn, "test collection", None, "test block group");
let block_group = BlockGroup::create(conn, "test collection", "", "test block group");
let sequence1 = Sequence::new()
.sequence_type("DNA")
.sequence("ATCGATCG")
Expand Down Expand Up @@ -442,7 +442,7 @@ mod tests {
fn test_intervaltree() {
let conn = &mut get_connection(None);
Collection::create(conn, "test collection");
let block_group = BlockGroup::create(conn, "test collection", None, "test block group");
let block_group = BlockGroup::create(conn, "test collection", "", "test block group");
let sequence1 = Sequence::new()
.sequence_type("DNA")
.sequence("ATCGATCG")
Expand Down
6 changes: 3 additions & 3 deletions src/models/path_edge.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ pub struct PathEdge {
impl PathEdge {
pub fn create(conn: &Connection, path_id: i32, index_in_path: i32, edge_id: i32) -> PathEdge {
let query =
"INSERT INTO path_edges (path_id, index_in_path, edge_id) VALUES (?1, ?2, ?3) RETURNING (id)";
"INSERT INTO path_edges (path_id, index_in_path, edge_id) VALUES (?1, ?2, ?3) RETURNING (rowid)";
let mut stmt = conn.prepare(query).unwrap();
let mut rows = stmt
.query_map((path_id, index_in_path, edge_id), |row| {
Expand All @@ -32,7 +32,7 @@ impl PathEdge {
if err.code == rusqlite::ErrorCode::ConstraintViolation {
println!("{err:?} {details:?}");
let mut placeholders = vec![path_id];
let query = "SELECT id from path_edges where path_id = ?1 AND edge_id = ?2;";
let query = "SELECT rowid from path_edges where path_id = ?1 AND edge_id = ?2;";
placeholders.push(edge_id);
println!("{query} {placeholders:?}");
PathEdge {
Expand Down Expand Up @@ -75,7 +75,7 @@ impl PathEdge {
pub fn edges_for(conn: &Connection, path_id: i32) -> Vec<Edge> {
let path_edges = PathEdge::query(
conn,
"select * from path_edges where path_id = ?1 order by index_in_path ASC",
"select rowid as id, * from path_edges where path_id = ?1 order by index_in_path ASC",
vec![Value::from(path_id)],
);
let edge_ids = path_edges
Expand Down

0 comments on commit ca972d8

Please sign in to comment.