From b1a09d94ce13c2cb92a0a4118e436261ade05a45 Mon Sep 17 00:00:00 2001 From: hofer Date: Mon, 19 Aug 2024 11:25:05 -0400 Subject: [PATCH] Add new edge and path models --- migrations/01-initial/up.sql | 27 +++++++++- src/models.rs | 2 + src/models/new_edge.rs | 88 ++++++++++++++++++++++++++++++ src/models/path.rs | 19 ++++++- src/models/path_edge.rs | 102 +++++++++++++++++++++++++++++++++++ 5 files changed, 235 insertions(+), 3 deletions(-) create mode 100644 src/models/new_edge.rs create mode 100644 src/models/path_edge.rs diff --git a/migrations/01-initial/up.sql b/migrations/01-initial/up.sql index 36d3c7d..3227709 100644 --- a/migrations/01-initial/up.sql +++ b/migrations/01-initial/up.sql @@ -79,4 +79,29 @@ CREATE TABLE change_log ( FOREIGN KEY(path_id) REFERENCES path(id), FOREIGN KEY(sequence_hash) REFERENCES sequence(hash) ); -CREATE UNIQUE INDEX change_log_uidx ON change_log(hash); \ No newline at end of file +CREATE UNIQUE INDEX change_log_uidx ON change_log(hash); + +CREATE TABLE new_edges ( + id INTEGER PRIMARY KEY NOT NULL, + source_hash TEXT, + source_coordinate INTEGER, + target_hash TEXT, + target_coordinate INTEGER, + chromosome_index INTEGER NOT NULL, + phased INTEGER NOT NULL, + FOREIGN KEY(source_hash) REFERENCES sequence(hash), + FOREIGN KEY(target_hash) REFERENCES sequence(hash), + constraint chk_phased check (phased in (0, 1)) +); +CREATE UNIQUE INDEX new_edge_uidx ON new_edges(source_hash, source_coordinate, target_hash, target_coordinate, chromosome_index, phased); + +CREATE TABLE path_edges ( + id INTEGER PRIMARY KEY NOT NULL, + path_id INTEGER NOT NULL, + source_edge_id INTEGER, + target_edge_id INTEGER, + FOREIGN KEY(source_edge_id) REFERENCES new_edges(id), + FOREIGN KEY(target_edge_id) REFERENCES new_edges(id), + FOREIGN KEY(path_id) REFERENCES path(id) +); +CREATE UNIQUE INDEX path_edges_uidx ON path_edges(path_id, source_edge_id, target_edge_id); diff --git a/src/models.rs b/src/models.rs index 2f20026..86d6294 100644 --- 
a/src/models.rs +++ b/src/models.rs @@ -8,7 +8,9 @@ use std::fmt::*; pub mod block; pub mod edge; +pub mod new_edge; pub mod path; +pub mod path_edge; pub mod sequence; use crate::graph::all_simple_paths; diff --git a/src/models/new_edge.rs b/src/models/new_edge.rs new file mode 100644 index 0000000..b31c16f --- /dev/null +++ b/src/models/new_edge.rs @@ -0,0 +1,88 @@ +use rusqlite::types::Value; +use rusqlite::{params_from_iter, Connection}; + +#[derive(Debug)] +pub struct NewEdge { + pub id: i32, + pub source_hash: Option, + pub source_coordinate: Option, + pub target_hash: Option, + pub target_coordinate: Option, + pub chromosome_index: i32, + pub phased: i32, +} + +impl NewEdge { + pub fn create( + conn: &Connection, + source_hash: Option, + source_coordinate: Option, + target_hash: Option, + target_coordinate: Option, + chromosome_index: i32, + phased: i32, + ) -> NewEdge { + let query; + let id_query; + let mut placeholders: Vec = vec![]; + if target_hash.is_some() && source_hash.is_some() { + query = "INSERT INTO new_edges (source_hash, source_coordinate, target_hash, target_coordinate, chromosome_index, phased) VALUES (?1, ?2, ?3, ?4, ?5, ?6) RETURNING *"; + id_query = "select id from new_edges where source_hash = ?1 and source_coordinate = ?2 and target_hash = ?3 and target_coordinate = ?4 and chromosome_index = ?5 and phased = ?6"; + placeholders.push(source_hash.clone().unwrap().into()); + placeholders.push(source_coordinate.unwrap().into()); + placeholders.push(target_hash.clone().unwrap().into()); + placeholders.push(target_coordinate.unwrap().into()); + placeholders.push(chromosome_index.into()); + placeholders.push(phased.into()); + } else if target_hash.is_some() { + id_query = "select id from new_edges where target_hash = ?1 and target_coordinate = ?2 and source_hash is null and chromosome_index = ?3 and phased = ?4"; + query = "INSERT INTO new_edges (target_hash, target_coordinate, chromosome_index, phased) VALUES (?1, ?2, ?3, ?4) RETURNING *"; + 
placeholders.push(target_hash.clone().unwrap().into()); + placeholders.push(target_coordinate.unwrap().into()); + placeholders.push(chromosome_index.into()); + placeholders.push(phased.into()); + } else { + id_query = "select id from new_edges where source_hash = ?1 and source_coordinate = ?2 and target_hash is null and chromosome_index = ?3 and phased = ?4"; + query = "INSERT INTO new_edges (source_hash, source_coordinate, chromosome_index, phased) VALUES (?1, ?2, ?3, ?4) RETURNING *"; + placeholders.push(source_hash.clone().unwrap().into()); + placeholders.push(source_coordinate.unwrap().into()); + placeholders.push(chromosome_index.into()); + placeholders.push(phased.into()); + } + let mut stmt = conn.prepare(query).unwrap(); + match stmt.query_row(params_from_iter(&placeholders), |row| { + Ok(NewEdge { + id: row.get(0)?, + source_hash: row.get(1)?, + source_coordinate: row.get(2)?, + target_hash: row.get(3)?, + target_coordinate: row.get(4)?, + chromosome_index: row.get(5)?, + phased: row.get(6)?, + }) + }) { + Ok(edge) => edge, + Err(rusqlite::Error::SqliteFailure(err, details)) => { + if err.code == rusqlite::ErrorCode::ConstraintViolation { + println!("{err:?} {details:?}"); + NewEdge { + id: conn + .query_row(id_query, params_from_iter(&placeholders), |row| row.get(0)) + .unwrap(), + source_hash, + source_coordinate, + target_hash, + target_coordinate, + chromosome_index, + phased, + } + } else { + panic!("something bad happened querying the database") + } + } + Err(_) => { + panic!("something bad happened querying the database") + } + } + } +} diff --git a/src/models/path.rs b/src/models/path.rs index d561089..f2339d9 100644 --- a/src/models/path.rs +++ b/src/models/path.rs @@ -1,5 +1,4 @@ -use crate::models::block::Block; -use crate::models::edge::Edge; +use crate::models::{block::Block, edge::Edge, path_edge::PathEdge}; use petgraph::graphmap::DiGraphMap; use petgraph::prelude::Dfs; use petgraph::Direction; @@ -47,6 +46,16 @@ pub fn revcomp(seq: &str) -> 
String { .unwrap() } +#[derive(Clone, Debug)] +pub struct NewBlock { + pub id: i32, + pub sequence_hash: String, + pub block_sequence: String, + pub start: i32, + pub end: i32, + pub strand: String, +} + impl Path { pub fn create(conn: &Connection, name: &str, block_group_id: i32, blocks: Vec) -> Path { let query = "INSERT INTO path (name, block_group_id) VALUES (?1, ?2) RETURNING (id)"; @@ -124,6 +133,12 @@ impl Path { } sequence } + + pub fn get_new_blocks(conn: &Connection, path_id: i32) -> Vec { + let mut new_blocks = vec![]; + let edges = PathEdge::edges_for(conn, path_id); + new_blocks + } } #[derive(Debug)] diff --git a/src/models/path_edge.rs b/src/models/path_edge.rs new file mode 100644 index 0000000..06da710 --- /dev/null +++ b/src/models/path_edge.rs @@ -0,0 +1,102 @@ +use crate::models::new_edge::NewEdge; +use rusqlite::types::Value; +use rusqlite::{params_from_iter, Connection}; + +#[derive(Debug)] +pub struct PathEdge { + pub id: i32, + pub path_id: i32, + pub source_edge_id: Option, + pub target_edge_id: Option, +} + +impl PathEdge { + pub fn create( + conn: &Connection, + path_id: i32, + source_edge_id: Option, + target_edge_id: Option, + ) -> PathEdge { + let query = + "INSERT INTO path_edges (path_id, source_edge_id, target_edge_id) VALUES (?1, ?2, ?3) RETURNING (id)"; + let mut stmt = conn.prepare(query).unwrap(); + let mut rows = stmt + .query_map((path_id, source_edge_id, target_edge_id), |row| { + Ok(PathEdge { + id: row.get(0)?, + path_id, + source_edge_id, + target_edge_id, + }) + }) + .unwrap(); + match rows.next().unwrap() { + Ok(res) => res, + Err(rusqlite::Error::SqliteFailure(err, details)) => { + if err.code == rusqlite::ErrorCode::ConstraintViolation { + println!("{err:?} {details:?}"); + let query; + let mut placeholders = vec![path_id]; + if let Some(s) = source_edge_id { + if let Some(t) = target_edge_id { + query = "SELECT id from path_edges where path_id = ?1 AND source_edge_id = ?2 AND target_edge_id = ?3;"; + 
placeholders.push(s); + placeholders.push(t); + } else { + query = "SELECT id from path_edges where path_id = ?1 AND source_edge_id = ?2 AND target_edge_id is null;"; + placeholders.push(s); + } + } else if let Some(t) = target_edge_id { + query = "SELECT id from path_edges where path_id = ?1 AND source_edge_id is null AND target_edge_id = ?2;"; + placeholders.push(t); + } else { + panic!("No edge ids passed"); + } + println!("{query} {placeholders:?}"); + PathEdge { + id: conn + .query_row(query, params_from_iter(&placeholders), |row| row.get(0)) + .unwrap(), + path_id, + source_edge_id, + target_edge_id, + } + } else { + panic!("something bad happened querying the database") + } + } + Err(_) => { + panic!("something bad happened querying the database") + } + } + } + + pub fn query(conn: &Connection, query: &str, placeholders: Vec) -> Vec { + let mut stmt = conn.prepare(query).unwrap(); + let rows = stmt + .query_map(params_from_iter(placeholders), |row| { + Ok(PathEdge { + id: row.get(0)?, + path_id: row.get(1)?, + source_edge_id: row.get(2)?, + target_edge_id: row.get(3)?, + }) + }) + .unwrap(); + let mut objs = vec![]; + for row in rows { + objs.push(row.unwrap()); + } + objs + } + + pub fn edges_for(conn: &Connection, path_id: i32) -> Vec { + let edges = vec![]; + let path_edges = PathEdge::query( + conn, + "select * from path_edges where path_id = ?1", + vec![Value::from(path_id)], + ); + edges + } +}