Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Path example #8

Merged
merged 9 commits into from
Aug 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,722 changes: 0 additions & 1,722 deletions Cargo.lock

This file was deleted.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ rusqlite = { version = "0.31.0", features = ["bundled", "array"] }
rusqlite_migration = { version = "1.2.0" , features = ["from-directory"]}
sha2 = "0.10.8"
noodles = { version = "0.78.0", features = ["vcf", "fasta", "async"] }
petgraph = "0.6.5"
28 changes: 20 additions & 8 deletions migrations/01-initial/up.sql
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,6 @@ CREATE TABLE sequence (
"length" INTEGER NOT NULL
);

CREATE TABLE path (
id INTEGER PRIMARY KEY NOT NULL,
name TEXT NOT NULL,
path_index INTEGER NOT NULL DEFAULT 0
);

CREATE TABLE block_group (
id INTEGER PRIMARY KEY NOT NULL,
collection_name TEXT NOT NULL,
Expand All @@ -44,13 +38,31 @@ CREATE UNIQUE INDEX block_uidx ON block(sequence_hash, block_group_id, start, en

-- edges: one row per edge between two rows of the block table, tagged with a
-- chromosome index and a phased flag.
-- NOTE(review): source_id is listed twice below. This text looks like a
-- rendered diff hunk in which the removed line (NOT NULL) and the added line
-- (nullable) both appear; presumably only the nullable form belongs in the
-- final schema -- confirm against the real up.sql.
CREATE TABLE edges (
id INTEGER PRIMARY KEY NOT NULL,
source_id INTEGER NOT NULL,
source_id INTEGER,
target_id INTEGER,
chromosome_index INTEGER NOT NULL,
phased INTEGER NOT NULL,
FOREIGN KEY(source_id) REFERENCES block(id),
FOREIGN KEY(target_id) REFERENCES block(id),
-- phased may only hold 0 or 1.
constraint chk_phased check (phased in (0, 1))
);

-- At most one edge per (source, target, chromosome_index, phased) combination.
-- NOTE(review): SQLite treats NULLs as distinct in a UNIQUE index, so rows
-- whose source_id or target_id is NULL are not deduplicated by this index --
-- confirm that is acceptable for edges with a missing endpoint.
CREATE UNIQUE INDEX edge_uidx ON edges(source_id, target_id, chromosome_index, phased);

-- path: a named path that belongs to exactly one block_group row.
CREATE TABLE path (
id INTEGER PRIMARY KEY NOT NULL,
block_group_id INTEGER NOT NULL,
name TEXT NOT NULL,
FOREIGN KEY(block_group_id) REFERENCES block_group(id)
);
-- A path name may be used only once within a given block group.
CREATE UNIQUE INDEX path_uidx ON path(block_group_id, name);

-- path_edges: associates a path with a pair of rows from the edges table
-- (source_edge_id, target_edge_id); either side of the pair may be NULL.
-- NOTE(review): presumably each row records that target_edge_id follows
-- source_edge_id along the path -- confirm against the Path model.
CREATE TABLE path_edges (
id INTEGER PRIMARY KEY NOT NULL,
path_id INTEGER NOT NULL,
source_edge_id INTEGER,
target_edge_id INTEGER,
FOREIGN KEY(source_edge_id) REFERENCES edges(id),
FOREIGN KEY(target_edge_id) REFERENCES edges(id),
FOREIGN KEY(path_id) REFERENCES path(id)
);
-- At most one row per (path, source edge, target edge) combination.
-- NOTE(review): SQLite treats NULLs as distinct in a UNIQUE index, so rows
-- with a NULL source_edge_id or target_edge_id can still be duplicated --
-- confirm that is intended.
CREATE UNIQUE INDEX path_edge_uidx ON path_edges(path_id, source_edge_id, target_edge_id);
38 changes: 29 additions & 9 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@ use std::path::PathBuf;
use bio::io::fasta;
use gen::get_connection;
use gen::migrations::run_migrations;
use gen::models::{self, block::Block, edge::Edge, sequence::Sequence, BlockGroup};
use gen::models::{self, block::Block, edge::Edge, path::Path, sequence::Sequence, BlockGroup};
use noodles::vcf;
use noodles::vcf::variant::record::samples::series::value::genotype::Phasing;
use noodles::vcf::variant::record::samples::series::Value;
use noodles::vcf::variant::record::samples::{Sample, Series};
use noodles::vcf::variant::record::{AlternateBases, ReferenceBases, Samples};
use noodles::vcf::variant::Record;
use rusqlite::Connection;
use rusqlite::{types::Value as SQLValue, Connection};
use std::io;

#[derive(Parser)]
Expand Down Expand Up @@ -80,7 +80,14 @@ fn import_fasta(fasta: &String, name: &String, shallow: bool, conn: &mut Connect
(sequence.len() as i32),
&"1".to_string(),
);
let edge = Edge::create(conn, block.id, None, 0, 0);
let edge_1 = Edge::create(conn, None, Some(block.id), 0, 0);
let edge_2 = Edge::create(conn, Some(block.id), None, 0, 0);
Path::create(
conn,
record.id(),
block_group.id,
vec![edge_1.id, edge_2.id],
);
}
println!("Created it");
} else {
Expand Down Expand Up @@ -108,10 +115,8 @@ fn update_with_vcf(vcf_path: &String, collection_name: &String, conn: &mut Conne
let ref_end = record.variant_end(&header).unwrap().get();
let alt_bases = record.alternate_bases();
let alt_alleles: Vec<_> = alt_bases.iter().collect::<io::Result<_>>().unwrap();
let mut created: HashSet<i32> = HashSet::new();
for (sample_index, sample) in record.samples().iter().enumerate() {
let genotype = sample.get(&header, "GT");
let mut allele_blocks: HashMap<i32, i32> = HashMap::new();
if genotype.is_some() {
if let Value::Genotype(genotypes) = genotype.unwrap().unwrap().unwrap() {
for (chromosome_index, gt) in genotypes.iter().enumerate() {
Expand All @@ -137,6 +142,14 @@ fn update_with_vcf(vcf_path: &String, collection_name: &String, conn: &mut Conne
&sample_names[sample_index],
&seq_name,
);
let sample_path_id = Path::get_paths(
conn,
"select * from path where block_group_id = ?1 AND name = ?2",
vec![
SQLValue::from(sample_bg_id),
SQLValue::from(seq_name.clone()),
],
);
let new_block_id = Block::create(
conn,
&new_sequence_hash,
Expand All @@ -145,10 +158,9 @@ fn update_with_vcf(vcf_path: &String, collection_name: &String, conn: &mut Conne
alt_seq.len() as i32,
&"1".to_string(),
);
println!("{sample_bg_id} {new_block_id:?} {chromosome_index} {phased} {allele}");
BlockGroup::insert_change(
conn,
sample_bg_id,
sample_path_id[0].id,
ref_start as i32,
ref_end as i32,
new_block_id.id,
Expand Down Expand Up @@ -227,8 +239,16 @@ mod tests {
);
update_with_vcf(&vcf_path.to_str().unwrap().to_string(), &collection, conn);
assert_eq!(
BlockGroup::sequence(conn, &collection, Some(&"foo".to_string()), "m123"),
"ATCATCGATCGATCGATCGGGAACACACAGAGA"
BlockGroup::get_all_sequences(conn, 1),
HashSet::from_iter(vec!["ATCGATCGATCGATCGATCGGGAACACACAGAGA".to_string()])
);
assert_eq!(
BlockGroup::get_all_sequences(conn, 2),
HashSet::from_iter(vec!["ATCATCGATAGAGATCGATCGGGAACACACAGAGA".to_string()])
);
assert_eq!(
BlockGroup::get_all_sequences(conn, 3),
HashSet::from_iter(vec!["ATCATCGATCGATCGATCGGGAACACACAGAGA".to_string()])
);
}
}
Loading
Loading