Skip to content

Commit

Permalink
Move library functionality out of main
Browse files Browse the repository at this point in the history
  • Loading branch information
Chris7 committed Sep 6, 2024
1 parent a400fa8 commit d14e3c0
Show file tree
Hide file tree
Showing 6 changed files with 496 additions and 455 deletions.
1 change: 1 addition & 0 deletions src/imports.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pub mod fasta;
107 changes: 107 additions & 0 deletions src/imports/fasta.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
use std::collections::HashSet;
use std::fmt::Debug;
use std::path::PathBuf;
use std::str;

use crate::models::{
self, block_group::BlockGroup, block_group_edge::BlockGroupEdge, edge::Edge, path::Path,
sequence::Sequence, strand::Strand,
};
use noodles::fasta;
use noodles::vcf::variant::record::samples::{Sample, Series};
use noodles::vcf::variant::record::{AlternateBases, ReferenceBases, Samples};
use noodles::vcf::variant::Record;
use rusqlite::Connection;

pub fn import_fasta(fasta: &String, name: &str, shallow: bool, conn: &mut Connection) {
// TODO: support gz
let mut reader = fasta::io::reader::Builder.build_from_path(fasta).unwrap();

if !models::Collection::exists(conn, name) {
let collection = models::Collection::create(conn, name);

for result in reader.records() {
let record = result.expect("Error during fasta record parsing");
let sequence = str::from_utf8(record.sequence().as_ref())
.unwrap()
.to_string();
let name = String::from_utf8(record.name().to_vec()).unwrap();
let sequence_length = record.sequence().len() as i32;
let seq = if shallow {
Sequence::new()
.sequence_type("DNA")
.name(&name)
.file_path(fasta)
.save(conn)
} else {
Sequence::new()
.sequence_type("DNA")
.sequence(&sequence)
.save(conn)
};
let block_group = BlockGroup::create(conn, &collection.name, None, &name);
let edge_into = Edge::create(
conn,
Edge::PATH_START_HASH.to_string(),
0,
Strand::Forward,
seq.hash.to_string(),
0,
Strand::Forward,
0,
0,
);
let edge_out_of = Edge::create(
conn,
seq.hash.to_string(),
sequence_length,
Strand::Forward,
Edge::PATH_END_HASH.to_string(),
0,
Strand::Forward,
0,
0,
);
BlockGroupEdge::bulk_create(conn, block_group.id, vec![edge_into.id, edge_out_of.id]);
Path::create(
conn,
&name,
block_group.id,
vec![edge_into.id, edge_out_of.id],
);
}
println!("Created it");
} else {
println!("Collection {:1} already exists", name);
}
}

#[cfg(test)]
mod tests {
    // Pull in everything from the parent module so the test can call `import_fasta` directly.
    use super::*;
    use crate::test_helpers::get_connection;

    /// Sequence the test expects to find after importing `fixtures/simple.fa`.
    const SIMPLE_FASTA_SEQUENCE: &str = "ATCGATCGATCGATCGATCGGGAACACACAGAGA";

    /// Imports the simple fixture (non-shallow) and checks both the block group's sequences and
    /// the sequence of the created path.
    #[test]
    fn test_add_fasta() {
        let fixture = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("fixtures/simple.fa");
        let fixture = fixture.to_str().unwrap().to_string();
        let mut conn = get_connection(None);

        import_fasta(&fixture, "test", false, &mut conn);

        let expected = SIMPLE_FASTA_SEQUENCE.to_string();
        assert_eq!(
            BlockGroup::get_all_sequences(&conn, 1),
            HashSet::from_iter(vec![expected.clone()])
        );

        let imported_path = Path::get(&conn, 1);
        assert_eq!(Path::sequence(&conn, imported_path), expected);
    }
}
2 changes: 2 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
use std::str;

pub mod graph;
pub mod imports;
pub mod migrations;
pub mod models;
pub mod test_helpers;
pub mod updates;

use crate::migrations::run_migrations;
use noodles::vcf::variant::record::samples::series::value::genotype::Phasing;
Expand Down
Loading

0 comments on commit d14e3c0

Please sign in to comment.