-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Move library functionality out of main
- Loading branch information
Showing
6 changed files
with
496 additions
and
455 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
pub mod fasta; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
use std::collections::HashSet; | ||
use std::fmt::Debug; | ||
use std::path::PathBuf; | ||
use std::str; | ||
|
||
use crate::models::{ | ||
self, block_group::BlockGroup, block_group_edge::BlockGroupEdge, edge::Edge, path::Path, | ||
sequence::Sequence, strand::Strand, | ||
}; | ||
use noodles::fasta; | ||
use noodles::vcf::variant::record::samples::{Sample, Series}; | ||
use noodles::vcf::variant::record::{AlternateBases, ReferenceBases, Samples}; | ||
use noodles::vcf::variant::Record; | ||
use rusqlite::Connection; | ||
|
||
pub fn import_fasta(fasta: &String, name: &str, shallow: bool, conn: &mut Connection) { | ||
// TODO: support gz | ||
let mut reader = fasta::io::reader::Builder.build_from_path(fasta).unwrap(); | ||
|
||
if !models::Collection::exists(conn, name) { | ||
let collection = models::Collection::create(conn, name); | ||
|
||
for result in reader.records() { | ||
let record = result.expect("Error during fasta record parsing"); | ||
let sequence = str::from_utf8(record.sequence().as_ref()) | ||
.unwrap() | ||
.to_string(); | ||
let name = String::from_utf8(record.name().to_vec()).unwrap(); | ||
let sequence_length = record.sequence().len() as i32; | ||
let seq = if shallow { | ||
Sequence::new() | ||
.sequence_type("DNA") | ||
.name(&name) | ||
.file_path(fasta) | ||
.save(conn) | ||
} else { | ||
Sequence::new() | ||
.sequence_type("DNA") | ||
.sequence(&sequence) | ||
.save(conn) | ||
}; | ||
let block_group = BlockGroup::create(conn, &collection.name, None, &name); | ||
let edge_into = Edge::create( | ||
conn, | ||
Edge::PATH_START_HASH.to_string(), | ||
0, | ||
Strand::Forward, | ||
seq.hash.to_string(), | ||
0, | ||
Strand::Forward, | ||
0, | ||
0, | ||
); | ||
let edge_out_of = Edge::create( | ||
conn, | ||
seq.hash.to_string(), | ||
sequence_length, | ||
Strand::Forward, | ||
Edge::PATH_END_HASH.to_string(), | ||
0, | ||
Strand::Forward, | ||
0, | ||
0, | ||
); | ||
BlockGroupEdge::bulk_create(conn, block_group.id, vec![edge_into.id, edge_out_of.id]); | ||
Path::create( | ||
conn, | ||
&name, | ||
block_group.id, | ||
vec![edge_into.id, edge_out_of.id], | ||
); | ||
} | ||
println!("Created it"); | ||
} else { | ||
println!("Collection {:1} already exists", name); | ||
} | ||
} | ||
|
||
#[cfg(test)]
mod tests {
    // Glob-import the parent module so the test sees `import_fasta` and the
    // names the parent itself imports.
    use super::*;
    use crate::test_helpers::get_connection;

    /// Imports the `fixtures/simple.fa` fixture non-shallowly and checks that
    /// block group 1 and path 1 both yield the fixture's sequence.
    #[test]
    fn test_add_fasta() {
        let expected = "ATCGATCGATCGATCGATCGGGAACACACAGAGA".to_string();

        let fixture = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("fixtures/simple.fa");
        let fixture = fixture.to_str().unwrap().to_string();

        let mut conn = get_connection(None);
        import_fasta(&fixture, "test", false, &mut conn);

        assert_eq!(
            BlockGroup::get_all_sequences(&conn, 1),
            HashSet::from_iter(vec![expected.clone()])
        );

        let path = Path::get(&conn, 1);
        assert_eq!(Path::sequence(&conn, path), expected);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.