diff --git a/src/main.rs b/src/main.rs index 1d901b2..52da6ab 100644 --- a/src/main.rs +++ b/src/main.rs @@ -118,6 +118,7 @@ fn update_with_vcf( if !fixed_genotype.is_empty() { genotype = parse_genotype(&fixed_genotype); } + for result in reader.records() { let record = result.unwrap(); let seq_name = record.reference_sequence_name().to_string(); @@ -127,6 +128,7 @@ fn update_with_vcf( let ref_end = record.variant_end(&header).unwrap().get(); let alt_bases = record.alternate_bases(); let alt_alleles: Vec<_> = alt_bases.iter().collect::>().unwrap(); + // TODO: fix this duplication of handling an insert if !fixed_sample.is_empty() && !genotype.is_empty() { for (chromosome_index, genotype) in genotype.iter().enumerate() { if let Some(gt) = genotype { diff --git a/src/models/path.rs b/src/models/path.rs index 3ca9a00..19d9c2a 100644 --- a/src/models/path.rs +++ b/src/models/path.rs @@ -22,11 +22,25 @@ pub fn revcomp(seq: &str) -> String { seq.chars() .rev() .map(|c| -> u8 { + let is_upper = c.is_ascii_uppercase(); let rc = c as u8; - if rc & 2 != 0 { + let v = if rc == 78 { + // N + rc + } else if rc == 110 { + // n + rc + } else if rc & 2 != 0 { + // CG rc ^ 4 } else { + // AT rc ^ 21 + }; + if is_upper { + v + } else { + v.to_ascii_lowercase() } }) .collect(), @@ -324,4 +338,11 @@ mod tests { ); assert_eq!(Path::sequence(conn, path.id), "GGGGGGGTTTTTTTCGATCGAT"); } + + #[test] + fn test_reverse_complement() { + assert_eq!(revcomp("ATCCGG"), "CCGGAT"); + assert_eq!(revcomp("CNNNNA"), "TNNNNG"); + assert_eq!(revcomp("cNNgnAt"), "aTncNNg"); + } }