From 8fe51041f9acb42144e3aef694e1b1164deae396 Mon Sep 17 00:00:00 2001 From: Luiz Irber Date: Thu, 19 Dec 2024 10:15:10 -0800 Subject: [PATCH] limit usage of serde_json across the codebase --- src/core/benches/minhash.rs | 4 +-- src/core/src/collection.rs | 16 +++++----- src/core/src/ffi/signature.rs | 8 +++-- src/core/src/signature.rs | 32 +++++++++++++------- src/core/src/sketch/minhash.rs | 43 +++++++++++++++++++++++++++ src/core/src/wasm.rs | 11 ++++--- src/core/tests/minhash.rs | 53 +++++++++++++++++----------------- src/core/tests/storage.rs | 2 +- 8 files changed, 114 insertions(+), 55 deletions(-) diff --git a/src/core/benches/minhash.rs b/src/core/benches/minhash.rs index e495185bdd..3452db60f7 100644 --- a/src/core/benches/minhash.rs +++ b/src/core/benches/minhash.rs @@ -13,7 +13,7 @@ fn intersection(c: &mut Criterion) { filename.push("../../tests/test-data/gather-abund/genome-s10.fa.gz.sig"); let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let mut sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let mut sigs = Signature::from_reader(reader).expect("Loading error"); let mh = if let Sketch::MinHash(mh) = &sigs.swap_remove(0).sketches()[0] { mh.clone() } else { @@ -24,7 +24,7 @@ fn intersection(c: &mut Criterion) { filename.push("../../tests/test-data/gather-abund/genome-s11.fa.gz.sig"); let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let mut sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let mut sigs = Signature::from_reader(reader).expect("Loading error"); let mh2 = if let Sketch::MinHash(mh) = &sigs.swap_remove(0).sketches()[0] { mh.clone() } else { diff --git a/src/core/src/collection.rs b/src/core/src/collection.rs index baf8268e97..9716265588 100644 --- a/src/core/src/collection.rs +++ b/src/core/src/collection.rs @@ -267,7 +267,7 @@ mod test { filename.push("../../tests/test-data/47+63-multisig.sig"); let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); // create Selection object let mut selection = Selection::default(); selection.set_scaled(2000); @@ -293,7 +293,7 @@ mod test { filename.push("../../tests/test-data/47+63-multisig.sig"); let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); // create Selection object let mut selection = Selection::default(); selection.set_scaled(500); @@ -314,7 +314,7 @@ mod test { filename.push("../../tests/test-data/genome-s11.fa.gz.sig"); let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); assert_eq!(sigs.len(), 4); // create Selection object let mut selection = Selection::default(); @@ -336,7 +336,7 @@ mod test { filename.push("../../tests/test-data/genome-s11.fa.gz.sig"); let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); let sigs_copy = sigs.clone(); assert_eq!(sigs.len(), 4); // create Selection object @@ -366,7 +366,7 @@ mod test { filename.push("../../tests/test-data/47+63-multisig.sig"); let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); assert_eq!(sigs.len(), 6); // create Selection object let mut selection = Selection::default(); @@ -388,7 +388,7 @@ mod test { filename.push("../../tests/test-data/genome-s11.fa.gz.sig"); let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); assert_eq!(sigs.len(), 4); // load sigs into collection + select compatible signatures let mut cl = Collection::from_sigs(sigs).unwrap(); @@ -413,7 +413,7 @@ mod test { filename.push("../../tests/test-data/47+63-multisig.sig"); let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); // create Selection object let mut selection = Selection::default(); selection.set_scaled(2000); @@ -480,7 +480,7 @@ mod test { .push("../../tests/test-data/prot/hp/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig"); let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); // create Selection object let mut selection = Selection::default(); selection.set_moltype(HashFunctions::Murmur64Hp); diff --git a/src/core/src/ffi/signature.rs b/src/core/src/ffi/signature.rs index 06a0bd9fe5..a8075a1253 100644 --- a/src/core/src/ffi/signature.rs +++ b/src/core/src/ffi/signature.rs @@ -13,6 +13,7 @@ use crate::sketch::Sketch; use crate::ffi::cmd::compute::SourmashComputeParameters; use crate::ffi::minhash::SourmashKmerMinHash; use crate::ffi::utils::{ForeignObject, SourmashStr}; +use crate::prelude::ToWriter; pub struct SourmashSignature; @@ -193,8 +194,9 @@ unsafe fn signature_eq(ptr: *const SourmashSignature, other: *const SourmashSign ffi_fn! { unsafe fn signature_save_json(ptr: *const SourmashSignature) -> Result { let sig = SourmashSignature::as_rust(ptr); - let st = serde_json::to_string(sig)?; - Ok(SourmashStr::from_string(st)) + let mut st: Vec = vec![]; + sig.to_writer(&mut st)?; + Ok(SourmashStr::from_string(String::from_utf8_unchecked(st))) } } @@ -248,7 +250,7 @@ unsafe fn signatures_save_buffer(ptr: *const *const SourmashSignature, size: usi } else { Box::new(&mut buffer) }; - serde_json::to_writer(&mut writer, &rsigs)?; + rsigs.to_writer(&mut writer)?; } let b = buffer.into_boxed_slice(); diff --git a/src/core/src/signature.rs b/src/core/src/signature.rs index 5a0d39f61f..a3971a8637 100644 --- a/src/core/src/signature.rs +++ b/src/core/src/signature.rs @@ -792,6 +792,16 @@ impl ToWriter for Signature { } } +impl ToWriter for Vec<&Signature> { + fn to_writer(&self, writer: &mut W) -> Result<(), Error> + where + W: io::Write, + { + serde_json::to_writer(writer, &self)?; + Ok(()) + } +} + impl Select for Signature { fn select(mut self, selection: &Selection) -> Result { self.signatures.retain(|s| { @@ -949,7 +959,7 @@ mod test { let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); assert_eq!(sigs.len(), 4); @@ -1072,7 +1082,7 @@ mod test { let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); assert_eq!(sigs.len(), 1); @@ -1088,7 +1098,7 @@ mod test { let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); assert_eq!(sigs.len(), 1); @@ -1112,7 +1122,7 @@ mod test { let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); assert_eq!(sigs.len(), 1); @@ -1137,7 +1147,7 @@ mod test { let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); assert_eq!(sigs.len(), 1); @@ -1161,7 +1171,7 @@ mod test { let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); // create Selection object let mut selection = Selection::default(); @@ -1187,7 +1197,7 @@ mod test { let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); // create Selection object let mut selection = Selection::default(); @@ -1207,7 +1217,7 @@ mod test { let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); // create Selection object let mut selection = Selection::default(); @@ -1227,7 +1237,7 @@ mod test { let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); // create Selection object let mut selection = Selection::default(); @@ -1248,7 +1258,7 @@ mod test { let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); // create Selection object let mut selection = Selection::default(); @@ -1266,7 +1276,7 @@ mod test { let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); // create Selection object let mut selection = Selection::default(); diff --git a/src/core/src/sketch/minhash.rs b/src/core/src/sketch/minhash.rs index 438294e098..8da28ca948 100644 --- a/src/core/src/sketch/minhash.rs +++ b/src/core/src/sketch/minhash.rs @@ -2,6 +2,7 @@ use std::cmp::Ordering; use std::collections::{BTreeMap, BTreeSet}; use std::f64::consts::PI; use std::fmt::Write; +use std::io; use std::iter::Peekable; use std::str; use std::sync::Mutex; @@ -13,6 +14,7 @@ use serde::{Deserialize, Serialize}; use typed_builder::TypedBuilder; use crate::encodings::HashFunctions; +use crate::prelude::ToWriter; use crate::signature::SigsTrait; use crate::sketch::hyperloglog::HyperLogLog; use crate::Error; @@ -183,6 +185,16 @@ impl<'de> Deserialize<'de> for KmerMinHash { } } +impl ToWriter for KmerMinHash { + fn to_writer(&self, writer: &mut W) -> Result<(), Error> + where + W: io::Write, + { + serde_json::to_writer(writer, &self)?; + Ok(()) + } +} + impl KmerMinHash { pub fn new( scaled: ScaledType, @@ -856,6 +868,17 @@ impl KmerMinHash { Ok((abundances, total_abundance)) } + + pub fn from_reader(rdr: R) -> Result + where + R: std::io::Read, + { + let (rdr, _format) = niffler::get_reader(Box::new(rdr))?; + + let mh: KmerMinHash = serde_json::from_reader(rdr)?; + Ok(mh) + } + } impl SigsTrait for KmerMinHash { @@ -1113,6 +1136,16 @@ impl<'de> Deserialize<'de> for KmerMinHashBTree { } } +impl ToWriter for KmerMinHashBTree { + fn to_writer(&self, writer: &mut W) -> Result<(), Error> + where + W: io::Write, + { + serde_json::to_writer(writer, &self)?; + Ok(()) + } +} + impl KmerMinHashBTree { pub fn new( scaled: ScaledType, @@ -1594,6 +1627,16 @@ impl KmerMinHashBTree { self.size() as u64 } } + + pub fn from_reader(rdr: R) -> Result + where + R: std::io::Read, + { + let (rdr, _format) = niffler::get_reader(Box::new(rdr))?; + + let mh: KmerMinHashBTree = serde_json::from_reader(rdr)?; + Ok(mh) + } } impl SigsTrait for KmerMinHashBTree { diff --git a/src/core/src/wasm.rs b/src/core/src/wasm.rs index c10eda4e6e..a2b15c70e1 100644 --- a/src/core/src/wasm.rs +++ b/src/core/src/wasm.rs @@ -9,6 +9,7 @@ use wasm_bindgen::prelude::*; use crate::cmd::ComputeParameters as _ComputeParameters; use crate::encodings::HashFunctions; +use crate::prelude::ToWriter; use crate::signature::Signature as _Signature; use crate::signature::SigsTrait; use crate::sketch::minhash::KmerMinHash as _KmerMinHash; @@ -66,8 +67,9 @@ impl KmerMinHash { #[wasm_bindgen] pub fn to_json(&mut self) -> Result { - let json = serde_json::to_string(&self.0)?; - Ok(json) + let mut st: Vec = vec![]; + self.0.to_writer(&mut st)?; + Ok(unsafe { String::from_utf8_unchecked(st) }) } } @@ -160,8 +162,9 @@ impl Signature { #[wasm_bindgen] pub fn to_json(&mut self) -> Result { - let json = serde_json::to_string(&self.0)?; - Ok(json) + let mut st: Vec = vec![]; + self.0.to_writer(&mut st)?; + Ok(unsafe { String::from_utf8_unchecked(st) }) } pub fn size(&self) -> usize { diff --git a/src/core/tests/minhash.rs b/src/core/tests/minhash.rs index bdbba0cc20..a5a9526d49 100644 --- a/src/core/tests/minhash.rs +++ b/src/core/tests/minhash.rs @@ -13,6 +13,7 @@ use sourmash::sketch::minhash::{ }; use sourmash::sketch::Sketch; use sourmash::ScaledType; +use sourmash::prelude::ToWriter; // TODO: use f64::EPSILON when we bump MSRV const EPSILON: f64 = 0.01; @@ -385,7 +386,7 @@ fn load_save_minhash_sketches() { let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); let sig = sigs.get(0).unwrap(); let sketches = sig.sketches(); @@ -394,11 +395,11 @@ fn load_save_minhash_sketches() { if let Sketch::MinHash(mh) = &sketches[0] { let bmh: KmerMinHashBTree = mh.clone().into(); { - serde_json::to_writer(&mut buffer, &bmh).unwrap(); + bmh.to_writer(&mut buffer).unwrap(); } - let new_mh: KmerMinHash = serde_json::from_reader(&buffer[..]).unwrap(); - let new_bmh: KmerMinHashBTree = serde_json::from_reader(&buffer[..]).unwrap(); + let new_mh = KmerMinHash::from_reader(&buffer[..]).unwrap(); + let new_bmh = KmerMinHashBTree::from_reader(&buffer[..]).unwrap(); assert_eq!(mh.md5sum(), new_mh.md5sum()); assert_eq!(bmh.md5sum(), new_bmh.md5sum()); @@ -432,11 +433,11 @@ fn load_save_minhash_sketches() { buffer.clear(); let imh: KmerMinHash = bmh.clone().into(); { - serde_json::to_writer(&mut buffer, &imh).unwrap(); + imh.to_writer(&mut buffer).unwrap(); } - let new_mh: KmerMinHash = serde_json::from_reader(&buffer[..]).unwrap(); - let new_bmh: KmerMinHashBTree = serde_json::from_reader(&buffer[..]).unwrap(); + let new_mh = KmerMinHash::from_reader(&buffer[..]).unwrap(); + let new_bmh = KmerMinHashBTree::from_reader(&buffer[..]).unwrap(); assert_eq!(mh.md5sum(), new_mh.md5sum()); assert_eq!(bmh.md5sum(), new_bmh.md5sum()); @@ -486,7 +487,7 @@ fn load_save_minhash_sketches_abund() { let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); let sig = sigs.get(0).unwrap(); let sketches = sig.sketches(); @@ -495,11 +496,11 @@ fn load_save_minhash_sketches_abund() { if let Sketch::MinHash(mh) = &sketches[0] { let bmh: KmerMinHashBTree = mh.clone().into(); { - serde_json::to_writer(&mut buffer, &bmh).unwrap(); + bmh.to_writer(&mut buffer).unwrap(); } - let new_mh: KmerMinHash = serde_json::from_reader(&buffer[..]).unwrap(); - let new_bmh: KmerMinHashBTree = serde_json::from_reader(&buffer[..]).unwrap(); + let new_mh = KmerMinHash::from_reader(&buffer[..]).unwrap(); + let new_bmh = KmerMinHashBTree::from_reader(&buffer[..]).unwrap(); assert_eq!(mh.md5sum(), new_mh.md5sum()); assert_eq!(bmh.md5sum(), new_bmh.md5sum()); @@ -543,11 +544,11 @@ fn load_save_minhash_sketches_abund() { buffer.clear(); let imh: KmerMinHash = bmh.clone().into(); { - serde_json::to_writer(&mut buffer, &imh).unwrap(); + imh.to_writer(&mut buffer).unwrap(); } - let new_mh: KmerMinHash = serde_json::from_reader(&buffer[..]).unwrap(); - let new_bmh: KmerMinHashBTree = serde_json::from_reader(&buffer[..]).unwrap(); + let new_mh = KmerMinHash::from_reader(&buffer[..]).unwrap(); + let new_bmh = KmerMinHashBTree::from_reader(&buffer[..]).unwrap(); assert_eq!(mh.md5sum(), new_mh.md5sum()); assert_eq!(bmh.md5sum(), new_bmh.md5sum()); @@ -673,14 +674,14 @@ fn load_save_minhash_dayhoff(seq in "FLYS*CWLPGQRMTHINKVADER{0,1000}") { let mut buffer_b = vec![]; { - serde_json::to_writer(&mut buffer_a, &a).unwrap(); - serde_json::to_writer(&mut buffer_b, &b).unwrap(); + a.to_writer(&mut buffer_a).unwrap(); + b.to_writer(&mut buffer_b).unwrap(); } assert_eq!(buffer_a, buffer_b); - let c: KmerMinHash = serde_json::from_reader(&buffer_b[..]).unwrap(); - let d: KmerMinHashBTree = serde_json::from_reader(&buffer_a[..]).unwrap(); + let c = KmerMinHash::from_reader(&buffer_b[..]).unwrap(); + let d = KmerMinHashBTree::from_reader(&buffer_a[..]).unwrap(); assert!((a.similarity(&c, false, false).unwrap() - b.similarity(&d, false, false).unwrap()).abs() < EPSILON); assert!((a.similarity(&c, true, false).unwrap() - b.similarity(&d, true, false).unwrap()).abs() < EPSILON); @@ -701,14 +702,14 @@ fn load_save_minhash_hp(seq in "FLYS*CWLPGQRMTHINKVADER{0,1000}") { let mut buffer_b = vec![]; { - serde_json::to_writer(&mut buffer_a, &a).unwrap(); - serde_json::to_writer(&mut buffer_b, &b).unwrap(); + a.to_writer(&mut buffer_a).unwrap(); + b.to_writer(&mut buffer_b).unwrap(); } assert_eq!(buffer_a, buffer_b); - let c: KmerMinHash = serde_json::from_reader(&buffer_b[..]).unwrap(); - let d: KmerMinHashBTree = serde_json::from_reader(&buffer_a[..]).unwrap(); + let c = KmerMinHash::from_reader(&buffer_b[..]).unwrap(); + let d = KmerMinHashBTree::from_reader(&buffer_a[..]).unwrap(); assert!((a.similarity(&c, false, false).unwrap() - b.similarity(&d, false, false).unwrap()).abs() < EPSILON); assert!((a.similarity(&c, true, false).unwrap() - b.similarity(&d, true, false).unwrap()).abs() < EPSILON); @@ -729,14 +730,14 @@ fn load_save_minhash_dna(seq in "ACGTN{0,1000}") { let mut buffer_b = vec![]; { - serde_json::to_writer(&mut buffer_a, &a).unwrap(); - serde_json::to_writer(&mut buffer_b, &b).unwrap(); + a.to_writer(&mut buffer_a).unwrap(); + b.to_writer(&mut buffer_b).unwrap(); } assert_eq!(buffer_a, buffer_b); - let c: KmerMinHash = serde_json::from_reader(&buffer_b[..]).unwrap(); - let d: KmerMinHashBTree = serde_json::from_reader(&buffer_a[..]).unwrap(); + let c = KmerMinHash::from_reader(&buffer_b[..]).unwrap(); + let d = KmerMinHashBTree::from_reader(&buffer_a[..]).unwrap(); assert!((a.similarity(&c, false, false).unwrap() - b.similarity(&d, false, false).unwrap()).abs() < EPSILON); assert!((a.similarity(&c, true, false).unwrap() - b.similarity(&d, true, false).unwrap()).abs() < EPSILON); diff --git a/src/core/tests/storage.rs b/src/core/tests/storage.rs index 68a04ccc74..985c1eb12a 100644 --- a/src/core/tests/storage.rs +++ b/src/core/tests/storage.rs @@ -69,7 +69,7 @@ fn zipstorage_parallel_access() -> Result<(), Box> { .par_iter() .map(|path| { let data = zs.load(path).unwrap(); - let sigs: Vec = serde_json::from_reader(&data[..]).expect("Loading error"); + let sigs = Signature::from_reader(&data[..]).expect("Loading error"); sigs.iter() .map(|v| v.sketches().iter().map(|mh| mh.size()).sum::()) .sum::()