From eb471c829003a56f0c2c03acf895e5501268d615 Mon Sep 17 00:00:00 2001 From: Luiz Irber Date: Thu, 19 Dec 2024 10:30:56 -0800 Subject: [PATCH] use simd_json instead of serde_json --- Cargo.lock | 120 ++++++++++++++++++++++++++++++++- src/core/Cargo.toml | 2 +- src/core/src/errors.rs | 2 +- src/core/src/signature.rs | 6 +- src/core/src/sketch/minhash.rs | 8 +-- src/core/src/wasm.rs | 2 +- src/core/tests/minhash.rs | 2 +- src/core/tests/storage.rs | 4 +- 8 files changed, 130 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2ab13a4631..4a388f5fd0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -19,12 +19,30 @@ dependencies = [ "version_check", ] +[[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + [[package]] name = "aliasable" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "250f629c0161ad8107cf89319e990051fae62832fd343083bea452d93e2205fd" +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + [[package]] name = "android-tzdata" version = "0.1.1" @@ -627,6 +645,15 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "float-cmp" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b09cf3155332e944990140d967ff5eceb70df778b34f77d8075db46e4704e6d8" +dependencies = [ + "num-traits", +] + [[package]] name = "funty" version = "2.0.0" @@ -670,13 +697,33 @@ version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" +[[package]] +name = "halfbrown" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8588661a8607108a5ca69cab034063441a0413a0b041c13618a7dd348021ef6f" +dependencies = [ + "hashbrown 0.14.5", + "serde", +] + [[package]] name = "hashbrown" version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db0d4cf898abf0081f964436dc980e96670a0f36863e4b83aaacdb65c9d7ccc3" dependencies = [ - "ahash", + "ahash 0.7.8", +] + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash 0.8.11", + "allocator-api2", ] [[package]] @@ -1386,6 +1433,26 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "ref-cast" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf0a6f84d5f1d581da8b41b47ec8600871962f2a528115b542b362d4b744931" +dependencies = [ + "ref-cast-impl", +] + +[[package]] +name = "ref-cast-impl" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcc303e793d3734489387d205e9b186fac9c6cfacedd98cbb2e8a5943595f3e6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + [[package]] name = "regex" version = "1.5.6" @@ -1425,7 +1492,7 @@ dependencies = [ "bitvec", "bytecheck", "bytes", - "hashbrown", + "hashbrown 0.12.1", "ptr_meta", "rend", "rkyv_derive", @@ -1609,6 +1676,21 @@ dependencies = [ "wide", ] +[[package]] +name = "simd-json" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa2bcf6c6e164e81bc7a5d49fc6988b3d515d9e8c07457d7b74ffb9324b9cd40" +dependencies = [ + "getrandom", + "halfbrown", + "ref-cast", + "serde", + "serde_json", + "simdutf8", + "value-trait", +] + [[package]] name = "simdutf8" version = "0.1.4" @@ -1668,7 +1750,7 @@ dependencies = [ "rocksdb", "roots", "serde", - "serde_json", + "simd-json", "statrs", "streaming-stats", "tempfile", @@ -1884,6 +1966,18 @@ version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e395fcf16a7a3d8127ec99782007af141946b4795001f876d54fb0d55978560" +[[package]] +name = "value-trait" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9170e001f458781e92711d2ad666110f153e4e50bfd5cbd02db6547625714187" +dependencies = [ + "float-cmp", + "halfbrown", + "itoa", + "ryu", +] + [[package]] name = "vcpkg" version = "0.2.15" @@ -2243,6 +2337,26 @@ version = "1.0.0-rc.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1367295b8f788d371ce2dbc842c7b709c73ee1364d30351dd300ec2203b12377" +[[package]] +name = "zerocopy" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + [[package]] name = "zstd-sys" version = "2.0.7+zstd.1.5.4" diff --git a/src/core/Cargo.toml b/src/core/Cargo.toml index 221fd240a5..88a7fb9bc0 100644 --- a/src/core/Cargo.toml +++ b/src/core/Cargo.toml @@ -55,8 +55,8 @@ rayon = { version = "1.10.0", optional = true } rkyv = { version = "0.7.44", optional = true } roaring = "0.10.9" roots = "0.0.8" +simd-json = "0.14.3" serde = { version = "1.0.216", features = ["derive"] } -serde_json = "1.0.133" statrs = "0.18.0" streaming-stats = "0.2.3" thiserror = "2.0" diff --git a/src/core/src/errors.rs b/src/core/src/errors.rs index e8ce3e68aa..c5ba2d5b4b 100644 --- a/src/core/src/errors.rs +++ b/src/core/src/errors.rs @@ -68,7 +68,7 @@ pub enum SourmashError { StorageError(#[from] crate::storage::StorageError), #[error(transparent)] - SerdeError(#[from] serde_json::error::Error), + SerdeError(#[from] simd_json::Error), #[error(transparent)] NifflerError(#[from] niffler::Error), diff --git a/src/core/src/signature.rs b/src/core/src/signature.rs index a3971a8637..c1c0859d2b 100644 --- a/src/core/src/signature.rs +++ b/src/core/src/signature.rs @@ -576,7 +576,7 @@ impl Signature { { let (rdr, _format) = niffler::get_reader(Box::new(rdr))?; - let sigs: Vec = serde_json::from_reader(rdr)?; + let sigs: Vec = simd_json::from_reader(rdr)?; Ok(sigs) } @@ -787,7 +787,7 @@ impl ToWriter for Signature { where W: io::Write, { - serde_json::to_writer(writer, &vec![&self])?; + simd_json::to_writer(writer, &vec![&self])?; Ok(()) } } @@ -797,7 +797,7 @@ impl ToWriter for Vec<&Signature> { where W: io::Write, { - serde_json::to_writer(writer, &self)?; + simd_json::to_writer(writer, &self)?; Ok(()) } } diff --git a/src/core/src/sketch/minhash.rs b/src/core/src/sketch/minhash.rs index f8db721465..91185389cc 100644 --- a/src/core/src/sketch/minhash.rs +++ b/src/core/src/sketch/minhash.rs @@ -190,7 +190,7 @@ impl ToWriter for KmerMinHash { where W: io::Write, { - serde_json::to_writer(writer, &self)?; + simd_json::to_writer(writer, &self)?; Ok(()) } } @@ -875,7 +875,7 @@ impl KmerMinHash { { let (rdr, _format) = niffler::get_reader(Box::new(rdr))?; - let mh: KmerMinHash = serde_json::from_reader(rdr)?; + let mh: KmerMinHash = simd_json::from_reader(rdr)?; Ok(mh) } } @@ -1140,7 +1140,7 @@ impl ToWriter for KmerMinHashBTree { where W: io::Write, { - serde_json::to_writer(writer, &self)?; + simd_json::to_writer(writer, &self)?; Ok(()) } } @@ -1633,7 +1633,7 @@ impl KmerMinHashBTree { { let (rdr, _format) = niffler::get_reader(Box::new(rdr))?; - let mh: KmerMinHashBTree = serde_json::from_reader(rdr)?; + let mh: KmerMinHashBTree = simd_json::from_reader(rdr)?; Ok(mh) } } diff --git a/src/core/src/wasm.rs b/src/core/src/wasm.rs index a2b15c70e1..f4a572c82f 100644 --- a/src/core/src/wasm.rs +++ b/src/core/src/wasm.rs @@ -178,7 +178,7 @@ pub enum JsErrors { SourmashError(#[from] crate::Error), #[error(transparent)] - SerdeError(#[from] serde_json::error::Error), + SerdeError(#[from] simd_json::Error), #[error(transparent)] NifflerError(#[from] niffler::Error), diff --git a/src/core/tests/minhash.rs b/src/core/tests/minhash.rs index ef3d720d85..df323bd793 100644 --- a/src/core/tests/minhash.rs +++ b/src/core/tests/minhash.rs @@ -7,6 +7,7 @@ use proptest::num::u64; use proptest::proptest; use sourmash::encodings::HashFunctions; use sourmash::prelude::ToWriter; +use sourmash::prelude::ToWriter; use sourmash::signature::SeqToHashes; use sourmash::signature::{Signature, SigsTrait}; use sourmash::sketch::minhash::{ @@ -14,7 +15,6 @@ use sourmash::sketch::minhash::{ }; use sourmash::sketch::Sketch; use sourmash::ScaledType; -use sourmash::prelude::ToWriter; // TODO: use f64::EPSILON when we bump MSRV const EPSILON: f64 = 0.01; diff --git a/src/core/tests/storage.rs b/src/core/tests/storage.rs index 985c1eb12a..ab0e4a336e 100644 --- a/src/core/tests/storage.rs +++ b/src/core/tests/storage.rs @@ -12,9 +12,9 @@ fn zipstorage_load_file() -> Result<(), Box> { let zs = ZipStorage::from_file(filename.to_str().unwrap())?; - let data = zs.load("v6.sbt.json")?; + let mut data = zs.load("v6.sbt.json")?; - let description: serde_json::Value = serde_json::from_slice(&data[..])?; + let description: simd_json::borrowed::Value = simd_json::from_slice(&mut data[..])?; assert_eq!(description["version"], 6); Ok(())