diff --git a/Cargo.toml b/Cargo.toml index 6c5790c..a822e46 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,7 +20,7 @@ all-features = true [features] default = [] -disktree = [ +hexdb = [ "byteorder", "memmap", "serde", diff --git a/benches/benches.rs b/benches/benches.rs index d5e4215..625cb6c 100644 --- a/benches/benches.rs +++ b/benches/benches.rs @@ -48,13 +48,13 @@ fn set_lookup(c: &mut Criterion) { } } -#[cfg(not(feature = "disktree"))] +#[cfg(not(feature = "hexdb"))] fn disk_set_lookup(_c: &mut Criterion) {} -#[cfg(feature = "disktree")] +#[cfg(feature = "hexdb")] fn disk_set_lookup(c: &mut Criterion) { - use hextree::disktree::DiskTreeMap; - let mut group = c.benchmark_group("US915 DiskTreeSet lookup"); + use hextree::hexdb::HexDb; + let mut group = c.benchmark_group("US915 HexDbSet lookup"); let us915_disk_set = { let us915_set: HexTreeSet = PLAIN_US915_INDICES @@ -63,9 +63,9 @@ fn disk_set_lookup(c: &mut Criterion) { .collect(); let mut file = tempfile::tempfile().unwrap(); us915_set - .to_disktree(&mut file, |_, _| Ok::<(), std::io::Error>(())) + .to_hexdb(&mut file, |_, _| Ok::<(), std::io::Error>(())) .unwrap(); - DiskTreeMap::memmap(&file).unwrap() + HexDb::memmap(&file).unwrap() }; let tarpon_springs = coord! {x: -82.753822, y: 28.15215}; diff --git a/src/error.rs b/src/error.rs index ba8110a..9b807b3 100644 --- a/src/error.rs +++ b/src/error.rs @@ -9,31 +9,31 @@ pub enum Error { Index(u64), /// An io error. - #[cfg(feature = "disktree")] + #[cfg(feature = "hexdb")] Io(std::io::Error), - /// Not a disktree. - #[cfg(feature = "disktree")] - NotDisktree, + /// Not a hexdb. + #[cfg(feature = "hexdb")] + NotHexDb, /// Unsupported version. - #[cfg(feature = "disktree")] + #[cfg(feature = "hexdb")] Version(u8), - /// Invalid value tag found in disktree. - #[cfg(feature = "disktree")] + /// Invalid value tag found in hexdb. + #[cfg(feature = "hexdb")] InvalidTag(u8, u64), - /// Invalid value size bytes found in disktree header. - #[cfg(feature = "disktree")] + /// Invalid value size bytes found in hexdb header. + #[cfg(feature = "hexdb")] Varint(u32), /// User-provided serializer failed. - #[cfg(feature = "disktree")] + #[cfg(feature = "hexdb")] Writer(Box), } -#[cfg(feature = "disktree")] +#[cfg(feature = "hexdb")] impl std::convert::From for Error { fn from(other: std::io::Error) -> Self { Error::Io(other) @@ -45,22 +45,22 @@ impl std::error::Error for Error { match self { Error::Index(_) => None, - #[cfg(feature = "disktree")] + #[cfg(feature = "hexdb")] Error::Io(inner) => inner.source(), - #[cfg(feature = "disktree")] - Error::NotDisktree => None, + #[cfg(feature = "hexdb")] + Error::NotHexDb => None, - #[cfg(feature = "disktree")] + #[cfg(feature = "hexdb")] Error::Version(_) => None, - #[cfg(feature = "disktree")] + #[cfg(feature = "hexdb")] Error::InvalidTag(_, _) => None, - #[cfg(feature = "disktree")] + #[cfg(feature = "hexdb")] Error::Varint(_) => None, - #[cfg(feature = "disktree")] + #[cfg(feature = "hexdb")] Error::Writer(inner) => inner.source(), } } @@ -71,30 +71,30 @@ impl std::fmt::Display for Error { match self { Error::Index(bits) => write!(f, "raw u64 is not a valid H3 index: {bits}"), - #[cfg(feature = "disktree")] + #[cfg(feature = "hexdb")] Error::Io(io_error) => io_error.fmt(f), - #[cfg(feature = "disktree")] - Error::NotDisktree => { + #[cfg(feature = "hexdb")] + Error::NotHexDb => { write!(f, "file missing magic header") } - #[cfg(feature = "disktree")] + #[cfg(feature = "hexdb")] Error::Version(version) => { write!(f, "unsupported version, got {version}") } - #[cfg(feature = "disktree")] + #[cfg(feature = "hexdb")] Error::InvalidTag(tag, pos) => { write!(f, "invalid tag, got {tag}, pos {pos}") } - #[cfg(feature = "disktree")] + #[cfg(feature = "hexdb")] Error::Varint(val) => { write!(f, "byte sequence is not a valid varint, got {val}") } - #[cfg(feature = "disktree")] + #[cfg(feature = "hexdb")] Error::Writer(writer_error) => { write!(f, "provided writer returned an error, got {writer_error}") } diff --git a/src/disktree/dtseek.rs b/src/hexdb/dbseek.rs similarity index 87% rename from src/disktree/dtseek.rs rename to src/hexdb/dbseek.rs index 3cc2689..bedce2a 100644 --- a/src/disktree/dtseek.rs +++ b/src/hexdb/dbseek.rs @@ -1,6 +1,6 @@ -use crate::disktree::dptr::Dp; +use crate::hexdb::dptr::Dp; -pub(crate) trait DtSeek { +pub(crate) trait DbSeek { fn pos(&mut self) -> std::io::Result; fn seek(&mut self, dp: Dp) -> std::io::Result; @@ -8,7 +8,7 @@ pub(crate) trait DtSeek { fn fast_forward(&mut self) -> std::io::Result; } -impl DtSeek for S +impl DbSeek for S where S: std::io::Seek, { diff --git a/src/disktree/dptr.rs b/src/hexdb/dptr.rs similarity index 100% rename from src/disktree/dptr.rs rename to src/hexdb/dptr.rs diff --git a/src/disktree/iter.rs b/src/hexdb/iter.rs similarity index 85% rename from src/disktree/iter.rs rename to src/hexdb/iter.rs index d66ccd5..989f75b 100644 --- a/src/disktree/iter.rs +++ b/src/hexdb/iter.rs @@ -1,7 +1,7 @@ use crate::{ cell::CellStack, - disktree::{dptr::Dp, dtseek::DtSeek, tree::HDR_SZ, varint}, error::{Error, Result}, + hexdb::{dbseek::DbSeek, dptr::Dp, tree::HDR_SZ, varint}, Cell, }; use byteorder::ReadBytesExt; @@ -10,8 +10,8 @@ use std::io::Cursor; pub(crate) struct Iter<'a> { cell_stack: CellStack, curr_node: Option<(u8, Dp)>, - disktree_buf: &'a [u8], - disktree_csr: Cursor<&'a [u8]>, + hexdb_buf: &'a [u8], + hexdb_csr: Cursor<&'a [u8]>, node_stack: Vec>, recycle_bin: Vec>, } @@ -40,13 +40,13 @@ impl<'a> Iter<'a> { // `pos` is a position in the file of this node's tag. fn read_node(&mut self, dptr: Dp) -> Result { let dptr = self.seek(dptr)?; - let node_tag = self.disktree_csr.read_u8()?; + let node_tag = self.hexdb_csr.read_u8()?; if 0 == node_tag & 0b1000_0000 { Ok(Node::Leaf(dptr)) } else { let mut children = self.node_buf(); let n_children = (node_tag & 0b0111_1111).count_ones() as usize; - let child_dptrs = Dp::read_n(&mut self.disktree_csr, n_children)?; + let child_dptrs = Dp::read_n(&mut self.hexdb_csr, n_children)?; children.extend( (0..7) .rev() @@ -86,12 +86,12 @@ impl<'a> Iter<'a> { self.curr_node = None; } - pub(crate) fn new(disktree_buf: &'a [u8]) -> Result> { - let mut disktree_csr = Cursor::new(disktree_buf); + pub(crate) fn new(hexdb_buf: &'a [u8]) -> Result> { + let mut hexdb_csr = Cursor::new(hexdb_buf); let mut cell_stack = CellStack::new(); let mut node_stack = Vec::new(); let recycle_bin = Vec::new(); - let mut base_nodes = Self::read_base_nodes(&mut disktree_csr)?; + let mut base_nodes = Self::read_base_nodes(&mut hexdb_csr)?; let curr_node = base_nodes.pop(); node_stack.push(base_nodes); if let Some((digit, _)) = curr_node { @@ -100,8 +100,8 @@ impl<'a> Iter<'a> { Ok(Self { cell_stack, curr_node, - disktree_buf, - disktree_csr, + hexdb_buf, + hexdb_csr, recycle_bin, node_stack, }) @@ -148,14 +148,14 @@ impl<'a> Iterator for Iter<'a> { self.stop_yielding(); return Some(Err(Error::from(e))); } - match varint::read(&mut self.disktree_csr) { + match varint::read(&mut self.hexdb_csr) { Err(e) => { self.stop_yielding(); return Some(Err(e)); } Ok((val_len, _n_read)) => { - let pos = self.disktree_csr.position() as usize; - let val_buf = &self.disktree_buf[pos..][..val_len as usize]; + let pos = self.hexdb_csr.position() as usize; + let val_buf = &self.hexdb_buf[pos..][..val_len as usize]; return Some(Ok(( *self.cell_stack.cell().expect("corrupted cell-stack"), val_buf, @@ -169,16 +169,16 @@ impl<'a> Iterator for Iter<'a> { } } -impl<'a> DtSeek for Iter<'a> { +impl<'a> DbSeek for Iter<'a> { fn pos(&mut self) -> std::io::Result { - self.disktree_csr.pos() + self.hexdb_csr.pos() } fn seek(&mut self, dp: Dp) -> std::io::Result { - self.disktree_csr.seek(dp) + self.hexdb_csr.seek(dp) } fn fast_forward(&mut self) -> std::io::Result { - self.disktree_csr.fast_forward() + self.hexdb_csr.fast_forward() } } diff --git a/src/disktree/mod.rs b/src/hexdb/mod.rs similarity index 77% rename from src/disktree/mod.rs rename to src/hexdb/mod.rs index 65d0d4b..7347e4f 100644 --- a/src/disktree/mod.rs +++ b/src/hexdb/mod.rs @@ -1,12 +1,12 @@ //! An on-disk hextree. #[cfg(not(target_pointer_width = "64"))] -compile_warning!("disktree may silently fail on non-64bit systems"); +compile_warning!("hexdb may silently fail on non-64bit systems"); -pub use tree::DiskTreeMap; +pub use tree::HexDb; +mod dbseek; mod dptr; -mod dtseek; mod iter; mod node; mod tree; @@ -51,11 +51,11 @@ mod tests { let file = tempfile::NamedTempFile::new().unwrap(); let (mut file, path) = file.keep().unwrap(); - println!("disktree path: {path:?}"); + println!("hexdb path: {path:?}"); monaco - .to_disktree(&mut file, |wtr, val| bincode::serialize_into(wtr, val)) + .to_hexdb(&mut file, |wtr, val| bincode::serialize_into(wtr, val)) .unwrap(); - let monaco_disktree = DiskTreeMap::open(path).unwrap(); + let monaco_hexdb = HexDb::open(path).unwrap(); assert_eq!(monaco.get(point_2), None); assert_eq!( @@ -70,13 +70,13 @@ mod tests { )); assert!(matches!( - monaco_disktree.get_raw(point_1_res8).unwrap(), - Some((cell, crate::disktree::node::Node::Parent(_))) if cell == point_1_res8 + monaco_hexdb.get_raw(point_1_res8).unwrap(), + Some((cell, crate::hexdb::node::Node::Parent(_))) if cell == point_1_res8 )); for (ht_cell, &ht_val) in monaco.iter() { let now = std::time::Instant::now(); - let (dt_cell, val_buf) = monaco_disktree.get(ht_cell).unwrap().unwrap(); + let (dt_cell, val_buf) = monaco_hexdb.get(ht_cell).unwrap().unwrap(); let dt_val = bincode::deserialize_from(val_buf).unwrap(); let lookup_duration = now.elapsed(); println!("loookup of {dt_cell} took {lookup_duration:?}"); @@ -141,34 +141,34 @@ mod tests { map }; - let monaco_disktree: DiskTreeMap = { + let monaco_hexdb: HexDb = { let file = tempfile::NamedTempFile::new().unwrap(); let (mut file, path) = file.keep().unwrap(); monaco_hextree - .to_disktree(&mut file, |wtr, val| wtr.write_all(val)) + .to_hexdb(&mut file, |wtr, val| wtr.write_all(val)) .unwrap(); let _ = file; - DiskTreeMap::open(path).unwrap() + HexDb::open(path).unwrap() }; - // Assert neither hashmap nor disktree contain reserved cells. + // Assert neither hashmap nor hexdb contain reserved cells. for cell in test_cells { assert!(monaco_hashmap.get(&cell).is_none()); - assert!(!monaco_disktree.contains(cell).unwrap()); + assert!(!monaco_hexdb.contains(cell).unwrap()); } - // Assert disktree contains all the same values as the + // Assert hexdb contains all the same values as the // hashmap. for (cell, val) in monaco_hashmap .iter() .map(|(cell, vec)| (**cell, vec.as_slice())) { - assert_eq!((cell, val), monaco_disktree.get(cell).unwrap().unwrap()) + assert_eq!((cell, val), monaco_hexdb.get(cell).unwrap().unwrap()) } // Assert hashmap contains all the same values as the - // disktree. - for (cell, val) in monaco_disktree.iter().unwrap().map(|entry| entry.unwrap()) { + // hexdb. + for (cell, val) in monaco_hexdb.iter().unwrap().map(|entry| entry.unwrap()) { assert_eq!( (cell, val), ( @@ -198,19 +198,19 @@ mod tests { let file = tempfile::NamedTempFile::new().unwrap(); let (mut file, path) = file.keep().unwrap(); - println!("disktree path: {path:?}"); + println!("hexdb path: {path:?}"); monaco - .to_disktree(&mut file, |wtr, val| bincode::serialize_into(wtr, val)) + .to_hexdb(&mut file, |wtr, val| bincode::serialize_into(wtr, val)) .unwrap(); - let monaco_disktree = DiskTreeMap::open(path).unwrap(); + let monaco_hexdb = HexDb::open(path).unwrap(); // Create the iterator with the user-defined deserialzer. - let disktree_iter = monaco_disktree.iter().unwrap(); + let hexdb_iter = monaco_hexdb.iter().unwrap(); let start = std::time::Instant::now(); - let mut disktree_collection = Vec::new(); - for res in disktree_iter { + let mut hexdb_collection = Vec::new(); + for res in hexdb_iter { let (cell, val_buf) = res.unwrap(); - disktree_collection.push((cell, bincode::deserialize_from(val_buf).unwrap())); + hexdb_collection.push((cell, bincode::deserialize_from(val_buf).unwrap())); } let elapsed = start.elapsed(); println!("{elapsed:?}"); @@ -221,20 +221,20 @@ mod tests { assert_eq!( hextree_collection, - disktree_collection, - "iterating a disktree should yield identically ordered elements as the hextree tree it was derived from" + hexdb_collection, + "iterating a hexdb should yield identically ordered elements as the hextree tree it was derived from" ); } #[test] - fn test_empty_disktree() { + fn test_empty_hexdb() { use crate::HexTreeMap; use std::io::Cursor; let mut wtr = vec![]; HexTreeMap::<&[u8]>::new() - .to_disktree(Cursor::new(&mut wtr), |wtr, val| wtr.write_all(val)) + .to_hexdb(Cursor::new(&mut wtr), |wtr, val| wtr.write_all(val)) .unwrap(); - let disktree = DiskTreeMap::with_buf(wtr).unwrap(); - assert_eq!(0, disktree.iter().unwrap().count()); + let hexdb = HexDb::with_buf(wtr).unwrap(); + assert_eq!(0, hexdb.iter().unwrap().count()); } } diff --git a/src/disktree/node.rs b/src/hexdb/node.rs similarity index 94% rename from src/disktree/node.rs rename to src/hexdb/node.rs index 6981083..dd7e88c 100644 --- a/src/disktree/node.rs +++ b/src/hexdb/node.rs @@ -1,6 +1,6 @@ use crate::{ - disktree::{dptr::Dp, dtseek::DtSeek, varint}, error::Result, + hexdb::{dbseek::DbSeek, dptr::Dp, varint}, }; use byteorder::ReadBytesExt; use std::{io::Read, mem::size_of, ops::Range}; @@ -19,7 +19,7 @@ pub(crate) enum Node { impl Node { pub(crate) fn read(rdr: &mut R) -> Result where - R: Read + DtSeek, + R: Read + DbSeek, { let start_pos = rdr.pos()?; let mut buf = [0u8; NODE_BUF_SZ]; diff --git a/src/disktree/tree.rs b/src/hexdb/tree.rs similarity index 90% rename from src/disktree/tree.rs rename to src/hexdb/tree.rs index ac7897e..166c951 100755 --- a/src/disktree/tree.rs +++ b/src/hexdb/tree.rs @@ -1,7 +1,7 @@ use crate::{ digits::Digits, - disktree::{dptr::Dp, iter::Iter, node::Node}, error::Result, + hexdb::{dptr::Dp, iter::Iter, node::Node}, Cell, Error, }; use byteorder::ReadBytesExt; @@ -17,23 +17,23 @@ pub(crate) const HDR_MAGIC: &[u8] = b"hextree\0"; pub(crate) const HDR_SZ: usize = HDR_MAGIC.len() + 1; /// An on-disk hextree map. -pub struct DiskTreeMap(pub(crate) Box + Send + Sync + 'static>); +pub struct HexDb(pub(crate) Box + Send + Sync + 'static>); -impl DiskTreeMap { - /// Opens a `DiskTree` at the specified path. +impl HexDb { + /// Opens a `HexDb` at the specified path. pub fn open>(path: P) -> Result { let file = File::open(path)?; Self::memmap(&file) } - /// Memory maps the provided disktree-containing file. + /// Memory maps the provided hexdb-containing file. pub fn memmap(file: &File) -> Result { #[allow(unsafe_code)] let mm = unsafe { MmapOptions::new().map(file)? }; Self::with_buf(mm) } - /// Opens a `DiskTree` with a provided buffer. + /// Opens a `HexDb` with a provided buffer. pub fn with_buf(buf: B) -> Result where B: AsRef<[u8]> + Send + Sync + 'static, @@ -45,7 +45,7 @@ impl DiskTreeMap { buf }; if magic != HDR_MAGIC { - return Err(Error::NotDisktree); + return Err(Error::NotHexDb); } let version = { diff --git a/src/disktree/varint.rs b/src/hexdb/varint.rs similarity index 100% rename from src/disktree/varint.rs rename to src/hexdb/varint.rs diff --git a/src/disktree/writer.rs b/src/hexdb/writer.rs similarity index 91% rename from src/disktree/writer.rs rename to src/hexdb/writer.rs index 11cfd9e..fc9d72e 100644 --- a/src/disktree/writer.rs +++ b/src/hexdb/writer.rs @@ -1,7 +1,7 @@ use crate::{ compaction::Compactor, - disktree::{dptr::Dp, dtseek::DtSeek, tree::HDR_MAGIC, varint}, error::{Error, Result}, + hexdb::{dbseek::DbSeek, dptr::Dp, tree::HDR_MAGIC, varint, HexDb}, node::Node, HexTreeMap, }; @@ -12,30 +12,30 @@ impl HexTreeMap where C: Compactor, { - /// Write self to disk. - pub fn to_disktree(&self, wtr: W, f: F) -> Result + /// Encode self as a [HexDb] to the provided writer. + pub fn to_hexdb(&self, wtr: W, f: F) -> Result where W: Write + std::io::Seek, F: Fn(&mut dyn Write, &V) -> std::result::Result<(), E>, E: std::error::Error + Sync + Send + 'static, { - DiskTreeWriter::new(wtr).write(self, f) + HexDbWriter::new(wtr).write(self, f) } } -pub(crate) struct DiskTreeWriter { +pub(crate) struct HexDbWriter { scratch_pad: Vec, wtr: W, } -impl DiskTreeWriter { +impl HexDbWriter { pub fn new(wtr: W) -> Self { let scratch_pad = Vec::new(); Self { wtr, scratch_pad } } } -impl DiskTreeWriter +impl HexDbWriter where W: Write + std::io::Seek, { @@ -126,7 +126,7 @@ where } } -impl DtSeek for DiskTreeWriter +impl DbSeek for HexDbWriter where W: std::io::Seek, { @@ -135,7 +135,7 @@ where } fn seek(&mut self, dp: Dp) -> std::io::Result { - DtSeek::seek(&mut self.wtr, dp) + DbSeek::seek(&mut self.wtr, dp) } fn fast_forward(&mut self) -> std::io::Result { diff --git a/src/lib.rs b/src/lib.rs index 34c0314..f7f32c6 100755 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,12 +4,12 @@ mod cell; pub mod compaction; mod digits; -#[cfg(feature = "disktree")] -pub mod disktree; mod entry; mod error; pub mod hex_tree_map; mod hex_tree_set; +#[cfg(feature = "hexdb")] +pub mod hexdb; mod iteration; mod node; @@ -17,5 +17,5 @@ pub use crate::cell::Cell; pub use crate::hex_tree_map::HexTreeMap; pub use crate::hex_tree_set::HexTreeSet; pub use error::{Error, Result}; -#[cfg(feature = "disktree")] +#[cfg(feature = "hexdb")] pub use memmap;