diff --git a/src/disktree/dptr.rs b/src/disktree/dptr.rs index 72d5394..ccf538e 100644 --- a/src/disktree/dptr.rs +++ b/src/disktree/dptr.rs @@ -4,49 +4,79 @@ use std::{ mem::size_of, }; -/// The on-disk little-endian byte-representation of an offset. -pub(crate) type Dptr = [u8; 5]; -pub(crate) const DPTR_SZ: usize = size_of::(); -#[allow(clippy::cast_possible_truncation)] -pub(crate) const DPTR_MAX: u64 = 2_u64.pow(DPTR_SZ as u32 * 8) - 1; -pub(crate) const DPTR_NULL: u64 = 0; - -/// Read 5 bytes from disk and parses them as litte-endient `u64`. -pub(crate) fn read(src: &mut R) -> Result -where - R: Read, -{ - let mut buf = [0u8; size_of::()]; - src.read_exact(&mut buf[..DPTR_SZ])?; - let dptr = u64::from_le_bytes(buf); - Ok(dptr) +/// A 'disk' pointer. +#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)] +#[repr(transparent)] +pub(crate) struct Dptr(u64); + +impl Dptr { + #[allow(clippy::cast_possible_truncation)] + const MAX: u64 = 2_u64.pow(Self::DISK_REPR_SZ as u32 * 8) - 1; + const DISK_REPR_SZ: usize = 5; + const NULL: u64 = 0; + + pub(crate) const fn is_null(self) -> bool { + self.0 == Self::NULL + } + + pub(crate) const fn null() -> Dptr { + Dptr(Self::NULL) + } + + pub(crate) const fn size() -> u64 { + Self::DISK_REPR_SZ as u64 + } + + /// Reads 5 bytes from disk and parses them as a little-endian `u64`. + pub(crate) fn read(src: &mut R) -> Result + where + R: Read, + { + let mut buf = [0u8; size_of::()]; + src.read_exact(&mut buf[..Self::DISK_REPR_SZ])?; + let dptr = u64::from_le_bytes(buf); + Ok(dptr.into()) + } + + /// Reads 5 * `n` bytes from disk, for up to n=7, and parses them as + /// little-endian `u64`s. 
+ pub(crate) fn read_n(src: &mut R, n: usize) -> Result> + where + R: Read, + { + debug_assert!(n <= 7); + let mut buf = [0; Self::DISK_REPR_SZ * 7]; + src.read_exact(&mut buf[..(Self::DISK_REPR_SZ * n)])?; + Ok(buf[..(Self::DISK_REPR_SZ * n)] + .chunks(Self::DISK_REPR_SZ) + .map(|chunk| { + let mut buf = [0u8; size_of::()]; + buf[..Self::DISK_REPR_SZ].copy_from_slice(chunk); + u64::from_le_bytes(buf) + }) + .map(Dptr) + .collect()) + } + + /// Writes the 5 lower bytes of a `u64` to disk. + pub(crate) fn write(self, dst: &mut W) -> Result + where + W: Write, + { + let buf = self.0.to_le_bytes(); + Ok(dst.write_all(&buf[..Self::DISK_REPR_SZ])?) + } } -/// Read 5 * `n` bytes from disk, for up to n=7, and parses them as -/// litte-endien `u64`s. -pub(crate) fn read_n(src: &mut R, n: usize) -> Result> -where - R: Read, -{ - assert!(n <= 7); - let mut buf = [0; DPTR_SZ * 7]; - src.read_exact(&mut buf[..(DPTR_SZ * n)])?; - Ok(buf[..(DPTR_SZ * n)] - .chunks(DPTR_SZ) - .map(|chunk| { - let mut buf = [0u8; size_of::()]; - buf[..DPTR_SZ].copy_from_slice(chunk); - u64::from_le_bytes(buf) - }) - .collect()) +impl From for u64 { + fn from(Dptr(raw): Dptr) -> u64 { + raw + } } -/// Writes the 5 lower bytes of a `u64` to disk. -pub(crate) fn write(dst: &mut W, dptr: u64) -> Result -where - W: Write, -{ - assert!(dptr <= DPTR_MAX); - let buf = dptr.to_le_bytes(); - Ok(dst.write_all(&buf[..DPTR_SZ])?) 
+impl From for Dptr { + fn from(raw: u64) -> Dptr { + assert!(raw <= Self::MAX); + Dptr(raw) + } } diff --git a/src/disktree/iter.rs b/src/disktree/iter.rs index ebd7eff..4f9f8f0 100644 --- a/src/disktree/iter.rs +++ b/src/disktree/iter.rs @@ -1,6 +1,6 @@ use crate::{ cell::CellStack, - disktree::{dptr, tree::HDR_SZ, ReadVal}, + disktree::{dptr::Dptr, tree::HDR_SZ, ReadVal}, error::Result, }; use byteorder::ReadBytesExt; @@ -8,36 +8,35 @@ use std::io::{Read, Seek, SeekFrom}; pub(crate) struct Iter<'a, R, F> { cell_stack: CellStack, - curr: Option<(u8, u64)>, + curr: Option<(u8, Dptr)>, rdr: &'a mut R, - recycle_bin: Vec>, - stack: Vec>, + recycle_bin: Vec>, + stack: Vec>, f: F, } enum Node { // File position for the fist byte of value data. - Leaf(u64), + Leaf(Dptr), // (H3 Cell digit, file position of child's node tag) - Parent(Vec<(u8, u64)>), + Parent(Vec<(u8, Dptr)>), } impl<'a, R, F> Iter<'a, R, F> where R: Seek + Read, { - fn seek_to(&mut self, pos: u64) -> Result { - self.rdr.seek(SeekFrom::Start(pos))?; - Ok(()) + fn seek_to(&mut self, dptr: Dptr) -> Result { + Ok(Dptr::from(self.rdr.seek(SeekFrom::Start(u64::from(dptr)))?)) } - fn read_base_nodes(rdr: &mut R) -> Result> { + fn read_base_nodes(rdr: &mut R) -> Result> { let mut buf = Vec::with_capacity(122); rdr.seek(SeekFrom::Start(HDR_SZ))?; for digit in 0..122 { - let dptr = dptr::read(rdr)?; - if dptr != dptr::DPTR_NULL { - buf.push((digit, dptr)) + let dptr = Dptr::read(rdr)?; + if !dptr.is_null() { + buf.push((digit, dptr)); } } buf.reverse(); @@ -45,18 +44,18 @@ where } // `pos` is a position in the file of this node's tag. 
- fn read_node(&mut self, dptr: u64) -> Result { - self.seek_to(dptr)?; + fn read_node(&mut self, dptr: Dptr) -> Result { + let dptr = self.seek_to(dptr)?; let node_tag = self.rdr.read_u8()?; - let base_pos = dptr + std::mem::size_of_val(&node_tag) as u64; - debug_assert_eq!(base_pos, self.rdr.stream_position().unwrap()); + let base_pos = Dptr::from(u64::from(dptr) + std::mem::size_of_val(&node_tag) as u64); + debug_assert_eq!(base_pos, Dptr::from(self.rdr.stream_position().unwrap())); assert!(node_tag == 0 || node_tag > 0b1000_0000); if node_tag == 0 { Ok(Node::Leaf(base_pos)) } else { let mut children = self.node_buf(); let n_children = (node_tag & 0b0111_1111).count_ones() as usize; - let child_dptrs = dptr::read_n(&mut self.rdr, n_children)?; + let child_dptrs = Dptr::read_n(&mut self.rdr, n_children)?; children.extend( (0..7) .rev() @@ -71,7 +70,7 @@ where /// allocates a new one. /// /// See [`Iter::recycle_node_buf`]. - fn node_buf(&mut self) -> Vec<(u8, u64)> { + fn node_buf(&mut self) -> Vec<(u8, Dptr)> { let buf = self .recycle_bin .pop() @@ -83,7 +82,7 @@ where /// Accepts a used, empty, node buffer for later reuse. /// /// See [`Iter::node_buf`]. - fn recycle_node_buf(&mut self, buf: Vec<(u8, u64)>) { + fn recycle_node_buf(&mut self, buf: Vec<(u8, Dptr)>) { debug_assert!(buf.is_empty()); self.recycle_bin.push(buf); } @@ -152,7 +151,7 @@ where self.curr = Some((digit, dptr)); self.stack.push(children); } else { - self.recycle_node_buf(children) + self.recycle_node_buf(children); } } Ok(Node::Leaf(dptr)) => { diff --git a/src/disktree/tree.rs b/src/disktree/tree.rs index 5a2eddf..d0af550 100644 --- a/src/disktree/tree.rs +++ b/src/disktree/tree.rs @@ -1,10 +1,6 @@ use crate::{ digits::Digits, - disktree::{ - dptr::{self, DPTR_NULL, DPTR_SZ}, - iter::Iter, - ReadVal, - }, + disktree::{dptr::Dptr, iter::Iter, ReadVal}, error::{Error, Result}, Cell, }; @@ -52,10 +48,10 @@ impl DiskTree { /// Returns a reader pre-seeked to the value for cell, if present. 
pub fn seek_to_cell(&mut self, cell: Cell) -> Result> { - let base_cell_pos = Self::base_cell_offset(cell); + let base_cell_pos = Self::base_cell_dptr(cell); self.seek_to_pos(base_cell_pos)?; - let node_dptr = dptr::read(&mut self.0)?; - if node_dptr == DPTR_NULL { + let node_dptr = Dptr::read(&mut self.0)?; + if node_dptr.is_null() { return Ok(None); } let digits = Digits::new(cell); @@ -68,10 +64,10 @@ impl DiskTree { /// Returns `true` if the tree fully contains `cell`. pub fn contains(&mut self, cell: Cell) -> Result { - let base_cell_pos = Self::base_cell_offset(cell); + let base_cell_pos = Self::base_cell_dptr(cell); self.seek_to_pos(base_cell_pos)?; - let node_dptr = dptr::read(&mut self.0)?; - if node_dptr == DPTR_NULL { + let node_dptr = Dptr::read(&mut self.0)?; + if node_dptr.is_null() { return Ok(false); } let digits = Digits::new(cell); @@ -99,7 +95,7 @@ impl DiskTree { fn _get( &mut self, res: u8, - node_dptr: u64, + node_dptr: Dptr, cell: Cell, mut digits: Digits, ) -> Result> { @@ -114,9 +110,9 @@ impl DiskTree { ))), (Some(digit), _) => { let bit_cnt = (((node_tag as u16) << (8 - digit)) & 0xFF).count_ones(); - self.seek_forward(bit_cnt as u64 * DPTR_SZ as u64)?; - let child_dptr = dptr::read(&mut self.0)?; - if child_dptr == DPTR_NULL { + self.seek_forward(u64::from(bit_cnt) * Dptr::size())?; + let child_dptr = Dptr::read(&mut self.0)?; + if child_dptr.is_null() { Ok(None) } else { self._get(res + 1, child_dptr, cell, digits) @@ -128,8 +124,8 @@ impl DiskTree { } } - fn seek_to_pos(&mut self, pos: u64) -> Result { - self.0.seek(SeekFrom::Start(pos))?; + fn seek_to_pos(&mut self, dptr: Dptr) -> Result { + self.0.seek(SeekFrom::Start(u64::from(dptr)))?; Ok(()) } @@ -138,8 +134,8 @@ impl DiskTree { Ok(()) } - /// Returns the offset to a base (res0) cell dptr. - fn base_cell_offset(cell: Cell) -> u64 { - HDR_SZ + (DPTR_SZ as u64) * (cell.base() as u64) + /// Returns the `Dptr` of a base (res0) cell's dptr entry. 
+ fn base_cell_dptr(cell: Cell) -> Dptr { + Dptr::from(HDR_SZ + Dptr::size() * (cell.base() as u64)) } } diff --git a/src/disktree/writer.rs b/src/disktree/writer.rs index 88f0ce4..9108f5d 100644 --- a/src/disktree/writer.rs +++ b/src/disktree/writer.rs @@ -1,6 +1,6 @@ use crate::{ compaction::Compactor, - disktree::dptr::{self, DPTR_NULL}, + disktree::dptr::Dptr, error::{Error, Result}, node::Node, HexTreeMap, @@ -32,24 +32,24 @@ impl DiskTreeWriter { const VERSION: u8 = 0; self.0.write_u8(0xFE - VERSION)?; // Write base cells placeholder offsets. - let mut fixups: Vec<(u64, &Node)> = Vec::new(); + let mut fixups: Vec<(Dptr, &Node)> = Vec::new(); // Empty: | DPTR_DEFAULT | // Node: | Dptr | for base in hextree.nodes.iter() { match base.as_deref() { - None => dptr::write(&mut self.0, DPTR_NULL)?, + None => Dptr::null().write(&mut self.0)?, Some(node) => { - fixups.push((self.0.stream_position()?, node)); - dptr::write(&mut self.0, DPTR_NULL)? + fixups.push((self.pos()?, node)); + Dptr::null().write(&mut self.0)? 
} } } for (fixee_dptr, node) in fixups { let node_dptr = self.write_node(node, &mut f)?; - self.0.seek(SeekFrom::Start(fixee_dptr))?; - dptr::write(&mut self.0, node_dptr)?; + self.seek_to(fixee_dptr)?; + node_dptr.write(&mut self.0)?; } Ok(()) @@ -57,13 +57,13 @@ impl DiskTreeWriter { /// Leaf: | 0_u8 | bincode bytes | /// Parent: | 1_u8 | Dptr | Dptr | Dptr | Dptr | Dptr | Dptr | Dptr | - fn write_node(&mut self, node: &Node, f: &mut F) -> Result + fn write_node(&mut self, node: &Node, f: &mut F) -> Result where F: Fn(&mut W, &V) -> std::result::Result<(), E>, E: std::error::Error + Sync + Send + 'static, { - let node_pos = self.0.seek(SeekFrom::End(0))?; - let mut node_fixups: Vec<(u64, &Node)> = Vec::new(); + let node_pos: Dptr = self.0.seek(SeekFrom::End(0))?.into(); + let mut node_fixups: Vec<(Dptr, &Node)> = Vec::new(); match node { Node::Leaf(val) => { self.0.write_u8(0)?; @@ -71,7 +71,7 @@ impl DiskTreeWriter { f(&mut self.0, val).map_err(|e| Error::Writer(Box::new(e)))? } Node::Parent(children) => { - let tag_pos = self.0.stream_position()?; + let tag_pos = self.pos()?; self.0.write_u8(0b1000_0000)?; let mut tag = 0; for child in children.iter() { @@ -85,12 +85,12 @@ impl DiskTreeWriter { // "insert" a 1 into the tag denoting that // this node is empty. tag = (tag >> 1) | 0b1000_0000; - node_fixups.push((self.0.stream_position()?, node)); - dptr::write(&mut self.0, DPTR_NULL)?; + node_fixups.push((self.pos()?, node)); + Dptr::null().write(&mut self.0)?; } } } - self.0.seek(SeekFrom::Start(tag_pos))?; + self.seek_to(tag_pos)?; // Make the top bit 1 as a sentinel. 
tag = (tag >> 1) | 0b1000_0000; // println!("{tag_pos:010x}: write tag {tag:08b}"); @@ -100,10 +100,18 @@ impl DiskTreeWriter { for (fixee_dptr, node) in node_fixups { let node_dptr = self.write_node(node, f)?; - self.0.seek(SeekFrom::Start(fixee_dptr))?; - dptr::write(&mut self.0, node_dptr)?; + self.seek_to(fixee_dptr)?; + node_dptr.write(&mut self.0)?; } Ok(node_pos) } + + fn pos(&mut self) -> Result { + Ok(Dptr::from(self.0.stream_position()?)) + } + + fn seek_to(&mut self, dptr: Dptr) -> Result { + Ok(Dptr::from(self.0.seek(SeekFrom::Start(u64::from(dptr)))?)) + } }