Skip to content

Commit

Permalink
Make strong type for disk pointer
Browse files Browse the repository at this point in the history
  • Loading branch information
JayKickliter committed Nov 9, 2023
1 parent 23e8aec commit f7bee48
Show file tree
Hide file tree
Showing 4 changed files with 131 additions and 98 deletions.
112 changes: 71 additions & 41 deletions src/disktree/dptr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,49 +4,79 @@ use std::{
mem::size_of,
};

/// The on-disk little-endian byte-representation of an offset.
pub(crate) type Dptr = [u8; 5];
pub(crate) const DPTR_SZ: usize = size_of::<Dptr>();
#[allow(clippy::cast_possible_truncation)]
pub(crate) const DPTR_MAX: u64 = 2_u64.pow(DPTR_SZ as u32 * 8) - 1;
pub(crate) const DPTR_NULL: u64 = 0;

/// Reads 5 bytes from disk and parses them as a little-endian `u64`.
pub(crate) fn read<R>(src: &mut R) -> Result<u64>
where
R: Read,
{
let mut buf = [0u8; size_of::<u64>()];
src.read_exact(&mut buf[..DPTR_SZ])?;
let dptr = u64::from_le_bytes(buf);
Ok(dptr)
/// A 'disk' pointer.
#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)]
#[repr(transparent)]
pub(crate) struct Dptr(u64);

impl Dptr {
#[allow(clippy::cast_possible_truncation)]
const MAX: u64 = 2_u64.pow(Self::DISK_REPR_SZ as u32 * 8) - 1;
const DISK_REPR_SZ: usize = 5;
const NULL: u64 = 0;

pub(crate) const fn is_null(self) -> bool {
self.0 == Self::NULL
}

pub(crate) const fn null() -> Dptr {
Dptr(Self::NULL)
}

pub(crate) const fn size() -> u64 {
Self::DISK_REPR_SZ as u64
}

/// Read 5 bytes from disk and parses them as litte-endient `u64`.
pub(crate) fn read<R>(src: &mut R) -> Result<Self>
where
R: Read,
{
let mut buf = [0u8; size_of::<u64>()];
src.read_exact(&mut buf[..Self::DISK_REPR_SZ])?;
let dptr = u64::from_le_bytes(buf);
Ok(dptr.into())
}

/// Read 5 * `n` bytes from disk, for up to n=7, and parses them as
/// litte-endien `u64`s.
pub(crate) fn read_n<R>(src: &mut R, n: usize) -> Result<Vec<Dptr>>
where
R: Read,
{
debug_assert!(n <= 7);
let mut buf = [0; Self::DISK_REPR_SZ * 7];
src.read_exact(&mut buf[..(Self::DISK_REPR_SZ * n)])?;
Ok(buf[..(Self::DISK_REPR_SZ * n)]
.chunks(Self::DISK_REPR_SZ)
.map(|chunk| {
let mut buf = [0u8; size_of::<u64>()];
buf[..Self::DISK_REPR_SZ].copy_from_slice(chunk);
u64::from_le_bytes(buf)
})
.map(Dptr)
.collect())
}

/// Writes the 5 lower bytes of a `u64` to disk.
pub(crate) fn write<W>(self, dst: &mut W) -> Result
where
W: Write,
{
let buf = self.0.to_le_bytes();
Ok(dst.write_all(&buf[..Self::DISK_REPR_SZ])?)
}
}

/// Reads 5 * `n` bytes from disk, for up to n=7, and parses them as
/// little-endian `u64`s.
pub(crate) fn read_n<R>(src: &mut R, n: usize) -> Result<Vec<u64>>
where
R: Read,
{
assert!(n <= 7);
let mut buf = [0; DPTR_SZ * 7];
src.read_exact(&mut buf[..(DPTR_SZ * n)])?;
Ok(buf[..(DPTR_SZ * n)]
.chunks(DPTR_SZ)
.map(|chunk| {
let mut buf = [0u8; size_of::<u64>()];
buf[..DPTR_SZ].copy_from_slice(chunk);
u64::from_le_bytes(buf)
})
.collect())
impl From<Dptr> for u64 {
fn from(Dptr(raw): Dptr) -> u64 {
raw
}
}

/// Writes the 5 lower bytes of a `u64` to disk.
pub(crate) fn write<W>(dst: &mut W, dptr: u64) -> Result
where
W: Write,
{
assert!(dptr <= DPTR_MAX);
let buf = dptr.to_le_bytes();
Ok(dst.write_all(&buf[..DPTR_SZ])?)
impl From<u64> for Dptr {
fn from(raw: u64) -> Dptr {
assert!(raw <= Self::MAX);
Dptr(raw)
}
}
41 changes: 20 additions & 21 deletions src/disktree/iter.rs
Original file line number Diff line number Diff line change
@@ -1,62 +1,61 @@
use crate::{
cell::CellStack,
disktree::{dptr, tree::HDR_SZ, ReadVal},
disktree::{dptr::Dptr, tree::HDR_SZ, ReadVal},
error::Result,
};
use byteorder::ReadBytesExt;
use std::io::{Read, Seek, SeekFrom};

pub(crate) struct Iter<'a, R, F> {
cell_stack: CellStack,
curr: Option<(u8, u64)>,
curr: Option<(u8, Dptr)>,
rdr: &'a mut R,
recycle_bin: Vec<Vec<(u8, u64)>>,
stack: Vec<Vec<(u8, u64)>>,
recycle_bin: Vec<Vec<(u8, Dptr)>>,
stack: Vec<Vec<(u8, Dptr)>>,
f: F,
}

enum Node {
// File position for the first byte of value data.
Leaf(u64),
Leaf(Dptr),
// (H3 Cell digit, file position of child's node tag)
Parent(Vec<(u8, u64)>),
Parent(Vec<(u8, Dptr)>),
}

impl<'a, R, F> Iter<'a, R, F>
where
R: Seek + Read,
{
fn seek_to(&mut self, pos: u64) -> Result {
self.rdr.seek(SeekFrom::Start(pos))?;
Ok(())
fn seek_to(&mut self, dptr: Dptr) -> Result<Dptr> {
Ok(Dptr::from(self.rdr.seek(SeekFrom::Start(u64::from(dptr)))?))
}

fn read_base_nodes(rdr: &mut R) -> Result<Vec<(u8, u64)>> {
fn read_base_nodes(rdr: &mut R) -> Result<Vec<(u8, Dptr)>> {
let mut buf = Vec::with_capacity(122);
rdr.seek(SeekFrom::Start(HDR_SZ))?;
for digit in 0..122 {
let dptr = dptr::read(rdr)?;
if dptr != dptr::DPTR_NULL {
buf.push((digit, dptr))
let dptr = Dptr::read(rdr)?;
if !dptr.is_null() {
buf.push((digit, dptr));
}
}
buf.reverse();
Ok(buf)
}

// `dptr` is the position in the file of this node's tag.
fn read_node(&mut self, dptr: u64) -> Result<Node> {
self.seek_to(dptr)?;
fn read_node(&mut self, dptr: Dptr) -> Result<Node> {
let dptr = self.seek_to(dptr)?;
let node_tag = self.rdr.read_u8()?;
let base_pos = dptr + std::mem::size_of_val(&node_tag) as u64;
debug_assert_eq!(base_pos, self.rdr.stream_position().unwrap());
let base_pos = Dptr::from(u64::from(dptr) + std::mem::size_of_val(&node_tag) as u64);
debug_assert_eq!(base_pos, Dptr::from(self.rdr.stream_position().unwrap()));
assert!(node_tag == 0 || node_tag > 0b1000_0000);
if node_tag == 0 {
Ok(Node::Leaf(base_pos))
} else {
let mut children = self.node_buf();
let n_children = (node_tag & 0b0111_1111).count_ones() as usize;
let child_dptrs = dptr::read_n(&mut self.rdr, n_children)?;
let child_dptrs = Dptr::read_n(&mut self.rdr, n_children)?;
children.extend(
(0..7)
.rev()
Expand All @@ -71,7 +70,7 @@ where
/// allocates a new one.
///
/// See [`Iter::recycle_node_buf`].
fn node_buf(&mut self) -> Vec<(u8, u64)> {
fn node_buf(&mut self) -> Vec<(u8, Dptr)> {
let buf = self
.recycle_bin
.pop()
Expand All @@ -83,7 +82,7 @@ where
/// Accepts a used, empty, node buffer for later reuse.
///
/// See [`Iter::node_buf`].
fn recycle_node_buf(&mut self, buf: Vec<(u8, u64)>) {
fn recycle_node_buf(&mut self, buf: Vec<(u8, Dptr)>) {
debug_assert!(buf.is_empty());
self.recycle_bin.push(buf);
}
Expand Down Expand Up @@ -152,7 +151,7 @@ where
self.curr = Some((digit, dptr));
self.stack.push(children);
} else {
self.recycle_node_buf(children)
self.recycle_node_buf(children);
}
}
Ok(Node::Leaf(dptr)) => {
Expand Down
36 changes: 16 additions & 20 deletions src/disktree/tree.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
use crate::{
digits::Digits,
disktree::{
dptr::{self, DPTR_NULL, DPTR_SZ},
iter::Iter,
ReadVal,
},
disktree::{dptr::Dptr, iter::Iter, ReadVal},
error::{Error, Result},
Cell,
};
Expand Down Expand Up @@ -52,10 +48,10 @@ impl<R: Read + Seek> DiskTree<R> {

/// Returns a reader pre-seeked to the value for cell, if present.
pub fn seek_to_cell(&mut self, cell: Cell) -> Result<Option<(Cell, &mut R)>> {
let base_cell_pos = Self::base_cell_offset(cell);
let base_cell_pos = Self::base_cell_dptr(cell);
self.seek_to_pos(base_cell_pos)?;
let node_dptr = dptr::read(&mut self.0)?;
if node_dptr == DPTR_NULL {
let node_dptr = Dptr::read(&mut self.0)?;
if node_dptr.is_null() {
return Ok(None);
}
let digits = Digits::new(cell);
Expand All @@ -68,10 +64,10 @@ impl<R: Read + Seek> DiskTree<R> {

/// Returns `true` if the tree fully contains `cell`.
pub fn contains(&mut self, cell: Cell) -> Result<bool> {
let base_cell_pos = Self::base_cell_offset(cell);
let base_cell_pos = Self::base_cell_dptr(cell);
self.seek_to_pos(base_cell_pos)?;
let node_dptr = dptr::read(&mut self.0)?;
if node_dptr == DPTR_NULL {
let node_dptr = Dptr::read(&mut self.0)?;
if node_dptr.is_null() {
return Ok(false);
}
let digits = Digits::new(cell);
Expand Down Expand Up @@ -99,7 +95,7 @@ impl<R: Read + Seek> DiskTree<R> {
fn _get(
&mut self,
res: u8,
node_dptr: u64,
node_dptr: Dptr,
cell: Cell,
mut digits: Digits,
) -> Result<Option<(Cell, u64)>> {
Expand All @@ -114,9 +110,9 @@ impl<R: Read + Seek> DiskTree<R> {
))),
(Some(digit), _) => {
let bit_cnt = (((node_tag as u16) << (8 - digit)) & 0xFF).count_ones();
self.seek_forward(bit_cnt as u64 * DPTR_SZ as u64)?;
let child_dptr = dptr::read(&mut self.0)?;
if child_dptr == DPTR_NULL {
self.seek_forward(u64::from(bit_cnt) * Dptr::size())?;
let child_dptr = Dptr::read(&mut self.0)?;
if child_dptr.is_null() {
Ok(None)
} else {
self._get(res + 1, child_dptr, cell, digits)
Expand All @@ -128,8 +124,8 @@ impl<R: Read + Seek> DiskTree<R> {
}
}

fn seek_to_pos(&mut self, pos: u64) -> Result {
self.0.seek(SeekFrom::Start(pos))?;
fn seek_to_pos(&mut self, dptr: Dptr) -> Result {
self.0.seek(SeekFrom::Start(u64::from(dptr)))?;
Ok(())
}

Expand All @@ -138,8 +134,8 @@ impl<R: Read + Seek> DiskTree<R> {
Ok(())
}

/// Returns the offset to a base (res0) cell dptr.
fn base_cell_offset(cell: Cell) -> u64 {
HDR_SZ + (DPTR_SZ as u64) * (cell.base() as u64)
/// Returns the `Dptr` to a base (res0) cell dptr.
fn base_cell_dptr(cell: Cell) -> Dptr {
Dptr::from(HDR_SZ + Dptr::size() * (cell.base() as u64))
}
}
Loading

0 comments on commit f7bee48

Please sign in to comment.