Skip to content

Commit

Permalink
Merge pull request #39 from JayKickliter/jsk/refactor-dptr
Browse files Browse the repository at this point in the history
Refactor Dptr
  • Loading branch information
JayKickliter authored Feb 16, 2024
2 parents 4cff9c4 + d4b351e commit bbc9fcf
Show file tree
Hide file tree
Showing 7 changed files with 163 additions and 79 deletions.
64 changes: 51 additions & 13 deletions src/disktree/dptr.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
use crate::Result;
use std::{
convert::TryFrom,
io::{Read, Write},
mem::size_of,
ops::Add,
};

/// A 'disk' pointer.
#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)]
#[repr(transparent)]
pub(crate) struct Dptr(u64);
pub(crate) struct Dp(u64);

impl Dptr {
impl Dp {
#[allow(clippy::cast_possible_truncation)]
const MAX: u64 = 2_u64.pow(Self::DISK_REPR_SZ as u32 * 8) - 1;
const DISK_REPR_SZ: usize = 5;
Expand All @@ -19,12 +21,12 @@ impl Dptr {
self.0 == Self::NULL
}

pub(crate) const fn null() -> Dptr {
Dptr(Self::NULL)
pub(crate) const fn null() -> Dp {
Dp(Self::NULL)
}

pub(crate) const fn size() -> u64 {
Self::DISK_REPR_SZ as u64
pub(crate) const fn size() -> usize {
Self::DISK_REPR_SZ
}

/// Reads 5 bytes from disk and parses them as a little-endian `u64`.
Expand All @@ -40,7 +42,7 @@ impl Dptr {

/// Reads 5 * `n` bytes from disk, for up to n=7, and parses them as
/// little-endian `u64`s.
pub(crate) fn read_n<R>(src: &mut R, n: usize) -> Result<Vec<Dptr>>
pub(crate) fn read_n<R>(src: &mut R, n: usize) -> Result<Vec<Dp>>
where
R: Read,
{
Expand All @@ -54,7 +56,7 @@ impl Dptr {
buf[..Self::DISK_REPR_SZ].copy_from_slice(chunk);
u64::from_le_bytes(buf)
})
.map(Dptr)
.map(Dp::from)
.collect())
}

Expand All @@ -68,15 +70,51 @@ impl Dptr {
}
}

impl From<Dptr> for u64 {
fn from(Dptr(raw): Dptr) -> u64 {
impl Add<usize> for Dp {
type Output = Dp;

fn add(self, rhs: usize) -> Dp {
Dp::from(self.0 + rhs as u64)
}
}

impl Add<u64> for Dp {
    type Output = Dp;

    /// Offsets this disk pointer forward by `rhs`.
    ///
    /// # Panics
    ///
    /// Panics if the sum overflows `u64`, or if `Dp::from` rejects the
    /// resulting value.
    fn add(self, rhs: u64) -> Dp {
        // A plain `+` wraps silently in release builds; a wrapped sum could
        // then slip past the range check performed by `Dp::from`.
        let sum = self
            .0
            .checked_add(rhs)
            .expect("disk pointer addition overflowed u64");
        Dp::from(sum)
    }
}

impl Add<u32> for Dp {
type Output = Dp;

fn add(self, rhs: u32) -> Dp {
Dp::from(self.0 + rhs as u64)
}
}

impl From<Dp> for u64 {
fn from(Dp(raw): Dp) -> u64 {
raw
}
}

impl From<u64> for Dptr {
fn from(raw: u64) -> Dptr {
impl From<u64> for Dp {
fn from(raw: u64) -> Dp {
assert!(raw <= Self::MAX);
Dptr(raw)
Dp(raw)
}
}

impl From<usize> for Dp {
fn from(raw: usize) -> Dp {
Dp::from(raw as u64)
}
}

impl From<Dp> for usize {
    /// Converts the disk pointer into a `usize`.
    ///
    /// # Panics
    ///
    /// Panics if the stored `u64` does not fit in `usize` on the current
    /// target.
    fn from(Dp(raw): Dp) -> usize {
        // State the violated invariant instead of a bare `unwrap()` so a
        // failure on a narrow target is self-explanatory.
        usize::try_from(raw).expect("disk pointer does not fit in usize on this target")
    }
}
26 changes: 26 additions & 0 deletions src/disktree/dtseek.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
use crate::disktree::dptr::Dp;

/// Positioning operations expressed in terms of `Dp` disk pointers rather
/// than raw integers.
///
/// The blanket impl below provides these for every `std::io::Seek` type.
pub(crate) trait DtSeek {
/// Returns the current position as a `Dp`.
fn pos(&mut self) -> std::io::Result<Dp>;

/// Moves to the position `dp` and returns the resulting position.
fn seek(&mut self, dp: Dp) -> std::io::Result<Dp>;

/// Moves to the end and returns the resulting position (the blanket impl
/// below seeks to `SeekFrom::End(0)`).
fn fast_forward(&mut self) -> std::io::Result<Dp>;
}

impl<S> DtSeek for S
where
    S: std::io::Seek,
{
    /// Reports the underlying stream position as a `Dp`.
    fn pos(&mut self) -> std::io::Result<Dp> {
        let offset = std::io::Seek::stream_position(self)?;
        Ok(Dp::from(offset))
    }

    /// Seeks to `dp`, measured from the start of the stream, and returns the
    /// new position.
    fn seek(&mut self, dp: Dp) -> std::io::Result<Dp> {
        let target = std::io::SeekFrom::Start(u64::from(dp));
        // Call `std::io::Seek::seek` by full path: this trait also has a
        // method named `seek`.
        let offset = std::io::Seek::seek(self, target)?;
        Ok(Dp::from(offset))
    }

    /// Seeks to the end of the stream and returns the new position.
    fn fast_forward(&mut self) -> std::io::Result<Dp> {
        let end = std::io::SeekFrom::End(0);
        let offset = std::io::Seek::seek(self, end)?;
        Ok(Dp::from(offset))
    }
}
56 changes: 32 additions & 24 deletions src/disktree/iter.rs
Original file line number Diff line number Diff line change
@@ -1,40 +1,34 @@
use crate::{
cell::CellStack,
disktree::{dptr::Dptr, tree::HDR_SZ, varint},
error::Result,
disktree::{dptr::Dp, dtseek::DtSeek, tree::HDR_SZ, varint},
error::{Error, Result},
Cell,
};
use byteorder::ReadBytesExt;
use std::io::{Cursor, Seek, SeekFrom};
use std::io::Cursor;

pub(crate) struct Iter<'a> {
cell_stack: CellStack,
curr_node: Option<(u8, Dptr)>,
curr_node: Option<(u8, Dp)>,
disktree_buf: &'a [u8],
disktree_csr: Cursor<&'a [u8]>,
node_stack: Vec<Vec<(u8, Dptr)>>,
recycle_bin: Vec<Vec<(u8, Dptr)>>,
node_stack: Vec<Vec<(u8, Dp)>>,
recycle_bin: Vec<Vec<(u8, Dp)>>,
}

enum Node {
// File position for the first byte of value data.
Leaf(Dptr),
Leaf(Dp),
// (H3 Cell digit, file position of child's node tag)
Parent(Vec<(u8, Dptr)>),
Parent(Vec<(u8, Dp)>),
}

impl<'a> Iter<'a> {
fn seek_to(&mut self, dptr: Dptr) -> Result<Dptr> {
Ok(Dptr::from(
self.disktree_csr.seek(SeekFrom::Start(u64::from(dptr)))?,
))
}

fn read_base_nodes(rdr: &mut Cursor<&[u8]>) -> Result<Vec<(u8, Dptr)>> {
pub(crate) fn read_base_nodes(rdr: &mut Cursor<&[u8]>) -> Result<Vec<(u8, Dp)>> {
let mut buf = Vec::with_capacity(122);
rdr.seek(SeekFrom::Start(HDR_SZ))?;
rdr.seek(HDR_SZ.into())?;
for digit in 0..122 {
let dptr = Dptr::read(rdr)?;
let dptr = Dp::read(rdr)?;
if !dptr.is_null() {
buf.push((digit, dptr));
}
Expand All @@ -44,15 +38,15 @@ impl<'a> Iter<'a> {
}

// `dptr` is the position in the file of this node's tag.
fn read_node(&mut self, dptr: Dptr) -> Result<Node> {
let dptr = self.seek_to(dptr)?;
fn read_node(&mut self, dptr: Dp) -> Result<Node> {
let dptr = self.seek(dptr)?;
let node_tag = self.disktree_csr.read_u8()?;
if 0 == node_tag & 0b1000_0000 {
Ok(Node::Leaf(dptr))
} else {
let mut children = self.node_buf();
let n_children = (node_tag & 0b0111_1111).count_ones() as usize;
let child_dptrs = Dptr::read_n(&mut self.disktree_csr, n_children)?;
let child_dptrs = Dp::read_n(&mut self.disktree_csr, n_children)?;
children.extend(
(0..7)
.rev()
Expand All @@ -67,7 +61,7 @@ impl<'a> Iter<'a> {
/// allocates a new one.
///
/// See [`Iter::recycle_node_buf`].
fn node_buf(&mut self) -> Vec<(u8, Dptr)> {
fn node_buf(&mut self) -> Vec<(u8, Dp)> {
let buf = self
.recycle_bin
.pop()
Expand All @@ -79,7 +73,7 @@ impl<'a> Iter<'a> {
/// Accepts a used, empty, node buffer for later reuse.
///
/// See [`Iter::node_buf`].
fn recycle_node_buf(&mut self, buf: Vec<(u8, Dptr)>) {
fn recycle_node_buf(&mut self, buf: Vec<(u8, Dp)>) {
debug_assert!(buf.is_empty());
self.recycle_bin.push(buf);
}
Expand Down Expand Up @@ -150,9 +144,9 @@ impl<'a> Iterator for Iter<'a> {
}
Ok(Node::Leaf(dptr)) => {
self.curr_node = None;
if let Err(e) = self.seek_to(dptr) {
if let Err(e) = self.seek(dptr) {
self.stop_yielding();
return Some(Err(e));
return Some(Err(Error::from(e)));
}
match varint::read(&mut self.disktree_csr) {
Err(e) => {
Expand All @@ -174,3 +168,17 @@ impl<'a> Iterator for Iter<'a> {
None
}
}

impl<'a> DtSeek for Iter<'a> {
fn pos(&mut self) -> std::io::Result<Dp> {
self.disktree_csr.pos()
}

fn seek(&mut self, dp: Dp) -> std::io::Result<Dp> {
self.disktree_csr.seek(dp)
}

fn fast_forward(&mut self) -> std::io::Result<Dp> {
self.disktree_csr.fast_forward()
}
}
1 change: 1 addition & 0 deletions src/disktree/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ compile_warning!("disktree may silently fail on non-64bit systems");
pub use tree::DiskTreeMap;

mod dptr;
mod dtseek;
mod iter;
mod node;
mod tree;
Expand Down
26 changes: 11 additions & 15 deletions src/disktree/node.rs
Original file line number Diff line number Diff line change
@@ -1,46 +1,42 @@
use crate::{
disktree::{dptr::Dptr, varint},
disktree::{dptr::Dp, dtseek::DtSeek, varint},
error::Result,
};
use byteorder::ReadBytesExt;
use std::{
io::{Read, Seek},
mem::size_of,
ops::Range,
};
use std::{io::Read, mem::size_of, ops::Range};

// Enough bytes to read node tag and 7 child dptrs.
const NODE_BUF_SZ: usize = size_of::<u8>() + 7 * Dptr::size() as usize;
const NODE_BUF_SZ: usize = size_of::<u8>() + 7 * Dp::size();

pub(crate) enum Node {
// value_begin..value_end
Leaf(Range<usize>),
// (H3 Cell digit, file position of child's node tag)
Parent([Option<Dptr>; 7]),
Parent([Option<Dp>; 7]),
}

impl Node {
pub(crate) fn read<R>(rdr: &mut R) -> Result<Node>
where
R: Seek + Read,
R: Read + DtSeek,
{
let start_pos = rdr.stream_position()?;
let start_pos = rdr.pos()?;
let mut buf = [0u8; NODE_BUF_SZ];
let bytes_read = rdr.read(&mut buf)?;
let buf_rdr = &mut &buf[..bytes_read];
let node_tag = buf_rdr.read_u8()?;
if 0 == node_tag & 0b1000_0000 {
let (val_len, n_read) = varint::read(&mut &buf[..bytes_read])?;
let begin = (start_pos + n_read) as usize;
let end = begin + val_len as usize;
Ok(Node::Leaf(begin..end))
let begin = start_pos + n_read;
let end = begin + val_len;
Ok(Node::Leaf(usize::from(begin)..usize::from(end)))
} else {
let mut children: [Option<Dptr>; 7] = [None, None, None, None, None, None, None];
let mut children: [Option<Dp>; 7] = [None, None, None, None, None, None, None];
for (_digit, child) in (0..7)
.zip(children.iter_mut())
.filter(|(digit, _)| node_tag & (1 << digit) != 0)
{
*child = Some(Dptr::read(buf_rdr)?);
*child = Some(Dp::read(buf_rdr)?);
}
Ok(Node::Parent(children))
}
Expand Down
14 changes: 7 additions & 7 deletions src/disktree/tree.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::{
digits::Digits,
disktree::{dptr::Dptr, iter::Iter, node::Node},
disktree::{dptr::Dp, iter::Iter, node::Node},
error::Result,
Cell, Error,
};
Expand All @@ -15,10 +15,10 @@ use std::{
};

pub(crate) const HDR_MAGIC: &[u8] = b"hextree\0";
pub(crate) const HDR_SZ: u64 = HDR_MAGIC.len() as u64 + 1;
pub(crate) const HDR_SZ: usize = HDR_MAGIC.len() + 1;

/// An on-disk hextree map.
pub struct DiskTreeMap(Box<dyn AsRef<[u8]> + Send + Sync + 'static>);
pub struct DiskTreeMap(pub(crate) Box<dyn AsRef<[u8]> + Send + Sync + 'static>);

impl DiskTreeMap {
/// Opens a `DiskTree` at the specified path.
Expand Down Expand Up @@ -65,7 +65,7 @@ impl DiskTreeMap {
let base_cell_pos = Self::base_cell_dptr(cell);
let mut csr = Cursor::new((*self.0).as_ref());
csr.seek(SeekFrom::Start(base_cell_pos.into()))?;
let node_dptr = Dptr::read(&mut csr)?;
let node_dptr = Dp::read(&mut csr)?;
if node_dptr.is_null() {
return Ok(None);
}
Expand All @@ -92,7 +92,7 @@ impl DiskTreeMap {
fn _get(
csr: &mut Cursor<&[u8]>,
res: u8,
node_dptr: Dptr,
node_dptr: Dp,
cell: Cell,
mut digits: Digits,
) -> Result<Option<(Cell, Range<usize>)>> {
Expand All @@ -115,7 +115,7 @@ impl DiskTreeMap {
}

/// Returns the DPtr to a base (res0) cell dptr.
fn base_cell_dptr(cell: Cell) -> Dptr {
Dptr::from(HDR_SZ + Dptr::size() * (cell.base() as u64))
fn base_cell_dptr(cell: Cell) -> Dp {
Dp::from(HDR_SZ + Dp::size() * cell.base() as usize)
}
}
Loading

0 comments on commit bbc9fcf

Please sign in to comment.