-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #36 from JayKickliter/jsk/add-disk-repr
Add on DiskTree: an on-disk hextree
- Loading branch information
Showing
15 changed files
with
1,003 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
use crate::Result; | ||
use std::{ | ||
io::{Read, Write}, | ||
mem::size_of, | ||
}; | ||
|
||
/// A 'disk' pointer. | ||
#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)] | ||
#[repr(transparent)] | ||
pub(crate) struct Dptr(u64); | ||
|
||
impl Dptr { | ||
#[allow(clippy::cast_possible_truncation)] | ||
const MAX: u64 = 2_u64.pow(Self::DISK_REPR_SZ as u32 * 8) - 1; | ||
const DISK_REPR_SZ: usize = 5; | ||
const NULL: u64 = 0; | ||
|
||
pub(crate) const fn is_null(self) -> bool { | ||
self.0 == Self::NULL | ||
} | ||
|
||
pub(crate) const fn null() -> Dptr { | ||
Dptr(Self::NULL) | ||
} | ||
|
||
pub(crate) const fn size() -> u64 { | ||
Self::DISK_REPR_SZ as u64 | ||
} | ||
|
||
/// Read 5 bytes from disk and parses them as little-endian `u64`. | ||
pub(crate) fn read<R>(src: &mut R) -> Result<Self> | ||
where | ||
R: Read, | ||
{ | ||
let mut buf = [0u8; size_of::<u64>()]; | ||
src.read_exact(&mut buf[..Self::DISK_REPR_SZ])?; | ||
let dptr = u64::from_le_bytes(buf); | ||
Ok(dptr.into()) | ||
} | ||
|
||
/// Read 5 * `n` bytes from disk, for up to n=7, and parses them as | ||
/// little-endian `u64`s. | ||
pub(crate) fn read_n<R>(src: &mut R, n: usize) -> Result<Vec<Dptr>> | ||
where | ||
R: Read, | ||
{ | ||
debug_assert!(n <= 7); | ||
let mut buf = [0; Self::DISK_REPR_SZ * 7]; | ||
src.read_exact(&mut buf[..(Self::DISK_REPR_SZ * n)])?; | ||
Ok(buf[..(Self::DISK_REPR_SZ * n)] | ||
.chunks(Self::DISK_REPR_SZ) | ||
.map(|chunk| { | ||
let mut buf = [0u8; size_of::<u64>()]; | ||
buf[..Self::DISK_REPR_SZ].copy_from_slice(chunk); | ||
u64::from_le_bytes(buf) | ||
}) | ||
.map(Dptr) | ||
.collect()) | ||
} | ||
|
||
/// Writes the 5 lower bytes of a `u64` to disk. | ||
pub(crate) fn write<W>(self, dst: &mut W) -> Result | ||
where | ||
W: Write, | ||
{ | ||
let buf = self.0.to_le_bytes(); | ||
Ok(dst.write_all(&buf[..Self::DISK_REPR_SZ])?) | ||
} | ||
} | ||
|
||
impl From<Dptr> for u64 { | ||
fn from(Dptr(raw): Dptr) -> u64 { | ||
raw | ||
} | ||
} | ||
|
||
impl From<u64> for Dptr { | ||
fn from(raw: u64) -> Dptr { | ||
assert!(raw <= Self::MAX); | ||
Dptr(raw) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,176 @@ | ||
use crate::{ | ||
cell::CellStack, | ||
disktree::{dptr::Dptr, tree::HDR_SZ, varint}, | ||
error::Result, | ||
Cell, | ||
}; | ||
use byteorder::ReadBytesExt; | ||
use std::io::{Cursor, Seek, SeekFrom}; | ||
|
||
pub(crate) struct Iter<'a> { | ||
cell_stack: CellStack, | ||
curr_node: Option<(u8, Dptr)>, | ||
disktree_buf: &'a [u8], | ||
disktree_csr: Cursor<&'a [u8]>, | ||
node_stack: Vec<Vec<(u8, Dptr)>>, | ||
recycle_bin: Vec<Vec<(u8, Dptr)>>, | ||
} | ||
|
||
enum Node { | ||
// File position for the fist byte of value data. | ||
Leaf(Dptr), | ||
// (H3 Cell digit, file position of child's node tag) | ||
Parent(Vec<(u8, Dptr)>), | ||
} | ||
|
||
impl<'a> Iter<'a> { | ||
fn seek_to(&mut self, dptr: Dptr) -> Result<Dptr> { | ||
Ok(Dptr::from( | ||
self.disktree_csr.seek(SeekFrom::Start(u64::from(dptr)))?, | ||
)) | ||
} | ||
|
||
fn read_base_nodes(rdr: &mut Cursor<&[u8]>) -> Result<Vec<(u8, Dptr)>> { | ||
let mut buf = Vec::with_capacity(122); | ||
rdr.seek(SeekFrom::Start(HDR_SZ))?; | ||
for digit in 0..122 { | ||
let dptr = Dptr::read(rdr)?; | ||
if !dptr.is_null() { | ||
buf.push((digit, dptr)); | ||
} | ||
} | ||
buf.reverse(); | ||
Ok(buf) | ||
} | ||
|
||
// `pos` is a position in the file of this node's tag. | ||
fn read_node(&mut self, dptr: Dptr) -> Result<Node> { | ||
let dptr = self.seek_to(dptr)?; | ||
let node_tag = self.disktree_csr.read_u8()?; | ||
if 0 == node_tag & 0b1000_0000 { | ||
Ok(Node::Leaf(dptr)) | ||
} else { | ||
let mut children = self.node_buf(); | ||
let n_children = (node_tag & 0b0111_1111).count_ones() as usize; | ||
let child_dptrs = Dptr::read_n(&mut self.disktree_csr, n_children)?; | ||
children.extend( | ||
(0..7) | ||
.rev() | ||
.filter(|digit| node_tag & (1 << digit) != 0) | ||
.zip(child_dptrs.into_iter().rev()), | ||
); | ||
Ok(Node::Parent(children)) | ||
} | ||
} | ||
|
||
/// Returns a recycled node buffer if available, otherwise | ||
/// allocates a new one. | ||
/// | ||
/// See [`Iter::recycle_node_buf`]. | ||
fn node_buf(&mut self) -> Vec<(u8, Dptr)> { | ||
let buf = self | ||
.recycle_bin | ||
.pop() | ||
.unwrap_or_else(|| Vec::with_capacity(7)); | ||
debug_assert!(buf.is_empty()); | ||
buf | ||
} | ||
|
||
/// Accepts a used, empty, node buffer for later reuse. | ||
/// | ||
/// See [`Iter::node_buf`]. | ||
fn recycle_node_buf(&mut self, buf: Vec<(u8, Dptr)>) { | ||
debug_assert!(buf.is_empty()); | ||
self.recycle_bin.push(buf); | ||
} | ||
|
||
// We've encountered an IO error. We're still going to return | ||
// `Some` with the contents of the user's deserializer, but let's | ||
// make sure we never yield another value by clearing stack. | ||
fn stop_yielding(&mut self) { | ||
self.node_stack.clear(); | ||
self.curr_node = None; | ||
} | ||
|
||
pub(crate) fn new(disktree_buf: &'a [u8]) -> Result<Iter<'a>> { | ||
let mut disktree_csr = Cursor::new(disktree_buf); | ||
let mut cell_stack = CellStack::new(); | ||
let mut node_stack = Vec::new(); | ||
let recycle_bin = Vec::new(); | ||
let mut base_nodes = Self::read_base_nodes(&mut disktree_csr)?; | ||
let curr_node = base_nodes.pop(); | ||
node_stack.push(base_nodes); | ||
if let Some((digit, _)) = curr_node { | ||
cell_stack.push(digit); | ||
} | ||
Ok(Self { | ||
cell_stack, | ||
curr_node, | ||
disktree_buf, | ||
disktree_csr, | ||
recycle_bin, | ||
node_stack, | ||
}) | ||
} | ||
} | ||
|
||
impl<'a> Iterator for Iter<'a> { | ||
type Item = Result<(Cell, &'a [u8])>; | ||
|
||
fn next(&mut self) -> Option<Self::Item> { | ||
while self.curr_node.is_none() { | ||
if let Some(mut dptrs) = self.node_stack.pop() { | ||
self.cell_stack.pop(); | ||
if let Some((digit, dptr)) = dptrs.pop() { | ||
self.cell_stack.push(digit); | ||
self.curr_node = Some((digit, dptr)); | ||
self.node_stack.push(dptrs); | ||
} else { | ||
self.recycle_node_buf(dptrs); | ||
} | ||
} else { | ||
break; | ||
} | ||
} | ||
while let Some((digit, dptr)) = self.curr_node { | ||
self.cell_stack.swap(digit); | ||
match self.read_node(dptr) { | ||
Err(e) => { | ||
self.stop_yielding(); | ||
return Some(Err(e)); | ||
} | ||
Ok(Node::Parent(mut children)) => { | ||
if let Some((digit, dptr)) = children.pop() { | ||
self.cell_stack.push(digit); | ||
self.curr_node = Some((digit, dptr)); | ||
self.node_stack.push(children); | ||
} else { | ||
self.recycle_node_buf(children); | ||
} | ||
} | ||
Ok(Node::Leaf(dptr)) => { | ||
self.curr_node = None; | ||
if let Err(e) = self.seek_to(dptr) { | ||
self.stop_yielding(); | ||
return Some(Err(e)); | ||
} | ||
match varint::read(&mut self.disktree_csr) { | ||
Err(e) => { | ||
self.stop_yielding(); | ||
return Some(Err(e)); | ||
} | ||
Ok((val_len, _n_read)) => { | ||
let pos = self.disktree_csr.position() as usize; | ||
let val_buf = &self.disktree_buf[pos..][..val_len as usize]; | ||
return Some(Ok(( | ||
*self.cell_stack.cell().expect("corrupted cell-stack"), | ||
val_buf, | ||
))); | ||
} | ||
} | ||
} | ||
}; | ||
} | ||
None | ||
} | ||
} |
Oops, something went wrong.