From fffc8a16f68bce249008546f6b20e57e2832142f Mon Sep 17 00:00:00 2001 From: PJ Tatlow Date: Fri, 3 Nov 2023 21:48:38 -0600 Subject: [PATCH] handle both old and new meta page formats --- Cargo.toml | 11 ++--- src/bucket.rs | 2 + src/db.rs | 103 ++++++++++++++++++++++------------------ src/meta.rs | 78 +++++++++++++++++++++++++++++- src/page.rs | 68 ++++++++++++++++++++++---- tests/common/record.rs | 7 +-- tests/simple_inserts.rs | 7 ++- 7 files changed, 203 insertions(+), 73 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 404921a..50efd49 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "jammdb" description = "An embedded single-file database for Rust" -version = "0.10.0" +version = "0.11.0" authors = ["PJ Tatlow "] edition = "2021" license = "MIT OR Apache-2.0" @@ -10,13 +10,7 @@ readme = "README.md" keywords = ["db", "database", "embedded-database", "memory-map"] categories = ["database", "database-implementations"] -exclude = [ - ".*.yml", - ".github/*", - "ci/*", - "tests/*", - "makefile", -] +exclude = [".*.yml", ".github/*", "ci/*", "tests/*", "makefile"] [dependencies] libc = "0.2.149" @@ -26,6 +20,7 @@ fs4 = "0.7.0" bytes = "1.5.0" bumpalo = "3.14.0" fnv = "1.0.7" +sha3 = "0.10.8" [dev-dependencies] bytes = { version = "1", features = ["serde"] } diff --git a/src/bucket.rs b/src/bucket.rs index 41ada77..203d4a4 100644 --- a/src/bucket.rs +++ b/src/bucket.rs @@ -1029,6 +1029,7 @@ mod tests { let tx = db.tx(true).unwrap(); let b = tx.create_bucket("abc").unwrap(); tx.delete_bucket("abc").unwrap(); + #[allow(clippy::redundant_closure_call)] $value(&b); } )* @@ -1089,6 +1090,7 @@ mod tests { } let tx = db.tx($rw)?; let b = tx.get_bucket("abc")?; + #[allow(clippy::redundant_closure_call)] $value(&b); Ok(()) } diff --git a/src/db.rs b/src/db.rs index d3294c2..35f3097 100644 --- a/src/db.rs +++ b/src/db.rs @@ -283,56 +283,65 @@ impl DBInner { pub(crate) fn meta(&self) -> Result { let data = self.data.lock()?; - let meta1 = Page::from_buf(&data, 0, self.pagesize).meta(); - - // Double check that we have the right pagesize before we read the second page. - if meta1.valid() && meta1.pagesize != self.pagesize { - assert_eq!( - meta1.pagesize, self.pagesize, - "Invalid pagesize from meta1 {}. Expected {}.", - meta1.pagesize, self.pagesize - ); - } - let meta2 = Page::from_buf(&data, 1, self.pagesize).meta(); - let meta = match (meta1.valid(), meta2.valid()) { - (true, true) => { - assert_eq!( - meta1.pagesize, self.pagesize, - "Invalid pagesize from meta1 {}. Expected {}.", - meta1.pagesize, self.pagesize - ); - assert_eq!( - meta2.pagesize, self.pagesize, - "Invalid pagesize from meta2 {}. Expected {}.", - meta2.pagesize, self.pagesize - ); - if meta1.tx_id > meta2.tx_id { - meta1 - } else { - meta2 + macro_rules! check_meta { + ($func:ident) => {{ + let meta1 = Page::from_buf(&data, 0, self.pagesize).$func(); + // Double check that we have the right pagesize before we read the second page. + if meta1.valid() && meta1.pagesize != self.pagesize { + assert_eq!( + meta1.pagesize, self.pagesize, + "Invalid pagesize from meta1 {}. Expected {}.", + meta1.pagesize, self.pagesize + ); } - } - (true, false) => { - assert_eq!( - meta1.pagesize, self.pagesize, - "Invalid pagesize from meta1 {}. Expected {}.", - meta1.pagesize, self.pagesize - ); - meta1 - } - (false, true) => { - assert_eq!( - meta2.pagesize, self.pagesize, - "Invalid pagesize from meta2 {}. Expected {}.", - meta2.pagesize, self.pagesize - ); - meta2 - } - (false, false) => panic!("NO VALID META PAGES"), - }; + let meta2 = Page::from_buf(&data, 1, self.pagesize).$func(); + match (meta1.valid(), meta2.valid()) { + (true, true) => { + assert_eq!( + meta1.pagesize, self.pagesize, + "Invalid pagesize from meta1 {}. Expected {}.", + meta1.pagesize, self.pagesize + ); + assert_eq!( + meta2.pagesize, self.pagesize, + "Invalid pagesize from meta2 {}. Expected {}.", + meta2.pagesize, self.pagesize + ); + if meta1.tx_id > meta2.tx_id { + Some(meta1) + } else { + Some(meta2) + } + } + (true, false) => { + assert_eq!( + meta1.pagesize, self.pagesize, + "Invalid pagesize from meta1 {}. Expected {}.", + meta1.pagesize, self.pagesize + ); + Some(meta1) + } + (false, true) => { + assert_eq!( + meta2.pagesize, self.pagesize, + "Invalid pagesize from meta2 {}. Expected {}.", + meta2.pagesize, self.pagesize + ); + Some(meta2) + } + (false, false) => None, + } + }}; + } - Ok(meta.clone()) + if let Some(meta) = check_meta!(meta) { + Ok(meta.clone()) + } else if let Some(old_meta) = check_meta!(old_meta) { + Ok(old_meta.into()) + } else { + panic!("NO VALID META PAGES"); + } } } diff --git a/src/meta.rs b/src/meta.rs index 131db36..2ed6681 100644 --- a/src/meta.rs +++ b/src/meta.rs @@ -1,5 +1,7 @@ -use fnv::FnvHasher; use std::hash::Hasher; + +use fnv::FnvHasher; + use crate::{bucket::BucketMeta, page::PageID}; #[repr(C)] @@ -38,7 +40,6 @@ impl Meta { } } - #[cfg(test)] mod tests { use super::*; @@ -71,3 +72,76 @@ mod tests { assert_eq!(meta.hash, meta.hash_self()); } } + +// OldMeta is the metadata format for versions <= 0.10 +// For now we check all databases for either metadata version, +// but always write the new format. +use std::io::Write; + +use bytes::BufMut; +use sha3::{Digest, Sha3_256}; + +#[repr(C)] +#[derive(Debug, Clone)] +pub(crate) struct OldMeta { + pub(crate) meta_page: u32, + pub(crate) magic: u32, + pub(crate) version: u32, + pub(crate) pagesize: u64, + pub(crate) root: BucketMeta, + pub(crate) num_pages: PageID, + pub(crate) freelist_page: PageID, + pub(crate) tx_id: u64, + pub(crate) hash: [u8; 32], +} + +impl OldMeta { + pub(crate) fn valid(&self) -> bool { + self.hash == self.hash_self() + } + + pub(crate) fn hash_self(&self) -> [u8; 32] { + let mut hash_result: [u8; 32] = [0; 32]; + let mut hasher = Sha3_256::new(); + hasher.update(self.bytes()); + let hash = hasher.finalize(); + assert_eq!(hash.len(), 32); + hash_result.copy_from_slice(&hash[..]); + hash_result + } + + fn bytes(&self) -> bytes::Bytes { + let buf = bytes::BytesMut::new(); + let mut w = buf.writer(); + let _ = w.write(&self.meta_page.to_be_bytes()); + let _ = w.write(&self.magic.to_be_bytes()); + let _ = w.write(&self.version.to_be_bytes()); + let _ = w.write(&self.pagesize.to_be_bytes()); + let _ = w.write(&self.root.root_page.to_be_bytes()); + let _ = w.write(&self.root.next_int.to_be_bytes()); + let _ = w.write(&self.num_pages.to_be_bytes()); + let _ = w.write(&self.freelist_page.to_be_bytes()); + let _ = w.write(&self.tx_id.to_be_bytes()); + + w.into_inner().freeze() + } +} + +impl From<&OldMeta> for Meta { + fn from(val: &OldMeta) -> Self { + let mut m = Meta { + meta_page: val.meta_page, + magic: val.magic, + version: val.version, + pagesize: val.pagesize, + root: val.root, + num_pages: val.num_pages, + freelist_page: val.freelist_page, + tx_id: val.tx_id, + hash: 0, + }; + + m.hash = m.hash_self(); + m + } +} diff --git a/src/page.rs b/src/page.rs index 42cd53a..6489bf6 100644 --- a/src/page.rs +++ b/src/page.rs @@ -9,7 +9,7 @@ use memmap2::Mmap; use crate::{ errors::Result, - meta::Meta, + meta::{Meta, OldMeta}, node::{Node, NodeData, NodeType}, }; @@ -66,17 +66,42 @@ impl Page { } pub(crate) fn meta(&self) -> &Meta { - assert_eq!(self.page_type, Page::TYPE_META); + assert_eq!( + self.page_type, + Page::TYPE_META, + "Did not find meta page, found {}", + self.page_type + ); unsafe { &*(&self.ptr as *const u64 as *const Meta) } } + pub(crate) fn old_meta(&self) -> &OldMeta { + assert_eq!( + self.page_type, + Page::TYPE_META, + "Did not find meta page, found {}", + self.page_type + ); + unsafe { &*(&self.ptr as *const u64 as *const OldMeta) } + } + pub(crate) fn meta_mut(&mut self) -> &mut Meta { - assert_eq!(self.page_type, Page::TYPE_META); + assert_eq!( + self.page_type, + Page::TYPE_META, + "Did not find meta page, found {}", + self.page_type + ); unsafe { &mut *(&mut self.ptr as *mut u64 as *mut Meta) } } pub(crate) fn freelist(&self) -> &[PageID] { - assert_eq!(self.page_type, Page::TYPE_FREELIST); + assert_eq!( + self.page_type, + Page::TYPE_FREELIST, + "Did not find freelist page, found {}", + self.page_type + ); unsafe { let start = &self.ptr as *const u64 as *const PageID; from_raw_parts(start, self.count as usize) @@ -84,7 +109,12 @@ impl Page { } pub(crate) fn freelist_mut(&mut self) -> &mut [PageID] { - assert_eq!(self.page_type, Page::TYPE_FREELIST); + assert_eq!( + self.page_type, + Page::TYPE_FREELIST, + "Did not find freelist page, found {}", + self.page_type + ); unsafe { let start = &self.ptr as *const u64 as *mut PageID; from_raw_parts_mut(start, self.count as usize) @@ -92,7 +122,12 @@ impl Page { } pub(crate) fn leaf_elements(&self) -> &[LeafElement] { - assert_eq!(self.page_type, Page::TYPE_LEAF); + assert_eq!( + self.page_type, + Page::TYPE_LEAF, + "Did not find leaf page, found {}", + self.page_type + ); unsafe { let start = &self.ptr as *const u64 as *const LeafElement; from_raw_parts(start, self.count as usize) @@ -100,7 +135,12 @@ impl Page { } pub(crate) fn branch_elements(&self) -> &[BranchElement] { - assert_eq!(self.page_type, Page::TYPE_BRANCH); + assert_eq!( + self.page_type, + Page::TYPE_BRANCH, + "Did not find branch page, found {}", + self.page_type + ); unsafe { let start = &self.ptr as *const u64 as *const BranchElement; from_raw_parts(start, self.count as usize) @@ -108,7 +148,12 @@ impl Page { } pub(crate) fn leaf_elements_mut(&mut self) -> &mut [LeafElement] { - assert_eq!(self.page_type, Page::TYPE_LEAF); + assert_eq!( + self.page_type, + Page::TYPE_LEAF, + "Did not find leaf page, found {}", + self.page_type + ); unsafe { let start = &self.ptr as *const u64 as *const LeafElement as *mut LeafElement; from_raw_parts_mut(start, self.count as usize) @@ -116,7 +161,12 @@ impl Page { } pub(crate) fn branch_elements_mut(&mut self) -> &mut [BranchElement] { - assert_eq!(self.page_type, Page::TYPE_BRANCH); + assert_eq!( + self.page_type, + Page::TYPE_BRANCH, + "Did not find branch page, found {}", + self.page_type + ); unsafe { let start = &self.ptr as *const u64 as *const BranchElement as *mut BranchElement; from_raw_parts_mut(start, self.count as usize) diff --git a/tests/common/record.rs b/tests/common/record.rs index 2fe4aad..9d6bf40 100644 --- a/tests/common/record.rs +++ b/tests/common/record.rs @@ -416,12 +416,7 @@ pub fn log_playback(name: &str) -> Result<(), Error> { Ok(()) } -fn mutate_buckets<'tx, F>( - tx: &Tx<'tx>, - root: &mut FakeNode, - path: &Vec, - f: F, -) -> Result<(), Error> +fn mutate_buckets(tx: &Tx, root: &mut FakeNode, path: &Vec, f: F) -> Result<(), Error> where F: Fn(&Bucket, &mut BTreeMap) -> Result<(), Error>, { diff --git a/tests/simple_inserts.rs b/tests/simple_inserts.rs index 5fa7815..f92b943 100644 --- a/tests/simple_inserts.rs +++ b/tests/simple_inserts.rs @@ -5,7 +5,12 @@ mod common; #[test] fn super_simple() -> Result<(), Error> { - test_insert((0..=1).collect())?; + // test_insert((0..=1).collect())?; + + OpenOptions::new() + .pagesize(2048) + .open("/Users/pjtatlow/projects/pjtatlow/jammdb/demo.db")?; + Ok(()) }