From b485e51c732d04740be55e25f6a561221486f154 Mon Sep 17 00:00:00 2001
From: lemolatoon <63438515+lemolatoon@users.noreply.github.com>
Date: Tue, 20 Aug 2024 19:11:40 -0500
Subject: [PATCH] Impl RoaringBitmap::from_bitmap_bytes

---
 roaring/src/bitmap/inherent.rs | 107 +++++++++++++++++++++++++++++++++
 1 file changed, 107 insertions(+)

diff --git a/roaring/src/bitmap/inherent.rs b/roaring/src/bitmap/inherent.rs
index 559b7579..f954d0ef 100644
--- a/roaring/src/bitmap/inherent.rs
+++ b/roaring/src/bitmap/inherent.rs
@@ -1,6 +1,7 @@
 use core::cmp::Ordering;
 use core::ops::RangeBounds;
 
+use crate::bitmap::store::{BitmapStore, Store, BITMAP_LENGTH};
 use crate::RoaringBitmap;
 
 use super::container::Container;
@@ -22,6 +23,82 @@ impl RoaringBitmap {
         RoaringBitmap { containers: Vec::new() }
     }
 
+    /// Creates a `RoaringBitmap` from a byte slice, interpreting the bytes as a bitmap.
+    ///
+    /// Bit `b` (counting from the least significant bit) of `bytes[i]` stands for the value
+    /// `offset + 8 * i + b`: the value is in the set exactly when that bit is `1`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `offset` is not a multiple of 65536 ([`u16::MAX`] + 1), or if `bytes` is long
+    /// enough to describe values greater than [`u32::MAX`].
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// use roaring::RoaringBitmap;
+    ///
+    /// let bytes = [0b00000101, 0b00000010, 0b00000000, 0b10000000];
+    /// //             ^^^^^^^^    ^^^^^^^^    ^^^^^^^^    ^^^^^^^^
+    /// //             76543210          98
+    /// let rb = RoaringBitmap::from_bitmap_bytes(0, &bytes);
+    /// assert!(rb.contains(0));
+    /// assert!(!rb.contains(1));
+    /// assert!(rb.contains(2));
+    /// assert!(rb.contains(9));
+    /// assert!(rb.contains(31));
+    /// ```
+    pub fn from_bitmap_bytes(offset: u32, bytes: &[u8]) -> RoaringBitmap {
+        const BYTES_IN_ONE_WORD: usize = core::mem::size_of::<u64>();
+        const BYTES_IN_ONE_CONTAINER: usize = BYTES_IN_ONE_WORD * BITMAP_LENGTH;
+        const BITS_IN_ONE_CONTAINER: usize = 8 * BYTES_IN_ONE_CONTAINER;
+
+        assert_eq!(
+            offset % BITS_IN_ONE_CONTAINER as u32,
+            0,
+            "offset must be a multiple of {}",
+            BITS_IN_ONE_CONTAINER
+        );
+
+        // Every chunk becomes one container; make sure the last key still fits in a `u16`,
+        // otherwise the key computation below would overflow or silently wrap around.
+        let (offset_key, _) = util::split(offset);
+        let chunk_count = (bytes.len() + BYTES_IN_ONE_CONTAINER - 1) / BYTES_IN_ONE_CONTAINER;
+        assert!(
+            chunk_count <= usize::from(u16::MAX - offset_key) + 1,
+            "bytes must not describe values greater than u32::MAX"
+        );
+
+        let mut containers = Vec::with_capacity(chunk_count);
+        for (i, chunk) in bytes.chunks(BYTES_IN_ONE_CONTAINER).enumerate() {
+            let len: u64 = chunk.iter().map(|&byte| u64::from(byte.count_ones())).sum();
+            if len == 0 {
+                // No value lives in this chunk: skip it rather than store an empty container.
+                continue;
+            }
+
+            // Decode every word as little-endian so that bit `b` of byte `i` maps to value
+            // `8 * i + b` on any target; a short trailing chunk is zero-padded.
+            let mut bits: Box<[u64; BITMAP_LENGTH]> = Box::new([0; BITMAP_LENGTH]);
+            for (word, word_bytes) in bits.iter_mut().zip(chunk.chunks(BYTES_IN_ONE_WORD)) {
+                let mut buf = [0; BYTES_IN_ONE_WORD];
+                buf[..word_bytes.len()].copy_from_slice(word_bytes);
+                *word = u64::from_le_bytes(buf);
+            }
+
+            let store = BitmapStore::from_unchecked(len, bits);
+
+            // `i < chunk_count`, so the assertion above guarantees this cannot overflow.
+            let key = offset_key + i as u16;
+            let mut container = Container { key, store: Store::Bitmap(store) };
+            container.ensure_correct_store();
+
+            containers.push(container);
+        }
+
+        RoaringBitmap { containers }
+    }
+
     /// Creates a full `RoaringBitmap`.
     ///
     /// # Examples
@@ -709,6 +786,36 @@ mod tests {
         }
     }
 
+    #[test]
+    fn test_from_bitmap_bytes() {
+        const BITS_IN_ONE_CONTAINER: u32 = 65536;
+        const BYTES_IN_ONE_CONTAINER: usize = BITS_IN_ONE_CONTAINER as usize / 8;
+
+        // One completely full container followed by a partially filled one.
+        let mut bytes = vec![0xff; BYTES_IN_ONE_CONTAINER];
+        bytes.extend(&[0b00000001, 0b00000010, 0b00000011, 0b00000100]);
+
+        let offset = BITS_IN_ONE_CONTAINER;
+        let rb = RoaringBitmap::from_bitmap_bytes(offset, &bytes);
+        for i in 0..offset {
+            assert!(!rb.contains(i));
+        }
+        for i in offset..offset + BITS_IN_ONE_CONTAINER {
+            assert!(rb.contains(i));
+        }
+        // The four trailing bytes start right after the full container.
+        let tail = offset + BITS_IN_ONE_CONTAINER;
+        let set_bits = [0, 9, 16, 17, 26];
+        for bit in 0..32 {
+            assert_eq!(rb.contains(tail + bit), set_bits.contains(&bit));
+        }
+        assert_eq!(rb.len(), u64::from(BITS_IN_ONE_CONTAINER) + 5);
+
+        // All-zero input must not leave empty containers behind.
+        assert!(RoaringBitmap::from_bitmap_bytes(0, &[]).is_empty());
+        assert!(RoaringBitmap::from_bitmap_bytes(0, &[0; 16]).is_empty());
+    }
+
     #[test]
     fn test_insert_remove_range_same_container() {
         let mut b = RoaringBitmap::new();