diff --git a/api/src/config.rs b/api/src/config.rs index ea2aef8c1b3..3448e51b2d4 100644 --- a/api/src/config.rs +++ b/api/src/config.rs @@ -30,7 +30,7 @@ pub struct ConfigV2 { /// Configuration information for RAFS filesystem. pub rafs: Option, /// Configuration information for image deduplication. - pub dedup: Option, + pub deduplication: Option, /// Internal runtime configuration. #[serde(skip)] pub internal: ConfigV2Internal, @@ -44,7 +44,7 @@ impl Default for ConfigV2 { backend: None, cache: None, rafs: None, - dedup: None, + deduplication: None, internal: ConfigV2Internal::default(), } } @@ -59,7 +59,7 @@ impl ConfigV2 { backend: None, cache: None, rafs: None, - dedup: None, + deduplication: None, internal: ConfigV2Internal::default(), } } @@ -131,8 +131,8 @@ impl ConfigV2 { } /// Get configuration information for image deduplication. - pub fn get_dedup_config(&self) -> Result<&DeduplicationConfigV2> { - self.dedup.as_ref().ok_or_else(|| { + pub fn get_deduplication_config(&self) -> Result<&DeduplicationConfigV2> { + self.deduplication.as_ref().ok_or_else(|| { Error::new( ErrorKind::InvalidInput, "no configuration information for deduplication", @@ -978,7 +978,7 @@ pub struct BlobCacheEntryConfigV2 { pub cache: CacheConfigV2, /// Configuration information for chunk deduplication. #[serde(default)] - pub dedup: Option, + pub deduplication: Option, /// Optional file path for metadata blob. #[serde(default)] pub metadata_path: Option, @@ -1041,7 +1041,7 @@ impl From<&BlobCacheEntryConfigV2> for ConfigV2 { backend: Some(c.backend.clone()), cache: Some(c.cache.clone()), rafs: None, - dedup: c.dedup.clone(), + deduplication: c.deduplication.clone(), internal: ConfigV2Internal::default(), } } @@ -1425,7 +1425,7 @@ struct FactoryConfig { pub cache: CacheConfig, /// Configuration information for image deduplication. #[serde(default)] - pub dedup: Option, + pub deduplication: Option, } /// Rafs storage backend configuration information. @@ -1464,7 +1464,7 @@ impl TryFrom for ConfigV2 { fn try_from(v: RafsConfig) -> std::result::Result { let backend: BackendConfigV2 = (&v.device.backend).try_into()?; let mut cache: CacheConfigV2 = (&v.device.cache).try_into()?; - let dedup: Option = match &v.device.dedup { + let deduplication: Option = match &v.device.deduplication { Some(dedup) => { let dedup_v2: DeduplicationConfigV2 = dedup.try_into()?; Some(dedup_v2) @@ -1492,7 +1492,7 @@ impl TryFrom for ConfigV2 { backend: Some(backend), cache: Some(cache), rafs: Some(rafs), - dedup, + deduplication, internal: ConfigV2Internal::default(), }) } @@ -1611,7 +1611,7 @@ impl TryFrom<&BlobCacheEntryConfig> for BlobCacheEntryConfigV2 { cache_validate: false, prefetch_config: v.prefetch_config.clone(), }; - let dedup_config = match &v.dedup_config { + let deduplication_config = match &v.dedup_config { Some(cfg) => { let cfg_v2: DeduplicationConfigV2 = cfg.try_into()?; Some(cfg_v2) @@ -1623,7 +1623,7 @@ impl TryFrom<&BlobCacheEntryConfig> for BlobCacheEntryConfigV2 { id: v.id.clone(), backend: (&backend_config).try_into()?, cache: (&cache_config).try_into()?, - dedup: dedup_config, + deduplication: deduplication_config, metadata_path: v.metadata_path.clone(), }) } @@ -2346,9 +2346,9 @@ mod tests { "#; let config = ConfigV2::from_str(content).unwrap(); assert_eq!(&config.id, ""); - assert!(!config.dedup.as_ref().unwrap().enable); + assert!(!config.deduplication.as_ref().unwrap().enable); assert_eq!( - &config.dedup.unwrap().work_dir, + &config.deduplication.unwrap().work_dir, "/home/t4/containerd/io.containerd.snapshotter.v1.nydus" ); } diff --git a/builder/src/core/bootstrap_dedup.rs b/builder/src/core/bootstrap_dedup.rs index ab619f2499a..aa2c990e213 100644 --- a/builder/src/core/bootstrap_dedup.rs +++ b/builder/src/core/bootstrap_dedup.rs @@ -51,7 +51,7 @@ impl BootstrapDedup { .open(&bootstrap_path)?, ) as RafsIoReader; - let dedup_config = cfg.get_dedup_config()?; + let dedup_config = cfg.get_deduplication_config()?; let db_dir = dedup_config.get_work_dir()?; let cas_mgr = CasMgr::new(db_dir)?; diff --git a/rafs/src/fs.rs b/rafs/src/fs.rs index e958c5f8f55..a20f4bb304c 100644 --- a/rafs/src/fs.rs +++ b/rafs/src/fs.rs @@ -18,12 +18,14 @@ use std::any::Any; use std::cmp; use std::ffi::{CStr, OsStr, OsString}; use std::io::Result; +use std::mem::size_of; use std::ops::Deref; use std::os::unix::ffi::OsStrExt; use std::path::{Path, PathBuf}; use std::sync::Arc; use std::time::{Duration, SystemTime}; +use bitvec::prelude::*; use fuse_backend_rs::abi::fuse_abi::Attr; use fuse_backend_rs::abi::fuse_abi::{stat64, statvfs64}; use fuse_backend_rs::api::filesystem::*; @@ -37,7 +39,9 @@ use nydus_utils::{ div_round_up, metrics::{self, FopRecorder, StatsFop::*}, }; +use storage::device::BlobInfo; +use crate::metadata::layout::v5::RafsV5ChunkInfo; use crate::metadata::{ Inode, RafsInode, RafsInodeWalkAction, RafsSuper, RafsSuperMeta, DOT, DOTDOT, }; @@ -81,12 +85,14 @@ impl Rafs { pub fn new(cfg: &Arc, id: &str, path: &Path) -> RafsResult<(Self, RafsIoReader)> { // Assume all meta/data blobs are accessible, otherwise it will always cause IO errors. cfg.internal.set_blob_accessible(true); - let cache_cfg = cfg.get_cache_config().map_err(RafsError::LoadConfig)?; let rafs_cfg = cfg.get_rafs_config().map_err(RafsError::LoadConfig)?; let (sb, reader) = RafsSuper::load_from_file(path, cfg.clone(), false) .map_err(RafsError::FillSuperblock)?; - let blob_infos = sb.superblock.get_blob_infos(); + let mut blob_infos = sb.superblock.get_blob_infos(); + + // if enable chunk dedup, modify blob's dedup_bitmap. + blob_infos = Self::generate_dedup_bitmap_by_chunk_info(cfg, &blob_infos, &sb)?; let device = BlobDevice::new(cfg, &blob_infos).map_err(RafsError::CreateDevice)?; if cfg.is_chunk_validation_enabled() && sb.meta.has_inlined_chunk_digest() { @@ -324,6 +330,42 @@ impl Rafs { entry } + + fn generate_dedup_bitmap_by_chunk_info( + cfg: &Arc, + blob_infos: &Vec>, + sb: &RafsSuper, + ) -> RafsResult>> { + if let Some(deduplication_config) = &cfg.deduplication { + if deduplication_config.enable { + let mut dedup_blobs = vec![]; + for blob in blob_infos.iter() { + let mut blob = blob.deref().clone(); + let chunk_count = blob.chunk_count() as usize; + let mut dedup_bitmap = bitvec!(0; chunk_count); + let size = sb.meta.chunk_table_size as usize; + let unit_size = size_of::(); + if size % unit_size != 0 { + return Err(RafsError::InvalidImageData); + } + + for idx in 0..(size / unit_size) { + let chunk = sb.superblock.get_chunk_info(idx as usize).unwrap(); + let chunk_idx = chunk.id() as usize; + if chunk_idx < chunk_count { + dedup_bitmap.set(chunk_idx, chunk.is_deduped()); + } + } + blob.set_dedup_bitmap(Some(dedup_bitmap)); + dedup_blobs.push(Arc::new(blob)); + } + + return Ok(dedup_blobs); + } + } + + Ok(blob_infos.to_owned()) + } } impl Rafs { diff --git a/storage/src/cache/cachedfile.rs b/storage/src/cache/cachedfile.rs index 0fc96c9cdc2..e2de177c078 100644 --- a/storage/src/cache/cachedfile.rs +++ b/storage/src/cache/cachedfile.rs @@ -376,11 +376,15 @@ impl FileCacheEntry { extended_chunks[start..=end].to_vec() } } else { - while !extended_chunks.is_empty() { - let chunk = &extended_chunks[extended_chunks.len() - 1]; - if matches!(self.chunk_map.is_ready(chunk.as_ref()), Ok(true)) { - extended_chunks.pop(); - } else { + // check from start to end for dedup + for idx in 0..extended_chunks.len() { + let chunk = &extended_chunks[idx]; + if matches!(self.chunk_map.is_ready(chunk.as_ref()), Ok(true)) + || self + .blob_info + .get_dedup_by_chunk_idx(chunk.as_ref().id() as usize) + { + extended_chunks.truncate(idx); break; } } @@ -591,6 +595,10 @@ impl BlobCache for FileCacheEntry { continue; } + if self.blob_info.get_dedup_by_chunk_idx(c.id() as usize) { + continue; + } + // For digested chunk map, we must check whether the cached data is valid because // the digested chunk map cannot persist readiness state. let d_size = c.uncompressed_size() as usize; @@ -606,9 +614,12 @@ impl BlobCache for FileCacheEntry { if let Ok(true) = self.chunk_map.check_ready_and_mark_pending(c.as_ref()) { // The chunk is ready, so skip it. continue; - } else { - pending.push(c.clone()); } + if self.blob_info.get_dedup_by_chunk_idx(c.id() as usize) { + continue; + } + + pending.push(c.clone()); } } diff --git a/storage/src/cache/dummycache.rs b/storage/src/cache/dummycache.rs index 2e554b6cbc0..156a6bded88 100644 --- a/storage/src/cache/dummycache.rs +++ b/storage/src/cache/dummycache.rs @@ -136,6 +136,12 @@ impl BlobCache for DummyCache { if !bios[0].user_io { return Ok(0); } + if self + .blob_info + .get_dedup_by_chunk_idx(bios[0].chunkinfo.id() as usize) + { + return Ok(0); + } let buf = unsafe { std::slice::from_raw_parts_mut(bufs[0].as_ptr(), d_size) }; self.read_chunk_from_backend(&bios[0].chunkinfo, buf)?; return Ok(buf.len()); diff --git a/storage/src/device.rs b/storage/src/device.rs index 122ce1dd7ee..27ed9cab271 100644 --- a/storage/src/device.rs +++ b/storage/src/device.rs @@ -1181,8 +1181,8 @@ impl BlobDevice { /// Create new blob device instance. pub fn new(config: &Arc, blob_infos: &[Arc]) -> io::Result { let mut blobs = Vec::with_capacity(blob_infos.len()); - let dedup_config = config.get_dedup_config().ok(); - let cas_mgr = dedup_config + let deduplication_config = config.get_deduplication_config().ok(); + let cas_mgr = deduplication_config .as_ref() .filter(|config| config.get_enable() && config.get_work_dir().is_ok()) .and_then(|config| CasMgr::new(config.get_work_dir().unwrap()).ok());