nydusd: solving the mismatch between nydus chunk amplification and dynamic dedup.

The mismatch between dynamic dedup and nydus' chunk amplification can
result in a larger cache size with dedup enabled than with it disabled:
chunk amplification can cause reused (already deduplicated) chunks to be
pulled multiple times, so the cache ends up storing duplicates that
dedup was meant to avoid.

To address this issue, a dedup_bitmap is introduced. When rafs is
initialized, the dedup_bitmap is generated from the chunk information in
each blob. Whether a chunk in a blob is ready is then decided jointly by
the chunk map and the deduplication bitmap.
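
The sketch below illustrates that joint decision. It is a minimal,
self-contained example: ChunkMap, BlobState and chunk_is_ready are
simplified stand-ins invented for illustration, whereas the actual change
combines chunk_map.is_ready() / check_ready_and_mark_pending() with
BlobInfo::get_dedup_by_chunk_idx(), as the diff below shows.

    // Simplified stand-ins for illustration only -- not the real nydus types.
    struct ChunkMap {
        ready: Vec<bool>, // per-chunk readiness tracked by the blob cache
    }

    struct BlobState {
        chunk_map: ChunkMap,
        // Present only when dynamic dedup is enabled; bit i set means chunk i
        // has been deduplicated elsewhere and must not be re-fetched into
        // this blob's cache.
        dedup_bitmap: Option<Vec<bool>>,
    }

    impl BlobState {
        // Joint decision: no backend fetch is needed if the cache already
        // holds the chunk, or if the dedup bitmap marks it as deduplicated.
        fn chunk_is_ready(&self, idx: usize) -> bool {
            let cached = self.chunk_map.ready.get(idx).copied().unwrap_or(false);
            let deduped = self
                .dedup_bitmap
                .as_ref()
                .and_then(|bm| bm.get(idx).copied())
                .unwrap_or(false);
            cached || deduped
        }
    }

    fn main() {
        let blob = BlobState {
            chunk_map: ChunkMap { ready: vec![true, false, false] },
            dedup_bitmap: Some(vec![false, true, false]),
        };
        // Chunk 0 is cached, chunk 1 is deduplicated; even if amplification
        // widens the request to all three chunks, only chunk 2 is fetched.
        assert!(blob.chunk_is_ready(0));
        assert!(blob.chunk_is_ready(1));
        assert!(!blob.chunk_is_ready(2));
    }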

Signed-off-by: xwb1136021767 <[email protected]>
xwb1136021767 committed Aug 15, 2023
1 parent dff2451 commit 10ee6ca
Showing 6 changed files with 85 additions and 26 deletions.
28 changes: 14 additions & 14 deletions api/src/config.rs
@@ -30,7 +30,7 @@ pub struct ConfigV2 {
/// Configuration information for RAFS filesystem.
pub rafs: Option<RafsConfigV2>,
/// Configuration information for image deduplication.
pub dedup: Option<DeduplicationConfigV2>,
pub deduplication: Option<DeduplicationConfigV2>,
/// Internal runtime configuration.
#[serde(skip)]
pub internal: ConfigV2Internal,
@@ -44,7 +44,7 @@ impl Default for ConfigV2 {
backend: None,
cache: None,
rafs: None,
dedup: None,
deduplication: None,
internal: ConfigV2Internal::default(),
}
}
@@ -59,7 +59,7 @@ impl ConfigV2 {
backend: None,
cache: None,
rafs: None,
dedup: None,
deduplication: None,
internal: ConfigV2Internal::default(),
}
}
@@ -131,8 +131,8 @@ impl ConfigV2 {
}

/// Get configuration information for image deduplication.
pub fn get_dedup_config(&self) -> Result<&DeduplicationConfigV2> {
self.dedup.as_ref().ok_or_else(|| {
pub fn get_deduplication_config(&self) -> Result<&DeduplicationConfigV2> {
self.deduplication.as_ref().ok_or_else(|| {
Error::new(
ErrorKind::InvalidInput,
"no configuration information for deduplication",
@@ -978,7 +978,7 @@ pub struct BlobCacheEntryConfigV2 {
pub cache: CacheConfigV2,
/// Configuration information for chunk deduplication.
#[serde(default)]
pub dedup: Option<DeduplicationConfigV2>,
pub deduplication: Option<DeduplicationConfigV2>,
/// Optional file path for metadata blob.
#[serde(default)]
pub metadata_path: Option<String>,
@@ -1041,7 +1041,7 @@ impl From<&BlobCacheEntryConfigV2> for ConfigV2 {
backend: Some(c.backend.clone()),
cache: Some(c.cache.clone()),
rafs: None,
dedup: c.dedup.clone(),
deduplication: c.deduplication.clone(),
internal: ConfigV2Internal::default(),
}
}
@@ -1425,7 +1425,7 @@ struct FactoryConfig {
pub cache: CacheConfig,
/// Configuration information for image deduplication.
#[serde(default)]
pub dedup: Option<DeduplicationConfig>,
pub deduplication: Option<DeduplicationConfig>,
}

/// Rafs storage backend configuration information.
@@ -1464,7 +1464,7 @@ impl TryFrom<RafsConfig> for ConfigV2 {
fn try_from(v: RafsConfig) -> std::result::Result<Self, Self::Error> {
let backend: BackendConfigV2 = (&v.device.backend).try_into()?;
let mut cache: CacheConfigV2 = (&v.device.cache).try_into()?;
let dedup: Option<DeduplicationConfigV2> = match &v.device.dedup {
let deduplication: Option<DeduplicationConfigV2> = match &v.device.deduplication {
Some(dedup) => {
let dedup_v2: DeduplicationConfigV2 = dedup.try_into()?;
Some(dedup_v2)
@@ -1492,7 +1492,7 @@ impl TryFrom<RafsConfig> for ConfigV2 {
backend: Some(backend),
cache: Some(cache),
rafs: Some(rafs),
dedup,
deduplication,
internal: ConfigV2Internal::default(),
})
}
@@ -1611,7 +1611,7 @@ impl TryFrom<&BlobCacheEntryConfig> for BlobCacheEntryConfigV2 {
cache_validate: false,
prefetch_config: v.prefetch_config.clone(),
};
let dedup_config = match &v.dedup_config {
let deduplication_config = match &v.dedup_config {
Some(cfg) => {
let cfg_v2: DeduplicationConfigV2 = cfg.try_into()?;
Some(cfg_v2)
@@ -1623,7 +1623,7 @@ impl TryFrom<&BlobCacheEntryConfig> for BlobCacheEntryConfigV2 {
id: v.id.clone(),
backend: (&backend_config).try_into()?,
cache: (&cache_config).try_into()?,
dedup: dedup_config,
deduplication: deduplication_config,
metadata_path: v.metadata_path.clone(),
})
}
@@ -2346,9 +2346,9 @@ mod tests {
"#;
let config = ConfigV2::from_str(content).unwrap();
assert_eq!(&config.id, "");
assert!(!config.dedup.as_ref().unwrap().enable);
assert!(!config.deduplication.as_ref().unwrap().enable);
assert_eq!(
&config.dedup.unwrap().work_dir,
&config.deduplication.unwrap().work_dir,
"/home/t4/containerd/io.containerd.snapshotter.v1.nydus"
);
}
2 changes: 1 addition & 1 deletion builder/src/core/bootstrap_dedup.rs
@@ -51,7 +51,7 @@ impl BootstrapDedup {
.open(&bootstrap_path)?,
) as RafsIoReader;

let dedup_config = cfg.get_dedup_config()?;
let dedup_config = cfg.get_deduplication_config()?;
let db_dir = dedup_config.get_work_dir()?;
let cas_mgr = CasMgr::new(db_dir)?;

46 changes: 44 additions & 2 deletions rafs/src/fs.rs
@@ -18,12 +18,14 @@ use std::any::Any;
use std::cmp;
use std::ffi::{CStr, OsStr, OsString};
use std::io::Result;
use std::mem::size_of;
use std::ops::Deref;
use std::os::unix::ffi::OsStrExt;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::{Duration, SystemTime};

use bitvec::prelude::*;
use fuse_backend_rs::abi::fuse_abi::Attr;
use fuse_backend_rs::abi::fuse_abi::{stat64, statvfs64};
use fuse_backend_rs::api::filesystem::*;
@@ -37,7 +39,9 @@ use nydus_utils::{
div_round_up,
metrics::{self, FopRecorder, StatsFop::*},
};
use storage::device::BlobInfo;

use crate::metadata::layout::v5::RafsV5ChunkInfo;
use crate::metadata::{
Inode, RafsInode, RafsInodeWalkAction, RafsSuper, RafsSuperMeta, DOT, DOTDOT,
};
@@ -81,12 +85,14 @@ impl Rafs {
pub fn new(cfg: &Arc<ConfigV2>, id: &str, path: &Path) -> RafsResult<(Self, RafsIoReader)> {
// Assume all meta/data blobs are accessible, otherwise it will always cause IO errors.
cfg.internal.set_blob_accessible(true);

let cache_cfg = cfg.get_cache_config().map_err(RafsError::LoadConfig)?;
let rafs_cfg = cfg.get_rafs_config().map_err(RafsError::LoadConfig)?;
let (sb, reader) = RafsSuper::load_from_file(path, cfg.clone(), false)
.map_err(RafsError::FillSuperblock)?;
let blob_infos = sb.superblock.get_blob_infos();
let mut blob_infos = sb.superblock.get_blob_infos();

// If chunk dedup is enabled, populate the blob's dedup_bitmap.
blob_infos = Self::generate_dedup_bitmap_by_chunk_info(cfg, &blob_infos, &sb)?;
let device = BlobDevice::new(cfg, &blob_infos).map_err(RafsError::CreateDevice)?;

if cfg.is_chunk_validation_enabled() && sb.meta.has_inlined_chunk_digest() {
@@ -324,6 +330,42 @@ impl Rafs {

entry
}

fn generate_dedup_bitmap_by_chunk_info(
cfg: &Arc<ConfigV2>,
blob_infos: &Vec<Arc<BlobInfo>>,
sb: &RafsSuper,
) -> RafsResult<Vec<Arc<BlobInfo>>> {
if let Some(deduplication_config) = &cfg.deduplication {
if deduplication_config.enable {
let mut dedup_blobs = vec![];
for blob in blob_infos.iter() {
let mut blob = blob.deref().clone();
let chunk_count = blob.chunk_count() as usize;
let mut dedup_bitmap = bitvec!(0; chunk_count);
let size = sb.meta.chunk_table_size as usize;
let unit_size = size_of::<RafsV5ChunkInfo>();
if size % unit_size != 0 {
return Err(RafsError::InvalidImageData);
}

for idx in 0..(size / unit_size) {
let chunk = sb.superblock.get_chunk_info(idx as usize).unwrap();
let chunk_idx = chunk.id() as usize;
if chunk_idx < chunk_count {
dedup_bitmap.set(chunk_idx, chunk.is_deduped());
}
}
blob.set_dedup_bitmap(Some(dedup_bitmap));
dedup_blobs.push(Arc::new(blob));
}

return Ok(dedup_blobs);
}
}

Ok(blob_infos.to_owned())
}
}

impl Rafs {
25 changes: 18 additions & 7 deletions storage/src/cache/cachedfile.rs
@@ -376,11 +376,15 @@ impl FileCacheEntry {
extended_chunks[start..=end].to_vec()
}
} else {
while !extended_chunks.is_empty() {
let chunk = &extended_chunks[extended_chunks.len() - 1];
if matches!(self.chunk_map.is_ready(chunk.as_ref()), Ok(true)) {
extended_chunks.pop();
} else {
// check from start to end for dedup
for idx in 0..extended_chunks.len() {
let chunk = &extended_chunks[idx];
if matches!(self.chunk_map.is_ready(chunk.as_ref()), Ok(true))
|| self
.blob_info
.get_dedup_by_chunk_idx(chunk.as_ref().id() as usize)
{
extended_chunks.truncate(idx);
break;
}
}
@@ -591,6 +595,10 @@ impl BlobCache for FileCacheEntry {
continue;
}

if self.blob_info.get_dedup_by_chunk_idx(c.id() as usize) {
continue;
}

// For digested chunk map, we must check whether the cached data is valid because
// the digested chunk map cannot persist readiness state.
let d_size = c.uncompressed_size() as usize;
@@ -606,9 +614,12 @@
if let Ok(true) = self.chunk_map.check_ready_and_mark_pending(c.as_ref()) {
// The chunk is ready, so skip it.
continue;
} else {
pending.push(c.clone());
}
if self.blob_info.get_dedup_by_chunk_idx(c.id() as usize) {
continue;
}

pending.push(c.clone());
}
}

6 changes: 6 additions & 0 deletions storage/src/cache/dummycache.rs
@@ -136,6 +136,12 @@ impl BlobCache for DummyCache {
if !bios[0].user_io {
return Ok(0);
}
if self
.blob_info
.get_dedup_by_chunk_idx(bios[0].chunkinfo.id() as usize)
{
return Ok(0);
}
let buf = unsafe { std::slice::from_raw_parts_mut(bufs[0].as_ptr(), d_size) };
self.read_chunk_from_backend(&bios[0].chunkinfo, buf)?;
return Ok(buf.len());
4 changes: 2 additions & 2 deletions storage/src/device.rs
@@ -1181,8 +1181,8 @@ impl BlobDevice {
/// Create new blob device instance.
pub fn new(config: &Arc<ConfigV2>, blob_infos: &[Arc<BlobInfo>]) -> io::Result<BlobDevice> {
let mut blobs = Vec::with_capacity(blob_infos.len());
let dedup_config = config.get_dedup_config().ok();
let cas_mgr = dedup_config
let deduplication_config = config.get_deduplication_config().ok();
let cas_mgr = deduplication_config
.as_ref()
.filter(|config| config.get_enable() && config.get_work_dir().is_ok())
.and_then(|config| CasMgr::new(config.get_work_dir().unwrap()).ok());
