Skip to content

Commit

Permalink
feat: Disk cache & archive-based files for Debian
Browse files Browse the repository at this point in the history
  • Loading branch information
VorpalBlade committed Jul 26, 2024
1 parent eb7210c commit 213d8bd
Show file tree
Hide file tree
Showing 18 changed files with 421 additions and 155 deletions.
12 changes: 12 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ rune-modules = "0.13.4"
rust-ini = "0.21.0"
scopeguard = "1.2.0"
serde = "1.0.204"
serde_bytes = "0.11.15"
serde_json = "1.0.120"
smallvec = { version = "1.13.2", features = [
"const_generics",
Expand Down
1 change: 1 addition & 0 deletions crates/konfigkoll/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ tokio = { workspace = true, features = [
tracing-log.workspace = true
tracing-subscriber = { workspace = true, features = ["env-filter", "parking_lot"] }
tracing.workspace = true
dashmap.workspace = true

[target.'cfg(target_env = "musl")'.dependencies]
# The allocator on musl is atrociously slow, so we use a custom one.
Expand Down
31 changes: 24 additions & 7 deletions crates/konfigkoll/src/fs_scan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@ use std::sync::Arc;

use anyhow::Context;
use compact_str::CompactString;
use dashmap::DashMap;
use itertools::Itertools;
use ouroboros::self_referencing;
use rayon::prelude::*;

use konfigkoll_types::FsInstruction;
use paketkoll_core::config::{
Expand All @@ -13,7 +16,7 @@ use paketkoll_core::config::{
use paketkoll_core::file_ops::{
canonicalize_file_entries, create_path_map, mismatching_and_unexpected_files,
};
use paketkoll_types::backend::Files;
use paketkoll_types::backend::{Files, PackageMap};
use paketkoll_types::files::FileEntry;
use paketkoll_types::files::PathMap;
use paketkoll_types::intern::Interner;
Expand All @@ -30,17 +33,31 @@ pub(crate) struct ScanResult {
pub(crate) fn scan_fs(
interner: &Arc<Interner>,
backend: &Arc<dyn Files>,
package_map: &PackageMap,
ignores: &[CompactString],
trust_mtime: bool,
) -> anyhow::Result<(ScanResult, Vec<FsInstruction>)> {
tracing::debug!("Scanning filesystem");
let mut fs_instructions_sys = vec![];
let mut files = backend.files(interner).with_context(|| {
format!(
"Failed to collect information from backend {}",
backend.name()
)
})?;
let mut files = if backend.prefer_files_from_archive() {
let all = package_map.keys().cloned().collect::<Vec<_>>();
let files = backend.files_from_archives(&all, package_map, interner)?;
let file_map = DashMap::new();
files
.into_par_iter()
.flat_map_iter(|(_pkg, files)| files)
.for_each(|entry| {
file_map.insert(entry.path.clone(), entry);
});
file_map.into_iter().map(|(_, v)| v).collect_vec()
} else {
backend.files(interner).with_context(|| {
format!(
"Failed to collect information from backend {}",
backend.name()
)
})?
};
if backend.may_need_canonicalization() {
tracing::debug!("Canonicalizing file entries");
canonicalize_file_entries(&mut files);
Expand Down
34 changes: 28 additions & 6 deletions crates/konfigkoll/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ use konfigkoll_core::state::DiffGoal;
use konfigkoll_script::Phase;
#[cfg(target_env = "musl")]
use mimalloc::MiMalloc;
use paketkoll_cache::FromArchiveCache;
use paketkoll_cache::OriginalFilesCache;
use paketkoll_core::backend::ConcreteBackend;
use paketkoll_core::paketkoll_types::intern::Interner;
Expand Down Expand Up @@ -118,8 +119,19 @@ async fn main() -> anyhow::Result<()> {
let backend = b
.create_files(&backend_cfg, &interner)
.with_context(|| format!("Failed to create backend {b}"))?;
let backend = if backend.prefer_files_from_archive() {
tracing::info!("Using archive cache for backend {}", backend.name());
// This is slow so we need to cache it
Box::new(
FromArchiveCache::from_path(backend, proj_dirs.cache_dir())
.context("Failed to create archive disk cache")?,
)
} else {
// This is fast so we don't need to cache it
backend
};
let backend = OriginalFilesCache::from_path(backend, proj_dirs.cache_dir())
.context("Failed to create disk cache")?;
.context("Failed to create original files disk cache")?;
Arc::new(backend)
};

Expand All @@ -133,6 +145,10 @@ async fn main() -> anyhow::Result<()> {
// Script: Get FS ignores
script_engine.run_phase(Phase::Ignores).await?;

tracing::info!("Waiting for package loading results...");
let (pkgs_sys, package_maps) = package_loader.await??;
tracing::info!("Got package loading results");

// Do FS scan
tracing::info!("Starting filesystem scan background job");
let fs_instructions_sys = {
Expand All @@ -146,18 +162,24 @@ async fn main() -> anyhow::Result<()> {
let trust_mtime = cli.trust_mtime;
let interner = interner.clone();
let backends_files = backend_files.clone();
let package_map = package_maps
.get(&backend_files.as_backend_enum())
.expect("No matching package backend?")
.clone();
tokio::task::spawn_blocking(move || {
fs_scan::scan_fs(&interner, &backends_files, &ignores, trust_mtime)
fs_scan::scan_fs(
&interner,
&backends_files,
&package_map,
&ignores,
trust_mtime,
)
})
};

// Script: Do early package phase
script_engine.run_phase(Phase::ScriptDependencies).await?;

tracing::info!("Waiting for package loading results...");
let (pkgs_sys, package_maps) = package_loader.await??;
tracing::info!("Got package loading results");

// Create the set of package managers for use by the script
script_engine.state_mut().setup_package_managers(
&backends_pkg,
Expand Down
7 changes: 5 additions & 2 deletions crates/konfigkoll/src/pkgs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use konfigkoll_types::PkgInstructions;
use paketkoll_types::{
backend::{Backend, PackageBackendMap, PackageMap, PackageMapMap},
intern::Interner,
package::PackageInstallStatus,
};

#[tracing::instrument(skip_all)]
Expand All @@ -31,9 +32,11 @@ pub(crate) fn load_packages(
backend.name()
)
})
.map(|backend_pkgs| {
.map(|mut backend_pkgs| {
// Because we can have partially installed packages on Debian...
backend_pkgs.retain(|pkg| pkg.status == PackageInstallStatus::Installed);
let pkg_map = Arc::new(paketkoll_types::backend::packages_to_package_map(
backend_pkgs.clone(),
backend_pkgs.iter(),
));
let pkg_instructions =
konfigkoll_core::conversion::convert_packages_to_pkg_instructions(
Expand Down
5 changes: 4 additions & 1 deletion crates/paketkoll_cache/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@ cached = { workspace = true, features = [
], default-features = false }
compact_str.workspace = true
dashmap.workspace = true
paketkoll_types = { version = "0.1.0", path = "../paketkoll_types" }
paketkoll_types = { version = "0.1.0", path = "../paketkoll_types", features = [
"serde",
] }
rayon.workspace = true
serde = { workspace = true, features = ["derive"] }
tracing.workspace = true

Expand Down
35 changes: 23 additions & 12 deletions crates/paketkoll_cache/src/from_archives.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ pub struct FromArchiveCache {

impl FromArchiveCache {
pub fn from_path(inner: Box<dyn Files>, path: &Path) -> anyhow::Result<Self> {
let cache = DiskCacheBuilder::new(inner.name())
let cache = DiskCacheBuilder::new("from_archives")
.set_refresh(true)
.set_lifespan(60 * 60 * 24 * 15) // Half a month
.set_disk_directory(path)
Expand Down Expand Up @@ -170,19 +170,30 @@ impl Files for FromArchiveCache {
}
}
// Fetch uncached queries
let uncached_results =
self.inner
.files_from_archives(&uncached_queries, package_map, interner)?;

// Insert the uncached results into the cache and update the results
for (query, result) in uncached_results.into_iter() {
let cache_key = cache_keys.remove(&query).context("Cache key not found")?;
self.cache
.cache_set(cache_key, result.iter().map(Into::into).collect())
.context("Cache set failed")?;
results.push((query, result));
if !uncached_queries.is_empty() {
let uncached_results =
self.inner
.files_from_archives(&uncached_queries, package_map, interner)?;
// Insert the uncached results into the cache and update the results
for (query, result) in uncached_results.into_iter() {
let cache_key = cache_keys.remove(&query).context("Cache key not found")?;
self.cache
.cache_set(cache_key.clone(), result.iter().map(Into::into).collect())
.with_context(|| {
format!(
"Cache set failed: pkg={} cache_key={}",
query.to_str(interner),
cache_key
)
})?;
results.push((query, result));
}
}

Ok(results)
}

/// Whether callers should obtain file data from package archives
/// instead of the backend's file listing. Pure delegation to the
/// wrapped backend — the cache layer does not change this policy.
fn prefer_files_from_archive(&self) -> bool {
self.inner.prefer_files_from_archive()
}
}
6 changes: 5 additions & 1 deletion crates/paketkoll_cache/src/original_files.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ pub struct OriginalFilesCache {

impl OriginalFilesCache {
pub fn from_path(inner: Box<dyn Files>, path: &Path) -> anyhow::Result<Self> {
let cache = DiskCacheBuilder::new(inner.name())
let cache = DiskCacheBuilder::new("original_files")
.set_refresh(true)
.set_lifespan(60 * 60 * 24 * 30) // A month
.set_disk_directory(path)
Expand Down Expand Up @@ -153,4 +153,8 @@ impl Files for OriginalFilesCache {
self.inner
.files_from_archives(filter, package_map, interner)
}

/// Whether callers should obtain file data from package archives
/// instead of the backend's file listing. Pure delegation to the
/// wrapped backend — the cache layer does not change this policy.
fn prefer_files_from_archive(&self) -> bool {
self.inner.prefer_files_from_archive()
}
}
34 changes: 31 additions & 3 deletions crates/paketkoll_core/src/backend/arch.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
//! The Arch Linux (and derivatives) backend

use std::{
borrow::Cow,
collections::BTreeSet,
io::BufReader,
iter::once,
Expand All @@ -9,6 +10,7 @@ use std::{

use ahash::AHashSet;
use anyhow::Context;
use bstr::{ByteSlice, ByteVec};
use compact_str::format_compact;
use dashmap::{DashMap, DashSet};
use either::Either;
Expand Down Expand Up @@ -196,7 +198,7 @@ impl Files for ArchLinux {

// Now, lets extract the requested files from the package
extract_files(archive, &queries, &mut results, pkg, |path| {
format_compact!("/{path}")
Some(format_compact!("/{path}"))
})?;
}

Expand All @@ -209,8 +211,17 @@ impl Files for ArchLinux {
package_map: &PackageMap,
interner: &Interner,
) -> Result<Vec<(PackageRef, Vec<FileEntry>)>, PackageManagerError> {
log::info!(
"Finding archives for {} packages (may take a while)",
filter.len()
);
let archives =
iterate_pkg_archives(filter, package_map, interner, &self.pacman_config.cache_dir);

log::info!(
"Loading files from {} archives (may take a while)",
filter.len()
);
let results: anyhow::Result<Vec<_>> = archives
.par_bridge()
.map(|value| {
Expand All @@ -219,7 +230,6 @@ impl Files for ArchLinux {
.collect();

let results = results?;

Ok(results)
}
}
Expand Down Expand Up @@ -257,9 +267,27 @@ fn archive_to_entries(pkg_ref: PackageRef, pkg_file: &Path) -> anyhow::Result<Ve
let archive = tar::Archive::new(decompressed);

// Now, lets extract the requested files from the package
convert_archive_entries(archive, pkg_ref, NAME, |path| format_compact!("/{path}"))
convert_archive_entries(archive, pkg_ref, NAME, |path| {
let path = path.as_os_str().as_encoded_bytes();
if SPECIAL_ARCHIVE_FILES.contains(path) {
None
} else {
let path = path.trim_end_with(|ch| ch == '/');
let path = bstr::concat([b"/", path]);
Some(Cow::Owned(path.into_path_buf().expect("Invalid path")))
}
})
}

/// Files to ignore when reading archives.
///
/// These entries appear at the root of Arch Linux package archives and
/// are mapped to `None` (i.e. dropped) when converting archive entries
/// to file entries — presumably pacman package metadata rather than
/// files installed onto the filesystem (TODO: confirm full list).
const SPECIAL_ARCHIVE_FILES: phf::Set<&'static [u8]> = phf::phf_set! {
b".BUILDINFO",
b".CHANGELOG",
b".PKGINFO",
b".INSTALL",
b".MTREE",
};

fn format_pkg_filename(interner: &Interner, package: &PackageInterned) -> String {
format!(
"{}-{}-{}.pkg.tar.zst",
Expand Down
Loading

0 comments on commit 213d8bd

Please sign in to comment.