Skip to content

Commit

Permalink
When exporting, use hardlinks for duplicated files
Browse files Browse the repository at this point in the history
In ostree_repo_export_tree_to_archive() and 'ostree export', when the
exported tree contains multiple files with the same checksum, write the
first occurrence as a regular file and each later occurrence as a hard
link to it.

Without this, importing a tree and then exporting it again breaks
hard links.

As an example of savings: this reduces the (compressed) size of the
Fedora Flatpak Runtime image from 1345MiB to 712MiB.

Resolves: #2925
  • Loading branch information
owtaylor committed Sep 29, 2023
1 parent 8c25452 commit cef1a98
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 12 deletions.
50 changes: 39 additions & 11 deletions src/libostree/ostree-repo-libarchive.c
Original file line number Diff line number Diff line change
Expand Up @@ -943,15 +943,10 @@ ostree_repo_write_archive_to_mtree_from_fd (OstreeRepo *self, int fd, OstreeMuta

#ifdef HAVE_LIBARCHIVE

static gboolean
file_to_archive_entry_common (GFile *root, OstreeRepoExportArchiveOptions *opts, GFile *path,
GFileInfo *file_info, struct archive_entry *entry, GError **error)
static char *
file_to_pathstr (GFile *root, OstreeRepoExportArchiveOptions *opts, GFile *path)
{
gboolean ret = FALSE;
g_autofree char *pathstr = g_file_get_relative_path (root, path);
g_autoptr (GVariant) xattrs = NULL;
time_t ts = (time_t)opts->timestamp_secs;

if (opts->path_prefix && opts->path_prefix[0])
{
g_autofree char *old_pathstr = pathstr;
Expand All @@ -964,6 +959,18 @@ file_to_archive_entry_common (GFile *root, OstreeRepoExportArchiveOptions *opts,
pathstr = g_strdup (".");
}

return g_steal_pointer (&pathstr);
}

static gboolean
file_to_archive_entry_common (GFile *root, OstreeRepoExportArchiveOptions *opts, GFile *path,
GFileInfo *file_info, struct archive_entry *entry, GError **error)
{
gboolean ret = FALSE;
g_autofree char *pathstr = file_to_pathstr (root, opts, path);
g_autoptr (GVariant) xattrs = NULL;
time_t ts = (time_t)opts->timestamp_secs;

archive_entry_update_pathname_utf8 (entry, pathstr);
archive_entry_set_ctime (entry, ts, OSTREE_TIMESTAMP);
archive_entry_set_mtime (entry, ts, OSTREE_TIMESTAMP);
Expand Down Expand Up @@ -1021,7 +1028,8 @@ write_header_free_entry (struct archive *a, struct archive_entry **entryp, GErro
static gboolean
write_directory_to_libarchive_recurse (OstreeRepo *self, OstreeRepoExportArchiveOptions *opts,
GFile *root, GFile *dir, struct archive *a,
GCancellable *cancellable, GError **error)
GHashTable *seen_checksums, GCancellable *cancellable,
GError **error)
{
gboolean ret = FALSE;
g_autoptr (GFileInfo) dir_info = NULL;
Expand Down Expand Up @@ -1057,8 +1065,8 @@ write_directory_to_libarchive_recurse (OstreeRepo *self, OstreeRepoExportArchive
/* First, handle directories recursively */
if (g_file_info_get_file_type (file_info) == G_FILE_TYPE_DIRECTORY)
{
if (!write_directory_to_libarchive_recurse (self, opts, root, path, a, cancellable,
error))
if (!write_directory_to_libarchive_recurse (self, opts, root, path, a, seen_checksums,
cancellable, error))
goto out;

/* Go to the next entry */
Expand Down Expand Up @@ -1086,9 +1094,27 @@ write_directory_to_libarchive_recurse (OstreeRepo *self, OstreeRepoExportArchive
g_autoptr (GInputStream) file_in = NULL;
g_autoptr (GFileInfo) regular_file_info = NULL;
const char *checksum;
GFile *old_path;

checksum = ostree_repo_file_get_checksum ((OstreeRepoFile *)path);

old_path = g_hash_table_lookup (seen_checksums, checksum);
if (old_path)
{
g_autofree char *old_pathstr = file_to_pathstr (root, opts, old_path);

archive_entry_set_hardlink (entry, old_pathstr);
if (!write_header_free_entry (a, &entry, error))
goto out;

break;
}
else
{
/* The checksum is owned by path (an OstreeRepoFile) */
g_hash_table_insert (seen_checksums, (char *)checksum, g_object_ref (path));
}

if (!ostree_repo_load_file (self, checksum, &file_in, &regular_file_info, NULL,
cancellable, error))
goto out;
Expand Down Expand Up @@ -1168,9 +1194,11 @@ ostree_repo_export_tree_to_archive (OstreeRepo *self, OstreeRepoExportArchiveOpt
#ifdef HAVE_LIBARCHIVE
gboolean ret = FALSE;
struct archive *a = archive;
g_autoptr (GHashTable) seen_checksums
= g_hash_table_new_full (g_str_hash, g_str_equal, NULL, g_object_unref);

if (!write_directory_to_libarchive_recurse (self, opts, (GFile *)root, (GFile *)root, a,
cancellable, error))
seen_checksums, cancellable, error))
goto out;

ret = TRUE;
Expand Down
7 changes: 7 additions & 0 deletions tests/libtest.sh
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,13 @@ setup_test_repository () {
mkdir baz/another/
echo x > baz/another/y

mkdir baz/sub1
echo SAME_CONTENT > baz/sub1/duplicate_a
echo SAME_CONTENT > baz/sub1/duplicate_b

mkdir baz/sub2
echo SAME_CONTENT > baz/sub2/duplicate_c

# if we are running inside a container we cannot test
# the overlayfs whiteout marker passthrough
if ! test -n "${OSTREE_NO_WHITEOUTS:-}"; then
Expand Down
10 changes: 9 additions & 1 deletion tests/test-export.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ fi

setup_test_repository "archive"

echo '1..5'
echo '1..6'

$OSTREE checkout test2 test2-co
$OSTREE commit --no-xattrs -b test2-noxattrs -s "test2 without xattrs" --tree=dir=test2-co
Expand Down Expand Up @@ -81,3 +81,11 @@ assert_file_empty diff.txt
rm test2.tar diff.txt t -rf

echo 'ok export import'

# Export test2 to a tarball and list it: duplicate_b and duplicate_c must
# appear in the listing as hard links to baz/sub1/duplicate_a.
cd ${test_tmpdir}
${OSTREE} 'export' test2 -o test2.tar
tar tvf test2.tar > test2.manifest
assert_file_has_content test2.manifest 'baz/sub1/duplicate_b link to baz/sub1/duplicate_a'
assert_file_has_content test2.manifest 'baz/sub2/duplicate_c link to baz/sub1/duplicate_a'

echo 'ok export hard links'

0 comments on commit cef1a98

Please sign in to comment.