From 582c5acfbf1ccc2b065c01f86ce1d7b6afb3301e Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 21 Oct 2024 21:40:50 -0400 Subject: [PATCH] path-walk: add new 'edge_aggressive' option In preparation for allowing both the --shallow and --path-walk options in the 'git pack-objects' builtin, create a new 'edge_aggressive' option in the path-walk API. The only use of the 'edge_hint_aggressive' option in the revision API is within mark_edges_uninteresting(), which is usually called before between prepare_revision_walk() and before visiting commits with get_revision(). In prepare_revision_walk(), the UNINTERESTING commits are walked until a boundary is found. We didn't use this in the past because we would mark objects UNINTERESTING after doing the initial commit walk to the boundary. But we actually should be marking these objects as UNINTERESTING, but we shouldn't _emit_ them all via the path-walk algorithm or else our delta calculations will get really slow. Based on these observations, the way we were handling the UNINTERESTING flag in walk_objects_by_path() was overly complicated and buggy. A lot of it can be removed and simplified to work with this new approach. It also means that we will see the UNINTERESTING boundaries of paths when doing a default path-walk call, changing some existing test cases. Signed-off-by: Derrick Stolee --- Documentation/technical/api-path-walk.txt | 8 +++++ path-walk.c | 42 +++++++++-------------- path-walk.h | 7 ++++ t/t6601-path-walk.sh | 16 ++++----- 4 files changed, 40 insertions(+), 33 deletions(-) diff --git a/Documentation/technical/api-path-walk.txt b/Documentation/technical/api-path-walk.txt index 83bfe3d665e9fb..85d8e262401486 100644 --- a/Documentation/technical/api-path-walk.txt +++ b/Documentation/technical/api-path-walk.txt @@ -65,6 +65,14 @@ better off using the revision walk API instead. the revision walk so that the walk emits commits marked with the `UNINTERESTING` flag. +`edge_aggressive`:: + For performance reasons, usually only the boundary commits are + explored to find UNINTERESTING objects. However, in the case of + shallow clones it can be helpful to mark all trees and blobs + reachable from UNINTERESTING tip commits as UNINTERESTING. This + matches the behavior of `--objects-edge-aggressive` in the + revision API. + `pl`:: This pattern list pointer allows focusing the path-walk search to a set of patterns, only emitting paths that match the given diff --git a/path-walk.c b/path-walk.c index c0be95922c1615..f9bc30155c71ca 100644 --- a/path-walk.c +++ b/path-walk.c @@ -18,6 +18,7 @@ #include "trace2.h" #include "tree.h" #include "tree-walk.h" +#include "list-objects.h" struct type_and_oid_list { @@ -233,6 +234,11 @@ static void clear_strmap(struct strmap *map) strmap_init(map); } +static void show_edge(struct commit *commit UNUSED) +{ + /* empty */ +} + /** * Given the configuration of 'info', walk the commits based on 'info->revs' and * call 'info->path_fn' on each discovered path. @@ -242,7 +248,7 @@ static void clear_strmap(struct strmap *map) int walk_objects_by_path(struct path_walk_info *info) { const char *root_path = ""; - int ret = 0, has_uninteresting = 0; + int ret = 0; size_t commits_nr = 0, paths_nr = 0; struct commit *c; struct type_and_oid_list *root_tree_list; @@ -254,7 +260,6 @@ int walk_objects_by_path(struct path_walk_info *info) .path_stack = STRING_LIST_INIT_DUP, .paths_to_lists = STRMAP_INIT }; - struct oidset root_tree_set = OIDSET_INIT; trace2_region_enter("path-walk", "commit-walk", info->revs->repo); @@ -280,6 +285,15 @@ int walk_objects_by_path(struct path_walk_info *info) if (prepare_revision_walk(info->revs)) die(_("failed to setup revision walk")); + /* + * Do an initial walk of tip commits in info->revs->commits and + * info->revs->cmdline.rev to match the standard edge-walk behavior. + * + * This is particularly important when 'edge_aggressive' is set. + */ + info->revs->edge_hint_aggressive = info->edge_aggressive; + mark_edges_uninteresting(info->revs, show_edge, info->prune_all_uninteresting); + info->revs->blob_objects = info->revs->tree_objects = 0; if (info->tags) { @@ -366,17 +380,10 @@ int walk_objects_by_path(struct path_walk_info *info) if (t->object.flags & SEEN) continue; t->object.flags |= SEEN; - - if (!oidset_insert(&root_tree_set, oid)) - oid_array_append(&root_tree_list->oids, oid); + oid_array_append(&root_tree_list->oids, oid); } else { warning("could not find tree %s", oid_to_hex(oid)); } - - if (t && (c->object.flags & UNINTERESTING)) { - t->object.flags |= UNINTERESTING; - has_uninteresting = 1; - } } trace2_data_intmax("path-walk", ctx.repo, "commits", commits_nr); @@ -389,21 +396,6 @@ int walk_objects_by_path(struct path_walk_info *info) oid_array_clear(&commit_list->oids); free(commit_list); - /* - * Before performing a DFS of our paths and emitting them as interesting, - * do a full walk of the trees to distribute the UNINTERESTING bit. Use - * the sparse algorithm if prune_all_uninteresting was set. - */ - if (has_uninteresting) { - trace2_region_enter("path-walk", "uninteresting-walk", info->revs->repo); - if (info->prune_all_uninteresting) - mark_trees_uninteresting_sparse(ctx.repo, &root_tree_set); - else - mark_trees_uninteresting_dense(ctx.repo, &root_tree_set); - trace2_region_leave("path-walk", "uninteresting-walk", info->revs->repo); - } - oidset_clear(&root_tree_set); - string_list_append(&ctx.path_stack, root_path); trace2_region_enter("path-walk", "path-walk", info->revs->repo); diff --git a/path-walk.h b/path-walk.h index 090cda3b5cf8f4..6f23f16c6fb084 100644 --- a/path-walk.h +++ b/path-walk.h @@ -48,6 +48,13 @@ struct path_walk_info { */ int prune_all_uninteresting; + /** + * When 'edge_aggressive' is set, then the revision walk will use + * the '--object-edge-aggressive' option to mark even more objects + * as uniniteresting. + */ + int edge_aggressive; + /** * Specify a sparse-checkout definition to match our paths to. Do not * walk outside of this sparse definition. If the patterns are in diff --git a/t/t6601-path-walk.sh b/t/t6601-path-walk.sh index 312bf3c19c176a..d67a077d37b99d 100755 --- a/t/t6601-path-walk.sh +++ b/t/t6601-path-walk.sh @@ -181,13 +181,13 @@ test_expect_success 'topic, not base' ' COMMIT::$(git rev-parse topic) commits:1 TREE::$(git rev-parse topic^{tree}) - TREE:left/:$(git rev-parse topic:left) + TREE:left/:$(git rev-parse base~1:left):UNINTERESTING TREE:right/:$(git rev-parse topic:right) trees:3 - BLOB:a:$(git rev-parse topic:a) - BLOB:left/b:$(git rev-parse topic:left/b) + BLOB:a:$(git rev-parse base~1:a):UNINTERESTING + BLOB:left/b:$(git rev-parse base~1:left/b):UNINTERESTING BLOB:right/c:$(git rev-parse topic:right/c) - BLOB:right/d:$(git rev-parse topic:right/d) + BLOB:right/d:$(git rev-parse base~1:right/d):UNINTERESTING blobs:4 tags:0 EOF @@ -205,10 +205,10 @@ test_expect_success 'topic, not base, only blobs' ' cat >expect <<-EOF && commits:0 trees:0 - BLOB:a:$(git rev-parse topic:a) - BLOB:left/b:$(git rev-parse topic:left/b) + BLOB:a:$(git rev-parse base~1:a):UNINTERESTING + BLOB:left/b:$(git rev-parse base~1:left/b):UNINTERESTING BLOB:right/c:$(git rev-parse topic:right/c) - BLOB:right/d:$(git rev-parse topic:right/d) + BLOB:right/d:$(git rev-parse base~1:right/d):UNINTERESTING blobs:4 tags:0 EOF @@ -246,7 +246,7 @@ test_expect_success 'topic, not base, only trees' ' cat >expect <<-EOF && commits:0 TREE::$(git rev-parse topic^{tree}) - TREE:left/:$(git rev-parse topic:left) + TREE:left/:$(git rev-parse base~1:left):UNINTERESTING TREE:right/:$(git rev-parse topic:right) trees:3 blobs:0