diff --git a/builtin/backfill.c b/builtin/backfill.c index d794dd842f65ce..b661c34f7c398a 100644 --- a/builtin/backfill.c +++ b/builtin/backfill.c @@ -85,6 +85,7 @@ static int do_backfill(struct backfill_context *ctx) if (ctx->sparse) { CALLOC_ARRAY(info.pl, 1); + info.pl_sparse_trees = 1; if (get_sparse_checkout_patterns(info.pl)) { path_walk_info_clear(&info); return error(_("problem loading sparse-checkout")); @@ -144,6 +145,10 @@ int cmd_backfill(int argc, const char **argv, const char *prefix, struct reposit if (argc > 1) die(_("unrecognized argument: %s"), argv[1]); + if (!path_walk_filter_compatible(&ctx.revs.filter)) + die(_("cannot backfill with these filter options")); + if (ctx.revs.filter.blob_limit_value) + die(_("cannot backfill with blob size limits")); repo_config(repo, git_default_config, NULL); diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index dd2480a73d2edf..ba00d8148abe0a 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -4777,6 +4777,8 @@ static void get_object_list_path_walk(struct rev_info *revs) result = walk_objects_by_path(&info); trace2_region_leave("pack-objects", "path-walk", revs->repo); + path_walk_info_clear(&info); + if (result) die(_("failed to pack objects via path-walk")); } @@ -5177,7 +5179,7 @@ int cmd_pack_objects(int argc, if (path_walk) { const char *option = NULL; - if (filter_options.choice) + if (!path_walk_filter_compatible(&filter_options)) option = "--filter"; else if (use_delta_islands) option = "--delta-islands"; @@ -5190,10 +5192,7 @@ int cmd_pack_objects(int argc, } if (path_walk) { strvec_push(&rp, "--boundary"); - /* - * We must disable the bitmaps because we are removing - * the --objects / --objects-edge[-aggressive] options. 
- */ + strvec_push(&rp, "--objects"); use_bitmap_index = 0; } else if (thin) { use_internal_rev_list = 1; diff --git a/path-walk.c b/path-walk.c index 6e426af4330893..700617ee2fe72c 100644 --- a/path-walk.c +++ b/path-walk.c @@ -9,6 +9,9 @@ #include "hashmap.h" #include "hex.h" #include "list-objects.h" +#include "list-objects-filter-options.h" +#include "object-name.h" +#include "odb.h" #include "object.h" #include "oid-array.h" #include "path.h" @@ -178,11 +181,6 @@ static int add_tree_entries(struct path_walk_context *ctx, return -1; } - /* Skip this object if already seen. */ - if (o->flags & SEEN) - continue; - o->flags |= SEEN; - strbuf_setlen(&path, base_len); strbuf_add(&path, entry.path, entry.pathlen); @@ -193,6 +191,40 @@ static int add_tree_entries(struct path_walk_context *ctx, if (type == OBJ_TREE) strbuf_addch(&path, '/'); + if (o->flags & SEEN) { + /* + * A tree with a shared OID may appear at multiple + * paths. Even though we already added this tree to + * the output at some other path, we still need to + * walk into it at this in-cone path to discover + * blobs that were not found at the earlier + * out-of-cone path. + * + * Only do this for paths not yet in our map, to + * avoid duplicate entries when the same tree OID + * appears at the same path across multiple commits. 
+ */ + if (type == OBJ_TREE && ctx->info->pl && + ctx->info->pl->use_cone_patterns && + !ctx->info->pl_sparse_trees && + !strmap_contains(&ctx->paths_to_lists, path.buf)) { + int dtype; + enum pattern_match_result m; + m = path_matches_pattern_list(path.buf, path.len, + path.buf + base_len, + &dtype, + ctx->info->pl, + ctx->repo->index); + if (m != NOT_MATCHED) { + add_path_to_list(ctx, path.buf, type, + &entry.oid, + !(o->flags & UNINTERESTING)); + push_to_stack(ctx, path.buf); + } + } + continue; + } + if (ctx->info->pl) { int dtype; enum pattern_match_result match; @@ -202,7 +234,8 @@ static int add_tree_entries(struct path_walk_context *ctx, ctx->repo->index); if (ctx->info->pl->use_cone_patterns && - match == NOT_MATCHED) + match == NOT_MATCHED && + (type == OBJ_BLOB || ctx->info->pl_sparse_trees)) continue; else if (!ctx->info->pl->use_cone_patterns && type == OBJ_BLOB && @@ -237,6 +270,7 @@ static int add_tree_entries(struct path_walk_context *ctx, continue; } + o->flags |= SEEN; add_path_to_list(ctx, path.buf, type, &entry.oid, !(o->flags & UNINTERESTING)); @@ -314,9 +348,29 @@ static int walk_path(struct path_walk_context *ctx, /* Evaluate function pointer on this data, if requested. 
*/ if ((list->type == OBJ_TREE && ctx->info->trees) || (list->type == OBJ_BLOB && ctx->info->blobs) || - (list->type == OBJ_TAG && ctx->info->tags)) - ret = ctx->info->path_fn(path, &list->oids, list->type, - ctx->info->path_fn_data); + (list->type == OBJ_TAG && ctx->info->tags)) { + struct oid_array *oids = &list->oids; + struct oid_array filtered = OID_ARRAY_INIT; + + if (list->type == OBJ_BLOB && ctx->info->blob_limit) { + for (size_t i = 0; i < list->oids.nr; i++) { + unsigned long size; + + if (odb_read_object_info(ctx->repo->objects, + &list->oids.oid[i], + &size) != OBJ_BLOB || + size < ctx->info->blob_limit) + oid_array_append(&filtered, + &list->oids.oid[i]); + } + oids = &filtered; + } + + if (oids->nr) + ret = ctx->info->path_fn(path, oids, list->type, + ctx->info->path_fn_data); + oid_array_clear(&filtered); + } /* Expand data for children. */ if (list->type == OBJ_TREE) { @@ -485,6 +539,85 @@ static int setup_pending_objects(struct path_walk_info *info, return 0; } +static int prepare_filters(struct path_walk_info *info, + struct list_objects_filter_options *options) +{ + switch (options->choice) { + case LOFC_DISABLED: + return 1; + + case LOFC_BLOB_NONE: + if (info) { + info->blobs = 0; + list_objects_filter_release(options); + } + return 1; + + case LOFC_BLOB_LIMIT: + if (info) { + if (!options->blob_limit_value) { + info->blobs = 0; + } else { + info->blob_limit = options->blob_limit_value; + } + list_objects_filter_release(options); + } + return 1; + + case LOFC_SPARSE_OID: + if (info) { + struct object_id sparse_oid; + struct repository *repo = info->revs->repo; + + if (info->pl) { + warning(_("sparse filter cannot be combined with existing sparse patterns")); + return 0; + } + + if (repo_get_oid_with_flags(repo, + options->sparse_oid_name, + &sparse_oid, + GET_OID_BLOB)) { + error(_("unable to access sparse blob in '%s'"), + options->sparse_oid_name); + return 0; + } + + CALLOC_ARRAY(info->pl, 1); + info->pl->use_cone_patterns = 1; + + if 
(add_patterns_from_blob_to_list(&sparse_oid, "", 0, + info->pl) < 0) { + clear_pattern_list(info->pl); + FREE_AND_NULL(info->pl); + error(_("unable to parse sparse filter data in '%s'"), + oid_to_hex(&sparse_oid)); + return 0; + } + + if (!info->pl->use_cone_patterns) { + clear_pattern_list(info->pl); + FREE_AND_NULL(info->pl); + warning(_("sparse filter is not cone-mode compatible")); + return 0; + } + + list_objects_filter_release(options); + } + return 1; + + default: + error(_("object filter '%s' not supported by the path-walk API"), + list_objects_filter_spec(options)); + return 0; + } +} + +int path_walk_filter_compatible(struct list_objects_filter_options *options) +{ + return prepare_filters(NULL, options); +} + /** * Given the configuration of 'info', walk the commits based on 'info->revs' and * call 'info->path_fn' on each discovered path. @@ -512,6 +645,9 @@ int walk_objects_by_path(struct path_walk_info *info) trace2_region_enter("path-walk", "commit-walk", info->revs->repo); + if (!prepare_filters(info, &info->revs->filter)) + return -1; + CALLOC_ARRAY(commit_list, 1); commit_list->type = OBJ_COMMIT; diff --git a/path-walk.h b/path-walk.h index 5ef5a8440e6b5e..5fa3ff46b46354 100644 --- a/path-walk.h +++ b/path-walk.h @@ -42,6 +42,14 @@ struct path_walk_info { int blobs; int tags; + /** + * If non-zero, specifies a maximum blob size. Blobs with a + * size equal to or greater than this limit will be omitted + * from the walk. Blobs smaller than the limit (or blobs + * whose size cannot be determined) are still visited. + */ + unsigned long blob_limit; + /** * When 'prune_all_uninteresting' is set and a path has all objects * marked as UNINTERESTING, then the path-walk will not visit those @@ -64,8 +72,14 @@ struct path_walk_info { * of the cone. If not in cone mode, then all tree paths will be * explored but the path_fn will only be called when the path matches * the sparse-checkout patterns. 
+ * + * When 'pl_sparse_trees' is zero, the sparse patterns only restrict + * blobs and all trees are included in the walk output. This matches + * the behavior of the sparse:oid object filter. When nonzero, trees + * are also pruned by the sparse patterns (as used by backfill). */ struct pattern_list *pl; + int pl_sparse_trees; }; #define PATH_WALK_INFO_INIT { \ @@ -85,3 +99,10 @@ void path_walk_info_clear(struct path_walk_info *info); * Returns nonzero on an error. */ int walk_objects_by_path(struct path_walk_info *info); + +struct list_objects_filter_options; +/** + * Given a set of options for filtering objects, return 1 if the options + * are compatible with the path-walk API and 0 otherwise. + */ +int path_walk_filter_compatible(struct list_objects_filter_options *options); diff --git a/t/helper/test-path-walk.c b/t/helper/test-path-walk.c index fe63002c2be27d..3f2b50a9aa16bd 100644 --- a/t/helper/test-path-walk.c +++ b/t/helper/test-path-walk.c @@ -4,6 +4,7 @@ #include "dir.h" #include "environment.h" #include "hex.h" +#include "list-objects-filter-options.h" #include "object-name.h" #include "object.h" #include "pretty.h" @@ -67,10 +68,12 @@ static int emit_block(const char *path, struct oid_array *oids, int cmd__path_walk(int argc, const char **argv) { - int res, stdin_pl = 0; + int res, stdin_pl = 0, pl_sparse_trees = -1; struct rev_info revs = REV_INFO_INIT; struct path_walk_info info = PATH_WALK_INFO_INIT; struct path_walk_test_data data = { 0 }; + struct list_objects_filter_options filter_options = + LIST_OBJECTS_FILTER_INIT; struct option options[] = { OPT_BOOL(0, "blobs", &info.blobs, N_("toggle inclusion of blob objects")), @@ -86,11 +89,14 @@ int cmd__path_walk(int argc, const char **argv) N_("toggle aggressive edge walk")), OPT_BOOL(0, "stdin-pl", &stdin_pl, N_("read a pattern list over stdin")), + OPT_BOOL(0, "pl-sparse-trees", &pl_sparse_trees, + N_("toggle pruning of trees by sparse patterns")), + OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options), 
OPT_END(), }; setup_git_directory(); - revs.repo = the_repository; + repo_init_revisions(the_repository, &revs, NULL); argc = parse_options(argc, argv, NULL, options, path_walk_usage, @@ -101,6 +107,10 @@ int cmd__path_walk(int argc, const char **argv) else usage(path_walk_usage[0]); + /* Apply the filter after setup_revisions to avoid the --objects check. */ + if (filter_options.choice) + list_objects_filter_copy(&revs.filter, &filter_options); + info.revs = &revs; info.path_fn = emit_block; info.path_fn_data = &data; @@ -108,6 +118,8 @@ int cmd__path_walk(int argc, const char **argv) if (stdin_pl) { struct strbuf in = STRBUF_INIT; CALLOC_ARRAY(info.pl, 1); + info.pl_sparse_trees = (pl_sparse_trees >= 0) ? + pl_sparse_trees : 1; info.pl->use_cone_patterns = 1; @@ -129,6 +141,7 @@ int cmd__path_walk(int argc, const char **argv) free(info.pl); } + list_objects_filter_release(&filter_options); release_revisions(&revs); return res; } diff --git a/t/perf/p5315-pack-objects-filter.sh b/t/perf/p5315-pack-objects-filter.sh new file mode 100755 index 00000000000000..b009039c8908fd --- /dev/null +++ b/t/perf/p5315-pack-objects-filter.sh @@ -0,0 +1,129 @@ +#!/bin/sh + +test_description='Tests pack-objects performance with filters and --path-walk' +. ./perf-lib.sh + +test_perf_large_repo + +test_expect_success 'setup filter inputs' ' + # Sample a few depth-2 directories from the test repo to build + # a cone-mode sparse-checkout definition. The sampling picks + # directories at evenly-spaced positions so the choice is stable + # and scales to repos of any shape. 
+ + git ls-tree -d --name-only HEAD >top-dirs && + top_nr=$(wc -l depth2-dirs && + while read tdir + do + git ls-tree -d --name-only "HEAD:$tdir" 2>/dev/null | + sed "s|^|$tdir/|" >>depth2-dirs || return 1 + done sparse-patterns && + + git hash-object -w sparse-patterns >sparse-oid && + echo "Sparse cone: $first $mid" && + cat sparse-patterns && + test_set_prereq SPARSE_OID + elif test "$top_nr" -ge 1 + then + # Fallback: use a single top-level directory. + first=$(sed -n "1p" top-dirs) && + { + echo "/*" && + echo "!/*/" && + echo "/$first/" + } >sparse-patterns && + + git hash-object -w sparse-patterns >sparse-oid && + echo "Sparse cone: $first" && + cat sparse-patterns && + test_set_prereq SPARSE_OID + fi +' + +test_perf 'repack (no filter)' ' + git pack-objects --stdout --no-reuse-delta --revs --all pk +' + +test_size 'repack size (no filter)' ' + test_file_size pk +' + +test_perf 'repack (no filter, --path-walk)' ' + git pack-objects --stdout --no-reuse-delta --revs --all --path-walk pk +' + +test_size 'repack size (no filter, --path-walk)' ' + test_file_size pk +' + +test_perf 'repack (blob:none)' ' + git pack-objects --stdout --no-reuse-delta --revs --all --filter=blob:none pk +' + +test_size 'repack size (blob:none)' ' + test_file_size pk +' + +test_perf 'repack (blob:none, --path-walk)' ' + git pack-objects --stdout --no-reuse-delta --revs --all --path-walk \ + --filter=blob:none pk +' + +test_size 'repack size (blob:none, --path-walk)' ' + test_file_size pk +' + +test_perf 'repack (sparse:oid)' \ + --prereq SPARSE_OID ' + git pack-objects --stdout --no-reuse-delta --revs --all \ + --filter=sparse:oid=$(cat sparse-oid) pk +' + +test_size 'repack size (sparse:oid)' \ + --prereq SPARSE_OID ' + test_file_size pk +' + +test_perf 'repack (sparse:oid, --path-walk)' \ + --prereq SPARSE_OID ' + git pack-objects --stdout --no-reuse-delta --revs --all --path-walk \ + --filter=sparse:oid=$(cat sparse-oid) pk +' + +test_size 'repack size (sparse:oid, --path-walk)' \ + 
--prereq SPARSE_OID ' + test_file_size pk +' + +test_done diff --git a/t/t5317-pack-objects-filter-objects.sh b/t/t5317-pack-objects-filter-objects.sh index 501d715b9a16b7..dddb79ba627036 100755 --- a/t/t5317-pack-objects-filter-objects.sh +++ b/t/t5317-pack-objects-filter-objects.sh @@ -478,4 +478,129 @@ test_expect_success 'verify pack-objects w/ --missing=allow-any' ' EOF ' +# Test that --path-walk produces the same object set as standard traversal +# when using sparse:oid filters with cone-mode patterns. +# +# The sparse:oid filter restricts only blobs, not trees. Both standard +# and path-walk should produce identical sets of blobs, commits, and trees. + +test_expect_success 'setup pw_sparse for path-walk comparison' ' + git init pw_sparse && + mkdir -p pw_sparse/inc/sub pw_sparse/exc/sub && + + for n in 1 2 + do + echo "inc $n" >pw_sparse/inc/file$n && + echo "inc sub $n" >pw_sparse/inc/sub/file$n && + echo "exc $n" >pw_sparse/exc/file$n && + echo "exc sub $n" >pw_sparse/exc/sub/file$n && + echo "root $n" >pw_sparse/root$n || return 1 + done && + + git -C pw_sparse add . && + git -C pw_sparse commit -m "first" && + + echo "inc 1 modified" >pw_sparse/inc/file1 && + echo "exc 1 modified" >pw_sparse/exc/file1 && + echo "root 1 modified" >pw_sparse/root1 && + git -C pw_sparse add . 
&& + git -C pw_sparse commit -m "second" && + + # Cone-mode sparse pattern: include root + inc/ + printf "/*\n!/*/\n/inc/\n" | + git -C pw_sparse hash-object -w --stdin >sparse_oid +' + +test_expect_success 'sparse:oid with --path-walk produces same blobs' ' + oid=$(cat sparse_oid) && + + git -C pw_sparse pack-objects --revs --stdout \ + --filter=sparse:oid=$oid >standard.pack <<-EOF && + HEAD + EOF + git -C pw_sparse index-pack ../standard.pack && + git -C pw_sparse verify-pack -v ../standard.pack >standard_verify && + + git -C pw_sparse pack-objects --revs --stdout \ + --path-walk --filter=sparse:oid=$oid >pathwalk.pack <<-EOF && + HEAD + EOF + git -C pw_sparse index-pack ../pathwalk.pack && + git -C pw_sparse verify-pack -v ../pathwalk.pack >pathwalk_verify && + + # Blobs must match exactly + grep -E "^[0-9a-f]{40} blob" standard_verify | + awk "{print \$1}" | sort >standard_blobs && + grep -E "^[0-9a-f]{40} blob" pathwalk_verify | + awk "{print \$1}" | sort >pathwalk_blobs && + test_cmp standard_blobs pathwalk_blobs && + + # Commits must match exactly + grep -E "^[0-9a-f]{40} commit" standard_verify | + awk "{print \$1}" | sort >standard_commits && + grep -E "^[0-9a-f]{40} commit" pathwalk_verify | + awk "{print \$1}" | sort >pathwalk_commits && + test_cmp standard_commits pathwalk_commits +' + +test_expect_success 'sparse:oid with --path-walk includes all trees' ' + # The sparse:oid filter restricts only blobs, not trees. + # Both standard and path-walk should include the same trees. + grep -E "^[0-9a-f]{40} tree" standard_verify | + awk "{print \$1}" | sort >standard_trees && + grep -E "^[0-9a-f]{40} tree" pathwalk_verify | + awk "{print \$1}" | sort >pathwalk_trees && + + test_cmp standard_trees pathwalk_trees +' + +# Test the edge case where the same tree/blob OID appears at both an +# in-cone and out-of-cone path. When sibling directories have identical +# contents, they share a tree OID. 
The path-walk defers marking objects +# SEEN until after checking sparse patterns, so an object at an out-of-cone +# path can still be discovered at an in-cone path. + +test_expect_success 'setup pw_shared for shared OID across cone boundary' ' + git init pw_shared && + mkdir pw_shared/aaa pw_shared/zzz && + echo "shared content" >pw_shared/aaa/file && + echo "shared content" >pw_shared/zzz/file && + echo "root file" >pw_shared/rootfile && + git -C pw_shared add . && + git -C pw_shared commit -m "aaa and zzz share tree OID" && + + # Verify they share a tree OID + aaa_tree=$(git -C pw_shared rev-parse HEAD:aaa) && + zzz_tree=$(git -C pw_shared rev-parse HEAD:zzz) && + test "$aaa_tree" = "$zzz_tree" && + + # Cone pattern: include root + zzz/ (not aaa/) + printf "/*\n!/*/\n/zzz/\n" | + git -C pw_shared hash-object -w --stdin >shared_sparse_oid +' + +test_expect_success 'shared tree OID: --path-walk blobs match standard' ' + oid=$(cat shared_sparse_oid) && + + git -C pw_shared pack-objects --revs --stdout \ + --filter=sparse:oid=$oid >shared_std.pack <<-EOF && + HEAD + EOF + git -C pw_shared index-pack ../shared_std.pack && + git -C pw_shared verify-pack -v ../shared_std.pack >shared_std_verify && + + git -C pw_shared pack-objects --revs --stdout \ + --path-walk --filter=sparse:oid=$oid >shared_pw.pack <<-EOF && + HEAD + EOF + git -C pw_shared index-pack ../shared_pw.pack && + git -C pw_shared verify-pack -v ../shared_pw.pack >shared_pw_verify && + + grep -E "^[0-9a-f]{40} blob" shared_std_verify | + awk "{print \$1}" | sort >shared_std_blobs && + grep -E "^[0-9a-f]{40} blob" shared_pw_verify | + awk "{print \$1}" | sort >shared_pw_blobs && + test_cmp shared_std_blobs shared_pw_blobs +' + test_done diff --git a/t/t5620-backfill.sh b/t/t5620-backfill.sh index f3b5e39493677b..3c8a75192a4791 100755 --- a/t/t5620-backfill.sh +++ b/t/t5620-backfill.sh @@ -15,6 +15,14 @@ test_expect_success 'backfill rejects unexpected arguments' ' test_grep "unrecognized argument: 
--unexpected-arg" err ' +test_expect_success 'backfill rejects incompatible filter options' ' + test_must_fail git backfill --objects --filter=tree:1 2>err && + test_grep "cannot backfill with these filter options" err && + + test_must_fail git backfill --objects --filter=blob:limit=10m 2>err && + test_grep "cannot backfill with blob size limits" err +' + # We create objects in the 'src' repo. test_expect_success 'setup repo for object creation' ' echo "{print \$1}" >print_1.awk && diff --git a/t/t6601-path-walk.sh b/t/t6601-path-walk.sh index 56bd1e3c5bec97..520269dfc6506a 100755 --- a/t/t6601-path-walk.sh +++ b/t/t6601-path-walk.sh @@ -206,6 +206,43 @@ test_expect_success 'base & topic, sparse' ' test_cmp_sorted expect out ' +test_expect_success 'base & topic, sparse, no tree pruning' ' + cat >patterns <<-EOF && + /* + !/*/ + /left/ + EOF + + test-tool path-walk --stdin-pl --no-pl-sparse-trees \ + -- base topic <patterns >out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse topic) + 0:commit::$(git rev-parse base) + 0:commit::$(git rev-parse base~1) + 0:commit::$(git rev-parse base~2) + 1:tree::$(git rev-parse topic^{tree}) + 1:tree::$(git rev-parse base^{tree}) + 1:tree::$(git rev-parse base~1^{tree}) + 1:tree::$(git rev-parse base~2^{tree}) + 2:blob:a:$(git rev-parse base~2:a) + 3:tree:a/:$(git rev-parse base:a) + 4:tree:left/:$(git rev-parse base:left) + 4:tree:left/:$(git rev-parse base~2:left) + 5:blob:left/b:$(git rev-parse base~2:left/b) + 5:blob:left/b:$(git rev-parse base:left/b) + 6:tree:right/:$(git rev-parse topic:right) + 6:tree:right/:$(git rev-parse base~1:right) + 6:tree:right/:$(git rev-parse base~2:right) + blobs:3 + commits:4 + tags:0 + trees:10 + EOF + + test_cmp_sorted expect out +' + test_expect_success 'topic only' ' test-tool path-walk -- topic >out && @@ -415,4 +452,273 @@ test_expect_success 'trees are reported exactly once' ' test_line_count = 1 out-filtered ' +test_expect_success 'all, blob:none filter' ' + test-tool path-walk
--filter=blob:none -- --all >out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse topic) + 0:commit::$(git rev-parse base) + 0:commit::$(git rev-parse base~1) + 0:commit::$(git rev-parse base~2) + 1:tag:/tags:$(git rev-parse refs/tags/first) + 1:tag:/tags:$(git rev-parse refs/tags/second.1) + 1:tag:/tags:$(git rev-parse refs/tags/second.2) + 1:tag:/tags:$(git rev-parse refs/tags/third) + 1:tag:/tags:$(git rev-parse refs/tags/fourth) + 1:tag:/tags:$(git rev-parse refs/tags/tree-tag) + 1:tag:/tags:$(git rev-parse refs/tags/blob-tag) + 2:tree::$(git rev-parse topic^{tree}) + 2:tree::$(git rev-parse base^{tree}) + 2:tree::$(git rev-parse base~1^{tree}) + 2:tree::$(git rev-parse base~2^{tree}) + 2:tree::$(git rev-parse refs/tags/tree-tag^{}) + 2:tree::$(git rev-parse refs/tags/tree-tag2^{}) + 3:tree:a/:$(git rev-parse base:a) + 4:tree:child/:$(git rev-parse refs/tags/tree-tag:child) + 5:tree:left/:$(git rev-parse base:left) + 5:tree:left/:$(git rev-parse base~2:left) + 6:tree:right/:$(git rev-parse topic:right) + 6:tree:right/:$(git rev-parse base~1:right) + 6:tree:right/:$(git rev-parse base~2:right) + blobs:0 + commits:4 + tags:7 + trees:13 + EOF + + test_cmp_sorted expect out +' + +test_expect_success 'topic only, blob:none filter' ' + test-tool path-walk --filter=blob:none -- topic >out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse topic) + 0:commit::$(git rev-parse base~1) + 0:commit::$(git rev-parse base~2) + 1:tree::$(git rev-parse topic^{tree}) + 1:tree::$(git rev-parse base~1^{tree}) + 1:tree::$(git rev-parse base~2^{tree}) + 2:tree:left/:$(git rev-parse base~2:left) + 3:tree:right/:$(git rev-parse topic:right) + 3:tree:right/:$(git rev-parse base~1:right) + 3:tree:right/:$(git rev-parse base~2:right) + blobs:0 + commits:3 + tags:0 + trees:7 + EOF + + test_cmp_sorted expect out +' + +test_expect_success 'all, blob:limit=0 filter' ' + test-tool path-walk --filter=blob:limit=0 -- --all >out && + + cat >expect <<-EOF && + 0:commit::$(git 
rev-parse topic) + 0:commit::$(git rev-parse base) + 0:commit::$(git rev-parse base~1) + 0:commit::$(git rev-parse base~2) + 1:tag:/tags:$(git rev-parse refs/tags/first) + 1:tag:/tags:$(git rev-parse refs/tags/second.1) + 1:tag:/tags:$(git rev-parse refs/tags/second.2) + 1:tag:/tags:$(git rev-parse refs/tags/third) + 1:tag:/tags:$(git rev-parse refs/tags/fourth) + 1:tag:/tags:$(git rev-parse refs/tags/tree-tag) + 1:tag:/tags:$(git rev-parse refs/tags/blob-tag) + 2:tree::$(git rev-parse topic^{tree}) + 2:tree::$(git rev-parse base^{tree}) + 2:tree::$(git rev-parse base~1^{tree}) + 2:tree::$(git rev-parse base~2^{tree}) + 2:tree::$(git rev-parse refs/tags/tree-tag^{}) + 2:tree::$(git rev-parse refs/tags/tree-tag2^{}) + 3:tree:a/:$(git rev-parse base:a) + 4:tree:child/:$(git rev-parse refs/tags/tree-tag:child) + 5:tree:left/:$(git rev-parse base:left) + 5:tree:left/:$(git rev-parse base~2:left) + 6:tree:right/:$(git rev-parse topic:right) + 6:tree:right/:$(git rev-parse base~1:right) + 6:tree:right/:$(git rev-parse base~2:right) + blobs:0 + commits:4 + tags:7 + trees:13 + EOF + + test_cmp_sorted expect out +' + +test_expect_success 'all, blob:limit=3 filter' ' + test-tool path-walk --filter=blob:limit=3 -- --all >out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse topic) + 0:commit::$(git rev-parse base) + 0:commit::$(git rev-parse base~1) + 0:commit::$(git rev-parse base~2) + 1:tag:/tags:$(git rev-parse refs/tags/first) + 1:tag:/tags:$(git rev-parse refs/tags/second.1) + 1:tag:/tags:$(git rev-parse refs/tags/second.2) + 1:tag:/tags:$(git rev-parse refs/tags/third) + 1:tag:/tags:$(git rev-parse refs/tags/fourth) + 1:tag:/tags:$(git rev-parse refs/tags/tree-tag) + 1:tag:/tags:$(git rev-parse refs/tags/blob-tag) + 2:tree::$(git rev-parse topic^{tree}) + 2:tree::$(git rev-parse base^{tree}) + 2:tree::$(git rev-parse base~1^{tree}) + 2:tree::$(git rev-parse base~2^{tree}) + 2:tree::$(git rev-parse refs/tags/tree-tag^{}) + 2:tree::$(git rev-parse 
refs/tags/tree-tag2^{}) + 3:blob:a:$(git rev-parse base~2:a) + 4:tree:a/:$(git rev-parse base:a) + 5:tree:child/:$(git rev-parse refs/tags/tree-tag:child) + 6:tree:left/:$(git rev-parse base:left) + 6:tree:left/:$(git rev-parse base~2:left) + 7:blob:left/b:$(git rev-parse base~2:left/b) + 8:tree:right/:$(git rev-parse topic:right) + 8:tree:right/:$(git rev-parse base~1:right) + 8:tree:right/:$(git rev-parse base~2:right) + 9:blob:right/c:$(git rev-parse base~2:right/c) + 10:blob:right/d:$(git rev-parse base~1:right/d) + blobs:4 + commits:4 + tags:7 + trees:13 + EOF + + test_cmp_sorted expect out +' + +test_expect_success 'setup sparse filter blob' ' + # Cone-mode patterns: include root, exclude all dirs, include left/ + cat >patterns <<-\EOF && + /* + !/*/ + /left/ + EOF + sparse_oid=$(git hash-object -w -t blob patterns) +' + +test_expect_success 'all, sparse:oid filter' ' + test-tool path-walk --filter=sparse:oid=$sparse_oid -- --all >out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse topic) + 0:commit::$(git rev-parse base) + 0:commit::$(git rev-parse base~1) + 0:commit::$(git rev-parse base~2) + 1:tag:/tags:$(git rev-parse refs/tags/first) + 1:tag:/tags:$(git rev-parse refs/tags/second.1) + 1:tag:/tags:$(git rev-parse refs/tags/second.2) + 1:tag:/tags:$(git rev-parse refs/tags/third) + 1:tag:/tags:$(git rev-parse refs/tags/fourth) + 1:tag:/tags:$(git rev-parse refs/tags/tree-tag) + 1:tag:/tags:$(git rev-parse refs/tags/blob-tag) + 2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag^{}) + 2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag2^{}) + 3:tree::$(git rev-parse topic^{tree}) + 3:tree::$(git rev-parse base^{tree}) + 3:tree::$(git rev-parse base~1^{tree}) + 3:tree::$(git rev-parse base~2^{tree}) + 3:tree::$(git rev-parse refs/tags/tree-tag^{}) + 3:tree::$(git rev-parse refs/tags/tree-tag2^{}) + 4:blob:a:$(git rev-parse base~2:a) + 5:blob:file2:$(git rev-parse refs/tags/tree-tag2^{}:file2) + 6:tree:a/:$(git rev-parse base:a) + 
7:tree:child/:$(git rev-parse refs/tags/tree-tag:child) + 8:tree:left/:$(git rev-parse base:left) + 8:tree:left/:$(git rev-parse base~2:left) + 9:blob:left/b:$(git rev-parse base~2:left/b) + 9:blob:left/b:$(git rev-parse base:left/b) + 10:tree:right/:$(git rev-parse topic:right) + 10:tree:right/:$(git rev-parse base~1:right) + 10:tree:right/:$(git rev-parse base~2:right) + blobs:6 + commits:4 + tags:7 + trees:13 + EOF + + test_cmp_sorted expect out +' + +test_expect_success 'topic only, sparse:oid filter' ' + test-tool path-walk --filter=sparse:oid=$sparse_oid -- topic >out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse topic) + 0:commit::$(git rev-parse base~1) + 0:commit::$(git rev-parse base~2) + 1:tree::$(git rev-parse topic^{tree}) + 1:tree::$(git rev-parse base~1^{tree}) + 1:tree::$(git rev-parse base~2^{tree}) + 2:blob:a:$(git rev-parse base~2:a) + 3:tree:left/:$(git rev-parse base~2:left) + 4:blob:left/b:$(git rev-parse base~2:left/b) + 5:tree:right/:$(git rev-parse topic:right) + 5:tree:right/:$(git rev-parse base~1:right) + 5:tree:right/:$(git rev-parse base~2:right) + blobs:2 + commits:3 + tags:0 + trees:7 + EOF + + test_cmp_sorted expect out +' + +# Demonstrate the SEEN flag ordering issue: when the same tree/blob OID +# appears at two sibling paths where one is in-cone and the other is +# out-of-cone, the path-walk must still discover blobs at the in-cone +# path even when the shared tree OID was first encountered out-of-cone. +# Since sparse:oid includes all trees, the out-of-cone tree (aaa/) is +# walked first, and its blob is skipped. The path-walk then re-walks +# the same tree OID at the in-cone path (zzz/) to find the blob there. + +test_expect_success 'setup shared tree OID across cone boundary' ' + git checkout --orphan shared-tree && + git rm -rf . 
&& + mkdir aaa zzz && + echo "shared content" >aaa/file && + echo "shared content" >zzz/file && + echo "root file" >rootfile && + git add aaa zzz rootfile && + git commit -m "aaa and zzz have same tree OID" && + + # Verify they really share a tree OID + aaa_tree=$(git rev-parse HEAD:aaa) && + zzz_tree=$(git rev-parse HEAD:zzz) && + test "$aaa_tree" = "$zzz_tree" && + + # Cone pattern: include root + zzz/ (not aaa/) + cat >shared-patterns <<-\EOF && + /* + !/*/ + /zzz/ + EOF + shared_sparse_oid=$(git hash-object -w -t blob shared-patterns) +' + +test_expect_success 'sparse:oid with shared tree OID across cone boundary' ' + test-tool path-walk \ + --filter=sparse:oid=$shared_sparse_oid \ + -- shared-tree >out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse shared-tree) + 1:tree::$(git rev-parse shared-tree^{tree}) + 2:blob:rootfile:$(git rev-parse shared-tree:rootfile) + 3:tree:aaa/:$(git rev-parse shared-tree:aaa) + 4:tree:zzz/:$(git rev-parse shared-tree:zzz) + 5:blob:zzz/file:$(git rev-parse shared-tree:zzz/file) + blobs:2 + commits:1 + tags:0 + trees:3 + EOF + + test_cmp_sorted expect out +' + test_done