From 1adf81ecb2c45c2e90d9b5dcfb02c814ecce6d8d Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 1 May 2025 12:43:54 -0400 Subject: [PATCH 1/4] apply: integrate with the sparse index The sparse index allows storing directory entries in the index, marked with the skip-worktree bit and pointing to a tree object. This may be an unexpected data shape for some implementation areas, so we are rolling it out incrementally on a builtin-per-builtin basis. This change enables the sparse index for 'git apply'. The main motivation for this change is that 'git apply' is used as a child process of 'git add -p' and expanding the sparse index for each of those child processes can lead to significant performance issues. The good news is that the actual index manipulation code used by 'git apply' is already integrated with the sparse index, so the only product change is to mark the builtin as allowing the sparse index so it isn't inflated on read. The more involved part of this change is around adding tests that verify how 'git apply' behaves in a sparse-checkout environment and whether or not the index expands in certain operations. 
Signed-off-by: Derrick Stolee --- builtin/apply.c | 7 +++- t/t1092-sparse-checkout-compatibility.sh | 53 ++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/builtin/apply.c b/builtin/apply.c index 84f1863d3ac349..a1e20c593d0903 100644 --- a/builtin/apply.c +++ b/builtin/apply.c @@ -12,7 +12,7 @@ static const char * const apply_usage[] = { int cmd_apply(int argc, const char **argv, const char *prefix, - struct repository *repo UNUSED) + struct repository *repo) { int force_apply = 0; int options = 0; @@ -35,6 +35,11 @@ int cmd_apply(int argc, &state, &force_apply, &options, apply_usage); + if (repo) { + prepare_repo_settings(repo); + repo->settings.command_requires_full_index = 0; + } + if (check_apply_state(&state, force_apply)) exit(128); diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh index f9b448792cb4a0..83353a7dbab43e 100755 --- a/t/t1092-sparse-checkout-compatibility.sh +++ b/t/t1092-sparse-checkout-compatibility.sh @@ -1340,6 +1340,30 @@ test_expect_success 'submodule handling' ' grep "160000 $(git -C initial-repo rev-parse HEAD) 0 modules/sub" cache ' +test_expect_success 'git apply functionality' ' + init_repos && + + test_all_match git checkout base && + + git -C full-checkout diff base..merge-right -- deep >patch-in-sparse && + git -C full-checkout diff base..merge-right -- folder2 >patch-outside && + + # Apply a patch to a file inside the sparse definition + test_all_match git apply --index --stat ../patch-in-sparse && + test_all_match git status --porcelain=v2 && + + # Apply a patch to a file outside the sparse definition + test_sparse_match test_must_fail git apply ../patch-outside && + grep "No such file or directory" sparse-checkout-err && + + # But it works with --index and --cached + test_all_match git apply --index --stat ../patch-outside && + test_all_match git status --porcelain=v2 && + test_all_match git reset --hard && + test_all_match git apply --cached 
--stat ../patch-outside && + test_all_match git status --porcelain=v2 +' + # When working with a sparse index, some commands will need to expand the # index to operate properly. If those commands also write the index back # to disk, they need to convert the index to sparse before writing. @@ -2345,6 +2369,35 @@ test_expect_success 'sparse-index is not expanded: check-attr' ' ensure_not_expanded check-attr -a --cached -- folder1/a ' +test_expect_success 'sparse-index is not expanded: git apply' ' + init_repos && + + git -C sparse-index checkout base && + git -C full-checkout diff base..merge-right -- deep >patch-in-sparse && + git -C full-checkout diff base..merge-right -- folder2 >patch-outside && + + # Apply a patch to a file inside the sparse definition + ensure_not_expanded apply --index --stat ../patch-in-sparse && + + # Apply a patch to a file outside the sparse definition + # Fails when caring about the worktree. + ensure_not_expanded ! apply ../patch-outside && + + # Expands when using --index. + ensure_expanded apply --index ../patch-outside && + + # Does not when index is partially expanded. + git -C sparse-index reset --hard && + ensure_not_expanded apply --cached ../patch-outside && + + # Try again with a reset and collapsed index. + git -C sparse-index reset --hard && + git -C sparse-index sparse-checkout reapply && + + # Expands when index is collapsed. + ensure_expanded apply --cached ../patch-outside +' + test_expect_success 'advice.sparseIndexExpanded' ' init_repos && From 0a2752721d0292c6e2527f7a95c846b280fda2a7 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 1 May 2025 13:09:27 -0400 Subject: [PATCH 2/4] git add: make -p/-i aware of sparse index It is slow to expand a sparse index in-memory due to parsing of trees. We aim to minimize that performance cost when possible. 'git add -p' uses 'git apply' child processes to modify the index, but still there are some expansions that occur. 
It turns out that control flows out of cmd_add() in the interactive cases before the lines that confirm that the builtin is integrated with the sparse index. Moving that integration point earlier in cmd_add() allows 'git add -p' and 'git add -i' to operate without expanding a sparse index to a full one. Add test cases that confirm that these interactive add options work with the sparse index. Signed-off-by: Derrick Stolee --- builtin/add.c | 7 +-- t/t1092-sparse-checkout-compatibility.sh | 60 ++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 3 deletions(-) diff --git a/builtin/add.c b/builtin/add.c index 747511b68bc310..7c292ffdc6c2c9 100644 --- a/builtin/add.c +++ b/builtin/add.c @@ -390,6 +390,10 @@ int cmd_add(int argc, argc = parse_options(argc, argv, prefix, builtin_add_options, builtin_add_usage, PARSE_OPT_KEEP_ARGV0); + + prepare_repo_settings(repo); + repo->settings.command_requires_full_index = 0; + if (patch_interactive) add_interactive = 1; if (add_interactive) { @@ -426,9 +430,6 @@ int cmd_add(int argc, add_new_files = !take_worktree_changes && !refresh_only && !add_renormalize; require_pathspec = !(take_worktree_changes || (0 < addremove_explicit)); - prepare_repo_settings(repo); - repo->settings.command_requires_full_index = 0; - repo_hold_locked_index(repo, &lock_file, LOCK_DIE_ON_ERROR); /* diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh index 83353a7dbab43e..c419d8b57e846a 100755 --- a/t/t1092-sparse-checkout-compatibility.sh +++ b/t/t1092-sparse-checkout-compatibility.sh @@ -384,6 +384,38 @@ test_expect_success 'add, commit, checkout' ' test_all_match git checkout - ' +test_expect_success 'git add -p' ' + init_repos && + + write_script edit-contents <<-\EOF && + echo text >>$1 + EOF + + # Does not expand when edits are within sparse checkout. 
+ run_on_all ../edit-contents deep/a && + run_on_all ../edit-contents deep/deeper1/a && + + test_write_lines y n >in && + run_on_all git add -p in && + run_on_all git add -i in && + run_on_all git add -p in && + run_on_all git add -i sparse-index/deep/a && + echo "new content" >sparse-index/deep/deeper1/a && + test_write_lines y n >in && + ensure_not_expanded add -p sparse-index/folder1/a && + test_write_lines y n y >in && + ensure_expanded add -p sparse-index/folder1/a && + test_write_lines u 2 3 "" q >in && + ensure_expanded add -i Date: Fri, 16 May 2025 09:12:38 -0400 Subject: [PATCH 3/4] reset: integrate sparse index with --patch Similar to the previous change for 'git add -p', the reset builtin checked for integration with the sparse index after possibly redirecting its logic toward the interactive logic. This means that the builtin would expand the sparse index to a full one upon read. Move this check earlier within cmd_reset() to improve performance here. Add tests to guarantee that we are not universally expanding the index. Add behavior tests to check that we are doing the same operations as a full index. 
Signed-off-by: Derrick Stolee --- builtin/reset.c | 6 ++-- t/t1092-sparse-checkout-compatibility.sh | 42 ++++++++++++++++++++++-- 2 files changed, 43 insertions(+), 5 deletions(-) diff --git a/builtin/reset.c b/builtin/reset.c index 73b4537a9a567d..dc50ffc1ac59e8 100644 --- a/builtin/reset.c +++ b/builtin/reset.c @@ -420,6 +420,9 @@ int cmd_reset(int argc, oidcpy(&oid, &tree->object.oid); } + prepare_repo_settings(the_repository); + the_repository->settings.command_requires_full_index = 0; + if (patch_mode) { if (reset_type != NONE) die(_("options '%s' and '%s' cannot be used together"), "--patch", "--{hard,mixed,soft}"); @@ -457,9 +460,6 @@ int cmd_reset(int argc, if (intent_to_add && reset_type != MIXED) die(_("the option '%s' requires '%s'"), "-N", "--mixed"); - prepare_repo_settings(the_repository); - the_repository->settings.command_requires_full_index = 0; - if (repo_read_index(the_repository) < 0) die(_("index file corrupt")); diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh index c419d8b57e846a..d8101139b40aa0 100755 --- a/t/t1092-sparse-checkout-compatibility.sh +++ b/t/t1092-sparse-checkout-compatibility.sh @@ -384,7 +384,7 @@ test_expect_success 'add, commit, checkout' ' test_all_match git checkout - ' -test_expect_success 'git add -p' ' +test_expect_success 'git add, checkout, and reset with -p' ' init_repos && write_script edit-contents <<-\EOF && @@ -398,7 +398,7 @@ test_expect_success 'git add -p' ' test_write_lines y n >in && run_on_all git add -p in && run_on_all git add -i in && run_on_all git add -i in && + test_sparse_match git checkout HEAD~1 --patch sparse-index/deep/a && + echo "new content" >sparse-index/deep/deeper1/a && + git -C sparse-index commit -a -m "inside-changes" && + + test_write_lines y y >in && + ensure_not_expanded checkout HEAD~1 --patch sparse-index/deep/a && + echo "new content" >sparse-index/deep/deeper1/a && + git -C sparse-index add . 
&& + ensure_not_expanded reset --patch sparse-index/folder1/a && + git -C sparse-index add --sparse folder1 && + git -C sparse-index sparse-checkout reapply && + ensure_expanded reset --patch sparse-index/folder1/a && + git -C sparse-index add --sparse folder1 && + git -C sparse-index commit -m "folder1 change" && + git -C sparse-index sparse-checkout reapply && + ensure_expanded checkout HEAD~1 --patch Date: Fri, 2 May 2025 10:42:15 -0400 Subject: [PATCH 4/4] p2000: add performance test for patch-mode commands The previous three changes contributed performance improvements to 'git apply', 'git add -p', and 'git reset -p' when using a sparse index. The improvement to 'git apply' also improved 'git checkout -p'. Add performance tests to demonstrate this (and to help validate that performance remains good in the future). In the truncated test output below, we see that the full checkout performance changes within noise expectations, but the sparse index cases improve 33% and then 96% for 'git add -p' and 41% and then 95% for 'git reset -p'. 'git checkout -p' improves immediately by 91% because it does not need any change to its builtin. Test HEAD~4 HEAD~3 HEAD~2 HEAD~1 ------------------------------------------------------------------------------------- 2000.118: ... git add -p (full-v3) 0.79 0.79 +0.0% 0.82 +3.8% 0.82 +3.8% 2000.119: ... git add -p (full-v4) 0.74 0.76 +2.7% 0.74 +0.0% 0.76 +2.7% 2000.120: ... git add -p (sparse-v3) 1.94 1.28 -34.0% 0.07 -96.4% 0.07 -96.4% 2000.121: ... git add -p (sparse-v4) 1.93 1.28 -33.7% 0.06 -96.9% 0.06 -96.9% 2000.122: ... git checkout -p (full-v3) 1.18 1.18 +0.0% 1.18 +0.0% 1.19 +0.8% 2000.123: ... git checkout -p (full-v4) 1.10 1.12 +1.8% 1.11 +0.9% 1.11 +0.9% 2000.124: ... git checkout -p (sparse-v3) 1.31 0.11 -91.6% 0.11 -91.6% 0.11 -91.6% 2000.125: ... git checkout -p (sparse-v4) 1.29 0.11 -91.5% 0.11 -91.5% 0.11 -91.5% 2000.126: ... git reset -p (full-v3) 0.81 0.80 -1.2% 0.83 +2.5% 0.83 +2.5% 2000.127: ... 
git reset -p (full-v4) 0.78 0.77 -1.3% 0.77 -1.3% 0.78 +0.0% 2000.128: ... git reset -p (sparse-v3) 1.58 0.92 -41.8% 0.91 -42.4% 0.07 -95.6% 2000.129: ... git reset -p (sparse-v4) 1.58 0.92 -41.8% 0.92 -41.8% 0.07 -95.6% It is worth noting that if our test was more involved and had multiple hunks to evaluate, then the time spent in 'git apply' would dominate due to multiple index loads and writes. As it stands, we need the sparse index improvement in 'git add -p' itself to confirm this performance improvement. Since the change for 'git add -i' is identical, we avoid a second test case for that similar operation. Signed-off-by: Derrick Stolee --- t/perf/p2000-sparse-operations.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/t/perf/p2000-sparse-operations.sh b/t/perf/p2000-sparse-operations.sh index 39e92b0841437b..aadf22bc2f0bb2 100755 --- a/t/perf/p2000-sparse-operations.sh +++ b/t/perf/p2000-sparse-operations.sh @@ -135,5 +135,8 @@ test_perf_on_all git diff-tree HEAD test_perf_on_all git diff-tree HEAD -- $SPARSE_CONE/a test_perf_on_all "git worktree add ../temp && git worktree remove ../temp" test_perf_on_all git check-attr -a -- $SPARSE_CONE/a +test_perf_on_all 'echo >>a && test_write_lines y | git add -p' +test_perf_on_all 'test_write_lines y y y | git checkout --patch -' +test_perf_on_all 'echo >>a && git add a && test_write_lines y | git reset --patch' test_done