aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTaylor Blau <me@ttaylorr.com>2024-06-25 13:39:13 -0400
committerJunio C Hamano <gitster@pobox.com>2024-06-25 13:52:05 -0700
commit1343c893138e63e35b38324387ea790aba77a49f (patch)
tree912f7acd6190a0801b99e96676ba528c5687d049
parentf88611c6d06ce1d2b7fe0e0ca440ff0fbbd45da8 (diff)
downloadgit-1343c893138e63e35b38324387ea790aba77a49f.tar.gz
revision.c: consult Bloom filters for root commits
The commit-graph stores changed-path Bloom filters which represent the set of paths included in a tree-level diff between a commit's root tree and that of its parent. When a commit has no parents, the tree-diff is computed against that commit's root tree and the empty tree. In other words, every path in that commit's tree is stored in the Bloom filter (since they all appear in the diff). Consult these filters during pathspec-limited traversals in the function `rev_same_tree_as_empty()`. Doing so yields a performance improvement where we can avoid enumerating the full set of paths in a parentless commit's root tree when we know that the path(s) of interest were not listed in that commit's changed-path Bloom filter. Suggested-by: SZEDER Gábor <szeder.dev@gmail.com> Original-patch-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
-rw-r--r--revision.c26
-rwxr-xr-xt/t4216-log-bloom.sh8
2 files changed, 28 insertions, 6 deletions
diff --git a/revision.c b/revision.c
index 2424c9bd67..0e6f7d02b6 100644
--- a/revision.c
+++ b/revision.c
@@ -833,17 +833,28 @@ static int rev_compare_tree(struct rev_info *revs,
return tree_difference;
}
-static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit)
+static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit,
+ int nth_parent)
{
struct tree *t1 = repo_get_commit_tree(the_repository, commit);
+ int bloom_ret = -1;
if (!t1)
return 0;
+ if (!nth_parent && revs->bloom_keys_nr) {
+ bloom_ret = check_maybe_different_in_bloom_filter(revs, commit);
+ if (!bloom_ret)
+ return 1;
+ }
+
tree_difference = REV_TREE_SAME;
revs->pruning.flags.has_changes = 0;
diff_tree_oid(NULL, &t1->object.oid, "", &revs->pruning);
+ if (bloom_ret == 1 && tree_difference == REV_TREE_SAME)
+ count_bloom_filter_false_positive++;
+
return tree_difference == REV_TREE_SAME;
}
@@ -881,7 +892,7 @@ static int compact_treesame(struct rev_info *revs, struct commit *commit, unsign
if (nth_parent != 0)
die("compact_treesame %u", nth_parent);
old_same = !!(commit->object.flags & TREESAME);
- if (rev_same_tree_as_empty(revs, commit))
+ if (rev_same_tree_as_empty(revs, commit, nth_parent))
commit->object.flags |= TREESAME;
else
commit->object.flags &= ~TREESAME;
@@ -977,7 +988,14 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit)
return;
if (!commit->parents) {
- if (rev_same_tree_as_empty(revs, commit))
+ /*
+ * Pretend as if we are comparing ourselves to the
+ * (non-existent) first parent of this commit object. Even
+ * though no such parent exists, its changed-path Bloom filter
+ * (if one exists) is relative to the empty tree, using Bloom
+ * filters is allowed here.
+ */
+ if (rev_same_tree_as_empty(revs, commit, 0))
commit->object.flags |= TREESAME;
return;
}
@@ -1058,7 +1076,7 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit)
case REV_TREE_NEW:
if (revs->remove_empty_trees &&
- rev_same_tree_as_empty(revs, p)) {
+ rev_same_tree_as_empty(revs, p, nth_parent)) {
/* We are adding all the specified
* paths from this parent, so the
* history beyond this parent is not
diff --git a/t/t4216-log-bloom.sh b/t/t4216-log-bloom.sh
index b7baf49d62..cc6ebc8140 100755
--- a/t/t4216-log-bloom.sh
+++ b/t/t4216-log-bloom.sh
@@ -88,7 +88,11 @@ test_bloom_filters_not_used () {
# if the Bloom filter system is initialized, ensure that no
# filters were used
data="statistics:{"
- data="$data\"filter_not_present\":0,"
+ # unusable filters (e.g., those computed with a
+ # different value of commitGraph.changedPathsVersion)
+ # are counted in the filter_not_present bucket, so any
+ # value is OK there.
+ data="$data\"filter_not_present\":[0-9][0-9]*,"
data="$data\"maybe\":0,"
data="$data\"definitely_not\":0,"
data="$data\"false_positive\":0}"
@@ -175,7 +179,7 @@ test_expect_success 'setup - add commit-graph to the chain with Bloom filters' '
test_bloom_filters_used_when_some_filters_are_missing () {
log_args=$1
- bloom_trace_prefix="statistics:{\"filter_not_present\":3,\"maybe\":6,\"definitely_not\":9"
+ bloom_trace_prefix="statistics:{\"filter_not_present\":3,\"maybe\":6,\"definitely_not\":10"
setup "$log_args" &&
grep -q "$bloom_trace_prefix" "$TRASH_DIRECTORY/trace.perf" &&
test_cmp log_wo_bloom log_w_bloom