From 24f951a4924e72e51d4b658084902390f341ec2e Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 9 Sep 2020 11:23:03 -0400 Subject: t/helper/test-read-graph.c: prepare repo settings The read-graph test-tool is used by a number of the commit-graph test to assert various properties about a commit-graph. Previously, this program never ran 'prepare_repo_settings()'. There was no need to do so, since none of the commit-graph machinery is affected by the repo settings. In the next patch, the commit-graph machinery's behavior will become dependent on the repo settings, and so loading them before running the rest of the test tool is critical. As such, teach the test tool to call 'prepare_repo_settings()'. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- t/helper/test-read-graph.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 't/helper') diff --git a/t/helper/test-read-graph.c b/t/helper/test-read-graph.c index 6d0c962438..5f585a1725 100644 --- a/t/helper/test-read-graph.c +++ b/t/helper/test-read-graph.c @@ -12,11 +12,12 @@ int cmd__read_graph(int argc, const char **argv) setup_git_directory(); odb = the_repository->objects->odb; + prepare_repo_settings(the_repository); + graph = read_commit_graph_one(the_repository, odb); if (!graph) return 1; - printf("header: %08x %d %d %d %d\n", ntohl(*(uint32_t*)graph->data), *(unsigned char*)(graph->data + 4), -- cgit 1.2.3-korg From 312cff520742c933bde070be18c51c27e132cff1 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 16 Sep 2020 14:07:32 -0400 Subject: bloom: split 'get_bloom_filter()' in two 'get_bloom_filter' takes a flag to control whether it will compute a Bloom filter if the requested one is missing. In the next patch, we'll add yet another parameter to this method, which would force all but one caller to specify an extra 'NULL' parameter at the end. Instead of doing this, split 'get_bloom_filter' into two functions: 'get_bloom_filter' and 'get_or_compute_bloom_filter'. The former only looks up a Bloom filter (and does not compute one if it's missing, thus dropping the 'compute_if_not_present' flag). The latter does compute missing Bloom filters, with an additional parameter to store whether or not it needed to do so. This simplifies many call-sites, since the majority of existing callers to 'get_bloom_filter' do not want missing Bloom filters to be computed (so they can drop the parameter entirely and use the simpler version of the function). While we're at it, instrument the new 'get_or_compute_bloom_filter()' with counters in the 'write_commit_graph_context' struct which store the number of filters that we did and didn't compute, as well as filters that were truncated. It would be nice to drop the 'compute_if_not_present' flag entirely, since all remaining callers of 'get_or_compute_bloom_filter' pass it as '1', but this will change in a future patch and hence cannot be removed. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- blame.c | 2 +- bloom.c | 16 +++++++++++++--- bloom.h | 16 +++++++++++++--- commit-graph.c | 34 +++++++++++++++++++++++++++++++--- line-log.c | 2 +- revision.c | 2 +- t/helper/test-bloom.c | 3 ++- 7 files changed, 62 insertions(+), 13 deletions(-) (limited to 't/helper') diff --git a/blame.c b/blame.c index 3e5f8787bc..756285fca7 100644 --- a/blame.c +++ b/blame.c @@ -1275,7 +1275,7 @@ static int maybe_changed_path(struct repository *r, if (commit_graph_generation(origin->commit) == GENERATION_NUMBER_INFINITY) return 1; - filter = get_bloom_filter(r, origin->commit, 0); + filter = get_bloom_filter(r, origin->commit); if (!filter) return 1; diff --git a/bloom.c b/bloom.c index cd9380ac62..393c61b4bc 100644 --- a/bloom.c +++ b/bloom.c @@ -177,9 +177,10 @@ static int pathmap_cmp(const void *hashmap_cmp_fn_data, return strcmp(e1->path, e2->path); } -struct bloom_filter *get_bloom_filter(struct repository *r, - struct commit *c, - int compute_if_not_present) +struct bloom_filter *get_or_compute_bloom_filter(struct repository *r, + struct commit *c, + int compute_if_not_present, + enum bloom_filter_computed *computed) { struct bloom_filter *filter; struct bloom_filter_settings settings = DEFAULT_BLOOM_FILTER_SETTINGS; @@ -187,6 +188,9 @@ struct bloom_filter *get_bloom_filter(struct repository *r, struct diff_options diffopt; int max_changes = 512; + if (computed) + *computed = BLOOM_NOT_COMPUTED; + if (!bloom_filters.slab_size) return NULL; @@ -271,8 +275,14 @@ struct bloom_filter *get_bloom_filter(struct repository *r, diff_free_filepair(diff_queued_diff.queue[i]); filter->data = NULL; filter->len = 0; + + if (computed) + *computed |= BLOOM_TRUNC_LARGE; } + if (computed) + *computed |= BLOOM_COMPUTED; + free(diff_queued_diff.queue); DIFF_QUEUE_CLEAR(&diff_queued_diff); diff --git a/bloom.h b/bloom.h index 0b9b59a6fe..e2e035ad14 100644 --- a/bloom.h +++ b/bloom.h @@ -89,9 +89,19 @@ void add_key_to_filter(const struct bloom_key *key, void init_bloom_filters(void); -struct bloom_filter *get_bloom_filter(struct repository *r, - struct commit *c, - int compute_if_not_present); +enum bloom_filter_computed { + BLOOM_NOT_COMPUTED = (1 << 0), + BLOOM_COMPUTED = (1 << 1), + BLOOM_TRUNC_LARGE = (1 << 2), +}; + +struct bloom_filter *get_or_compute_bloom_filter(struct repository *r, + struct commit *c, + int compute_if_not_present, + enum bloom_filter_computed *computed); + +#define get_bloom_filter(r, c) get_or_compute_bloom_filter( \ + (r), (c), 0, NULL) int bloom_filter_contains(const struct bloom_filter *filter, const struct bloom_key *key, diff --git a/commit-graph.c b/commit-graph.c index 44dceb8045..67a2812e79 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -965,6 +965,10 @@ struct write_commit_graph_context { const struct split_commit_graph_opts *split_opts; size_t total_bloom_filter_data_size; const struct bloom_filter_settings *bloom_settings; + + int count_bloom_filter_computed; + int count_bloom_filter_not_computed; + int count_bloom_filter_trunc_large; }; static int write_graph_chunk_fanout(struct hashfile *f, @@ -1176,7 +1180,7 @@ static int write_graph_chunk_bloom_indexes(struct hashfile *f, uint32_t cur_pos = 0; while (list < last) { - struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0); + struct bloom_filter *filter = get_bloom_filter(ctx->r, *list); size_t len = filter ? filter->len : 0; cur_pos += len; display_progress(ctx->progress, ++ctx->progress_cnt); @@ -1216,7 +1220,7 @@ static int write_graph_chunk_bloom_data(struct hashfile *f, hashwrite_be32(f, ctx->bloom_settings->bits_per_entry); while (list < last) { - struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0); + struct bloom_filter *filter = get_bloom_filter(ctx->r, *list); size_t len = filter ? filter->len : 0; display_progress(ctx->progress, ++ctx->progress_cnt); @@ -1386,6 +1390,16 @@ static void compute_generation_numbers(struct write_commit_graph_context *ctx) stop_progress(&ctx->progress); } +static void trace2_bloom_filter_write_statistics(struct write_commit_graph_context *ctx) +{ + trace2_data_intmax("commit-graph", ctx->r, "filter-computed", + ctx->count_bloom_filter_computed); + trace2_data_intmax("commit-graph", ctx->r, "filter-not-computed", + ctx->count_bloom_filter_not_computed); + trace2_data_intmax("commit-graph", ctx->r, "filter-trunc-large", + ctx->count_bloom_filter_trunc_large); +} + static void compute_bloom_filters(struct write_commit_graph_context *ctx) { int i; @@ -1408,12 +1422,26 @@ static void compute_bloom_filters(struct write_commit_graph_context *ctx) QSORT(sorted_commits, ctx->commits.nr, commit_gen_cmp); for (i = 0; i < ctx->commits.nr; i++) { + enum bloom_filter_computed computed = 0; struct commit *c = sorted_commits[i]; - struct bloom_filter *filter = get_bloom_filter(ctx->r, c, 1); + struct bloom_filter *filter = get_or_compute_bloom_filter( + ctx->r, + c, + 1, + &computed); + if (computed & BLOOM_COMPUTED) { + ctx->count_bloom_filter_computed++; + if (computed & BLOOM_TRUNC_LARGE) + ctx->count_bloom_filter_trunc_large++; + } else if (computed & BLOOM_NOT_COMPUTED) + ctx->count_bloom_filter_not_computed++; ctx->total_bloom_filter_data_size += sizeof(unsigned char) * filter->len; display_progress(progress, i + 1); } + if (trace2_is_enabled()) + trace2_bloom_filter_write_statistics(ctx); + free(sorted_commits); stop_progress(&progress); } diff --git a/line-log.c b/line-log.c index c53692834d..9e58fd185a 100644 --- a/line-log.c +++ b/line-log.c @@ -1159,7 +1159,7 @@ static int bloom_filter_check(struct rev_info *rev, return 1; if (!rev->bloom_filter_settings || - !(filter = get_bloom_filter(rev->repo, commit, 0))) + !(filter = get_bloom_filter(rev->repo, commit))) return 1; if (!range) diff --git a/revision.c b/revision.c index c45ed1076e..b7ec712755 100644 --- a/revision.c +++ b/revision.c @@ -753,7 +753,7 @@ static int check_maybe_different_in_bloom_filter(struct rev_info *revs, if (commit_graph_generation(commit) == GENERATION_NUMBER_INFINITY) return -1; - filter = get_bloom_filter(revs->repo, commit, 0); + filter = get_bloom_filter(revs->repo, commit); if (!filter) { count_bloom_filter_not_present++; diff --git a/t/helper/test-bloom.c b/t/helper/test-bloom.c index f0aa80b98e..531af439c2 100644 --- a/t/helper/test-bloom.c +++ b/t/helper/test-bloom.c @@ -39,7 +39,8 @@ static void get_bloom_filter_for_commit(const struct object_id *commit_oid) struct bloom_filter *filter; setup_git_directory(); c = lookup_commit(the_repository, commit_oid); - filter = get_bloom_filter(the_repository, c, 1); + filter = get_or_compute_bloom_filter(the_repository, c, 1, + NULL); print_bloom_filter(filter); } -- cgit 1.2.3-korg From 9a7a9ed10d56d6c22a0f16d7baf3f9895c47d693 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 16 Sep 2020 14:07:46 -0400 Subject: bloom: use provided 'struct bloom_filter_settings' When 'get_or_compute_bloom_filter()' needs to compute a Bloom filter from scratch, it looks to the default 'struct bloom_filter_settings' in order to determine the maximum number of changed paths, number of bits per entry, and so on. All of these values have so far been constant, and so there was no need to pass in a pointer from the caller (eg., the one that is stored in the 'struct write_commit_graph_context'). Start passing in a 'struct bloom_filter_settings *' instead of using the default values to respect graph-specific settings (eg., in the case of setting 'GIT_TEST_BLOOM_SETTINGS_MAX_CHANGED_PATHS'). In order to have an initialized value for these settings, move its initialization to earlier in the commit-graph write. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- bloom.c | 13 ++++++------- bloom.h | 3 ++- commit-graph.c | 21 ++++++++++----------- t/helper/test-bloom.c | 1 + 4 files changed, 19 insertions(+), 19 deletions(-) (limited to 't/helper') diff --git a/bloom.c b/bloom.c index 393c61b4bc..2d6aef9098 100644 --- a/bloom.c +++ b/bloom.c @@ -180,13 +180,12 @@ static int pathmap_cmp(const void *hashmap_cmp_fn_data, struct bloom_filter *get_or_compute_bloom_filter(struct repository *r, struct commit *c, int compute_if_not_present, + const struct bloom_filter_settings *settings, enum bloom_filter_computed *computed) { struct bloom_filter *filter; - struct bloom_filter_settings settings = DEFAULT_BLOOM_FILTER_SETTINGS; int i; struct diff_options diffopt; - int max_changes = 512; if (computed) *computed = BLOOM_NOT_COMPUTED; @@ -211,7 +210,7 @@ struct bloom_filter *get_or_compute_bloom_filter(struct repository *r, repo_diff_setup(r, &diffopt); diffopt.flags.recursive = 1; diffopt.detect_rename = 0; - diffopt.max_changes = max_changes; + diffopt.max_changes = settings->max_changed_paths; diff_setup_done(&diffopt); /* ensure commit is parsed so we have parent information */ @@ -223,7 +222,7 @@ struct bloom_filter *get_or_compute_bloom_filter(struct repository *r, diff_tree_oid(NULL, &c->object.oid, "", &diffopt); diffcore_std(&diffopt); - if (diffopt.num_changes <= max_changes) { + if (diffopt.num_changes <= settings->max_changed_paths) { struct hashmap pathmap; struct pathmap_hash_entry *e; struct hashmap_iter iter; @@ -260,13 +259,13 @@ struct bloom_filter *get_or_compute_bloom_filter(struct repository *r, diff_free_filepair(diff_queued_diff.queue[i]); } - filter->len = (hashmap_get_size(&pathmap) * settings.bits_per_entry + BITS_PER_WORD - 1) / BITS_PER_WORD; + filter->len = (hashmap_get_size(&pathmap) * settings->bits_per_entry + BITS_PER_WORD - 1) / BITS_PER_WORD; filter->data = xcalloc(filter->len, sizeof(unsigned char)); hashmap_for_each_entry(&pathmap, &iter, e, entry) { struct bloom_key key; - fill_bloom_key(e->path, strlen(e->path), &key, &settings); - add_key_to_filter(&key, filter, &settings); + fill_bloom_key(e->path, strlen(e->path), &key, settings); + add_key_to_filter(&key, filter, settings); } hashmap_free_entries(&pathmap, struct pathmap_hash_entry, entry); diff --git a/bloom.h b/bloom.h index e2e035ad14..c6d77e8393 100644 --- a/bloom.h +++ b/bloom.h @@ -98,10 +98,11 @@ enum bloom_filter_computed { struct bloom_filter *get_or_compute_bloom_filter(struct repository *r, struct commit *c, int compute_if_not_present, + const struct bloom_filter_settings *settings, enum bloom_filter_computed *computed); #define get_bloom_filter(r, c) get_or_compute_bloom_filter( \ - (r), (c), 0, NULL) + (r), (c), 0, NULL, NULL) int bloom_filter_contains(const struct bloom_filter *filter, const struct bloom_key *key, diff --git a/commit-graph.c b/commit-graph.c index 67a2812e79..4d6ce2967e 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1428,6 +1428,7 @@ static void compute_bloom_filters(struct write_commit_graph_context *ctx) ctx->r, c, 1, + ctx->bloom_settings, &computed); if (computed & BLOOM_COMPUTED) { ctx->count_bloom_filter_computed++; @@ -1685,17 +1686,6 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) int num_chunks = 3; uint64_t chunk_offset; struct object_id file_hash; - struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS; - - if (!ctx->bloom_settings) { - bloom_settings.bits_per_entry = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY", - bloom_settings.bits_per_entry); - bloom_settings.num_hashes = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_NUM_HASHES", - bloom_settings.num_hashes); - bloom_settings.max_changed_paths = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_MAX_CHANGED_PATHS", - bloom_settings.max_changed_paths); - ctx->bloom_settings = &bloom_settings; - } if (ctx->split) { struct strbuf tmp_file = STRBUF_INIT; @@ -2141,6 +2131,7 @@ int write_commit_graph(struct object_directory *odb, uint32_t i, count_distinct = 0; int res = 0; int replace = 0; + struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS; if (!commit_graph_compatible(the_repository)) return 0; @@ -2154,6 +2145,14 @@ int write_commit_graph(struct object_directory *odb, ctx->split_opts = split_opts; ctx->total_bloom_filter_data_size = 0; + bloom_settings.bits_per_entry = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY", + bloom_settings.bits_per_entry); + bloom_settings.num_hashes = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_NUM_HASHES", + bloom_settings.num_hashes); + bloom_settings.max_changed_paths = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_MAX_CHANGED_PATHS", + bloom_settings.max_changed_paths); + ctx->bloom_settings = &bloom_settings; + if (flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS) ctx->changed_paths = 1; if (!(flags & COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS)) { diff --git a/t/helper/test-bloom.c b/t/helper/test-bloom.c index 531af439c2..4af949164c 100644 --- a/t/helper/test-bloom.c +++ b/t/helper/test-bloom.c @@ -40,6 +40,7 @@ static void get_bloom_filter_for_commit(const struct object_id *commit_oid) setup_git_directory(); c = lookup_commit(the_repository, commit_oid); filter = get_or_compute_bloom_filter(the_repository, c, 1, + &settings, NULL); print_bloom_filter(filter); } -- cgit 1.2.3-korg