diff options
| author | Lidong Yan <yldhome2d2@gmail.com> | 2025-07-12 17:35:15 +0800 |
|---|---|---|
| committer | Junio C Hamano <gitster@pobox.com> | 2025-07-14 10:03:03 -0700 |
| commit | 90d5518a7dd53ccc7d967a3a066d688da1d7e214 (patch) | |
| tree | f133f8221a3c4b84e4b0d427ae0c35dddc2bb150 /bloom.c | |
| parent | b187353ed2b92745a903d321eaafac342a5df8d4 (diff) | |
| download | git-90d5518a7dd53ccc7d967a3a066d688da1d7e214.tar.gz | |
bloom: replace struct bloom_key * with struct bloom_keyvec
Previously, we stored bloom keys in a flat array and marked a commit
as TREESAME if any key reported "definitely not changed".
To support multiple pathspec items, we now require that for each
pathspec item, there exists a bloom key reporting "definitely not
changed".
This "for every" condition makes a flat array insufficient, so we
introduce a new structure to group keys by a single pathspec item.
`struct bloom_keyvec` is introduced to replace `struct bloom_key *`
and `bloom_key_nr`. And because we want to support multiple pathspec
items, we added a bloom_keyvec * and a bloom_keyvec_nr field to
`struct rev_info` to represent an array of bloom_keyvecs. This commit
still optimizes only one pathspec item, thus bloom_keyvec_nr can only
be 0 or 1.
New bloom_keyvec_* functions are added to create and destroy a keyvec.
bloom_filter_contains_vec() is added to check if all keys in a keyvec are
contained in a bloom filter.
Signed-off-by: Lidong Yan <502024330056@smail.nju.edu.cn>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'bloom.c')
| -rw-r--r-- | bloom.c | 61 |
1 files changed, 61 insertions, 0 deletions
@@ -278,6 +278,55 @@ void deinit_bloom_filters(void)
 	deep_clear_bloom_filter_slab(&bloom_filters, free_one_bloom_filter);
 }
 
+struct bloom_keyvec *bloom_keyvec_new(const char *path, size_t len,
+				      const struct bloom_filter_settings *settings)
+{ /* Builds one key for the full path plus one key per prefix ending just before a '/'. */
+	struct bloom_keyvec *vec;
+	const char *p;
+	size_t sz;
+	size_t nr = 1; /* key count: 1 for the full path, +1 per '/' counted below */
+
+	p = path;
+	while (*p) { /* NOTE(review): runs to the NUL terminator and ignores `len`; the fill loop below is bounded by `len` -- confirm callers pass len == strlen(path) */
+		/*
+		 * At this point, the path is normalized to use Unix-style
+		 * path separators. This is required due to how the
+		 * changed-path Bloom filters store the paths.
+		 */
+		if (*p == '/') /* NOTE(review): also counts a '/' at path[0], which the fill loop never visits -- a leading '/' would trip the assert below */
+			nr++;
+		p++;
+	}
+
+	sz = sizeof(struct bloom_keyvec);
+	sz += nr * sizeof(struct bloom_key); /* room for nr keys after the header */
+	vec = (struct bloom_keyvec *)xcalloc(1, sz); /* header and keys live in a single allocation */
+	if (!vec) /* NOTE(review): presumably unreachable if xcalloc() dies on allocation failure -- confirm */
+		return NULL;
+	vec->count = nr;
+
+	bloom_key_fill(&vec->key[0], path, len, settings); /* key[0] covers the full path */
+	nr = 1; /* reused as the index of the next key slot to fill */
+	p = path + len - 1; /* last byte of the path; NOTE(review): assumes len > 0 -- confirm */
+	while (p > path) { /* walk backwards; path[0] itself is never examined */
+		if (*p == '/') {
+			bloom_key_fill(&vec->key[nr++], path, p - path, settings); /* prefix up to, but not including, this '/' */
+		}
+		p--;
+	}
+	assert(nr == vec->count); /* both passes must have found the same number of separators */
+	return vec;
+}
+
+void bloom_keyvec_free(struct bloom_keyvec *vec)
+{ /* Clears every key in vec, then frees vec itself. */
+	if (!vec) /* a NULL vec is accepted and ignored */
+		return;
+	for (size_t nr = 0; nr < vec->count; nr++)
+		bloom_key_clear(&vec->key[nr]); /* clear each key before freeing the allocation that holds it */
+	free(vec);
+}
+
 static int pathmap_cmp(const void *hashmap_cmp_fn_data UNUSED,
 		       const struct hashmap_entry *eptr,
 		       const struct hashmap_entry *entry_or_key,
@@ -539,6 +588,18 @@ int bloom_filter_contains(const struct bloom_filter *filter,
 	return 1;
 }
 
+int bloom_filter_contains_vec(const struct bloom_filter *filter,
+			      const struct bloom_keyvec *vec,
+			      const struct bloom_filter_settings *settings)
+{ /* Queries filter with each key of vec in order, stopping early on a non-positive result. */
+	int ret = 1; /* returned unchanged when vec->count is 0 */
+
+	for (size_t nr = 0; ret > 0 && nr < vec->count; nr++) /* stops at the first key whose result is not > 0 */
+		ret = bloom_filter_contains(filter, &vec->key[nr], settings);
+
+	return ret; /* result of the last bloom_filter_contains() call made, or 1 if none was made */
+}
+
 uint32_t test_bloom_murmur3_seeded(uint32_t seed, const char *data, size_t len,
 				   int version)
 { |
