diff options
| author | Junio C Hamano <gitster@pobox.com> | 2025-07-23 15:45:15 -0700 |
|---|---|---|
| committer | Junio C Hamano <gitster@pobox.com> | 2025-07-23 15:45:15 -0700 |
| commit | f22d4ac4fd50b55c88142dfd15a361680cf3fb40 (patch) | |
| tree | 92a3be8ed47f75e15d2bda0932b6e4ca45d6d064 /bloom.c | |
| parent | 0e8243a355a69035dac269528b49dc8c9bc81f8a (diff) | |
| parent | 2a6ce090f27016d68ee6952809d98fe88ce53522 (diff) | |
| download | git-f22d4ac4fd50b55c88142dfd15a361680cf3fb40.tar.gz | |
Merge branch 'ly/changed-paths-traversal'

Lift the limitation to use changed-path filter in "git log" so that
it can be used for a pathspec with multiple literal paths.

* ly/changed-paths-traversal:
  bloom: optimize multiple pathspec items in revision
  revision: make helper for pathspec to bloom keyvec
  bloom: replace struct bloom_key * with struct bloom_keyvec
  bloom: rename function operates on bloom_key
  bloom: add test helper to return murmur3 hash
Diffstat (limited to 'bloom.c')
| -rw-r--r-- | bloom.c | 84 |
1 files changed, 77 insertions, 7 deletions
@@ -107,7 +107,7 @@ int load_bloom_filter_from_graph(struct commit_graph *g,
  * Not considered to be cryptographically secure.
  * Implemented as described in https://en.wikipedia.org/wiki/MurmurHash#Algorithm
  */
-uint32_t murmur3_seeded_v2(uint32_t seed, const char *data, size_t len)
+static uint32_t murmur3_seeded_v2(uint32_t seed, const char *data, size_t len)
 {
 	const uint32_t c1 = 0xcc9e2d51;
 	const uint32_t c2 = 0x1b873593;
@@ -221,9 +221,7 @@ static uint32_t murmur3_seeded_v1(uint32_t seed, const char *data, size_t len)
 	return seed;
 }
 
-void fill_bloom_key(const char *data,
-		    size_t len,
-		    struct bloom_key *key,
+void bloom_key_fill(struct bloom_key *key, const char *data, size_t len,
 		    const struct bloom_filter_settings *settings)
 {
 	int i;
@@ -243,7 +241,7 @@ void fill_bloom_key(const char *data,
 		key->hashes[i] = hash0 + i * hash1;
 }
 
-void clear_bloom_key(struct bloom_key *key)
+void bloom_key_clear(struct bloom_key *key)
 {
 	FREE_AND_NULL(key->hashes);
 }
@@ -280,6 +278,55 @@ void deinit_bloom_filters(void)
 	deep_clear_bloom_filter_slab(&bloom_filters, free_one_bloom_filter);
 }
 
+struct bloom_keyvec *bloom_keyvec_new(const char *path, size_t len,
+				      const struct bloom_filter_settings *settings)
+{
+	struct bloom_keyvec *vec;
+	const char *p;
+	size_t sz;
+	size_t nr = 1;
+
+	p = path;
+	while (*p) {
+		/*
+		 * At this point, the path is normalized to use Unix-style
+		 * path separators. This is required due to how the
+		 * changed-path Bloom filters store the paths.
+		 */
+		if (*p == '/')
+			nr++;
+		p++;
+	}
+
+	sz = sizeof(struct bloom_keyvec);
+	sz += nr * sizeof(struct bloom_key);
+	vec = (struct bloom_keyvec *)xcalloc(1, sz);
+	if (!vec)
+		return NULL;
+	vec->count = nr;
+
+	bloom_key_fill(&vec->key[0], path, len, settings);
+	nr = 1;
+	p = path + len - 1;
+	while (p > path) {
+		if (*p == '/') {
+			bloom_key_fill(&vec->key[nr++], path, p - path, settings);
+		}
+		p--;
+	}
+	assert(nr == vec->count);
+	return vec;
+}
+
+void bloom_keyvec_free(struct bloom_keyvec *vec)
+{
+	if (!vec)
+		return;
+	for (size_t nr = 0; nr < vec->count; nr++)
+		bloom_key_clear(&vec->key[nr]);
+	free(vec);
+}
+
 static int pathmap_cmp(const void *hashmap_cmp_fn_data UNUSED,
 		       const struct hashmap_entry *eptr,
 		       const struct hashmap_entry *entry_or_key,
@@ -500,9 +547,9 @@ struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
 
 		hashmap_for_each_entry(&pathmap, &iter, e, entry) {
 			struct bloom_key key;
-			fill_bloom_key(e->path, strlen(e->path), &key, settings);
+			bloom_key_fill(&key, e->path, strlen(e->path), settings);
 			add_key_to_filter(&key, filter, settings);
-			clear_bloom_key(&key);
+			bloom_key_clear(&key);
 		}
 
 	cleanup:
@@ -540,3 +587,26 @@ int bloom_filter_contains(const struct bloom_filter *filter,
 
 	return 1;
 }
+
+int bloom_filter_contains_vec(const struct bloom_filter *filter,
+			      const struct bloom_keyvec *vec,
+			      const struct bloom_filter_settings *settings)
+{
+	int ret = 1;
+
+	for (size_t nr = 0; ret > 0 && nr < vec->count; nr++)
+		ret = bloom_filter_contains(filter, &vec->key[nr], settings);
+
+	return ret;
+}
+
+uint32_t test_bloom_murmur3_seeded(uint32_t seed, const char *data, size_t len,
+				   int version)
+{
+	assert(version == 1 || version == 2);
+
+	if (version == 2)
+		return murmur3_seeded_v2(seed, data, len);
+	else
+		return murmur3_seeded_v1(seed, data, len);
+}
