aboutsummaryrefslogtreecommitdiffstats
path: root/bloom.c
diff options
context:
space:
mode:
authorLidong Yan <yldhome2d2@gmail.com>2025-07-12 17:35:15 +0800
committerJunio C Hamano <gitster@pobox.com>2025-07-14 10:03:03 -0700
commit90d5518a7dd53ccc7d967a3a066d688da1d7e214 (patch)
treef133f8221a3c4b84e4b0d427ae0c35dddc2bb150 /bloom.c
parentb187353ed2b92745a903d321eaafac342a5df8d4 (diff)
downloadgit-90d5518a7dd53ccc7d967a3a066d688da1d7e214.tar.gz
bloom: replace struct bloom_key * with struct bloom_keyvec
Previously, we stored bloom keys in a flat array and marked a commit as NOT TREESAME if any key reported "definitely not changed". To support multiple pathspec items, we now require that for each pathspec item, there exists a bloom key reporting "definitely not changed". This "for every" condition makes a flat array insufficient, so we introduce a new structure to group keys by a single pathspec item. `struct bloom_keyvec` is introduced to replace `struct bloom_key *` and `bloom_key_nr`. And because we want to support multiple pathspec items, we added a bloom_keyvec * and a bloom_keyvec_nr field to `struct rev_info` to represent an array of bloom_keyvecs. This commit still optimize only one pathspec item, thus bloom_keyvec_nr can only be 0 or 1. New bloom_keyvec_* functions are added to create and destroy a keyvec. bloom_filter_contains_vec() is added to check if all key in keyvec is contained in a bloom filter. Signed-off-by: Lidong Yan <502024330056@smail.nju.edu.cn> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'bloom.c')
-rw-r--r--bloom.c61
1 files changed, 61 insertions, 0 deletions
diff --git a/bloom.c b/bloom.c
index 5523d198c8..b86015f6d1 100644
--- a/bloom.c
+++ b/bloom.c
@@ -278,6 +278,55 @@ void deinit_bloom_filters(void)
deep_clear_bloom_filter_slab(&bloom_filters, free_one_bloom_filter);
}
+struct bloom_keyvec *bloom_keyvec_new(const char *path, size_t len,
+ const struct bloom_filter_settings *settings)
+{
+ struct bloom_keyvec *vec;
+ const char *p;
+ size_t sz;
+ size_t nr = 1;
+
+ p = path;
+ while (*p) {
+ /*
+ * At this point, the path is normalized to use Unix-style
+ * path separators. This is required due to how the
+ * changed-path Bloom filters store the paths.
+ */
+ if (*p == '/')
+ nr++;
+ p++;
+ }
+
+ sz = sizeof(struct bloom_keyvec);
+ sz += nr * sizeof(struct bloom_key);
+ vec = (struct bloom_keyvec *)xcalloc(1, sz);
+ if (!vec)
+ return NULL;
+ vec->count = nr;
+
+ bloom_key_fill(&vec->key[0], path, len, settings);
+ nr = 1;
+ p = path + len - 1;
+ while (p > path) {
+ if (*p == '/') {
+ bloom_key_fill(&vec->key[nr++], path, p - path, settings);
+ }
+ p--;
+ }
+ assert(nr == vec->count);
+ return vec;
+}
+
+void bloom_keyvec_free(struct bloom_keyvec *vec)
+{
+ if (!vec)
+ return;
+ for (size_t nr = 0; nr < vec->count; nr++)
+ bloom_key_clear(&vec->key[nr]);
+ free(vec);
+}
+
static int pathmap_cmp(const void *hashmap_cmp_fn_data UNUSED,
const struct hashmap_entry *eptr,
const struct hashmap_entry *entry_or_key,
@@ -539,6 +588,18 @@ int bloom_filter_contains(const struct bloom_filter *filter,
return 1;
}
+int bloom_filter_contains_vec(const struct bloom_filter *filter,
+ const struct bloom_keyvec *vec,
+ const struct bloom_filter_settings *settings)
+{
+ int ret = 1;
+
+ for (size_t nr = 0; ret > 0 && nr < vec->count; nr++)
+ ret = bloom_filter_contains(filter, &vec->key[nr], settings);
+
+ return ret;
+}
+
uint32_t test_bloom_murmur3_seeded(uint32_t seed, const char *data, size_t len,
int version)
{