about | summary | refs | log | tree | commit | diff | stats
path: root/xdiff/xhistogram.c
diff options
context:
space:
mode:
author: Ezekiel Newren <ezekielnewren@gmail.com> 2025-11-18 22:34:18 +0000
committer: Junio C Hamano <gitster@pobox.com> 2025-11-18 14:53:10 -0800
commit: 6a26019c81faa07ba811541b4cf35be9e8ee1ead (patch)
tree: 1f1c230dd72da18f65389c727faaa6c0d4f9bbc5 /xdiff/xhistogram.c
parent: b0d4ae30f5a23fa9da87e9396b78e6442b351ddc (diff)
download: git-6a26019c81faa07ba811541b4cf35be9e8ee1ead.tar.gz
xdiff: split xrecord_t.ha into line_hash and minimal_perfect_hash
The ha field is serving two different purposes, which makes the code harder to read. At first glance, it looks like many places assume there could never be hash collisions between lines of the two input files. In reality, line_hash is used together with xdl_recmatch() to ensure correct comparisons of lines, even when collisions occur.

To make this clearer, the old ha field has been split:

  * line_hash: a straightforward hash of a line, independent of any external context. Its type is uint64_t, as it comes from a fixed width hash function.

  * minimal_perfect_hash: Not a new concept, but now a separate field. It comes from the classifier's general-purpose hash table, which assigns each line a unique and minimal hash across the two files. A size_t is used here because it's meant to be used to index an array. This also avoids ` as usize` casts on the Rust side when using it to index a slice.

Signed-off-by: Ezekiel Newren <ezekielnewren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'xdiff/xhistogram.c')
-rw-r--r-- xdiff/xhistogram.c 4
1 files changed, 2 insertions, 2 deletions
diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c
index 6dc450b1fe..5ae1282c27 100644
--- a/xdiff/xhistogram.c
+++ b/xdiff/xhistogram.c
@@ -90,7 +90,7 @@ struct region {
static int cmp_recs(xrecord_t *r1, xrecord_t *r2)
{
- return r1->ha == r2->ha;
+ return r1->minimal_perfect_hash == r2->minimal_perfect_hash;
}
@@ -98,7 +98,7 @@ static int cmp_recs(xrecord_t *r1, xrecord_t *r2)
(cmp_recs(REC(i->env, s1, l1), REC(i->env, s2, l2)))
#define TABLE_HASH(index, side, line) \
- XDL_HASHLONG((REC(index->env, side, line))->ha, index->table_bits)
+ XDL_HASHLONG((REC(index->env, side, line))->minimal_perfect_hash, index->table_bits)
static int scanA(struct histindex *index, int line1, int count1)
{