From 7b081d2f70feb7eadd1e93f52146e5d68371451d Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 23 Jan 2025 12:34:29 -0500 Subject: hash.h: introduce `unsafe_hash_algo()` In 253ed9ecff (hash.h: scaffolding for _unsafe hashing variants, 2024-09-26), we introduced "unsafe" variants of the SHA-1 hashing functions by introducing new functions like "unsafe_init_fn()" and so on. This approach has a major shortcoming that callers must remember to consistently use one variant or the other. Failing to consistently use (or not use) the unsafe variants can lead to crashes at best, or subtle memory corruption issues at worst. In the hashfile API, this isn't difficult to achieve, but verifying that all callers consistently use the unsafe variants is somewhat of a chore given how spread out all of the callers are. In the sha1 and sha1-unsafe test helpers, all of the calls to various hash functions are guarded by an "if (unsafe)" conditional, which is repetitive and cumbersome. Address these issues by introducing a new pattern whereby one 'git_hash_algo' can return a pointer to another 'git_hash_algo' that represents the unsafe version of itself. So instead of having something like: if (unsafe) the_hash_algo->init_fn(...); the_hash_algo->update_fn(...); the_hash_algo->final_fn(...); else the_hash_algo->unsafe_init_fn(...); the_hash_algo->unsafe_update_fn(...); the_hash_algo->unsafe_final_fn(...); we can instead write: struct git_hash_algo *algop = the_hash_algo; if (unsafe) algop = unsafe_hash_algo(algop); algop->init_fn(...); algop->update_fn(...); algop->final_fn(...); This removes the existing shortcoming by no longer forcing the caller to "remember" which variant of the hash functions it wants to call, only to hold onto a 'struct git_hash_algo' pointer that is initialized once. Similarly, while there currently is still a way to "mix" safe and unsafe functions, this too will go away after subsequent commits remove all direct calls to the unsafe_ variants. Note that hash_algo_by_ptr() needs an adjustment to allow passing in the unsafe variant of a hash function. All other query functions on the hash_algos array will continue to return the safe variants of any function. Suggested-by: Jeff King Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- object-file.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'object-file.c') diff --git a/object-file.c b/object-file.c index 5b792b3dd4..43efa4ca36 100644 --- a/object-file.c +++ b/object-file.c @@ -202,6 +202,22 @@ static void git_hash_unknown_final_oid(struct object_id *oid UNUSED, BUG("trying to finalize unknown hash"); } +static const struct git_hash_algo sha1_unsafe_algo = { + .name = "sha1", + .format_id = GIT_SHA1_FORMAT_ID, + .rawsz = GIT_SHA1_RAWSZ, + .hexsz = GIT_SHA1_HEXSZ, + .blksz = GIT_SHA1_BLKSZ, + .init_fn = git_hash_sha1_init_unsafe, + .clone_fn = git_hash_sha1_clone_unsafe, + .update_fn = git_hash_sha1_update_unsafe, + .final_fn = git_hash_sha1_final_unsafe, + .final_oid_fn = git_hash_sha1_final_oid_unsafe, + .empty_tree = &empty_tree_oid, + .empty_blob = &empty_blob_oid, + .null_oid = &null_oid_sha1, +}; + const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = { { .name = NULL, @@ -239,6 +255,7 @@ const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = { .unsafe_update_fn = git_hash_sha1_update_unsafe, .unsafe_final_fn = git_hash_sha1_final_unsafe, .unsafe_final_oid_fn = git_hash_sha1_final_oid_unsafe, + .unsafe = &sha1_unsafe_algo, .empty_tree = &empty_tree_oid, .empty_blob = &empty_blob_oid, .null_oid = &null_oid_sha1, @@ -305,6 +322,15 @@ int hash_algo_by_length(int len) return GIT_HASH_UNKNOWN; } +const struct git_hash_algo *unsafe_hash_algo(const struct git_hash_algo *algop) +{ + /* If we have a faster "unsafe" implementation, use that. */ + if (algop->unsafe) + return algop->unsafe; + /* Otherwise use the default one. */ + return algop; +} + /* * This is meant to hold a *small* number of objects that you would * want repo_read_object_file() to be able to return, but yet you do not want -- cgit 1.2.3-korg From 04292c3796bb92664f6111326215d9c060ef71c8 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 23 Jan 2025 12:34:42 -0500 Subject: hash.h: drop unsafe_ function variants Now that all callers have been converted from: the_hash_algo->unsafe_init_fn(); to unsafe_hash_algo(the_hash_algo)->init_fn(); and similar, we can remove the scaffolding for the unsafe_ function variants and force callers to use the new unsafe_hash_algo() mechanic instead. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- hash.h | 15 --------------- object-file.c | 15 --------------- 2 files changed, 30 deletions(-) (limited to 'object-file.c') diff --git a/hash.h b/hash.h index 0bf63cedfa..ad2c919991 100644 --- a/hash.h +++ b/hash.h @@ -282,21 +282,6 @@ struct git_hash_algo { /* The hash finalization function for object IDs. */ git_hash_final_oid_fn final_oid_fn; - /* The non-cryptographic hash initialization function. */ - git_hash_init_fn unsafe_init_fn; - - /* The non-cryptographic hash context cloning function. */ - git_hash_clone_fn unsafe_clone_fn; - - /* The non-cryptographic hash update function. */ - git_hash_update_fn unsafe_update_fn; - - /* The non-cryptographic hash finalization function. */ - git_hash_final_fn unsafe_final_fn; - - /* The non-cryptographic hash finalization function. */ - git_hash_final_oid_fn unsafe_final_oid_fn; - /* The OID of the empty tree. */ const struct object_id *empty_tree; diff --git a/object-file.c b/object-file.c index 43efa4ca36..c4b42dd4be 100644 --- a/object-file.c +++ b/object-file.c @@ -230,11 +230,6 @@ const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = { .update_fn = git_hash_unknown_update, .final_fn = git_hash_unknown_final, .final_oid_fn = git_hash_unknown_final_oid, - .unsafe_init_fn = git_hash_unknown_init, - .unsafe_clone_fn = git_hash_unknown_clone, - .unsafe_update_fn = git_hash_unknown_update, - .unsafe_final_fn = git_hash_unknown_final, - .unsafe_final_oid_fn = git_hash_unknown_final_oid, .empty_tree = NULL, .empty_blob = NULL, .null_oid = NULL, @@ -250,11 +245,6 @@ const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = { .update_fn = git_hash_sha1_update, .final_fn = git_hash_sha1_final, .final_oid_fn = git_hash_sha1_final_oid, - .unsafe_init_fn = git_hash_sha1_init_unsafe, - .unsafe_clone_fn = git_hash_sha1_clone_unsafe, - .unsafe_update_fn = git_hash_sha1_update_unsafe, - .unsafe_final_fn = git_hash_sha1_final_unsafe, - .unsafe_final_oid_fn = git_hash_sha1_final_oid_unsafe, .unsafe = &sha1_unsafe_algo, .empty_tree = &empty_tree_oid, .empty_blob = &empty_blob_oid, @@ -271,11 +261,6 @@ const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = { .update_fn = git_hash_sha256_update, .final_fn = git_hash_sha256_final, .final_oid_fn = git_hash_sha256_final_oid, - .unsafe_init_fn = git_hash_sha256_init, - .unsafe_clone_fn = git_hash_sha256_clone, - .unsafe_update_fn = git_hash_sha256_update, - .unsafe_final_fn = git_hash_sha256_final, - .unsafe_final_oid_fn = git_hash_sha256_final_oid, .empty_tree = &empty_tree_oid_sha256, .empty_blob = &empty_blob_oid_sha256, .null_oid = &null_oid_sha256, -- cgit 1.2.3-korg