diff options
| author | Junio C Hamano <gitster@pobox.com> | 2024-05-30 14:15:13 -0700 |
|---|---|---|
| committer | Junio C Hamano <gitster@pobox.com> | 2024-05-30 14:15:14 -0700 |
| commit | 6c5be97e4eee040a2d5303e5650fa7cc8a37dbd8 (patch) | |
| tree | e473e1ea7754091c4e4b13e1af93c9cc3638dcba | |
| parent | 988499e2955f052fa5f58434e13d12285cb8a361 (diff) | |
| parent | 4674ab682dc1a875fd29de8f4e9568196a88b97b (diff) | |
| download | git-6c5be97e4eee040a2d5303e5650fa7cc8a37dbd8.tar.gz | |
Merge branch 'jc/undecided-is-not-necessarily-sha1-fix'
The base topic started to make it an error for a command to leave
the hash algorithm unspecified, which revealed a few commands that
were not ready for the change. Give users a knob to revert back to
the "default is sha-1" behaviour as an escape hatch, and start
fixing these breakages.
* jc/undecided-is-not-necessarily-sha1-fix:
apply: fix uninitialized hash function
builtin/hash-object: fix uninitialized hash function
builtin/patch-id: fix uninitialized hash function
t1517: test commands that are designed to be run outside repository
setup: add an escape hatch for "no more default hash algorithm" change
| -rw-r--r-- | builtin/apply.c | 10 | ||||
| -rw-r--r-- | builtin/hash-object.c | 3 | ||||
| -rw-r--r-- | builtin/patch-id.c | 13 | ||||
| -rw-r--r-- | repository.c | 44 | ||||
| -rwxr-xr-x | t/t1007-hash-object.sh | 6 | ||||
| -rwxr-xr-x | t/t1517-outside-repo.sh | 59 | ||||
| -rwxr-xr-x | t/t4204-patch-id.sh | 34 |
7 files changed, 169 insertions, 0 deletions
diff --git a/builtin/apply.c b/builtin/apply.c index 861a01910c..d623c52f78 100644 --- a/builtin/apply.c +++ b/builtin/apply.c @@ -1,6 +1,7 @@ #include "builtin.h" #include "gettext.h" #include "repository.h" +#include "hash.h" #include "apply.h" static const char * const apply_usage[] = { @@ -18,6 +19,15 @@ int cmd_apply(int argc, const char **argv, const char *prefix) if (init_apply_state(&state, the_repository, prefix)) exit(128); + /* + * We could to redo the "apply.c" machinery to make this + * arbitrary fallback unnecessary, but it is dubious that it + * is worth the effort. + * cf. https://lore.kernel.org/git/xmqqcypfcmn4.fsf@gitster.g/ + */ + if (!the_hash_algo) + repo_set_hash_algo(the_repository, GIT_HASH_SHA1); + argc = apply_parse_options(argc, argv, &state, &force_apply, &options, apply_usage); diff --git a/builtin/hash-object.c b/builtin/hash-object.c index 82ca6d2bfd..c767414a0c 100644 --- a/builtin/hash-object.c +++ b/builtin/hash-object.c @@ -123,6 +123,9 @@ int cmd_hash_object(int argc, const char **argv, const char *prefix) else prefix = setup_git_directory_gently(&nongit); + if (nongit && !the_hash_algo) + repo_set_hash_algo(the_repository, GIT_HASH_SHA1); + if (vpath && prefix) { vpath_free = prefix_filename(prefix, vpath); vpath = vpath_free; diff --git a/builtin/patch-id.c b/builtin/patch-id.c index 3894d2b970..583099cacf 100644 --- a/builtin/patch-id.c +++ b/builtin/patch-id.c @@ -5,6 +5,7 @@ #include "hash.h" #include "hex.h" #include "parse-options.h" +#include "setup.h" static void flush_current_id(int patchlen, struct object_id *id, struct object_id *result) { @@ -237,6 +238,18 @@ int cmd_patch_id(int argc, const char **argv, const char *prefix) argc = parse_options(argc, argv, prefix, builtin_patch_id_options, patch_id_usage, 0); + /* + * We rely on `the_hash_algo` to compute patch IDs. This is dubious as + * it means that the hash algorithm now depends on the object hash of + * the repository, even though git-patch-id(1) clearly defines that + * patch IDs always use SHA1. + * + * NEEDSWORK: This hack should be removed in favor of converting + * the code that computes patch IDs to always use SHA1. + */ + if (!the_hash_algo) + repo_set_hash_algo(the_repository, GIT_HASH_SHA1); + generate_id_list(opts ? opts > 1 : config.stable, opts ? opts == 3 : config.verbatim); return 0; diff --git a/repository.c b/repository.c index 9f3014b3b5..deb68625e0 100644 --- a/repository.c +++ b/repository.c @@ -20,6 +20,28 @@ static struct repository the_repo; struct repository *the_repository = &the_repo; +/* + * An escape hatch: if we hit a bug in the production code that fails + * to set an appropriate hash algorithm (most likely to happen when + * running outside a repository), we can tell the user who reported + * the crash to set the environment variable to "sha1" (all lowercase) + * to revert to the historical behaviour of defaulting to SHA-1. + */ +static void set_default_hash_algo(struct repository *repo) +{ + const char *hash_name; + int algo; + + hash_name = getenv("GIT_TEST_DEFAULT_HASH_ALGO"); + if (!hash_name) + return; + algo = hash_algo_by_name(hash_name); + if (algo == GIT_HASH_UNKNOWN) + return; + + repo_set_hash_algo(repo, algo); +} + void initialize_repository(struct repository *repo) { repo->objects = raw_object_store_new(); @@ -27,6 +49,28 @@ void initialize_repository(struct repository *repo) repo->parsed_objects = parsed_object_pool_new(); ALLOC_ARRAY(repo->index, 1); index_state_init(repo->index, repo); + + /* + * When a command runs inside a repository, it learns what + * hash algorithm is in use from the repository, but some + * commands are designed to work outside a repository, yet + * they want to access the_hash_algo, if only for the length + * of the hashed value to see if their input looks like a + * plausible hash value. + * + * We are in the process of identifying such code paths and + * giving them an appropriate default individually; any + * unconverted code paths that try to access the_hash_algo + * will thus fail. The end-users however have an escape hatch + * to set GIT_TEST_DEFAULT_HASH_ALGO environment variable to + * "sha1" to get back the old behaviour of defaulting to SHA-1. + * + * This escape hatch is deliberately kept unadvertised, so + * that they see crashes and we can get a report before + * telling them about it. + */ + if (repo == the_repository) + set_default_hash_algo(repo); } static void expand_base_dir(char **out, const char *in, diff --git a/t/t1007-hash-object.sh b/t/t1007-hash-object.sh index 64aea38486..d73a5cc237 100755 --- a/t/t1007-hash-object.sh +++ b/t/t1007-hash-object.sh @@ -260,4 +260,10 @@ test_expect_success '--literally with extra-long type' ' echo example | git hash-object -t $t --literally --stdin ' +test_expect_success '--stdin outside of repository (uses SHA-1)' ' + nongit git hash-object --stdin <hello >actual && + echo "$(test_oid --hash=sha1 hello)" >expect && + test_cmp expect actual +' + test_done diff --git a/t/t1517-outside-repo.sh b/t/t1517-outside-repo.sh new file mode 100755 index 0000000000..557808ffa7 --- /dev/null +++ b/t/t1517-outside-repo.sh @@ -0,0 +1,59 @@ +#!/bin/sh + +test_description='check random commands outside repo' + +TEST_PASSES_SANITIZE_LEAK=true +. ./test-lib.sh + +test_expect_success 'set up a non-repo directory and test file' ' + GIT_CEILING_DIRECTORIES=$(pwd) && + export GIT_CEILING_DIRECTORIES && + mkdir non-repo && + ( + cd non-repo && + # confirm that git does not find a repo + test_must_fail git rev-parse --git-dir + ) && + test_write_lines one two three four >nums && + git add nums && + cp nums nums.old && + test_write_lines five >>nums && + git diff >sample.patch +' + +test_expect_success 'compute a patch-id outside repository (uses SHA-1)' ' + nongit env GIT_DEFAULT_HASH=sha1 \ + git patch-id <sample.patch >patch-id.expect && + nongit \ + git patch-id <sample.patch >patch-id.actual && + test_cmp patch-id.expect patch-id.actual +' + +test_expect_success 'hash-object outside repository (uses SHA-1)' ' + nongit env GIT_DEFAULT_HASH=sha1 \ + git hash-object --stdin <sample.patch >hash.expect && + nongit \ + git hash-object --stdin <sample.patch >hash.actual && + test_cmp hash.expect hash.actual +' + +test_expect_success 'apply a patch outside repository' ' + ( + cd non-repo && + cp ../nums.old nums && + git apply ../sample.patch + ) && + test_cmp nums non-repo/nums +' + +test_expect_success 'grep outside repository' ' + git grep --cached two >expect && + ( + cd non-repo && + cp ../nums.old nums && + git grep --no-index two >../actual + ) && + test_cmp expect actual +' + +test_done diff --git a/t/t4204-patch-id.sh b/t/t4204-patch-id.sh index a7fa94ce0a..605faea0c7 100755 --- a/t/t4204-patch-id.sh +++ b/t/t4204-patch-id.sh @@ -310,4 +310,38 @@ test_expect_success 'patch-id handles diffs with one line of before/after' ' test_config patchid.stable true && calc_patch_id diffu1stable <diffu1 ' + +test_expect_failure 'patch-id computes same ID with different object hashes' ' + test_when_finished "rm -rf repo-sha1 repo-sha256" && + + cat >diff <<-\EOF && + diff --git a/bar b/bar + index bdaf90f..31051f6 100644 + --- a/bar + +++ b/bar + @@ -2 +2,2 @@ + b + +c + EOF + + git init --object-format=sha1 repo-sha1 && + git -C repo-sha1 patch-id <diff >patch-id-sha1 && + git init --object-format=sha256 repo-sha256 && + git -C repo-sha256 patch-id <diff >patch-id-sha256 && + test_cmp patch-id-sha1 patch-id-sha256 +' + +test_expect_success 'patch-id without repository' ' + cat >diff <<-\EOF && + diff --git a/bar b/bar + index bdaf90f..31051f6 100644 + --- a/bar + +++ b/bar + @@ -2 +2,2 @@ + b + +c + EOF + nongit git patch-id <diff +' + test_done |
