aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2024-05-30 14:15:13 -0700
committerJunio C Hamano <gitster@pobox.com>2024-05-30 14:15:14 -0700
commit6c5be97e4eee040a2d5303e5650fa7cc8a37dbd8 (patch)
treee473e1ea7754091c4e4b13e1af93c9cc3638dcba
parent988499e2955f052fa5f58434e13d12285cb8a361 (diff)
parent4674ab682dc1a875fd29de8f4e9568196a88b97b (diff)
downloadgit-6c5be97e4eee040a2d5303e5650fa7cc8a37dbd8.tar.gz
Merge branch 'jc/undecided-is-not-necessarily-sha1-fix'
The base topic started to make it an error for a command to leave the hash algorithm unspecified, which revealed a few commands that were not ready for the change. Give users a knob to revert back to the "default is sha-1" behaviour as an escape hatch, and start fixing these breakages. * jc/undecided-is-not-necessarily-sha1-fix: apply: fix uninitialized hash function builtin/hash-object: fix uninitialized hash function builtin/patch-id: fix uninitialized hash function t1517: test commands that are designed to be run outside repository setup: add an escape hatch for "no more default hash algorithm" change
-rw-r--r--builtin/apply.c10
-rw-r--r--builtin/hash-object.c3
-rw-r--r--builtin/patch-id.c13
-rw-r--r--repository.c44
-rwxr-xr-xt/t1007-hash-object.sh6
-rwxr-xr-xt/t1517-outside-repo.sh59
-rwxr-xr-xt/t4204-patch-id.sh34
7 files changed, 169 insertions, 0 deletions
diff --git a/builtin/apply.c b/builtin/apply.c
index 861a01910c..d623c52f78 100644
--- a/builtin/apply.c
+++ b/builtin/apply.c
@@ -1,6 +1,7 @@
#include "builtin.h"
#include "gettext.h"
#include "repository.h"
+#include "hash.h"
#include "apply.h"
static const char * const apply_usage[] = {
@@ -18,6 +19,15 @@ int cmd_apply(int argc, const char **argv, const char *prefix)
if (init_apply_state(&state, the_repository, prefix))
exit(128);
+ /*
+ * We could to redo the "apply.c" machinery to make this
+ * arbitrary fallback unnecessary, but it is dubious that it
+ * is worth the effort.
+ * cf. https://lore.kernel.org/git/xmqqcypfcmn4.fsf@gitster.g/
+ */
+ if (!the_hash_algo)
+ repo_set_hash_algo(the_repository, GIT_HASH_SHA1);
+
argc = apply_parse_options(argc, argv,
&state, &force_apply, &options,
apply_usage);
diff --git a/builtin/hash-object.c b/builtin/hash-object.c
index 82ca6d2bfd..c767414a0c 100644
--- a/builtin/hash-object.c
+++ b/builtin/hash-object.c
@@ -123,6 +123,9 @@ int cmd_hash_object(int argc, const char **argv, const char *prefix)
else
prefix = setup_git_directory_gently(&nongit);
+ if (nongit && !the_hash_algo)
+ repo_set_hash_algo(the_repository, GIT_HASH_SHA1);
+
if (vpath && prefix) {
vpath_free = prefix_filename(prefix, vpath);
vpath = vpath_free;
diff --git a/builtin/patch-id.c b/builtin/patch-id.c
index 3894d2b970..583099cacf 100644
--- a/builtin/patch-id.c
+++ b/builtin/patch-id.c
@@ -5,6 +5,7 @@
#include "hash.h"
#include "hex.h"
#include "parse-options.h"
+#include "setup.h"
static void flush_current_id(int patchlen, struct object_id *id, struct object_id *result)
{
@@ -237,6 +238,18 @@ int cmd_patch_id(int argc, const char **argv, const char *prefix)
argc = parse_options(argc, argv, prefix, builtin_patch_id_options,
patch_id_usage, 0);
+ /*
+ * We rely on `the_hash_algo` to compute patch IDs. This is dubious as
+ * it means that the hash algorithm now depends on the object hash of
+ * the repository, even though git-patch-id(1) clearly defines that
+ * patch IDs always use SHA1.
+ *
+ * NEEDSWORK: This hack should be removed in favor of converting
+ * the code that computes patch IDs to always use SHA1.
+ */
+ if (!the_hash_algo)
+ repo_set_hash_algo(the_repository, GIT_HASH_SHA1);
+
generate_id_list(opts ? opts > 1 : config.stable,
opts ? opts == 3 : config.verbatim);
return 0;
diff --git a/repository.c b/repository.c
index 9f3014b3b5..deb68625e0 100644
--- a/repository.c
+++ b/repository.c
@@ -20,6 +20,28 @@
static struct repository the_repo;
struct repository *the_repository = &the_repo;
+/*
+ * An escape hatch: if we hit a bug in the production code that fails
+ * to set an appropriate hash algorithm (most likely to happen when
+ * running outside a repository), we can tell the user who reported
+ * the crash to set the environment variable to "sha1" (all lowercase)
+ * to revert to the historical behaviour of defaulting to SHA-1.
+ */
+static void set_default_hash_algo(struct repository *repo)
+{
+ const char *hash_name;
+ int algo;
+
+ hash_name = getenv("GIT_TEST_DEFAULT_HASH_ALGO");
+ if (!hash_name)
+ return;
+ algo = hash_algo_by_name(hash_name);
+ if (algo == GIT_HASH_UNKNOWN)
+ return;
+
+ repo_set_hash_algo(repo, algo);
+}
+
void initialize_repository(struct repository *repo)
{
repo->objects = raw_object_store_new();
@@ -27,6 +49,28 @@ void initialize_repository(struct repository *repo)
repo->parsed_objects = parsed_object_pool_new();
ALLOC_ARRAY(repo->index, 1);
index_state_init(repo->index, repo);
+
+ /*
+ * When a command runs inside a repository, it learns what
+ * hash algorithm is in use from the repository, but some
+ * commands are designed to work outside a repository, yet
+ * they want to access the_hash_algo, if only for the length
+ * of the hashed value to see if their input looks like a
+ * plausible hash value.
+ *
+ * We are in the process of identifying such code paths and
+ * giving them an appropriate default individually; any
+ * unconverted code paths that try to access the_hash_algo
+ * will thus fail. The end-users however have an escape hatch
+ * to set GIT_TEST_DEFAULT_HASH_ALGO environment variable to
+ * "sha1" to get back the old behaviour of defaulting to SHA-1.
+ *
+ * This escape hatch is deliberately kept unadvertised, so
+ * that they see crashes and we can get a report before
+ * telling them about it.
+ */
+ if (repo == the_repository)
+ set_default_hash_algo(repo);
}
static void expand_base_dir(char **out, const char *in,
diff --git a/t/t1007-hash-object.sh b/t/t1007-hash-object.sh
index 64aea38486..d73a5cc237 100755
--- a/t/t1007-hash-object.sh
+++ b/t/t1007-hash-object.sh
@@ -260,4 +260,10 @@ test_expect_success '--literally with extra-long type' '
echo example | git hash-object -t $t --literally --stdin
'
+test_expect_success '--stdin outside of repository (uses SHA-1)' '
+ nongit git hash-object --stdin <hello >actual &&
+ echo "$(test_oid --hash=sha1 hello)" >expect &&
+ test_cmp expect actual
+'
+
test_done
diff --git a/t/t1517-outside-repo.sh b/t/t1517-outside-repo.sh
new file mode 100755
index 0000000000..557808ffa7
--- /dev/null
+++ b/t/t1517-outside-repo.sh
@@ -0,0 +1,59 @@
+#!/bin/sh
+
+test_description='check random commands outside repo'
+
+TEST_PASSES_SANITIZE_LEAK=true
+. ./test-lib.sh
+
+test_expect_success 'set up a non-repo directory and test file' '
+ GIT_CEILING_DIRECTORIES=$(pwd) &&
+ export GIT_CEILING_DIRECTORIES &&
+ mkdir non-repo &&
+ (
+ cd non-repo &&
+ # confirm that git does not find a repo
+ test_must_fail git rev-parse --git-dir
+ ) &&
+ test_write_lines one two three four >nums &&
+ git add nums &&
+ cp nums nums.old &&
+ test_write_lines five >>nums &&
+ git diff >sample.patch
+'
+
+test_expect_success 'compute a patch-id outside repository (uses SHA-1)' '
+ nongit env GIT_DEFAULT_HASH=sha1 \
+ git patch-id <sample.patch >patch-id.expect &&
+ nongit \
+ git patch-id <sample.patch >patch-id.actual &&
+ test_cmp patch-id.expect patch-id.actual
+'
+
+test_expect_success 'hash-object outside repository (uses SHA-1)' '
+ nongit env GIT_DEFAULT_HASH=sha1 \
+ git hash-object --stdin <sample.patch >hash.expect &&
+ nongit \
+ git hash-object --stdin <sample.patch >hash.actual &&
+ test_cmp hash.expect hash.actual
+'
+
+test_expect_success 'apply a patch outside repository' '
+ (
+ cd non-repo &&
+ cp ../nums.old nums &&
+ git apply ../sample.patch
+ ) &&
+ test_cmp nums non-repo/nums
+'
+
+test_expect_success 'grep outside repository' '
+ git grep --cached two >expect &&
+ (
+ cd non-repo &&
+ cp ../nums.old nums &&
+ git grep --no-index two >../actual
+ ) &&
+ test_cmp expect actual
+'
+
+test_done
diff --git a/t/t4204-patch-id.sh b/t/t4204-patch-id.sh
index a7fa94ce0a..605faea0c7 100755
--- a/t/t4204-patch-id.sh
+++ b/t/t4204-patch-id.sh
@@ -310,4 +310,38 @@ test_expect_success 'patch-id handles diffs with one line of before/after' '
test_config patchid.stable true &&
calc_patch_id diffu1stable <diffu1
'
+
+test_expect_failure 'patch-id computes same ID with different object hashes' '
+ test_when_finished "rm -rf repo-sha1 repo-sha256" &&
+
+ cat >diff <<-\EOF &&
+ diff --git a/bar b/bar
+ index bdaf90f..31051f6 100644
+ --- a/bar
+ +++ b/bar
+ @@ -2 +2,2 @@
+ b
+ +c
+ EOF
+
+ git init --object-format=sha1 repo-sha1 &&
+ git -C repo-sha1 patch-id <diff >patch-id-sha1 &&
+ git init --object-format=sha256 repo-sha256 &&
+ git -C repo-sha256 patch-id <diff >patch-id-sha256 &&
+ test_cmp patch-id-sha1 patch-id-sha256
+'
+
+test_expect_success 'patch-id without repository' '
+ cat >diff <<-\EOF &&
+ diff --git a/bar b/bar
+ index bdaf90f..31051f6 100644
+ --- a/bar
+ +++ b/bar
+ @@ -2 +2,2 @@
+ b
+ +c
+ EOF
+ nongit git patch-id <diff
+'
+
test_done