From 5e9d802a33ef2912a04984d431defe9809c809e1 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 1 Oct 2023 21:40:05 -0500 Subject: object-file-convert: stubs for converting from one object format to another Two basic functions are provided: - convert_object_file Takes an object file, its type, and hash algorithm and converts it into the equivalent object file that would have been generated with hash algorithm "to". For blob objects there is no conversion to be done and it is an error to use this function on them. For commit, tree, and tag objects embedded oids are replaced by the oids of the objects they refer to with those objects and their object ids reencoded with the hash algorithm "to". Signatures are rearranged so that they remain valid after the object has been reencoded. - repo_oid_to_algop which takes an oid that refers to an object file and returns the oid of the equivalent object file generated with the target hash algorithm. The pair of files object-file-convert.c and object-file-convert.h are introduced to hold as much of this logic as possible to keep this conversion logic cleanly separated from everything else and in the hopes that someday the code will be clean enough git can support compiling out support for sha1 and the various conversion functions. Signed-off-by: "Eric W. 
Biederman" Signed-off-by: Junio C Hamano --- object-file-convert.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 object-file-convert.c (limited to 'object-file-convert.c') diff --git a/object-file-convert.c b/object-file-convert.c new file mode 100644 index 0000000000..4777aba836 --- /dev/null +++ b/object-file-convert.c @@ -0,0 +1,57 @@ +#include "git-compat-util.h" +#include "gettext.h" +#include "strbuf.h" +#include "repository.h" +#include "hash-ll.h" +#include "object.h" +#include "object-file-convert.h" + +int repo_oid_to_algop(struct repository *repo, const struct object_id *src, + const struct git_hash_algo *to, struct object_id *dest) +{ + /* + * If the source algorithm is not set, then we're using the + * default hash algorithm for that object. + */ + const struct git_hash_algo *from = + src->algo ? &hash_algos[src->algo] : repo->hash_algo; + + if (from == to) { + if (src != dest) + oidcpy(dest, src); + return 0; + } + return -1; +} + +int convert_object_file(struct strbuf *outbuf, + const struct git_hash_algo *from, + const struct git_hash_algo *to, + const void *buf, size_t len, + enum object_type type, + int gentle) +{ + int ret; + + /* Don't call this function when no conversion is necessary */ + if ((from == to) || (type == OBJ_BLOB)) + BUG("Refusing noop object file conversion"); + + switch (type) { + case OBJ_COMMIT: + case OBJ_TREE: + case OBJ_TAG: + default: + /* Not implemented yet, so fail. */ + ret = -1; + break; + } + if (!ret) + return 0; + if (gentle) { + strbuf_release(outbuf); + return ret; + } + die(_("Failed to convert object from %s to %s"), + from->name, to->name); +} -- cgit 1.2.3-korg From 23b2c7e95b6f8f3045665835d2dc5028701eff18 Mon Sep 17 00:00:00 2001 From: "brian m. 
carlson" Date: Sun, 1 Oct 2023 21:40:09 -0500 Subject: loose: add a mapping between SHA-1 and SHA-256 for loose objects As part of the transition plan, we'd like to add a file in the .git directory that maps loose objects between SHA-1 and SHA-256. Let's implement the specification in the transition plan and store this data on a per-repository basis in struct repository. Signed-off-by: brian m. carlson Signed-off-by: Eric W. Biederman Signed-off-by: Junio C Hamano --- Makefile | 1 + loose.c | 246 ++++++++++++++++++++++++++++++++++++++++++++++++++ loose.h | 22 +++++ object-file-convert.c | 14 ++- object-store-ll.h | 3 + object.c | 2 + repository.c | 6 ++ 7 files changed, 293 insertions(+), 1 deletion(-) create mode 100644 loose.c create mode 100644 loose.h (limited to 'object-file-convert.c') diff --git a/Makefile b/Makefile index f7e824f25c..3c18664def 100644 --- a/Makefile +++ b/Makefile @@ -1053,6 +1053,7 @@ LIB_OBJS += list-objects-filter.o LIB_OBJS += list-objects.o LIB_OBJS += lockfile.o LIB_OBJS += log-tree.o +LIB_OBJS += loose.o LIB_OBJS += ls-refs.o LIB_OBJS += mailinfo.o LIB_OBJS += mailmap.o diff --git a/loose.c b/loose.c new file mode 100644 index 0000000000..6ba73cc84d --- /dev/null +++ b/loose.c @@ -0,0 +1,246 @@ +#include "git-compat-util.h" +#include "hash.h" +#include "path.h" +#include "object-store.h" +#include "hex.h" +#include "wrapper.h" +#include "gettext.h" +#include "loose.h" +#include "lockfile.h" + +static const char *loose_object_header = "# loose-object-idx\n"; + +static inline int should_use_loose_object_map(struct repository *repo) +{ + return repo->compat_hash_algo && repo->gitdir; +} + +void loose_object_map_init(struct loose_object_map **map) +{ + struct loose_object_map *m; + m = xmalloc(sizeof(**map)); + m->to_compat = kh_init_oid_map(); + m->to_storage = kh_init_oid_map(); + *map = m; +} + +static int insert_oid_pair(kh_oid_map_t *map, const struct object_id *key, const struct object_id *value) +{ + khiter_t pos; + int ret; + 
struct object_id *stored; + + pos = kh_put_oid_map(map, *key, &ret); + + /* This item already exists in the map. */ + if (ret == 0) + return 0; + + stored = xmalloc(sizeof(*stored)); + oidcpy(stored, value); + kh_value(map, pos) = stored; + return 1; +} + +static int load_one_loose_object_map(struct repository *repo, struct object_directory *dir) +{ + struct strbuf buf = STRBUF_INIT, path = STRBUF_INIT; + FILE *fp; + + if (!dir->loose_map) + loose_object_map_init(&dir->loose_map); + + insert_oid_pair(dir->loose_map->to_compat, repo->hash_algo->empty_tree, repo->compat_hash_algo->empty_tree); + insert_oid_pair(dir->loose_map->to_storage, repo->compat_hash_algo->empty_tree, repo->hash_algo->empty_tree); + + insert_oid_pair(dir->loose_map->to_compat, repo->hash_algo->empty_blob, repo->compat_hash_algo->empty_blob); + insert_oid_pair(dir->loose_map->to_storage, repo->compat_hash_algo->empty_blob, repo->hash_algo->empty_blob); + + insert_oid_pair(dir->loose_map->to_compat, repo->hash_algo->null_oid, repo->compat_hash_algo->null_oid); + insert_oid_pair(dir->loose_map->to_storage, repo->compat_hash_algo->null_oid, repo->hash_algo->null_oid); + + strbuf_git_common_path(&path, repo, "objects/loose-object-idx"); + fp = fopen(path.buf, "rb"); + if (!fp) { + strbuf_release(&path); + return 0; + } + + errno = 0; + if (strbuf_getwholeline(&buf, fp, '\n') || strcmp(buf.buf, loose_object_header)) + goto err; + while (!strbuf_getline_lf(&buf, fp)) { + const char *p; + struct object_id oid, compat_oid; + if (parse_oid_hex_algop(buf.buf, &oid, &p, repo->hash_algo) || + *p++ != ' ' || + parse_oid_hex_algop(p, &compat_oid, &p, repo->compat_hash_algo) || + p != buf.buf + buf.len) + goto err; + insert_oid_pair(dir->loose_map->to_compat, &oid, &compat_oid); + insert_oid_pair(dir->loose_map->to_storage, &compat_oid, &oid); + } + + strbuf_release(&buf); + strbuf_release(&path); + return errno ? 
-1 : 0; +err: + strbuf_release(&buf); + strbuf_release(&path); + return -1; +} + +int repo_read_loose_object_map(struct repository *repo) +{ + struct object_directory *dir; + + if (!should_use_loose_object_map(repo)) + return 0; + + prepare_alt_odb(repo); + + for (dir = repo->objects->odb; dir; dir = dir->next) { + if (load_one_loose_object_map(repo, dir) < 0) { + return -1; + } + } + return 0; +} + +int repo_write_loose_object_map(struct repository *repo) +{ + kh_oid_map_t *map = repo->objects->odb->loose_map->to_compat; + struct lock_file lock; + int fd; + khiter_t iter; + struct strbuf buf = STRBUF_INIT, path = STRBUF_INIT; + + if (!should_use_loose_object_map(repo)) + return 0; + + strbuf_git_common_path(&path, repo, "objects/loose-object-idx"); + fd = hold_lock_file_for_update_timeout(&lock, path.buf, LOCK_DIE_ON_ERROR, -1); + iter = kh_begin(map); + if (write_in_full(fd, loose_object_header, strlen(loose_object_header)) < 0) + goto errout; + + for (; iter != kh_end(map); iter++) { + if (kh_exist(map, iter)) { + if (oideq(&kh_key(map, iter), the_hash_algo->empty_tree) || + oideq(&kh_key(map, iter), the_hash_algo->empty_blob)) + continue; + strbuf_addf(&buf, "%s %s\n", oid_to_hex(&kh_key(map, iter)), oid_to_hex(kh_value(map, iter))); + if (write_in_full(fd, buf.buf, buf.len) < 0) + goto errout; + strbuf_reset(&buf); + } + } + strbuf_release(&buf); + if (commit_lock_file(&lock) < 0) { + error_errno(_("could not write loose object index %s"), path.buf); + strbuf_release(&path); + return -1; + } + strbuf_release(&path); + return 0; +errout: + rollback_lock_file(&lock); + strbuf_release(&buf); + error_errno(_("failed to write loose object index %s\n"), path.buf); + strbuf_release(&path); + return -1; +} + +static int write_one_object(struct repository *repo, const struct object_id *oid, + const struct object_id *compat_oid) +{ + struct lock_file lock; + int fd; + struct stat st; + struct strbuf buf = STRBUF_INIT, path = STRBUF_INIT; + + 
strbuf_git_common_path(&path, repo, "objects/loose-object-idx"); + hold_lock_file_for_update_timeout(&lock, path.buf, LOCK_DIE_ON_ERROR, -1); + + fd = open(path.buf, O_WRONLY | O_CREAT | O_APPEND, 0666); + if (fd < 0) + goto errout; + if (fstat(fd, &st) < 0) + goto errout; + if (!st.st_size && write_in_full(fd, loose_object_header, strlen(loose_object_header)) < 0) + goto errout; + + strbuf_addf(&buf, "%s %s\n", oid_to_hex(oid), oid_to_hex(compat_oid)); + if (write_in_full(fd, buf.buf, buf.len) < 0) + goto errout; + if (close(fd)) + goto errout; + adjust_shared_perm(path.buf); + rollback_lock_file(&lock); + strbuf_release(&buf); + strbuf_release(&path); + return 0; +errout: + error_errno(_("failed to write loose object index %s\n"), path.buf); + close(fd); + rollback_lock_file(&lock); + strbuf_release(&buf); + strbuf_release(&path); + return -1; +} + +int repo_add_loose_object_map(struct repository *repo, const struct object_id *oid, + const struct object_id *compat_oid) +{ + int inserted = 0; + + if (!should_use_loose_object_map(repo)) + return 0; + + inserted |= insert_oid_pair(repo->objects->odb->loose_map->to_compat, oid, compat_oid); + inserted |= insert_oid_pair(repo->objects->odb->loose_map->to_storage, compat_oid, oid); + if (inserted) + return write_one_object(repo, oid, compat_oid); + return 0; +} + +int repo_loose_object_map_oid(struct repository *repo, + const struct object_id *src, + const struct git_hash_algo *to, + struct object_id *dest) +{ + struct object_directory *dir; + kh_oid_map_t *map; + khiter_t pos; + + for (dir = repo->objects->odb; dir; dir = dir->next) { + struct loose_object_map *loose_map = dir->loose_map; + if (!loose_map) + continue; + map = (to == repo->compat_hash_algo) ? 
+ loose_map->to_compat : + loose_map->to_storage; + pos = kh_get_oid_map(map, *src); + if (pos < kh_end(map)) { + oidcpy(dest, kh_value(map, pos)); + return 0; + } + } + return -1; +} + +void loose_object_map_clear(struct loose_object_map **map) +{ + struct loose_object_map *m = *map; + struct object_id *oid; + + if (!m) + return; + + kh_foreach_value(m->to_compat, oid, free(oid)); + kh_foreach_value(m->to_storage, oid, free(oid)); + kh_destroy_oid_map(m->to_compat); + kh_destroy_oid_map(m->to_storage); + free(m); + *map = NULL; +} diff --git a/loose.h b/loose.h new file mode 100644 index 0000000000..2c2957072c --- /dev/null +++ b/loose.h @@ -0,0 +1,22 @@ +#ifndef LOOSE_H +#define LOOSE_H + +#include "khash.h" + +struct loose_object_map { + kh_oid_map_t *to_compat; + kh_oid_map_t *to_storage; +}; + +void loose_object_map_init(struct loose_object_map **map); +void loose_object_map_clear(struct loose_object_map **map); +int repo_loose_object_map_oid(struct repository *repo, + const struct object_id *src, + const struct git_hash_algo *dest_algo, + struct object_id *dest); +int repo_add_loose_object_map(struct repository *repo, const struct object_id *oid, + const struct object_id *compat_oid); +int repo_read_loose_object_map(struct repository *repo); +int repo_write_loose_object_map(struct repository *repo); + +#endif diff --git a/object-file-convert.c b/object-file-convert.c index 4777aba836..1ec945eaa1 100644 --- a/object-file-convert.c +++ b/object-file-convert.c @@ -4,6 +4,7 @@ #include "repository.h" #include "hash-ll.h" #include "object.h" +#include "loose.h" #include "object-file-convert.h" int repo_oid_to_algop(struct repository *repo, const struct object_id *src, @@ -21,7 +22,18 @@ int repo_oid_to_algop(struct repository *repo, const struct object_id *src, oidcpy(dest, src); return 0; } - return -1; + if (repo_loose_object_map_oid(repo, src, to, dest)) { + /* + * We may have loaded the object map at repo initialization but + * another process (perhaps 
upstream of a pipe from us) may have + * written a new object into the map. If the object is missing, + * let's reload the map to see if the object has appeared. + */ + repo_read_loose_object_map(repo); + if (repo_loose_object_map_oid(repo, src, to, dest)) + return -1; + } + return 0; } int convert_object_file(struct strbuf *outbuf, diff --git a/object-store-ll.h b/object-store-ll.h index 26a3895c82..bc76d6bec8 100644 --- a/object-store-ll.h +++ b/object-store-ll.h @@ -26,6 +26,9 @@ struct object_directory { uint32_t loose_objects_subdir_seen[8]; /* 256 bits */ struct oidtree *loose_objects_cache; + /* Map between object IDs for loose objects. */ + struct loose_object_map *loose_map; + /* * This is a temporary object store created by the tmp_objdir * facility. Disable ref updates since the objects in the store diff --git a/object.c b/object.c index 2c61e4c862..186a0a47c0 100644 --- a/object.c +++ b/object.c @@ -13,6 +13,7 @@ #include "alloc.h" #include "packfile.h" #include "commit-graph.h" +#include "loose.h" unsigned int get_max_object_index(void) { @@ -540,6 +541,7 @@ void free_object_directory(struct object_directory *odb) { free(odb->path); odb_clear_loose_cache(odb); + loose_object_map_clear(&odb->loose_map); free(odb); } diff --git a/repository.c b/repository.c index 80252b79e9..6214f61cf4 100644 --- a/repository.c +++ b/repository.c @@ -14,6 +14,7 @@ #include "read-cache-ll.h" #include "remote.h" #include "setup.h" +#include "loose.h" #include "submodule-config.h" #include "sparse-index.h" #include "trace2.h" @@ -109,6 +110,8 @@ void repo_set_compat_hash_algo(struct repository *repo, int algo) if (hash_algo_by_ptr(repo->hash_algo) == algo) BUG("hash_algo and compat_hash_algo match"); repo->compat_hash_algo = algo ? 
&hash_algos[algo] : NULL; + if (repo->compat_hash_algo) + repo_read_loose_object_map(repo); } /* @@ -201,6 +204,9 @@ int repo_init(struct repository *repo, if (worktree) repo_set_worktree(repo, worktree); + if (repo->compat_hash_algo) + repo_read_loose_object_map(repo); + clear_repository_format(&format); return 0; -- cgit 1.2.3-korg From 33a14e81ae0673f8b3f7cf85168c7c573fa2fb58 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Sun, 1 Oct 2023 21:40:19 -0500 Subject: object-file-convert: add a function to convert trees between algorithms In the future, we're going to want to provide SHA-256 repositories that have compatibility support for SHA-1 as well. In order to do so, we'll need to be able to convert tree objects from SHA-256 to SHA-1 by writing a tree with each SHA-256 object ID mapped to a SHA-1 object ID. We implement a function, convert_tree_object, that takes an existing tree buffer and writes it to a new strbuf, converting between algorithms. Let's make this function generic, because while we only need it to convert from the main algorithm to the compatibility algorithm now, we may need to do the other way around in the future, such as for transport. We avoid reusing the code in decode_tree_entry because that code normalizes data, and we don't want that here. We want to produce a complete round trip of data, so if, for example, the old entry had a wrongly zero-padded mode, we'd want to preserve that when converting to ensure a stable hash value. Signed-off-by: brian m. carlson Signed-off-by: Eric W. 
Biederman Signed-off-by: Junio C Hamano --- object-file-convert.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) (limited to 'object-file-convert.c') diff --git a/object-file-convert.c b/object-file-convert.c index 1ec945eaa1..70b80fb61e 100644 --- a/object-file-convert.c +++ b/object-file-convert.c @@ -1,8 +1,10 @@ #include "git-compat-util.h" #include "gettext.h" #include "strbuf.h" +#include "hex.h" #include "repository.h" #include "hash-ll.h" +#include "hash.h" #include "object.h" #include "loose.h" #include "object-file-convert.h" @@ -36,6 +38,51 @@ int repo_oid_to_algop(struct repository *repo, const struct object_id *src, return 0; } +static int decode_tree_entry_raw(struct object_id *oid, const char **path, + size_t *len, const struct git_hash_algo *algo, + const char *buf, unsigned long size) +{ + uint16_t mode; + const unsigned hashsz = algo->rawsz; + + if (size < hashsz + 3 || buf[size - (hashsz + 1)]) { + return -1; + } + + *path = parse_mode(buf, &mode); + if (!*path || !**path) + return -1; + *len = strlen(*path) + 1; + + oidread_algop(oid, (const unsigned char *)*path + *len, algo); + return 0; +} + +static int convert_tree_object(struct strbuf *out, + const struct git_hash_algo *from, + const struct git_hash_algo *to, + const char *buffer, size_t size) +{ + const char *p = buffer, *end = buffer + size; + + while (p < end) { + struct object_id entry_oid, mapped_oid; + const char *path = NULL; + size_t pathlen; + + if (decode_tree_entry_raw(&entry_oid, &path, &pathlen, from, p, + end - p)) + return error(_("failed to decode tree entry")); + if (repo_oid_to_algop(the_repository, &entry_oid, to, &mapped_oid)) + return error(_("failed to map tree entry for %s"), oid_to_hex(&entry_oid)); + strbuf_add(out, p, path - p); + strbuf_add(out, path, pathlen); + strbuf_add(out, mapped_oid.hash, to->rawsz); + p = path + pathlen + from->rawsz; + } + return 0; +} + int convert_object_file(struct strbuf 
*outbuf, const struct git_hash_algo *from, const struct git_hash_algo *to, @@ -50,8 +97,10 @@ int convert_object_file(struct strbuf *outbuf, BUG("Refusing noop object file conversion"); switch (type) { - case OBJ_COMMIT: case OBJ_TREE: + ret = convert_tree_object(outbuf, from, to, buf, len); + break; + case OBJ_COMMIT: case OBJ_TAG: default: /* Not implemented yet, so fail. */ -- cgit 1.2.3-korg From c8762c30df5b5c0a2a91e14e77cfc94b459d089b Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Sun, 1 Oct 2023 21:40:20 -0500 Subject: object-file-convert: convert tag objects when writing When writing a tag object in a repository with both SHA-1 and SHA-256, we'll need to convert our commit objects so that we can write the hash values for both into the repository. To do so, let's add a function to convert tag objects. Note that signatures for tag objects in the current algorithm trail the message, and those for the alternate algorithm are in headers. Therefore, we parse the tag object for both a trailing signature and a header and then, when writing the other format, swap the two around. Signed-off-by: brian m. carlson Signed-off-by: Eric W. 
Biederman Signed-off-by: Junio C Hamano --- object-file-convert.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) (limited to 'object-file-convert.c') diff --git a/object-file-convert.c b/object-file-convert.c index 70b80fb61e..089b68442d 100644 --- a/object-file-convert.c +++ b/object-file-convert.c @@ -7,6 +7,8 @@ #include "hash.h" #include "object.h" #include "loose.h" +#include "commit.h" +#include "gpg-interface.h" #include "object-file-convert.h" int repo_oid_to_algop(struct repository *repo, const struct object_id *src, @@ -83,6 +85,52 @@ static int convert_tree_object(struct strbuf *out, return 0; } +static int convert_tag_object(struct strbuf *out, + const struct git_hash_algo *from, + const struct git_hash_algo *to, + const char *buffer, size_t size) +{ + struct strbuf payload = STRBUF_INIT, temp = STRBUF_INIT, oursig = STRBUF_INIT, othersig = STRBUF_INIT; + size_t payload_size; + struct object_id oid, mapped_oid; + const char *p; + + /* Add some slop for longer signature header in the new algorithm. */ + strbuf_grow(out, size + 7); + + /* Is there a signature for our algorithm? */ + payload_size = parse_signed_buffer(buffer, size); + strbuf_add(&payload, buffer, payload_size); + if (payload_size != size) { + /* Yes, there is. */ + strbuf_add(&oursig, buffer + payload_size, size - payload_size); + } + /* Now, is there a signature for the other algorithm? */ + if (parse_buffer_signed_by_header(payload.buf, payload.len, &temp, &othersig, to)) { + /* Yes, there is. */ + strbuf_swap(&payload, &temp); + strbuf_release(&temp); + } + + /* + * Our payload is now in payload and we may have up to two signatrures + * in oursig and othersig. 
+ */ + if (strncmp(payload.buf, "object ", 7) || payload.buf[from->hexsz + 7] != '\n') + return error("bogus tag object"); + if (parse_oid_hex_algop(payload.buf + 7, &oid, &p, from) < 0) + return error("bad tag object ID"); + if (repo_oid_to_algop(the_repository, &oid, to, &mapped_oid)) + return error("unable to map tree %s in tag object", + oid_to_hex(&oid)); + strbuf_addf(out, "object %s", oid_to_hex(&mapped_oid)); + strbuf_add(out, p, payload.len - (p - payload.buf)); + strbuf_addbuf(out, &othersig); + if (oursig.len) + add_header_signature(out, &oursig, from); + return 0; +} + int convert_object_file(struct strbuf *outbuf, const struct git_hash_algo *from, const struct git_hash_algo *to, @@ -100,8 +148,10 @@ int convert_object_file(struct strbuf *outbuf, case OBJ_TREE: ret = convert_tree_object(outbuf, from, to, buf, len); break; - case OBJ_COMMIT: case OBJ_TAG: + ret = convert_tag_object(outbuf, from, to, buf, len); + break; + case OBJ_COMMIT: default: /* Not implemented yet, so fail. */ ret = -1; -- cgit 1.2.3-korg From ac45d995f3329e5a7e85bf103bd94b48107b3803 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 1 Oct 2023 21:40:21 -0500 Subject: object-file-convert: don't leak when converting tag objects Upon close examination I discovered that while brian's code to convert tag objects was functionally correct, it leaked memory. Rearrange the code so that all error checking happens before any memory is allocated. Add code to release the temporary strbufs the code uses. The code pretty much assumes the tag object ends with a newline, so add an explicit test to verify that is the case. Signed-off-by: Eric W. 
Biederman Signed-off-by: Junio C Hamano --- object-file-convert.c | 45 +++++++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 20 deletions(-) (limited to 'object-file-convert.c') diff --git a/object-file-convert.c b/object-file-convert.c index 089b68442d..79e8e211ff 100644 --- a/object-file-convert.c +++ b/object-file-convert.c @@ -90,44 +90,49 @@ static int convert_tag_object(struct strbuf *out, const struct git_hash_algo *to, const char *buffer, size_t size) { - struct strbuf payload = STRBUF_INIT, temp = STRBUF_INIT, oursig = STRBUF_INIT, othersig = STRBUF_INIT; + struct strbuf payload = STRBUF_INIT, oursig = STRBUF_INIT, othersig = STRBUF_INIT; + const int entry_len = from->hexsz + 7; size_t payload_size; struct object_id oid, mapped_oid; const char *p; - /* Add some slop for longer signature header in the new algorithm. */ - strbuf_grow(out, size + 7); + /* Consume the object line */ + if ((entry_len >= size) || + memcmp(buffer, "object ", 7) || buffer[entry_len] != '\n') + return error("bogus tag object"); + if (parse_oid_hex_algop(buffer + 7, &oid, &p, from) < 0) + return error("bad tag object ID"); + if (repo_oid_to_algop(the_repository, &oid, to, &mapped_oid)) + return error("unable to map tree %s in tag object", + oid_to_hex(&oid)); + size -= ((p + 1) - buffer); + buffer = p + 1; /* Is there a signature for our algorithm? */ payload_size = parse_signed_buffer(buffer, size); - strbuf_add(&payload, buffer, payload_size); if (payload_size != size) { /* Yes, there is. */ strbuf_add(&oursig, buffer + payload_size, size - payload_size); } - /* Now, is there a signature for the other algorithm? */ - if (parse_buffer_signed_by_header(payload.buf, payload.len, &temp, &othersig, to)) { - /* Yes, there is. */ - strbuf_swap(&payload, &temp); - strbuf_release(&temp); - } + /* Now, is there a signature for the other algorithm? 
*/ + parse_buffer_signed_by_header(buffer, payload_size, &payload, &othersig, to); /* * Our payload is now in payload and we may have up to two signatrures * in oursig and othersig. */ - if (strncmp(payload.buf, "object ", 7) || payload.buf[from->hexsz + 7] != '\n') - return error("bogus tag object"); - if (parse_oid_hex_algop(payload.buf + 7, &oid, &p, from) < 0) - return error("bad tag object ID"); - if (repo_oid_to_algop(the_repository, &oid, to, &mapped_oid)) - return error("unable to map tree %s in tag object", - oid_to_hex(&oid)); - strbuf_addf(out, "object %s", oid_to_hex(&mapped_oid)); - strbuf_add(out, p, payload.len - (p - payload.buf)); - strbuf_addbuf(out, &othersig); + + /* Add some slop for longer signature header in the new algorithm. */ + strbuf_grow(out, (7 + to->hexsz + 1) + size + 7); + strbuf_addf(out, "object %s\n", oid_to_hex(&mapped_oid)); + strbuf_addbuf(out, &payload); if (oursig.len) add_header_signature(out, &oursig, from); + strbuf_addbuf(out, &othersig); + + strbuf_release(&payload); + strbuf_release(&othersig); + strbuf_release(&oursig); return 0; } -- cgit 1.2.3-korg From 318b023e4a3f5e4f2ecf202aa87db7e5df2c4442 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Sun, 1 Oct 2023 21:40:22 -0500 Subject: object-file-convert: convert commit objects when writing When writing a commit object in a repository with both SHA-1 and SHA-256, we'll need to convert our commit objects so that we can write the hash values for both into the repository. To do so, let's add a function to convert commit objects. Read the commit object and map the tree value and any of the parent values, and copy the rest of the commit through unmodified. Note that we don't need to modify the signature headers, because they are the same under both algorithms. Signed-off-by: brian m. carlson Signed-off-by: Eric W. 
Biederman Signed-off-by: Junio C Hamano --- object-file-convert.c | 46 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) (limited to 'object-file-convert.c') diff --git a/object-file-convert.c b/object-file-convert.c index 79e8e211ff..0da081104e 100644 --- a/object-file-convert.c +++ b/object-file-convert.c @@ -136,6 +136,48 @@ static int convert_tag_object(struct strbuf *out, return 0; } +static int convert_commit_object(struct strbuf *out, + const struct git_hash_algo *from, + const struct git_hash_algo *to, + const char *buffer, size_t size) +{ + const char *tail = buffer; + const char *bufptr = buffer; + const int tree_entry_len = from->hexsz + 5; + const int parent_entry_len = from->hexsz + 7; + struct object_id oid, mapped_oid; + const char *p; + + tail += size; + if (tail <= bufptr + tree_entry_len + 1 || memcmp(bufptr, "tree ", 5) || + bufptr[tree_entry_len] != '\n') + return error("bogus commit object"); + if (parse_oid_hex_algop(bufptr + 5, &oid, &p, from) < 0) + return error("bad tree pointer"); + + if (repo_oid_to_algop(the_repository, &oid, to, &mapped_oid)) + return error("unable to map tree %s in commit object", + oid_to_hex(&oid)); + strbuf_addf(out, "tree %s\n", oid_to_hex(&mapped_oid)); + bufptr = p + 1; + + while (bufptr + parent_entry_len < tail && !memcmp(bufptr, "parent ", 7)) { + if (tail <= bufptr + parent_entry_len + 1 || + parse_oid_hex_algop(bufptr + 7, &oid, &p, from) || + *p != '\n') + return error("bad parents in commit"); + + if (repo_oid_to_algop(the_repository, &oid, to, &mapped_oid)) + return error("unable to map parent %s in commit object", + oid_to_hex(&oid)); + + strbuf_addf(out, "parent %s\n", oid_to_hex(&mapped_oid)); + bufptr = p + 1; + } + strbuf_add(out, bufptr, tail - bufptr); + return 0; +} + int convert_object_file(struct strbuf *outbuf, const struct git_hash_algo *from, const struct git_hash_algo *to, @@ -150,13 +192,15 @@ int convert_object_file(struct strbuf *outbuf, 
BUG("Refusing noop object file conversion"); switch (type) { + case OBJ_COMMIT: + ret = convert_commit_object(outbuf, from, to, buf, len); + break; case OBJ_TREE: ret = convert_tree_object(outbuf, from, to, buf, len); break; case OBJ_TAG: ret = convert_tag_object(outbuf, from, to, buf, len); break; - case OBJ_COMMIT: default: /* Not implemented yet, so fail. */ ret = -1; -- cgit 1.2.3-korg From 08a45903cb32304946c11dfb2239db448777aed7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 1 Oct 2023 21:40:23 -0500 Subject: object-file-convert: convert commits that embed signed tags As mentioned in the hash function transition plan commit mergetag lines need to be handled. The commit mergetag lines embed an entire tag object in a commit object. Keep the implementation sane if not fast by unembedding the tag object, converting the tag object, and embedding the new tag object, in the new commit object. In the long run I don't expect any other approach is maintainable, as tag objects may be extended in ways that require additional translation. To keep the implementation of convert_commit_object maintainable I have modified convert_commit_object to process the lines in any order, and to fail on unknown lines. We can't know ahead of time if a new line might embed something that needs translation or not so it is better to fail and require the code to be updated instead of silently mistranslating objects. Signed-off-by: Eric W. 
Biederman Signed-off-by: Junio C Hamano --- object-file-convert.c | 104 +++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 82 insertions(+), 22 deletions(-) (limited to 'object-file-convert.c') diff --git a/object-file-convert.c b/object-file-convert.c index 0da081104e..4f6189095b 100644 --- a/object-file-convert.c +++ b/object-file-convert.c @@ -146,35 +146,95 @@ static int convert_commit_object(struct strbuf *out, const int tree_entry_len = from->hexsz + 5; const int parent_entry_len = from->hexsz + 7; struct object_id oid, mapped_oid; - const char *p; + const char *p, *eol; tail += size; - if (tail <= bufptr + tree_entry_len + 1 || memcmp(bufptr, "tree ", 5) || - bufptr[tree_entry_len] != '\n') - return error("bogus commit object"); - if (parse_oid_hex_algop(bufptr + 5, &oid, &p, from) < 0) - return error("bad tree pointer"); - if (repo_oid_to_algop(the_repository, &oid, to, &mapped_oid)) - return error("unable to map tree %s in commit object", - oid_to_hex(&oid)); - strbuf_addf(out, "tree %s\n", oid_to_hex(&mapped_oid)); - bufptr = p + 1; + while ((bufptr < tail) && (*bufptr != '\n')) { + eol = memchr(bufptr, '\n', tail - bufptr); + if (!eol) + return error(_("bad %s in commit"), "line"); + + if (((bufptr + 5) < eol) && !memcmp(bufptr, "tree ", 5)) + { + if (((bufptr + tree_entry_len) != eol) || + parse_oid_hex_algop(bufptr + 5, &oid, &p, from) || + (p != eol)) + return error(_("bad %s in commit"), "tree"); + + if (repo_oid_to_algop(the_repository, &oid, to, &mapped_oid)) + return error(_("unable to map %s %s in commit object"), + "tree", oid_to_hex(&oid)); + strbuf_addf(out, "tree %s\n", oid_to_hex(&mapped_oid)); + } + else if (((bufptr + 7) < eol) && !memcmp(bufptr, "parent ", 7)) + { + if (((bufptr + parent_entry_len) != eol) || + parse_oid_hex_algop(bufptr + 7, &oid, &p, from) || + (p != eol)) + return error(_("bad %s in commit"), "parent"); - while (bufptr + parent_entry_len < tail && !memcmp(bufptr, "parent ", 7)) { - if (tail <= bufptr + 
parent_entry_len + 1 || - parse_oid_hex_algop(bufptr + 7, &oid, &p, from) || - *p != '\n') - return error("bad parents in commit"); + if (repo_oid_to_algop(the_repository, &oid, to, &mapped_oid)) + return error(_("unable to map %s %s in commit object"), + "parent", oid_to_hex(&oid)); - if (repo_oid_to_algop(the_repository, &oid, to, &mapped_oid)) - return error("unable to map parent %s in commit object", - oid_to_hex(&oid)); + strbuf_addf(out, "parent %s\n", oid_to_hex(&mapped_oid)); + } + else if (((bufptr + 9) < eol) && !memcmp(bufptr, "mergetag ", 9)) + { + struct strbuf tag = STRBUF_INIT, new_tag = STRBUF_INIT; - strbuf_addf(out, "parent %s\n", oid_to_hex(&mapped_oid)); - bufptr = p + 1; + /* Recover the tag object from the mergetag */ + strbuf_add(&tag, bufptr + 9, (eol - (bufptr + 9)) + 1); + + bufptr = eol + 1; + while ((bufptr < tail) && (*bufptr == ' ')) { + eol = memchr(bufptr, '\n', tail - bufptr); + if (!eol) { + strbuf_release(&tag); + return error(_("bad %s in commit"), "mergetag continuation"); + } + strbuf_add(&tag, bufptr + 1, (eol - (bufptr + 1)) + 1); + bufptr = eol + 1; + } + + /* Compute the new tag object */ + if (convert_tag_object(&new_tag, from, to, tag.buf, tag.len)) { + strbuf_release(&tag); + strbuf_release(&new_tag); + return -1; + } + + /* Write the new mergetag */ + strbuf_addstr(out, "mergetag"); + strbuf_add_lines(out, " ", new_tag.buf, new_tag.len); + strbuf_release(&tag); + strbuf_release(&new_tag); + } + else if (((bufptr + 7) < tail) && !memcmp(bufptr, "author ", 7)) + strbuf_add(out, bufptr, (eol - bufptr) + 1); + else if (((bufptr + 10) < tail) && !memcmp(bufptr, "committer ", 10)) + strbuf_add(out, bufptr, (eol - bufptr) + 1); + else if (((bufptr + 9) < tail) && !memcmp(bufptr, "encoding ", 9)) + strbuf_add(out, bufptr, (eol - bufptr) + 1); + else if (((bufptr + 6) < tail) && !memcmp(bufptr, "gpgsig", 6)) + strbuf_add(out, bufptr, (eol - bufptr) + 1); + else { + /* Unknown line fail it might embed an oid */ + return -1; + } + 
/* Consume any trailing continuation lines */ + bufptr = eol + 1; + while ((bufptr < tail) && (*bufptr == ' ')) { + eol = memchr(bufptr, '\n', tail - bufptr); + if (!eol) + return error(_("bad %s in commit"), "continuation"); + strbuf_add(out, bufptr, (eol - bufptr) + 1); + bufptr = eol + 1; + } } - strbuf_add(out, bufptr, tail - bufptr); + if (bufptr < tail) + strbuf_add(out, bufptr, tail - bufptr); return 0; } -- cgit 1.2.3-korg