diff options
| author | Junio C Hamano <gitster@pobox.com> | 2025-07-21 09:14:26 -0700 |
|---|---|---|
| committer | Junio C Hamano <gitster@pobox.com> | 2025-07-21 09:14:26 -0700 |
| commit | 205493d56d98dd340b4ac5944d11878762f15df1 (patch) | |
| tree | 034c34ab827cef0f6ba942a7d9742d0fff4448f7 | |
| parent | a636d395ff0b0ccecb4569c0fc80f7c55f2e5f2e (diff) | |
| parent | 5ee86c273bfc83fa432910a13c6ce28b74361896 (diff) | |
| download | git-205493d56d98dd340b4ac5944d11878762f15df1.tar.gz | |
Merge branch 'tb/midx-avoid-cruft-packs'
"pack-objects" has been taught to avoid pointing into objects in
cruft packs from midx.
* tb/midx-avoid-cruft-packs:
repack: exclude cruft pack(s) from the MIDX where possible
pack-objects: introduce '--stdin-packs=follow'
pack-objects: swap 'show_{object,commit}_pack_hint'
pack-objects: fix typo in 'show_object_pack_hint()'
pack-objects: perform name-hash traversal for unpacked objects
pack-objects: declare 'rev_info' for '--stdin-packs' earlier
pack-objects: factor out handling '--stdin-packs'
pack-objects: limit scope in 'add_object_entry_from_pack()'
pack-objects: use standard option incompatibility functions
| -rw-r--r-- | Documentation/config/repack.adoc | 7 | ||||
| -rw-r--r-- | Documentation/git-pack-objects.adoc | 10 | ||||
| -rw-r--r-- | builtin/pack-objects.c | 189 | ||||
| -rw-r--r-- | builtin/repack.c | 187 | ||||
| -rwxr-xr-x | t/t5331-pack-objects-stdin.sh | 122 | ||||
| -rwxr-xr-x | t/t7704-repack-cruft.sh | 145 |
6 files changed, 571 insertions, 89 deletions
diff --git a/Documentation/config/repack.adoc b/Documentation/config/repack.adoc index c79af6d7b8..e9e78dcb19 100644 --- a/Documentation/config/repack.adoc +++ b/Documentation/config/repack.adoc @@ -39,3 +39,10 @@ repack.cruftThreads:: a cruft pack and the respective parameters are not given over the command line. See similarly named `pack.*` configuration variables for defaults and meaning. + +repack.midxMustContainCruft:: + When set to true, linkgit:git-repack[1] will unconditionally include + cruft pack(s), if any, in the multi-pack index when invoked with + `--write-midx`. When false, cruft packs are only included in the MIDX + when necessary (e.g., because they might be required to form a + reachability closure with MIDX bitmaps). Defaults to true. diff --git a/Documentation/git-pack-objects.adoc b/Documentation/git-pack-objects.adoc index b1c5aa27da..eba014c406 100644 --- a/Documentation/git-pack-objects.adoc +++ b/Documentation/git-pack-objects.adoc @@ -87,13 +87,21 @@ base-name:: reference was included in the resulting packfile. This can be useful to send new tags to native Git clients. ---stdin-packs:: +--stdin-packs[=<mode>]:: Read the basenames of packfiles (e.g., `pack-1234abcd.pack`) from the standard input, instead of object names or revision arguments. The resulting pack contains all objects listed in the included packs (those not beginning with `^`), excluding any objects listed in the excluded packs (beginning with `^`). + +When `mode` is "follow", objects from packs not listed on stdin receive +special treatment. Objects within unlisted packs will be included if +those objects are (1) reachable from the included packs, and (2) not +found in any excluded packs. This mode is useful, for example, to +resurrect once-unreachable objects found in cruft packs to generate +packs which are closed under reachability up to the boundary set by the +excluded packs. ++ Incompatible with `--revs`, or options that imply `--revs` (such as `--all`), with the exception of `--unpacked`, which is compatible. diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 5781dec980..067b9e322a 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -284,6 +284,12 @@ static struct oidmap configured_exclusions; static struct oidset excluded_by_config; static int name_hash_version = -1; +enum stdin_packs_mode { + STDIN_PACKS_MODE_NONE, + STDIN_PACKS_MODE_STANDARD, + STDIN_PACKS_MODE_FOLLOW, +}; + /** * Check whether the name_hash_version chosen by user input is appropriate, * and also validate whether it is compatible with other features. @@ -3727,7 +3733,6 @@ static int add_object_entry_from_pack(const struct object_id *oid, return 0; if (p) { - struct rev_info *revs = _data; struct object_info oi = OBJECT_INFO_INIT; oi.typep = &type; @@ -3735,6 +3740,7 @@ static int add_object_entry_from_pack(const struct object_id *oid, die(_("could not get type of object %s in pack %s"), oid_to_hex(oid), p->pack_name); } else if (type == OBJ_COMMIT) { + struct rev_info *revs = _data; /* * commits in included packs are used as starting points for the * subsequent revision walk @@ -3750,32 +3756,48 @@ static int add_object_entry_from_pack(const struct object_id *oid, return 0; } -static void show_commit_pack_hint(struct commit *commit UNUSED, - void *data UNUSED) +static void show_object_pack_hint(struct object *object, const char *name, + void *data) { - /* nothing to do; commits don't have a namehash */ + enum stdin_packs_mode mode = *(enum stdin_packs_mode *)data; + if (mode == STDIN_PACKS_MODE_FOLLOW) { + if (object->type == OBJ_BLOB && + !has_object(the_repository, &object->oid, 0)) + return; + add_object_entry(&object->oid, object->type, name, 0); + } else { + struct object_entry *oe = packlist_find(&to_pack, &object->oid); + if (!oe) + return; + + /* + * Our 'to_pack' list was constructed by iterating all + * objects packed in included packs, and so doesn't have + * a non-zero hash field that you would typically pick + * up during a reachability traversal. + * + * Make a best-effort attempt to fill in the ->hash and + * ->no_try_delta fields here in order to perhaps + * improve the delta selection process. + */ + oe->hash = pack_name_hash_fn(name); + oe->no_try_delta = name && no_try_delta(name); + + stdin_packs_hints_nr++; + } } -static void show_object_pack_hint(struct object *object, const char *name, - void *data UNUSED) +static void show_commit_pack_hint(struct commit *commit, void *data) { - struct object_entry *oe = packlist_find(&to_pack, &object->oid); - if (!oe) + enum stdin_packs_mode mode = *(enum stdin_packs_mode *)data; + + if (mode == STDIN_PACKS_MODE_FOLLOW) { + show_object_pack_hint((struct object *)commit, "", data); return; + } - /* - * Our 'to_pack' list was constructed by iterating all objects packed in - * included packs, and so doesn't have a non-zero hash field that you - * would typically pick up during a reachability traversal. - * - * Make a best-effort attempt to fill in the ->hash and ->no_try_delta - * here using a now in order to perhaps improve the delta selection - * process. - */ - oe->hash = pack_name_hash_fn(name); - oe->no_try_delta = name && no_try_delta(name); + /* nothing to do; commits don't have a namehash */ - stdin_packs_hints_nr++; } static int pack_mtime_cmp(const void *_a, const void *_b) @@ -3795,7 +3817,7 @@ static int pack_mtime_cmp(const void *_a, const void *_b) return 0; } -static void read_packs_list_from_stdin(void) +static void read_packs_list_from_stdin(struct rev_info *revs) { struct strbuf buf = STRBUF_INIT; struct string_list include_packs = STRING_LIST_INIT_DUP; @@ -3803,24 +3825,6 @@ static void read_packs_list_from_stdin(void) struct string_list_item *item = NULL; struct packed_git *p; - struct rev_info revs; - - repo_init_revisions(the_repository, &revs, NULL); - /* - * Use a revision walk to fill in the namehash of objects in the include - * packs. To save time, we'll avoid traversing through objects that are - * in excluded packs. - * - * That may cause us to avoid populating all of the namehash fields of - * all included objects, but our goal is best-effort, since this is only - * an optimization during delta selection. - */ - revs.no_kept_objects = 1; - revs.keep_pack_cache_flags |= IN_CORE_KEEP_PACKS; - revs.blob_objects = 1; - revs.tree_objects = 1; - revs.tag_objects = 1; - revs.ignore_missing_links = 1; while (strbuf_getline(&buf, stdin) != EOF) { if (!buf.len) @@ -3890,25 +3894,55 @@ static void read_packs_list_from_stdin(void) struct packed_git *p = item->util; for_each_object_in_pack(p, add_object_entry_from_pack, - &revs, + revs, FOR_EACH_OBJECT_PACK_ORDER); } + strbuf_release(&buf); + string_list_clear(&include_packs, 0); + string_list_clear(&exclude_packs, 0); +} + +static void add_unreachable_loose_objects(struct rev_info *revs); + +static void read_stdin_packs(enum stdin_packs_mode mode, int rev_list_unpacked) +{ + struct rev_info revs; + + repo_init_revisions(the_repository, &revs, NULL); + /* + * Use a revision walk to fill in the namehash of objects in the include + * packs. To save time, we'll avoid traversing through objects that are + * in excluded packs. + * + * That may cause us to avoid populating all of the namehash fields of + * all included objects, but our goal is best-effort, since this is only + * an optimization during delta selection. + */ + revs.no_kept_objects = 1; + revs.keep_pack_cache_flags |= IN_CORE_KEEP_PACKS; + revs.blob_objects = 1; + revs.tree_objects = 1; + revs.tag_objects = 1; + revs.ignore_missing_links = 1; + + /* avoids adding objects in excluded packs */ + ignore_packed_keep_in_core = 1; + read_packs_list_from_stdin(&revs); + if (rev_list_unpacked) + add_unreachable_loose_objects(&revs); + if (prepare_revision_walk(&revs)) die(_("revision walk setup failed")); traverse_commit_list(&revs, show_commit_pack_hint, show_object_pack_hint, - NULL); + &mode); trace2_data_intmax("pack-objects", the_repository, "stdin_packs_found", stdin_packs_found_nr); trace2_data_intmax("pack-objects", the_repository, "stdin_packs_hints", stdin_packs_hints_nr); - - strbuf_release(&buf); - string_list_clear(&include_packs, 0); - string_list_clear(&exclude_packs, 0); } static void add_cruft_object_entry(const struct object_id *oid, enum object_type type, @@ -4006,7 +4040,6 @@ static void mark_pack_kept_in_core(struct string_list *packs, unsigned keep) } } -static void add_unreachable_loose_objects(void); static void add_objects_in_unpacked_packs(void); static void enumerate_cruft_objects(void) @@ -4016,7 +4049,7 @@ static void enumerate_cruft_objects(void) _("Enumerating cruft objects"), 0); add_objects_in_unpacked_packs(); - add_unreachable_loose_objects(); + add_unreachable_loose_objects(NULL); stop_progress(&progress_state); } @@ -4294,8 +4327,9 @@ static void add_objects_in_unpacked_packs(void) } static int add_loose_object(const struct object_id *oid, const char *path, - void *data UNUSED) + void *data) { + struct rev_info *revs = data; enum object_type type = odb_read_object_info(the_repository->objects, oid, NULL); if (type < 0) { @@ -4316,6 +4350,10 @@ static int add_loose_object(const struct object_id *oid, const char *path, } else { add_object_entry(oid, type, "", 0); } + + if (revs && type == OBJ_COMMIT) + add_pending_oid(revs, NULL, oid, 0); + return 0; } @@ -4324,11 +4362,10 @@ static int add_loose_object(const struct object_id *oid, const char *path, * add_object_entry will weed out duplicates, so we just add every * loose object we find. */ -static void add_unreachable_loose_objects(void) +static void add_unreachable_loose_objects(struct rev_info *revs) { for_each_loose_file_in_objdir(repo_get_object_directory(the_repository), - add_loose_object, - NULL, NULL, NULL); + add_loose_object, NULL, NULL, revs); } static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid) @@ -4675,7 +4712,7 @@ static void get_object_list(struct rev_info *revs, int ac, const char **av) if (keep_unreachable) add_objects_in_unpacked_packs(); if (pack_loose_unreachable) - add_unreachable_loose_objects(); + add_unreachable_loose_objects(NULL); if (unpack_unreachable) loosen_unused_packed_objects(); @@ -4782,6 +4819,23 @@ static int is_not_in_promisor_pack(struct commit *commit, void *data) { return is_not_in_promisor_pack_obj((struct object *) commit, data); } +static int parse_stdin_packs_mode(const struct option *opt, const char *arg, + int unset) +{ + enum stdin_packs_mode *mode = opt->value; + + if (unset) + *mode = STDIN_PACKS_MODE_NONE; + else if (!arg || !*arg) + *mode = STDIN_PACKS_MODE_STANDARD; + else if (!strcmp(arg, "follow")) + *mode = STDIN_PACKS_MODE_FOLLOW; + else + die(_("invalid value for '%s': '%s'"), opt->long_name, arg); + + return 0; +} + int cmd_pack_objects(int argc, const char **argv, const char *prefix, @@ -4792,7 +4846,7 @@ int cmd_pack_objects(int argc, struct strvec rp = STRVEC_INIT; int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0; int rev_list_index = 0; - int stdin_packs = 0; + enum stdin_packs_mode stdin_packs = STDIN_PACKS_MODE_NONE; struct string_list keep_pack_list = STRING_LIST_INIT_NODUP; struct list_objects_filter_options filter_options = LIST_OBJECTS_FILTER_INIT; @@ -4847,6 +4901,9 @@ int cmd_pack_objects(int argc, OPT_SET_INT_F(0, "indexed-objects", &rev_list_index, N_("include objects referred to by the index"), 1, PARSE_OPT_NONEG), + OPT_CALLBACK_F(0, "stdin-packs", &stdin_packs, N_("mode"), + N_("read packs from stdin"), + PARSE_OPT_OPTARG, parse_stdin_packs_mode), OPT_BOOL(0, "stdin-packs", &stdin_packs, N_("read packs from stdin")), OPT_BOOL(0, "stdout", &pack_to_stdout, @@ -5012,9 +5069,10 @@ int cmd_pack_objects(int argc, strvec_push(&rp, "--unpacked"); } - if (exclude_promisor_objects && exclude_promisor_objects_best_effort) - die(_("options '%s' and '%s' cannot be used together"), - "--exclude-promisor-objects", "--exclude-promisor-objects-best-effort"); + die_for_incompatible_opt2(exclude_promisor_objects, + "--exclude-promisor-objects", + exclude_promisor_objects_best_effort, + "--exclude-promisor-objects-best-effort"); if (exclude_promisor_objects) { use_internal_rev_list = 1; fetch_if_missing = 0; @@ -5052,13 +5110,14 @@ int cmd_pack_objects(int argc, if (!pack_to_stdout && thin) die(_("--thin cannot be used to build an indexable pack")); - if (keep_unreachable && unpack_unreachable) - die(_("options '%s' and '%s' cannot be used together"), "--keep-unreachable", "--unpack-unreachable"); + die_for_incompatible_opt2(keep_unreachable, "--keep-unreachable", + unpack_unreachable, "--unpack-unreachable"); if (!rev_list_all || !rev_list_reflog || !rev_list_index) unpack_unreachable_expiration = 0; - if (stdin_packs && filter_options.choice) - die(_("cannot use --filter with --stdin-packs")); + die_for_incompatible_opt2(stdin_packs, "--stdin-packs", + filter_options.choice, "--filter"); + if (stdin_packs && use_internal_rev_list) die(_("cannot use internal rev list with --stdin-packs")); @@ -5066,8 +5125,8 @@ int cmd_pack_objects(int argc, if (cruft) { if (use_internal_rev_list) die(_("cannot use internal rev list with --cruft")); - if (stdin_packs) - die(_("cannot use --stdin-packs with --cruft")); + die_for_incompatible_opt2(stdin_packs, "--stdin-packs", + cruft, "--cruft"); } /* @@ -5135,11 +5194,7 @@ int cmd_pack_objects(int argc, progress_state = start_progress(the_repository, _("Enumerating objects"), 0); if (stdin_packs) { - /* avoids adding objects in excluded packs */ - ignore_packed_keep_in_core = 1; - read_packs_list_from_stdin(); - if (rev_list_unpacked) - add_unreachable_loose_objects(); + read_stdin_packs(stdin_packs, rev_list_unpacked); } else if (cruft) { read_cruft_objects(); } else if (!use_internal_rev_list) { diff --git a/builtin/repack.c b/builtin/repack.c index b05ffe4623..75158d7776 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -39,6 +39,7 @@ static int write_bitmaps = -1; static int use_delta_islands; static int run_update_server_info = 1; static char *packdir, *packtmp_name, *packtmp; +static int midx_must_contain_cruft = 1; static const char *const git_repack_usage[] = { N_("git repack [-a] [-A] [-d] [-f] [-F] [-l] [-n] [-q] [-b] [-m]\n" @@ -108,6 +109,10 @@ static int repack_config(const char *var, const char *value, free(cruft_po_args->threads); return git_config_string(&cruft_po_args->threads, var, value); } + if (!strcmp(var, "repack.midxmustcontaincruft")) { + midx_must_contain_cruft = git_config_bool(var, value); + return 0; + } return git_default_config(var, value, ctx, cb); } @@ -690,6 +695,77 @@ static void free_pack_geometry(struct pack_geometry *geometry) free(geometry->pack); } +static int midx_has_unknown_packs(char **midx_pack_names, + size_t midx_pack_names_nr, + struct string_list *include, + struct pack_geometry *geometry, + struct existing_packs *existing) +{ + size_t i; + + string_list_sort(include); + + for (i = 0; i < midx_pack_names_nr; i++) { + const char *pack_name = midx_pack_names[i]; + + /* + * Determine whether or not each MIDX'd pack from the existing + * MIDX (if any) is represented in the new MIDX. For each pack + * in the MIDX, it must either be: + * + * - In the "include" list of packs to be included in the new + * MIDX. Note this function is called before the include + * list is populated with any cruft pack(s). + * + * - Below the geometric split line (if using pack geometry), + * indicating that the pack won't be included in the new + * MIDX, but its contents were rolled up as part of the + * geometric repack. + * + * - In the existing non-kept packs list (if not using pack + * geometry), and marked as non-deleted. + */ + if (string_list_has_string(include, pack_name)) { + continue; + } else if (geometry) { + struct strbuf buf = STRBUF_INIT; + uint32_t j; + + for (j = 0; j < geometry->split; j++) { + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(geometry->pack[j])); + strbuf_strip_suffix(&buf, ".pack"); + strbuf_addstr(&buf, ".idx"); + + if (!strcmp(pack_name, buf.buf)) { + strbuf_release(&buf); + break; + } + } + + strbuf_release(&buf); + + if (j < geometry->split) + continue; + } else { + struct string_list_item *item; + + item = string_list_lookup(&existing->non_kept_packs, + pack_name); + if (item && !pack_is_marked_for_deletion(item)) + continue; + } + + /* + * If we got to this point, the MIDX includes some pack that we + * don't know about. + */ + return 1; + } + + return 0; +} + struct midx_snapshot_ref_data { struct tempfile *f; struct oidset seen; @@ -758,6 +834,8 @@ static void midx_snapshot_refs(struct tempfile *f) static void midx_included_packs(struct string_list *include, struct existing_packs *existing, + char **midx_pack_names, + size_t midx_pack_names_nr, struct string_list *names, struct pack_geometry *geometry) { @@ -811,26 +889,56 @@ static void midx_included_packs(struct string_list *include, } } - for_each_string_list_item(item, &existing->cruft_packs) { + if (midx_must_contain_cruft || + midx_has_unknown_packs(midx_pack_names, midx_pack_names_nr, + include, geometry, existing)) { /* - * When doing a --geometric repack, there is no need to check - * for deleted packs, since we're by definition not doing an - * ALL_INTO_ONE repack (hence no packs will be deleted). - * Otherwise we must check for and exclude any packs which are - * enqueued for deletion. + * If there are one or more unknown pack(s) present (see + * midx_has_unknown_packs() for what makes a pack + * "unknown") in the MIDX before the repack, keep them + * as they may be required to form a reachability + * closure if the MIDX is bitmapped. * - * So we could omit the conditional below in the --geometric - * case, but doing so is unnecessary since no packs are marked - * as pending deletion (since we only call - * `mark_packs_for_deletion()` when doing an all-into-one - * repack). + * For example, a cruft pack can be required to form a + * reachability closure if the MIDX is bitmapped and one + * or more of the bitmap's selected commits reaches a + * once-cruft object that was later made reachable. */ - if (pack_is_marked_for_deletion(item)) - continue; + for_each_string_list_item(item, &existing->cruft_packs) { + /* + * When doing a --geometric repack, there is no + * need to check for deleted packs, since we're + * by definition not doing an ALL_INTO_ONE + * repack (hence no packs will be deleted). + * Otherwise we must check for and exclude any + * packs which are enqueued for deletion. + * + * So we could omit the conditional below in the + * --geometric case, but doing so is unnecessary + * since no packs are marked as pending + * deletion (since we only call + * `mark_packs_for_deletion()` when doing an + * all-into-one repack). + */ + if (pack_is_marked_for_deletion(item)) + continue; - strbuf_reset(&buf); - strbuf_addf(&buf, "%s.idx", item->string); - string_list_insert(include, buf.buf); + strbuf_reset(&buf); + strbuf_addf(&buf, "%s.idx", item->string); + string_list_insert(include, buf.buf); + } + } else { + /* + * Modern versions of Git (with the appropriate + * configuration setting) will write new copies of + * once-cruft objects when doing a --geometric repack. + * + * If the MIDX has no cruft pack, new packs written + * during a --geometric repack will not rely on the + * cruft pack to form a reachability closure, so we can + * avoid including them in the MIDX in that case. + */ + ; } strbuf_release(&buf); @@ -1145,6 +1253,8 @@ int cmd_repack(int argc, struct tempfile *refs_snapshot = NULL; int i, ext, ret; int show_progress; + char **midx_pack_names = NULL; + size_t midx_pack_names_nr = 0; /* variables to be filled by option parsing */ int delete_redundant = 0; @@ -1362,7 +1472,10 @@ int cmd_repack(int argc, !(pack_everything & PACK_CRUFT)) strvec_push(&cmd.args, "--pack-loose-unreachable"); } else if (geometry.split_factor) { - strvec_push(&cmd.args, "--stdin-packs"); + if (midx_must_contain_cruft) + strvec_push(&cmd.args, "--stdin-packs"); + else + strvec_push(&cmd.args, "--stdin-packs=follow"); strvec_push(&cmd.args, "--unpacked"); } else { strvec_push(&cmd.args, "--unpacked"); @@ -1402,8 +1515,25 @@ int cmd_repack(int argc, if (ret) goto cleanup; - if (!names.nr && !po_args.quiet) - printf_ln(_("Nothing new to pack.")); + if (!names.nr) { + if (!po_args.quiet) + printf_ln(_("Nothing new to pack.")); + /* + * If we didn't write any new packs, the non-cruft packs + * may refer to once-unreachable objects in the cruft + * pack(s). + * + * If there isn't already a MIDX, the one we write + * must include the cruft pack(s), in case the + * non-cruft pack(s) refer to once-cruft objects. + * + * If there is already a MIDX, we can punt here, since + * midx_has_unknown_packs() will make the decision for + * us. + */ + if (!get_local_multi_pack_index(the_repository)) + midx_must_contain_cruft = 1; + } if (pack_everything & PACK_CRUFT) { const char *pack_prefix = find_pack_prefix(packdir, packtmp); @@ -1484,6 +1614,19 @@ int cmd_repack(int argc, string_list_sort(&names); + if (get_local_multi_pack_index(the_repository)) { + struct multi_pack_index *m = + get_local_multi_pack_index(the_repository); + + ALLOC_ARRAY(midx_pack_names, + m->num_packs + m->num_packs_in_base); + + for (; m; m = m->base_midx) + for (uint32_t i = 0; i < m->num_packs; i++) + midx_pack_names[midx_pack_names_nr++] = + xstrdup(m->pack_names[i]); + } + close_object_store(the_repository->objects); /* @@ -1525,7 +1668,8 @@ int cmd_repack(int argc, if (write_midx) { struct string_list include = STRING_LIST_INIT_DUP; - midx_included_packs(&include, &existing, &names, &geometry); + midx_included_packs(&include, &existing, midx_pack_names, + midx_pack_names_nr, &names, &geometry); ret = write_midx_included_packs(&include, &geometry, &names, refs_snapshot ? get_tempfile_path(refs_snapshot) : NULL, @@ -1576,6 +1720,9 @@ cleanup: string_list_clear(&names, 1); existing_packs_release(&existing); free_pack_geometry(&geometry); + for (size_t i = 0; i < midx_pack_names_nr; i++) + free(midx_pack_names[i]); + free(midx_pack_names); pack_objects_args_release(&po_args); pack_objects_args_release(&cruft_po_args); diff --git a/t/t5331-pack-objects-stdin.sh b/t/t5331-pack-objects-stdin.sh index b48c0cbe8f..4a8df5a389 100755 --- a/t/t5331-pack-objects-stdin.sh +++ b/t/t5331-pack-objects-stdin.sh @@ -64,7 +64,7 @@ test_expect_success '--stdin-packs is incompatible with --filter' ' cd stdin-packs && test_must_fail git pack-objects --stdin-packs --stdout \ --filter=blob:none </dev/null 2>err && - test_grep "cannot use --filter with --stdin-packs" err + test_grep "options .--stdin-packs. and .--filter. cannot be used together" err ) ' @@ -236,4 +236,124 @@ test_expect_success 'pack-objects --stdin with packfiles from main and alternate test_cmp expected-objects actual-objects ' +objdir=.git/objects +packdir=$objdir/pack + +objects_in_packs () { + for p in "$@" + do + git show-index <"$packdir/pack-$p.idx" || return 1 + done >objects.raw && + + cut -d' ' -f2 objects.raw | sort && + rm -f objects.raw +} + +test_expect_success '--stdin-packs=follow walks into unknown packs' ' + test_when_finished "rm -fr repo" && + + git init repo && + ( + cd repo && + + for c in A B C D + do + test_commit "$c" || return 1 + done && + + A="$(echo A | git pack-objects --revs $packdir/pack)" && + B="$(echo A..B | git pack-objects --revs $packdir/pack)" && + C="$(echo B..C | git pack-objects --revs $packdir/pack)" && + D="$(echo C..D | git pack-objects --revs $packdir/pack)" && + test_commit E && + + git prune-packed && + + cat >in <<-EOF && + pack-$B.pack + ^pack-$C.pack + pack-$D.pack + EOF + + # With just --stdin-packs, pack "A" is unknown to us, so + # only objects from packs "B" and "D" are included in + # the output pack. + P=$(git pack-objects --stdin-packs $packdir/pack <in) && + objects_in_packs $B $D >expect && + objects_in_packs $P >actual && + test_cmp expect actual && + + # But with --stdin-packs=follow, objects from both + # included packs reach objects from the unknown pack, so + # objects from pack "A" is included in the output pack + # in addition to the above. + P=$(git pack-objects --stdin-packs=follow $packdir/pack <in) && + objects_in_packs $A $B $D >expect && + objects_in_packs $P >actual && + test_cmp expect actual && + + # And with --unpacked, we will pick up objects from unknown + # packs that are reachable from loose objects. Loose object E + # reaches objects in pack A, but there are three excluded packs + # in between. + # + # The resulting pack should include objects reachable from E + # that are not present in packs B, C, or D, along with those + # present in pack A. + cat >in <<-EOF && + ^pack-$B.pack + ^pack-$C.pack + ^pack-$D.pack + EOF + + P=$(git pack-objects --stdin-packs=follow --unpacked \ + $packdir/pack <in) && + + { + objects_in_packs $A && + git rev-list --objects --no-object-names D..E + }>expect.raw && + sort expect.raw >expect && + objects_in_packs $P >actual && + test_cmp expect actual + ) +' + +stdin_packs__follow_with_only () { + rm -fr stdin_packs__follow_with_only && + git init stdin_packs__follow_with_only && + ( + cd stdin_packs__follow_with_only && + + test_commit A && + test_commit B && + + git rev-parse "$@" >B.objects && + + echo A | git pack-objects --revs $packdir/pack && + B="$(git pack-objects $packdir/pack <B.objects)" && + + git cat-file --batch-check="%(objectname)" --batch-all-objects >objs && + for obj in $(cat objs) + do + rm -f $objdir/$(test_oid_to_path $obj) || return 1 + done && + + ( cd $packdir && ls pack-*.pack ) >in && + git pack-objects --stdin-packs=follow --stdout >/dev/null <in + ) +} + +test_expect_success '--stdin-packs=follow tolerates missing blobs' ' + stdin_packs__follow_with_only HEAD HEAD^{tree} +' + +test_expect_success '--stdin-packs=follow tolerates missing trees' ' + stdin_packs__follow_with_only HEAD HEAD:B.t +' + +test_expect_success '--stdin-packs=follow tolerates missing commits' ' + stdin_packs__follow_with_only HEAD HEAD^{tree} +' + test_done diff --git a/t/t7704-repack-cruft.sh b/t/t7704-repack-cruft.sh index 8aebfb45f5..aa2e2e6ad8 100755 --- a/t/t7704-repack-cruft.sh +++ b/t/t7704-repack-cruft.sh @@ -724,4 +724,149 @@ test_expect_success 'cruft repack respects --quiet' ' ) ' +setup_cruft_exclude_tests() { + git init "$1" && + ( + cd "$1" && + + git config repack.midxMustContainCruft false && + + test_commit one && + + test_commit --no-tag two && + two="$(git rev-parse HEAD)" && + test_commit --no-tag three && + three="$(git rev-parse HEAD)" && + git reset --hard one && + git reflog expire --all --expire=all && + + GIT_TEST_MULTI_PACK_INDEX=0 git repack --cruft -d && + + git merge $two && + test_commit four + ) +} + +test_expect_success 'repack --write-midx excludes cruft where possible' ' + setup_cruft_exclude_tests exclude-cruft-when-possible && + ( + cd exclude-cruft-when-possible && + + GIT_TEST_MULTI_PACK_INDEX=0 \ + git repack -d --geometric=2 --write-midx --write-bitmap-index && + + test-tool read-midx --show-objects $objdir >midx && + cruft="$(ls $packdir/*.mtimes)" && + test_grep ! "$(basename "$cruft" .mtimes).idx" midx && + + git rev-list --all --objects --no-object-names >reachable.raw && + sort reachable.raw >reachable.objects && + awk "/\.pack$/ { print \$1 }" <midx | sort >midx.objects && + + test_cmp reachable.objects midx.objects + ) +' + +test_expect_success 'repack --write-midx includes cruft when instructed' ' + setup_cruft_exclude_tests exclude-cruft-when-instructed && + ( + cd exclude-cruft-when-instructed && + + GIT_TEST_MULTI_PACK_INDEX=0 \ + git -c repack.midxMustContainCruft=true repack \ + -d --geometric=2 --write-midx --write-bitmap-index && + + test-tool read-midx --show-objects $objdir >midx && + cruft="$(ls $packdir/*.mtimes)" && + test_grep "$(basename "$cruft" .mtimes).idx" midx && + + git cat-file --batch-check="%(objectname)" --batch-all-objects \ + >all.objects && + awk "/\.pack$/ { print \$1 }" <midx | sort >midx.objects && + + test_cmp all.objects midx.objects + ) +' + +test_expect_success 'repack --write-midx includes cruft when necessary' ' + setup_cruft_exclude_tests exclude-cruft-when-necessary && + ( + cd exclude-cruft-when-necessary && + + test_path_is_file $(ls $packdir/pack-*.mtimes) && + ( cd $packdir && ls pack-*.idx ) | sort >packs.all && + git multi-pack-index write --stdin-packs --bitmap <packs.all && + + test_commit five && + GIT_TEST_MULTI_PACK_INDEX=0 \ + git repack -d --geometric=2 --write-midx --write-bitmap-index && + + test-tool read-midx --show-objects $objdir >midx && + awk "/\.pack$/ { print \$1 }" <midx | sort >midx.objects && + git cat-file --batch-all-objects --batch-check="%(objectname)" \ + >expect.objects && + test_cmp expect.objects midx.objects && + + grep "^pack-" midx >midx.packs && + test_line_count = "$(($(wc -l <packs.all) + 1))" midx.packs + ) +' + +test_expect_success 'repack --write-midx includes cruft when already geometric' ' + git init repack--write-midx-geometric-noop && + ( + cd repack--write-midx-geometric-noop && + + git branch -M main && + test_commit A && + test_commit B && + + git checkout -B side && + test_commit --no-tag C && + C="$(git rev-parse HEAD)" && + + git checkout main && + git branch -D side && + git reflog expire --all --expire=all && + + # At this point we have two packs: one containing the + # objects belonging to commits A and B, and another + # (cruft) pack containing the objects belonging to + # commit C. + git repack --cruft -d && + + # Create a third pack which contains a merge commit + # making commit C reachable again. + # + # --no-ff is important here, as it ensures that we + # actually write a new object and subsequently a new + # pack to contain it. + git merge --no-ff $C && + git repack -d && + + ls $packdir/pack-*.idx | sort >packs.all && + cruft="$(ls $packdir/pack-*.mtimes)" && + cruft="${cruft%.mtimes}.idx" && + + for idx in $(grep -v $cruft <packs.all) + do + git show-index <$idx >out && + wc -l <out || return 1 + done >sizes.raw && + + # Make sure that there are two non-cruft packs, and + # that one of them contains at least twice as many + # objects as the other, ensuring that they are already + # in a geometric progression. + sort -n sizes.raw >sizes && + test_line_count = 2 sizes && + s1=$(head -n 1 sizes) && + s2=$(tail -n 1 sizes) && + test "$s2" -gt "$((2 * $s1))" && + + git -c repack.midxMustContainCruft=false repack --geometric=2 \ + --write-midx --write-bitmap-index + ) +' + test_done |
