diff options
| author | Junio C Hamano <gitster@pobox.com> | 2025-07-15 12:06:57 -0700 |
|---|---|---|
| committer | Junio C Hamano <gitster@pobox.com> | 2025-07-15 12:06:57 -0700 |
| commit | c29998d1d4cc2de064069adb2738af56a9d2fbd0 (patch) | |
| tree | a2b87cf25f2fd870e14dc9c812dcbd3f15660c61 /builtin/repack.c | |
| parent | db0583b3fd18596b8e360b0fd1554ac3e5c15793 (diff) | |
| parent | 5ee86c273bfc83fa432910a13c6ce28b74361896 (diff) | |
| download | git-c29998d1d4cc2de064069adb2738af56a9d2fbd0.tar.gz | |
Merge branch 'tb/midx-avoid-cruft-packs' into ps/object-store-midx
* tb/midx-avoid-cruft-packs:
repack: exclude cruft pack(s) from the MIDX where possible
pack-objects: introduce '--stdin-packs=follow'
pack-objects: swap 'show_{object,commit}_pack_hint'
pack-objects: fix typo in 'show_object_pack_hint()'
pack-objects: perform name-hash traversal for unpacked objects
pack-objects: declare 'rev_info' for '--stdin-packs' earlier
pack-objects: factor out handling '--stdin-packs'
pack-objects: limit scope in 'add_object_entry_from_pack()'
pack-objects: use standard option incompatibility functions
Diffstat (limited to 'builtin/repack.c')
| -rw-r--r-- | builtin/repack.c | 187 |
1 files changed, 167 insertions, 20 deletions
diff --git a/builtin/repack.c b/builtin/repack.c index 9bbf032b6d..5e89d96df1 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -39,6 +39,7 @@ static int write_bitmaps = -1; static int use_delta_islands; static int run_update_server_info = 1; static char *packdir, *packtmp_name, *packtmp; +static int midx_must_contain_cruft = 1; static const char *const git_repack_usage[] = { N_("git repack [-a] [-A] [-d] [-f] [-F] [-l] [-n] [-q] [-b] [-m]\n" @@ -108,6 +109,10 @@ static int repack_config(const char *var, const char *value, free(cruft_po_args->threads); return git_config_string(&cruft_po_args->threads, var, value); } + if (!strcmp(var, "repack.midxmustcontaincruft")) { + midx_must_contain_cruft = git_config_bool(var, value); + return 0; + } return git_default_config(var, value, ctx, cb); } @@ -690,6 +695,77 @@ static void free_pack_geometry(struct pack_geometry *geometry) free(geometry->pack); } +static int midx_has_unknown_packs(char **midx_pack_names, + size_t midx_pack_names_nr, + struct string_list *include, + struct pack_geometry *geometry, + struct existing_packs *existing) +{ + size_t i; + + string_list_sort(include); + + for (i = 0; i < midx_pack_names_nr; i++) { + const char *pack_name = midx_pack_names[i]; + + /* + * Determine whether or not each MIDX'd pack from the existing + * MIDX (if any) is represented in the new MIDX. For each pack + * in the MIDX, it must either be: + * + * - In the "include" list of packs to be included in the new + * MIDX. Note this function is called before the include + * list is populated with any cruft pack(s). + * + * - Below the geometric split line (if using pack geometry), + * indicating that the pack won't be included in the new + * MIDX, but its contents were rolled up as part of the + * geometric repack. + * + * - In the existing non-kept packs list (if not using pack + * geometry), and marked as non-deleted. + */ + if (string_list_has_string(include, pack_name)) { + continue; + } else if (geometry) { + struct strbuf buf = STRBUF_INIT; + uint32_t j; + + for (j = 0; j < geometry->split; j++) { + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(geometry->pack[j])); + strbuf_strip_suffix(&buf, ".pack"); + strbuf_addstr(&buf, ".idx"); + + if (!strcmp(pack_name, buf.buf)) { + strbuf_release(&buf); + break; + } + } + + strbuf_release(&buf); + + if (j < geometry->split) + continue; + } else { + struct string_list_item *item; + + item = string_list_lookup(&existing->non_kept_packs, + pack_name); + if (item && !pack_is_marked_for_deletion(item)) + continue; + } + + /* + * If we got to this point, the MIDX includes some pack that we + * don't know about. + */ + return 1; + } + + return 0; +} + struct midx_snapshot_ref_data { struct tempfile *f; struct oidset seen; @@ -758,6 +834,8 @@ static void midx_snapshot_refs(struct tempfile *f) static void midx_included_packs(struct string_list *include, struct existing_packs *existing, + char **midx_pack_names, + size_t midx_pack_names_nr, struct string_list *names, struct pack_geometry *geometry) { @@ -811,26 +889,56 @@ static void midx_included_packs(struct string_list *include, } } - for_each_string_list_item(item, &existing->cruft_packs) { + if (midx_must_contain_cruft || + midx_has_unknown_packs(midx_pack_names, midx_pack_names_nr, + include, geometry, existing)) { /* - * When doing a --geometric repack, there is no need to check - * for deleted packs, since we're by definition not doing an - * ALL_INTO_ONE repack (hence no packs will be deleted). - * Otherwise we must check for and exclude any packs which are - * enqueued for deletion. + * If there are one or more unknown pack(s) present (see + * midx_has_unknown_packs() for what makes a pack + * "unknown") in the MIDX before the repack, keep them + * as they may be required to form a reachability + * closure if the MIDX is bitmapped. * - * So we could omit the conditional below in the --geometric - * case, but doing so is unnecessary since no packs are marked - * as pending deletion (since we only call - * `mark_packs_for_deletion()` when doing an all-into-one - * repack). + * For example, a cruft pack can be required to form a + * reachability closure if the MIDX is bitmapped and one + * or more of the bitmap's selected commits reaches a + * once-cruft object that was later made reachable. */ - if (pack_is_marked_for_deletion(item)) - continue; + for_each_string_list_item(item, &existing->cruft_packs) { + /* + * When doing a --geometric repack, there is no + * need to check for deleted packs, since we're + * by definition not doing an ALL_INTO_ONE + * repack (hence no packs will be deleted). + * Otherwise we must check for and exclude any + * packs which are enqueued for deletion. + * + * So we could omit the conditional below in the + * --geometric case, but doing so is unnecessary + * since no packs are marked as pending + * deletion (since we only call + * `mark_packs_for_deletion()` when doing an + * all-into-one repack). + */ + if (pack_is_marked_for_deletion(item)) + continue; - strbuf_reset(&buf); - strbuf_addf(&buf, "%s.idx", item->string); - string_list_insert(include, buf.buf); + strbuf_reset(&buf); + strbuf_addf(&buf, "%s.idx", item->string); + string_list_insert(include, buf.buf); + } + } else { + /* + * Modern versions of Git (with the appropriate + * configuration setting) will write new copies of + * once-cruft objects when doing a --geometric repack. + * + * If the MIDX has no cruft pack, new packs written + * during a --geometric repack will not rely on the + * cruft pack to form a reachability closure, so we can + * avoid including them in the MIDX in that case. + */ + ; } strbuf_release(&buf); @@ -1145,6 +1253,8 @@ int cmd_repack(int argc, struct tempfile *refs_snapshot = NULL; int i, ext, ret; int show_progress; + char **midx_pack_names = NULL; + size_t midx_pack_names_nr = 0; /* variables to be filled by option parsing */ int delete_redundant = 0; @@ -1362,7 +1472,10 @@ int cmd_repack(int argc, !(pack_everything & PACK_CRUFT)) strvec_push(&cmd.args, "--pack-loose-unreachable"); } else if (geometry.split_factor) { - strvec_push(&cmd.args, "--stdin-packs"); + if (midx_must_contain_cruft) + strvec_push(&cmd.args, "--stdin-packs"); + else + strvec_push(&cmd.args, "--stdin-packs=follow"); strvec_push(&cmd.args, "--unpacked"); } else { strvec_push(&cmd.args, "--unpacked"); @@ -1402,8 +1515,25 @@ int cmd_repack(int argc, if (ret) goto cleanup; - if (!names.nr && !po_args.quiet) - printf_ln(_("Nothing new to pack.")); + if (!names.nr) { + if (!po_args.quiet) + printf_ln(_("Nothing new to pack.")); + /* + * If we didn't write any new packs, the non-cruft packs + * may refer to once-unreachable objects in the cruft + * pack(s). + * + * If there isn't already a MIDX, the one we write + * must include the cruft pack(s), in case the + * non-cruft pack(s) refer to once-cruft objects. + * + * If there is already a MIDX, we can punt here, since + * midx_has_unknown_packs() will make the decision for + * us. + */ + if (!get_local_multi_pack_index(the_repository)) + midx_must_contain_cruft = 1; + } if (pack_everything & PACK_CRUFT) { const char *pack_prefix = find_pack_prefix(packdir, packtmp); @@ -1484,6 +1614,19 @@ int cmd_repack(int argc, string_list_sort(&names); + if (get_local_multi_pack_index(the_repository)) { + struct multi_pack_index *m = + get_local_multi_pack_index(the_repository); + + ALLOC_ARRAY(midx_pack_names, + m->num_packs + m->num_packs_in_base); + + for (; m; m = m->base_midx) + for (uint32_t i = 0; i < m->num_packs; i++) + midx_pack_names[midx_pack_names_nr++] = + xstrdup(m->pack_names[i]); + } + close_object_store(the_repository->objects); /* @@ -1525,7 +1668,8 @@ int cmd_repack(int argc, if (write_midx) { struct string_list include = STRING_LIST_INIT_DUP; - midx_included_packs(&include, &existing, &names, &geometry); + midx_included_packs(&include, &existing, midx_pack_names, + midx_pack_names_nr, &names, &geometry); ret = write_midx_included_packs(&include, &geometry, &names, refs_snapshot ? get_tempfile_path(refs_snapshot) : NULL, @@ -1576,6 +1720,9 @@ cleanup: string_list_clear(&names, 1); existing_packs_release(&existing); free_pack_geometry(&geometry); + for (size_t i = 0; i < midx_pack_names_nr; i++) + free(midx_pack_names[i]); + free(midx_pack_names); pack_objects_args_release(&po_args); pack_objects_args_release(&cruft_po_args); |
