aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2022-06-03 14:30:36 -0700
committerJunio C Hamano <gitster@pobox.com>2022-06-03 14:30:36 -0700
commit16a0e92ddc39ff6266c9b78aaad56b1e3ac687a4 (patch)
tree780cfa05178178eadaafbe74568db1b495a11d7d
parentc276c21da6d055060b1c216de1b1c04fb058425f (diff)
parent66731ff921a56cf3e7049fae614fcf7eec663bde (diff)
downloadgit-16a0e92ddc39ff6266c9b78aaad56b1e3ac687a4.tar.gz
Merge branch 'tb/geom-repack-with-keep-and-max'
Teach "git repack --geometric" work better with "--keep-pack" and avoid corrupting the repository when packsize limit is used. * tb/geom-repack-with-keep-and-max: builtin/repack.c: ensure that `names` is sorted t7703: demonstrate object corruption with pack.packSizeLimit repack: respect --keep-pack with geometric repack
-rw-r--r--builtin/repack.c49
-rwxr-xr-xt/t7703-repack-geometric.sh75
2 files changed, 112 insertions, 12 deletions
diff --git a/builtin/repack.c b/builtin/repack.c
index d1a563d5b6..0e4aae80c0 100644
--- a/builtin/repack.c
+++ b/builtin/repack.c
@@ -137,6 +137,8 @@ static void collect_pack_filenames(struct string_list *fname_nonkept_list,
string_list_append_nodup(fname_nonkept_list, fname);
}
closedir(dir);
+
+ string_list_sort(fname_kept_list);
}
static void remove_redundant_pack(const char *dir_name, const char *base_name)
@@ -332,17 +334,38 @@ static int geometry_cmp(const void *va, const void *vb)
return 0;
}
-static void init_pack_geometry(struct pack_geometry **geometry_p)
+static void init_pack_geometry(struct pack_geometry **geometry_p,
+ struct string_list *existing_kept_packs)
{
struct packed_git *p;
struct pack_geometry *geometry;
+ struct strbuf buf = STRBUF_INIT;
*geometry_p = xcalloc(1, sizeof(struct pack_geometry));
geometry = *geometry_p;
for (p = get_all_packs(the_repository); p; p = p->next) {
- if (!pack_kept_objects && p->pack_keep)
- continue;
+ if (!pack_kept_objects) {
+ /*
+ * Any pack that has its pack_keep bit set will appear
+ * in existing_kept_packs below, but this saves us from
+ * doing a more expensive check.
+ */
+ if (p->pack_keep)
+ continue;
+
+ /*
+ * The pack may be kept via the --keep-pack option;
+ * check 'existing_kept_packs' to determine whether to
+ * ignore it.
+ */
+ strbuf_reset(&buf);
+ strbuf_addstr(&buf, pack_basename(p));
+ strbuf_strip_suffix(&buf, ".pack");
+
+ if (string_list_has_string(existing_kept_packs, buf.buf))
+ continue;
+ }
ALLOC_GROW(geometry->pack,
geometry->pack_nr + 1,
@@ -353,6 +376,7 @@ static void init_pack_geometry(struct pack_geometry **geometry_p)
}
QSORT(geometry->pack, geometry->pack_nr, geometry_cmp);
+ strbuf_release(&buf);
}
static void split_pack_geometry(struct pack_geometry *geometry, int factor)
@@ -714,17 +738,20 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
strbuf_release(&path);
}
+ packdir = mkpathdup("%s/pack", get_object_directory());
+ packtmp_name = xstrfmt(".tmp-%d-pack", (int)getpid());
+ packtmp = mkpathdup("%s/%s", packdir, packtmp_name);
+
+ collect_pack_filenames(&existing_nonkept_packs, &existing_kept_packs,
+ &keep_pack_list);
+
if (geometric_factor) {
if (pack_everything)
die(_("options '%s' and '%s' cannot be used together"), "--geometric", "-A/-a");
- init_pack_geometry(&geometry);
+ init_pack_geometry(&geometry, &existing_kept_packs);
split_pack_geometry(geometry, geometric_factor);
}
- packdir = mkpathdup("%s/pack", get_object_directory());
- packtmp_name = xstrfmt(".tmp-%d-pack", (int)getpid());
- packtmp = mkpathdup("%s/%s", packdir, packtmp_name);
-
sigchain_push_common(remove_pack_on_signal);
prepare_pack_objects(&cmd, &po_args);
@@ -764,9 +791,6 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
if (use_delta_islands)
strvec_push(&cmd.args, "--delta-islands");
- collect_pack_filenames(&existing_nonkept_packs, &existing_kept_packs,
- &keep_pack_list);
-
if (pack_everything & ALL_INTO_ONE) {
repack_promisor_objects(&po_args, &names);
@@ -832,6 +856,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
if (!names.nr && !po_args.quiet)
printf_ln(_("Nothing new to pack."));
+ string_list_sort(&names);
+
for_each_string_list_item(item, &names) {
item->util = (void *)(uintptr_t)populate_pack_exts(item->string);
}
@@ -872,7 +898,6 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
if (delete_redundant && pack_everything & ALL_INTO_ONE) {
const int hexsz = the_hash_algo->hexsz;
- string_list_sort(&names);
for_each_string_list_item(item, &existing_nonkept_packs) {
char *sha1;
size_t len = strlen(item->string);
diff --git a/t/t7703-repack-geometric.sh b/t/t7703-repack-geometric.sh
index bdbbcbf1ec..da87f8b2d8 100755
--- a/t/t7703-repack-geometric.sh
+++ b/t/t7703-repack-geometric.sh
@@ -7,6 +7,7 @@ test_description='git repack --geometric works correctly'
GIT_TEST_MULTI_PACK_INDEX=0
objdir=.git/objects
+packdir=$objdir/pack
midx=$objdir/pack/multi-pack-index
test_expect_success '--geometric with no packs' '
@@ -180,6 +181,34 @@ test_expect_success '--geometric ignores kept packs' '
)
'
+test_expect_success '--geometric ignores --keep-pack packs' '
+ git init geometric &&
+ test_when_finished "rm -fr geometric" &&
+ (
+ cd geometric &&
+
+ # Create two equal-sized packs
+ test_commit kept && # 3 objects
+ git repack -d &&
+ test_commit pack && # 3 objects
+ git repack -d &&
+
+ find $objdir/pack -type f -name "*.pack" | sort >packs.before &&
+ git repack --geometric 2 -dm \
+ --keep-pack="$(basename "$(head -n 1 packs.before)")" >out &&
+ find $objdir/pack -type f -name "*.pack" | sort >packs.after &&
+
+ # Packs should not have changed (only one non-kept pack, no
+ # loose objects), but $midx should now exist.
+ grep "Nothing new to pack" out &&
+ test_path_is_file $midx &&
+
+ test_cmp packs.before packs.after &&
+
+ git fsck
+ )
+'
+
test_expect_success '--geometric chooses largest MIDX preferred pack' '
git init geometric &&
test_when_finished "rm -fr geometric" &&
@@ -202,4 +231,50 @@ test_expect_success '--geometric chooses largest MIDX preferred pack' '
)
'
+test_expect_success '--geometric with pack.packSizeLimit' '
+ git init pack-rewrite &&
+ test_when_finished "rm -fr pack-rewrite" &&
+ (
+ cd pack-rewrite &&
+
+ test-tool genrandom foo 1048576 >foo &&
+ test-tool genrandom bar 1048576 >bar &&
+
+ git add foo bar &&
+ test_tick &&
+ git commit -m base &&
+
+ git rev-parse HEAD:foo HEAD:bar >p1.objects &&
+ git rev-parse HEAD HEAD^{tree} >p2.objects &&
+
+ # These two packs each contain two objects, so the following
+ # `--geometric` repack will try to combine them.
+ p1="$(git pack-objects $packdir/pack <p1.objects)" &&
+ p2="$(git pack-objects $packdir/pack <p2.objects)" &&
+
+ # Remove any loose objects in packs, since we do not want extra
+ # copies around (which would mask over potential object
+ # corruption issues).
+ git prune-packed &&
+
+ # Both p1 and p2 will be rolled up, but pack-objects will write
+ # three packs:
+ #
+ # - one containing object "foo",
+ # - another containing object "bar",
+ # - a final pack containing the commit and tree objects
+ # (identical to p2 above)
+ git repack --geometric 2 -d --max-pack-size=1048576 &&
+
+ # Ensure `repack` can detect that the third pack it wrote
+ # (containing just the tree and commit objects) was identical to
+ # one that was below the geometric split, so that we can save it
+ # from deletion.
+ #
+ # If `repack` fails to do that, we will incorrectly delete p2,
+ # causing object corruption.
+ git fsck
+ )
+'
+
test_done