diff options
| -rw-r--r-- | Documentation/Makefile | 1 | ||||
| -rw-r--r-- | Documentation/config/gc.txt | 21 | ||||
| -rw-r--r-- | Documentation/config/repack.txt | 9 | ||||
| -rw-r--r-- | Documentation/git-gc.txt | 5 | ||||
| -rw-r--r-- | Documentation/git-pack-objects.txt | 30 | ||||
| -rw-r--r-- | Documentation/git-repack.txt | 11 | ||||
| -rw-r--r-- | Documentation/technical/cruft-packs.txt | 123 | ||||
| -rw-r--r-- | Documentation/technical/pack-format.txt | 19 | ||||
| -rw-r--r-- | Makefile | 2 | ||||
| -rw-r--r-- | builtin/gc.c | 10 | ||||
| -rw-r--r-- | builtin/pack-objects.c | 304 | ||||
| -rw-r--r-- | builtin/repack.c | 185 | ||||
| -rw-r--r-- | bulk-checkin.c | 2 | ||||
| -rw-r--r-- | chunk-format.c | 12 | ||||
| -rw-r--r-- | chunk-format.h | 3 | ||||
| -rw-r--r-- | commit-graph.c | 18 | ||||
| -rw-r--r-- | midx.c | 18 | ||||
| -rw-r--r-- | object-file.c | 4 | ||||
| -rw-r--r-- | object-store.h | 12 | ||||
| -rw-r--r-- | pack-mtimes.c | 129 | ||||
| -rw-r--r-- | pack-mtimes.h | 26 | ||||
| -rw-r--r-- | pack-objects.c | 6 | ||||
| -rw-r--r-- | pack-objects.h | 25 | ||||
| -rw-r--r-- | pack-write.c | 93 | ||||
| -rw-r--r-- | pack.h | 4 | ||||
| -rw-r--r-- | packfile.c | 19 | ||||
| -rw-r--r-- | reachable.c | 58 | ||||
| -rw-r--r-- | reachable.h | 9 | ||||
| -rw-r--r-- | t/helper/test-pack-mtimes.c | 56 | ||||
| -rw-r--r-- | t/helper/test-tool.c | 1 | ||||
| -rw-r--r-- | t/helper/test-tool.h | 1 | ||||
| -rwxr-xr-x | t/t5329-pack-objects-cruft.sh | 739 |
32 files changed, 1853 insertions, 102 deletions
diff --git a/Documentation/Makefile b/Documentation/Makefile index d3f043f50d..f2e7fc1daa 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -95,6 +95,7 @@ TECH_DOCS += MyFirstObjectWalk TECH_DOCS += SubmittingPatches TECH_DOCS += ToolsForGit TECH_DOCS += technical/bundle-format +TECH_DOCS += technical/cruft-packs TECH_DOCS += technical/hash-function-transition TECH_DOCS += technical/http-protocol TECH_DOCS += technical/index-format diff --git a/Documentation/config/gc.txt b/Documentation/config/gc.txt index c834e07991..38fea076a2 100644 --- a/Documentation/config/gc.txt +++ b/Documentation/config/gc.txt @@ -81,14 +81,21 @@ gc.packRefs:: to enable it within all non-bare repos or it can be set to a boolean value. The default is `true`. +gc.cruftPacks:: + Store unreachable objects in a cruft pack (see + linkgit:git-repack[1]) instead of as loose objects. The default + is `false`. + gc.pruneExpire:: - When 'git gc' is run, it will call 'prune --expire 2.weeks.ago'. - Override the grace period with this config variable. The value - "now" may be used to disable this grace period and always prune - unreachable objects immediately, or "never" may be used to - suppress pruning. This feature helps prevent corruption when - 'git gc' runs concurrently with another process writing to the - repository; see the "NOTES" section of linkgit:git-gc[1]. + When 'git gc' is run, it will call 'prune --expire 2.weeks.ago' + (and 'repack --cruft --cruft-expiration 2.weeks.ago' if using + cruft packs via `gc.cruftPacks` or `--cruft`). Override the + grace period with this config variable. The value "now" may be + used to disable this grace period and always prune unreachable + objects immediately, or "never" may be used to suppress pruning. + This feature helps prevent corruption when 'git gc' runs + concurrently with another process writing to the repository; see + the "NOTES" section of linkgit:git-gc[1]. gc.worktreePruneExpire:: When 'git gc' is run, it calls diff --git a/Documentation/config/repack.txt b/Documentation/config/repack.txt index 41ac6953c8..c79af6d7b8 100644 --- a/Documentation/config/repack.txt +++ b/Documentation/config/repack.txt @@ -30,3 +30,12 @@ repack.updateServerInfo:: If set to false, linkgit:git-repack[1] will not run linkgit:git-update-server-info[1]. Defaults to true. Can be overridden when true by the `-n` option of linkgit:git-repack[1]. + +repack.cruftWindow:: +repack.cruftWindowMemory:: +repack.cruftDepth:: +repack.cruftThreads:: + Parameters used by linkgit:git-pack-objects[1] when generating + a cruft pack and the respective parameters are not given over + the command line. See similarly named `pack.*` configuration + variables for defaults and meaning. diff --git a/Documentation/git-gc.txt b/Documentation/git-gc.txt index 853967dea0..ba4e67700e 100644 --- a/Documentation/git-gc.txt +++ b/Documentation/git-gc.txt @@ -54,6 +54,11 @@ other housekeeping tasks (e.g. rerere, working trees, reflog...) will be performed as well. +--cruft:: + When expiring unreachable objects, pack them separately into a + cruft pack instead of storing the loose objects as loose + objects. + --prune=<date>:: Prune loose objects older than date (default is 2 weeks ago, overridable by the config variable `gc.pruneExpire`). diff --git a/Documentation/git-pack-objects.txt b/Documentation/git-pack-objects.txt index f8344e1e5b..a9995a932c 100644 --- a/Documentation/git-pack-objects.txt +++ b/Documentation/git-pack-objects.txt @@ -13,6 +13,7 @@ SYNOPSIS [--no-reuse-delta] [--delta-base-offset] [--non-empty] [--local] [--incremental] [--window=<n>] [--depth=<n>] [--revs [--unpacked | --all]] [--keep-pack=<pack-name>] + [--cruft] [--cruft-expiration=<time>] [--stdout [--filter=<filter-spec>] | <base-name>] [--shallow] [--keep-true-parents] [--[no-]sparse] < <object-list> @@ -95,6 +96,35 @@ base-name:: Incompatible with `--revs`, or options that imply `--revs` (such as `--all`), with the exception of `--unpacked`, which is compatible. +--cruft:: + Packs unreachable objects into a separate "cruft" pack, denoted + by the existence of a `.mtimes` file. Typically used by `git + repack --cruft`. Callers provide a list of pack names and + indicate which packs will remain in the repository, along with + which packs will be deleted (indicated by the `-` prefix). The + contents of the cruft pack are all objects not contained in the + surviving packs which have not exceeded the grace period (see + `--cruft-expiration` below), or which have exceeded the grace + period, but are reachable from an other object which hasn't. ++ +When the input lists a pack containing all reachable objects (and lists +all other packs as pending deletion), the corresponding cruft pack will +contain all unreachable objects (with mtime newer than the +`--cruft-expiration`) along with any unreachable objects whose mtime is +older than the `--cruft-expiration`, but are reachable from an +unreachable object whose mtime is newer than the `--cruft-expiration`). ++ +Incompatible with `--unpack-unreachable`, `--keep-unreachable`, +`--pack-loose-unreachable`, `--stdin-packs`, as well as any other +options which imply `--revs`. Also incompatible with `--max-pack-size`; +when this option is set, the maximum pack size is not inferred from +`pack.packSizeLimit`. + +--cruft-expiration=<approxidate>:: + If specified, objects are eliminated from the cruft pack if they + have an mtime older than `<approxidate>`. If unspecified (and + given `--cruft`), then no objects are eliminated. + --window=<n>:: --depth=<n>:: These two options affect how the objects contained in diff --git a/Documentation/git-repack.txt b/Documentation/git-repack.txt index ee30edc178..0bf13893d8 100644 --- a/Documentation/git-repack.txt +++ b/Documentation/git-repack.txt @@ -63,6 +63,17 @@ to the new separate pack will be written. Also run 'git prune-packed' to remove redundant loose object files. +--cruft:: + Same as `-a`, unless `-d` is used. Then any unreachable objects + are packed into a separate cruft pack. Unreachable objects can + be pruned using the normal expiry rules with the next `git gc` + invocation (see linkgit:git-gc[1]). Incompatible with `-k`. + +--cruft-expiration=<approxidate>:: + Expire unreachable objects older than `<approxidate>` + immediately instead of waiting for the next `git gc` invocation. + Only useful with `--cruft -d`. + -l:: Pass the `--local` option to 'git pack-objects'. See linkgit:git-pack-objects[1]. diff --git a/Documentation/technical/cruft-packs.txt b/Documentation/technical/cruft-packs.txt new file mode 100644 index 0000000000..d81f3a8982 --- /dev/null +++ b/Documentation/technical/cruft-packs.txt @@ -0,0 +1,123 @@ += Cruft packs + +The cruft packs feature offer an alternative to Git's traditional mechanism of +removing unreachable objects. This document provides an overview of Git's +pruning mechanism, and how a cruft pack can be used instead to accomplish the +same. + +== Background + +To remove unreachable objects from your repository, Git offers `git repack -Ad` +(see linkgit:git-repack[1]). Quoting from the documentation: + +[quote] +[...] unreachable objects in a previous pack become loose, unpacked objects, +instead of being left in the old pack. [...] loose unreachable objects will be +pruned according to normal expiry rules with the next 'git gc' invocation. + +Unreachable objects aren't removed immediately, since doing so could race with +an incoming push which may reference an object which is about to be deleted. +Instead, those unreachable objects are stored as loose objects and stay that way +until they are older than the expiration window, at which point they are removed +by linkgit:git-prune[1]. + +Git must store these unreachable objects loose in order to keep track of their +per-object mtimes. If these unreachable objects were written into one big pack, +then either freshening that pack (because an object contained within it was +re-written) or creating a new pack of unreachable objects would cause the pack's +mtime to get updated, and the objects within it would never leave the expiration +window. Instead, objects are stored loose in order to keep track of the +individual object mtimes and avoid a situation where all cruft objects are +freshened at once. + +This can lead to undesirable situations when a repository contains many +unreachable objects which have not yet left the grace period. Having large +directories in the shards of `.git/objects` can lead to decreased performance in +the repository. But given enough unreachable objects, this can lead to inode +starvation and degrade the performance of the whole system. Since we +can never pack those objects, these repositories often take up a large amount of +disk space, since we can only zlib compress them, but not store them in delta +chains. + +== Cruft packs + +A cruft pack eliminates the need for storing unreachable objects in a loose +state by including the per-object mtimes in a separate file alongside a single +pack containing all loose objects. + +A cruft pack is written by `git repack --cruft` when generating a new pack. +linkgit:git-pack-objects[1]'s `--cruft` option. Note that `git repack --cruft` +is a classic all-into-one repack, meaning that everything in the resulting pack is +reachable, and everything else is unreachable. Once written, the `--cruft` +option instructs `git repack` to generate another pack containing only objects +not packed in the previous step (which equates to packing all unreachable +objects together). This progresses as follows: + + 1. Enumerate every object, marking any object which is (a) not contained in a + kept-pack, and (b) whose mtime is within the grace period as a traversal + tip. + + 2. Perform a reachability traversal based on the tips gathered in the previous + step, adding every object along the way to the pack. + + 3. Write the pack out, along with a `.mtimes` file that records the per-object + timestamps. + +This mode is invoked internally by linkgit:git-repack[1] when instructed to +write a cruft pack. Crucially, the set of in-core kept packs is exactly the set +of packs which will not be deleted by the repack; in other words, they contain +all of the repository's reachable objects. + +When a repository already has a cruft pack, `git repack --cruft` typically only +adds objects to it. An exception to this is when `git repack` is given the +`--cruft-expiration` option, which allows the generated cruft pack to omit +expired objects instead of waiting for linkgit:git-gc[1] to expire those objects +later on. + +It is linkgit:git-gc[1] that is typically responsible for removing expired +unreachable objects. + +== Caution for mixed-version environments + +Repositories that have cruft packs in them will continue to work with any older +version of Git. Note, however, that previous versions of Git which do not +understand the `.mtimes` file will use the cruft pack's mtime as the mtime for +all of the objects in it. In other words, do not expect older (pre-cruft pack) +versions of Git to interpret or even read the contents of the `.mtimes` file. + +Note that having mixed versions of Git GC-ing the same repository can lead to +unreachable objects never being completely pruned. This can happen under the +following circumstances: + + - An older version of Git running GC explodes the contents of an existing + cruft pack loose, using the cruft pack's mtime. + - A newer version running GC collects those loose objects into a cruft pack, + where the .mtime file reflects the loose object's actual mtimes, but the + cruft pack mtime is "now". + +Repeating this process will lead to unreachable objects not getting pruned as a +result of repeatedly resetting the objects' mtimes to the present time. + +If you are GC-ing repositories in a mixed version environment, consider omitting +the `--cruft` option when using linkgit:git-repack[1] and linkgit:git-gc[1], and +leaving the `gc.cruftPacks` configuration unset until all writers understand +cruft packs. + +== Alternatives + +Notable alternatives to this design include: + + - The location of the per-object mtime data, and + - Storing unreachable objects in multiple cruft packs. + +On the location of mtime data, a new auxiliary file tied to the pack was chosen +to avoid complicating the `.idx` format. If the `.idx` format were ever to gain +support for optional chunks of data, it may make sense to consolidate the +`.mtimes` format into the `.idx` itself. + +Storing unreachable objects among multiple cruft packs (e.g., creating a new +cruft pack during each repacking operation including only unreachable objects +which aren't already stored in an earlier cruft pack) is significantly more +complicated to construct, and so aren't pursued here. The obvious drawback to +the current implementation is that the entire cruft pack must be re-written from +scratch. diff --git a/Documentation/technical/pack-format.txt b/Documentation/technical/pack-format.txt index 6d3efb7d16..b520aa9c45 100644 --- a/Documentation/technical/pack-format.txt +++ b/Documentation/technical/pack-format.txt @@ -294,6 +294,25 @@ Pack file entry: <+ All 4-byte numbers are in network order. +== pack-*.mtimes files have the format: + +All 4-byte numbers are in network byte order. + + - A 4-byte magic number '0x4d544d45' ('MTME'). + + - A 4-byte version identifier (= 1). + + - A 4-byte hash function identifier (= 1 for SHA-1, 2 for SHA-256). + + - A table of 4-byte unsigned integers. The ith value is the + modification time (mtime) of the ith object in the corresponding + pack by lexicographic (index) order. The mtimes count standard + epoch seconds. + + - A trailer, containing a checksum of the corresponding packfile, + and a checksum of all of the above (each having length according + to the specified hash function). + == multi-pack-index (MIDX) files have the following format: The multi-pack-index files refer to multiple pack-files and loose objects. @@ -740,6 +740,7 @@ TEST_BUILTINS_OBJS += test-oid-array.o TEST_BUILTINS_OBJS += test-oidmap.o TEST_BUILTINS_OBJS += test-oidtree.o TEST_BUILTINS_OBJS += test-online-cpus.o +TEST_BUILTINS_OBJS += test-pack-mtimes.o TEST_BUILTINS_OBJS += test-parse-options.o TEST_BUILTINS_OBJS += test-parse-pathspec-file.o TEST_BUILTINS_OBJS += test-partial-clone.o @@ -996,6 +997,7 @@ LIB_OBJS += oidtree.o LIB_OBJS += pack-bitmap-write.o LIB_OBJS += pack-bitmap.o LIB_OBJS += pack-check.o +LIB_OBJS += pack-mtimes.o LIB_OBJS += pack-objects.o LIB_OBJS += pack-revindex.o LIB_OBJS += pack-write.o diff --git a/builtin/gc.c b/builtin/gc.c index daa4535f1c..4ea70089c9 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -42,6 +42,7 @@ static const char * const builtin_gc_usage[] = { static int pack_refs = 1; static int prune_reflogs = 1; +static int cruft_packs = 0; static int aggressive_depth = 50; static int aggressive_window = 250; static int gc_auto_threshold = 6700; @@ -152,6 +153,7 @@ static void gc_config(void) git_config_get_int("gc.auto", &gc_auto_threshold); git_config_get_int("gc.autopacklimit", &gc_auto_pack_limit); git_config_get_bool("gc.autodetach", &detach_auto); + git_config_get_bool("gc.cruftpacks", &cruft_packs); git_config_get_expiry("gc.pruneexpire", &prune_expire); git_config_get_expiry("gc.worktreepruneexpire", &prune_worktrees_expire); git_config_get_expiry("gc.logexpiry", &gc_log_expire); @@ -331,7 +333,11 @@ static void add_repack_all_option(struct string_list *keep_pack) { if (prune_expire && !strcmp(prune_expire, "now")) strvec_push(&repack, "-a"); - else { + else if (cruft_packs) { + strvec_push(&repack, "--cruft"); + if (prune_expire) + strvec_pushf(&repack, "--cruft-expiration=%s", prune_expire); + } else { strvec_push(&repack, "-A"); if (prune_expire) strvec_pushf(&repack, "--unpack-unreachable=%s", prune_expire); @@ -551,6 +557,7 @@ int cmd_gc(int argc, const char **argv, const char *prefix) { OPTION_STRING, 0, "prune", &prune_expire, N_("date"), N_("prune unreferenced objects"), PARSE_OPT_OPTARG, NULL, (intptr_t)prune_expire }, + OPT_BOOL(0, "cruft", &cruft_packs, N_("pack unreferenced objects separately")), OPT_BOOL(0, "aggressive", &aggressive, N_("be more thorough (increased runtime)")), OPT_BOOL_F(0, "auto", &auto_gc, N_("enable auto-gc mode"), PARSE_OPT_NOCOMPLETE), @@ -670,6 +677,7 @@ int cmd_gc(int argc, const char **argv, const char *prefix) die(FAILED_RUN, repack.v[0]); if (prune_expire) { + /* run `git prune` even if using cruft packs */ strvec_push(&prune, prune_expire); if (quiet) strvec_push(&prune, "--no-progress"); diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 0a26de166d..040cebafd2 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -36,6 +36,7 @@ #include "trace2.h" #include "shallow.h" #include "promisor-remote.h" +#include "pack-mtimes.h" /* * Objects we are going to pack are collected in the `to_pack` structure. @@ -194,6 +195,8 @@ static int reuse_delta = 1, reuse_object = 1; static int keep_unreachable, unpack_unreachable, include_tag; static timestamp_t unpack_unreachable_expiration; static int pack_loose_unreachable; +static int cruft; +static timestamp_t cruft_expiration; static int local; static int have_non_local_packs; static int incremental; @@ -1260,9 +1263,13 @@ static void write_pack_file(void) &to_pack, written_list, nr_written); } + if (cruft) + pack_idx_opts.flags |= WRITE_MTIMES; + stage_tmp_packfiles(&tmpname, pack_tmp_name, written_list, nr_written, - &pack_idx_opts, hash, &idx_tmp_name); + &to_pack, &pack_idx_opts, hash, + &idx_tmp_name); if (write_bitmap_index) { size_t tmpname_len = tmpname.len; @@ -1521,13 +1528,13 @@ static int want_object_in_pack(const struct object_id *oid, return 1; } -static void create_object_entry(const struct object_id *oid, - enum object_type type, - uint32_t hash, - int exclude, - int no_try_delta, - struct packed_git *found_pack, - off_t found_offset) +static struct object_entry *create_object_entry(const struct object_id *oid, + enum object_type type, + uint32_t hash, + int exclude, + int no_try_delta, + struct packed_git *found_pack, + off_t found_offset) { struct object_entry *entry; @@ -1544,6 +1551,8 @@ static void create_object_entry(const struct object_id *oid, } entry->no_try_delta = no_try_delta; + + return entry; } static const char no_closure_warning[] = N_( @@ -3403,6 +3412,217 @@ static void read_packs_list_from_stdin(void) string_list_clear(&exclude_packs, 0); } +static void add_cruft_object_entry(const struct object_id *oid, enum object_type type, + struct packed_git *pack, off_t offset, + const char *name, uint32_t mtime) +{ + struct object_entry *entry; + + display_progress(progress_state, ++nr_seen); + + entry = packlist_find(&to_pack, oid); + if (entry) { + if (name) { + entry->hash = pack_name_hash(name); + entry->no_try_delta = no_try_delta(name); + } + } else { + if (!want_object_in_pack(oid, 0, &pack, &offset)) + return; + if (!pack && type == OBJ_BLOB && !has_loose_object(oid)) { + /* + * If a traversed tree has a missing blob then we want + * to avoid adding that missing object to our pack. + * + * This only applies to missing blobs, not trees, + * because the traversal needs to parse sub-trees but + * not blobs. + * + * Note we only perform this check when we couldn't + * already find the object in a pack, so we're really + * limited to "ensure non-tip blobs which don't exist in + * packs do exist via loose objects". Confused? + */ + return; + } + + entry = create_object_entry(oid, type, pack_name_hash(name), + 0, name && no_try_delta(name), + pack, offset); + } + + if (mtime > oe_cruft_mtime(&to_pack, entry)) + oe_set_cruft_mtime(&to_pack, entry, mtime); + return; +} + +static void show_cruft_object(struct object *obj, const char *name, void *data) +{ + /* + * if we did not record it earlier, it's at least as old as our + * expiration value. Rather than find it exactly, just use that + * value. This may bump it forward from its real mtime, but it + * will still be "too old" next time we run with the same + * expiration. + * + * if obj does appear in the packing list, this call is a noop (or may + * set the namehash). + */ + add_cruft_object_entry(&obj->oid, obj->type, NULL, 0, name, cruft_expiration); +} + +static void show_cruft_commit(struct commit *commit, void *data) +{ + show_cruft_object((struct object*)commit, NULL, data); +} + +static int cruft_include_check_obj(struct object *obj, void *data) +{ + return !has_object_kept_pack(&obj->oid, IN_CORE_KEEP_PACKS); +} + +static int cruft_include_check(struct commit *commit, void *data) +{ + return cruft_include_check_obj((struct object*)commit, data); +} + +static void set_cruft_mtime(const struct object *object, + struct packed_git *pack, + off_t offset, time_t mtime) +{ + add_cruft_object_entry(&object->oid, object->type, pack, offset, NULL, + mtime); +} + +static void mark_pack_kept_in_core(struct string_list *packs, unsigned keep) +{ + struct string_list_item *item = NULL; + for_each_string_list_item(item, packs) { + struct packed_git *p = item->util; + if (!p) + die(_("could not find pack '%s'"), item->string); + p->pack_keep_in_core = keep; + } +} + +static void add_unreachable_loose_objects(void); +static void add_objects_in_unpacked_packs(void); + +static void enumerate_cruft_objects(void) +{ + if (progress) + progress_state = start_progress(_("Enumerating cruft objects"), 0); + + add_objects_in_unpacked_packs(); + add_unreachable_loose_objects(); + + stop_progress(&progress_state); +} + +static void enumerate_and_traverse_cruft_objects(struct string_list *fresh_packs) +{ + struct packed_git *p; + struct rev_info revs; + int ret; + + repo_init_revisions(the_repository, &revs, NULL); + + revs.tag_objects = 1; + revs.tree_objects = 1; + revs.blob_objects = 1; + + revs.include_check = cruft_include_check; + revs.include_check_obj = cruft_include_check_obj; + + revs.ignore_missing_links = 1; + + if (progress) + progress_state = start_progress(_("Enumerating cruft objects"), 0); + ret = add_unseen_recent_objects_to_traversal(&revs, cruft_expiration, + set_cruft_mtime, 1); + stop_progress(&progress_state); + + if (ret) + die(_("unable to add cruft objects")); + + /* + * Re-mark only the fresh packs as kept so that objects in + * unknown packs do not halt the reachability traversal early. + */ + for (p = get_all_packs(the_repository); p; p = p->next) + p->pack_keep_in_core = 0; + mark_pack_kept_in_core(fresh_packs, 1); + + if (prepare_revision_walk(&revs)) + die(_("revision walk setup failed")); + if (progress) + progress_state = start_progress(_("Traversing cruft objects"), 0); + nr_seen = 0; + traverse_commit_list(&revs, show_cruft_commit, show_cruft_object, NULL); + + stop_progress(&progress_state); +} + +static void read_cruft_objects(void) +{ + struct strbuf buf = STRBUF_INIT; + struct string_list discard_packs = STRING_LIST_INIT_DUP; + struct string_list fresh_packs = STRING_LIST_INIT_DUP; + struct packed_git *p; + + ignore_packed_keep_in_core = 1; + + while (strbuf_getline(&buf, stdin) != EOF) { + if (!buf.len) + continue; + + if (*buf.buf == '-') + string_list_append(&discard_packs, buf.buf + 1); + else + string_list_append(&fresh_packs, buf.buf); + strbuf_reset(&buf); + } + + string_list_sort(&discard_packs); + string_list_sort(&fresh_packs); + + for (p = get_all_packs(the_repository); p; p = p->next) { + const char *pack_name = pack_basename(p); + struct string_list_item *item; + + item = string_list_lookup(&fresh_packs, pack_name); + if (!item) + item = string_list_lookup(&discard_packs, pack_name); + + if (item) { + item->util = p; + } else { + /* + * This pack wasn't mentioned in either the "fresh" or + * "discard" list, so the caller didn't know about it. + * + * Mark it as kept so that its objects are ignored by + * add_unseen_recent_objects_to_traversal(). We'll + * unmark it before starting the traversal so it doesn't + * halt the traversal early. + */ + p->pack_keep_in_core = 1; + } + } + + mark_pack_kept_in_core(&fresh_packs, 1); + mark_pack_kept_in_core(&discard_packs, 0); + + if (cruft_expiration) + enumerate_and_traverse_cruft_objects(&fresh_packs); + else + enumerate_cruft_objects(); + + strbuf_release(&buf); + string_list_clear(&discard_packs, 0); + string_list_clear(&fresh_packs, 0); +} + static void read_object_list_from_stdin(void) { char line[GIT_MAX_HEXSZ + 1 + PATH_MAX + 2]; @@ -3535,7 +3755,24 @@ static int add_object_in_unpacked_pack(const struct object_id *oid, uint32_t pos, void *_data) { - add_object_entry(oid, OBJ_NONE, "", 0); + if (cruft) { + off_t offset; + time_t mtime; + + if (pack->is_cruft) { + if (load_pack_mtimes(pack) < 0) + die(_("could not load cruft pack .mtimes")); + mtime = nth_packed_mtime(pack, pos); + } else { + mtime = pack->mtime; + } + offset = nth_packed_object_offset(pack, pos); + + add_cruft_object_entry(oid, OBJ_NONE, pack, offset, + NULL, mtime); + } else { + add_object_entry(oid, OBJ_NONE, "", 0); + } return 0; } @@ -3559,7 +3796,19 @@ static int add_loose_object(const struct object_id *oid, const char *path, return 0; } - add_object_entry(oid, type, "", 0); + if (cruft) { + struct stat st; + if (stat(path, &st) < 0) { + if (errno == ENOENT) + return 0; + return error_errno("unable to stat %s", oid_to_hex(oid)); + } + + add_cruft_object_entry(oid, type, NULL, 0, NULL, + st.st_mtime); + } else { + add_object_entry(oid, type, "", 0); + } return 0; } @@ -3799,7 +4048,7 @@ static void get_object_list(struct rev_info *revs, int ac, const char **av) if (unpack_unreachable_expiration) { revs->ignore_missing_links = 1; if (add_unseen_recent_objects_to_traversal(revs, - unpack_unreachable_expiration)) + unpack_unreachable_expiration, NULL, 0)) die(_("unable to add recent objects")); if (prepare_revision_walk(revs)) die(_("revision walk setup failed")); @@ -3876,6 +4125,20 @@ static int option_parse_unpack_unreachable(const struct option *opt, return 0; } +static int option_parse_cruft_expiration(const struct option *opt, + const char *arg, int unset) +{ + if (unset) { + cruft = 0; + cruft_expiration = 0; + } else { + cruft = 1; + if (arg) + cruft_expiration = approxidate(arg); + } + return 0; +} + struct po_filter_data { unsigned have_revs:1; struct rev_info revs; @@ -3965,6 +4228,10 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) OPT_CALLBACK_F(0, "unpack-unreachable", NULL, N_("time"), N_("unpack unreachable objects newer than <time>"), PARSE_OPT_OPTARG, option_parse_unpack_unreachable), + OPT_BOOL(0, "cruft", &cruft, N_("create a cruft pack")), + OPT_CALLBACK_F(0, "cruft-expiration", NULL, N_("time"), + N_("expire cruft objects older than <time>"), + PARSE_OPT_OPTARG, option_parse_cruft_expiration), OPT_BOOL(0, "sparse", &sparse, N_("use the sparse reachability algorithm")), OPT_BOOL(0, "thin", &thin, @@ -4091,7 +4358,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) if (!HAVE_THREADS && delta_search_threads != 1) warning(_("no threads support, ignoring --threads")); - if (!pack_to_stdout && !pack_size_limit) + if (!pack_to_stdout && !pack_size_limit && !cruft) pack_size_limit = pack_size_limit_cfg; if (pack_to_stdout && pack_size_limit) die(_("--max-pack-size cannot be used to build a pack for transfer")); @@ -4118,6 +4385,15 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) if (stdin_packs && use_internal_rev_list) die(_("cannot use internal rev list with --stdin-packs")); + if (cruft) { + if (use_internal_rev_list) + die(_("cannot use internal rev list with --cruft")); + if (stdin_packs) + die(_("cannot use --stdin-packs with --cruft")); + if (pack_size_limit) + die(_("cannot use --max-pack-size with --cruft")); + } + /* * "soft" reasons not to use bitmaps - for on-disk repack by default we want * @@ -4174,7 +4450,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) the_repository); prepare_packing_data(the_repository, &to_pack); - if (progress) + if (progress && !cruft) progress_state = start_progress(_("Enumerating objects"), 0); if (stdin_packs) { /* avoids adding objects in excluded packs */ @@ -4182,6 +4458,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) read_packs_list_from_stdin(); if (rev_list_unpacked) add_unreachable_loose_objects(); + } else if (cruft) { + read_cruft_objects(); } else if (!use_internal_rev_list) { read_object_list_from_stdin(); } else if (pfd.have_revs) { diff --git a/builtin/repack.c b/builtin/repack.c index 0e4aae80c0..c957b2959f 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -18,12 +18,21 @@ #include "pack-bitmap.h" #include "refs.h" +#define ALL_INTO_ONE 1 +#define LOOSEN_UNREACHABLE 2 +#define PACK_CRUFT 4 + +#define DELETE_PACK 1 +#define CRUFT_PACK 2 + +static int pack_everything; static int delta_base_offset = 1; static int pack_kept_objects = -1; static int write_bitmaps = -1; static int use_delta_islands; static int run_update_server_info = 1; static char *packdir, *packtmp_name, *packtmp; +static char *cruft_expiration; static const char *const git_repack_usage[] = { N_("git repack [<options>]"), @@ -35,9 +44,21 @@ static const char incremental_bitmap_conflict_error[] = N_( "--no-write-bitmap-index or disable the pack.writebitmaps configuration." ); +struct pack_objects_args { + const char *window; + const char *window_memory; + const char *depth; + const char *threads; + const char *max_pack_size; + int no_reuse_delta; + int no_reuse_object; + int quiet; + int local; +}; static int repack_config(const char *var, const char *value, void *cb) { + struct pack_objects_args *cruft_po_args = cb; if (!strcmp(var, "repack.usedeltabaseoffset")) { delta_base_offset = git_config_bool(var, value); return 0; @@ -59,6 +80,14 @@ static int repack_config(const char *var, const char *value, void *cb) run_update_server_info = git_config_bool(var, value); return 0; } + if (!strcmp(var, "repack.cruftwindow")) + return git_config_string(&cruft_po_args->window, var, value); + if (!strcmp(var, "repack.cruftwindowmemory")) + return git_config_string(&cruft_po_args->window_memory, var, value); + if (!strcmp(var, "repack.cruftdepth")) + return git_config_string(&cruft_po_args->depth, var, value); + if (!strcmp(var, "repack.cruftthreads")) + return git_config_string(&cruft_po_args->threads, var, value); return git_default_config(var, value, cb); } @@ -131,10 +160,15 @@ static void collect_pack_filenames(struct string_list *fname_nonkept_list, fname = xmemdupz(e->d_name, len); if ((extra_keep->nr > 0 && i < extra_keep->nr) || - (file_exists(mkpath("%s/%s.keep", packdir, fname)))) + (file_exists(mkpath("%s/%s.keep", packdir, fname)))) { string_list_append_nodup(fname_kept_list, fname); - else - string_list_append_nodup(fname_nonkept_list, fname); + } else { + struct string_list_item *item; + item = string_list_append_nodup(fname_nonkept_list, + fname); + if (file_exists(mkpath("%s/%s.mtimes", packdir, fname))) + item->util = (void*)(uintptr_t)CRUFT_PACK; + } } closedir(dir); @@ -153,18 +187,6 @@ static void remove_redundant_pack(const char *dir_name, const char *base_name) strbuf_release(&buf); } -struct pack_objects_args { - const char *window; - const char *window_memory; - const char *depth; - const char *threads; - const char *max_pack_size; - int no_reuse_delta; - int no_reuse_object; - int quiet; - int local; -}; - static void prepare_pack_objects(struct child_process *cmd, const struct pack_objects_args *args) { @@ -219,6 +241,7 @@ static struct { } exts[] = { {".pack"}, {".rev", 1}, + {".mtimes", 1}, {".bitmap", 1}, {".promisor", 1}, {".idx"}, @@ -306,9 +329,6 @@ static void repack_promisor_objects(const struct pack_objects_args *args, die(_("could not finish pack-objects to repack promisor objects")); } -#define ALL_INTO_ONE 1 -#define LOOSEN_UNREACHABLE 2 - struct pack_geometry { struct packed_git **pack; uint32_t pack_nr, pack_alloc; @@ -366,6 +386,8 @@ static void init_pack_geometry(struct pack_geometry **geometry_p, if (string_list_has_string(existing_kept_packs, buf.buf)) continue; } + if (p->is_cruft) + continue; ALLOC_GROW(geometry->pack, geometry->pack_nr + 1, @@ -572,9 +594,20 @@ static void midx_included_packs(struct string_list *include, string_list_insert(include, strbuf_detach(&buf, NULL)); } + + for_each_string_list_item(item, existing_nonkept_packs) { + if (!((uintptr_t)item->util & CRUFT_PACK)) { + /* + * no need to check DELETE_PACK, since we're not + * doing an ALL_INTO_ONE repack + */ + continue; + } + string_list_insert(include, xstrfmt("%s.idx", item->string)); + } } else { for_each_string_list_item(item, existing_nonkept_packs) { - if (item->util) + if ((uintptr_t)item->util & DELETE_PACK) continue; string_list_insert(include, xstrfmt("%s.idx", item->string)); } @@ -628,6 +661,67 @@ static int write_midx_included_packs(struct string_list *include, return finish_command(&cmd); } +static int write_cruft_pack(const struct pack_objects_args *args, + const char *pack_prefix, + struct string_list *names, + struct string_list *existing_packs, + struct string_list *existing_kept_packs) +{ + struct child_process cmd = CHILD_PROCESS_INIT; + struct strbuf line = STRBUF_INIT; + struct string_list_item *item; + FILE *in, *out; + int ret; + + prepare_pack_objects(&cmd, args); + + strvec_push(&cmd.args, "--cruft"); + if (cruft_expiration) + strvec_pushf(&cmd.args, "--cruft-expiration=%s", + cruft_expiration); + + strvec_push(&cmd.args, "--honor-pack-keep"); + strvec_push(&cmd.args, "--non-empty"); + strvec_push(&cmd.args, "--max-pack-size=0"); + + cmd.in = -1; + + ret = start_command(&cmd); + if (ret) + return ret; + + /* + * names has a confusing double use: it both provides the list + * of just-written new packs, and accepts the name of the cruft + * pack we are writing. + * + * By the time it is read here, it contains only the pack(s) + * that were just written, which is exactly the set of packs we + * want to consider kept. + */ + in = xfdopen(cmd.in, "w"); + for_each_string_list_item(item, names) + fprintf(in, "%s-%s.pack\n", pack_prefix, item->string); + for_each_string_list_item(item, existing_packs) + fprintf(in, "-%s.pack\n", item->string); + for_each_string_list_item(item, existing_kept_packs) + fprintf(in, "%s.pack\n", item->string); + fclose(in); + + out = xfdopen(cmd.out, "r"); + while (strbuf_getline_lf(&line, out) != EOF) { + if (line.len != the_hash_algo->hexsz) + die(_("repack: Expecting full hex object ID lines only " + "from pack-objects.")); + string_list_append(names, line.buf); + } + fclose(out); + + strbuf_release(&line); + + return finish_command(&cmd); +} + int cmd_repack(int argc, const char **argv, const char *prefix) { struct child_process cmd = CHILD_PROCESS_INIT; @@ -644,12 +738,12 @@ int cmd_repack(int argc, const char **argv, const char *prefix) int show_progress; /* variables to be filled by option parsing */ - int pack_everything = 0; int delete_redundant = 0; const char *unpack_unreachable = NULL; int keep_unreachable = 0; struct string_list keep_pack_list = STRING_LIST_INIT_NODUP; struct pack_objects_args po_args = {NULL}; + struct pack_objects_args cruft_po_args = {NULL}; int geometric_factor = 0; int write_midx = 0; @@ -659,6 +753,11 @@ int cmd_repack(int argc, const char **argv, const char *prefix) OPT_BIT('A', NULL, &pack_everything, N_("same as -a, and turn unreachable objects loose"), LOOSEN_UNREACHABLE | ALL_INTO_ONE), + OPT_BIT(0, "cruft", &pack_everything, + N_("same as -a, pack unreachable cruft objects separately"), + PACK_CRUFT), + OPT_STRING(0, "cruft-expiration", &cruft_expiration, N_("approxidate"), + N_("with -C, expire objects older than this")), OPT_BOOL('d', NULL, &delete_redundant, N_("remove redundant packs, and run git-prune-packed")), OPT_BOOL('f', NULL, &po_args.no_reuse_delta, @@ -699,7 +798,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix) OPT_END() }; - git_config(repack_config, NULL); + git_config(repack_config, &cruft_po_args); argc = parse_options(argc, argv, prefix, builtin_repack_options, git_repack_usage, 0); @@ -711,6 +810,15 @@ int cmd_repack(int argc, const char **argv, const char *prefix) (unpack_unreachable || (pack_everything & LOOSEN_UNREACHABLE))) die(_("options '%s' and '%s' cannot be used together"), "--keep-unreachable", "-A"); + if (pack_everything & PACK_CRUFT) { + pack_everything |= ALL_INTO_ONE; + + if (unpack_unreachable || (pack_everything & LOOSEN_UNREACHABLE)) + die(_("options '%s' and '%s' cannot be used together"), "--cruft", "-A"); + if (keep_unreachable) + die(_("options '%s' and '%s' cannot be used together"), "--cruft", "-k"); + } + if (write_bitmaps < 0) { if (!write_midx && (!(pack_everything & ALL_INTO_ONE) || !is_bare_repository())) @@ -794,7 +902,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix) if (pack_everything & ALL_INTO_ONE) { repack_promisor_objects(&po_args, &names); - if (existing_nonkept_packs.nr && delete_redundant) { + if (existing_nonkept_packs.nr && delete_redundant && + !(pack_everything & PACK_CRUFT)) { for_each_string_list_item(item, &names) { strvec_pushf(&cmd.args, "--keep-pack=%s-%s.pack", packtmp_name, item->string); @@ -856,6 +965,33 @@ int cmd_repack(int argc, const char **argv, const char *prefix) if (!names.nr && !po_args.quiet) printf_ln(_("Nothing new to pack.")); + if (pack_everything & PACK_CRUFT) { + const char *pack_prefix; + if (!skip_prefix(packtmp, packdir, &pack_prefix)) + die(_("pack prefix %s does not begin with objdir %s"), + packtmp, packdir); + if (*pack_prefix == '/') + pack_prefix++; + + if (!cruft_po_args.window) + cruft_po_args.window = po_args.window; + if (!cruft_po_args.window_memory) + cruft_po_args.window_memory = po_args.window_memory; + if (!cruft_po_args.depth) + cruft_po_args.depth = po_args.depth; + if (!cruft_po_args.threads) + cruft_po_args.threads = po_args.threads; + + cruft_po_args.local = po_args.local; + cruft_po_args.quiet = po_args.quiet; + + ret = write_cruft_pack(&cruft_po_args, pack_prefix, &names, + &existing_nonkept_packs, + &existing_kept_packs); + if (ret) + return ret; + } + string_list_sort(&names); for_each_string_list_item(item, &names) { @@ -910,7 +1046,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix) * was given) and that we will actually delete this pack * (if `-d` was given). */ - item->util = (void*)(intptr_t)!string_list_has_string(&names, sha1); + if (!string_list_has_string(&names, sha1)) + item->util = (void*)(uintptr_t)((size_t)item->util | DELETE_PACK); } } @@ -934,7 +1071,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix) if (delete_redundant) { int opts = 0; for_each_string_list_item(item, &existing_nonkept_packs) { - if (!item->util) + if (!((uintptr_t)item->util & DELETE_PACK)) continue; remove_redundant_pack(packdir, item->string); } diff --git a/bulk-checkin.c b/bulk-checkin.c index bcf878460b..98ec893842 100644 --- a/bulk-checkin.c +++ b/bulk-checkin.c @@ -38,7 +38,7 @@ static void finish_tmp_packfile(struct strbuf *basename, char *idx_tmp_name = NULL; stage_tmp_packfiles(basename, pack_tmp_name, written_list, nr_written, - pack_idx_opts, hash, &idx_tmp_name); + NULL, pack_idx_opts, hash, &idx_tmp_name); rename_tmp_packfile_idx(basename, &idx_tmp_name); free(idx_tmp_name); diff --git a/chunk-format.c b/chunk-format.c index 1c3dca62e2..0275b74a89 100644 --- a/chunk-format.c +++ b/chunk-format.c @@ -181,3 +181,15 @@ int read_chunk(struct chunkfile *cf, return CHUNK_NOT_FOUND; } + +uint8_t oid_version(const struct git_hash_algo *algop) +{ + switch (hash_algo_by_ptr(algop)) { + case GIT_HASH_SHA1: + return 1; + case GIT_HASH_SHA256: + return 2; + default: + die(_("invalid hash version")); + } +} diff --git a/chunk-format.h b/chunk-format.h index 9ccbe00377..7885aa0848 100644 --- a/chunk-format.h +++ b/chunk-format.h @@ -2,6 +2,7 @@ #define CHUNK_FORMAT_H #include "git-compat-util.h" +#include "hash.h" struct hashfile; struct chunkfile; @@ -65,4 +66,6 @@ int read_chunk(struct chunkfile *cf, chunk_read_fn fn, void *data); +uint8_t oid_version(const struct git_hash_algo *algop); + #endif diff --git a/commit-graph.c b/commit-graph.c index 7943da3848..92d4503336 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -193,18 +193,6 @@ char *get_commit_graph_chain_filename(struct object_directory *odb) return xstrfmt("%s/info/commit-graphs/commit-graph-chain", odb->path); } -static uint8_t oid_version(void) -{ - switch (hash_algo_by_ptr(the_hash_algo)) { - case GIT_HASH_SHA1: - return 1; - case GIT_HASH_SHA256: - return 2; - default: - die(_("invalid hash version")); - } -} - static struct commit_graph *alloc_commit_graph(void) { struct commit_graph *g = xcalloc(1, sizeof(*g)); @@ -365,9 +353,9 @@ struct commit_graph *parse_commit_graph(struct repository *r, } hash_version = *(unsigned char*)(data + 5); - if (hash_version != oid_version()) { + if (hash_version != oid_version(the_hash_algo)) { error(_("commit-graph hash version %X does not match version %X"), - hash_version, oid_version()); + hash_version, oid_version(the_hash_algo)); return NULL; } @@ -1924,7 +1912,7 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) hashwrite_be32(f, GRAPH_SIGNATURE); hashwrite_u8(f, GRAPH_VERSION); - hashwrite_u8(f, oid_version()); + hashwrite_u8(f, oid_version(the_hash_algo)); hashwrite_u8(f, get_num_chunks(cf)); hashwrite_u8(f, ctx->num_commit_graphs_after - 1); @@ -41,18 +41,6 @@ #define PACK_EXPIRED UINT_MAX -static uint8_t oid_version(void) -{ - switch (hash_algo_by_ptr(the_hash_algo)) { - case GIT_HASH_SHA1: - return 1; - case GIT_HASH_SHA256: - return 2; - default: - die(_("invalid hash version")); - } -} - const unsigned char *get_midx_checksum(struct multi_pack_index *m) { return m->data + m->data_len - the_hash_algo->rawsz; @@ -134,9 +122,9 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local m->version); hash_version = m->data[MIDX_BYTE_HASH_VERSION]; - if (hash_version != oid_version()) { + if (hash_version != oid_version(the_hash_algo)) { error(_("multi-pack-index hash version %u does not match version %u"), - hash_version, oid_version()); + hash_version, oid_version(the_hash_algo)); goto cleanup_fail; } m->hash_len = the_hash_algo->rawsz; @@ -420,7 +408,7 @@ static size_t write_midx_header(struct hashfile *f, { hashwrite_be32(f, MIDX_SIGNATURE); hashwrite_u8(f, MIDX_VERSION); - hashwrite_u8(f, oid_version()); + hashwrite_u8(f, oid_version(the_hash_algo)); hashwrite_u8(f, num_chunks); hashwrite_u8(f, 0); /* unused */ hashwrite_be32(f, num_packs); diff --git a/object-file.c b/object-file.c index 857f3bdeba..79eb8339b6 100644 --- a/object-file.c +++ b/object-file.c @@ -997,7 +997,7 @@ int has_loose_object_nonlocal(const struct object_id *oid) return check_and_freshen_nonlocal(oid, 0); } -static int has_loose_object(const struct object_id *oid) +int has_loose_object(const struct object_id *oid) { return check_and_freshen(oid, 0); } @@ -2040,6 +2040,8 @@ static int freshen_packed_object(const struct object_id *oid) struct pack_entry e; if (!find_pack_entry(the_repository, oid, &e)) return 0; + if (e.p->is_cruft) + return 0; if (e.p->freshened) return 1; if (!freshen_file(e.p->pack_name)) diff --git a/object-store.h b/object-store.h index 53996018c1..539ea43904 100644 --- a/object-store.h +++ b/object-store.h @@ -115,12 +115,20 @@ struct packed_git { freshened:1, do_not_close:1, pack_promisor:1, - multi_pack_index:1; + multi_pack_index:1, + is_cruft:1; unsigned char hash[GIT_MAX_RAWSZ]; struct revindex_entry *revindex; const uint32_t *revindex_data; const uint32_t *revindex_map; size_t revindex_size; + /* + * mtimes_map points at the beginning of the memory mapped region of + * this pack's corresponding .mtimes file, and mtimes_size is the size + * of that .mtimes file + */ + const uint32_t *mtimes_map; + size_t mtimes_size; /* something like ".git/objects/pack/xxxxx.pack" */ char pack_name[FLEX_ARRAY]; /* more */ }; @@ -327,6 +335,8 @@ int repo_has_object_file_with_flags(struct repository *r, */ int has_loose_object_nonlocal(const struct object_id *); +int has_loose_object(const struct object_id *); + /** * format_object_header() is a thin wrapper around s xsnprintf() that * writes the initial "<type> <obj-len>" part of the loose object diff --git a/pack-mtimes.c b/pack-mtimes.c new file mode 100644 index 0000000000..0e0aafdcb0 --- /dev/null +++ b/pack-mtimes.c @@ -0,0 +1,129 @@ +#include "git-compat-util.h" +#include "pack-mtimes.h" +#include "object-store.h" +#include "packfile.h" + +static char *pack_mtimes_filename(struct packed_git *p) +{ + size_t len; + if (!strip_suffix(p->pack_name, ".pack", &len)) + BUG("pack_name does not end in .pack"); + return xstrfmt("%.*s.mtimes", (int)len, p->pack_name); +} + +#define MTIMES_HEADER_SIZE (12) + +struct mtimes_header { + uint32_t signature; + uint32_t version; + uint32_t hash_id; +}; + +static int load_pack_mtimes_file(char *mtimes_file, + uint32_t num_objects, + const uint32_t **data_p, size_t *len_p) +{ + int fd, ret = 0; + struct stat st; + uint32_t *data = NULL; + size_t mtimes_size, expected_size; + struct mtimes_header header; + + fd = git_open(mtimes_file); + + if (fd < 0) { + ret = -1; + goto cleanup; + } + if (fstat(fd, &st)) { + ret = error_errno(_("failed to read %s"), mtimes_file); + goto cleanup; + } + + mtimes_size = xsize_t(st.st_size); + + if (mtimes_size < MTIMES_HEADER_SIZE) { + ret = error(_("mtimes file %s is too small"), mtimes_file); + goto cleanup; + } + + data = xmmap(NULL, mtimes_size, PROT_READ, MAP_PRIVATE, fd, 0); + + header.signature = ntohl(data[0]); + header.version = ntohl(data[1]); + header.hash_id = ntohl(data[2]); + + if (header.signature != MTIMES_SIGNATURE) { + ret = error(_("mtimes file %s has unknown signature"), mtimes_file); + goto cleanup; + } + + if (header.version != 1) { + ret = error(_("mtimes file %s has unsupported version %"PRIu32), + mtimes_file, header.version); + goto cleanup; + } + + if (!(header.hash_id == 1 || header.hash_id == 2)) { + ret = error(_("mtimes file %s has unsupported hash id %"PRIu32), + mtimes_file, header.hash_id); + goto cleanup; + } + + + expected_size = MTIMES_HEADER_SIZE; + expected_size = st_add(expected_size, st_mult(sizeof(uint32_t), num_objects)); + expected_size = st_add(expected_size, 2 * (header.hash_id == 1 ? GIT_SHA1_RAWSZ : GIT_SHA256_RAWSZ)); + + if (mtimes_size != expected_size) { + ret = error(_("mtimes file %s is corrupt"), mtimes_file); + goto cleanup; + } + +cleanup: + if (ret) { + if (data) + munmap(data, mtimes_size); + } else { + *len_p = mtimes_size; + *data_p = data; + } + + close(fd); + return ret; +} + +int load_pack_mtimes(struct packed_git *p) +{ + char *mtimes_name = NULL; + int ret = 0; + + if (!p->is_cruft) + return ret; /* not a cruft pack */ + if (p->mtimes_map) + return ret; /* already loaded */ + + ret = open_pack_index(p); + if (ret < 0) + goto cleanup; + + mtimes_name = pack_mtimes_filename(p); + ret = load_pack_mtimes_file(mtimes_name, + p->num_objects, + &p->mtimes_map, + &p->mtimes_size); +cleanup: + free(mtimes_name); + return ret; +} + +uint32_t nth_packed_mtime(struct packed_git *p, uint32_t pos) +{ + if (!p->mtimes_map) + BUG("pack .mtimes file not loaded for %s", p->pack_name); + if (p->num_objects <= pos) + BUG("pack .mtimes out-of-bounds (%"PRIu32" vs %"PRIu32")", + pos, p->num_objects); + + return get_be32(p->mtimes_map + pos + 3); +} diff --git a/pack-mtimes.h b/pack-mtimes.h new file mode 100644 index 0000000000..cc957b3e85 --- /dev/null +++ b/pack-mtimes.h @@ -0,0 +1,26 @@ +#ifndef PACK_MTIMES_H +#define PACK_MTIMES_H + +#include "git-compat-util.h" + +#define MTIMES_SIGNATURE 0x4d544d45 /* "MTME" */ +#define MTIMES_VERSION 1 + +struct packed_git; + +/* + * Loads the .mtimes file corresponding to "p", if any, returning zero + * on success. + */ +int load_pack_mtimes(struct packed_git *p); + +/* Returns the mtime associated with the object at position "pos" (in + * lexicographic/index order) in pack "p". + * + * Note that it is a BUG() to call this function if either (a) "p" does + * not have a corresponding .mtimes file, or (b) it does, but it hasn't + * been loaded + */ +uint32_t nth_packed_mtime(struct packed_git *p, uint32_t pos); + +#endif diff --git a/pack-objects.c b/pack-objects.c index fe2a4eace9..272e8d4517 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -170,6 +170,9 @@ struct object_entry *packlist_alloc(struct packing_data *pdata, if (pdata->layer) REALLOC_ARRAY(pdata->layer, pdata->nr_alloc); + + if (pdata->cruft_mtime) + REALLOC_ARRAY(pdata->cruft_mtime, pdata->nr_alloc); } new_entry = pdata->objects + pdata->nr_objects++; @@ -198,6 +201,9 @@ struct object_entry *packlist_alloc(struct packing_data *pdata, if (pdata->layer) pdata->layer[pdata->nr_objects - 1] = 0; + if (pdata->cruft_mtime) + pdata->cruft_mtime[pdata->nr_objects - 1] = 0; + return new_entry; } diff --git a/pack-objects.h b/pack-objects.h index dca2351ef9..393b9db546 100644 --- a/pack-objects.h +++ b/pack-objects.h @@ -168,6 +168,14 @@ struct packing_data { /* delta islands */ unsigned int *tree_depth; unsigned char *layer; + + /* + * Used when writing cruft packs. + * + * Object mtimes are stored in pack order when writing, but + * written out in lexicographic (index) order. + */ + uint32_t *cruft_mtime; }; void prepare_packing_data(struct repository *r, struct packing_data *pdata); @@ -289,4 +297,21 @@ static inline void oe_set_layer(struct packing_data *pack, pack->layer[e - pack->objects] = layer; } +static inline uint32_t oe_cruft_mtime(struct packing_data *pack, + struct object_entry *e) +{ + if (!pack->cruft_mtime) + return 0; + return pack->cruft_mtime[e - pack->objects]; +} + +static inline void oe_set_cruft_mtime(struct packing_data *pack, + struct object_entry *e, + uint32_t mtime) +{ + if (!pack->cruft_mtime) + CALLOC_ARRAY(pack->cruft_mtime, pack->nr_alloc); + pack->cruft_mtime[e - pack->objects] = mtime; +} + #endif diff --git a/pack-write.c b/pack-write.c index 51812cb129..23c0342018 100644 --- a/pack-write.c +++ b/pack-write.c @@ -2,6 +2,11 @@ #include "pack.h" #include "csum-file.h" #include "remote.h" +#include "chunk-format.h" +#include "pack-mtimes.h" +#include "oidmap.h" +#include "chunk-format.h" +#include "pack-objects.h" void reset_pack_idx_option(struct pack_idx_option *opts) { @@ -181,21 +186,9 @@ static int pack_order_cmp(const void *va, const void *vb, void *ctx) static void write_rev_header(struct hashfile *f) { - uint32_t oid_version; - switch (hash_algo_by_ptr(the_hash_algo)) { - case GIT_HASH_SHA1: - oid_version = 1; - break; - case GIT_HASH_SHA256: - oid_version = 2; - break; - default: - die("write_rev_header: unknown hash version"); - } - hashwrite_be32(f, RIDX_SIGNATURE); hashwrite_be32(f, RIDX_VERSION); - hashwrite_be32(f, oid_version); + hashwrite_be32(f, oid_version(the_hash_algo)); } static void write_rev_index_positions(struct hashfile *f, @@ -288,6 +281,70 @@ const char *write_rev_file_order(const char *rev_name, return rev_name; } +static void write_mtimes_header(struct hashfile *f) +{ + hashwrite_be32(f, MTIMES_SIGNATURE); + hashwrite_be32(f, MTIMES_VERSION); + hashwrite_be32(f, oid_version(the_hash_algo)); +} + +/* + * Writes the object mtimes of "objects" for use in a .mtimes file. + * Note that objects must be in lexicographic (index) order, which is + * the expected ordering of these values in the .mtimes file. + */ +static void write_mtimes_objects(struct hashfile *f, + struct packing_data *to_pack, + struct pack_idx_entry **objects, + uint32_t nr_objects) +{ + uint32_t i; + for (i = 0; i < nr_objects; i++) { + struct object_entry *e = (struct object_entry*)objects[i]; + hashwrite_be32(f, oe_cruft_mtime(to_pack, e)); + } +} + +static void write_mtimes_trailer(struct hashfile *f, const unsigned char *hash) +{ + hashwrite(f, hash, the_hash_algo->rawsz); +} + +static const char *write_mtimes_file(const char *mtimes_name, + struct packing_data *to_pack, + struct pack_idx_entry **objects, + uint32_t nr_objects, + const unsigned char *hash) +{ + struct hashfile *f; + int fd; + + if (!to_pack) + BUG("cannot call write_mtimes_file with NULL packing_data"); + + if (!mtimes_name) { + struct strbuf tmp_file = STRBUF_INIT; + fd = odb_mkstemp(&tmp_file, "pack/tmp_mtimes_XXXXXX"); + mtimes_name = strbuf_detach(&tmp_file, NULL); + } else { + unlink(mtimes_name); + fd = xopen(mtimes_name, O_CREAT|O_EXCL|O_WRONLY, 0600); + } + f = hashfd(fd, mtimes_name); + + write_mtimes_header(f); + write_mtimes_objects(f, to_pack, objects, nr_objects); + write_mtimes_trailer(f, hash); + + if (adjust_shared_perm(mtimes_name) < 0) + die(_("failed to make %s readable"), mtimes_name); + + finalize_hashfile(f, NULL, FSYNC_COMPONENT_PACK_METADATA, + CSUM_HASH_IN_STREAM | CSUM_CLOSE | CSUM_FSYNC); + + return mtimes_name; +} + off_t write_pack_header(struct hashfile *f, uint32_t nr_entries) { struct pack_header hdr; @@ -484,11 +541,13 @@ void stage_tmp_packfiles(struct strbuf *name_buffer, const char *pack_tmp_name, struct pack_idx_entry **written_list, uint32_t nr_written, + struct packing_data *to_pack, struct pack_idx_option *pack_idx_opts, unsigned char hash[], char **idx_tmp_name) { const char *rev_tmp_name = NULL; + const char *mtimes_tmp_name = NULL; if (adjust_shared_perm(pack_tmp_name)) die_errno("unable to make temporary pack file readable"); @@ -501,9 +560,17 @@ void stage_tmp_packfiles(struct strbuf *name_buffer, rev_tmp_name = write_rev_file(NULL, written_list, nr_written, hash, pack_idx_opts->flags); + if (pack_idx_opts->flags & WRITE_MTIMES) { + mtimes_tmp_name = write_mtimes_file(NULL, to_pack, written_list, + nr_written, + hash); + } + rename_tmp_packfile(name_buffer, pack_tmp_name, "pack"); if (rev_tmp_name) rename_tmp_packfile(name_buffer, rev_tmp_name, "rev"); + if (mtimes_tmp_name) + rename_tmp_packfile(name_buffer, mtimes_tmp_name, "mtimes"); } void write_promisor_file(const char *promisor_name, struct ref **sought, int nr_sought) @@ -44,6 +44,7 @@ struct pack_idx_option { #define WRITE_IDX_STRICT 02 #define WRITE_REV 04 #define WRITE_REV_VERIFY 010 +#define WRITE_MTIMES 020 uint32_t version; uint32_t off32_limit; @@ -109,11 +110,14 @@ int encode_in_pack_object_header(unsigned char *hdr, int hdr_len, #define PH_ERROR_PROTOCOL (-3) int read_pack_header(int fd, struct pack_header *); +struct packing_data; + struct hashfile *create_tmp_packfile(char **pack_tmp_name); void stage_tmp_packfiles(struct strbuf *name_buffer, const char *pack_tmp_name, struct pack_idx_entry **written_list, uint32_t nr_written, + struct packing_data *to_pack, struct pack_idx_option *pack_idx_opts, unsigned char hash[], char **idx_tmp_name); diff --git a/packfile.c b/packfile.c index 6b88a56025..8e812a84a3 100644 --- a/packfile.c +++ b/packfile.c @@ -334,12 +334,22 @@ static void close_pack_revindex(struct packed_git *p) p->revindex_data = NULL; } +static void close_pack_mtimes(struct packed_git *p) +{ + if (!p->mtimes_map) + return; + + munmap((void *)p->mtimes_map, p->mtimes_size); + p->mtimes_map = NULL; +} + void close_pack(struct packed_git *p) { close_pack_windows(p); close_pack_fd(p); close_pack_index(p); close_pack_revindex(p); + close_pack_mtimes(p); oidset_clear(&p->bad_objects); } @@ -363,7 +373,7 @@ void close_object_store(struct raw_object_store *o) void unlink_pack_path(const char *pack_name, int force_delete) { - static const char *exts[] = {".pack", ".idx", ".rev", ".keep", ".bitmap", ".promisor"}; + static const char *exts[] = {".pack", ".idx", ".rev", ".keep", ".bitmap", ".promisor", ".mtimes"}; int i; struct strbuf buf = STRBUF_INIT; size_t plen; @@ -718,6 +728,10 @@ struct packed_git *add_packed_git(const char *path, size_t path_len, int local) if (!access(p->pack_name, F_OK)) p->pack_promisor = 1; + xsnprintf(p->pack_name + path_len, alloc - path_len, ".mtimes"); + if (!access(p->pack_name, F_OK)) + p->is_cruft = 1; + xsnprintf(p->pack_name + path_len, alloc - path_len, ".pack"); if (stat(p->pack_name, &st) || !S_ISREG(st.st_mode)) { free(p); @@ -869,7 +883,8 @@ static void prepare_pack(const char *full_name, size_t full_name_len, ends_with(file_name, ".pack") || ends_with(file_name, ".bitmap") || ends_with(file_name, ".keep") || - ends_with(file_name, ".promisor")) + ends_with(file_name, ".promisor") || + ends_with(file_name, ".mtimes")) string_list_append(data->garbage, full_name); else report_garbage(PACKDIR_FILE_GARBAGE, full_name); diff --git a/reachable.c b/reachable.c index b9f4ad886e..aba63ebeb3 100644 --- a/reachable.c +++ b/reachable.c @@ -13,6 +13,7 @@ #include "worktree.h" #include "object-store.h" #include "pack-bitmap.h" +#include "pack-mtimes.h" struct connectivity_progress { struct progress *progress; @@ -60,9 +61,13 @@ static void mark_commit(struct commit *c, void *data) struct recent_data { struct rev_info *revs; timestamp_t timestamp; + report_recent_object_fn *cb; + int ignore_in_core_kept_packs; }; static void add_recent_object(const struct object_id *oid, + struct packed_git *pack, + off_t offset, timestamp_t mtime, struct recent_data *data) { @@ -103,13 +108,29 @@ static void add_recent_object(const struct object_id *oid, die("unable to lookup %s", oid_to_hex(oid)); add_pending_object(data->revs, obj, ""); + if (data->cb) + data->cb(obj, pack, offset, mtime); +} + +static int want_recent_object(struct recent_data *data, + const struct object_id *oid) +{ + if (data->ignore_in_core_kept_packs && + has_object_kept_pack(oid, IN_CORE_KEEP_PACKS)) + return 0; + return 1; } static int add_recent_loose(const struct object_id *oid, const char *path, void *data) { struct stat st; - struct object *obj = lookup_object(the_repository, oid); + struct object *obj; + + if (!want_recent_object(data, oid)) + return 0; + + obj = lookup_object(the_repository, oid); if (obj && obj->flags & SEEN) return 0; @@ -126,7 +147,7 @@ static int add_recent_loose(const struct object_id *oid, return error_errno("unable to stat %s", oid_to_hex(oid)); } - add_recent_object(oid, st.st_mtime, data); + add_recent_object(oid, NULL, 0, st.st_mtime, data); return 0; } @@ -134,29 +155,49 @@ static int add_recent_packed(const struct object_id *oid, struct packed_git *p, uint32_t pos, void *data) { - struct object *obj = lookup_object(the_repository, oid); + struct object *obj; + timestamp_t mtime = p->mtime; + + if (!want_recent_object(data, oid)) + return 0; + + obj = lookup_object(the_repository, oid); if (obj && obj->flags & SEEN) return 0; - add_recent_object(oid, p->mtime, data); + if (p->is_cruft) { + if (load_pack_mtimes(p) < 0) + die(_("could not load cruft pack .mtimes")); + mtime = nth_packed_mtime(p, pos); + } + add_recent_object(oid, p, nth_packed_object_offset(p, pos), mtime, data); return 0; } int add_unseen_recent_objects_to_traversal(struct rev_info *revs, - timestamp_t timestamp) + timestamp_t timestamp, + report_recent_object_fn *cb, + int ignore_in_core_kept_packs) { struct recent_data data; + enum for_each_object_flags flags; int r; data.revs = revs; data.timestamp = timestamp; + data.cb = cb; + data.ignore_in_core_kept_packs = ignore_in_core_kept_packs; r = for_each_loose_object(add_recent_loose, &data, FOR_EACH_OBJECT_LOCAL_ONLY); if (r) return r; - return for_each_packed_object(add_recent_packed, &data, - FOR_EACH_OBJECT_LOCAL_ONLY); + + flags = FOR_EACH_OBJECT_LOCAL_ONLY | FOR_EACH_OBJECT_PACK_ORDER; + if (ignore_in_core_kept_packs) + flags |= FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS; + + return for_each_packed_object(add_recent_packed, &data, flags); } static int mark_object_seen(const struct object_id *oid, @@ -217,7 +258,8 @@ void mark_reachable_objects(struct rev_info *revs, int mark_reflog, if (mark_recent) { revs->ignore_missing_links = 1; - if (add_unseen_recent_objects_to_traversal(revs, mark_recent)) + if (add_unseen_recent_objects_to_traversal(revs, mark_recent, + NULL, 0)) die("unable to mark recent objects"); if (prepare_revision_walk(revs)) die("revision walk setup failed"); diff --git a/reachable.h b/reachable.h index 5df932ad8f..020a887b99 100644 --- a/reachable.h +++ b/reachable.h @@ -3,9 +3,16 @@ struct progress; struct rev_info; +struct object; +struct packed_git; + +typedef void report_recent_object_fn(const struct object *, struct packed_git *, + off_t, time_t); int add_unseen_recent_objects_to_traversal(struct rev_info *revs, - timestamp_t timestamp); + timestamp_t timestamp, + report_recent_object_fn cb, + int ignore_in_core_kept_packs); void mark_reachable_objects(struct rev_info *revs, int mark_reflog, timestamp_t mark_recent, struct progress *); diff --git a/t/helper/test-pack-mtimes.c b/t/helper/test-pack-mtimes.c new file mode 100644 index 0000000000..f7b79daf4c --- /dev/null +++ b/t/helper/test-pack-mtimes.c @@ -0,0 +1,56 @@ +#include "git-compat-util.h" +#include "test-tool.h" +#include "strbuf.h" +#include "object-store.h" +#include "packfile.h" +#include "pack-mtimes.h" + +static void dump_mtimes(struct packed_git *p) +{ + uint32_t i; + if (load_pack_mtimes(p) < 0) + die("could not load pack .mtimes"); + + for (i = 0; i < p->num_objects; i++) { + struct object_id oid; + if (nth_packed_object_id(&oid, p, i) < 0) + die("could not load object id at position %"PRIu32, i); + + printf("%s %"PRIu32"\n", + oid_to_hex(&oid), nth_packed_mtime(p, i)); + } +} + +static const char *pack_mtimes_usage = "\n" +" test-tool pack-mtimes <pack-name.mtimes>"; + +int cmd__pack_mtimes(int argc, const char **argv) +{ + struct strbuf buf = STRBUF_INIT; + struct packed_git *p; + + setup_git_directory(); + + if (argc != 2) + usage(pack_mtimes_usage); + + for (p = get_all_packs(the_repository); p; p = p->next) { + strbuf_addstr(&buf, basename(p->pack_name)); + strbuf_strip_suffix(&buf, ".pack"); + strbuf_addstr(&buf, ".mtimes"); + + if (!strcmp(buf.buf, argv[1])) + break; + + strbuf_reset(&buf); + } + + strbuf_release(&buf); + + if (!p) + die("could not find pack '%s'", argv[1]); + + dump_mtimes(p); + + return 0; +} diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c index 0424f7adf5..d2eacd302d 100644 --- a/t/helper/test-tool.c +++ b/t/helper/test-tool.c @@ -48,6 +48,7 @@ static struct test_cmd cmds[] = { { "oidmap", cmd__oidmap }, { "oidtree", cmd__oidtree }, { "online-cpus", cmd__online_cpus }, + { "pack-mtimes", cmd__pack_mtimes }, { "parse-options", cmd__parse_options }, { "parse-pathspec-file", cmd__parse_pathspec_file }, { "partial-clone", cmd__partial_clone }, diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h index c876e8246f..960cc27ef7 100644 --- a/t/helper/test-tool.h +++ b/t/helper/test-tool.h @@ -38,6 +38,7 @@ int cmd__mktemp(int argc, const char **argv); int cmd__oidmap(int argc, const char **argv); int cmd__oidtree(int argc, const char **argv); int cmd__online_cpus(int argc, const char **argv); +int cmd__pack_mtimes(int argc, const char **argv); int cmd__parse_options(int argc, const char **argv); int cmd__parse_pathspec_file(int argc, const char** argv); int cmd__partial_clone(int argc, const char **argv); diff --git a/t/t5329-pack-objects-cruft.sh b/t/t5329-pack-objects-cruft.sh new file mode 100755 index 0000000000..b481224b93 --- /dev/null +++ b/t/t5329-pack-objects-cruft.sh @@ -0,0 +1,739 @@ +#!/bin/sh + +test_description='cruft pack related pack-objects tests' +. ./test-lib.sh + +objdir=.git/objects +packdir=$objdir/pack + +basic_cruft_pack_tests () { + expire="$1" + + test_expect_success "unreachable loose objects are packed (expire $expire)" ' + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + + test_commit base && + git repack -Ad && + test_commit loose && + + test-tool chmtime +2000 "$objdir/$(test_oid_to_path \ + $(git rev-parse loose:loose.t))" && + test-tool chmtime +1000 "$objdir/$(test_oid_to_path \ + $(git rev-parse loose^{tree}))" && + + ( + git rev-list --objects --no-object-names base..loose | + while read oid + do + path="$objdir/$(test_oid_to_path "$oid")" && + printf "%s %d\n" "$oid" "$(test-tool chmtime --get "$path")" + done | + sort -k1 + ) >expect && + + keep="$(basename "$(ls $packdir/pack-*.pack)")" && + cruft="$(echo $keep | git pack-objects --cruft \ + --cruft-expiration="$expire" $packdir/pack)" && + test-tool pack-mtimes "pack-$cruft.mtimes" >actual && + + test_cmp expect actual + ) + ' + + test_expect_success "unreachable packed objects are packed (expire $expire)" ' + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + + test_commit packed && + git repack -Ad && + test_commit other && + + git rev-list --objects --no-object-names packed.. >objects && + keep="$(basename "$(ls $packdir/pack-*.pack)")" && + other="$(git pack-objects --delta-base-offset \ + $packdir/pack <objects)" && + git prune-packed && + + test-tool chmtime --get -100 "$packdir/pack-$other.pack" >expect && + + cruft="$(git pack-objects --cruft --cruft-expiration="$expire" $packdir/pack <<-EOF + $keep + -pack-$other.pack + EOF + )" && + test-tool pack-mtimes "pack-$cruft.mtimes" >actual.raw && + + cut -d" " -f2 <actual.raw | sort -u >actual && + + test_cmp expect actual + ) + ' + + test_expect_success "unreachable cruft objects are repacked (expire $expire)" ' + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + + test_commit packed && + git repack -Ad && + test_commit other && + + git rev-list --objects --no-object-names packed.. >objects && + keep="$(basename "$(ls $packdir/pack-*.pack)")" && + + cruft_a="$(echo $keep | git pack-objects --cruft --cruft-expiration="$expire" $packdir/pack)" && + git prune-packed && + cruft_b="$(git pack-objects --cruft --cruft-expiration="$expire" $packdir/pack <<-EOF + $keep + -pack-$cruft_a.pack + EOF + )" && + + test-tool pack-mtimes "pack-$cruft_a.mtimes" >expect.raw && + test-tool pack-mtimes "pack-$cruft_b.mtimes" >actual.raw && + + sort <expect.raw >expect && + sort <actual.raw >actual && + + test_cmp expect actual + ) + ' + + test_expect_success "multiple cruft packs (expire $expire)" ' + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + + test_commit reachable && + git repack -Ad && + keep="$(basename "$(ls $packdir/pack-*.pack)")" && + + test_commit cruft && + loose="$objdir/$(test_oid_to_path $(git rev-parse cruft))" && + + # generate three copies of the cruft object in different + # cruft packs, each with a unique mtime: + # - one expired (1000 seconds ago) + # - two non-expired (one 1000 seconds in the future, + # one 1500 seconds in the future) + test-tool chmtime =-1000 "$loose" && + git pack-objects --cruft $packdir/pack-A <<-EOF && + $keep + EOF + test-tool chmtime =+1000 "$loose" && + git pack-objects --cruft $packdir/pack-B <<-EOF && + $keep + -$(basename $(ls $packdir/pack-A-*.pack)) + EOF + test-tool chmtime =+1500 "$loose" && + git pack-objects --cruft $packdir/pack-C <<-EOF && + $keep + -$(basename $(ls $packdir/pack-A-*.pack)) + -$(basename $(ls $packdir/pack-B-*.pack)) + EOF + + # ensure the resulting cruft pack takes the most recent + # mtime among all copies + cruft="$(git pack-objects --cruft \ + --cruft-expiration="$expire" \ + $packdir/pack <<-EOF + $keep + -$(basename $(ls $packdir/pack-A-*.pack)) + -$(basename $(ls $packdir/pack-B-*.pack)) + -$(basename $(ls $packdir/pack-C-*.pack)) + EOF + )" && + + test-tool pack-mtimes "$(basename $(ls $packdir/pack-C-*.mtimes))" >expect.raw && + test-tool pack-mtimes "pack-$cruft.mtimes" >actual.raw && + + sort expect.raw >expect && + sort actual.raw >actual && + test_cmp expect actual + ) + ' + + test_expect_success "cruft packs tolerate missing trees (expire $expire)" ' + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + + test_commit reachable && + test_commit cruft && + + tree="$(git rev-parse cruft^{tree})" && + + git reset --hard reachable && + git tag -d cruft && + git reflog expire --all --expire=all && + + # remove the unreachable tree, but leave the commit + # which has it as its root tree intact + rm -fr "$objdir/$(test_oid_to_path "$tree")" && + + git repack -Ad && + basename $(ls $packdir/pack-*.pack) >in && + git pack-objects --cruft --cruft-expiration="$expire" \ + $packdir/pack <in + ) + ' + + test_expect_success "cruft packs tolerate missing blobs (expire $expire)" ' + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + + test_commit reachable && + test_commit cruft && + + blob="$(git rev-parse cruft:cruft.t)" && + + git reset --hard reachable && + git tag -d cruft && + git reflog expire --all --expire=all && + + # remove the unreachable blob, but leave the commit (and + # the root tree of that commit) intact + rm -fr "$objdir/$(test_oid_to_path "$blob")" && + + git repack -Ad && + basename $(ls $packdir/pack-*.pack) >in && + git pack-objects --cruft --cruft-expiration="$expire" \ + $packdir/pack <in + ) + ' +} + +basic_cruft_pack_tests never +basic_cruft_pack_tests 2.weeks.ago + +test_expect_success 'cruft tags rescue tagged objects' ' + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + + test_commit packed && + git repack -Ad && + + test_commit tagged && + git tag -a annotated -m tag && + + git rev-list --objects --no-object-names packed.. >objects && + while read oid + do + test-tool chmtime -1000 \ + "$objdir/$(test_oid_to_path $oid)" + done <objects && + + test-tool chmtime -500 \ + "$objdir/$(test_oid_to_path $(git rev-parse annotated))" && + + keep="$(basename "$(ls $packdir/pack-*.pack)")" && + cruft="$(echo $keep | git pack-objects --cruft \ + --cruft-expiration=750.seconds.ago \ + $packdir/pack)" && + test-tool pack-mtimes "pack-$cruft.mtimes" >actual.raw && + cut -f1 -d" " <actual.raw | sort >actual && + + ( + cat objects && + git rev-parse annotated + ) >expect.raw && + sort <expect.raw >expect && + + test_cmp expect actual && + cat actual + ) +' + +test_expect_success 'cruft commits rescue parents, trees' ' + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + + test_commit packed && + git repack -Ad && + + test_commit old && + test_commit new && + + git rev-list --objects --no-object-names packed..new >objects && + while read object + do + test-tool chmtime -1000 \ + "$objdir/$(test_oid_to_path $object)" + done <objects && + test-tool chmtime +500 "$objdir/$(test_oid_to_path \ + $(git rev-parse HEAD))" && + + keep="$(basename "$(ls $packdir/pack-*.pack)")" && + cruft="$(echo $keep | git pack-objects --cruft \ + --cruft-expiration=750.seconds.ago \ + $packdir/pack)" && + test-tool pack-mtimes "pack-$cruft.mtimes" >actual.raw && + + cut -d" " -f1 <actual.raw | sort >actual && + sort <objects >expect && + + test_cmp expect actual + ) +' + +test_expect_success 'cruft trees rescue sub-trees, blobs' ' + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + + test_commit packed && + git repack -Ad && + + mkdir -p dir/sub && + echo foo >foo && + echo bar >dir/bar && + echo baz >dir/sub/baz && + + test_tick && + git add . && + git commit -m "pruned" && + + test-tool chmtime -1000 "$objdir/$(test_oid_to_path $(git rev-parse HEAD))" && + test-tool chmtime -1000 "$objdir/$(test_oid_to_path $(git rev-parse HEAD^{tree}))" && + test-tool chmtime -1000 "$objdir/$(test_oid_to_path $(git rev-parse HEAD:foo))" && + test-tool chmtime -500 "$objdir/$(test_oid_to_path $(git rev-parse HEAD:dir))" && + test-tool chmtime -1000 "$objdir/$(test_oid_to_path $(git rev-parse HEAD:dir/bar))" && + test-tool chmtime -1000 "$objdir/$(test_oid_to_path $(git rev-parse HEAD:dir/sub))" && + test-tool chmtime -1000 "$objdir/$(test_oid_to_path $(git rev-parse HEAD:dir/sub/baz))" && + + keep="$(basename "$(ls $packdir/pack-*.pack)")" && + cruft="$(echo $keep | git pack-objects --cruft \ + --cruft-expiration=750.seconds.ago \ + $packdir/pack)" && + test-tool pack-mtimes "pack-$cruft.mtimes" >actual.raw && + cut -f1 -d" " <actual.raw | sort >actual && + + git rev-parse HEAD:dir HEAD:dir/bar HEAD:dir/sub HEAD:dir/sub/baz >expect.raw && + sort <expect.raw >expect && + + test_cmp expect actual + ) +' + +test_expect_success 'expired objects are pruned' ' + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + + test_commit packed && + git repack -Ad && + + test_commit pruned && + + git rev-list --objects --no-object-names packed..pruned >objects && + while read object + do + test-tool chmtime -1000 \ + "$objdir/$(test_oid_to_path $object)" + done <objects && + + keep="$(basename "$(ls $packdir/pack-*.pack)")" && + cruft="$(echo $keep | git pack-objects --cruft \ + --cruft-expiration=750.seconds.ago \ + $packdir/pack)" && + + test-tool pack-mtimes "pack-$cruft.mtimes" >actual && + test_must_be_empty actual + ) +' + +test_expect_success 'repack --cruft generates a cruft pack' ' + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + + test_commit reachable && + git branch -M main && + git checkout --orphan other && + test_commit unreachable && + + git checkout main && + git branch -D other && + git tag -d unreachable && + # objects are not cruft if they are contained in the reflogs + git reflog expire --all --expire=all && + + git rev-list --objects --all --no-object-names >reachable.raw && + git cat-file --batch-all-objects --batch-check="%(objectname)" >objects && + sort <reachable.raw >reachable && + comm -13 reachable objects >unreachable && + + git repack --cruft -d && + + cruft=$(basename $(ls $packdir/pack-*.mtimes) .mtimes) && + pack=$(basename $(ls $packdir/pack-*.pack | grep -v $cruft) .pack) && + + git show-index <$packdir/$pack.idx >actual.raw && + cut -f2 -d" " actual.raw | sort >actual && + test_cmp reachable actual && + + git show-index <$packdir/$cruft.idx >actual.raw && + cut -f2 -d" " actual.raw | sort >actual && + test_cmp unreachable actual + ) +' + +test_expect_success 'loose objects mtimes upsert others' ' + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + + test_commit reachable && + git repack -Ad && + git branch -M main && + + git checkout --orphan other && + test_commit cruft && + # incremental repack, leaving existing objects loose (so + # they can be "freshened") + git repack && + + tip="$(git rev-parse cruft)" && + path="$objdir/$(test_oid_to_path "$tip")" && + test-tool chmtime --get +1000 "$path" >expect && + + git checkout main && + git branch -D other && + git tag -d cruft && + git reflog expire --all --expire=all && + + git repack --cruft -d && + + mtimes="$(basename $(ls $packdir/pack-*.mtimes))" && + test-tool pack-mtimes "$mtimes" >actual.raw && + grep "$tip" actual.raw | cut -d" " -f2 >actual && + test_cmp expect actual + ) +' + +test_expect_success 'expiring cruft objects with git gc' ' + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + + test_commit reachable && + git branch -M main && + git checkout --orphan other && + test_commit unreachable && + + git checkout main && + git branch -D other && + git tag -d unreachable && + # objects are not cruft if they are contained in the reflogs + git reflog expire --all --expire=all && + + git rev-list --objects --all --no-object-names >reachable.raw && + git cat-file --batch-all-objects --batch-check="%(objectname)" >objects && + sort <reachable.raw >reachable && + comm -13 reachable objects >unreachable && + + git repack --cruft -d && + + mtimes=$(ls .git/objects/pack/pack-*.mtimes) && + test_path_is_file $mtimes && + + git gc --cruft --prune=now && + + git cat-file --batch-all-objects --batch-check="%(objectname)" >objects && + + comm -23 unreachable objects >removed && + test_cmp unreachable removed && + test_path_is_missing $mtimes + ) +' + +test_expect_success 'cruft packs are not included in geometric repack' ' + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + + test_commit reachable && + git repack -Ad && + git branch -M main && + + git checkout --orphan other && + test_commit cruft && + git repack -d && + + git checkout main && + git branch -D other && + git tag -d cruft && + git reflog expire --all --expire=all && + + git repack --cruft && + + find $packdir -type f | sort >before && + git repack --geometric=2 -d && + find $packdir -type f | sort >after && + + test_cmp before after + ) +' + +test_expect_success 'repack --geometric collects once-cruft objects' ' + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + + test_commit reachable && + git repack -Ad && + git branch -M main && + + git checkout --orphan other && + git rm -rf . && + test_commit --no-tag cruft && + cruft="$(git rev-parse HEAD)" && + + git checkout main && + git branch -D other && + git reflog expire --all --expire=all && + + # Pack the objects created in the previous step into a cruft + # pack. Intentionally leave loose copies of those objects + # around so we can pick them up in a subsequent --geometric + # reapack. + git repack --cruft && + + # Now make those objects reachable, and ensure that they are + # packed into the new pack created via a --geometric repack. + git update-ref refs/heads/other $cruft && + + # Without this object, the set of unpacked objects is exactly + # the set of objects already in the cruft pack. Tweak that set + # to ensure we do not overwrite the cruft pack entirely. + test_commit reachable2 && + + find $packdir -name "pack-*.idx" | sort >before && + git repack --geometric=2 -d && + find $packdir -name "pack-*.idx" | sort >after && + + { + git rev-list --objects --no-object-names $cruft && + git rev-list --objects --no-object-names reachable..reachable2 + } >want.raw && + sort want.raw >want && + + pack=$(comm -13 before after) && + git show-index <$pack >objects.raw && + + cut -d" " -f2 objects.raw | sort >got && + + test_cmp want got + ) +' + +test_expect_success 'cruft repack with no reachable objects' ' + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + + test_commit base && + git repack -ad && + + base="$(git rev-parse base)" && + + git for-each-ref --format="delete %(refname)" >in && + git update-ref --stdin <in && + git reflog expire --all --expire=all && + rm -fr .git/index && + + git repack --cruft -d && + + git cat-file -t $base + ) +' + +test_expect_success 'cruft repack ignores --max-pack-size' ' + git init max-pack-size && + ( + cd max-pack-size && + test_commit base && + # two cruft objects which exceed the maximum pack size + test-tool genrandom foo 1048576 | git hash-object --stdin -w && + test-tool genrandom bar 1048576 | git hash-object --stdin -w && + git repack --cruft --max-pack-size=1M && + find $packdir -name "*.mtimes" >cruft && + test_line_count = 1 cruft && + test-tool pack-mtimes "$(basename "$(cat cruft)")" >objects && + test_line_count = 2 objects + ) +' + +test_expect_success 'cruft repack ignores pack.packSizeLimit' ' + ( + cd max-pack-size && + # repack everything back together to remove the existing cruft + # pack (but to keep its objects) + git repack -adk && + git -c pack.packSizeLimit=1M repack --cruft && + # ensure the same post condition is met when --max-pack-size + # would otherwise be inferred from the configuration + find $packdir -name "*.mtimes" >cruft && + test_line_count = 1 cruft && + test-tool pack-mtimes "$(basename "$(cat cruft)")" >objects && + test_line_count = 2 objects + ) +' + +test_expect_success 'cruft repack respects repack.cruftWindow' ' + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + + test_commit base && + + GIT_TRACE2_EVENT=$(pwd)/event.trace \ + git -c pack.window=1 -c repack.cruftWindow=2 repack \ + --cruft --window=3 && + + grep "pack-objects.*--window=2.*--cruft" event.trace + ) +' + +test_expect_success 'cruft repack respects --window by default' ' + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + + test_commit base && + + GIT_TRACE2_EVENT=$(pwd)/event.trace \ + git -c pack.window=2 repack --cruft --window=3 && + + grep "pack-objects.*--window=3.*--cruft" event.trace + ) +' + +test_expect_success 'cruft repack respects --quiet' ' + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + + test_commit base && + GIT_PROGRESS_DELAY=0 git repack --cruft --quiet 2>err && + test_must_be_empty err + ) +' + +test_expect_success 'cruft --local drops unreachable objects' ' + git init alternate && + git init repo && + test_when_finished "rm -fr alternate repo" && + + test_commit -C alternate base && + # Pack all objects in alterate so that the cruft repack in "repo" sees + # the object it dropped due to `--local` as packed. Otherwise this + # object would not appear packed anywhere (since it is not packed in + # alternate and likewise not part of the cruft pack in the other repo + # because of `--local`). + git -C alternate repack -ad && + + ( + cd repo && + + object="$(git -C ../alternate rev-parse HEAD:base.t)" && + git -C ../alternate cat-file -p $object >contents && + + # Write some reachable objects and two unreachable ones: one + # that the alternate has and another that is unique. + test_commit other && + git hash-object -w -t blob contents && + cruft="$(echo cruft | git hash-object -w -t blob --stdin)" && + + ( cd ../alternate/.git/objects && pwd ) \ + >.git/objects/info/alternates && + + test_path_is_file $objdir/$(test_oid_to_path $cruft) && + test_path_is_file $objdir/$(test_oid_to_path $object) && + + git repack -d --cruft --local && + + test-tool pack-mtimes "$(basename $(ls $packdir/pack-*.mtimes))" \ + >objects && + ! grep $object objects && + grep $cruft objects + ) +' + +test_expect_success 'MIDX bitmaps tolerate reachable cruft objects' ' + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + + test_commit reachable && + test_commit cruft && + unreachable="$(git rev-parse cruft)" && + + git reset --hard $unreachable^ && + git tag -d cruft && + git reflog expire --all --expire=all && + + git repack --cruft -d && + + # resurrect the unreachable object via a new commit. the + # new commit will get selected for a bitmap, but be + # missing one of its parents from the selected packs. + git reset --hard $unreachable && + test_commit resurrect && + + git repack --write-midx --write-bitmap-index --geometric=2 -d + ) +' + +test_expect_success 'cruft objects are freshend via loose' ' + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + + echo "cruft" >contents && + blob="$(git hash-object -w -t blob contents)" && + loose="$objdir/$(test_oid_to_path $blob)" && + + test_commit base && + + git repack --cruft -d && + + test_path_is_missing "$loose" && + test-tool pack-mtimes "$(basename "$(ls $packdir/pack-*.mtimes)")" >cruft && + grep "$blob" cruft && + + # write the same object again + git hash-object -w -t blob contents && + + test_path_is_file "$loose" + ) +' + +test_done |
