Diffstat (limited to 'builtin/pack-objects.c')
| -rw-r--r-- | builtin/pack-objects.c | 425 |
1 file changed, 390 insertions, 35 deletions
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 8b33edc2ff..67941c8a60 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -41,6 +41,10 @@
 #include "promisor-remote.h"
 #include "pack-mtimes.h"
 #include "parse-options.h"
+#include "blob.h"
+#include "tree.h"
+#include "path-walk.h"
+#include "trace2.h"

 /*
  * Objects we are going to pack are collected in the `to_pack` structure.
@@ -184,8 +188,14 @@ static inline void oe_set_delta_size(struct packing_data *pack,

 #define SET_DELTA_SIBLING(obj, val) oe_set_delta_sibling(&to_pack, obj, val)

 static const char *const pack_usage[] = {
-	N_("git pack-objects --stdout [<options>] [< <ref-list> | < <object-list>]"),
-	N_("git pack-objects [<options>] <base-name> [< <ref-list> | < <object-list>]"),
+	N_("git pack-objects [-q | --progress | --all-progress] [--all-progress-implied]\n"
+	   "                 [--no-reuse-delta] [--delta-base-offset] [--non-empty]\n"
+	   "                 [--local] [--incremental] [--window=<n>] [--depth=<n>]\n"
+	   "                 [--revs [--unpacked | --all]] [--keep-pack=<pack-name>]\n"
+	   "                 [--cruft] [--cruft-expiration=<time>]\n"
+	   "                 [--stdout [--filter=<filter-spec>] | <base-name>]\n"
+	   "                 [--shallow] [--keep-true-parents] [--[no-]sparse]\n"
+	   "                 [--name-hash-version=<n>] [--path-walk] < <object-list>"),
 	NULL
 };
@@ -200,6 +210,7 @@ static int keep_unreachable, unpack_unreachable, include_tag;
 static timestamp_t unpack_unreachable_expiration;
 static int pack_loose_unreachable;
 static int cruft;
+static int shallow = 0;
 static timestamp_t cruft_expiration;
 static int local;
 static int have_non_local_packs;
@@ -218,6 +229,7 @@ static int delta_search_threads;
 static int pack_to_stdout;
 static int sparse;
 static int thin;
+static int path_walk = -1;
 static int num_preferred_base;
 static struct progress *progress_state;
@@ -3041,6 +3053,7 @@ static void find_deltas(struct object_entry **list, unsigned *list_size,
 struct thread_params {
 	pthread_t thread;
 	struct object_entry **list;
+	struct packing_region *regions;
 	unsigned list_size;
 	unsigned remaining;
 	int window;
@@ -3283,6 +3296,242 @@ static int add_ref_tag(const char *tag UNUSED, const char *referent UNUSED, cons
 	return 0;
 }

+static int should_attempt_deltas(struct object_entry *entry)
+{
+	if (DELTA(entry))
+		/* This happens if we decided to reuse existing
+		 * delta from a pack. "reuse_delta &&" is implied.
+		 */
+		return 0;
+
+	if (!entry->type_valid ||
+	    oe_size_less_than(&to_pack, entry, 50))
+		return 0;
+
+	if (entry->no_try_delta)
+		return 0;
+
+	if (!entry->preferred_base) {
+		if (oe_type(entry) < 0)
+			die(_("unable to get type of object %s"),
+			    oid_to_hex(&entry->idx.oid));
+	} else if (oe_type(entry) < 0) {
+		/*
+		 * This object is not found, but we
+		 * don't have to include it anyway.
+		 */
+		return 0;
+	}
+
+	return 1;
+}
+
+static void find_deltas_for_region(struct object_entry *list,
+				   struct packing_region *region,
+				   unsigned int *processed)
+{
+	struct object_entry **delta_list;
+	unsigned int delta_list_nr = 0;
+
+	ALLOC_ARRAY(delta_list, region->nr);
+	for (size_t i = 0; i < region->nr; i++) {
+		struct object_entry *entry = list + region->start + i;
+		if (should_attempt_deltas(entry))
+			delta_list[delta_list_nr++] = entry;
+	}
+
+	QSORT(delta_list, delta_list_nr, type_size_sort);
+	find_deltas(delta_list, &delta_list_nr, window, depth, processed);
+	free(delta_list);
+}
+
+static void find_deltas_by_region(struct object_entry *list,
+				  struct packing_region *regions,
+				  size_t start, size_t nr)
+{
+	unsigned int processed = 0;
+	size_t progress_nr;
+
+	if (!nr)
+		return;
+
+	progress_nr = regions[nr - 1].start + regions[nr - 1].nr;
+
+	if (progress)
+		progress_state = start_progress(the_repository,
+						_("Compressing objects by path"),
+						progress_nr);
+
+	while (nr--)
+		find_deltas_for_region(list,
+				       &regions[start++],
+				       &processed);
+
+	display_progress(progress_state, progress_nr);
+	stop_progress(&progress_state);
+}
+
+static void *threaded_find_deltas_by_path(void *arg)
+{
+	struct thread_params *me = arg;
+
+	progress_lock();
+	while (me->remaining) {
+		while (me->remaining) {
+			progress_unlock();
+			find_deltas_for_region(to_pack.objects,
+					       me->regions,
+					       me->processed);
+			progress_lock();
+			me->remaining--;
+			me->regions++;
+		}
+
+		me->working = 0;
+		pthread_cond_signal(&progress_cond);
+		progress_unlock();
+
+		/*
+		 * We must not set ->data_ready before we wait on the
+		 * condition because the main thread may have set it to 1
+		 * before we get here. In order to be sure that new
+		 * work is available if we see 1 in ->data_ready, it
+		 * was initialized to 0 before this thread was spawned
+		 * and we reset it to 0 right away.
+		 */
+		pthread_mutex_lock(&me->mutex);
+		while (!me->data_ready)
+			pthread_cond_wait(&me->cond, &me->mutex);
+		me->data_ready = 0;
+		pthread_mutex_unlock(&me->mutex);
+
+		progress_lock();
+	}
+	progress_unlock();
+	/* leave ->working 1 so that this doesn't get more work assigned */
+	return NULL;
+}
+
+static void ll_find_deltas_by_region(struct object_entry *list,
+				     struct packing_region *regions,
+				     uint32_t start, uint32_t nr)
+{
+	struct thread_params *p;
+	int i, ret, active_threads = 0;
+	unsigned int processed = 0;
+	uint32_t progress_nr;
+	init_threaded_search();
+
+	if (!nr)
+		return;
+
+	progress_nr = regions[nr - 1].start + regions[nr - 1].nr;
+	if (delta_search_threads <= 1) {
+		find_deltas_by_region(list, regions, start, nr);
+		cleanup_threaded_search();
+		return;
+	}
+
+	if (progress > pack_to_stdout)
+		fprintf_ln(stderr,
+			   Q_("Path-based delta compression using up to %d thread",
+			      "Path-based delta compression using up to %d threads",
+			      delta_search_threads),
+			   delta_search_threads);
+	CALLOC_ARRAY(p, delta_search_threads);
+
+	if (progress)
+		progress_state = start_progress(the_repository,
+						_("Compressing objects by path"),
+						progress_nr);
+	/* Partition the work amongst work threads. */
+	for (i = 0; i < delta_search_threads; i++) {
+		unsigned sub_size = nr / (delta_search_threads - i);
+
+		p[i].window = window;
+		p[i].depth = depth;
+		p[i].processed = &processed;
+		p[i].working = 1;
+		p[i].data_ready = 0;
+
+		p[i].regions = regions;
+		p[i].list_size = sub_size;
+		p[i].remaining = sub_size;
+
+		regions += sub_size;
+		nr -= sub_size;
+	}
+
+	/* Start work threads. */
+	for (i = 0; i < delta_search_threads; i++) {
+		if (!p[i].list_size)
+			continue;
+		pthread_mutex_init(&p[i].mutex, NULL);
+		pthread_cond_init(&p[i].cond, NULL);
+		ret = pthread_create(&p[i].thread, NULL,
+				     threaded_find_deltas_by_path, &p[i]);
+		if (ret)
+			die(_("unable to create thread: %s"), strerror(ret));
+		active_threads++;
+	}
+
+	/*
+	 * Now let's wait for work completion. Each time a thread is done
+	 * with its work, we steal half of the remaining work from the
+	 * thread with the largest number of unprocessed objects and give
+	 * it to that newly idle thread. This ensure good load balancing
+	 * until the remaining object list segments are simply too short
+	 * to be worth splitting anymore.
+	 */
+	while (active_threads) {
+		struct thread_params *target = NULL;
+		struct thread_params *victim = NULL;
+		unsigned sub_size = 0;
+
+		progress_lock();
+		for (;;) {
+			for (i = 0; !target && i < delta_search_threads; i++)
+				if (!p[i].working)
+					target = &p[i];
+			if (target)
+				break;
+			pthread_cond_wait(&progress_cond, &progress_mutex);
+		}
+
+		for (i = 0; i < delta_search_threads; i++)
+			if (p[i].remaining > 2*window &&
+			    (!victim || victim->remaining < p[i].remaining))
+				victim = &p[i];
+		if (victim) {
+			sub_size = victim->remaining / 2;
+			target->regions = victim->regions + victim->remaining - sub_size;
+			victim->list_size -= sub_size;
+			victim->remaining -= sub_size;
+		}
+		target->list_size = sub_size;
+		target->remaining = sub_size;
+		target->working = 1;
+		progress_unlock();
+
+		pthread_mutex_lock(&target->mutex);
+		target->data_ready = 1;
+		pthread_cond_signal(&target->cond);
+		pthread_mutex_unlock(&target->mutex);
+
+		if (!sub_size) {
+			pthread_join(target->thread, NULL);
+			pthread_cond_destroy(&target->cond);
+			pthread_mutex_destroy(&target->mutex);
+			active_threads--;
+		}
+	}
+	cleanup_threaded_search();
+	free(p);
+
+	display_progress(progress_state, progress_nr);
+	stop_progress(&progress_state);
+}
+
 static void prepare_pack(int window, int depth)
 {
 	struct object_entry **delta_list;
@@ -3307,39 +3556,21 @@ static void prepare_pack(int window, int depth)
 	if (!to_pack.nr_objects || !window || !depth)
 		return;

+	if (path_walk)
+		ll_find_deltas_by_region(to_pack.objects, to_pack.regions,
+					 0, to_pack.nr_regions);
+
 	ALLOC_ARRAY(delta_list, to_pack.nr_objects);
 	nr_deltas = n = 0;

 	for (i = 0; i < to_pack.nr_objects; i++) {
 		struct object_entry *entry = to_pack.objects + i;

-		if (DELTA(entry))
-			/* This happens if we decided to reuse existing
-			 * delta from a pack. "reuse_delta &&" is implied.
-			 */
-			continue;
-
-		if (!entry->type_valid ||
-		    oe_size_less_than(&to_pack, entry, 50))
-			continue;
-
-		if (entry->no_try_delta)
+		if (!should_attempt_deltas(entry))
 			continue;

-		if (!entry->preferred_base) {
+		if (!entry->preferred_base)
 			nr_deltas++;
-			if (oe_type(entry) < 0)
-				die(_("unable to get type of object %s"),
-				    oid_to_hex(&entry->idx.oid));
-		} else {
-			if (oe_type(entry) < 0) {
-				/*
-				 * This object is not found, but we
-				 * don't have to include it anyway.
-				 */
-				continue;
-			}
-		}

 		delta_list[n++] = entry;
 	}
@@ -4272,6 +4503,93 @@ static void mark_bitmap_preferred_tips(void)
 	}
 }

+static inline int is_oid_uninteresting(struct repository *repo,
+				       struct object_id *oid)
+{
+	struct object *o = lookup_object(repo, oid);
+	return !o || (o->flags & UNINTERESTING);
+}
+
+static int add_objects_by_path(const char *path,
+			       struct oid_array *oids,
+			       enum object_type type,
+			       void *data)
+{
+	size_t oe_start = to_pack.nr_objects;
+	size_t oe_end;
+	unsigned int *processed = data;
+
+	/*
+	 * First, add all objects to the packing data, including the ones
+	 * marked UNINTERESTING (translated to 'exclude') as they can be
+	 * used as delta bases.
+	 */
+	for (size_t i = 0; i < oids->nr; i++) {
+		int exclude;
+		struct object_info oi = OBJECT_INFO_INIT;
+		struct object_id *oid = &oids->oid[i];
+
+		/* Skip objects that do not exist locally. */
+		if ((exclude_promisor_objects || arg_missing_action != MA_ERROR) &&
+		    oid_object_info_extended(the_repository, oid, &oi,
+					     OBJECT_INFO_FOR_PREFETCH) < 0)
+			continue;
+
+		exclude = is_oid_uninteresting(the_repository, oid);
+
+		if (exclude && !thin)
+			continue;
+
+		add_object_entry(oid, type, path, exclude);
+	}
+
+	oe_end = to_pack.nr_objects;
+
+	/* We can skip delta calculations if it is a no-op. */
+	if (oe_end == oe_start || !window)
+		return 0;
+
+	ALLOC_GROW(to_pack.regions,
+		   to_pack.nr_regions + 1,
+		   to_pack.nr_regions_alloc);
+
+	to_pack.regions[to_pack.nr_regions].start = oe_start;
+	to_pack.regions[to_pack.nr_regions].nr = oe_end - oe_start;
+	to_pack.nr_regions++;
+
+	*processed += oids->nr;
+	display_progress(progress_state, *processed);
+
+	return 0;
+}
+
+static void get_object_list_path_walk(struct rev_info *revs)
+{
+	struct path_walk_info info = PATH_WALK_INFO_INIT;
+	unsigned int processed = 0;
+	int result;
+
+	info.revs = revs;
+	info.path_fn = add_objects_by_path;
+	info.path_fn_data = &processed;
+
+	/*
+	 * Allow the --[no-]sparse option to be interesting here, if only
+	 * for testing purposes. Paths with no interesting objects will not
+	 * contribute to the resulting pack, but only create noisy preferred
+	 * base objects.
+	 */
+	info.prune_all_uninteresting = sparse;
+	info.edge_aggressive = shallow;
+
+	trace2_region_enter("pack-objects", "path-walk", revs->repo);
+	result = walk_objects_by_path(&info);
+	trace2_region_leave("pack-objects", "path-walk", revs->repo);
+
+	if (result)
+		die(_("failed to pack objects via path-walk"));
+}
+
 static void get_object_list(struct rev_info *revs, int ac, const char **av)
 {
 	struct setup_revision_opt s_r_opt = {
@@ -4327,15 +4645,19 @@ static void get_object_list(struct rev_info *revs, int ac, const char **av)
 	if (write_bitmap_index)
 		mark_bitmap_preferred_tips();

-	if (prepare_revision_walk(revs))
-		die(_("revision walk setup failed"));
-	mark_edges_uninteresting(revs, show_edge, sparse);
-
 	if (!fn_show_object)
 		fn_show_object = show_object;
-	traverse_commit_list(revs,
-			     show_commit, fn_show_object,
-			     NULL);
+
+	if (path_walk) {
+		get_object_list_path_walk(revs);
+	} else {
+		if (prepare_revision_walk(revs))
+			die(_("revision walk setup failed"));
+		mark_edges_uninteresting(revs, show_edge, sparse);
+		traverse_commit_list(revs,
+				     show_commit, fn_show_object,
+				     NULL);
+	}

 	if (unpack_unreachable_expiration) {
 		revs->ignore_missing_links = 1;
@@ -4464,7 +4786,6 @@ int cmd_pack_objects(int argc,
 		     struct repository *repo UNUSED)
 {
 	int use_internal_rev_list = 0;
-	int shallow = 0;
 	int all_progress_implied = 0;
 	struct strvec rp = STRVEC_INIT;
 	int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0;
@@ -4545,6 +4866,8 @@ int cmd_pack_objects(int argc,
 			 N_("use the sparse reachability algorithm")),
 		OPT_BOOL(0, "thin", &thin,
 			 N_("create thin packs")),
+		OPT_BOOL(0, "path-walk", &path_walk,
+			 N_("use the path-walk API to walk objects when possible")),
 		OPT_BOOL(0, "shallow", &shallow,
 			 N_("create packs suitable for shallow fetches")),
 		OPT_BOOL(0, "honor-pack-keep", &ignore_packed_keep_on_disk,
@@ -4614,6 +4937,17 @@ int cmd_pack_objects(int argc,
 	if (pack_to_stdout != !base_name || argc)
 		usage_with_options(pack_usage, pack_objects_options);

+	if (path_walk < 0) {
+		if (use_bitmap_index > 0 ||
+		    !use_internal_rev_list)
+			path_walk = 0;
+		else if (the_repository->gitdir &&
+			 the_repository->settings.pack_use_path_walk)
+			path_walk = 1;
+		else
+			path_walk = git_env_bool("GIT_TEST_PACK_PATH_WALK", 0);
+	}
+
 	if (depth < 0)
 		depth = 0;
 	if (depth >= (1 << OE_DEPTH_BITS)) {
@@ -4630,7 +4964,28 @@ int cmd_pack_objects(int argc,
 		window = 0;

 	strvec_push(&rp, "pack-objects");
-	if (thin) {
+
+	if (path_walk) {
+		const char *option = NULL;
+		if (filter_options.choice)
+			option = "--filter";
+		else if (use_delta_islands)
+			option = "--delta-islands";
+
+		if (option) {
+			warning(_("cannot use %s with %s"),
+				option, "--path-walk");
+			path_walk = 0;
+		}
+	}
+	if (path_walk) {
+		strvec_push(&rp, "--boundary");
+		/*
+		 * We must disable the bitmaps because we are removing
+		 * the --objects / --objects-edge[-aggressive] options.
+		 */
+		use_bitmap_index = 0;
+	} else if (thin) {
 		use_internal_rev_list = 1;
 		strvec_push(&rp, shallow
 				? "--objects-edge-aggressive"
