From 4722e06edcbb57142387081684b41eda2a6cf635 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 23 May 2024 17:26:23 -0400 Subject: pack-bitmap: move some initialization to `bitmap_writer_init()` The pack-bitmap-writer machinery uses a oidmap (backed by khash.h) to map from commits selected for bitmaps (by OID) to a bitmapped_commit structure (containing the bitmap itself, among other things like its XOR offset, etc.) This map was initialized at the end of `bitmap_writer_build()`. New entries are added in `pack-bitmap-write.c::store_selected()`, which is called by the bitmap_builder machinery (which is responsible for traversing history and generating the actual bitmaps). Reorganize when this field is initialized and when entries are added to it so that we can quickly determine whether a commit is a candidate for pseudo-merge selection, or not (since it was already selected to receive a bitmap, and thus storing it in a pseudo-merge would be redundant). The changes are as follows: - Introduce a new `bitmap_writer_init()` function which initializes the `writer.bitmaps` field (instead of waiting until the end of `bitmap_writer_build()`). - Add map entries in `push_bitmapped_commit()` (which is called via `bitmap_writer_select_commits()`) with OID keys and NULL values to track whether or not we *expect* to write a bitmap for some given commit. - Validate that a NULL entry is found matching the given key when we store a selected bitmap. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- pack-bitmap-write.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'pack-bitmap-write.c') diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index 6cae670412..d887015583 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -27,9 +27,12 @@ struct bitmapped_commit { uint32_t commit_pos; }; -void bitmap_writer_init(struct bitmap_writer *writer) +void bitmap_writer_init(struct bitmap_writer *writer, struct repository *r) { memset(writer, 0, sizeof(struct bitmap_writer)); + if (writer->bitmaps) + BUG("bitmap writer already initialized"); + writer->bitmaps = kh_init_oid_map(); } void bitmap_writer_free(struct bitmap_writer *writer) @@ -128,11 +131,21 @@ void bitmap_writer_build_type_index(struct bitmap_writer *writer, static inline void push_bitmapped_commit(struct bitmap_writer *writer, struct commit *commit) { + int hash_ret; + khiter_t hash_pos; + if (writer->selected_nr >= writer->selected_alloc) { writer->selected_alloc = (writer->selected_alloc + 32) * 2; REALLOC_ARRAY(writer->selected, writer->selected_alloc); } + hash_pos = kh_put_oid_map(writer->bitmaps, commit->object.oid, + &hash_ret); + if (!hash_ret) + die(_("duplicate entry when writing bitmap index: %s"), + oid_to_hex(&commit->object.oid)); + kh_value(writer->bitmaps, hash_pos) = NULL; + writer->selected[writer->selected_nr].commit = commit; writer->selected[writer->selected_nr].bitmap = NULL; writer->selected[writer->selected_nr].write_as = NULL; @@ -483,14 +496,14 @@ static void store_selected(struct bitmap_writer *writer, { struct bitmapped_commit *stored = &writer->selected[ent->idx]; khiter_t hash_pos; - int hash_ret; stored->bitmap = bitmap_to_ewah(ent->bitmap); - hash_pos = kh_put_oid_map(writer->bitmaps, commit->object.oid, &hash_ret); - if (hash_ret == 0) - die("Duplicate entry when writing index: %s", + hash_pos = kh_get_oid_map(writer->bitmaps, commit->object.oid); + if (hash_pos == kh_end(writer->bitmaps)) + die(_("attempted to store non-selected commit: '%s'"), oid_to_hex(&commit->object.oid)); + kh_value(writer->bitmaps, hash_pos) = stored; } @@ -506,7 +519,6 @@ int bitmap_writer_build(struct bitmap_writer *writer, uint32_t *mapping; int closed = 1; /* until proven otherwise */ - writer->bitmaps = kh_init_oid_map(); writer->to_pack = to_pack; if (writer->show_progress) -- cgit 1.2.3-korg From 0d41b18317ece0d46e2cfdf1d99ba2b2166c0248 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 23 May 2024 17:26:29 -0400 Subject: pack-bitmap-write: support storing pseudo-merge commits Prepare to write pseudo-merge bitmaps by annotating individual bitmapped commits (which are represented by the `bitmapped_commit` structure) with an extra bit indicating whether or not they are a pseudo-merge. In subsequent commits, pseudo-merge bitmaps will be generated by allocating a fake commit node with parents covering the full set of commits represented by the pseudo-merge bitmap. These commits will be added to the set of "selected" commits as usual, but will be written specially instead of being included with the rest of the selected commits. Mechanically speaking, there are two parts of this change: - The bitmapped_commit struct gets a new bit indicating whether it is a pseudo-merge, or an ordinary commit selected for bitmaps. - A handful of changes to only write out the non-pseudo-merge commits when enumerating through the selected array (see the new `bitmap_writer_selected_nr()` function). Pseudo-merge commits appear after all non-pseudo-merge commits, so it is safe to enumerate through the selected array like so: for (i = 0; i < bitmap_writer_selected_nr(); i++) if (writer.selected[i].pseudo_merge) BUG("unexpected pseudo-merge"); without encountering the BUG(). Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- object.h | 2 +- pack-bitmap-write.c | 96 +++++++++++++++++++++++++++++++++++------------------ pack-bitmap.h | 3 ++ 3 files changed, 67 insertions(+), 34 deletions(-) (limited to 'pack-bitmap-write.c') diff --git a/object.h b/object.h index 99b9c8f114..e6f9e89d3c 100644 --- a/object.h +++ b/object.h @@ -81,7 +81,7 @@ void object_array_init(struct object_array *array); * reflog.c: 10--12 * builtin/show-branch.c: 0-------------------------------------------26 * builtin/unpack-objects.c: 2021 - * pack-bitmap.h: 22 + * pack-bitmap.h: 2122 */ #define FLAG_BITS 28 diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index d887015583..60eb1e71c9 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -25,8 +25,14 @@ struct bitmapped_commit { int flags; int xor_offset; uint32_t commit_pos; + unsigned pseudo_merge : 1; }; +static inline int bitmap_writer_nr_selected_commits(struct bitmap_writer *writer) +{ + return writer->selected_nr - writer->pseudo_merges_nr; +} + void bitmap_writer_init(struct bitmap_writer *writer, struct repository *r) { memset(writer, 0, sizeof(struct bitmap_writer)); @@ -129,27 +135,31 @@ void bitmap_writer_build_type_index(struct bitmap_writer *writer, */ static inline void push_bitmapped_commit(struct bitmap_writer *writer, - struct commit *commit) + struct commit *commit, + unsigned pseudo_merge) { - int hash_ret; - khiter_t hash_pos; - if (writer->selected_nr >= writer->selected_alloc) { writer->selected_alloc = (writer->selected_alloc + 32) * 2; REALLOC_ARRAY(writer->selected, writer->selected_alloc); } - hash_pos = kh_put_oid_map(writer->bitmaps, commit->object.oid, - &hash_ret); - if (!hash_ret) - die(_("duplicate entry when writing bitmap index: %s"), - oid_to_hex(&commit->object.oid)); - kh_value(writer->bitmaps, hash_pos) = NULL; + if (!pseudo_merge) { + int hash_ret; + khiter_t hash_pos = kh_put_oid_map(writer->bitmaps, + commit->object.oid, + &hash_ret); + + if (!hash_ret) + die(_("duplicate entry when writing bitmap index: %s"), + oid_to_hex(&commit->object.oid)); + kh_value(writer->bitmaps, hash_pos) = NULL; + } writer->selected[writer->selected_nr].commit = commit; writer->selected[writer->selected_nr].bitmap = NULL; writer->selected[writer->selected_nr].write_as = NULL; writer->selected[writer->selected_nr].flags = 0; + writer->selected[writer->selected_nr].pseudo_merge = pseudo_merge; writer->selected_nr++; } @@ -180,16 +190,20 @@ static void compute_xor_offsets(struct bitmap_writer *writer) while (next < writer->selected_nr) { struct bitmapped_commit *stored = &writer->selected[next]; - int best_offset = 0; struct ewah_bitmap *best_bitmap = stored->bitmap; struct ewah_bitmap *test_xor; + if (stored->pseudo_merge) + goto next; + for (i = 1; i <= MAX_XOR_OFFSET_SEARCH; ++i) { int curr = next - i; if (curr < 0) break; + if (writer->selected[curr].pseudo_merge) + continue; test_xor = ewah_pool_new(); ewah_xor(writer->selected[curr].bitmap, stored->bitmap, test_xor); @@ -205,6 +219,7 @@ static void compute_xor_offsets(struct bitmap_writer *writer) } } +next: stored->xor_offset = best_offset; stored->write_as = best_bitmap; @@ -217,7 +232,8 @@ struct bb_commit { struct bitmap *commit_mask; struct bitmap *bitmap; unsigned selected:1, - maximal:1; + maximal:1, + pseudo_merge:1; unsigned idx; /* within selected array */ }; @@ -255,17 +271,18 @@ static void bitmap_builder_init(struct bitmap_builder *bb, revs.first_parent_only = 1; for (i = 0; i < writer->selected_nr; i++) { - struct commit *c = writer->selected[i].commit; - struct bb_commit *ent = bb_data_at(&bb->data, c); + struct bitmapped_commit *bc = &writer->selected[i]; + struct bb_commit *ent = bb_data_at(&bb->data, bc->commit); ent->selected = 1; ent->maximal = 1; + ent->pseudo_merge = bc->pseudo_merge; ent->idx = i; ent->commit_mask = bitmap_new(); bitmap_set(ent->commit_mask, i); - add_pending_object(&revs, &c->object, ""); + add_pending_object(&revs, &bc->commit->object, ""); } if (prepare_revision_walk(&revs)) @@ -444,8 +461,13 @@ static int fill_bitmap_commit(struct bitmap_writer *writer, struct commit *c = prio_queue_get(queue); if (old_bitmap && mapping) { - struct ewah_bitmap *old = bitmap_for_commit(old_bitmap, c); + struct ewah_bitmap *old; struct bitmap *remapped = bitmap_new(); + + if (commit->object.flags & BITMAP_PSEUDO_MERGE) + old = NULL; + else + old = bitmap_for_commit(old_bitmap, c); /* * If this commit has an old bitmap, then translate that * bitmap and add its bits to this one. No need to walk @@ -464,12 +486,14 @@ static int fill_bitmap_commit(struct bitmap_writer *writer, * Mark ourselves and queue our tree. The commit * walk ensures we cover all parents. */ - pos = find_object_pos(writer, &c->object.oid, &found); - if (!found) - return -1; - bitmap_set(ent->bitmap, pos); - prio_queue_put(tree_queue, - repo_get_commit_tree(the_repository, c)); + if (!(c->object.flags & BITMAP_PSEUDO_MERGE)) { + pos = find_object_pos(writer, &c->object.oid, &found); + if (!found) + return -1; + bitmap_set(ent->bitmap, pos); + prio_queue_put(tree_queue, + repo_get_commit_tree(the_repository, c)); + } for (p = c->parents; p; p = p->next) { pos = find_object_pos(writer, &p->item->object.oid, @@ -499,6 +523,9 @@ static void store_selected(struct bitmap_writer *writer, stored->bitmap = bitmap_to_ewah(ent->bitmap); + if (ent->pseudo_merge) + return; + hash_pos = kh_get_oid_map(writer->bitmaps, commit->object.oid); if (hash_pos == kh_end(writer->bitmaps)) die(_("attempted to store non-selected commit: '%s'"), @@ -631,7 +658,7 @@ void bitmap_writer_select_commits(struct bitmap_writer *writer, if (indexed_commits_nr < 100) { for (i = 0; i < indexed_commits_nr; ++i) - push_bitmapped_commit(writer, indexed_commits[i]); + push_bitmapped_commit(writer, indexed_commits[i], 0); return; } @@ -664,7 +691,7 @@ void bitmap_writer_select_commits(struct bitmap_writer *writer, } } - push_bitmapped_commit(writer, chosen); + push_bitmapped_commit(writer, chosen, 0); i += next + 1; display_progress(writer->progress, i); @@ -701,8 +728,11 @@ static void write_selected_commits_v1(struct bitmap_writer *writer, { int i; - for (i = 0; i < writer->selected_nr; ++i) { + for (i = 0; i < bitmap_writer_nr_selected_commits(writer); ++i) { struct bitmapped_commit *stored = &writer->selected[i]; + if (stored->pseudo_merge) + BUG("unexpected pseudo-merge among selected: %s", + oid_to_hex(&stored->commit->object.oid)); if (offsets) offsets[i] = hashfile_total(f); @@ -735,10 +765,10 @@ static void write_lookup_table(struct bitmap_writer *writer, struct hashfile *f, uint32_t i; uint32_t *table, *table_inv; - ALLOC_ARRAY(table, writer->selected_nr); - ALLOC_ARRAY(table_inv, writer->selected_nr); + ALLOC_ARRAY(table, bitmap_writer_nr_selected_commits(writer)); + ALLOC_ARRAY(table_inv, bitmap_writer_nr_selected_commits(writer)); - for (i = 0; i < writer->selected_nr; i++) + for (i = 0; i < bitmap_writer_nr_selected_commits(writer); i++) table[i] = i; /* @@ -746,16 +776,16 @@ static void write_lookup_table(struct bitmap_writer *writer, struct hashfile *f, * bitmap corresponds to j'th bitmapped commit (among the selected * commits) in lex order of OIDs. */ - QSORT_S(table, writer->selected_nr, table_cmp, writer); + QSORT_S(table, bitmap_writer_nr_selected_commits(writer), table_cmp, writer); /* table_inv helps us discover that relationship (i'th bitmap * to j'th commit by j = table_inv[i]) */ - for (i = 0; i < writer->selected_nr; i++) + for (i = 0; i < bitmap_writer_nr_selected_commits(writer); i++) table_inv[table[i]] = i; trace2_region_enter("pack-bitmap-write", "writing_lookup_table", the_repository); - for (i = 0; i < writer->selected_nr; i++) { + for (i = 0; i < bitmap_writer_nr_selected_commits(writer); i++) { struct bitmapped_commit *selected = &writer->selected[table[i]]; uint32_t xor_offset = selected->xor_offset; uint32_t xor_row; @@ -827,7 +857,7 @@ void bitmap_writer_finish(struct bitmap_writer *writer, memcpy(header.magic, BITMAP_IDX_SIGNATURE, sizeof(BITMAP_IDX_SIGNATURE)); header.version = htons(default_version); header.options = htons(flags | options); - header.entry_count = htonl(writer->selected_nr); + header.entry_count = htonl(bitmap_writer_nr_selected_commits(writer)); hashcpy(header.checksum, writer->pack_checksum); hashwrite(f, &header, sizeof(header) - GIT_MAX_RAWSZ + the_hash_algo->rawsz); @@ -839,7 +869,7 @@ void bitmap_writer_finish(struct bitmap_writer *writer, if (options & BITMAP_OPT_LOOKUP_TABLE) CALLOC_ARRAY(offsets, index_nr); - for (i = 0; i < writer->selected_nr; i++) { + for (i = 0; i < bitmap_writer_nr_selected_commits(writer); i++) { struct bitmapped_commit *stored = &writer->selected[i]; int commit_pos = oid_pos(&stored->commit->object.oid, index, index_nr, oid_access); diff --git a/pack-bitmap.h b/pack-bitmap.h index f87e60153d..6937a0f090 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -21,6 +21,7 @@ struct bitmap_disk_header { unsigned char checksum[GIT_MAX_RAWSZ]; }; +#define BITMAP_PSEUDO_MERGE (1u<<21) #define NEEDS_BITMAP (1u<<22) /* @@ -109,6 +110,8 @@ struct bitmap_writer { struct bitmapped_commit *selected; unsigned int selected_nr, selected_alloc; + uint32_t pseudo_merges_nr; + struct progress *progress; int show_progress; unsigned char pack_checksum[GIT_MAX_RAWSZ]; -- cgit 1.2.3-korg From 245a7f2e01dc86baa08ceb70937585c6389cb9db Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 23 May 2024 17:26:32 -0400 Subject: pack-bitmap: implement `bitmap_writer_has_bitmapped_object_id()` Prepare to implement pseudo-merge bitmap selection by implementing a necessary new function, `bitmap_writer_has_bitmapped_object_id()`. This function returns whether or not the bitmap_writer selected the given object ID for bitmapping. This will allow the pseudo-merge machinery to reject candidates for pseudo-merges if they have already been selected as an ordinary bitmap tip. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- pack-bitmap-write.c | 6 ++++++ pack-bitmap.h | 2 ++ 2 files changed, 8 insertions(+) (limited to 'pack-bitmap-write.c') diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index 60eb1e71c9..299aa8af6f 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -130,6 +130,12 @@ void bitmap_writer_build_type_index(struct bitmap_writer *writer, } } +int bitmap_writer_has_bitmapped_object_id(struct bitmap_writer *writer, + const struct object_id *oid) +{ + return kh_get_oid_map(writer->bitmaps, *oid) != kh_end(writer->bitmaps); +} + /** * Compute the actual bitmaps */ diff --git a/pack-bitmap.h b/pack-bitmap.h index 6937a0f090..e175f28e0d 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -125,6 +125,8 @@ void bitmap_writer_build_type_index(struct bitmap_writer *writer, struct packing_data *to_pack, struct pack_idx_entry **index, uint32_t index_nr); +int bitmap_writer_has_bitmapped_object_id(struct bitmap_writer *writer, + const struct object_id *oid); uint32_t *create_bitmap_mapping(struct bitmap_index *bitmap_git, struct packing_data *mapping); int rebuild_bitmap(const uint32_t *reposition, -- cgit 1.2.3-korg From c059c8795e926cfe3ad8cb7a47f54c071227fcfa Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 23 May 2024 17:26:36 -0400 Subject: pack-bitmap: make `bitmap_writer_push_bitmapped_commit()` public The pseudo-merge selection code will be added in a subsequent commit, and will need a way to push the allocated commit structures into the bitmap writer from a separate compilation unit. Make the `bitmap_writer_push_bitmapped_commit()` function part of the pack-bitmap.h header in order to make this possible. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- pack-bitmap-write.c | 9 ++++----- pack-bitmap.h | 2 ++ 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'pack-bitmap-write.c') diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index 299aa8af6f..bc19b33ad1 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -140,9 +140,8 @@ int bitmap_writer_has_bitmapped_object_id(struct bitmap_writer *writer, * Compute the actual bitmaps */ -static inline void push_bitmapped_commit(struct bitmap_writer *writer, - struct commit *commit, - unsigned pseudo_merge) +void bitmap_writer_push_commit(struct bitmap_writer *writer, + struct commit *commit, unsigned pseudo_merge) { if (writer->selected_nr >= writer->selected_alloc) { writer->selected_alloc = (writer->selected_alloc + 32) * 2; @@ -664,7 +663,7 @@ void bitmap_writer_select_commits(struct bitmap_writer *writer, if (indexed_commits_nr < 100) { for (i = 0; i < indexed_commits_nr; ++i) - push_bitmapped_commit(writer, indexed_commits[i], 0); + bitmap_writer_push_commit(writer, indexed_commits[i], 0); return; } @@ -697,7 +696,7 @@ void bitmap_writer_select_commits(struct bitmap_writer *writer, } } - push_bitmapped_commit(writer, chosen, 0); + bitmap_writer_push_commit(writer, chosen, 0); i += next + 1; display_progress(writer->progress, i); diff --git a/pack-bitmap.h b/pack-bitmap.h index e175f28e0d..a7e2f56c97 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -127,6 +127,8 @@ void bitmap_writer_build_type_index(struct bitmap_writer *writer, uint32_t index_nr); int bitmap_writer_has_bitmapped_object_id(struct bitmap_writer *writer, const struct object_id *oid); +void bitmap_writer_push_commit(struct bitmap_writer *writer, + struct commit *commit, unsigned pseudo_merge); uint32_t *create_bitmap_mapping(struct bitmap_index *bitmap_git, struct packing_data *mapping); int rebuild_bitmap(const uint32_t *reposition, -- cgit 1.2.3-korg From faf558b23ef55b18f395d1f7a7c2714ccc1320e2 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 23 May 2024 17:26:42 -0400 Subject: pseudo-merge: implement support for selecting pseudo-merge commits Teach the new pseudo-merge machinery how to select non-bitmapped commits for inclusion in different pseudo-merge group(s) based on a handful of criteria. Note that the selected pseudo-merge commits aren't actually used or written anywhere yet. This will be done in the following commit. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/config.txt | 2 + Documentation/config/bitmap-pseudo-merge.txt | 91 ++++++ Documentation/gitpacking.txt | 83 +++++ pack-bitmap-write.c | 21 ++ pack-bitmap.h | 2 + pseudo-merge.c | 454 +++++++++++++++++++++++++++ pseudo-merge.h | 94 ++++++ 7 files changed, 747 insertions(+) create mode 100644 Documentation/config/bitmap-pseudo-merge.txt (limited to 'pack-bitmap-write.c') diff --git a/Documentation/config.txt b/Documentation/config.txt index 70b448b132..bbedb7b9a0 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -383,6 +383,8 @@ include::config/apply.txt[] include::config/attr.txt[] +include::config/bitmap-pseudo-merge.txt[] + include::config/blame.txt[] include::config/branch.txt[] diff --git a/Documentation/config/bitmap-pseudo-merge.txt b/Documentation/config/bitmap-pseudo-merge.txt new file mode 100644 index 0000000000..1f264eca99 --- /dev/null +++ b/Documentation/config/bitmap-pseudo-merge.txt @@ -0,0 +1,91 @@ +NOTE: The configuration options in `bitmapPseudoMerge.*` are considered +EXPERIMENTAL and may be subject to change or be removed entirely in the +future. For more information about the pseudo-merge bitmap feature, see +the "Pseudo-merge bitmaps" section of linkgit:gitpacking[7]. + +bitmapPseudoMerge..pattern:: + Regular expression used to match reference names. Commits + pointed to by references matching this pattern (and meeting + the below criteria, like `bitmapPseudoMerge..sampleRate` + and `bitmapPseudoMerge..threshold`) will be considered + for inclusion in a pseudo-merge bitmap. ++ +Commits are grouped into pseudo-merge groups based on whether or not +any reference(s) that point at a given commit match the pattern, which +is an extended regular expression. ++ +Within a pseudo-merge group, commits may be further grouped into +sub-groups based on the capture groups in the pattern. These +sub-groupings are formed from the regular expressions by concatenating +any capture groups from the regular expression, with a '-' dash in +between. ++ +For example, if the pattern is `refs/tags/`, then all tags (provided +they meet the below criteria) will be considered candidates for the +same pseudo-merge group. However, if the pattern is instead +`refs/remotes/([0-9])+/tags/`, then tags from different remotes will +be grouped into separate pseudo-merge groups, based on the remote +number. + +bitmapPseudoMerge..decay:: + Determines the rate at which consecutive pseudo-merge bitmap + groups decrease in size. Must be non-negative. This parameter + can be thought of as `k` in the function `f(n) = C * n^-k`, + where `f(n)` is the size of the `n`th group. ++ +Setting the decay rate equal to `0` will cause all groups to be the +same size. Setting the decay rate equal to `1` will cause the `n`th +group to be `1/n` the size of the initial group. Higher values of the +decay rate cause consecutive groups to shrink at an increasing rate. +The default is `1`. ++ +If all groups are the same size, it is possible that groups containing +newer commits will be able to be used less often than earlier groups, +since it is more likely that the references pointing at newer commits +will be updated more often than a reference pointing at an old commit. + +bitmapPseudoMerge..sampleRate:: + Determines the proportion of non-bitmapped commits (among + reference tips) which are selected for inclusion in an + unstable pseudo-merge bitmap. Must be between `0` and `1` + (inclusive). The default is `1`. + +bitmapPseudoMerge..threshold:: + Determines the minimum age of non-bitmapped commits (among + reference tips, as above) which are candidates for inclusion + in an unstable pseudo-merge bitmap. The default is + `1.week.ago`. + +bitmapPseudoMerge..maxMerges:: + Determines the maximum number of pseudo-merge commits among + which commits may be distributed. ++ +For pseudo-merge groups whose pattern does not contain any capture +groups, this setting is applied for all commits matching the regular +expression. For patterns that have one or more capture groups, this +setting is applied for each distinct capture group. ++ +For example, if your capture group is `refs/tags/`, then this setting +will distribute all tags into a maximum of `maxMerges` pseudo-merge +commits. However, if your capture group is, say, +`refs/remotes/([0-9]+)/tags/`, then this setting will be applied to +each remote's set of tags individually. ++ +Must be non-negative. The default value is 64. + +bitmapPseudoMerge..stableThreshold:: + Determines the minimum age of commits (among reference tips, + as above, however stable commits are still considered + candidates even when they have been covered by a bitmap) which + are candidates for a stable a pseudo-merge bitmap. The default + is `1.month.ago`. ++ +Setting this threshold to a smaller value (e.g., 1.week.ago) will cause +more stable groups to be generated (which impose a one-time generation +cost) but those groups will likely become stale over time. Using a +larger value incurs the opposite penalty (fewer stable groups which are +more useful). + +bitmapPseudoMerge..stableSize:: + Determines the size (in number of commits) of a stable + psuedo-merge bitmap. The default is `512`. diff --git a/Documentation/gitpacking.txt b/Documentation/gitpacking.txt index f24396f017..4a6fcba6f7 100644 --- a/Documentation/gitpacking.txt +++ b/Documentation/gitpacking.txt @@ -96,6 +96,89 @@ can take advantage of the fact that we only care about the union of objects reachable from all of those tags, and answer the query much faster. +=== Configuration + +Reference tips are grouped into different pseudo-merge groups according +to two criteria. A reference name matches one or more of the defined +pseudo-merge patterns, and optionally one or more capture groups within +that pattern which further partition the group. + +Within a group, commits may be considered "stable", or "unstable" +depending on their age. These are adjusted by setting the +`bitmapPseudoMerge..stableThreshold` and +`bitmapPseudoMerge..threshold` configuration values, respectively. + +All stable commits are grouped into pseudo-merges of equal size +(`bitmapPseudoMerge..stableSize`). If the `stableSize` +configuration is set to, say, 100, then the first 100 commits (ordered +by committer date) which are older than the `stableThreshold` value will +form one group, the next 100 commits will form another group, and so on. + +Among unstable commits, the pseudo-merge machinery will attempt to +combine older commits into large groups as opposed to newer commits +which will appear in smaller groups. This is based on the heuristic that +references whose tip commit is older are less likely to be modified to +point at a different commit than a reference whose tip commit is newer. + +The size of groups is determined by a power-law decay function, and the +decay parameter roughly corresponds to "k" in `f(n) = C*n^(-k/100)`, +where `f(n)` describes the size of the `n`-th pseudo-merge group. The +sample rate controls what percentage of eligible commits are considered +as candidates. The threshold parameter indicates the minimum age (so as +to avoid including too-recent commits in a pseudo-merge group, making it +less likely to be valid). The "maxMerges" parameter sets an upper-bound +on the number of pseudo-merge commits an individual group + +The "stable"-related parameters control "stable" pseudo-merge groups, +comprised of a fixed number of commits which are older than the +configured "stable threshold" value and may be grouped together in +chunks of "stableSize" in order of age. + +The exact configuration for pseudo-merges is as follows: + +include::config/bitmap-pseudo-merge.txt[] + +=== Examples + +Suppose that you have a repository with a large number of references, +and you want a bare-bones configuration of pseudo-merge bitmaps that +will enhance bitmap coverage of the `refs/` namespace. You may start +wiht a configuration like so: + + [bitmapPseudoMerge "all"] + pattern = "refs/" + threshold = now + stableThreshold = never + sampleRate = 100 + maxMerges = 64 + +This will create pseudo-merge bitmaps for all references, regardless of +their age, and group them into 64 pseudo-merge commits. + +If you wanted to separate tags from branches when generating +pseudo-merge commits, you would instead define the pattern with a +capture group, like so: + + [bitmapPseudoMerge "all"] + pattern = "refs/(heads/tags)/" + +Suppose instead that you are working in a fork-network repository, with +each fork specified by some numeric ID, and whose refs reside in +`refs/virtual/NNN/` (where `NNN` is the numeric ID corresponding to some +fork) in the network. In this instance, you may instead write something +like: + + [bitmapPseudoMerge "all"] + pattern = "refs/virtual/([0-9]+)/(heads|tags)/" + threshold = now + stableThreshold = never + sampleRate = 100 + maxMerges = 64 + +Which would generate pseudo-merge group identifiers like "1234-heads", +and "5678-tags" (for branches in fork "1234", and tags in remote "5678", +respectively). + SEE ALSO -------- linkgit:git-pack-objects[1] diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index bc19b33ad1..d5884ea5e9 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -17,6 +17,7 @@ #include "trace2.h" #include "tree.h" #include "tree-walk.h" +#include "pseudo-merge.h" struct bitmapped_commit { struct commit *commit; @@ -39,11 +40,25 @@ void bitmap_writer_init(struct bitmap_writer *writer, struct repository *r) if (writer->bitmaps) BUG("bitmap writer already initialized"); writer->bitmaps = kh_init_oid_map(); + writer->pseudo_merge_commits = kh_init_oid_map(); + + string_list_init_dup(&writer->pseudo_merge_groups); + + load_pseudo_merges_from_config(&writer->pseudo_merge_groups); +} + +static void free_pseudo_merge_commit_idx(struct pseudo_merge_commit_idx *idx) +{ + if (!idx) + return; + free(idx->pseudo_merge); + free(idx); } void bitmap_writer_free(struct bitmap_writer *writer) { uint32_t i; + struct pseudo_merge_commit_idx *idx; if (!writer) return; @@ -55,6 +70,10 @@ void bitmap_writer_free(struct bitmap_writer *writer) kh_destroy_oid_map(writer->bitmaps); + kh_foreach_value(writer->pseudo_merge_commits, idx, + free_pseudo_merge_commit_idx(idx)); + kh_destroy_oid_map(writer->pseudo_merge_commits); + for (i = 0; i < writer->selected_nr; i++) { struct bitmapped_commit *bc = &writer->selected[i]; if (bc->write_as != bc->bitmap) @@ -703,6 +722,8 @@ void bitmap_writer_select_commits(struct bitmap_writer *writer, } stop_progress(&writer->progress); + + select_pseudo_merges(writer, indexed_commits, indexed_commits_nr); } diff --git a/pack-bitmap.h b/pack-bitmap.h index a7e2f56c97..1e730ea1e5 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -110,6 +110,8 @@ struct bitmap_writer { struct bitmapped_commit *selected; unsigned int selected_nr, selected_alloc; + struct string_list pseudo_merge_groups; + kh_oid_map_t *pseudo_merge_commits; /* oid -> pseudo merge(s) */ uint32_t pseudo_merges_nr; struct progress *progress; diff --git a/pseudo-merge.c b/pseudo-merge.c index 37e037ba27..0f6854c753 100644 --- a/pseudo-merge.c +++ b/pseudo-merge.c @@ -1,2 +1,456 @@ #include "git-compat-util.h" #include "pseudo-merge.h" +#include "date.h" +#include "oid-array.h" +#include "strbuf.h" +#include "config.h" +#include "string-list.h" +#include "refs.h" +#include "pack-bitmap.h" +#include "commit.h" +#include "alloc.h" +#include "progress.h" + +#define DEFAULT_PSEUDO_MERGE_DECAY 1.0 +#define DEFAULT_PSEUDO_MERGE_MAX_MERGES 64 +#define DEFAULT_PSEUDO_MERGE_SAMPLE_RATE 1 +#define DEFAULT_PSEUDO_MERGE_THRESHOLD approxidate("1.week.ago") +#define DEFAULT_PSEUDO_MERGE_STABLE_THRESHOLD approxidate("1.month.ago") +#define DEFAULT_PSEUDO_MERGE_STABLE_SIZE 512 + +static double gitexp(double base, int exp) +{ + double result = 1; + while (1) { + if (exp % 2) + result *= base; + exp >>= 1; + if (!exp) + break; + base *= base; + } + return result; +} + +static uint32_t pseudo_merge_group_size(const struct pseudo_merge_group *group, + const struct pseudo_merge_matches *matches, + uint32_t i) +{ + double C = 0.0f; + uint32_t n; + + /* + * The size of pseudo-merge groups decays according to a power series, + * which looks like: + * + * f(n) = C * n^-k + * + * , where 'n' is the n-th pseudo-merge group, 'f(n)' is its size, 'k' + * is the decay rate, and 'C' is a scaling value. + * + * The value of C depends on the number of groups, decay rate, and total + * number of commits. It is computed such that if there are M and N + * total groups and commits, respectively, that: + * + * N = f(0) + f(1) + ... f(M-1) + * + * Rearranging to isolate C, we get: + * + * N = \sum_{n=1}^M C / n^k + * + * N / C = \sum_{n=1}^M n^-k + * + * C = N / \sum_{n=1}^M n^-k + * + * For example, if we have a decay rate of 'k' being equal to 1.5, 'N' + * total commits equal to 10,000, and 'M' being equal to 6 groups, then + * the (rounded) group sizes are: + * + * { 5469, 1934, 1053, 684, 489, 372 } + * + * increasing the number of total groups, say to 10, scales the group + * sizes appropriately: + * + * { 5012, 1772, 964, 626, 448, 341, 271, 221, 186, 158 } + */ + for (n = 0; n < group->max_merges; n++) + C += 1.0 / gitexp(n + 1, group->decay); + C = matches->unstable_nr / C; + + return (uint32_t)((C / gitexp(i + 1, group->decay)) + 0.5); +} + +static void pseudo_merge_group_init(struct pseudo_merge_group *group) +{ + memset(group, 0, sizeof(struct pseudo_merge_group)); + + strmap_init_with_options(&group->matches, NULL, 0); + + group->decay = DEFAULT_PSEUDO_MERGE_DECAY; + group->max_merges = DEFAULT_PSEUDO_MERGE_MAX_MERGES; + group->sample_rate = DEFAULT_PSEUDO_MERGE_SAMPLE_RATE; + group->threshold = DEFAULT_PSEUDO_MERGE_THRESHOLD; + group->stable_threshold = DEFAULT_PSEUDO_MERGE_STABLE_THRESHOLD; + group->stable_size = DEFAULT_PSEUDO_MERGE_STABLE_SIZE; +} + +static int pseudo_merge_config(const char *var, const char *value, + const struct config_context *ctx, + void *cb_data) +{ + struct string_list *list = cb_data; + struct string_list_item *item; + struct pseudo_merge_group *group; + struct strbuf buf = STRBUF_INIT; + const char *sub, *key; + size_t sub_len; + int ret = 0; + + if (parse_config_key(var, "bitmappseudomerge", &sub, &sub_len, &key)) + goto done; + + if (!sub_len) + goto done; + + strbuf_add(&buf, sub, sub_len); + + item = string_list_lookup(list, buf.buf); + if (!item) { + item = string_list_insert(list, buf.buf); + + item->util = xmalloc(sizeof(struct pseudo_merge_group)); + pseudo_merge_group_init(item->util); + } + + group = item->util; + + if (!strcmp(key, "pattern")) { + struct strbuf re = STRBUF_INIT; + + free(group->pattern); + if (*value != '^') + strbuf_addch(&re, '^'); + strbuf_addstr(&re, value); + + group->pattern = xcalloc(1, sizeof(regex_t)); + if (regcomp(group->pattern, re.buf, REG_EXTENDED)) + die(_("failed to load pseudo-merge regex for %s: '%s'"), + sub, re.buf); + + strbuf_release(&re); + } else if (!strcmp(key, "decay")) { + group->decay = git_config_double(var, value, ctx->kvi); + if (group->decay < 0) { + warning(_("%s must be non-negative, using default"), var); + group->decay = DEFAULT_PSEUDO_MERGE_DECAY; + } + } else if (!strcmp(key, "samplerate")) { + group->sample_rate = git_config_double(var, value, ctx->kvi); + if (!(0 <= group->sample_rate && group->sample_rate <= 1)) { + warning(_("%s must be between 0 and 1, using default"), var); + group->sample_rate = DEFAULT_PSEUDO_MERGE_SAMPLE_RATE; + } + } else if (!strcmp(key, "threshold")) { + if (git_config_expiry_date(&group->threshold, var, value)) { + ret = -1; + goto done; + } + } else if (!strcmp(key, "maxmerges")) { + group->max_merges = git_config_int(var, value, ctx->kvi); + if (group->max_merges < 0) { + warning(_("%s must be non-negative, using default"), var); + group->max_merges = DEFAULT_PSEUDO_MERGE_MAX_MERGES; + } + } else if (!strcmp(key, "stablethreshold")) { + if (git_config_expiry_date(&group->stable_threshold, var, value)) { + ret = -1; + goto done; + } + } else if (!strcmp(key, "stablesize")) { + group->stable_size = git_config_int(var, value, ctx->kvi); + if (group->stable_size <= 0) { + warning(_("%s must be positive, using default"), var); + group->stable_size = DEFAULT_PSEUDO_MERGE_STABLE_SIZE; + } + } + +done: + strbuf_release(&buf); + + return ret; +} + +void load_pseudo_merges_from_config(struct string_list *list) +{ + struct string_list_item *item; + + git_config(pseudo_merge_config, list); + + for_each_string_list_item(item, list) { + struct pseudo_merge_group *group = item->util; + if (!group->pattern) + die(_("pseudo-merge group '%s' missing required pattern"), + item->string); + if (group->threshold < group->stable_threshold) + die(_("pseudo-merge group '%s' has unstable threshold " + "before stable one"), item->string); + } +} + +static int find_pseudo_merge_group_for_ref(const char *refname, + const struct object_id *oid, + int flags UNUSED, + void *_data) +{ + struct bitmap_writer *writer = _data; + struct object_id peeled; + struct commit *c; + uint32_t i; + int has_bitmap; + + if (!peel_iterated_oid(oid, &peeled)) + oid = &peeled; + + c = lookup_commit(the_repository, oid); + if (!c) + return 0; + + has_bitmap = bitmap_writer_has_bitmapped_object_id(writer, oid); + + for (i = 0; i < writer->pseudo_merge_groups.nr; i++) { + struct pseudo_merge_group *group; + struct pseudo_merge_matches *matches; + struct strbuf group_name = STRBUF_INIT; + regmatch_t captures[16]; + size_t j; + + group = writer->pseudo_merge_groups.items[i].util; + if (regexec(group->pattern, refname, ARRAY_SIZE(captures), + captures, 0)) + continue; + + if (captures[ARRAY_SIZE(captures) - 1].rm_so != -1) + warning(_("pseudo-merge regex from config has too many capture " + "groups (max=%"PRIuMAX")"), + (uintmax_t)ARRAY_SIZE(captures) - 2); + + for (j = !!group->pattern->re_nsub; j < ARRAY_SIZE(captures); j++) { + regmatch_t *match = &captures[j]; + if (match->rm_so == -1) + continue; + + if (group_name.len) + strbuf_addch(&group_name, '-'); + + strbuf_add(&group_name, refname + match->rm_so, + match->rm_eo - match->rm_so); + } + + matches = strmap_get(&group->matches, group_name.buf); + if (!matches) { + matches = xcalloc(1, sizeof(*matches)); + strmap_put(&group->matches, strbuf_detach(&group_name, NULL), + matches); + } + + if (c->date <= group->stable_threshold) { + ALLOC_GROW(matches->stable, matches->stable_nr + 1, + matches->stable_alloc); + matches->stable[matches->stable_nr++] = c; + } else if (c->date <= group->threshold && !has_bitmap) { + ALLOC_GROW(matches->unstable, matches->unstable_nr + 1, + matches->unstable_alloc); + matches->unstable[matches->unstable_nr++] = c; + } + + strbuf_release(&group_name); + } + + return 0; +} + +static struct commit *push_pseudo_merge(struct pseudo_merge_group *group) +{ + struct commit *merge; + + ALLOC_GROW(group->merges, group->merges_nr + 1, group->merges_alloc); + + merge = alloc_commit_node(the_repository); + merge->object.parsed = 1; + merge->object.flags |= BITMAP_PSEUDO_MERGE; + + group->merges[group->merges_nr++] = merge; + + return merge; +} + +static struct pseudo_merge_commit_idx *pseudo_merge_idx(kh_oid_map_t *pseudo_merge_commits, + const struct object_id *oid) + +{ + struct pseudo_merge_commit_idx *pmc; + int hash_ret; + khiter_t hash_pos = kh_put_oid_map(pseudo_merge_commits, *oid, + &hash_ret); + + if (hash_ret) { + CALLOC_ARRAY(pmc, 1); + kh_value(pseudo_merge_commits, hash_pos) = pmc; + } else { + pmc = kh_value(pseudo_merge_commits, hash_pos); + } + + return pmc; +} + +#define MIN_PSEUDO_MERGE_SIZE 8 + +static void select_pseudo_merges_1(struct bitmap_writer *writer, + struct pseudo_merge_group *group, + struct pseudo_merge_matches *matches) +{ + uint32_t i, j; + uint32_t stable_merges_nr; + + if (!matches->stable_nr && !matches->unstable_nr) + return; /* all tips in this group already have bitmaps */ + + stable_merges_nr = matches->stable_nr / group->stable_size; + if (matches->stable_nr % group->stable_size) + stable_merges_nr++; + + /* make stable_merges_nr pseudo merges for stable commits */ + for (i = 0, j = 0; i < stable_merges_nr; i++) { + struct commit *merge; + struct commit_list **p; + + merge = push_pseudo_merge(group); + p = &merge->parents; + + /* + * For each pseudo-merge created above, add parents to the + * allocated commit node from the stable set of commits + * (un-bitmapped, newer than the stable threshold). + */ + do { + struct commit *c; + struct pseudo_merge_commit_idx *pmc; + + if (j >= matches->stable_nr) + break; + + c = matches->stable[j++]; + /* + * Here and below, make sure that we keep our mapping of + * commits -> pseudo-merge(s) which include the key'd + * commit up-to-date. + */ + pmc = pseudo_merge_idx(writer->pseudo_merge_commits, + &c->object.oid); + + ALLOC_GROW(pmc->pseudo_merge, pmc->nr + 1, pmc->alloc); + + pmc->pseudo_merge[pmc->nr++] = writer->pseudo_merges_nr; + p = commit_list_append(c, p); + } while (j % group->stable_size); + + bitmap_writer_push_commit(writer, merge, 1); + writer->pseudo_merges_nr++; + } + + /* make up to group->max_merges pseudo merges for unstable commits */ + for (i = 0, j = 0; i < group->max_merges; i++) { + struct commit *merge; + struct commit_list **p; + uint32_t size, end; + + merge = push_pseudo_merge(group); + p = &merge->parents; + + size = pseudo_merge_group_size(group, matches, i); + end = size < MIN_PSEUDO_MERGE_SIZE ? matches->unstable_nr : j + size; + + /* + * For each pseudo-merge commit created above, add parents to + * the allocated commit node from the unstable set of commits + * (newer than the stable threshold). + * + * Account for the sample rate, since not every candidate from + * the set of stable commits will be included as a pseudo-merge + * parent. + */ + for (; j < end && j < matches->unstable_nr; j++) { + struct commit *c = matches->unstable[j]; + struct pseudo_merge_commit_idx *pmc; + + if (j % (uint32_t)(1.0 / group->sample_rate)) + continue; + + pmc = pseudo_merge_idx(writer->pseudo_merge_commits, + &c->object.oid); + + ALLOC_GROW(pmc->pseudo_merge, pmc->nr + 1, pmc->alloc); + + pmc->pseudo_merge[pmc->nr++] = writer->pseudo_merges_nr; + p = commit_list_append(c, p); + } + + bitmap_writer_push_commit(writer, merge, 1); + writer->pseudo_merges_nr++; + if (end >= matches->unstable_nr) + break; + } +} + +static int commit_date_cmp(const void *va, const void *vb) +{ + timestamp_t a = (*(const struct commit **)va)->date; + timestamp_t b = (*(const struct commit **)vb)->date; + + if (a < b) + return -1; + else if (a > b) + return 1; + return 0; +} + +static void sort_pseudo_merge_matches(struct pseudo_merge_matches *matches) +{ + QSORT(matches->stable, matches->stable_nr, commit_date_cmp); + QSORT(matches->unstable, matches->unstable_nr, commit_date_cmp); +} + +void select_pseudo_merges(struct bitmap_writer *writer, + struct commit **commits, size_t commits_nr) +{ + struct progress *progress = NULL; + uint32_t i; + + if (!writer->pseudo_merge_groups.nr) + return; + + if (writer->show_progress) + progress = start_progress("Selecting pseudo-merge commits", + writer->pseudo_merge_groups.nr); + + for_each_ref(find_pseudo_merge_group_for_ref, writer); + + for (i = 0; i < writer->pseudo_merge_groups.nr; i++) { + struct pseudo_merge_group *group; + struct hashmap_iter iter; + struct strmap_entry *e; + + group = writer->pseudo_merge_groups.items[i].util; + strmap_for_each_entry(&group->matches, &iter, e) { + struct pseudo_merge_matches *matches = e->value; + + sort_pseudo_merge_matches(matches); + + select_pseudo_merges_1(writer, group, matches); + } + + display_progress(progress, i + 1); + } + + stop_progress(&progress); +} diff --git a/pseudo-merge.h b/pseudo-merge.h index cab8ff6960..f809cf42ae 100644 --- a/pseudo-merge.h +++ b/pseudo-merge.h @@ -2,5 +2,99 @@ #define PSEUDO_MERGE_H #include "git-compat-util.h" +#include "strmap.h" +#include "khash.h" +#include "ewah/ewok.h" + +struct commit; +struct string_list; +struct bitmap_index; +struct bitmap_writer; + +/* + * A pseudo-merge group tracks the set of non-bitmapped reference tips + * that match the given pattern. + * + * Within those matches, they are further segmented by separating + * consecutive capture groups with '-' dash character capture groups + * with '-' dash characters. + * + * Those groups are then ordered by committer date and partitioned + * into individual pseudo-merge(s) according to the decay, max_merges, + * sample_rate, and threshold parameters. + */ +struct pseudo_merge_group { + regex_t *pattern; + + /* capture group(s) -> struct pseudo_merge_matches */ + struct strmap matches; + + /* + * The individual pseudo-merge(s) that are generated from the + * above array of matches, partitioned according to the below + * parameters. + */ + struct commit **merges; + size_t merges_nr; + size_t merges_alloc; + + /* + * Pseudo-merge grouping parameters. See git-config(1) for + * more information. + */ + double decay; + int max_merges; + double sample_rate; + int stable_size; + timestamp_t threshold; + timestamp_t stable_threshold; +}; + +struct pseudo_merge_matches { + struct commit **stable; + struct commit **unstable; + size_t stable_nr, stable_alloc; + size_t unstable_nr, unstable_alloc; +}; + +/* + * Read the repository's configuration: + * + * - bitmapPseudoMerge..pattern + * - bitmapPseudoMerge..decay + * - bitmapPseudoMerge..sampleRate + * - bitmapPseudoMerge..threshold + * - bitmapPseudoMerge..maxMerges + * - bitmapPseudoMerge..stableThreshold + * - bitmapPseudoMerge..stableSize + * + * and populates the given `list` with pseudo-merge groups. String + * entry keys are the pseudo-merge group names, and the values are + * pointers to the pseudo_merge_group structure itself. + */ +void load_pseudo_merges_from_config(struct string_list *list); + +/* + * A pseudo-merge commit index (pseudo_merge_commit_idx) maps a + * particular (non-pseudo-merge) commit to the list of pseudo-merge(s) + * it appears in. + */ +struct pseudo_merge_commit_idx { + uint32_t *pseudo_merge; + size_t nr, alloc; +}; + +/* + * Selects pseudo-merges from a list of commits, populating the given + * string_list of pseudo-merge groups. + * + * Populates the pseudo_merge_commits map with a commit_idx + * corresponding to each commit in the list. Counts the total number + * of pseudo-merges generated. + * + * Optionally shows a progress meter. + */ +void select_pseudo_merges(struct bitmap_writer *writer, + struct commit **commits, size_t commits_nr); #endif -- cgit 1.2.3-korg From 53ea3ec47990e9ad00a238fd962f36e5d87a6386 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 23 May 2024 17:26:46 -0400 Subject: pack-bitmap-write.c: write pseudo-merge table Now that the pack-bitmap writer machinery understands how to select and store pseudo-merge commits, teach it how to write the new optional pseudo-merge .bitmap extension. No readers yet exist for this new extension to the .bitmap format. The following commits will take any preparatory step(s) necessary before then implementing the routines necessary to read this new table. In the meantime, the new `write_pseudo_merges()` function implements writing this new format as described by a previous commit in Documentation/technical/bitmap-format.txt. Writing this table is fairly straightforward and consists of a few sub-components: - a pair of bitmaps for each pseudo-merge (one for the pseudo-merge "parents", and another for the objects reachable from those parents) - for each commit, the offset of either (a) the pseudo-merge it belongs to, or (b) an extended lookup table if it belongs to >1 pseudo-merge groups - if there are any commits belonging to >1 pseudo-merge group, the extended lookup tables (which each consist of the number of pseudo-merge groups a commit appears in, and then that many 4-byte unsigned ) Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- pack-bitmap-write.c | 131 ++++++++++++++++++++++++++++++++++++++++++++++++++++ pack-bitmap.h | 1 + 2 files changed, 132 insertions(+) (limited to 'pack-bitmap-write.c') diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index d5884ea5e9..47250398aa 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -18,6 +18,7 @@ #include "tree.h" #include "tree-walk.h" #include "pseudo-merge.h" +#include "oid-array.h" struct bitmapped_commit { struct commit *commit; @@ -771,6 +772,130 @@ static void write_selected_commits_v1(struct bitmap_writer *writer, } } +static void write_pseudo_merges(struct bitmap_writer *writer, + struct hashfile *f) +{ + struct oid_array commits = OID_ARRAY_INIT; + struct bitmap **commits_bitmap = NULL; + off_t *pseudo_merge_ofs = NULL; + off_t start, table_start, next_ext; + + uint32_t base = bitmap_writer_nr_selected_commits(writer); + size_t i, j = 0; + + CALLOC_ARRAY(commits_bitmap, writer->pseudo_merges_nr); + CALLOC_ARRAY(pseudo_merge_ofs, writer->pseudo_merges_nr); + + for (i = 0; i < writer->pseudo_merges_nr; i++) { + struct bitmapped_commit *merge = &writer->selected[base + i]; + struct commit_list *p; + + if (!merge->pseudo_merge) + BUG("found non-pseudo merge commit at %"PRIuMAX, (uintmax_t)i); + + commits_bitmap[i] = bitmap_new(); + + for (p = merge->commit->parents; p; p = p->next) + bitmap_set(commits_bitmap[i], + find_object_pos(writer, &p->item->object.oid, + NULL)); + } + + start = hashfile_total(f); + + for (i = 0; i < writer->pseudo_merges_nr; i++) { + struct ewah_bitmap *commits_ewah = bitmap_to_ewah(commits_bitmap[i]); + + pseudo_merge_ofs[i] = hashfile_total(f); + + dump_bitmap(f, commits_ewah); + dump_bitmap(f, writer->selected[base+i].write_as); + + ewah_free(commits_ewah); + } + + next_ext = st_add(hashfile_total(f), + st_mult(kh_size(writer->pseudo_merge_commits), + sizeof(uint64_t))); + + table_start = hashfile_total(f); + + commits.alloc = kh_size(writer->pseudo_merge_commits); + CALLOC_ARRAY(commits.oid, commits.alloc); + + for (i = kh_begin(writer->pseudo_merge_commits); i != kh_end(writer->pseudo_merge_commits); i++) { + if (!kh_exist(writer->pseudo_merge_commits, i)) + continue; + oid_array_append(&commits, &kh_key(writer->pseudo_merge_commits, i)); + } + + oid_array_sort(&commits); + + /* write lookup table (non-extended) */ + for (i = 0; i < commits.nr; i++) { + int hash_pos; + struct pseudo_merge_commit_idx *c; + + hash_pos = kh_get_oid_map(writer->pseudo_merge_commits, + commits.oid[i]); + if (hash_pos == kh_end(writer->pseudo_merge_commits)) + BUG("could not find pseudo-merge commit %s", + oid_to_hex(&commits.oid[i])); + + c = kh_value(writer->pseudo_merge_commits, hash_pos); + + hashwrite_be32(f, find_object_pos(writer, &commits.oid[i], + NULL)); + if (c->nr == 1) + hashwrite_be64(f, pseudo_merge_ofs[c->pseudo_merge[0]]); + else if (c->nr > 1) { + if (next_ext & ((uint64_t)1<<63)) + die(_("too many pseudo-merges")); + hashwrite_be64(f, next_ext | ((uint64_t)1<<63)); + next_ext = st_add3(next_ext, + sizeof(uint32_t), + st_mult(c->nr, sizeof(uint64_t))); + } else + BUG("expected commit '%s' to have at least one " + "pseudo-merge", oid_to_hex(&commits.oid[i])); + } + + /* write lookup table (extended) */ + for (i = 0; i < commits.nr; i++) { + int hash_pos; + struct pseudo_merge_commit_idx *c; + + hash_pos = kh_get_oid_map(writer->pseudo_merge_commits, + commits.oid[i]); + if (hash_pos == kh_end(writer->pseudo_merge_commits)) + BUG("could not find pseudo-merge commit %s", + oid_to_hex(&commits.oid[i])); + + c = kh_value(writer->pseudo_merge_commits, hash_pos); + if (c->nr == 1) + continue; + + hashwrite_be32(f, c->nr); + for (j = 0; j < c->nr; j++) + hashwrite_be64(f, pseudo_merge_ofs[c->pseudo_merge[j]]); + } + + /* write positions for all pseudo merges */ + for (i = 0; i < writer->pseudo_merges_nr; i++) + hashwrite_be64(f, pseudo_merge_ofs[i]); + + hashwrite_be32(f, writer->pseudo_merges_nr); + hashwrite_be32(f, kh_size(writer->pseudo_merge_commits)); + hashwrite_be64(f, table_start - start); + hashwrite_be64(f, hashfile_total(f) - start + sizeof(uint64_t)); + + for (i = 0; i < writer->pseudo_merges_nr; i++) + bitmap_free(commits_bitmap[i]); + + free(pseudo_merge_ofs); + free(commits_bitmap); +} + static int table_cmp(const void *_va, const void *_vb, void *_data) { struct bitmap_writer *writer = _data; @@ -878,6 +1003,9 @@ void bitmap_writer_finish(struct bitmap_writer *writer, int fd = odb_mkstemp(&tmp_file, "pack/tmp_bitmap_XXXXXX"); + if (writer->pseudo_merges_nr) + options |= BITMAP_OPT_PSEUDO_MERGES; + f = hashfd(fd, tmp_file.buf); memcpy(header.magic, BITMAP_IDX_SIGNATURE, sizeof(BITMAP_IDX_SIGNATURE)); @@ -907,6 +1035,9 @@ void bitmap_writer_finish(struct bitmap_writer *writer, write_selected_commits_v1(writer, f, offsets); + if (options & BITMAP_OPT_PSEUDO_MERGES) + write_pseudo_merges(writer, f); + if (options & BITMAP_OPT_LOOKUP_TABLE) write_lookup_table(writer, f, offsets); diff --git a/pack-bitmap.h b/pack-bitmap.h index 1e730ea1e5..db9ae554fa 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -37,6 +37,7 @@ enum pack_bitmap_opts { BITMAP_OPT_FULL_DAG = 0x1, BITMAP_OPT_HASH_CACHE = 0x4, BITMAP_OPT_LOOKUP_TABLE = 0x10, + BITMAP_OPT_PSEUDO_MERGES = 0x20, }; enum pack_bitmap_flags { -- cgit 1.2.3-korg From 7252d9a036fabb10f60dc09937fc39e252f3c4d4 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 23 May 2024 17:27:21 -0400 Subject: pseudo-merge: implement support for finding existing merges This patch implements support for reusing existing pseudo-merge commits when writing bitmaps when there is an existing pseudo-merge bitmap which has exactly the same set of parents as one that we are about to write. Note that unstable pseudo-merges are likely to change between consecutive repacks, and so are generally poor candidates for reuse. However, stable pseudo-merges (see the configuration option 'bitmapPseudoMerge..stableThreshold') are by definition unlikely to change between runs (as they represent long-running branches). Because there is no index from a *set* of pseudo-merge parents to a matching pseudo-merge bitmap, we have to construct the bitmap corresponding to the set of parents for each pending pseudo-merge commit and see if a matching bitmap exists. This is technically quadratic in the number of pseudo-merges, but is OK in practice for a couple of reasons: - non-matching pseudo-merge bitmaps are rejected quickly as soon as they differ in a single bit - already-matched pseudo-merge bitmaps are discarded from subsequent rounds of search - the number of pseudo-merges is generally small, even for large repositories In order to do this, implement (a) a function that finds a matching pseudo-merge given some uncompressed bitset describing its parents, (b) a function that computes the bitset of parents for a given pseudo-merge commit, and (c) call that function before computing the set of reachable objects for some pending pseudo-merge. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- pack-bitmap-write.c | 15 ++++++++-- pack-bitmap.c | 32 ++++++++++++++++++++ pack-bitmap.h | 2 ++ pseudo-merge.c | 55 ++++++++++++++++++++++++++++++++++ pseudo-merge.h | 7 +++++ t/t5333-pseudo-merge-bitmaps.sh | 65 +++++++++++++++++++++++++++++++++++++++++ 6 files changed, 174 insertions(+), 2 deletions(-) (limited to 'pack-bitmap-write.c') diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index 47250398aa..6e8060f8a0 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -19,6 +19,10 @@ #include "tree-walk.h" #include "pseudo-merge.h" #include "oid-array.h" +#include "config.h" +#include "alloc.h" +#include "refs.h" +#include "strmap.h" struct bitmapped_commit { struct commit *commit; @@ -465,6 +469,7 @@ static int fill_bitmap_tree(struct bitmap_writer *writer, } static int reused_bitmaps_nr; +static int reused_pseudo_merge_bitmaps_nr; static int fill_bitmap_commit(struct bitmap_writer *writer, struct bb_commit *ent, @@ -490,7 +495,7 @@ static int fill_bitmap_commit(struct bitmap_writer *writer, struct bitmap *remapped = bitmap_new(); if (commit->object.flags & BITMAP_PSEUDO_MERGE) - old = NULL; + old = pseudo_merge_bitmap_for_commit(old_bitmap, c); else old = bitmap_for_commit(old_bitmap, c); /* @@ -501,7 +506,10 @@ static int fill_bitmap_commit(struct bitmap_writer *writer, if (old && !rebuild_bitmap(mapping, old, remapped)) { bitmap_or(ent->bitmap, remapped); bitmap_free(remapped); - reused_bitmaps_nr++; + if (commit->object.flags & BITMAP_PSEUDO_MERGE) + reused_pseudo_merge_bitmaps_nr++; + else + reused_bitmaps_nr++; continue; } bitmap_free(remapped); @@ -631,6 +639,9 @@ int bitmap_writer_build(struct bitmap_writer *writer, the_repository); trace2_data_intmax("pack-bitmap-write", the_repository, "building_bitmaps_reused", reused_bitmaps_nr); + trace2_data_intmax("pack-bitmap-write", the_repository, + "building_bitmaps_pseudo_merge_reused", + reused_pseudo_merge_bitmaps_nr); stop_progress(&writer->progress); diff --git a/pack-bitmap.c b/pack-bitmap.c index 1966b3b95f..70230e2647 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -1316,6 +1316,37 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git, return cb.base; } +struct ewah_bitmap *pseudo_merge_bitmap_for_commit(struct bitmap_index *bitmap_git, + struct commit *commit) +{ + struct commit_list *p; + struct bitmap *parents; + struct pseudo_merge *match = NULL; + + if (!bitmap_git->pseudo_merges.nr) + return NULL; + + parents = bitmap_new(); + + for (p = commit->parents; p; p = p->next) { + int pos = bitmap_position(bitmap_git, &p->item->object.oid); + if (pos < 0 || pos >= bitmap_num_objects(bitmap_git)) + goto done; + + bitmap_set(parents, pos); + } + + match = pseudo_merge_for_parents(&bitmap_git->pseudo_merges, + parents); + +done: + bitmap_free(parents); + if (match) + return pseudo_merge_bitmap(&bitmap_git->pseudo_merges, match); + + return NULL; +} + static void unsatisfy_all_pseudo_merges(struct bitmap_index *bitmap_git) { uint32_t i; @@ -2809,6 +2840,7 @@ void free_bitmap_index(struct bitmap_index *b) */ close_midx_revindex(b->midx); } + free_pseudo_merge_map(&b->pseudo_merges); free(b); } diff --git a/pack-bitmap.h b/pack-bitmap.h index 4466b5ad0f..1171e6d989 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -142,6 +142,8 @@ int rebuild_bitmap(const uint32_t *reposition, struct bitmap *dest); struct ewah_bitmap *bitmap_for_commit(struct bitmap_index *bitmap_git, struct commit *commit); +struct ewah_bitmap *pseudo_merge_bitmap_for_commit(struct bitmap_index *bitmap_git, + struct commit *commit); void bitmap_writer_select_commits(struct bitmap_writer *writer, struct commit **indexed_commits, unsigned int indexed_commits_nr); diff --git a/pseudo-merge.c b/pseudo-merge.c index 7d13101149..a117520996 100644 --- a/pseudo-merge.c +++ b/pseudo-merge.c @@ -699,3 +699,58 @@ int cascade_pseudo_merges(const struct pseudo_merge_map *pm, return ret; } + +struct pseudo_merge *pseudo_merge_for_parents(const struct pseudo_merge_map *pm, + struct bitmap *parents) +{ + struct pseudo_merge *match = NULL; + size_t i; + + if (!pm->nr) + return NULL; + + /* + * NOTE: this loop is quadratic in the worst-case (where no + * matching pseudo-merge bitmaps are found), but in practice + * this is OK for a few reasons: + * + * - Rejecting pseudo-merge bitmaps that do not match the + * given commit is done quickly (i.e. `bitmap_equals_ewah()` + * returns early when we know the two bitmaps aren't equal. + * + * - Already matched pseudo-merge bitmaps (which we track with + * the `->satisfied` bit here) are skipped as potential + * candidates. + * + * - The number of pseudo-merges should be small (in the + * hundreds for most repositories). + * + * If in the future this semi-quadratic behavior does become a + * problem, another approach would be to keep track of which + * pseudo-merges are still "viable" after enumerating the + * pseudo-merge commit's parents: + * + * - A pseudo-merge bitmap becomes non-viable when the bit(s) + * corresponding to one or more parent(s) of the given + * commit are not set in a candidate pseudo-merge's commits + * bitmap. + * + * - After processing all bits, enumerate the remaining set of + * viable pseudo-merge bitmaps, and check that their + * popcount() matches the number of parents in the given + * commit. + */ + for (i = 0; i < pm->nr; i++) { + struct pseudo_merge *candidate = use_pseudo_merge(pm, &pm->v[i]); + if (!candidate || candidate->satisfied) + continue; + if (!bitmap_equals_ewah(parents, candidate->commits)) + continue; + + match = candidate; + match->satisfied = 1; + break; + } + + return match; +} diff --git a/pseudo-merge.h b/pseudo-merge.h index 755edc054a..2aca01d056 100644 --- a/pseudo-merge.h +++ b/pseudo-merge.h @@ -206,4 +206,11 @@ int cascade_pseudo_merges(const struct pseudo_merge_map *pm, struct bitmap *result, struct bitmap *roots); +/* + * Returns a pseudo-merge which contains the exact set of commits + * listed in the "parents" bitamp, or NULL if none could be found. + */ +struct pseudo_merge *pseudo_merge_for_parents(const struct pseudo_merge_map *pm, + struct bitmap *parents); + #endif diff --git a/t/t5333-pseudo-merge-bitmaps.sh b/t/t5333-pseudo-merge-bitmaps.sh index 4c9aebcffd..f052f395a7 100755 --- a/t/t5333-pseudo-merge-bitmaps.sh +++ b/t/t5333-pseudo-merge-bitmaps.sh @@ -22,6 +22,10 @@ test_pseudo_merges_cascades () { test_trace2_data bitmap pseudo_merges_cascades "$1" } +test_pseudo_merges_reused () { + test_trace2_data pack-bitmap-write building_bitmaps_pseudo_merge_reused "$1" +} + tag_everything () { git rev-list --all --no-object-names >in && perl -lne ' @@ -325,4 +329,65 @@ test_expect_success 'pseudo-merge overlap stale traversal' ' ) ' +test_expect_success 'pseudo-merge reuse' ' + git init pseudo-merge-reuse && + ( + cd pseudo-merge-reuse && + + stable="1641013200" && # 2022-01-01 + unstable="1672549200" && # 2023-01-01 + + GIT_COMMITTER_DATE="$stable +0000" && + export GIT_COMMITTER_DATE && + test_commit_bulk --notick 128 && + GIT_COMMITTER_DATE="$unstable +0000" && + export GIT_COMMITTER_DATE && + test_commit_bulk --notick 128 && + + tag_everything && + + git \ + -c bitmapPseudoMerge.test.pattern="refs/tags/" \ + -c bitmapPseudoMerge.test.maxMerges=1 \ + -c bitmapPseudoMerge.test.threshold=now \ + -c bitmapPseudoMerge.test.stableThreshold=$(($unstable - 1)) \ + -c bitmapPseudoMerge.test.stableSize=512 \ + repack -adb && + + test_pseudo_merges >merges && + test_line_count = 2 merges && + + test_pseudo_merge_commits 0 >stable-oids.before && + test_pseudo_merge_commits 1 >unstable-oids.before && + + : >trace2.txt && + GIT_TRACE2_EVENT=$PWD/trace2.txt git \ + -c bitmapPseudoMerge.test.pattern="refs/tags/" \ + -c bitmapPseudoMerge.test.maxMerges=2 \ + -c bitmapPseudoMerge.test.threshold=now \ + -c bitmapPseudoMerge.test.stableThreshold=$(($unstable - 1)) \ + -c bitmapPseudoMerge.test.stableSize=512 \ + repack -adb && + + test_pseudo_merges_reused 1 merges && + test_line_count = 3 merges && + + test_pseudo_merge_commits 0 >stable-oids.after && + for i in 1 2 + do + test_pseudo_merge_commits $i || return 1 + done >unstable-oids.after && + + sort -u expect && + sort -u actual && + test_cmp expect actual && + + sort -u expect && + sort -u actual && + test_cmp expect actual + ) +' + test_done -- cgit 1.2.3-korg