aboutsummaryrefslogtreecommitdiffstats
path: root/pack-bitmap.c
diff options
context:
space:
mode:
Diffstat (limited to 'pack-bitmap.c')
-rw-r--r--pack-bitmap.c435
1 files changed, 393 insertions, 42 deletions
diff --git a/pack-bitmap.c b/pack-bitmap.c
index 229a11fb00..2e657a2aa4 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -1,3 +1,5 @@
+#define USE_THE_REPOSITORY_VARIABLE
+
#include "git-compat-util.h"
#include "commit.h"
#include "gettext.h"
@@ -20,6 +22,7 @@
#include "list-objects-filter-options.h"
#include "midx.h"
#include "config.h"
+#include "pseudo-merge.h"
/*
* An entry on the bitmap index, representing the bitmap for a given
@@ -51,13 +54,6 @@ struct bitmap_index {
struct packed_git *pack;
struct multi_pack_index *midx;
- /*
- * Mark the first `reuse_objects` in the packfile as reused:
- * they will be sent as-is without using them for repacking
- * calculations
- */
- uint32_t reuse_objects;
-
/* mmapped buffer of the whole bitmap index */
unsigned char *map;
size_t map_size; /* size of the mmaped buffer */
@@ -93,6 +89,9 @@ struct bitmap_index {
*/
unsigned char *table_lookup;
+ /* This contains the pseudo-merge cache within 'map' (if found). */
+ struct pseudo_merge_map pseudo_merges;
+
/*
* Extended index.
*
@@ -117,6 +116,13 @@ struct bitmap_index {
unsigned int version;
};
+static int pseudo_merges_satisfied_nr;
+static int pseudo_merges_cascades_nr;
+static int existing_bitmaps_hits_nr;
+static int existing_bitmaps_misses_nr;
+static int roots_with_bitmaps_nr;
+static int roots_without_bitmaps_nr;
+
static struct ewah_bitmap *lookup_stored_bitmap(struct stored_bitmap *st)
{
struct ewah_bitmap *parent;
@@ -136,17 +142,13 @@ static struct ewah_bitmap *lookup_stored_bitmap(struct stored_bitmap *st)
return composed;
}
-/*
- * Read a bitmap from the current read position on the mmaped
- * index, and increase the read position accordingly
- */
-static struct ewah_bitmap *read_bitmap_1(struct bitmap_index *index)
+struct ewah_bitmap *read_bitmap(const unsigned char *map,
+ size_t map_size, size_t *map_pos)
{
struct ewah_bitmap *b = ewah_pool_new();
- ssize_t bitmap_size = ewah_read_mmap(b,
- index->map + index->map_pos,
- index->map_size - index->map_pos);
+ ssize_t bitmap_size = ewah_read_mmap(b, map + *map_pos,
+ map_size - *map_pos);
if (bitmap_size < 0) {
error(_("failed to load bitmap index (corrupted?)"));
@@ -154,10 +156,20 @@ static struct ewah_bitmap *read_bitmap_1(struct bitmap_index *index)
return NULL;
}
- index->map_pos += bitmap_size;
+ *map_pos += bitmap_size;
+
return b;
}
+/*
+ * Read a bitmap from the current read position on the mmaped
+ * index, and increase the read position accordingly
+ */
+static struct ewah_bitmap *read_bitmap_1(struct bitmap_index *index)
+{
+ return read_bitmap(index->map, index->map_size, &index->map_pos);
+}
+
static uint32_t bitmap_num_objects(struct bitmap_index *index)
{
if (index->midx)
@@ -206,6 +218,46 @@ static int load_bitmap_header(struct bitmap_index *index)
index->table_lookup = (void *)(index_end - table_size);
index_end -= table_size;
}
+
+ if (flags & BITMAP_OPT_PSEUDO_MERGES) {
+ unsigned char *pseudo_merge_ofs;
+ size_t table_size;
+ uint32_t i;
+
+ if (sizeof(table_size) > index_end - index->map - header_size)
+ return error(_("corrupted bitmap index file (too short to fit pseudo-merge table header)"));
+
+ table_size = get_be64(index_end - 8);
+ if (table_size > index_end - index->map - header_size)
+ return error(_("corrupted bitmap index file (too short to fit pseudo-merge table)"));
+
+ if (git_env_bool("GIT_TEST_USE_PSEUDO_MERGES", 1)) {
+ const unsigned char *ext = (index_end - table_size);
+
+ index->pseudo_merges.map = index->map;
+ index->pseudo_merges.map_size = index->map_size;
+ index->pseudo_merges.commits = ext + get_be64(index_end - 16);
+ index->pseudo_merges.commits_nr = get_be32(index_end - 20);
+ index->pseudo_merges.nr = get_be32(index_end - 24);
+
+ if (st_add(st_mult(index->pseudo_merges.nr,
+ sizeof(uint64_t)),
+ 24) > table_size)
+ return error(_("corrupted bitmap index file, pseudo-merge table too short"));
+
+ CALLOC_ARRAY(index->pseudo_merges.v,
+ index->pseudo_merges.nr);
+
+ pseudo_merge_ofs = index_end - 24 -
+ (index->pseudo_merges.nr * sizeof(uint64_t));
+ for (i = 0; i < index->pseudo_merges.nr; i++) {
+ index->pseudo_merges.v[i].at = get_be64(pseudo_merge_ofs);
+ pseudo_merge_ofs += sizeof(uint64_t);
+ }
+ }
+
+ index_end -= table_size;
+ }
}
index->entry_count = ntohl(header->entry_count);
@@ -316,9 +368,8 @@ static int load_bitmap_entries_v1(struct bitmap_index *index)
char *midx_bitmap_filename(struct multi_pack_index *midx)
{
struct strbuf buf = STRBUF_INIT;
-
- get_midx_filename(&buf, midx->object_dir);
- strbuf_addf(&buf, "-%s.bitmap", hash_to_hex(get_midx_checksum(midx)));
+ get_midx_filename_ext(&buf, midx->object_dir, get_midx_checksum(midx),
+ MIDX_EXT_BITMAP);
return strbuf_detach(&buf, NULL);
}
@@ -375,7 +426,8 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git,
if (load_bitmap_header(bitmap_git) < 0)
goto cleanup;
- if (!hasheq(get_midx_checksum(bitmap_git->midx), bitmap_git->checksum)) {
+ if (!hasheq(get_midx_checksum(bitmap_git->midx), bitmap_git->checksum,
+ the_repository->hash_algo)) {
error(_("checksum doesn't match in MIDX and bitmap"));
goto cleanup;
}
@@ -968,6 +1020,22 @@ static void show_commit(struct commit *commit UNUSED,
{
}
+static unsigned apply_pseudo_merges_for_commit_1(struct bitmap_index *bitmap_git,
+ struct bitmap *result,
+ struct commit *commit,
+ uint32_t commit_pos)
+{
+ int ret;
+
+ ret = apply_pseudo_merges_for_commit(&bitmap_git->pseudo_merges,
+ result, commit, commit_pos);
+
+ if (ret)
+ pseudo_merges_satisfied_nr += ret;
+
+ return ret;
+}
+
static int add_to_include_set(struct bitmap_index *bitmap_git,
struct include_data *data,
struct commit *commit,
@@ -983,11 +1051,19 @@ static int add_to_include_set(struct bitmap_index *bitmap_git,
partial = bitmap_for_commit(bitmap_git, commit);
if (partial) {
+ existing_bitmaps_hits_nr++;
+
bitmap_or_ewah(data->base, partial);
return 0;
}
+ existing_bitmaps_misses_nr++;
+
bitmap_set(data->base, bitmap_pos);
+ if (apply_pseudo_merges_for_commit_1(bitmap_git, data->base, commit,
+ bitmap_pos))
+ return 0;
+
return 1;
}
@@ -1038,8 +1114,12 @@ static int add_commit_to_bitmap(struct bitmap_index *bitmap_git,
{
struct ewah_bitmap *or_with = bitmap_for_commit(bitmap_git, commit);
- if (!or_with)
+ if (!or_with) {
+ existing_bitmaps_misses_nr++;
return 0;
+ }
+
+ existing_bitmaps_hits_nr++;
if (!*base)
*base = ewah_to_bitmap(or_with);
@@ -1113,6 +1193,20 @@ static void show_boundary_object(struct object *object UNUSED,
BUG("should not be called");
}
+static unsigned cascade_pseudo_merges_1(struct bitmap_index *bitmap_git,
+ struct bitmap *result,
+ struct bitmap *roots)
+{
+ int ret = cascade_pseudo_merges(&bitmap_git->pseudo_merges,
+ result, roots);
+ if (ret) {
+ pseudo_merges_cascades_nr++;
+ pseudo_merges_satisfied_nr += ret;
+ }
+
+ return ret;
+}
+
static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
struct rev_info *revs,
struct object_list *roots)
@@ -1122,6 +1216,7 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
unsigned int i;
unsigned int tmp_blobs, tmp_trees, tmp_tags;
int any_missing = 0;
+ int existing_bitmaps = 0;
cb.bitmap_git = bitmap_git;
cb.base = bitmap_new();
@@ -1129,6 +1224,25 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
revs->ignore_missing_links = 1;
+ if (bitmap_git->pseudo_merges.nr) {
+ struct bitmap *roots_bitmap = bitmap_new();
+ struct object_list *objects = NULL;
+
+ for (objects = roots; objects; objects = objects->next) {
+ struct object *object = objects->item;
+ int pos;
+
+ pos = bitmap_position(bitmap_git, &object->oid);
+ if (pos < 0)
+ continue;
+
+ bitmap_set(roots_bitmap, pos);
+ }
+
+ if (!cascade_pseudo_merges_1(bitmap_git, cb.base, roots_bitmap))
+ bitmap_free(roots_bitmap);
+ }
+
/*
* OR in any existing reachability bitmaps among `roots` into
* `cb.base`.
@@ -1140,8 +1254,10 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
continue;
if (add_commit_to_bitmap(bitmap_git, &cb.base,
- (struct commit *)object))
+ (struct commit *)object)) {
+ existing_bitmaps = 1;
continue;
+ }
any_missing = 1;
}
@@ -1149,6 +1265,9 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
if (!any_missing)
goto cleanup;
+ if (existing_bitmaps)
+ cascade_pseudo_merges_1(bitmap_git, cb.base, NULL);
+
tmp_blobs = revs->blob_objects;
tmp_trees = revs->tree_objects;
tmp_tags = revs->blob_objects;
@@ -1204,6 +1323,44 @@ cleanup:
return cb.base;
}
+struct ewah_bitmap *pseudo_merge_bitmap_for_commit(struct bitmap_index *bitmap_git,
+ struct commit *commit)
+{
+ struct commit_list *p;
+ struct bitmap *parents;
+ struct pseudo_merge *match = NULL;
+
+ if (!bitmap_git->pseudo_merges.nr)
+ return NULL;
+
+ parents = bitmap_new();
+
+ for (p = commit->parents; p; p = p->next) {
+ int pos = bitmap_position(bitmap_git, &p->item->object.oid);
+ if (pos < 0 || pos >= bitmap_num_objects(bitmap_git))
+ goto done;
+
+ bitmap_set(parents, pos);
+ }
+
+ match = pseudo_merge_for_parents(&bitmap_git->pseudo_merges,
+ parents);
+
+done:
+ bitmap_free(parents);
+ if (match)
+ return pseudo_merge_bitmap(&bitmap_git->pseudo_merges, match);
+
+ return NULL;
+}
+
+static void unsatisfy_all_pseudo_merges(struct bitmap_index *bitmap_git)
+{
+ uint32_t i;
+ for (i = 0; i < bitmap_git->pseudo_merges.nr; i++)
+ bitmap_git->pseudo_merges.v[i].satisfied = 0;
+}
+
static struct bitmap *find_objects(struct bitmap_index *bitmap_git,
struct rev_info *revs,
struct object_list *roots,
@@ -1211,9 +1368,32 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git,
{
struct bitmap *base = NULL;
int needs_walk = 0;
+ unsigned existing_bitmaps = 0;
struct object_list *not_mapped = NULL;
+ unsatisfy_all_pseudo_merges(bitmap_git);
+
+ if (bitmap_git->pseudo_merges.nr) {
+ struct bitmap *roots_bitmap = bitmap_new();
+ struct object_list *objects = NULL;
+
+ for (objects = roots; objects; objects = objects->next) {
+ struct object *object = objects->item;
+ int pos;
+
+ pos = bitmap_position(bitmap_git, &object->oid);
+ if (pos < 0)
+ continue;
+
+ bitmap_set(roots_bitmap, pos);
+ }
+
+ base = bitmap_new();
+ if (!cascade_pseudo_merges_1(bitmap_git, base, roots_bitmap))
+ bitmap_free(roots_bitmap);
+ }
+
/*
* Go through all the roots for the walk. The ones that have bitmaps
* on the bitmap index will be `or`ed together to form an initial
@@ -1224,11 +1404,21 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git,
*/
while (roots) {
struct object *object = roots->item;
+
roots = roots->next;
+ if (base) {
+ int pos = bitmap_position(bitmap_git, &object->oid);
+ if (pos > 0 && bitmap_get(base, pos)) {
+ object->flags |= SEEN;
+ continue;
+ }
+ }
+
if (object->type == OBJ_COMMIT &&
add_commit_to_bitmap(bitmap_git, &base, (struct commit *)object)) {
object->flags |= SEEN;
+ existing_bitmaps = 1;
continue;
}
@@ -1244,6 +1434,9 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git,
roots = not_mapped;
+ if (existing_bitmaps)
+ cascade_pseudo_merges_1(bitmap_git, base, NULL);
+
/*
* Let's iterate through all the roots that don't have bitmaps to
* check if we can determine them to be reachable from the existing
@@ -1264,8 +1457,12 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git,
object->flags &= ~UNINTERESTING;
add_pending_object(revs, object, "");
needs_walk = 1;
+
+ roots_without_bitmaps_nr++;
} else {
object->flags |= SEEN;
+
+ roots_with_bitmaps_nr++;
}
}
@@ -1828,6 +2025,19 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
object_list_free(&wants);
object_list_free(&haves);
+ trace2_data_intmax("bitmap", the_repository, "pseudo_merges_satisfied",
+ pseudo_merges_satisfied_nr);
+ trace2_data_intmax("bitmap", the_repository, "pseudo_merges_cascades",
+ pseudo_merges_cascades_nr);
+ trace2_data_intmax("bitmap", the_repository, "bitmap/hits",
+ existing_bitmaps_hits_nr);
+ trace2_data_intmax("bitmap", the_repository, "bitmap/misses",
+ existing_bitmaps_misses_nr);
+ trace2_data_intmax("bitmap", the_repository, "bitmap/roots_with_bitmap",
+ roots_with_bitmaps_nr);
+ trace2_data_intmax("bitmap", the_repository, "bitmap/roots_without_bitmap",
+ roots_without_bitmaps_nr);
+
return bitmap_git;
cleanup:
@@ -2056,7 +2266,10 @@ void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
load_reverse_index(r, bitmap_git);
- if (bitmap_is_midx(bitmap_git)) {
+ if (!bitmap_is_midx(bitmap_git) || !bitmap_git->midx->chunk_bitmapped_packs)
+ multi_pack_reuse = 0;
+
+ if (multi_pack_reuse) {
for (i = 0; i < bitmap_git->midx->num_packs; i++) {
struct bitmapped_pack pack;
if (nth_bitmapped_pack(r, bitmap_git->midx, &pack, i) < 0) {
@@ -2069,34 +2282,45 @@ void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
if (!pack.bitmap_nr)
continue;
- if (!multi_pack_reuse && pack.bitmap_pos) {
- /*
- * If we're only reusing a single pack, skip
- * over any packs which are not positioned at
- * the beginning of the MIDX bitmap.
- *
- * This is consistent with the existing
- * single-pack reuse behavior, which only reuses
- * parts of the MIDX's preferred pack.
- */
- continue;
- }
-
ALLOC_GROW(packs, packs_nr + 1, packs_alloc);
memcpy(&packs[packs_nr++], &pack, sizeof(pack));
objects_nr += pack.p->num_objects;
-
- if (!multi_pack_reuse)
- break;
}
QSORT(packs, packs_nr, bitmapped_pack_cmp);
} else {
- ALLOC_GROW(packs, packs_nr + 1, packs_alloc);
+ struct packed_git *pack;
+ uint32_t pack_int_id;
+
+ if (bitmap_is_midx(bitmap_git)) {
+ uint32_t preferred_pack_pos;
+
+ if (midx_preferred_pack(bitmap_git->midx, &preferred_pack_pos) < 0) {
+ warning(_("unable to compute preferred pack, disabling pack-reuse"));
+ return;
+ }
+
+ pack = bitmap_git->midx->packs[preferred_pack_pos];
+ pack_int_id = preferred_pack_pos;
+ } else {
+ pack = bitmap_git->pack;
+ /*
+ * Any value for 'pack_int_id' will do here. When we
+ * process the pack via try_partial_reuse(), we won't
+ * use the `pack_int_id` field since we have a non-MIDX
+ * bitmap.
+ *
+ * Use '-1' as a sentinel value to make it clear
+ * that we do not expect to read this field.
+ */
+ pack_int_id = -1;
+ }
- packs[packs_nr].p = bitmap_git->pack;
- packs[packs_nr].bitmap_nr = bitmap_git->pack->num_objects;
+ ALLOC_GROW(packs, packs_nr + 1, packs_alloc);
+ packs[packs_nr].p = pack;
+ packs[packs_nr].pack_int_id = pack_int_id;
+ packs[packs_nr].bitmap_nr = pack->num_objects;
packs[packs_nr].bitmap_pos = 0;
objects_nr = packs[packs_nr++].bitmap_nr;
@@ -2404,6 +2628,132 @@ cleanup:
return 0;
}
+static void bit_pos_to_object_id(struct bitmap_index *bitmap_git,
+ uint32_t bit_pos,
+ struct object_id *oid)
+{
+ uint32_t index_pos;
+
+ if (bitmap_is_midx(bitmap_git))
+ index_pos = pack_pos_to_midx(bitmap_git->midx, bit_pos);
+ else
+ index_pos = pack_pos_to_index(bitmap_git->pack, bit_pos);
+
+ nth_bitmap_object_oid(bitmap_git, oid, index_pos);
+}
+
+int test_bitmap_pseudo_merges(struct repository *r)
+{
+ struct bitmap_index *bitmap_git;
+ uint32_t i;
+
+ bitmap_git = prepare_bitmap_git(r);
+ if (!bitmap_git || !bitmap_git->pseudo_merges.nr)
+ goto cleanup;
+
+ for (i = 0; i < bitmap_git->pseudo_merges.nr; i++) {
+ struct pseudo_merge *merge;
+ struct ewah_bitmap *commits_bitmap, *merge_bitmap;
+
+ merge = use_pseudo_merge(&bitmap_git->pseudo_merges,
+ &bitmap_git->pseudo_merges.v[i]);
+ commits_bitmap = merge->commits;
+ merge_bitmap = pseudo_merge_bitmap(&bitmap_git->pseudo_merges,
+ merge);
+
+ printf("at=%"PRIuMAX", commits=%"PRIuMAX", objects=%"PRIuMAX"\n",
+ (uintmax_t)merge->at,
+ (uintmax_t)ewah_bitmap_popcount(commits_bitmap),
+ (uintmax_t)ewah_bitmap_popcount(merge_bitmap));
+ }
+
+cleanup:
+ free_bitmap_index(bitmap_git);
+ return 0;
+}
+
+static void dump_ewah_object_ids(struct bitmap_index *bitmap_git,
+ struct ewah_bitmap *bitmap)
+
+{
+ struct ewah_iterator it;
+ eword_t word;
+ uint32_t pos = 0;
+
+ ewah_iterator_init(&it, bitmap);
+
+ while (ewah_iterator_next(&word, &it)) {
+ struct object_id oid;
+ uint32_t offset;
+
+ for (offset = 0; offset < BITS_IN_EWORD; offset++) {
+ if (!(word >> offset))
+ break;
+
+ offset += ewah_bit_ctz64(word >> offset);
+
+ bit_pos_to_object_id(bitmap_git, pos + offset, &oid);
+ printf("%s\n", oid_to_hex(&oid));
+ }
+ pos += BITS_IN_EWORD;
+ }
+}
+
+int test_bitmap_pseudo_merge_commits(struct repository *r, uint32_t n)
+{
+ struct bitmap_index *bitmap_git;
+ struct pseudo_merge *merge;
+ int ret = 0;
+
+ bitmap_git = prepare_bitmap_git(r);
+ if (!bitmap_git || !bitmap_git->pseudo_merges.nr)
+ goto cleanup;
+
+ if (n >= bitmap_git->pseudo_merges.nr) {
+ ret = error(_("pseudo-merge index out of range "
+ "(%"PRIu32" >= %"PRIuMAX")"),
+ n, (uintmax_t)bitmap_git->pseudo_merges.nr);
+ goto cleanup;
+ }
+
+ merge = use_pseudo_merge(&bitmap_git->pseudo_merges,
+ &bitmap_git->pseudo_merges.v[n]);
+ dump_ewah_object_ids(bitmap_git, merge->commits);
+
+cleanup:
+ free_bitmap_index(bitmap_git);
+ return ret;
+}
+
+int test_bitmap_pseudo_merge_objects(struct repository *r, uint32_t n)
+{
+ struct bitmap_index *bitmap_git;
+ struct pseudo_merge *merge;
+ int ret = 0;
+
+ bitmap_git = prepare_bitmap_git(r);
+ if (!bitmap_git || !bitmap_git->pseudo_merges.nr)
+ goto cleanup;
+
+ if (n >= bitmap_git->pseudo_merges.nr) {
+ ret = error(_("pseudo-merge index out of range "
+ "(%"PRIu32" >= %"PRIuMAX")"),
+ n, (uintmax_t)bitmap_git->pseudo_merges.nr);
+ goto cleanup;
+ }
+
+ merge = use_pseudo_merge(&bitmap_git->pseudo_merges,
+ &bitmap_git->pseudo_merges.v[n]);
+
+ dump_ewah_object_ids(bitmap_git,
+ pseudo_merge_bitmap(&bitmap_git->pseudo_merges,
+ merge));
+
+cleanup:
+ free_bitmap_index(bitmap_git);
+ return ret;
+}
+
int rebuild_bitmap(const uint32_t *reposition,
struct ewah_bitmap *source,
struct bitmap *dest)
@@ -2510,6 +2860,7 @@ void free_bitmap_index(struct bitmap_index *b)
*/
close_midx_revindex(b->midx);
}
+ free_pseudo_merge_map(&b->pseudo_merges);
free(b);
}