aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2021-01-25 14:19:19 -0800
committerJunio C Hamano <gitster@pobox.com>2021-01-25 14:19:20 -0800
commitbcaaf972e61459e6bfca27144b5d5b66bc51df8c (patch)
tree66858baeccf7c0daaebecaf381a930d89d962fcb
parent381dac23491ee3d80e00787449f0f1c70449419c (diff)
parent779412b9d99544ae71eefabb699a109b1638f96c (diff)
downloadgit-bcaaf972e61459e6bfca27144b5d5b66bc51df8c.tar.gz
Merge branch 'tb/pack-revindex-api'
Abstract accesses to in-core revindex that allows enumerating objects stored in a packfile in the order they appear in the pack, in preparation for introducing an on-disk precomputed revindex. * tb/pack-revindex-api: (21 commits) for_each_object_in_pack(): clarify pack vs index ordering pack-revindex.c: avoid direct revindex access in 'offset_to_pack_pos()' pack-revindex: hide the definition of 'revindex_entry' pack-revindex: remove unused 'find_revindex_position()' pack-revindex: remove unused 'find_pack_revindex()' builtin/gc.c: guess the size of the revindex for_each_object_in_pack(): convert to new revindex API unpack_entry(): convert to new revindex API packed_object_info(): convert to new revindex API retry_bad_packed_offset(): convert to new revindex API get_delta_base_oid(): convert to new revindex API rebuild_existing_bitmaps(): convert to new revindex API try_partial_reuse(): convert to new revindex API get_size_by_pos(): convert to new revindex API show_objects_for_type(): convert to new revindex API bitmap_position_packfile(): convert to new revindex API check_object(): convert to new revindex API write_reused_pack_verbatim(): convert to new revindex API write_reused_pack_one(): convert to new revindex API write_reuse_object(): convert to new revindex API ...
-rw-r--r--builtin/gc.c2
-rw-r--r--builtin/pack-objects.c37
-rw-r--r--pack-bitmap.c44
-rw-r--r--pack-revindex.c51
-rw-r--r--pack-revindex.h60
-rw-r--r--packfile.c76
6 files changed, 185 insertions, 85 deletions
diff --git a/builtin/gc.c b/builtin/gc.c
index fe35b10fe9..ae34362e93 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -301,7 +301,7 @@ static uint64_t estimate_repack_memory(struct packed_git *pack)
/* and then obj_hash[], underestimated in fact */
heap += sizeof(struct object *) * nr_objects;
/* revindex is used also */
- heap += sizeof(struct revindex_entry) * nr_objects;
+ heap += (sizeof(off_t) + sizeof(uint32_t)) * nr_objects;
/*
* read_sha1_file() (either at delta calculation phase, or
* writing phase) also fills up the delta base cache
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 2a00358f34..5b0c4489e2 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -419,7 +419,7 @@ static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
{
struct packed_git *p = IN_PACK(entry);
struct pack_window *w_curs = NULL;
- struct revindex_entry *revidx;
+ uint32_t pos;
off_t offset;
enum object_type type = oe_type(entry);
off_t datalen;
@@ -436,10 +436,15 @@ static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
type, entry_size);
offset = entry->in_pack_offset;
- revidx = find_pack_revindex(p, offset);
- datalen = revidx[1].offset - offset;
+ if (offset_to_pack_pos(p, offset, &pos) < 0)
+ die(_("write_reuse_object: could not locate %s, expected at "
+ "offset %"PRIuMAX" in pack %s"),
+ oid_to_hex(&entry->idx.oid), (uintmax_t)offset,
+ p->pack_name);
+ datalen = pack_pos_to_offset(p, pos + 1) - offset;
if (!pack_to_stdout && p->index_version > 1 &&
- check_pack_crc(p, &w_curs, offset, datalen, revidx->nr)) {
+ check_pack_crc(p, &w_curs, offset, datalen,
+ pack_pos_to_index(p, pos))) {
error(_("bad packed object CRC for %s"),
oid_to_hex(&entry->idx.oid));
unuse_pack(&w_curs);
@@ -863,8 +868,8 @@ static void write_reused_pack_one(size_t pos, struct hashfile *out,
enum object_type type;
unsigned long size;
- offset = reuse_packfile->revindex[pos].offset;
- next = reuse_packfile->revindex[pos + 1].offset;
+ offset = pack_pos_to_offset(reuse_packfile, pos);
+ next = pack_pos_to_offset(reuse_packfile, pos + 1);
record_reused_object(offset, offset - hashfile_total(out));
@@ -884,11 +889,17 @@ static void write_reused_pack_one(size_t pos, struct hashfile *out,
/* Convert to REF_DELTA if we must... */
if (!allow_ofs_delta) {
- int base_pos = find_revindex_position(reuse_packfile, base_offset);
+ uint32_t base_pos;
struct object_id base_oid;
+ if (offset_to_pack_pos(reuse_packfile, base_offset, &base_pos) < 0)
+ die(_("expected object at offset %"PRIuMAX" "
+ "in pack %s"),
+ (uintmax_t)base_offset,
+ reuse_packfile->pack_name);
+
nth_packed_object_id(&base_oid, reuse_packfile,
- reuse_packfile->revindex[base_pos].nr);
+ pack_pos_to_index(reuse_packfile, base_pos));
len = encode_in_pack_object_header(header, sizeof(header),
OBJ_REF_DELTA, size);
@@ -941,7 +952,7 @@ static size_t write_reused_pack_verbatim(struct hashfile *out,
off_t to_write;
written = (pos * BITS_IN_EWORD);
- to_write = reuse_packfile->revindex[written].offset
+ to_write = pack_pos_to_offset(reuse_packfile, written)
- sizeof(struct pack_header);
/* We're recording one chunk, not one object. */
@@ -1806,11 +1817,11 @@ static void check_object(struct object_entry *entry, uint32_t object_index)
goto give_up;
}
if (reuse_delta && !entry->preferred_base) {
- struct revindex_entry *revidx;
- revidx = find_pack_revindex(p, ofs);
- if (!revidx)
+ uint32_t pos;
+ if (offset_to_pack_pos(p, ofs, &pos) < 0)
goto give_up;
- if (!nth_packed_object_id(&base_ref, p, revidx->nr))
+ if (!nth_packed_object_id(&base_ref, p,
+ pack_pos_to_index(p, pos)))
have_base = 1;
}
entry->in_pack_header_size = used + used_0;
diff --git a/pack-bitmap.c b/pack-bitmap.c
index d88745fb02..60fe20fb87 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -407,11 +407,14 @@ static inline int bitmap_position_extended(struct bitmap_index *bitmap_git,
static inline int bitmap_position_packfile(struct bitmap_index *bitmap_git,
const struct object_id *oid)
{
+ uint32_t pos;
off_t offset = find_pack_entry_one(oid->hash, bitmap_git->pack);
if (!offset)
return -1;
- return find_revindex_position(bitmap_git->pack, offset);
+ if (offset_to_pack_pos(bitmap_git->pack, offset, &pos) < 0)
+ return -1;
+ return pos;
}
static int bitmap_position(struct bitmap_index *bitmap_git,
@@ -708,21 +711,22 @@ static void show_objects_for_type(
for (offset = 0; offset < BITS_IN_EWORD; ++offset) {
struct object_id oid;
- struct revindex_entry *entry;
- uint32_t hash = 0;
+ uint32_t hash = 0, index_pos;
+ off_t ofs;
if ((word >> offset) == 0)
break;
offset += ewah_bit_ctz64(word >> offset);
- entry = &bitmap_git->pack->revindex[pos + offset];
- nth_packed_object_id(&oid, bitmap_git->pack, entry->nr);
+ index_pos = pack_pos_to_index(bitmap_git->pack, pos + offset);
+ ofs = pack_pos_to_offset(bitmap_git->pack, pos + offset);
+ nth_packed_object_id(&oid, bitmap_git->pack, index_pos);
if (bitmap_git->hashes)
- hash = get_be32(bitmap_git->hashes + entry->nr);
+ hash = get_be32(bitmap_git->hashes + index_pos);
- show_reach(&oid, object_type, 0, hash, bitmap_git->pack, entry->offset);
+ show_reach(&oid, object_type, 0, hash, bitmap_git->pack, ofs);
}
}
}
@@ -831,11 +835,11 @@ static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git,
oi.sizep = &size;
if (pos < pack->num_objects) {
- struct revindex_entry *entry = &pack->revindex[pos];
- if (packed_object_info(the_repository, pack,
- entry->offset, &oi) < 0) {
+ off_t ofs = pack_pos_to_offset(pack, pos);
+ if (packed_object_info(the_repository, pack, ofs, &oi) < 0) {
struct object_id oid;
- nth_packed_object_id(&oid, pack, entry->nr);
+ nth_packed_object_id(&oid, pack,
+ pack_pos_to_index(pack, pos));
die(_("unable to get size of %s"), oid_to_hex(&oid));
}
} else {
@@ -1065,23 +1069,21 @@ static void try_partial_reuse(struct bitmap_index *bitmap_git,
struct bitmap *reuse,
struct pack_window **w_curs)
{
- struct revindex_entry *revidx;
- off_t offset;
+ off_t offset, header;
enum object_type type;
unsigned long size;
if (pos >= bitmap_git->pack->num_objects)
return; /* not actually in the pack */
- revidx = &bitmap_git->pack->revindex[pos];
- offset = revidx->offset;
+ offset = header = pack_pos_to_offset(bitmap_git->pack, pos);
type = unpack_object_header(bitmap_git->pack, w_curs, &offset, &size);
if (type < 0)
return; /* broken packfile, punt */
if (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA) {
off_t base_offset;
- int base_pos;
+ uint32_t base_pos;
/*
* Find the position of the base object so we can look it up
@@ -1092,11 +1094,10 @@ static void try_partial_reuse(struct bitmap_index *bitmap_git,
* more detail.
*/
base_offset = get_delta_base(bitmap_git->pack, w_curs,
- &offset, type, revidx->offset);
+ &offset, type, header);
if (!base_offset)
return;
- base_pos = find_revindex_position(bitmap_git->pack, base_offset);
- if (base_pos < 0)
+ if (offset_to_pack_pos(bitmap_git->pack, base_offset, &base_pos) < 0)
return;
/*
@@ -1391,11 +1392,10 @@ uint32_t *create_bitmap_mapping(struct bitmap_index *bitmap_git,
for (i = 0; i < num_objects; ++i) {
struct object_id oid;
- struct revindex_entry *entry;
struct object_entry *oe;
- entry = &bitmap_git->pack->revindex[i];
- nth_packed_object_id(&oid, bitmap_git->pack, entry->nr);
+ nth_packed_object_id(&oid, bitmap_git->pack,
+ pack_pos_to_index(bitmap_git->pack, i));
oe = packlist_find(mapping, &oid);
if (oe)
diff --git a/pack-revindex.c b/pack-revindex.c
index ecdde39cf4..5e69bc7372 100644
--- a/pack-revindex.c
+++ b/pack-revindex.c
@@ -3,6 +3,11 @@
#include "object-store.h"
#include "packfile.h"
+struct revindex_entry {
+ off_t offset;
+ unsigned int nr;
+};
+
/*
* Pack index for existing packs give us easy access to the offsets into
* corresponding pack file where each object's data starts, but the entries
@@ -169,17 +174,24 @@ int load_pack_revindex(struct packed_git *p)
return 0;
}
-int find_revindex_position(struct packed_git *p, off_t ofs)
+int offset_to_pack_pos(struct packed_git *p, off_t ofs, uint32_t *pos)
{
- int lo = 0;
- int hi = p->num_objects + 1;
- const struct revindex_entry *revindex = p->revindex;
+ unsigned lo, hi;
+
+ if (load_pack_revindex(p) < 0)
+ return -1;
+
+ lo = 0;
+ hi = p->num_objects + 1;
do {
const unsigned mi = lo + (hi - lo) / 2;
- if (revindex[mi].offset == ofs) {
- return mi;
- } else if (ofs < revindex[mi].offset)
+ off_t got = pack_pos_to_offset(p, mi);
+
+ if (got == ofs) {
+ *pos = mi;
+ return 0;
+ } else if (ofs < got)
hi = mi;
else
lo = mi + 1;
@@ -189,17 +201,20 @@ int find_revindex_position(struct packed_git *p, off_t ofs)
return -1;
}
-struct revindex_entry *find_pack_revindex(struct packed_git *p, off_t ofs)
+uint32_t pack_pos_to_index(struct packed_git *p, uint32_t pos)
{
- int pos;
-
- if (load_pack_revindex(p))
- return NULL;
-
- pos = find_revindex_position(p, ofs);
-
- if (pos < 0)
- return NULL;
+ if (!p->revindex)
+ BUG("pack_pos_to_index: reverse index not yet loaded");
+ if (p->num_objects <= pos)
+ BUG("pack_pos_to_index: out-of-bounds object at %"PRIu32, pos);
+ return p->revindex[pos].nr;
+}
- return p->revindex + pos;
+off_t pack_pos_to_offset(struct packed_git *p, uint32_t pos)
+{
+ if (!p->revindex)
+ BUG("pack_pos_to_index: reverse index not yet loaded");
+ if (p->num_objects < pos)
+ BUG("pack_pos_to_offset: out-of-bounds object at %"PRIu32, pos);
+ return p->revindex[pos].offset;
}
diff --git a/pack-revindex.h b/pack-revindex.h
index 848331d5d6..6e0320b08b 100644
--- a/pack-revindex.h
+++ b/pack-revindex.h
@@ -1,16 +1,62 @@
#ifndef PACK_REVINDEX_H
#define PACK_REVINDEX_H
-struct packed_git;
+/**
+ * A revindex allows converting efficiently between three properties
+ * of an object within a pack:
+ *
+ * - index position: the numeric position within the list of sorted object ids
+ * found in the .idx file
+ *
+ * - pack position: the numeric position within the list of objects in their
+ * order within the actual .pack file (i.e., 0 is the first object in the
+ * .pack, 1 is the second, and so on)
+ *
+ * - offset: the byte offset within the .pack file at which the object contents
+ * can be found
+ */
-struct revindex_entry {
- off_t offset;
- unsigned int nr;
-};
+struct packed_git;
+/*
+ * load_pack_revindex populates the revindex's internal data-structures for the
+ * given pack, returning zero on success and a negative value otherwise.
+ */
int load_pack_revindex(struct packed_git *p);
-int find_revindex_position(struct packed_git *p, off_t ofs);
-struct revindex_entry *find_pack_revindex(struct packed_git *p, off_t ofs);
+/*
+ * offset_to_pack_pos converts an object offset to a pack position. This
+ * function returns zero on success, and a negative number otherwise. The
+ * parameter 'pos' is usable only on success.
+ *
+ * If the reverse index has not yet been loaded, this function loads it lazily,
+ * and returns an negative number if an error was encountered.
+ *
+ * This function runs in time O(log N) with the number of objects in the pack.
+ */
+int offset_to_pack_pos(struct packed_git *p, off_t ofs, uint32_t *pos);
+
+/*
+ * pack_pos_to_index converts the given pack-relative position 'pos' by
+ * returning an index-relative position.
+ *
+ * If the reverse index has not yet been loaded, or the position is out of
+ * bounds, this function aborts.
+ *
+ * This function runs in constant time.
+ */
+uint32_t pack_pos_to_index(struct packed_git *p, uint32_t pos);
+
+/*
+ * pack_pos_to_offset converts the given pack-relative position 'pos' into a
+ * pack offset. For a pack with 'N' objects, asking for position 'N' will return
+ * the total size (in bytes) of the pack.
+ *
+ * If the reverse index has not yet been loaded, or the position is out of
+ * bounds, this function aborts.
+ *
+ * This function runs in constant time.
+ */
+off_t pack_pos_to_offset(struct packed_git *p, uint32_t pos);
#endif
diff --git a/packfile.c b/packfile.c
index 62d92e0c7c..4b938b4372 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1235,18 +1235,18 @@ static int get_delta_base_oid(struct packed_git *p,
oidread(oid, base);
return 0;
} else if (type == OBJ_OFS_DELTA) {
- struct revindex_entry *revidx;
+ uint32_t base_pos;
off_t base_offset = get_delta_base(p, w_curs, &curpos,
type, delta_obj_offset);
if (!base_offset)
return -1;
- revidx = find_pack_revindex(p, base_offset);
- if (!revidx)
+ if (offset_to_pack_pos(p, base_offset, &base_pos) < 0)
return -1;
- return nth_packed_object_id(oid, p, revidx->nr);
+ return nth_packed_object_id(oid, p,
+ pack_pos_to_index(p, base_pos));
} else
return -1;
}
@@ -1256,12 +1256,11 @@ static int retry_bad_packed_offset(struct repository *r,
off_t obj_offset)
{
int type;
- struct revindex_entry *revidx;
+ uint32_t pos;
struct object_id oid;
- revidx = find_pack_revindex(p, obj_offset);
- if (!revidx)
+ if (offset_to_pack_pos(p, obj_offset, &pos) < 0)
return OBJ_BAD;
- nth_packed_object_id(&oid, p, revidx->nr);
+ nth_packed_object_id(&oid, p, pack_pos_to_index(p, pos));
mark_bad_packed_object(p, oid.hash);
type = oid_object_info(r, &oid, NULL);
if (type <= OBJ_NONE)
@@ -1538,8 +1537,15 @@ int packed_object_info(struct repository *r, struct packed_git *p,
}
if (oi->disk_sizep) {
- struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
- *oi->disk_sizep = revidx[1].offset - obj_offset;
+ uint32_t pos;
+ if (offset_to_pack_pos(p, obj_offset, &pos) < 0) {
+ error("could not find object at offset %"PRIuMAX" "
+ "in pack %s", (uintmax_t)obj_offset, p->pack_name);
+ type = OBJ_BAD;
+ goto out;
+ }
+
+ *oi->disk_sizep = pack_pos_to_offset(p, pos + 1) - obj_offset;
}
if (oi->typep || oi->type_name) {
@@ -1688,11 +1694,21 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
}
if (do_check_packed_object_crc && p->index_version > 1) {
- struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
- off_t len = revidx[1].offset - obj_offset;
- if (check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) {
+ uint32_t pack_pos, index_pos;
+ off_t len;
+
+ if (offset_to_pack_pos(p, obj_offset, &pack_pos) < 0) {
+ error("could not find object at offset %"PRIuMAX" in pack %s",
+ (uintmax_t)obj_offset, p->pack_name);
+ data = NULL;
+ goto out;
+ }
+
+ len = pack_pos_to_offset(p, pack_pos + 1) - obj_offset;
+ index_pos = pack_pos_to_index(p, pack_pos);
+ if (check_pack_crc(p, &w_curs, obj_offset, len, index_pos)) {
struct object_id oid;
- nth_packed_object_id(&oid, p, revidx->nr);
+ nth_packed_object_id(&oid, p, index_pos);
error("bad packed object CRC for %s",
oid_to_hex(&oid));
mark_bad_packed_object(p, oid.hash);
@@ -1775,11 +1791,11 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
* This is costly but should happen only in the presence
* of a corrupted pack, and is better than failing outright.
*/
- struct revindex_entry *revidx;
+ uint32_t pos;
struct object_id base_oid;
- revidx = find_pack_revindex(p, obj_offset);
- if (revidx) {
- nth_packed_object_id(&base_oid, p, revidx->nr);
+ if (!(offset_to_pack_pos(p, obj_offset, &pos))) {
+ nth_packed_object_id(&base_oid, p,
+ pack_pos_to_index(p, pos));
error("failed to read delta base object %s"
" at offset %"PRIuMAX" from %s",
oid_to_hex(&base_oid), (uintmax_t)obj_offset,
@@ -2066,19 +2082,31 @@ int for_each_object_in_pack(struct packed_git *p,
}
for (i = 0; i < p->num_objects; i++) {
- uint32_t pos;
+ uint32_t index_pos;
struct object_id oid;
+ /*
+ * We are iterating "i" from 0 up to num_objects, but its
+ * meaning may be different, depending on the requested output
+ * order:
+ *
+ * - in object-name order, it is the same as the index order
+ * used by nth_packed_object_id(), so we can pass it
+ * directly
+ *
+ * - in pack-order, it is pack position, which we must
+ * convert to an index position in order to get the oid.
+ */
if (flags & FOR_EACH_OBJECT_PACK_ORDER)
- pos = p->revindex[i].nr;
+ index_pos = pack_pos_to_index(p, i);
else
- pos = i;
+ index_pos = i;
- if (nth_packed_object_id(&oid, p, pos) < 0)
+ if (nth_packed_object_id(&oid, p, index_pos) < 0)
return error("unable to get sha1 of object %u in %s",
- pos, p->pack_name);
+ index_pos, p->pack_name);
- r = cb(&oid, p, pos, data);
+ r = cb(&oid, p, index_pos, data);
if (r)
break;
}