diff options
| -rw-r--r-- | builtin/gc.c | 2 | ||||
| -rw-r--r-- | builtin/pack-objects.c | 37 | ||||
| -rw-r--r-- | pack-bitmap.c | 44 | ||||
| -rw-r--r-- | pack-revindex.c | 51 | ||||
| -rw-r--r-- | pack-revindex.h | 60 | ||||
| -rw-r--r-- | packfile.c | 76 |
6 files changed, 185 insertions, 85 deletions
diff --git a/builtin/gc.c b/builtin/gc.c index fe35b10fe9..ae34362e93 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -301,7 +301,7 @@ static uint64_t estimate_repack_memory(struct packed_git *pack) /* and then obj_hash[], underestimated in fact */ heap += sizeof(struct object *) * nr_objects; /* revindex is used also */ - heap += sizeof(struct revindex_entry) * nr_objects; + heap += (sizeof(off_t) + sizeof(uint32_t)) * nr_objects; /* * read_sha1_file() (either at delta calculation phase, or * writing phase) also fills up the delta base cache diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 2a00358f34..5b0c4489e2 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -419,7 +419,7 @@ static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry, { struct packed_git *p = IN_PACK(entry); struct pack_window *w_curs = NULL; - struct revindex_entry *revidx; + uint32_t pos; off_t offset; enum object_type type = oe_type(entry); off_t datalen; @@ -436,10 +436,15 @@ static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry, type, entry_size); offset = entry->in_pack_offset; - revidx = find_pack_revindex(p, offset); - datalen = revidx[1].offset - offset; + if (offset_to_pack_pos(p, offset, &pos) < 0) + die(_("write_reuse_object: could not locate %s, expected at " + "offset %"PRIuMAX" in pack %s"), + oid_to_hex(&entry->idx.oid), (uintmax_t)offset, + p->pack_name); + datalen = pack_pos_to_offset(p, pos + 1) - offset; if (!pack_to_stdout && p->index_version > 1 && - check_pack_crc(p, &w_curs, offset, datalen, revidx->nr)) { + check_pack_crc(p, &w_curs, offset, datalen, + pack_pos_to_index(p, pos))) { error(_("bad packed object CRC for %s"), oid_to_hex(&entry->idx.oid)); unuse_pack(&w_curs); @@ -863,8 +868,8 @@ static void write_reused_pack_one(size_t pos, struct hashfile *out, enum object_type type; unsigned long size; - offset = reuse_packfile->revindex[pos].offset; - next = reuse_packfile->revindex[pos + 1].offset; + offset = pack_pos_to_offset(reuse_packfile, pos); + next = pack_pos_to_offset(reuse_packfile, pos + 1); record_reused_object(offset, offset - hashfile_total(out)); @@ -884,11 +889,17 @@ static void write_reused_pack_one(size_t pos, struct hashfile *out, /* Convert to REF_DELTA if we must... */ if (!allow_ofs_delta) { - int base_pos = find_revindex_position(reuse_packfile, base_offset); + uint32_t base_pos; struct object_id base_oid; + if (offset_to_pack_pos(reuse_packfile, base_offset, &base_pos) < 0) + die(_("expected object at offset %"PRIuMAX" " + "in pack %s"), + (uintmax_t)base_offset, + reuse_packfile->pack_name); + nth_packed_object_id(&base_oid, reuse_packfile, - reuse_packfile->revindex[base_pos].nr); + pack_pos_to_index(reuse_packfile, base_pos)); len = encode_in_pack_object_header(header, sizeof(header), OBJ_REF_DELTA, size); @@ -941,7 +952,7 @@ static size_t write_reused_pack_verbatim(struct hashfile *out, off_t to_write; written = (pos * BITS_IN_EWORD); - to_write = reuse_packfile->revindex[written].offset + to_write = pack_pos_to_offset(reuse_packfile, written) - sizeof(struct pack_header); /* We're recording one chunk, not one object. */ @@ -1806,11 +1817,11 @@ static void check_object(struct object_entry *entry, uint32_t object_index) goto give_up; } if (reuse_delta && !entry->preferred_base) { - struct revindex_entry *revidx; - revidx = find_pack_revindex(p, ofs); - if (!revidx) + uint32_t pos; + if (offset_to_pack_pos(p, ofs, &pos) < 0) goto give_up; - if (!nth_packed_object_id(&base_ref, p, revidx->nr)) + if (!nth_packed_object_id(&base_ref, p, + pack_pos_to_index(p, pos))) have_base = 1; } entry->in_pack_header_size = used + used_0; diff --git a/pack-bitmap.c b/pack-bitmap.c index d88745fb02..60fe20fb87 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -407,11 +407,14 @@ static inline int bitmap_position_extended(struct bitmap_index *bitmap_git, static inline int bitmap_position_packfile(struct bitmap_index *bitmap_git, const struct object_id *oid) { + uint32_t pos; off_t offset = find_pack_entry_one(oid->hash, bitmap_git->pack); if (!offset) return -1; - return find_revindex_position(bitmap_git->pack, offset); + if (offset_to_pack_pos(bitmap_git->pack, offset, &pos) < 0) + return -1; + return pos; } static int bitmap_position(struct bitmap_index *bitmap_git, @@ -708,21 +711,22 @@ static void show_objects_for_type( for (offset = 0; offset < BITS_IN_EWORD; ++offset) { struct object_id oid; - struct revindex_entry *entry; - uint32_t hash = 0; + uint32_t hash = 0, index_pos; + off_t ofs; if ((word >> offset) == 0) break; offset += ewah_bit_ctz64(word >> offset); - entry = &bitmap_git->pack->revindex[pos + offset]; - nth_packed_object_id(&oid, bitmap_git->pack, entry->nr); + index_pos = pack_pos_to_index(bitmap_git->pack, pos + offset); + ofs = pack_pos_to_offset(bitmap_git->pack, pos + offset); + nth_packed_object_id(&oid, bitmap_git->pack, index_pos); if (bitmap_git->hashes) - hash = get_be32(bitmap_git->hashes + entry->nr); + hash = get_be32(bitmap_git->hashes + index_pos); - show_reach(&oid, object_type, 0, hash, bitmap_git->pack, entry->offset); + show_reach(&oid, object_type, 0, hash, bitmap_git->pack, ofs); } } } @@ -831,11 +835,11 @@ static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git, oi.sizep = &size; if (pos < pack->num_objects) { - struct revindex_entry *entry = &pack->revindex[pos]; - if (packed_object_info(the_repository, pack, - entry->offset, &oi) < 0) { + off_t ofs = pack_pos_to_offset(pack, pos); + if (packed_object_info(the_repository, pack, ofs, &oi) < 0) { struct object_id oid; - nth_packed_object_id(&oid, pack, entry->nr); + nth_packed_object_id(&oid, pack, + pack_pos_to_index(pack, pos)); die(_("unable to get size of %s"), oid_to_hex(&oid)); } } else { @@ -1065,23 +1069,21 @@ static void try_partial_reuse(struct bitmap_index *bitmap_git, struct bitmap *reuse, struct pack_window **w_curs) { - struct revindex_entry *revidx; - off_t offset; + off_t offset, header; enum object_type type; unsigned long size; if (pos >= bitmap_git->pack->num_objects) return; /* not actually in the pack */ - revidx = &bitmap_git->pack->revindex[pos]; - offset = revidx->offset; + offset = header = pack_pos_to_offset(bitmap_git->pack, pos); type = unpack_object_header(bitmap_git->pack, w_curs, &offset, &size); if (type < 0) return; /* broken packfile, punt */ if (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA) { off_t base_offset; - int base_pos; + uint32_t base_pos; /* * Find the position of the base object so we can look it up @@ -1092,11 +1094,10 @@ static void try_partial_reuse(struct bitmap_index *bitmap_git, * more detail. */ base_offset = get_delta_base(bitmap_git->pack, w_curs, - &offset, type, revidx->offset); + &offset, type, header); if (!base_offset) return; - base_pos = find_revindex_position(bitmap_git->pack, base_offset); - if (base_pos < 0) + if (offset_to_pack_pos(bitmap_git->pack, base_offset, &base_pos) < 0) return; /* @@ -1391,11 +1392,10 @@ uint32_t *create_bitmap_mapping(struct bitmap_index *bitmap_git, for (i = 0; i < num_objects; ++i) { struct object_id oid; - struct revindex_entry *entry; struct object_entry *oe; - entry = &bitmap_git->pack->revindex[i]; - nth_packed_object_id(&oid, bitmap_git->pack, entry->nr); + nth_packed_object_id(&oid, bitmap_git->pack, + pack_pos_to_index(bitmap_git->pack, i)); oe = packlist_find(mapping, &oid); if (oe) diff --git a/pack-revindex.c b/pack-revindex.c index ecdde39cf4..5e69bc7372 100644 --- a/pack-revindex.c +++ b/pack-revindex.c @@ -3,6 +3,11 @@ #include "object-store.h" #include "packfile.h" +struct revindex_entry { + off_t offset; + unsigned int nr; +}; + /* * Pack index for existing packs give us easy access to the offsets into * corresponding pack file where each object's data starts, but the entries @@ -169,17 +174,24 @@ int load_pack_revindex(struct packed_git *p) return 0; } -int find_revindex_position(struct packed_git *p, off_t ofs) +int offset_to_pack_pos(struct packed_git *p, off_t ofs, uint32_t *pos) { - int lo = 0; - int hi = p->num_objects + 1; - const struct revindex_entry *revindex = p->revindex; + unsigned lo, hi; + + if (load_pack_revindex(p) < 0) + return -1; + + lo = 0; + hi = p->num_objects + 1; do { const unsigned mi = lo + (hi - lo) / 2; - if (revindex[mi].offset == ofs) { - return mi; - } else if (ofs < revindex[mi].offset) + off_t got = pack_pos_to_offset(p, mi); + + if (got == ofs) { + *pos = mi; + return 0; + } else if (ofs < got) hi = mi; else lo = mi + 1; @@ -189,17 +201,20 @@ int find_revindex_position(struct packed_git *p, off_t ofs) return -1; } -struct revindex_entry *find_pack_revindex(struct packed_git *p, off_t ofs) +uint32_t pack_pos_to_index(struct packed_git *p, uint32_t pos) { - int pos; - - if (load_pack_revindex(p)) - return NULL; - - pos = find_revindex_position(p, ofs); - - if (pos < 0) - return NULL; + if (!p->revindex) + BUG("pack_pos_to_index: reverse index not yet loaded"); + if (p->num_objects <= pos) + BUG("pack_pos_to_index: out-of-bounds object at %"PRIu32, pos); + return p->revindex[pos].nr; +} - return p->revindex + pos; +off_t pack_pos_to_offset(struct packed_git *p, uint32_t pos) +{ + if (!p->revindex) + BUG("pack_pos_to_index: reverse index not yet loaded"); + if (p->num_objects < pos) + BUG("pack_pos_to_offset: out-of-bounds object at %"PRIu32, pos); + return p->revindex[pos].offset; } diff --git a/pack-revindex.h b/pack-revindex.h index 848331d5d6..6e0320b08b 100644 --- a/pack-revindex.h +++ b/pack-revindex.h @@ -1,16 +1,62 @@ #ifndef PACK_REVINDEX_H #define PACK_REVINDEX_H -struct packed_git; +/** + * A revindex allows converting efficiently between three properties + * of an object within a pack: + * + * - index position: the numeric position within the list of sorted object ids + * found in the .idx file + * + * - pack position: the numeric position within the list of objects in their + * order within the actual .pack file (i.e., 0 is the first object in the + * .pack, 1 is the second, and so on) + * + * - offset: the byte offset within the .pack file at which the object contents + * can be found + */ -struct revindex_entry { - off_t offset; - unsigned int nr; -}; +struct packed_git; +/* + * load_pack_revindex populates the revindex's internal data-structures for the + * given pack, returning zero on success and a negative value otherwise. + */ int load_pack_revindex(struct packed_git *p); -int find_revindex_position(struct packed_git *p, off_t ofs); -struct revindex_entry *find_pack_revindex(struct packed_git *p, off_t ofs); +/* + * offset_to_pack_pos converts an object offset to a pack position. This + * function returns zero on success, and a negative number otherwise. The + * parameter 'pos' is usable only on success. + * + * If the reverse index has not yet been loaded, this function loads it lazily, + * and returns an negative number if an error was encountered. + * + * This function runs in time O(log N) with the number of objects in the pack. + */ +int offset_to_pack_pos(struct packed_git *p, off_t ofs, uint32_t *pos); + +/* + * pack_pos_to_index converts the given pack-relative position 'pos' by + * returning an index-relative position. + * + * If the reverse index has not yet been loaded, or the position is out of + * bounds, this function aborts. + * + * This function runs in constant time. + */ +uint32_t pack_pos_to_index(struct packed_git *p, uint32_t pos); + +/* + * pack_pos_to_offset converts the given pack-relative position 'pos' into a + * pack offset. For a pack with 'N' objects, asking for position 'N' will return + * the total size (in bytes) of the pack. + * + * If the reverse index has not yet been loaded, or the position is out of + * bounds, this function aborts. + * + * This function runs in constant time. + */ +off_t pack_pos_to_offset(struct packed_git *p, uint32_t pos); #endif diff --git a/packfile.c b/packfile.c index 62d92e0c7c..4b938b4372 100644 --- a/packfile.c +++ b/packfile.c @@ -1235,18 +1235,18 @@ static int get_delta_base_oid(struct packed_git *p, oidread(oid, base); return 0; } else if (type == OBJ_OFS_DELTA) { - struct revindex_entry *revidx; + uint32_t base_pos; off_t base_offset = get_delta_base(p, w_curs, &curpos, type, delta_obj_offset); if (!base_offset) return -1; - revidx = find_pack_revindex(p, base_offset); - if (!revidx) + if (offset_to_pack_pos(p, base_offset, &base_pos) < 0) return -1; - return nth_packed_object_id(oid, p, revidx->nr); + return nth_packed_object_id(oid, p, + pack_pos_to_index(p, base_pos)); } else return -1; } @@ -1256,12 +1256,11 @@ static int retry_bad_packed_offset(struct repository *r, off_t obj_offset) { int type; - struct revindex_entry *revidx; + uint32_t pos; struct object_id oid; - revidx = find_pack_revindex(p, obj_offset); - if (!revidx) + if (offset_to_pack_pos(p, obj_offset, &pos) < 0) return OBJ_BAD; - nth_packed_object_id(&oid, p, revidx->nr); + nth_packed_object_id(&oid, p, pack_pos_to_index(p, pos)); mark_bad_packed_object(p, oid.hash); type = oid_object_info(r, &oid, NULL); if (type <= OBJ_NONE) @@ -1538,8 +1537,15 @@ int packed_object_info(struct repository *r, struct packed_git *p, } if (oi->disk_sizep) { - struct revindex_entry *revidx = find_pack_revindex(p, obj_offset); - *oi->disk_sizep = revidx[1].offset - obj_offset; + uint32_t pos; + if (offset_to_pack_pos(p, obj_offset, &pos) < 0) { + error("could not find object at offset %"PRIuMAX" " + "in pack %s", (uintmax_t)obj_offset, p->pack_name); + type = OBJ_BAD; + goto out; + } + + *oi->disk_sizep = pack_pos_to_offset(p, pos + 1) - obj_offset; } if (oi->typep || oi->type_name) { @@ -1688,11 +1694,21 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset, } if (do_check_packed_object_crc && p->index_version > 1) { - struct revindex_entry *revidx = find_pack_revindex(p, obj_offset); - off_t len = revidx[1].offset - obj_offset; - if (check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) { + uint32_t pack_pos, index_pos; + off_t len; + + if (offset_to_pack_pos(p, obj_offset, &pack_pos) < 0) { + error("could not find object at offset %"PRIuMAX" in pack %s", + (uintmax_t)obj_offset, p->pack_name); + data = NULL; + goto out; + } + + len = pack_pos_to_offset(p, pack_pos + 1) - obj_offset; + index_pos = pack_pos_to_index(p, pack_pos); + if (check_pack_crc(p, &w_curs, obj_offset, len, index_pos)) { struct object_id oid; - nth_packed_object_id(&oid, p, revidx->nr); + nth_packed_object_id(&oid, p, index_pos); error("bad packed object CRC for %s", oid_to_hex(&oid)); mark_bad_packed_object(p, oid.hash); @@ -1775,11 +1791,11 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset, * This is costly but should happen only in the presence * of a corrupted pack, and is better than failing outright. */ - struct revindex_entry *revidx; + uint32_t pos; struct object_id base_oid; - revidx = find_pack_revindex(p, obj_offset); - if (revidx) { - nth_packed_object_id(&base_oid, p, revidx->nr); + if (!(offset_to_pack_pos(p, obj_offset, &pos))) { + nth_packed_object_id(&base_oid, p, + pack_pos_to_index(p, pos)); error("failed to read delta base object %s" " at offset %"PRIuMAX" from %s", oid_to_hex(&base_oid), (uintmax_t)obj_offset, @@ -2066,19 +2082,31 @@ int for_each_object_in_pack(struct packed_git *p, } for (i = 0; i < p->num_objects; i++) { - uint32_t pos; + uint32_t index_pos; struct object_id oid; + /* + * We are iterating "i" from 0 up to num_objects, but its + * meaning may be different, depending on the requested output + * order: + * + * - in object-name order, it is the same as the index order + * used by nth_packed_object_id(), so we can pass it + * directly + * + * - in pack-order, it is pack position, which we must + * convert to an index position in order to get the oid. + */ if (flags & FOR_EACH_OBJECT_PACK_ORDER) - pos = p->revindex[i].nr; + index_pos = pack_pos_to_index(p, i); else - pos = i; + index_pos = i; - if (nth_packed_object_id(&oid, p, pos) < 0) + if (nth_packed_object_id(&oid, p, index_pos) < 0) return error("unable to get sha1 of object %u in %s", - pos, p->pack_name); + index_pos, p->pack_name); - r = cb(&oid, p, pos, data); + r = cb(&oid, p, index_pos, data); if (r) break; } |
