diff options
| author | Patrick Steinhardt <ps@pks.im> | 2025-10-30 11:38:45 +0100 |
|---|---|---|
| committer | Junio C Hamano <gitster@pobox.com> | 2025-10-30 07:09:53 -0700 |
| commit | c31bad4f7dcf3e04ae22e7d4a1059fd628acf1a2 (patch) | |
| tree | e834be384525f7b9546a71453d7b6a862e652a8f | |
| parent | 6aff1f25a046f3dcd8a78b0c61414fa2d1c9a93c (diff) | |
| download | git-c31bad4f7dcf3e04ae22e7d4a1059fd628acf1a2.tar.gz | |
packfile: track packs via the MRU list exclusively
We track packfiles via two different lists:
- `struct packfile_store::packs` is a list that sorts local packs
first. In addition, these packs are sorted so that younger packs are
sorted towards the front.
- `struct packfile_store::mru` is a list that sorts packs so that
most-recently used packs are at the front.
The reasoning behind the ordering in the `packs` list is that younger
objects stored in the local object store tend to be accessed more
frequently, and that is certainly true in some cases. But there are
going to be lots of cases where that isn't true. Especially when
traversing history it is likely that one needs to access many older
objects, and due to our housekeeping it is very likely that almost all
of those older objects will be contained in a single large pack that is
also the oldest one.
So whether or not the ordering makes sense really depends on the use
case at hand. A flexible approach like our MRU list addresses that need,
as it will sort packs towards the front that are accessed all the time.
Intuitively, this approach is thus able to satisfy more use cases more
efficiently.
This reasoning casts some doubt on whether or not it really makes sense
to track packs via two different lists. It causes confusion, and it is
not clear whether there are use cases where the `packs` list really is
such an obvious choice.
Merge these two lists into one most-recently-used list.
Note that there is one important edge case: `for_each_packed_object()`
uses the MRU list to iterate through packs, and then it lists each
object in those packs. As looking up an object moves the pack that
contains it to the front of the list, this would have the effect that we
now sort the current pack towards the front, thus modifying the list of
packfiles we are iterating over, with the consequence that we may end up
in an infinite loop. This edge case is worked around by introducing a new
field, `skip_mru_updates`, that allows us to skip updating the MRU.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
| -rw-r--r-- | builtin/pack-objects.c | 4 | ||||
| -rw-r--r-- | packfile.c | 27 | ||||
| -rw-r--r-- | packfile.h | 27 |
3 files changed, 26 insertions, 32 deletions
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index b83eb8ead1..0e4e9f8068 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1748,11 +1748,11 @@ static int want_object_in_pack_mtime(const struct object_id *oid, } } - for (e = the_repository->objects->packfiles->mru.head; e; e = e->next) { + for (e = the_repository->objects->packfiles->packs.head; e; e = e->next) { struct packed_git *p = e->pack; want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime); if (!exclude && want > 0) - packfile_list_prepend(&the_repository->objects->packfiles->mru, p); + packfile_list_prepend(&the_repository->objects->packfiles->packs, p); if (want != -1) return want; } diff --git a/packfile.c b/packfile.c index 60f2e42876..378b0b1920 100644 --- a/packfile.c +++ b/packfile.c @@ -870,9 +870,7 @@ void packfile_store_add_pack(struct packfile_store *store, if (pack->pack_fd != -1) pack_open_fds++; - packfile_list_prepend(&store->packs, pack); - packfile_list_append(&store->mru, pack); - + packfile_list_append(&store->packs, pack); strmap_put(&store->packs_by_path, pack->pack_name, pack); } @@ -1077,14 +1075,6 @@ static int sort_pack(const struct packfile_list_entry *a, return -1; } -static void packfile_store_prepare_mru(struct packfile_store *store) -{ - packfile_list_clear(&store->mru); - - for (struct packfile_list_entry *e = store->packs.head; e; e = e->next) - packfile_list_append(&store->mru, e->pack); -} - void packfile_store_prepare(struct packfile_store *store) { struct odb_source *source; @@ -1103,7 +1093,6 @@ void packfile_store_prepare(struct packfile_store *store) if (!e->next) store->packs.tail = e; - packfile_store_prepare_mru(store); store->initialized = true; } @@ -1128,12 +1117,6 @@ struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *stor return store->packs.head; } -struct packfile_list_entry *packfile_store_get_packs_mru(struct packfile_store *store) -{ - packfile_store_prepare(store); 
- return store->mru.head; -} - /* * Give a fast, rough count of the number of objects in the repository. This * ignores loose objects completely. If you have a lot of them, then either @@ -2134,11 +2117,12 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa if (!r->objects->packfiles->packs.head) return 0; - for (l = r->objects->packfiles->mru.head; l; l = l->next) { + for (l = r->objects->packfiles->packs.head; l; l = l->next) { struct packed_git *p = l->pack; if (!p->multi_pack_index && fill_pack_entry(oid, e, p)) { - packfile_list_prepend(&r->objects->packfiles->mru, p); + if (!r->objects->packfiles->skip_mru_updates) + packfile_list_prepend(&r->objects->packfiles->packs, p); return 1; } } @@ -2270,6 +2254,7 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, int r = 0; int pack_errors = 0; + repo->objects->packfiles->skip_mru_updates = true; repo_for_each_pack(repo, p) { if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) continue; @@ -2290,6 +2275,8 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, if (r) break; } + repo->objects->packfiles->skip_mru_updates = false; + return r ? r : pack_errors; } diff --git a/packfile.h b/packfile.h index d95275e666..27ba607e7c 100644 --- a/packfile.h +++ b/packfile.h @@ -79,8 +79,8 @@ struct packfile_store { struct object_database *odb; /* - * The list of packfiles in the order in which they are being added to - * the store. + * The list of packfiles in the order in which they have been most + * recently used. */ struct packfile_list packs; @@ -98,9 +98,6 @@ struct packfile_store { unsigned flags; } kept_cache; - /* A most-recently-used ordered version of the packs list. */ - struct packfile_list mru; - /* * A map of packfile names to packed_git structs for tracking which * packs have been loaded already. @@ -112,6 +109,21 @@ struct packfile_store { * packs. 
 */ bool initialized; + + /* + * Usually, packfiles will be reordered to the front of the `packs` + * list whenever an object is looked up via them. This has the effect + * that packs that contain a lot of accessed objects will be located + * towards the front. + * + * This is usually desirable, but there are exceptions. One exception + * is when looking up multiple objects in a loop for each packfile. + * In that case, we may easily end up with an infinite loop as the + * packfiles get reordered to the front repeatedly. + * + * Setting this field to `true` thus disables these reorderings. + */ + bool skip_mru_updates; }; /* @@ -172,11 +184,6 @@ void packfile_store_add_pack(struct packfile_store *store, struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *store); /* - * Get all packs in most-recently-used order. - */ -struct packfile_list_entry *packfile_store_get_packs_mru(struct packfile_store *store); - -/* * Open the packfile and add it to the store if it isn't yet known. Returns * either the newly opened packfile or the preexisting packfile. Returns a * `NULL` pointer in case the packfile could not be opened. |
