aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPatrick Steinhardt <ps@pks.im>2025-10-30 11:38:45 +0100
committerJunio C Hamano <gitster@pobox.com>2025-10-30 07:09:53 -0700
commitc31bad4f7dcf3e04ae22e7d4a1059fd628acf1a2 (patch)
treee834be384525f7b9546a71453d7b6a862e652a8f
parent6aff1f25a046f3dcd8a78b0c61414fa2d1c9a93c (diff)
downloadgit-c31bad4f7dcf3e04ae22e7d4a1059fd628acf1a2.tar.gz
packfile: track packs via the MRU list exclusively
We track packfiles via two different lists: - `struct packfile_store::packs` is a list that sorts local packs first. In addition, these packs are sorted so that younger packs are sorted towards the front. - `struct packfile_store::mru` is a list that sorts packs so that most-recently used packs are at the front. The reasoning behind the ordering in the `packs` list is that younger objects stored in the local object store tend to be accessed more frequently, and that is certainly true for some cases. But there are going to be lots of cases where that isn't true. Especially when traversing history it is likely that one needs to access many older objects, and due to our housekeeping it is very likely that almost all of those older objects will be contained in one large pack that is oldest. So whether or not the ordering makes sense really depends on the use case at hand. A flexible approach like our MRU list addresses that need, as it will sort packs towards the front that are accessed all the time. Intuitively, this approach is thus able to satisfy more use cases more efficiently. This reasoning casts some doubt on whether or not it really makes sense to track packs via two different lists. It causes confusion, and it is not clear whether there are use cases where the `packs` list really is such an obvious choice. Merge these two lists into one most-recently-used list. Note that there is one important edge case: `for_each_packed_object()` uses the MRU list to iterate through packs, and then it lists each object in those packs. This would have the effect that we now sort the current pack towards the front, thus modifying the list of packfiles we are iterating over, with the consequence that we'll see an infinite loop. This edge case is worked around by introducing a new field that allows us to skip updating the MRU. Signed-off-by: Patrick Steinhardt <ps@pks.im> Signed-off-by: Junio C Hamano <gitster@pobox.com>
-rw-r--r--builtin/pack-objects.c4
-rw-r--r--packfile.c27
-rw-r--r--packfile.h27
3 files changed, 26 insertions, 32 deletions
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index b83eb8ead1..0e4e9f8068 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1748,11 +1748,11 @@ static int want_object_in_pack_mtime(const struct object_id *oid,
}
}
- for (e = the_repository->objects->packfiles->mru.head; e; e = e->next) {
+ for (e = the_repository->objects->packfiles->packs.head; e; e = e->next) {
struct packed_git *p = e->pack;
want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime);
if (!exclude && want > 0)
- packfile_list_prepend(&the_repository->objects->packfiles->mru, p);
+ packfile_list_prepend(&the_repository->objects->packfiles->packs, p);
if (want != -1)
return want;
}
diff --git a/packfile.c b/packfile.c
index 60f2e42876..378b0b1920 100644
--- a/packfile.c
+++ b/packfile.c
@@ -870,9 +870,7 @@ void packfile_store_add_pack(struct packfile_store *store,
if (pack->pack_fd != -1)
pack_open_fds++;
- packfile_list_prepend(&store->packs, pack);
- packfile_list_append(&store->mru, pack);
-
+ packfile_list_append(&store->packs, pack);
strmap_put(&store->packs_by_path, pack->pack_name, pack);
}
@@ -1077,14 +1075,6 @@ static int sort_pack(const struct packfile_list_entry *a,
return -1;
}
-static void packfile_store_prepare_mru(struct packfile_store *store)
-{
- packfile_list_clear(&store->mru);
-
- for (struct packfile_list_entry *e = store->packs.head; e; e = e->next)
- packfile_list_append(&store->mru, e->pack);
-}
-
void packfile_store_prepare(struct packfile_store *store)
{
struct odb_source *source;
@@ -1103,7 +1093,6 @@ void packfile_store_prepare(struct packfile_store *store)
if (!e->next)
store->packs.tail = e;
- packfile_store_prepare_mru(store);
store->initialized = true;
}
@@ -1128,12 +1117,6 @@ struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *stor
return store->packs.head;
}
-struct packfile_list_entry *packfile_store_get_packs_mru(struct packfile_store *store)
-{
- packfile_store_prepare(store);
- return store->mru.head;
-}
-
/*
* Give a fast, rough count of the number of objects in the repository. This
* ignores loose objects completely. If you have a lot of them, then either
@@ -2134,11 +2117,12 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa
if (!r->objects->packfiles->packs.head)
return 0;
- for (l = r->objects->packfiles->mru.head; l; l = l->next) {
+ for (l = r->objects->packfiles->packs.head; l; l = l->next) {
struct packed_git *p = l->pack;
if (!p->multi_pack_index && fill_pack_entry(oid, e, p)) {
- packfile_list_prepend(&r->objects->packfiles->mru, p);
+ if (!r->objects->packfiles->skip_mru_updates)
+ packfile_list_prepend(&r->objects->packfiles->packs, p);
return 1;
}
}
@@ -2270,6 +2254,7 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
int r = 0;
int pack_errors = 0;
+ repo->objects->packfiles->skip_mru_updates = true;
repo_for_each_pack(repo, p) {
if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
continue;
@@ -2290,6 +2275,8 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
if (r)
break;
}
+ repo->objects->packfiles->skip_mru_updates = false;
+
return r ? r : pack_errors;
}
diff --git a/packfile.h b/packfile.h
index d95275e666..27ba607e7c 100644
--- a/packfile.h
+++ b/packfile.h
@@ -79,8 +79,8 @@ struct packfile_store {
struct object_database *odb;
/*
- * The list of packfiles in the order in which they are being added to
- * the store.
+ * The list of packfiles in the order in which they have been most
+ * recently used.
*/
struct packfile_list packs;
@@ -98,9 +98,6 @@ struct packfile_store {
unsigned flags;
} kept_cache;
- /* A most-recently-used ordered version of the packs list. */
- struct packfile_list mru;
-
/*
* A map of packfile names to packed_git structs for tracking which
* packs have been loaded already.
@@ -112,6 +109,21 @@ struct packfile_store {
* packs.
*/
bool initialized;
+
+ /*
+ * Usually, packfiles will be reordered to the front of the `packs`
+ * list whenever an object is looked up via them. This has the effect
+ * that packs that contain a lot of accessed objects will be located
+ * towards the front.
+ *
+ * This is usually desireable, but there are exceptions. One exception
+ * is when the looking up multiple objects in a loop for each packfile.
+ * In that case, we may easily end up with an infinite loop as the
+ * packfiles get reordered to the front repeatedly.
+ *
+ * Setting this field to `true` thus disables these reorderings.
+ */
+ bool skip_mru_updates;
};
/*
@@ -172,11 +184,6 @@ void packfile_store_add_pack(struct packfile_store *store,
struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *store);
/*
- * Get all packs in most-recently-used order.
- */
-struct packfile_list_entry *packfile_store_get_packs_mru(struct packfile_store *store);
-
-/*
* Open the packfile and add it to the store if it isn't yet known. Returns
* either the newly opened packfile or the preexisting packfile. Returns a
* `NULL` pointer in case the packfile could not be opened.