object-file: refactor writing objects via a stream

We have two different ways to write an object into the database:

  - We either provide the full buffer and write the object all at once.

  - Or we provide an input stream that has a `read()` function so that we can chunk the object.

The latter is especially used for large objects, where it may be too expensive to hold the complete object in memory all at once.

While we already have `odb_write_object()` at the ODB layer, we don't have an equivalent for streaming an object. Introduce a new function `odb_write_object_stream()` to address this gap so that callers don't have to be aware of the inner workings of how to stream an object to disk with a specific object source.

Rename `stream_loose_object()` to `odb_source_loose_write_stream()` to clarify its scope. This matches our modern best practices around how to name functions.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
author: Patrick Steinhardt <ps@pks.im> 2025-11-03 08:42:08 +0100
committer: Junio C Hamano <gitster@pobox.com> 2025-11-03 12:18:48 -0800
commit: 3e5e360888316ed1a44da69bf134bb6ec70aee1b (patch)
tree: b1d04c153f051a909c4f57f522c3e4120735410e
parent: bfb1b2b4ac5cfa99f7d2503b404d282714d84bdf (diff)
download: git-3e5e360888316ed1a44da69bf134bb6ec70aee1b.tar.gz
5 files changed, 27 insertions, 17 deletions
diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
index ef79e43715..6fc64e9e4b 100644
--- a/builtin/unpack-objects.c
+++ b/builtin/unpack-objects.c
@@ -363,7 +363,7 @@ struct input_zstream_data {
 	int status;
 };
 
-static const void *feed_input_zstream(struct input_stream *in_stream,
+static const void *feed_input_zstream(struct odb_write_stream *in_stream,
 				      unsigned long *readlen)
 {
 	struct input_zstream_data *data = in_stream->data;
@@ -393,7 +393,7 @@ static void stream_blob(unsigned long size, unsigned nr)
 {
 	git_zstream zstream = { 0 };
 	struct input_zstream_data data = { 0 };
-	struct input_stream in_stream = {
+	struct odb_write_stream in_stream = {
 		.read = feed_input_zstream,
 		.data = &data,
 	};
@@ -402,8 +402,7 @@ static void stream_blob(unsigned long size, unsigned nr)
 	data.zstream = &zstream;
 	git_inflate_init(&zstream);
 
-	if (stream_loose_object(the_repository->objects->sources,
-				&in_stream, size, &info->oid))
+	if (odb_write_object_stream(the_repository->objects, &in_stream, size, &info->oid))
 		die(_("failed to write object in stream"));
 
 	if (data.status != Z_STREAM_END)
diff --git a/object-file.c b/object-file.c
index fdc644a427..811c569ed3 100644
--- a/object-file.c
+++ b/object-file.c
@@ -974,9 +974,9 @@ int odb_source_loose_freshen_object(struct odb_source *source,
 	return !!check_and_freshen_source(source, oid, 1);
 }
 
-int stream_loose_object(struct odb_source *source,
-			struct input_stream *in_stream, size_t len,
-			struct object_id *oid)
+int odb_source_loose_write_stream(struct odb_source *source,
+				  struct odb_write_stream *in_stream, size_t len,
+				  struct object_id *oid)
 {
 	const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo;
 	struct object_id compat_oid;
diff --git a/object-file.h b/object-file.h
index 36a60e15c4..eeffa67bbd 100644
--- a/object-file.h
+++ b/object-file.h
@@ -67,6 +67,10 @@ int odb_source_loose_write_object(struct odb_source *source,
 				  enum object_type type, struct object_id *oid,
 				  struct object_id *compat_oid_in, unsigned flags);
 
+int odb_source_loose_write_stream(struct odb_source *source,
+				  struct odb_write_stream *stream, size_t len,
+				  struct object_id *oid);
+
 /*
  * Populate and return the loose object cache array corresponding to the
  * given object ID.
@@ -173,16 +177,6 @@ enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
 struct object_info;
 int parse_loose_header(const char *hdr, struct object_info *oi);
 
-struct input_stream {
-	const void *(*read)(struct input_stream *, unsigned long *len);
-	void *data;
-	int is_finished;
-};
-
-int stream_loose_object(struct odb_source *source,
-			struct input_stream *in_stream, size_t len,
-			struct object_id *oid);
-
 int force_object_loose(struct odb_source *source,
 		       const struct object_id *oid, time_t mtime);
 
diff --git a/odb.c b/odb.c
index da44f1d63b..3ec21ef24e 100644
--- a/odb.c
+++ b/odb.c
@@ -1025,6 +1025,13 @@ int odb_write_object_ext(struct object_database *odb,
 					     oid, compat_oid, flags);
 }
 
+int odb_write_object_stream(struct object_database *odb,
+			    struct odb_write_stream *stream, size_t len,
+			    struct object_id *oid)
+{
+	return odb_source_loose_write_stream(odb->sources, stream, len, oid);
+}
+
 struct object_database *odb_new(struct repository *repo)
 {
 	struct object_database *o = xmalloc(sizeof(*o));
diff --git a/odb.h b/odb.h
index 2653247e0c..9bb28008b1 100644
--- a/odb.h
+++ b/odb.h
@@ -492,4 +492,14 @@ static inline int odb_write_object(struct object_database *odb,
 	return odb_write_object_ext(odb, buf, len, type, oid, NULL, 0);
 }
 
+struct odb_write_stream {
+	const void *(*read)(struct odb_write_stream *, unsigned long *len);
+	void *data;
+	int is_finished;
+};
+
+int odb_write_object_stream(struct object_database *odb,
+			    struct odb_write_stream *stream, size_t len,
+			    struct object_id *oid);
+
 #endif /* ODB_H */
author	Patrick Steinhardt <ps@pks.im>	2025-11-03 08:42:08 +0100
committer	Junio C Hamano <gitster@pobox.com>	2025-11-03 12:18:48 -0800
commit	3e5e360888316ed1a44da69bf134bb6ec70aee1b (patch)
tree	b1d04c153f051a909c4f57f522c3e4120735410e
parent	bfb1b2b4ac5cfa99f7d2503b404d282714d84bdf (diff)
download	git-3e5e360888316ed1a44da69bf134bb6ec70aee1b.tar.gz