Merge branch 'ps/object-read-stream'

The "git_istream" abstraction has been revamped to make it easier to interface with pluggable object database design. * ps/object-read-stream: streaming: drop redundant type and size pointers streaming: move into object database subsystem streaming: refactor interface to be object-database-centric streaming: move logic to read packed objects streams into backend streaming: move logic to read loose objects streams into backend streaming: make the `odb_read_stream` definition public streaming: get rid of `the_repository` streaming: rely on object sources to create object stream packfile: introduce function to read object info from a store streaming: move zlib stream into backends streaming: create structure for filtered object streams streaming: create structure for packed object streams streaming: create structure for loose object streams streaming: create structure for in-core object streams streaming: allocate stream inside the backend-specific logic streaming: explicitly pass packfile info when streaming a packed object streaming: propagate final object type via the stream streaming: drop the `open()` callback function streaming: rename `git_istream` into `odb_read_stream`
author: Junio C Hamano <gitster@pobox.com> 2025-12-16 11:08:34 +0900
committer: Junio C Hamano <gitster@pobox.com> 2025-12-16 11:08:34 +0900
commit: dbe54273a707406409fa386db5b105557b31a831 (patch)
tree: 0def4e76638eff5ca5628d3cb2d0acb157c7bf37
parent: d8af7cadaa79d5837d73ec949e10b57dedb43e9b (diff)
parent: 7b940286527ec2175dffbb317f47e080bb37cf3e (diff)
download: git-dbe54273a707406409fa386db5b105557b31a831.tar.gz
20 files changed, 779 insertions, 729 deletions
diff --git a/Makefile b/Makefile
index 6fc322ff88..291e4a7219 100644
--- a/Makefile
+++ b/Makefile
@@ -1201,6 +1201,7 @@ LIB_OBJS += object-file.o
 LIB_OBJS += object-name.o
 LIB_OBJS += object.o
 LIB_OBJS += odb.o
+LIB_OBJS += odb/streaming.o
 LIB_OBJS += oid-array.o
 LIB_OBJS += oidmap.o
 LIB_OBJS += oidset.o
@@ -1294,7 +1295,6 @@ LIB_OBJS += split-index.o
 LIB_OBJS += stable-qsort.o
 LIB_OBJS += statinfo.o
 LIB_OBJS += strbuf.o
-LIB_OBJS += streaming.o
 LIB_OBJS += string-list.o
 LIB_OBJS += strmap.o
 LIB_OBJS += strvec.o
diff --git a/archive-tar.c b/archive-tar.c
index 73b63ddc41..0fc70d13a8 100644
--- a/archive-tar.c
+++ b/archive-tar.c
@@ -12,8 +12,8 @@
 #include "tar.h"
 #include "archive.h"
 #include "odb.h"
+#include "odb/streaming.h"
 #include "strbuf.h"
-#include "streaming.h"
 #include "run-command.h"
 #include "write-or-die.h"
 
@@ -129,22 +129,20 @@ static void write_trailer(void)
  */
 static int stream_blocked(struct repository *r, const struct object_id *oid)
 {
-	struct git_istream *st;
-	enum object_type type;
-	unsigned long sz;
+	struct odb_read_stream *st;
 	char buf[BLOCKSIZE];
 	ssize_t readlen;
 
-	st = open_istream(r, oid, &type, &sz, NULL);
+	st = odb_read_stream_open(r->objects, oid, NULL);
 	if (!st)
 		return error(_("cannot stream blob %s"), oid_to_hex(oid));
 	for (;;) {
-		readlen = read_istream(st, buf, sizeof(buf));
+		readlen = odb_read_stream_read(st, buf, sizeof(buf));
 		if (readlen <= 0)
 			break;
 		do_write_blocked(buf, readlen);
 	}
-	close_istream(st);
+	odb_read_stream_close(st);
 	if (!readlen)
 		finish_record();
 	return readlen;
diff --git a/archive-zip.c b/archive-zip.c
index bea5bdd43d..97ea8d60d6 100644
--- a/archive-zip.c
+++ b/archive-zip.c
@@ -10,9 +10,9 @@
 #include "gettext.h"
 #include "git-zlib.h"
 #include "hex.h"
-#include "streaming.h"
 #include "utf8.h"
 #include "odb.h"
+#include "odb/streaming.h"
 #include "strbuf.h"
 #include "userdiff.h"
 #include "write-or-die.h"
@@ -309,7 +309,7 @@ static int write_zip_entry(struct archiver_args *args,
 	enum zip_method method;
 	unsigned char *out;
 	void *deflated = NULL;
-	struct git_istream *stream = NULL;
+	struct odb_read_stream *stream = NULL;
 	unsigned long flags = 0;
 	int is_binary = -1;
 	const char *path_without_prefix = path + args->baselen;
@@ -347,12 +347,11 @@ static int write_zip_entry(struct archiver_args *args,
 			method = ZIP_METHOD_DEFLATE;
 
 		if (!buffer) {
-			enum object_type type;
-			stream = open_istream(args->repo, oid, &type, &size,
-					      NULL);
+			stream = odb_read_stream_open(args->repo->objects, oid, NULL);
 			if (!stream)
 				return error(_("cannot stream blob %s"),
 					     oid_to_hex(oid));
+			size = stream->size;
 			flags |= ZIP_STREAM;
 			out = NULL;
 		} else {
@@ -429,7 +428,7 @@ static int write_zip_entry(struct archiver_args *args,
 		ssize_t readlen;
 
 		for (;;) {
-			readlen = read_istream(stream, buf, sizeof(buf));
+			readlen = odb_read_stream_read(stream, buf, sizeof(buf));
 			if (readlen <= 0)
 				break;
 			crc = crc32(crc, buf, readlen);
@@ -439,7 +438,7 @@ static int write_zip_entry(struct archiver_args *args,
 							    buf, readlen);
 			write_or_die(1, buf, readlen);
 		}
-		close_istream(stream);
+		odb_read_stream_close(stream);
 		if (readlen)
 			return readlen;
 
@@ -462,7 +461,7 @@ static int write_zip_entry(struct archiver_args *args,
 		zstream.avail_out = sizeof(compressed);
 
 		for (;;) {
-			readlen = read_istream(stream, buf, sizeof(buf));
+			readlen = odb_read_stream_read(stream, buf, sizeof(buf));
 			if (readlen <= 0)
 				break;
 			crc = crc32(crc, buf, readlen);
@@ -486,7 +485,7 @@ static int write_zip_entry(struct archiver_args *args,
 			}
 
 		}
-		close_istream(stream);
+		odb_read_stream_close(stream);
 		if (readlen)
 			return readlen;
 
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 983ecec837..505ddaa12f 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -18,13 +18,13 @@
 #include "list-objects-filter-options.h"
 #include "parse-options.h"
 #include "userdiff.h"
-#include "streaming.h"
 #include "oid-array.h"
 #include "packfile.h"
 #include "pack-bitmap.h"
 #include "object-file.h"
 #include "object-name.h"
 #include "odb.h"
+#include "odb/streaming.h"
 #include "replace-object.h"
 #include "promisor-remote.h"
 #include "mailmap.h"
@@ -95,7 +95,7 @@ static int filter_object(const char *path, unsigned mode,
 
 static int stream_blob(const struct object_id *oid)
 {
-	if (stream_blob_to_fd(1, oid, NULL, 0))
+	if (odb_stream_blob_to_fd(the_repository->objects, 1, oid, NULL, 0))
 		die("unable to stream %s to stdout", oid_to_hex(oid));
 	return 0;
 }
diff --git a/builtin/fsck.c b/builtin/fsck.c
index c489582faa..4979bc795e 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -13,11 +13,11 @@
 #include "fsck.h"
 #include "parse-options.h"
 #include "progress.h"
-#include "streaming.h"
 #include "packfile.h"
 #include "object-file.h"
 #include "object-name.h"
 #include "odb.h"
+#include "odb/streaming.h"
 #include "path.h"
 #include "read-cache-ll.h"
 #include "replace-object.h"
@@ -340,7 +340,8 @@ static void check_unreachable_object(struct object *obj)
 			}
 			f = xfopen(filename, "w");
 			if (obj->type == OBJ_BLOB) {
-				if (stream_blob_to_fd(fileno(f), &obj->oid, NULL, 1))
+				if (odb_stream_blob_to_fd(the_repository->objects, fileno(f),
+							  &obj->oid, NULL, 1))
 					die_errno(_("could not write '%s'"), filename);
 			} else
 				fprintf(f, "%s\n", describe_object(&obj->oid));
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 699fe678cd..a7e901e49c 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -16,12 +16,12 @@
 #include "progress.h"
 #include "fsck.h"
 #include "strbuf.h"
-#include "streaming.h"
 #include "thread-utils.h"
 #include "packfile.h"
 #include "pack-revindex.h"
 #include "object-file.h"
 #include "odb.h"
+#include "odb/streaming.h"
 #include "oid-array.h"
 #include "oidset.h"
 #include "path.h"
@@ -762,7 +762,7 @@ static void find_ref_delta_children(const struct object_id *oid,
 
 struct compare_data {
 	struct object_entry *entry;
-	struct git_istream *st;
+	struct odb_read_stream *st;
 	unsigned char *buf;
 	unsigned long buf_size;
 };
@@ -779,7 +779,7 @@ static int compare_objects(const unsigned char *buf, unsigned long size,
 	}
 
 	while (size) {
-		ssize_t len = read_istream(data->st, data->buf, size);
+		ssize_t len = odb_read_stream_read(data->st, data->buf, size);
 		if (len == 0)
 			die(_("SHA1 COLLISION FOUND WITH %s !"),
 			    oid_to_hex(&data->entry->idx.oid));
@@ -798,8 +798,6 @@ static int compare_objects(const unsigned char *buf, unsigned long size,
 static int check_collison(struct object_entry *entry)
 {
 	struct compare_data data;
-	enum object_type type;
-	unsigned long size;
 
 	if (entry->size <= repo_settings_get_big_file_threshold(the_repository) ||
 	    entry->type != OBJ_BLOB)
@@ -807,15 +805,14 @@ static int check_collison(struct object_entry *entry)
 
 	memset(&data, 0, sizeof(data));
 	data.entry = entry;
-	data.st = open_istream(the_repository, &entry->idx.oid, &type, &size,
-			       NULL);
+	data.st = odb_read_stream_open(the_repository->objects, &entry->idx.oid, NULL);
 	if (!data.st)
 		return -1;
-	if (size != entry->size || type != entry->type)
+	if (data.st->size != entry->size || data.st->type != entry->type)
 		die(_("SHA1 COLLISION FOUND WITH %s !"),
 		    oid_to_hex(&entry->idx.oid));
 	unpack_data(entry, compare_objects, &data);
-	close_istream(data.st);
+	odb_read_stream_close(data.st);
 	free(data.buf);
 	return 0;
 }
diff --git a/builtin/log.c b/builtin/log.c
index c8319b8af3..d4cf9c59c8 100644
--- a/builtin/log.c
+++ b/builtin/log.c
@@ -16,6 +16,7 @@
 #include "refs.h"
 #include "object-name.h"
 #include "odb.h"
+#include "odb/streaming.h"
 #include "pager.h"
 #include "color.h"
 #include "commit.h"
@@ -35,7 +36,6 @@
 #include "parse-options.h"
 #include "line-log.h"
 #include "branch.h"
-#include "streaming.h"
 #include "version.h"
 #include "mailmap.h"
 #include "progress.h"
@@ -584,7 +584,7 @@ static int show_blob_object(const struct object_id *oid, struct rev_info *rev, c
 	fflush(rev->diffopt.file);
 	if (!rev->diffopt.flags.textconv_set_via_cmdline ||
 	    !rev->diffopt.flags.allow_textconv)
-		return stream_blob_to_fd(1, oid, NULL, 0);
+		return odb_stream_blob_to_fd(the_repository->objects, 1, oid, NULL, 0);
 
 	if (get_oid_with_context(the_repository, obj_name,
 				 GET_OID_RECORD_PATH,
@@ -594,7 +594,7 @@ static int show_blob_object(const struct object_id *oid, struct rev_info *rev, c
 	    !textconv_object(the_repository, obj_context.path,
 			     obj_context.mode, &oidc, 1, &buf, &size)) {
 		object_context_release(&obj_context);
-		return stream_blob_to_fd(1, oid, NULL, 0);
+		return odb_stream_blob_to_fd(the_repository->objects, 1, oid, NULL, 0);
 	}
 
 	if (!buf)
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 7937106ec5..1ce8d6ee21 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -22,7 +22,6 @@
 #include "pack-objects.h"
 #include "progress.h"
 #include "refs.h"
-#include "streaming.h"
 #include "thread-utils.h"
 #include "pack-bitmap.h"
 #include "delta-islands.h"
@@ -33,6 +32,7 @@
 #include "packfile.h"
 #include "object-file.h"
 #include "odb.h"
+#include "odb/streaming.h"
 #include "replace-object.h"
 #include "dir.h"
 #include "midx.h"
@@ -404,7 +404,7 @@ static unsigned long do_compress(void **pptr, unsigned long size)
 	return stream.total_out;
 }
 
-static unsigned long write_large_blob_data(struct git_istream *st, struct hashfile *f,
+static unsigned long write_large_blob_data(struct odb_read_stream *st, struct hashfile *f,
 					   const struct object_id *oid)
 {
 	git_zstream stream;
@@ -417,7 +417,7 @@ static unsigned long write_large_blob_data(struct git_istream *st, struct hashfi
 	for (;;) {
 		ssize_t readlen;
 		int zret = Z_OK;
-		readlen = read_istream(st, ibuf, sizeof(ibuf));
+		readlen = odb_read_stream_read(st, ibuf, sizeof(ibuf));
 		if (readlen == -1)
 			die(_("unable to read %s"), oid_to_hex(oid));
 
@@ -513,17 +513,19 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
 	unsigned hdrlen;
 	enum object_type type;
 	void *buf;
-	struct git_istream *st = NULL;
+	struct odb_read_stream *st = NULL;
 	const unsigned hashsz = the_hash_algo->rawsz;
 
 	if (!usable_delta) {
 		if (oe_type(entry) == OBJ_BLOB &&
 		    oe_size_greater_than(&to_pack, entry,
 					 repo_settings_get_big_file_threshold(the_repository)) &&
-		    (st = open_istream(the_repository, &entry->idx.oid, &type,
-				       &size, NULL)) != NULL)
+		    (st = odb_read_stream_open(the_repository->objects, &entry->idx.oid,
+					       NULL)) != NULL) {
 			buf = NULL;
-		else {
+			type = st->type;
+			size = st->size;
+		} else {
 			buf = odb_read_object(the_repository->objects,
 					      &entry->idx.oid, &type,
 					      &size);
@@ -577,7 +579,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
 			dheader[--pos] = 128 | (--ofs & 127);
 		if (limit && hdrlen + sizeof(dheader) - pos + datalen + hashsz >= limit) {
 			if (st)
-				close_istream(st);
+				odb_read_stream_close(st);
 			free(buf);
 			return 0;
 		}
@@ -591,7 +593,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
 		 */
 		if (limit && hdrlen + hashsz + datalen + hashsz >= limit) {
 			if (st)
-				close_istream(st);
+				odb_read_stream_close(st);
 			free(buf);
 			return 0;
 		}
@@ -601,7 +603,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
 	} else {
 		if (limit && hdrlen + datalen + hashsz >= limit) {
 			if (st)
-				close_istream(st);
+				odb_read_stream_close(st);
 			free(buf);
 			return 0;
 		}
@@ -609,7 +611,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
 	}
 	if (st) {
 		datalen = write_large_blob_data(st, f, &entry->idx.oid);
-		close_istream(st);
+		odb_read_stream_close(st);
 	} else {
 		hashwrite(f, buf, datalen);
 		free(buf);
diff --git a/entry.c b/entry.c
index cae02eb503..7817aee362 100644
--- a/entry.c
+++ b/entry.c
@@ -2,13 +2,13 @@
 
 #include "git-compat-util.h"
 #include "odb.h"
+#include "odb/streaming.h"
 #include "dir.h"
 #include "environment.h"
 #include "gettext.h"
 #include "hex.h"
 #include "name-hash.h"
 #include "sparse-index.h"
-#include "streaming.h"
 #include "submodule.h"
 #include "symlinks.h"
 #include "progress.h"
@@ -139,7 +139,7 @@ static int streaming_write_entry(const struct cache_entry *ce, char *path,
 	if (fd < 0)
 		return -1;
 
-	result |= stream_blob_to_fd(fd, &ce->oid, filter, 1);
+	result |= odb_stream_blob_to_fd(the_repository->objects, fd, &ce->oid, filter, 1);
 	*fstat_done = fstat_checkout_output(fd, state, statbuf);
 	result |= close(fd);
 
diff --git a/meson.build b/meson.build
index 00ad8a5c60..95f4c5ee95 100644
--- a/meson.build
+++ b/meson.build
@@ -397,6 +397,7 @@ libgit_sources = [
   'object-name.c',
   'object.c',
   'odb.c',
+  'odb/streaming.c',
   'oid-array.c',
   'oidmap.c',
   'oidset.c',
@@ -490,7 +491,6 @@ libgit_sources = [
   'stable-qsort.c',
   'statinfo.c',
   'strbuf.c',
-  'streaming.c',
   'string-list.c',
   'strmap.c',
   'strvec.c',
diff --git a/object-file.c b/object-file.c
index 84c9249dab..af1c3f972d 100644
--- a/object-file.c
+++ b/object-file.c
@@ -20,13 +20,13 @@
 #include "object-file-convert.h"
 #include "object-file.h"
 #include "odb.h"
+#include "odb/streaming.h"
 #include "oidtree.h"
 #include "pack.h"
 #include "packfile.h"
 #include "path.h"
 #include "read-cache-ll.h"
 #include "setup.h"
-#include "streaming.h"
 #include "tempfile.h"
 #include "tmp-objdir.h"
 
@@ -132,29 +132,27 @@ int check_object_signature(struct repository *r, const struct object_id *oid,
 int stream_object_signature(struct repository *r, const struct object_id *oid)
 {
 	struct object_id real_oid;
-	unsigned long size;
-	enum object_type obj_type;
-	struct git_istream *st;
+	struct odb_read_stream *st;
 	struct git_hash_ctx c;
 	char hdr[MAX_HEADER_LEN];
 	int hdrlen;
 
-	st = open_istream(r, oid, &obj_type, &size, NULL);
+	st = odb_read_stream_open(r->objects, oid, NULL);
 	if (!st)
 		return -1;
 
 	/* Generate the header */
-	hdrlen = format_object_header(hdr, sizeof(hdr), obj_type, size);
+	hdrlen = format_object_header(hdr, sizeof(hdr), st->type, st->size);
 
 	/* Sha1.. */
 	r->hash_algo->init_fn(&c);
 	git_hash_update(&c, hdr, hdrlen);
 	for (;;) {
 		char buf[1024 * 16];
-		ssize_t readlen = read_istream(st, buf, sizeof(buf));
+		ssize_t readlen = odb_read_stream_read(st, buf, sizeof(buf));
 
 		if (readlen < 0) {
-			close_istream(st);
+			odb_read_stream_close(st);
 			return -1;
 		}
 		if (!readlen)
@@ -162,7 +160,7 @@ int stream_object_signature(struct repository *r, const struct object_id *oid)
 		git_hash_update(&c, buf, readlen);
 	}
 	git_hash_final_oid(&real_oid, &c);
-	close_istream(st);
+	odb_read_stream_close(st);
 	return !oideq(oid, &real_oid) ? -1 : 0;
 }
 
@@ -234,9 +232,9 @@ static void *map_fd(int fd, const char *path, unsigned long *size)
 	return map;
 }
 
-void *odb_source_loose_map_object(struct odb_source *source,
-				  const struct object_id *oid,
-				  unsigned long *size)
+static void *odb_source_loose_map_object(struct odb_source *source,
+					 const struct object_id *oid,
+					 unsigned long *size)
 {
 	const char *p;
 	int fd = open_loose_object(source->loose, oid, &p);
@@ -246,11 +244,29 @@ void *odb_source_loose_map_object(struct odb_source *source,
 	return map_fd(fd, p, size);
 }
 
-enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
-						    unsigned char *map,
-						    unsigned long mapsize,
-						    void *buffer,
-						    unsigned long bufsiz)
+enum unpack_loose_header_result {
+	ULHR_OK,
+	ULHR_BAD,
+	ULHR_TOO_LONG,
+};
+
+/**
+ * unpack_loose_header() initializes the data stream needed to unpack
+ * a loose object header.
+ *
+ * Returns:
+ *
+ * - ULHR_OK on success
+ * - ULHR_BAD on error
+ * - ULHR_TOO_LONG if the header was too long
+ *
+ * It will only parse up to MAX_HEADER_LEN bytes.
+ */
+static enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
+							   unsigned char *map,
+							   unsigned long mapsize,
+							   void *buffer,
+							   unsigned long bufsiz)
 {
 	int status;
 
@@ -329,11 +345,18 @@ static void *unpack_loose_rest(git_zstream *stream,
 }
 
 /*
+ * parse_loose_header() parses the starting "<type> <len>\0" of an
+ * object. If it doesn't follow that format -1 is returned. To check
+ * the validity of the <type> populate the "typep" in the "struct
+ * object_info". It will be OBJ_BAD if the object type is unknown. The
+ * parsed <len> can be retrieved via "oi->sizep", and from there
+ * passed to unpack_loose_rest().
+ *
  * We used to just use "sscanf()", but that's actually way
  * too permissive for what we want to check. So do an anal
  * object header parse by hand.
  */
-int parse_loose_header(const char *hdr, struct object_info *oi)
+static int parse_loose_header(const char *hdr, struct object_info *oi)
 {
 	const char *type_buf = hdr;
 	size_t size;
@@ -1980,3 +2003,127 @@ void odb_source_loose_free(struct odb_source_loose *loose)
 	loose_object_map_clear(&loose->map);
 	free(loose);
 }
+
+struct odb_loose_read_stream {
+	struct odb_read_stream base;
+	git_zstream z;
+	enum {
+		ODB_LOOSE_READ_STREAM_INUSE,
+		ODB_LOOSE_READ_STREAM_DONE,
+		ODB_LOOSE_READ_STREAM_ERROR,
+	} z_state;
+	void *mapped;
+	unsigned long mapsize;
+	char hdr[32];
+	int hdr_avail;
+	int hdr_used;
+};
+
+static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t sz)
+{
+	struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st;
+	size_t total_read = 0;
+
+	switch (st->z_state) {
+	case ODB_LOOSE_READ_STREAM_DONE:
+		return 0;
+	case ODB_LOOSE_READ_STREAM_ERROR:
+		return -1;
+	default:
+		break;
+	}
+
+	if (st->hdr_used < st->hdr_avail) {
+		size_t to_copy = st->hdr_avail - st->hdr_used;
+		if (sz < to_copy)
+			to_copy = sz;
+		memcpy(buf, st->hdr + st->hdr_used, to_copy);
+		st->hdr_used += to_copy;
+		total_read += to_copy;
+	}
+
+	while (total_read < sz) {
+		int status;
+
+		st->z.next_out = (unsigned char *)buf + total_read;
+		st->z.avail_out = sz - total_read;
+		status = git_inflate(&st->z, Z_FINISH);
+
+		total_read = st->z.next_out - (unsigned char *)buf;
+
+		if (status == Z_STREAM_END) {
+			git_inflate_end(&st->z);
+			st->z_state = ODB_LOOSE_READ_STREAM_DONE;
+			break;
+		}
+		if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) {
+			git_inflate_end(&st->z);
+			st->z_state = ODB_LOOSE_READ_STREAM_ERROR;
+			return -1;
+		}
+	}
+	return total_read;
+}
+
+static int close_istream_loose(struct odb_read_stream *_st)
+{
+	struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st;
+	if (st->z_state == ODB_LOOSE_READ_STREAM_INUSE)
+		git_inflate_end(&st->z);
+	munmap(st->mapped, st->mapsize);
+	return 0;
+}
+
+int odb_source_loose_read_object_stream(struct odb_read_stream **out,
+					struct odb_source *source,
+					const struct object_id *oid)
+{
+	struct object_info oi = OBJECT_INFO_INIT;
+	struct odb_loose_read_stream *st;
+	unsigned long mapsize;
+	void *mapped;
+
+	mapped = odb_source_loose_map_object(source, oid, &mapsize);
+	if (!mapped)
+		return -1;
+
+	/*
+	 * Note: we must allocate this structure early even though we may still
+	 * fail. This is because we need to initialize the zlib stream, and it
+	 * is not possible to copy the stream around after the fact because it
+	 * has self-referencing pointers.
+	 */
+	CALLOC_ARRAY(st, 1);
+
+	switch (unpack_loose_header(&st->z, mapped, mapsize, st->hdr,
+				    sizeof(st->hdr))) {
+	case ULHR_OK:
+		break;
+	case ULHR_BAD:
+	case ULHR_TOO_LONG:
+		goto error;
+	}
+
+	oi.sizep = &st->base.size;
+	oi.typep = &st->base.type;
+
+	if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0)
+		goto error;
+
+	st->mapped = mapped;
+	st->mapsize = mapsize;
+	st->hdr_used = strlen(st->hdr) + 1;
+	st->hdr_avail = st->z.total_out;
+	st->z_state = ODB_LOOSE_READ_STREAM_INUSE;
+	st->base.close = close_istream_loose;
+	st->base.read = read_istream_loose;
+
+	*out = &st->base;
+
+	return 0;
+error:
+	git_inflate_end(&st->z);
+	munmap(st->mapped, st->mapsize);
+	free(st);
+	return -1;
+}
diff --git a/object-file.h b/object-file.h
index eeffa67bbd..1229d5f675 100644
--- a/object-file.h
+++ b/object-file.h
@@ -16,6 +16,8 @@ enum {
 int index_fd(struct index_state *istate, struct object_id *oid, int fd, struct stat *st, enum object_type type, const char *path, unsigned flags);
 int index_path(struct index_state *istate, struct object_id *oid, const char *path, struct stat *st, unsigned flags);
 
+struct object_info;
+struct odb_read_stream;
 struct odb_source;
 
 struct odb_source_loose {
@@ -47,9 +49,9 @@ int odb_source_loose_read_object_info(struct odb_source *source,
 				      const struct object_id *oid,
 				      struct object_info *oi, int flags);
 
-void *odb_source_loose_map_object(struct odb_source *source,
-				  const struct object_id *oid,
-				  unsigned long *size);
+int odb_source_loose_read_object_stream(struct odb_read_stream **out,
+					struct odb_source *source,
+					const struct object_id *oid);
 
 /*
  * Return true iff an object database source has a loose object
@@ -143,40 +145,6 @@ int for_each_loose_object(struct object_database *odb,
 int format_object_header(char *str, size_t size, enum object_type type,
 			 size_t objsize);
 
-/**
- * unpack_loose_header() initializes the data stream needed to unpack
- * a loose object header.
- *
- * Returns:
- *
- * - ULHR_OK on success
- * - ULHR_BAD on error
- * - ULHR_TOO_LONG if the header was too long
- *
- * It will only parse up to MAX_HEADER_LEN bytes.
- */
-enum unpack_loose_header_result {
-	ULHR_OK,
-	ULHR_BAD,
-	ULHR_TOO_LONG,
-};
-enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
-						    unsigned char *map,
-						    unsigned long mapsize,
-						    void *buffer,
-						    unsigned long bufsiz);
-
-/**
- * parse_loose_header() parses the starting "<type> <len>\0" of an
- * object. If it doesn't follow that format -1 is returned. To check
- * the validity of the <type> populate the "typep" in the "struct
- * object_info". It will be OBJ_BAD if the object type is unknown. The
- * parsed <len> can be retrieved via "oi->sizep", and from there
- * passed to unpack_loose_rest().
- */
-struct object_info;
-int parse_loose_header(const char *hdr, struct object_info *oi);
-
 int force_object_loose(struct odb_source *source,
 		       const struct object_id *oid, time_t mtime);
 
diff --git a/odb.c b/odb.c
index dc8f292f3d..af13174425 100644
--- a/odb.c
+++ b/odb.c
@@ -670,8 +670,6 @@ static int do_oid_object_info_extended(struct object_database *odb,
 {
 	static struct object_info blank_oi = OBJECT_INFO_INIT;
 	const struct cached_object *co;
-	struct pack_entry e;
-	int rtype;
 	const struct object_id *real = oid;
 	int already_retried = 0;
 
@@ -706,8 +704,8 @@ static int do_oid_object_info_extended(struct object_database *odb,
 	while (1) {
 		struct odb_source *source;
 
-		if (find_pack_entry(odb->repo, real, &e))
-			break;
+		if (!packfile_store_read_object_info(odb->packfiles, real, oi, flags))
+			return 0;
 
 		/* Most likely it's a loose object. */
 		for (source = odb->sources; source; source = source->next)
@@ -717,8 +715,8 @@ static int do_oid_object_info_extended(struct object_database *odb,
 		/* Not a loose object; someone else may have just packed it. */
 		if (!(flags & OBJECT_INFO_QUICK)) {
 			odb_reprepare(odb->repo->objects);
-			if (find_pack_entry(odb->repo, real, &e))
-				break;
+			if (!packfile_store_read_object_info(odb->packfiles, real, oi, flags))
+				return 0;
 		}
 
 		/*
@@ -751,25 +749,6 @@ static int do_oid_object_info_extended(struct object_database *odb,
 		}
 		return -1;
 	}
-
-	if (oi == &blank_oi)
-		/*
-		 * We know that the caller doesn't actually need the
-		 * information below, so return early.
-		 */
-		return 0;
-	rtype = packed_object_info(odb->repo, e.p, e.offset, oi);
-	if (rtype < 0) {
-		mark_bad_packed_object(e.p, real);
-		return do_oid_object_info_extended(odb, real, oi, 0);
-	} else if (oi->whence == OI_PACKED) {
-		oi->u.packed.offset = e.offset;
-		oi->u.packed.pack = e.p;
-		oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA ||
-					 rtype == OBJ_OFS_DELTA);
-	}
-
-	return 0;
 }
 
 static int oid_object_info_convert(struct repository *r,
diff --git a/odb/streaming.c b/odb/streaming.c
new file mode 100644
index 0000000000..745cd486fb
--- /dev/null
+++ b/odb/streaming.c
@@ -0,0 +1,293 @@
+/*
+ * Copyright (c) 2011, Google Inc.
+ */
+
+#include "git-compat-util.h"
+#include "convert.h"
+#include "environment.h"
+#include "repository.h"
+#include "object-file.h"
+#include "odb.h"
+#include "odb/streaming.h"
+#include "replace-object.h"
+#include "packfile.h"
+
+#define FILTER_BUFFER (1024*16)
+
+/*****************************************************************
+ *
+ * Filtered stream
+ *
+ *****************************************************************/
+
+struct odb_filtered_read_stream {
+	struct odb_read_stream base;
+	struct odb_read_stream *upstream;
+	struct stream_filter *filter;
+	char ibuf[FILTER_BUFFER];
+	char obuf[FILTER_BUFFER];
+	int i_end, i_ptr;
+	int o_end, o_ptr;
+	int input_finished;
+};
+
+static int close_istream_filtered(struct odb_read_stream *_fs)
+{
+	struct odb_filtered_read_stream *fs = (struct odb_filtered_read_stream *)_fs;
+	free_stream_filter(fs->filter);
+	return odb_read_stream_close(fs->upstream);
+}
+
+static ssize_t read_istream_filtered(struct odb_read_stream *_fs, char *buf,
+				     size_t sz)
+{
+	struct odb_filtered_read_stream *fs = (struct odb_filtered_read_stream *)_fs;
+	size_t filled = 0;
+
+	while (sz) {
+		/* do we already have filtered output? */
+		if (fs->o_ptr < fs->o_end) {
+			size_t to_move = fs->o_end - fs->o_ptr;
+			if (sz < to_move)
+				to_move = sz;
+			memcpy(buf + filled, fs->obuf + fs->o_ptr, to_move);
+			fs->o_ptr += to_move;
+			sz -= to_move;
+			filled += to_move;
+			continue;
+		}
+		fs->o_end = fs->o_ptr = 0;
+
+		/* do we have anything to feed the filter with? */
+		if (fs->i_ptr < fs->i_end) {
+			size_t to_feed = fs->i_end - fs->i_ptr;
+			size_t to_receive = FILTER_BUFFER;
+			if (stream_filter(fs->filter,
+					  fs->ibuf + fs->i_ptr, &to_feed,
+					  fs->obuf, &to_receive))
+				return -1;
+			fs->i_ptr = fs->i_end - to_feed;
+			fs->o_end = FILTER_BUFFER - to_receive;
+			continue;
+		}
+
+		/* tell the filter to drain upon no more input */
+		if (fs->input_finished) {
+			size_t to_receive = FILTER_BUFFER;
+			if (stream_filter(fs->filter,
+					  NULL, NULL,
+					  fs->obuf, &to_receive))
+				return -1;
+			fs->o_end = FILTER_BUFFER - to_receive;
+			if (!fs->o_end)
+				break;
+			continue;
+		}
+		fs->i_end = fs->i_ptr = 0;
+
+		/* refill the input from the upstream */
+		if (!fs->input_finished) {
+			fs->i_end = odb_read_stream_read(fs->upstream, fs->ibuf, FILTER_BUFFER);
+			if (fs->i_end < 0)
+				return -1;
+			if (fs->i_end)
+				continue;
+		}
+		fs->input_finished = 1;
+	}
+	return filled;
+}
+
+static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st,
+						    struct stream_filter *filter)
+{
+	struct odb_filtered_read_stream *fs;
+
+	CALLOC_ARRAY(fs, 1);
+	fs->base.close = close_istream_filtered;
+	fs->base.read = read_istream_filtered;
+	fs->upstream = st;
+	fs->filter = filter;
+	fs->base.size = -1; /* unknown */
+	fs->base.type = st->type;
+
+	return &fs->base;
+}
+
+/*****************************************************************
+ *
+ * In-core stream
+ *
+ *****************************************************************/
+
+struct odb_incore_read_stream {
+	struct odb_read_stream base;
+	char *buf; /* from odb_read_object_info_extended() */
+	unsigned long read_ptr;
+};
+
+static int close_istream_incore(struct odb_read_stream *_st)
+{
+	struct odb_incore_read_stream *st = (struct odb_incore_read_stream *)_st;
+	free(st->buf);
+	return 0;
+}
+
+static ssize_t read_istream_incore(struct odb_read_stream *_st, char *buf, size_t sz)
+{
+	struct odb_incore_read_stream *st = (struct odb_incore_read_stream *)_st;
+	size_t read_size = sz;
+	size_t remainder = st->base.size - st->read_ptr;
+
+	if (remainder <= read_size)
+		read_size = remainder;
+	if (read_size) {
+		memcpy(buf, st->buf + st->read_ptr, read_size);
+		st->read_ptr += read_size;
+	}
+	return read_size;
+}
+
+static int open_istream_incore(struct odb_read_stream **out,
+			       struct object_database *odb,
+			       const struct object_id *oid)
+{
+	struct object_info oi = OBJECT_INFO_INIT;
+	struct odb_incore_read_stream stream = {
+		.base.close = close_istream_incore,
+		.base.read = read_istream_incore,
+	};
+	struct odb_incore_read_stream *st;
+	int ret;
+
+	oi.typep = &stream.base.type;
+	oi.sizep = &stream.base.size;
+	oi.contentp = (void **)&stream.buf;
+	ret = odb_read_object_info_extended(odb, oid, &oi,
+					    OBJECT_INFO_DIE_IF_CORRUPT);
+	if (ret)
+		return ret;
+
+	CALLOC_ARRAY(st, 1);
+	*st = stream;
+	*out = &st->base;
+
+	return 0;
+}
+
+/*****************************************************************************
+ * static helpers variables and functions for users of streaming interface
+ *****************************************************************************/
+
+static int istream_source(struct odb_read_stream **out,
+			  struct object_database *odb,
+			  const struct object_id *oid)
+{
+	struct odb_source *source;
+
+	if (!packfile_store_read_object_stream(out, odb->packfiles, oid))
+		return 0;
+
+	odb_prepare_alternates(odb);
+	for (source = odb->sources; source; source = source->next)
+		if (!odb_source_loose_read_object_stream(out, source, oid))
+			return 0;
+
+	return open_istream_incore(out, odb, oid);
+}
+
+/****************************************************************
+ * Users of streaming interface
+ ****************************************************************/
+
+int odb_read_stream_close(struct odb_read_stream *st)
+{
+	int r = st->close(st);
+	free(st);
+	return r;
+}
+
+ssize_t odb_read_stream_read(struct odb_read_stream *st, void *buf, size_t sz)
+{
+	return st->read(st, buf, sz);
+}
+
+struct odb_read_stream *odb_read_stream_open(struct object_database *odb,
+					     const struct object_id *oid,
+					     struct stream_filter *filter)
+{
+	struct odb_read_stream *st;
+	const struct object_id *real = lookup_replace_object(odb->repo, oid);
+	int ret = istream_source(&st, odb, real);
+
+	if (ret)
+		return NULL;
+
+	if (filter) {
+		/* Add "&& !is_null_stream_filter(filter)" for performance */
+		struct odb_read_stream *nst = attach_stream_filter(st, filter);
+		if (!nst) {
+			odb_read_stream_close(st);
+			return NULL;
+		}
+		st = nst;
+	}
+
+	return st;
+}
+
+int odb_stream_blob_to_fd(struct object_database *odb,
+			  int fd,
+			  const struct object_id *oid,
+			  struct stream_filter *filter,
+			  int can_seek)
+{
+	struct odb_read_stream *st;
+	ssize_t kept = 0;
+	int result = -1;
+
+	st = odb_read_stream_open(odb, oid, filter);
+	if (!st) {
+		if (filter)
+			free_stream_filter(filter);
+		return result;
+	}
+	if (st->type != OBJ_BLOB)
+		goto close_and_exit;
+	for (;;) {
+		char buf[1024 * 16];
+		ssize_t wrote, holeto;
+		ssize_t readlen = odb_read_stream_read(st, buf, sizeof(buf));
+
+		if (readlen < 0)
+			goto close_and_exit;
+		if (!readlen)
+			break;
+		if (can_seek && sizeof(buf) == readlen) {
+			for (holeto = 0; holeto < readlen; holeto++)
+				if (buf[holeto])
+					break;
+			if (readlen == holeto) {
+				kept += holeto;
+				continue;
+			}
+		}
+
+		if (kept && lseek(fd, kept, SEEK_CUR) == (off_t) -1)
+			goto close_and_exit;
+		else
+			kept = 0;
+		wrote = write_in_full(fd, buf, readlen);
+
+		if (wrote < 0)
+			goto close_and_exit;
+	}
+	if (kept && (lseek(fd, kept - 1, SEEK_CUR) == (off_t) -1 ||
+		     xwrite(fd, "", 1) != 1))
+		goto close_and_exit;
+	result = 0;
+
+ close_and_exit:
+	odb_read_stream_close(st);
+	return result;
+}
diff --git a/odb/streaming.h b/odb/streaming.h
new file mode 100644
index 0000000000..c7861f7e13
--- /dev/null
+++ b/odb/streaming.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2011, Google Inc.
+ */
+#ifndef STREAMING_H
+#define STREAMING_H 1
+
+#include "object.h"
+
+struct object_database;
+struct odb_read_stream;
+struct stream_filter;
+
+typedef int (*odb_read_stream_close_fn)(struct odb_read_stream *);
+typedef ssize_t (*odb_read_stream_read_fn)(struct odb_read_stream *, char *, size_t);
+
+/*
+ * A stream that can be used to read an object from the object database without
+ * loading all of it into memory.
+ */
+struct odb_read_stream {
+	odb_read_stream_close_fn close;
+	odb_read_stream_read_fn read;
+	enum object_type type;
+	unsigned long size; /* inflated size of full object */
+};
+
+/*
+ * Create a new object stream for the given object database. An optional filter
+ * can be used to transform the object's content.
+ *
+ * Returns the stream on success, a `NULL` pointer otherwise.
+ */
+struct odb_read_stream *odb_read_stream_open(struct object_database *odb,
+					     const struct object_id *oid,
+					     struct stream_filter *filter);
+
+/*
+ * Close the given read stream and release all resources associated with it.
+ * Returns 0 on success, a negative error code otherwise.
+ */
+int odb_read_stream_close(struct odb_read_stream *stream);
+
+/*
+ * Read data from the stream into the buffer. Returns 0 on EOF and the number
+ * of bytes read on success. Returns a negative error code in case reading from
+ * the stream fails.
+ */
+ssize_t odb_read_stream_read(struct odb_read_stream *stream, void *buf, size_t len);
+
+/*
+ * Look up the object by its ID and write the full contents to the file
+ * descriptor. The object must be a blob, or the function will fail. When
+ * provided, the filter is used to transform the blob contents.
+ *
+ * `can_seek` should be set to 1 in case the given file descriptor can be
+ * seek(3p)'d on. This is used to support files with holes in case a
+ * significant portion of the blob contains NUL bytes.
+ *
+ * Returns a negative error code on failure, 0 on success.
+ */
+int odb_stream_blob_to_fd(struct object_database *odb,
+			  int fd,
+			  const struct object_id *oid,
+			  struct stream_filter *filter,
+			  int can_seek);
+
+#endif /* STREAMING_H */
diff --git a/packfile.c b/packfile.c
index 3d8b994a61..c88bd92619 100644
--- a/packfile.c
+++ b/packfile.c
@@ -20,6 +20,7 @@
 #include "tree.h"
 #include "object-file.h"
 #include "odb.h"
+#include "odb/streaming.h"
 #include "midx.h"
 #include "commit-graph.h"
 #include "pack-revindex.h"
@@ -885,22 +886,6 @@ struct packed_git *packfile_store_load_pack(struct packfile_store *store,
 	return p;
 }
 
-int packfile_store_freshen_object(struct packfile_store *store,
-				  const struct object_id *oid)
-{
-	struct pack_entry e;
-	if (!find_pack_entry(store->odb->repo, oid, &e))
-		return 0;
-	if (e.p->is_cruft)
-		return 0;
-	if (e.p->freshened)
-		return 1;
-	if (utime(e.p->pack_name, NULL))
-		return 0;
-	e.p->freshened = 1;
-	return 1;
-}
-
 void (*report_garbage)(unsigned seen_bits, const char *path);
 
 static void report_helper(const struct string_list *list,
@@ -2105,7 +2090,9 @@ static int fill_pack_entry(const struct object_id *oid,
 	return 1;
 }
 
-int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e)
+static int find_pack_entry(struct repository *r,
+			   const struct object_id *oid,
+			   struct pack_entry *e)
 {
 	struct packfile_list_entry *l;
 
@@ -2130,6 +2117,57 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa
 	return 0;
 }
 
+int packfile_store_freshen_object(struct packfile_store *store,
+				  const struct object_id *oid)
+{
+	struct pack_entry e;
+	if (!find_pack_entry(store->odb->repo, oid, &e))
+		return 0;
+	if (e.p->is_cruft)
+		return 0;
+	if (e.p->freshened)
+		return 1;
+	if (utime(e.p->pack_name, NULL))
+		return 0;
+	e.p->freshened = 1;
+	return 1;
+}
+
+int packfile_store_read_object_info(struct packfile_store *store,
+				    const struct object_id *oid,
+				    struct object_info *oi,
+				    unsigned flags UNUSED)
+{
+	static struct object_info blank_oi = OBJECT_INFO_INIT;
+	struct pack_entry e;
+	int rtype;
+
+	if (!find_pack_entry(store->odb->repo, oid, &e))
+		return 1;
+
+	/*
+	 * We know that the caller doesn't actually need the
+	 * information below, so return early.
+	 */
+	if (oi == &blank_oi)
+		return 0;
+
+	rtype = packed_object_info(store->odb->repo, e.p, e.offset, oi);
+	if (rtype < 0) {
+		mark_bad_packed_object(e.p, oid);
+		return -1;
+	}
+
+	if (oi->whence == OI_PACKED) {
+		oi->u.packed.offset = e.offset;
+		oi->u.packed.pack = e.p;
+		oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA ||
+					 rtype == OBJ_OFS_DELTA);
+	}
+
+	return 0;
+}
+
 static void maybe_invalidate_kept_pack_cache(struct repository *r,
 					     unsigned flags)
 {
@@ -2400,3 +2438,130 @@ void packfile_store_close(struct packfile_store *store)
 		close_pack(e->pack);
 	}
 }
+
+struct odb_packed_read_stream {
+	struct odb_read_stream base;
+	struct packed_git *pack;
+	git_zstream z;
+	enum {
+		ODB_PACKED_READ_STREAM_UNINITIALIZED,
+		ODB_PACKED_READ_STREAM_INUSE,
+		ODB_PACKED_READ_STREAM_DONE,
+		ODB_PACKED_READ_STREAM_ERROR,
+	} z_state;
+	off_t pos;
+};
+
+static ssize_t read_istream_pack_non_delta(struct odb_read_stream *_st, char *buf,
+					   size_t sz)
+{
+	struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st;
+	size_t total_read = 0;
+
+	switch (st->z_state) {
+	case ODB_PACKED_READ_STREAM_UNINITIALIZED:
+		memset(&st->z, 0, sizeof(st->z));
+		git_inflate_init(&st->z);
+		st->z_state = ODB_PACKED_READ_STREAM_INUSE;
+		break;
+	case ODB_PACKED_READ_STREAM_DONE:
+		return 0;
+	case ODB_PACKED_READ_STREAM_ERROR:
+		return -1;
+	case ODB_PACKED_READ_STREAM_INUSE:
+		break;
+	}
+
+	while (total_read < sz) {
+		int status;
+		struct pack_window *window = NULL;
+		unsigned char *mapped;
+
+		mapped = use_pack(st->pack, &window,
+				  st->pos, &st->z.avail_in);
+
+		st->z.next_out = (unsigned char *)buf + total_read;
+		st->z.avail_out = sz - total_read;
+		st->z.next_in = mapped;
+		status = git_inflate(&st->z, Z_FINISH);
+
+		st->pos += st->z.next_in - mapped;
+		total_read = st->z.next_out - (unsigned char *)buf;
+		unuse_pack(&window);
+
+		if (status == Z_STREAM_END) {
+			git_inflate_end(&st->z);
+			st->z_state = ODB_PACKED_READ_STREAM_DONE;
+			break;
+		}
+
+		/*
+		 * Unlike the loose object case, we do not have to worry here
+		 * about running out of input bytes and spinning infinitely. If
+		 * we get Z_BUF_ERROR due to too few input bytes, then we'll
+		 * replenish them in the next use_pack() call when we loop. If
+		 * we truly hit the end of the pack (i.e., because it's corrupt
+		 * or truncated), then use_pack() catches that and will die().
+		 */
+		if (status != Z_OK && status != Z_BUF_ERROR) {
+			git_inflate_end(&st->z);
+			st->z_state = ODB_PACKED_READ_STREAM_ERROR;
+			return -1;
+		}
+	}
+	return total_read;
+}
+
+static int close_istream_pack_non_delta(struct odb_read_stream *_st)
+{
+	struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st;
+	if (st->z_state == ODB_PACKED_READ_STREAM_INUSE)
+		git_inflate_end(&st->z);
+	return 0;
+}
+
+int packfile_store_read_object_stream(struct odb_read_stream **out,
+				      struct packfile_store *store,
+				      const struct object_id *oid)
+{
+	struct odb_packed_read_stream *stream;
+	struct pack_window *window = NULL;
+	struct object_info oi = OBJECT_INFO_INIT;
+	enum object_type in_pack_type;
+	unsigned long size;
+
+	oi.sizep = &size;
+
+	if (packfile_store_read_object_info(store, oid, &oi, 0) ||
+	    oi.u.packed.is_delta ||
+	    repo_settings_get_big_file_threshold(store->odb->repo) >= size)
+		return -1;
+
+	in_pack_type = unpack_object_header(oi.u.packed.pack,
+					    &window,
+					    &oi.u.packed.offset,
+					    &size);
+	unuse_pack(&window);
+	switch (in_pack_type) {
+	default:
+		return -1; /* we do not do deltas for now */
+	case OBJ_COMMIT:
+	case OBJ_TREE:
+	case OBJ_BLOB:
+	case OBJ_TAG:
+		break;
+	}
+
+	CALLOC_ARRAY(stream, 1);
+	stream->base.close = close_istream_pack_non_delta;
+	stream->base.read = read_istream_pack_non_delta;
+	stream->base.type = in_pack_type;
+	stream->base.size = size;
+	stream->z_state = ODB_PACKED_READ_STREAM_UNINITIALIZED;
+	stream->pack = oi.u.packed.pack;
+	stream->pos = oi.u.packed.offset;
+
+	*out = &stream->base;
+
+	return 0;
+}
diff --git a/packfile.h b/packfile.h
index 20e1cf17b2..59d162a3f4 100644
--- a/packfile.h
+++ b/packfile.h
@@ -9,6 +9,7 @@
 
 /* in odb.h */
 struct object_info;
+struct odb_read_stream;
 
 struct packed_git {
 	struct pack_window *windows;
@@ -177,6 +178,21 @@ void packfile_store_add_pack(struct packfile_store *store,
 	for (struct packfile_list_entry *e = packfile_store_get_packs(repo->objects->packfiles); \
 	     ((p) = (e ? e->pack : NULL)); e = e->next)
 
+int packfile_store_read_object_stream(struct odb_read_stream **out,
+				      struct packfile_store *store,
+				      const struct object_id *oid);
+
+/*
+ * Try to read the object identified by its ID from the object store and
+ * populate the object info with its data. Returns 1 in case the object was
+ * not found, 0 if it was and read successfully, and a negative error code in
+ * case the object was corrupted.
+ */
+int packfile_store_read_object_info(struct packfile_store *store,
+				    const struct object_id *oid,
+				    struct object_info *oi,
+				    unsigned flags);
+
 /*
  * Get all packs managed by the given store, including packfiles that are
  * referenced by multi-pack indices.
@@ -376,7 +392,6 @@ const struct packed_git *has_packed_and_bad(struct repository *, const struct ob
  * Iff a pack file in the given repository contains the object named by sha1,
  * return true and store its location to e.
  */
-int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e);
 int find_kept_pack_entry(struct repository *r, const struct object_id *oid, unsigned flags, struct pack_entry *e);
 
 int has_object_pack(struct repository *r, const struct object_id *oid);
diff --git a/parallel-checkout.c b/parallel-checkout.c
index fba6aa65a6..0bf4bd6d4a 100644
--- a/parallel-checkout.c
+++ b/parallel-checkout.c
@@ -13,7 +13,7 @@
 #include "read-cache-ll.h"
 #include "run-command.h"
 #include "sigchain.h"
-#include "streaming.h"
+#include "odb/streaming.h"
 #include "symlinks.h"
 #include "thread-utils.h"
 #include "trace2.h"
@@ -281,7 +281,8 @@ static int write_pc_item_to_fd(struct parallel_checkout_item *pc_item, int fd,
 
 	filter = get_stream_filter_ca(&pc_item->ca, &pc_item->ce->oid);
 	if (filter) {
-		if (stream_blob_to_fd(fd, &pc_item->ce->oid, filter, 1)) {
+		if (odb_stream_blob_to_fd(the_repository->objects, fd,
+					  &pc_item->ce->oid, filter, 1)) {
 			/* On error, reset fd to try writing without streaming */
 			if (reset_fd(fd, path))
 				return -1;
diff --git a/streaming.c b/streaming.c
deleted file mode 100644
index 00ad649ae3..0000000000
--- a/streaming.c
+++ /dev/null
@@ -1,561 +0,0 @@
-/*
- * Copyright (c) 2011, Google Inc.
- */
-
-#define USE_THE_REPOSITORY_VARIABLE
-
-#include "git-compat-util.h"
-#include "convert.h"
-#include "environment.h"
-#include "streaming.h"
-#include "repository.h"
-#include "object-file.h"
-#include "odb.h"
-#include "replace-object.h"
-#include "packfile.h"
-
-typedef int (*open_istream_fn)(struct git_istream *,
-			       struct repository *,
-			       const struct object_id *,
-			       enum object_type *);
-typedef int (*close_istream_fn)(struct git_istream *);
-typedef ssize_t (*read_istream_fn)(struct git_istream *, char *, size_t);
-
-#define FILTER_BUFFER (1024*16)
-
-struct filtered_istream {
-	struct git_istream *upstream;
-	struct stream_filter *filter;
-	char ibuf[FILTER_BUFFER];
-	char obuf[FILTER_BUFFER];
-	int i_end, i_ptr;
-	int o_end, o_ptr;
-	int input_finished;
-};
-
-struct git_istream {
-	open_istream_fn open;
-	close_istream_fn close;
-	read_istream_fn read;
-
-	unsigned long size; /* inflated size of full object */
-	git_zstream z;
-	enum { z_unused, z_used, z_done, z_error } z_state;
-
-	union {
-		struct {
-			char *buf; /* from odb_read_object_info_extended() */
-			unsigned long read_ptr;
-		} incore;
-
-		struct {
-			void *mapped;
-			unsigned long mapsize;
-			char hdr[32];
-			int hdr_avail;
-			int hdr_used;
-		} loose;
-
-		struct {
-			struct packed_git *pack;
-			off_t pos;
-		} in_pack;
-
-		struct filtered_istream filtered;
-	} u;
-};
-
-/*****************************************************************
- *
- * Common helpers
- *
- *****************************************************************/
-
-static void close_deflated_stream(struct git_istream *st)
-{
-	if (st->z_state == z_used)
-		git_inflate_end(&st->z);
-}
-
-
-/*****************************************************************
- *
- * Filtered stream
- *
- *****************************************************************/
-
-static int close_istream_filtered(struct git_istream *st)
-{
-	free_stream_filter(st->u.filtered.filter);
-	return close_istream(st->u.filtered.upstream);
-}
-
-static ssize_t read_istream_filtered(struct git_istream *st, char *buf,
-				     size_t sz)
-{
-	struct filtered_istream *fs = &(st->u.filtered);
-	size_t filled = 0;
-
-	while (sz) {
-		/* do we already have filtered output? */
-		if (fs->o_ptr < fs->o_end) {
-			size_t to_move = fs->o_end - fs->o_ptr;
-			if (sz < to_move)
-				to_move = sz;
-			memcpy(buf + filled, fs->obuf + fs->o_ptr, to_move);
-			fs->o_ptr += to_move;
-			sz -= to_move;
-			filled += to_move;
-			continue;
-		}
-		fs->o_end = fs->o_ptr = 0;
-
-		/* do we have anything to feed the filter with? */
-		if (fs->i_ptr < fs->i_end) {
-			size_t to_feed = fs->i_end - fs->i_ptr;
-			size_t to_receive = FILTER_BUFFER;
-			if (stream_filter(fs->filter,
-					  fs->ibuf + fs->i_ptr, &to_feed,
-					  fs->obuf, &to_receive))
-				return -1;
-			fs->i_ptr = fs->i_end - to_feed;
-			fs->o_end = FILTER_BUFFER - to_receive;
-			continue;
-		}
-
-		/* tell the filter to drain upon no more input */
-		if (fs->input_finished) {
-			size_t to_receive = FILTER_BUFFER;
-			if (stream_filter(fs->filter,
-					  NULL, NULL,
-					  fs->obuf, &to_receive))
-				return -1;
-			fs->o_end = FILTER_BUFFER - to_receive;
-			if (!fs->o_end)
-				break;
-			continue;
-		}
-		fs->i_end = fs->i_ptr = 0;
-
-		/* refill the input from the upstream */
-		if (!fs->input_finished) {
-			fs->i_end = read_istream(fs->upstream, fs->ibuf, FILTER_BUFFER);
-			if (fs->i_end < 0)
-				return -1;
-			if (fs->i_end)
-				continue;
-		}
-		fs->input_finished = 1;
-	}
-	return filled;
-}
-
-static struct git_istream *attach_stream_filter(struct git_istream *st,
-						struct stream_filter *filter)
-{
-	struct git_istream *ifs = xmalloc(sizeof(*ifs));
-	struct filtered_istream *fs = &(ifs->u.filtered);
-
-	ifs->close = close_istream_filtered;
-	ifs->read = read_istream_filtered;
-	fs->upstream = st;
-	fs->filter = filter;
-	fs->i_end = fs->i_ptr = 0;
-	fs->o_end = fs->o_ptr = 0;
-	fs->input_finished = 0;
-	ifs->size = -1; /* unknown */
-	return ifs;
-}
-
-/*****************************************************************
- *
- * Loose object stream
- *
- *****************************************************************/
-
-static ssize_t read_istream_loose(struct git_istream *st, char *buf, size_t sz)
-{
-	size_t total_read = 0;
-
-	switch (st->z_state) {
-	case z_done:
-		return 0;
-	case z_error:
-		return -1;
-	default:
-		break;
-	}
-
-	if (st->u.loose.hdr_used < st->u.loose.hdr_avail) {
-		size_t to_copy = st->u.loose.hdr_avail - st->u.loose.hdr_used;
-		if (sz < to_copy)
-			to_copy = sz;
-		memcpy(buf, st->u.loose.hdr + st->u.loose.hdr_used, to_copy);
-		st->u.loose.hdr_used += to_copy;
-		total_read += to_copy;
-	}
-
-	while (total_read < sz) {
-		int status;
-
-		st->z.next_out = (unsigned char *)buf + total_read;
-		st->z.avail_out = sz - total_read;
-		status = git_inflate(&st->z, Z_FINISH);
-
-		total_read = st->z.next_out - (unsigned char *)buf;
-
-		if (status == Z_STREAM_END) {
-			git_inflate_end(&st->z);
-			st->z_state = z_done;
-			break;
-		}
-		if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) {
-			git_inflate_end(&st->z);
-			st->z_state = z_error;
-			return -1;
-		}
-	}
-	return total_read;
-}
-
-static int close_istream_loose(struct git_istream *st)
-{
-	close_deflated_stream(st);
-	munmap(st->u.loose.mapped, st->u.loose.mapsize);
-	return 0;
-}
-
-static int open_istream_loose(struct git_istream *st, struct repository *r,
-			      const struct object_id *oid,
-			      enum object_type *type)
-{
-	struct object_info oi = OBJECT_INFO_INIT;
-	struct odb_source *source;
-
-	oi.sizep = &st->size;
-	oi.typep = type;
-
-	odb_prepare_alternates(r->objects);
-	for (source = r->objects->sources; source; source = source->next) {
-		st->u.loose.mapped = odb_source_loose_map_object(source, oid,
-								 &st->u.loose.mapsize);
-		if (st->u.loose.mapped)
-			break;
-	}
-	if (!st->u.loose.mapped)
-		return -1;
-
-	switch (unpack_loose_header(&st->z, st->u.loose.mapped,
-				    st->u.loose.mapsize, st->u.loose.hdr,
-				    sizeof(st->u.loose.hdr))) {
-	case ULHR_OK:
-		break;
-	case ULHR_BAD:
-	case ULHR_TOO_LONG:
-		goto error;
-	}
-	if (parse_loose_header(st->u.loose.hdr, &oi) < 0 || *type < 0)
-		goto error;
-
-	st->u.loose.hdr_used = strlen(st->u.loose.hdr) + 1;
-	st->u.loose.hdr_avail = st->z.total_out;
-	st->z_state = z_used;
-	st->close = close_istream_loose;
-	st->read = read_istream_loose;
-
-	return 0;
-error:
-	git_inflate_end(&st->z);
-	munmap(st->u.loose.mapped, st->u.loose.mapsize);
-	return -1;
-}
-
-
-/*****************************************************************
- *
- * Non-delta packed object stream
- *
- *****************************************************************/
-
-static ssize_t read_istream_pack_non_delta(struct git_istream *st, char *buf,
-					   size_t sz)
-{
-	size_t total_read = 0;
-
-	switch (st->z_state) {
-	case z_unused:
-		memset(&st->z, 0, sizeof(st->z));
-		git_inflate_init(&st->z);
-		st->z_state = z_used;
-		break;
-	case z_done:
-		return 0;
-	case z_error:
-		return -1;
-	case z_used:
-		break;
-	}
-
-	while (total_read < sz) {
-		int status;
-		struct pack_window *window = NULL;
-		unsigned char *mapped;
-
-		mapped = use_pack(st->u.in_pack.pack, &window,
-				  st->u.in_pack.pos, &st->z.avail_in);
-
-		st->z.next_out = (unsigned char *)buf + total_read;
-		st->z.avail_out = sz - total_read;
-		st->z.next_in = mapped;
-		status = git_inflate(&st->z, Z_FINISH);
-
-		st->u.in_pack.pos += st->z.next_in - mapped;
-		total_read = st->z.next_out - (unsigned char *)buf;
-		unuse_pack(&window);
-
-		if (status == Z_STREAM_END) {
-			git_inflate_end(&st->z);
-			st->z_state = z_done;
-			break;
-		}
-
-		/*
-		 * Unlike the loose object case, we do not have to worry here
-		 * about running out of input bytes and spinning infinitely. If
-		 * we get Z_BUF_ERROR due to too few input bytes, then we'll
-		 * replenish them in the next use_pack() call when we loop. If
-		 * we truly hit the end of the pack (i.e., because it's corrupt
-		 * or truncated), then use_pack() catches that and will die().
-		 */
-		if (status != Z_OK && status != Z_BUF_ERROR) {
-			git_inflate_end(&st->z);
-			st->z_state = z_error;
-			return -1;
-		}
-	}
-	return total_read;
-}
-
-static int close_istream_pack_non_delta(struct git_istream *st)
-{
-	close_deflated_stream(st);
-	return 0;
-}
-
-static int open_istream_pack_non_delta(struct git_istream *st,
-				       struct repository *r UNUSED,
-				       const struct object_id *oid UNUSED,
-				       enum object_type *type UNUSED)
-{
-	struct pack_window *window;
-	enum object_type in_pack_type;
-
-	window = NULL;
-
-	in_pack_type = unpack_object_header(st->u.in_pack.pack,
-					    &window,
-					    &st->u.in_pack.pos,
-					    &st->size);
-	unuse_pack(&window);
-	switch (in_pack_type) {
-	default:
-		return -1; /* we do not do deltas for now */
-	case OBJ_COMMIT:
-	case OBJ_TREE:
-	case OBJ_BLOB:
-	case OBJ_TAG:
-		break;
-	}
-	st->z_state = z_unused;
-	st->close = close_istream_pack_non_delta;
-	st->read = read_istream_pack_non_delta;
-
-	return 0;
-}
-
-
-/*****************************************************************
- *
- * In-core stream
- *
- *****************************************************************/
-
-static int close_istream_incore(struct git_istream *st)
-{
-	free(st->u.incore.buf);
-	return 0;
-}
-
-static ssize_t read_istream_incore(struct git_istream *st, char *buf, size_t sz)
-{
-	size_t read_size = sz;
-	size_t remainder = st->size - st->u.incore.read_ptr;
-
-	if (remainder <= read_size)
-		read_size = remainder;
-	if (read_size) {
-		memcpy(buf, st->u.incore.buf + st->u.incore.read_ptr, read_size);
-		st->u.incore.read_ptr += read_size;
-	}
-	return read_size;
-}
-
-static int open_istream_incore(struct git_istream *st, struct repository *r,
-			       const struct object_id *oid, enum object_type *type)
-{
-	struct object_info oi = OBJECT_INFO_INIT;
-
-	st->u.incore.read_ptr = 0;
-	st->close = close_istream_incore;
-	st->read = read_istream_incore;
-
-	oi.typep = type;
-	oi.sizep = &st->size;
-	oi.contentp = (void **)&st->u.incore.buf;
-	return odb_read_object_info_extended(r->objects, oid, &oi,
-					     OBJECT_INFO_DIE_IF_CORRUPT);
-}
-
-/*****************************************************************************
- * static helpers variables and functions for users of streaming interface
- *****************************************************************************/
-
-static int istream_source(struct git_istream *st,
-			  struct repository *r,
-			  const struct object_id *oid,
-			  enum object_type *type)
-{
-	unsigned long size;
-	int status;
-	struct object_info oi = OBJECT_INFO_INIT;
-
-	oi.typep = type;
-	oi.sizep = &size;
-	status = odb_read_object_info_extended(r->objects, oid, &oi, 0);
-	if (status < 0)
-		return status;
-
-	switch (oi.whence) {
-	case OI_LOOSE:
-		st->open = open_istream_loose;
-		return 0;
-	case OI_PACKED:
-		if (!oi.u.packed.is_delta &&
-		    repo_settings_get_big_file_threshold(the_repository) < size) {
-			st->u.in_pack.pack = oi.u.packed.pack;
-			st->u.in_pack.pos = oi.u.packed.offset;
-			st->open = open_istream_pack_non_delta;
-			return 0;
-		}
-		/* fallthru */
-	default:
-		st->open = open_istream_incore;
-		return 0;
-	}
-}
-
-/****************************************************************
- * Users of streaming interface
- ****************************************************************/
-
-int close_istream(struct git_istream *st)
-{
-	int r = st->close(st);
-	free(st);
-	return r;
-}
-
-ssize_t read_istream(struct git_istream *st, void *buf, size_t sz)
-{
-	return st->read(st, buf, sz);
-}
-
-struct git_istream *open_istream(struct repository *r,
-				 const struct object_id *oid,
-				 enum object_type *type,
-				 unsigned long *size,
-				 struct stream_filter *filter)
-{
-	struct git_istream *st = xmalloc(sizeof(*st));
-	const struct object_id *real = lookup_replace_object(r, oid);
-	int ret = istream_source(st, r, real, type);
-
-	if (ret) {
-		free(st);
-		return NULL;
-	}
-
-	if (st->open(st, r, real, type)) {
-		if (open_istream_incore(st, r, real, type)) {
-			free(st);
-			return NULL;
-		}
-	}
-	if (filter) {
-		/* Add "&& !is_null_stream_filter(filter)" for performance */
-		struct git_istream *nst = attach_stream_filter(st, filter);
-		if (!nst) {
-			close_istream(st);
-			return NULL;
-		}
-		st = nst;
-	}
-
-	*size = st->size;
-	return st;
-}
-
-int stream_blob_to_fd(int fd, const struct object_id *oid, struct stream_filter *filter,
-		      int can_seek)
-{
-	struct git_istream *st;
-	enum object_type type;
-	unsigned long sz;
-	ssize_t kept = 0;
-	int result = -1;
-
-	st = open_istream(the_repository, oid, &type, &sz, filter);
-	if (!st) {
-		if (filter)
-			free_stream_filter(filter);
-		return result;
-	}
-	if (type != OBJ_BLOB)
-		goto close_and_exit;
-	for (;;) {
-		char buf[1024 * 16];
-		ssize_t wrote, holeto;
-		ssize_t readlen = read_istream(st, buf, sizeof(buf));
-
-		if (readlen < 0)
-			goto close_and_exit;
-		if (!readlen)
-			break;
-		if (can_seek && sizeof(buf) == readlen) {
-			for (holeto = 0; holeto < readlen; holeto++)
-				if (buf[holeto])
-					break;
-			if (readlen == holeto) {
-				kept += holeto;
-				continue;
-			}
-		}
-
-		if (kept && lseek(fd, kept, SEEK_CUR) == (off_t) -1)
-			goto close_and_exit;
-		else
-			kept = 0;
-		wrote = write_in_full(fd, buf, readlen);
-
-		if (wrote < 0)
-			goto close_and_exit;
-	}
-	if (kept && (lseek(fd, kept - 1, SEEK_CUR) == (off_t) -1 ||
-		     xwrite(fd, "", 1) != 1))
-		goto close_and_exit;
-	result = 0;
-
- close_and_exit:
-	close_istream(st);
-	return result;
-}
diff --git a/streaming.h b/streaming.h
deleted file mode 100644
index bd27f59e57..0000000000
--- a/streaming.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Copyright (c) 2011, Google Inc.
- */
-#ifndef STREAMING_H
-#define STREAMING_H 1
-
-#include "object.h"
-
-/* opaque */
-struct git_istream;
-struct stream_filter;
-
-struct git_istream *open_istream(struct repository *, const struct object_id *,
-				 enum object_type *, unsigned long *,
-				 struct stream_filter *);
-int close_istream(struct git_istream *);
-ssize_t read_istream(struct git_istream *, void *, size_t);
-
-int stream_blob_to_fd(int fd, const struct object_id *, struct stream_filter *, int can_seek);
-
-#endif /* STREAMING_H */
author	Junio C Hamano <gitster@pobox.com>	2025-12-16 11:08:34 +0900
committer	Junio C Hamano <gitster@pobox.com>	2025-12-16 11:08:34 +0900
commit	dbe54273a707406409fa386db5b105557b31a831 (patch)
tree	0def4e76638eff5ca5628d3cb2d0acb157c7bf37
parent	d8af7cadaa79d5837d73ec949e10b57dedb43e9b (diff)
parent	7b940286527ec2175dffbb317f47e080bb37cf3e (diff)
download	git-dbe54273a707406409fa386db5b105557b31a831.tar.gz