reftable/record: handle overflows when decoding varints

The logic to decode varints isn't able to detect integer overflows: as
long as the buffer still has more data available, and as long as the
current byte has its 0x80 bit set, we'll continue to add up these values
to the result. This will eventually cause the `uint64_t` to overflow, at
which point we'll return an invalid result.

Refactor the function so that it is able to detect such overflows. The
implementation is basically copied from Git's own `decode_varint()`,
which already knows to handle overflows. The only adjustment is that we
also take into account the string view's length in order to not overrun
it. The reftable documentation explicitly notes that those two encoding
schemas are supposed to be the same:

    Varint encoding
    ^^^^^^^^^^^^^^^

    Varint encoding is identical to the ofs-delta encoding method used
    within pack files.

    Decoder works as follows:

    ....
    val = buf[ptr] & 0x7f
    while (buf[ptr] & 0x80) {
      ptr++
      val = ((val + 1) << 7) | (buf[ptr] & 0x7f)
    }
    ....

While at it, refactor `put_var_int()` in the same way by copying over
the implementation of `encode_varint()`. While `put_var_int()` doesn't
have an issue with overflows, it generates warnings with -Wsign-compare.
The implementation of `encode_varint()` doesn't, is battle-tested and at
the same time way simpler than what we currently have.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
author: Patrick Steinhardt <ps@pks.im> 2025-01-20 17:17:21 +0100
committer: Junio C Hamano <gitster@pobox.com> 2025-01-21 14:20:28 -0800
commit: 072e3aa3a5c29ca1b68a7aaf570a0a8e7ab67127 (patch)
tree: d4f6f9bb3fa5c682076688d2b6defd56a8fe8325 /reftable/record.c
parent: a204f92d1cb08f3a0450551b5e6759284bbab12a (diff)
download: git-072e3aa3a5c29ca1b68a7aaf570a0a8e7ab67127.tar.gz
1 files changed, 32 insertions, 30 deletions
diff --git a/reftable/record.c b/reftable/record.c
index 04429d23fe..a55ce76aeb 100644
--- a/reftable/record.c
+++ b/reftable/record.c
@@ -21,47 +21,49 @@ static void *reftable_record_data(struct reftable_record *rec);
 
 int get_var_int(uint64_t *dest, struct string_view *in)
 {
-	int ptr = 0;
+	const unsigned char *buf = in->buf;
+	unsigned char c;
 	uint64_t val;
 
-	if (in->len == 0)
+	if (!in->len)
 		return -1;
-	val = in->buf[ptr] & 0x7f;
-
-	while (in->buf[ptr] & 0x80) {
-		ptr++;
-		if (ptr > in->len) {
+	c = *buf++;
+	val = c & 0x7f;
+
+	while (c & 0x80) {
+		/*
+		 * We use a micro-optimization here: whenever we see that the
+		 * 0x80 bit is set, we know that the remainder of the value
+		 * cannot be 0. The zero-values thus doesn't need to be encoded
+		 * at all, which is why we subtract 1 when encoding and add 1
+		 * when decoding.
+		 *
+		 * This allows us to save a byte in some edge cases.
+		 */
+		val += 1;
+		if (!val || (val & (uint64_t)(~0ULL << (64 - 7))))
+			return -1; /* overflow */
+		if (buf >= in->buf + in->len)
 			return -1;
-		}
-		val = (val + 1) << 7 | (uint64_t)(in->buf[ptr] & 0x7f);
+		c = *buf++;
+		val = (val << 7) + (c & 0x7f);
 	}
 
 	*dest = val;
-	return ptr + 1;
+	return buf - in->buf;
 }
 
-int put_var_int(struct string_view *dest, uint64_t val)
+int put_var_int(struct string_view *dest, uint64_t value)
 {
-	uint8_t buf[10] = { 0 };
-	int i = 9;
-	int n = 0;
-	buf[i] = (uint8_t)(val & 0x7f);
-	i--;
-	while (1) {
-		val >>= 7;
-		if (!val) {
-			break;
-		}
-		val--;
-		buf[i] = 0x80 | (uint8_t)(val & 0x7f);
-		i--;
-	}
-
-	n = sizeof(buf) - i - 1;
-	if (dest->len < n)
+	unsigned char varint[10];
+	unsigned pos = sizeof(varint) - 1;
+	varint[pos] = value & 0x7f;
+	while (value >>= 7)
+		varint[--pos] = 0x80 | (--value & 0x7f);
+	if (dest->len < sizeof(varint) - pos)
 		return -1;
-	memcpy(dest->buf, &buf[i + 1], n);
-	return n;
+	memcpy(dest->buf, varint + pos, sizeof(varint) - pos);
+	return sizeof(varint) - pos;
 }
 
 int reftable_is_block_type(uint8_t typ)
author	Patrick Steinhardt <ps@pks.im>	2025-01-20 17:17:21 +0100
committer	Junio C Hamano <gitster@pobox.com>	2025-01-21 14:20:28 -0800
commit	072e3aa3a5c29ca1b68a7aaf570a0a8e7ab67127 (patch)
tree	d4f6f9bb3fa5c682076688d2b6defd56a8fe8325 /reftable/record.c
parent	a204f92d1cb08f3a0450551b5e6759284bbab12a (diff)
download	git-072e3aa3a5c29ca1b68a7aaf570a0a8e7ab67127.tar.gz