From b7d36ef4a644c69c37e64c7c813546a68264b924 Mon Sep 17 00:00:00 2001
From: Edward Thomson <ethomson@edwardthomson.com>
Date: Tue, 12 Dec 2017 12:24:11 +0000
Subject: zstream: treat `Z_BUF_ERROR` as non-fatal

zlib will return `Z_BUF_ERROR` whenever there is more input to inflate
or deflate than there is output to store the result.  This is normal for
us as we iterate through the input, particularly with very large input
buffers.
---
 src/zstream.c | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/src/zstream.c b/src/zstream.c
index 4895bdb16..1c9d506b1 100644
--- a/src/zstream.c
+++ b/src/zstream.c
@@ -14,17 +14,22 @@
 #define ZSTREAM_BUFFER_SIZE (1024 * 1024)
 #define ZSTREAM_BUFFER_MIN_EXTRA 8
 
-static int zstream_seterr(git_zstream *zs)
+GIT_INLINE(int) zstream_seterr(git_zstream *zs)
 {
-	if (zs->zerr == Z_OK || zs->zerr == Z_STREAM_END)
+	switch (zs->zerr) {
+	case Z_OK:
+	case Z_STREAM_END:
+	case Z_BUF_ERROR: /* not fatal; we retry with a larger buffer */
 		return 0;
-
-	if (zs->zerr == Z_MEM_ERROR)
+	case Z_MEM_ERROR:
 		giterr_set_oom();
-	else if (zs->z.msg)
-		giterr_set_str(GITERR_ZLIB, zs->z.msg);
-	else
-		giterr_set(GITERR_ZLIB, "unknown compression error");
+		break;
+	default:
+		if (zs->z.msg)
+			giterr_set_str(GITERR_ZLIB, zs->z.msg);
+		else
+			giterr_set(GITERR_ZLIB, "unknown compression error");
+	}
 
 	return -1;
 }
@@ -119,8 +124,8 @@ int git_zstream_get_output(void *out, size_t *out_len, git_zstream *zstream)
 		else
 			zstream->zerr = deflate(&zstream->z, zflush);
 
-		if (zstream->zerr == Z_STREAM_ERROR)
-			return zstream_seterr(zstream);
+		if (zstream_seterr(zstream))
+			return -1;
 
 		out_used = (out_queued - zstream->z.avail_out);
 		out_remain -= out_used;
-- 
cgit v1.2.1


From 86219f40689c85ec4418575223f4376beffa45af Mon Sep 17 00:00:00 2001
From: Edward Thomson <ethomson@edwardthomson.com>
Date: Thu, 30 Nov 2017 15:40:13 +0000
Subject: util: introduce `git__prefixncmp` and consolidate implementations

Introduce `git__prefixncmp` that will search up to the first `n`
characters of a string to see if it is prefixed by another string.
This is useful for examining if a non-null terminated character
array is prefixed by a particular substring.

Consolidate the various implementations of `git__prefixcmp` around a
single core implementation and add some test cases to validate its
behavior.
---
 src/util.c          | 44 ++++++++++++++++++++++++++++----------------
 src/util.h          |  1 +
 tests/core/string.c | 42 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 71 insertions(+), 16 deletions(-)

diff --git a/src/util.c b/src/util.c
index 6ae5cdaec..1760a315e 100644
--- a/src/util.c
+++ b/src/util.c
@@ -252,35 +252,47 @@ void git__strtolower(char *str)
 	git__strntolower(str, strlen(str));
 }
 
-int git__prefixcmp(const char *str, const char *prefix)
+GIT_INLINE(int) prefixcmp(const char *str, size_t str_n, const char *prefix, bool icase)
 {
-	for (;;) {
-		unsigned char p = *(prefix++), s;
+	int s, p;
+
+	while (str_n--) {
+		s = (unsigned char)*str++;
+		p = (unsigned char)*prefix++;
+
+		if (icase) {
+			s = git__tolower(s);
+			p = git__tolower(p);
+		}
+
 		if (!p)
 			return 0;
-		if ((s = *(str++)) != p)
+
+		if (s != p)
 			return s - p;
 	}
+
+	return (0 - *prefix);
 }
 
-int git__prefixcmp_icase(const char *str, const char *prefix)
+int git__prefixcmp(const char *str, const char *prefix)
 {
-	return strncasecmp(str, prefix, strlen(prefix));
+	return prefixcmp(str, SIZE_MAX, prefix, false);
 }
 
-int git__prefixncmp_icase(const char *str, size_t str_n, const char *prefix)
+int git__prefixncmp(const char *str, size_t str_n, const char *prefix)
 {
-	int s, p;
-
-	while(str_n--) {
-		s = (unsigned char)git__tolower(*str++);
-		p = (unsigned char)git__tolower(*prefix++);
+	return prefixcmp(str, str_n, prefix, false);
+}
 
-		if (s != p)
-			return s - p;
-	}
+int git__prefixcmp_icase(const char *str, const char *prefix)
+{
+	return prefixcmp(str, SIZE_MAX, prefix, true);
+}
 
-	return (0 - *prefix);
+int git__prefixncmp_icase(const char *str, size_t str_n, const char *prefix)
+{
+	return prefixcmp(str, str_n, prefix, true);
 }
 
 int git__suffixcmp(const char *str, const char *suffix)
diff --git a/src/util.h b/src/util.h
index 7c9a54ff1..80ee8e647 100644
--- a/src/util.h
+++ b/src/util.h
@@ -180,6 +180,7 @@ GIT_INLINE(void) git__free(void *ptr)
 
 extern int git__prefixcmp(const char *str, const char *prefix);
 extern int git__prefixcmp_icase(const char *str, const char *prefix);
+extern int git__prefixncmp(const char *str, size_t str_n, const char *prefix);
 extern int git__prefixncmp_icase(const char *str, size_t str_n, const char *prefix);
 extern int git__suffixcmp(const char *str, const char *suffix);
 
diff --git a/tests/core/string.c b/tests/core/string.c
index 90e8fa027..85db0c662 100644
--- a/tests/core/string.c
+++ b/tests/core/string.c
@@ -40,6 +40,48 @@ void test_core_string__2(void)
 	cl_assert(git__strcasesort_cmp("fooBar", "foobar") < 0);
 }
 
+/* compare prefixes with len */
+void test_core_string__prefixncmp(void)
+{
+	cl_assert(git__prefixncmp("", 0, "") == 0);
+	cl_assert(git__prefixncmp("a", 1, "") == 0);
+	cl_assert(git__prefixncmp("", 0, "a") < 0);
+	cl_assert(git__prefixncmp("a", 1, "b") < 0);
+	cl_assert(git__prefixncmp("b", 1, "a") > 0);
+	cl_assert(git__prefixncmp("ab", 2, "a") == 0);
+	cl_assert(git__prefixncmp("ab", 1, "a") == 0);
+	cl_assert(git__prefixncmp("ab", 2, "ac") < 0);
+	cl_assert(git__prefixncmp("a", 1, "ac") < 0);
+	cl_assert(git__prefixncmp("ab", 1, "ac") < 0);
+	cl_assert(git__prefixncmp("ab", 2, "aa") > 0);
+	cl_assert(git__prefixncmp("ab", 1, "aa") < 0);
+}
+
+/* compare prefixes with len, ignoring case */
+void test_core_string__prefixncmp_icase(void)
+{
+	cl_assert(git__prefixncmp_icase("", 0, "") == 0);
+	cl_assert(git__prefixncmp_icase("a", 1, "") == 0);
+	cl_assert(git__prefixncmp_icase("", 0, "a") < 0);
+	cl_assert(git__prefixncmp_icase("a", 1, "b") < 0);
+	cl_assert(git__prefixncmp_icase("A", 1, "b") < 0);
+	cl_assert(git__prefixncmp_icase("a", 1, "B") < 0);
+	cl_assert(git__prefixncmp_icase("b", 1, "a") > 0);
+	cl_assert(git__prefixncmp_icase("B", 1, "a") > 0);
+	cl_assert(git__prefixncmp_icase("b", 1, "A") > 0);
+	cl_assert(git__prefixncmp_icase("ab", 2, "a") == 0);
+	cl_assert(git__prefixncmp_icase("Ab", 2, "a") == 0);
+	cl_assert(git__prefixncmp_icase("ab", 2, "A") == 0);
+	cl_assert(git__prefixncmp_icase("ab", 1, "a") == 0);
+	cl_assert(git__prefixncmp_icase("ab", 2, "ac") < 0);
+	cl_assert(git__prefixncmp_icase("Ab", 2, "ac") < 0);
+	cl_assert(git__prefixncmp_icase("ab", 2, "Ac") < 0);
+	cl_assert(git__prefixncmp_icase("a", 1, "ac") < 0);
+	cl_assert(git__prefixncmp_icase("ab", 1, "ac") < 0);
+	cl_assert(git__prefixncmp_icase("ab", 2, "aa") > 0);
+	cl_assert(git__prefixncmp_icase("ab", 1, "aa") < 0);
+}
+
 void test_core_string__strcmp(void)
 {
 	cl_assert(git__strcmp("", "") == 0);
-- 
cgit v1.2.1


From dacc32910e36e79ba108bef507e3aec9b0626e3c Mon Sep 17 00:00:00 2001
From: Edward Thomson <ethomson@edwardthomson.com>
Date: Thu, 30 Nov 2017 15:49:05 +0000
Subject: odb: test loose reading/writing large objects

Introduce a test for very large objects in the ODB.  Write a large
object (5 GB) and ensure that the write succeeds and provides us the
expected object ID.  Introduce a test that writes that file and
ensures that we can subsequently read it.
---
 tests/odb/largefiles.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 95 insertions(+)
 create mode 100644 tests/odb/largefiles.c

diff --git a/tests/odb/largefiles.c b/tests/odb/largefiles.c
new file mode 100644
index 000000000..9a91cf1f0
--- /dev/null
+++ b/tests/odb/largefiles.c
@@ -0,0 +1,95 @@
+#include "clar_libgit2.h"
+#include "git2/odb_backend.h"
+
+static git_repository *repo;
+static git_odb *odb;
+
+void test_odb_largefiles__initialize(void)
+{
+	repo = cl_git_sandbox_init("testrepo.git");
+	cl_git_pass(git_repository_odb(&odb, repo));
+}
+
+void test_odb_largefiles__cleanup(void)
+{
+	git_odb_free(odb);
+	cl_git_sandbox_cleanup();
+}
+
+static void writefile(git_oid *oid)
+{
+	static git_odb_stream *stream;
+	git_buf buf = GIT_BUF_INIT;
+	size_t i;
+
+	for (i = 0; i < 3041; i++)
+		cl_git_pass(git_buf_puts(&buf, "Hello, world.\n"));
+
+	cl_git_pass(git_odb_open_wstream(&stream, odb, 5368709122, GIT_OBJ_BLOB));
+	for (i = 0; i < 126103; i++)
+		cl_git_pass(git_odb_stream_write(stream, buf.ptr, buf.size));
+
+	cl_git_pass(git_odb_stream_finalize_write(oid, stream));
+
+	git_odb_stream_free(stream);
+	git_buf_free(&buf);
+}
+
+void test_odb_largefiles__write_from_memory(void)
+{
+	git_oid expected, oid;
+	git_buf buf = GIT_BUF_INIT;
+	size_t i;
+
+#ifndef GIT_ARCH_64
+	cl_skip();
+#endif
+
+	if (!cl_is_env_set("GITTEST_INVASIVE_FS_SIZE"))
+		cl_skip();
+
+	if (!cl_is_env_set("GITTEST_INVASIVE_MEMORY"))
+		cl_skip();
+
+	for (i = 0; i < (3041*126103); i++)
+		cl_git_pass(git_buf_puts(&buf, "Hello, world.\n"));
+
+	git_oid_fromstr(&expected, "3fb56989cca483b21ba7cb0a6edb229d10e1c26c");
+	cl_git_pass(git_odb_write(&oid, odb, buf.ptr, buf.size, GIT_OBJ_BLOB));
+
+	cl_assert_equal_oid(&expected, &oid);
+}
+
+void test_odb_largefiles__streamwrite(void)
+{
+	git_oid expected, oid;
+
+	if (!cl_is_env_set("GITTEST_INVASIVE_FS_SIZE"))
+		cl_skip();
+
+	git_oid_fromstr(&expected, "3fb56989cca483b21ba7cb0a6edb229d10e1c26c");
+	writefile(&oid);
+
+	cl_assert_equal_oid(&expected, &oid);
+}
+
+void test_odb_largefiles__read_into_memory(void)
+{
+	git_oid oid;
+	git_odb_object *obj;
+
+#ifndef GIT_ARCH_64
+	cl_skip();
+#endif
+
+	if (!cl_is_env_set("GITTEST_INVASIVE_FS_SIZE"))
+		cl_skip();
+
+	if (!cl_is_env_set("GITTEST_INVASIVE_MEMORY"))
+		cl_skip();
+
+	writefile(&oid);
+	cl_git_pass(git_odb_read(&obj, odb, &oid));
+
+	git_odb_object_free(obj);
+}
-- 
cgit v1.2.1


From d1e446550a966a1dbc5d765aa79fe9bc47a1c1a3 Mon Sep 17 00:00:00 2001
From: Edward Thomson <ethomson@edwardthomson.com>
Date: Thu, 30 Nov 2017 15:52:47 +0000
Subject: object: introduce git_object_stringn2type

Introduce an internal API to get the object type based on a
length-specified (not null terminated) string representation.  This can
be used to compare the (space terminated) object type name in a loose
object.

Reimplement `git_object_string2type` based on this API.
---
 src/object.c | 13 +++++++++++--
 src/object.h |  2 ++
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/src/object.c b/src/object.c
index 4d069a34c..48f561384 100644
--- a/src/object.c
+++ b/src/object.c
@@ -235,14 +235,23 @@ const char *git_object_type2string(git_otype type)
 }
 
 git_otype git_object_string2type(const char *str)
+{
+	if (!str)
+		return GIT_OBJ_BAD;
+
+	return git_object_stringn2type(str, strlen(str));
+}
+
+git_otype git_object_stringn2type(const char *str, size_t len)
 {
 	size_t i;
 
-	if (!str || !*str)
+	if (!str || !len || !*str)
 		return GIT_OBJ_BAD;
 
 	for (i = 0; i < ARRAY_SIZE(git_objects_table); i++)
-		if (!strcmp(str, git_objects_table[i].str))
+		if (*git_objects_table[i].str &&
+			!git__prefixncmp(str, len, git_objects_table[i].str))
 			return (git_otype)i;
 
 	return GIT_OBJ_BAD;
diff --git a/src/object.h b/src/object.h
index ff61c1d33..e46c9cafa 100644
--- a/src/object.h
+++ b/src/object.h
@@ -30,6 +30,8 @@ int git_object__from_odb_object(
 
 int git_object__resolve_to_type(git_object **obj, git_otype type);
 
+git_otype git_object_stringn2type(const char *str, size_t len);
+
 int git_oid__parse(git_oid *oid, const char **buffer_out, const char *buffer_end, const char *header);
 
 void git_oid__writebuf(git_buf *buf, const char *header, const git_oid *oid);
-- 
cgit v1.2.1


From ddefea750adcde06867b49d251760844540919fe Mon Sep 17 00:00:00 2001
From: Edward Thomson <ethomson@edwardthomson.com>
Date: Thu, 30 Nov 2017 15:55:59 +0000
Subject: odb: support large loose objects

zlib will only inflate/deflate an `int`'s worth of data at a time.
We need to loop through large files in order to ensure that we inflate
the entire file, not just an `int`'s worth of data.  Thankfully, we
already have this loop in our `git_zstream` layer.  Handle large objects
using the `git_zstream`.
---
 src/odb_loose.c | 190 +++++++++++++++++++++++++++-----------------------------
 1 file changed, 92 insertions(+), 98 deletions(-)

diff --git a/src/odb_loose.c b/src/odb_loose.c
index 72b47f091..2294931b4 100644
--- a/src/odb_loose.c
+++ b/src/odb_loose.c
@@ -16,6 +16,7 @@
 #include "delta.h"
 #include "filebuf.h"
 #include "object.h"
+#include "zstream.h"
 
 #include "git2/odb_backend.h"
 #include "git2/types.h"
@@ -119,53 +120,53 @@ static size_t get_binary_object_header(obj_hdr *hdr, git_buf *obj)
 	return used;
 }
 
-static size_t get_object_header(obj_hdr *hdr, unsigned char *data)
+static int parse_header(
+	obj_hdr *out,
+       	size_t *out_len,
+	const unsigned char *_data,
+	size_t data_len)
 {
-	char c, typename[10];
-	size_t size, used = 0;
+	const char *data = (char *)_data;
+	size_t i, typename_len, size_idx, size_len;
+	int64_t size;
 
-	/*
-	 * type name string followed by space.
-	 */
-	while ((c = data[used]) != ' ') {
-		typename[used++] = c;
-		if (used >= sizeof(typename))
-			return 0;
+	*out_len = 0;
+
+	/* find the object type name */
+	for (i = 0, typename_len = 0; i < data_len; i++, typename_len++) {
+		if (data[i] == ' ')
+			break;
 	}
-	typename[used] = 0;
-	if (used == 0)
-		return 0;
-	hdr->type = git_object_string2type(typename);
-	used++; /* consume the space */
 
-	/*
-	 * length follows immediately in decimal (without
-	 * leading zeros).
-	 */
-	size = data[used++] - '0';
-	if (size > 9)
-		return 0;
-	if (size) {
-		while ((c = data[used]) != '\0') {
-			size_t d = c - '0';
-			if (d > 9)
-				break;
-			used++;
-			size = size * 10 + d;
-		}
+	if (typename_len == data_len)
+		goto on_error;
+
+	out->type = git_object_stringn2type(data, typename_len);
+
+	size_idx = typename_len + 1;
+	for (i = size_idx, size_len = 0; i < data_len; i++, size_len++) {
+		if (data[i] == '\0')
+			break;
 	}
-	hdr->size = size;
 
-	/*
-	 * the length must be followed by a zero byte
-	 */
-	if (data[used++] != '\0')
-		return 0;
+	if (i == data_len)
+		goto on_error;
 
-	return used;
-}
+	if (git__strntol64(&size, &data[size_idx], size_len, NULL, 10) < 0 ||
+		size < 0)
+		goto on_error;
+
+	out->size = size;
 
+	if (GIT_ADD_SIZET_OVERFLOW(out_len, i, 1))
+		goto on_error;
+
+	return 0;
 
+on_error:
+	giterr_set(GITERR_OBJECT, "failed to parse loose object: invalid header");
+	return -1;
+}
 
 /***********************************************************
  *
@@ -269,45 +270,6 @@ static int inflate_buffer(void *in, size_t inlen, void *out, size_t outlen)
 	return 0;
 }
 
-static void *inflate_tail(z_stream *s, void *hb, size_t used, obj_hdr *hdr)
-{
-	unsigned char *buf, *head = hb;
-	size_t tail, alloc_size;
-
-	/*
-	 * allocate a buffer to hold the inflated data and copy the
-	 * initial sequence of inflated data from the tail of the
-	 * head buffer, if any.
-	 */
-	if (GIT_ADD_SIZET_OVERFLOW(&alloc_size, hdr->size, 1) ||
-		(buf = git__malloc(alloc_size)) == NULL) {
-		inflateEnd(s);
-		return NULL;
-	}
-	tail = s->total_out - used;
-	if (used > 0 && tail > 0) {
-		if (tail > hdr->size)
-			tail = hdr->size;
-		memcpy(buf, head + used, tail);
-	}
-	used = tail;
-
-	/*
-	 * inflate the remainder of the object data, if any
-	 */
-	if (hdr->size < used)
-		inflateEnd(s);
-	else {
-		set_stream_output(s, buf + used, hdr->size - used);
-		if (finish_inflate(s)) {
-			git__free(buf);
-			return NULL;
-		}
-	}
-
-	return buf;
-}
-
 /*
  * At one point, there was a loose object format that was intended to
  * mimic the format used in pack-files. This was to allow easy copying
@@ -354,43 +316,74 @@ static int inflate_packlike_loose_disk_obj(git_rawobj *out, git_buf *obj)
 
 static int inflate_disk_obj(git_rawobj *out, git_buf *obj)
 {
-	unsigned char head[64], *buf;
-	z_stream zs;
+	git_zstream zstream = GIT_ZSTREAM_INIT;
+	unsigned char head[64], *body = NULL;
+	size_t decompressed, head_len, body_len, alloc_size;
 	obj_hdr hdr;
-	size_t used;
+	int error;
 
-	/*
-	 * check for a pack-like loose object
-	 */
+	/* check for a pack-like loose object */
 	if (!is_zlib_compressed_data((unsigned char *)obj->ptr))
 		return inflate_packlike_loose_disk_obj(out, obj);
 
+	if ((error = git_zstream_init(&zstream, GIT_ZSTREAM_INFLATE)) < 0 ||
+		(error = git_zstream_set_input(&zstream, git_buf_cstr(obj), git_buf_len(obj))) < 0)
+		goto done;
+
+	decompressed = sizeof(head);
+
 	/*
-	 * inflate the initial part of the io buffer in order
-	 * to parse the object header (type and size).
-	 */
-	if (start_inflate(&zs, obj, head, sizeof(head)) < Z_OK ||
-		(used = get_object_header(&hdr, head)) == 0 ||
-		!git_object_typeisloose(hdr.type))
-	{
-		abort_inflate(&zs);
+	* inflate the initial part of the compressed buffer in order to parse the
+	* header; read the largest header possible, then push back the remainder.
+	*/
+	if ((error = git_zstream_get_output(head, &decompressed, &zstream)) < 0 ||
+		(error = parse_header(&hdr, &head_len, head, decompressed)) < 0)
+		goto done;
+
+	if (!git_object_typeisloose(hdr.type)) {
 		giterr_set(GITERR_ODB, "failed to inflate disk object");
-		return -1;
+		error = -1;
+		goto done;
 	}
 
 	/*
 	 * allocate a buffer and inflate the object data into it
 	 * (including the initial sequence in the head buffer).
 	 */
-	if ((buf = inflate_tail(&zs, head, used, &hdr)) == NULL)
-		return -1;
-	buf[hdr.size] = '\0';
+	if (GIT_ADD_SIZET_OVERFLOW(&alloc_size, hdr.size, 1) ||
+		(body = git__malloc(alloc_size)) == NULL) {
+		error = -1;
+		goto done;
+	}
 
-	out->data = buf;
+	assert(decompressed >= head_len);
+	body_len = decompressed - head_len;
+
+	if (body_len)
+		memcpy(body, head + head_len, body_len);
+
+	decompressed = hdr.size - body_len;
+	if ((error = git_zstream_get_output(body + body_len, &decompressed, &zstream)) < 0)
+		goto done;
+
+	if (!git_zstream_done(&zstream)) {
+		giterr_set(GITERR_ZLIB, "failed to finish zlib inflation: stream aborted prematurely");
+		error = -1;
+		goto done;
+	}
+
+	body[hdr.size] = '\0';
+
+	out->data = body;
 	out->len = hdr.size;
 	out->type = hdr.type;
 
-	return 0;
+done:
+	if (error < 0)
+		git__free(body);
+
+	git_zstream_free(&zstream);
+	return error;
 }
 
 
@@ -435,6 +428,7 @@ static int read_header_loose(git_rawobj *out, git_buf *loc)
 	git_file fd;
 	z_stream zs;
 	obj_hdr header_obj;
+	size_t header_len;
 	unsigned char raw_buffer[16], inflated_buffer[64];
 
 	assert(out && loc);
@@ -460,7 +454,7 @@ static int read_header_loose(git_rawobj *out, git_buf *loc)
 	}
 
 	if ((z_return != Z_STREAM_END && z_return != Z_BUF_ERROR)
-		|| get_object_header(&header_obj, inflated_buffer) == 0
+		|| parse_header(&header_obj, &header_len, inflated_buffer, sizeof(inflated_buffer)) < 0
 		|| git_object_typeisloose(header_obj.type) == 0)
 	{
 		giterr_set(GITERR_ZLIB, "failed to read loose object header");
-- 
cgit v1.2.1


From 8642feba7429ac2941a879a0870a84a83a3664cd Mon Sep 17 00:00:00 2001
From: Edward Thomson <ethomson@edwardthomson.com>
Date: Sun, 10 Dec 2017 17:23:44 +0000
Subject: zstream: use UINT_MAX sized chunks

Instead of paging to zlib in INT_MAX sized chunks, we can give it
as many as UINT_MAX bytes at a time.  zlib doesn't care how big
a buffer we give it; this simply results in fewer calls into zlib.
---
 src/zstream.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/zstream.c b/src/zstream.c
index 1c9d506b1..963c9a344 100644
--- a/src/zstream.c
+++ b/src/zstream.c
@@ -103,8 +103,9 @@ int git_zstream_get_output(void *out, size_t *out_len, git_zstream *zstream)
 		/* set up in data */
 		zstream->z.next_in  = (Bytef *)zstream->in;
 		zstream->z.avail_in = (uInt)zstream->in_len;
+
 		if ((size_t)zstream->z.avail_in != zstream->in_len) {
-			zstream->z.avail_in = INT_MAX;
+			zstream->z.avail_in = UINT_MAX;
 			zflush = Z_NO_FLUSH;
 		} else {
 			zflush = Z_FINISH;
@@ -115,7 +116,7 @@ int git_zstream_get_output(void *out, size_t *out_len, git_zstream *zstream)
 		zstream->z.next_out = out;
 		zstream->z.avail_out = (uInt)out_remain;
 		if ((size_t)zstream->z.avail_out != out_remain)
-			zstream->z.avail_out = INT_MAX;
+			zstream->z.avail_out = UINT_MAX;
 		out_queued = (size_t)zstream->z.avail_out;
 
 		/* compress next chunk */
-- 
cgit v1.2.1


From 3e6533ba12c1c567f91efe621bdd155ff801877c Mon Sep 17 00:00:00 2001
From: Edward Thomson <ethomson@edwardthomson.com>
Date: Sun, 10 Dec 2017 17:25:00 +0000
Subject: odb_loose: reject objects that cannot fit in memory

Check the size of objects being read from the loose odb backend and
reject those that would not fit in memory with an error message that
reflects the actual problem, instead of error'ing later with an
unintuitive error message regarding truncation or invalid hashes.
---
 src/odb_loose.c        |  5 +++++
 tests/odb/largefiles.c | 21 +++++++++++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/src/odb_loose.c b/src/odb_loose.c
index 2294931b4..9900aae2a 100644
--- a/src/odb_loose.c
+++ b/src/odb_loose.c
@@ -156,6 +156,11 @@ static int parse_header(
 		size < 0)
 		goto on_error;
 
+	if ((uint64_t)size > SIZE_MAX) {
+		giterr_set(GITERR_OBJECT, "object is larger than available memory");
+		return -1;
+	}
+
 	out->size = size;
 
 	if (GIT_ADD_SIZET_OVERFLOW(out_len, i, 1))
diff --git a/tests/odb/largefiles.c b/tests/odb/largefiles.c
index 9a91cf1f0..dc987c473 100644
--- a/tests/odb/largefiles.c
+++ b/tests/odb/largefiles.c
@@ -93,3 +93,24 @@ void test_odb_largefiles__read_into_memory(void)
 
 	git_odb_object_free(obj);
 }
+
+void test_odb_largefiles__read_into_memory_rejected_on_32bit(void)
+{
+	git_oid oid;
+	git_odb_object *obj = NULL;
+
+#ifdef GIT_ARCH_64
+	cl_skip();
+#endif
+
+	if (!cl_is_env_set("GITTEST_INVASIVE_FS_SIZE"))
+		cl_skip();
+
+	if (!cl_is_env_set("GITTEST_INVASIVE_MEMORY"))
+		cl_skip();
+
+	writefile(&oid);
+	cl_git_fail(git_odb_read(&obj, odb, &oid));
+
+	git_odb_object_free(obj);
+}
-- 
cgit v1.2.1


From a89560d5693a2f43cc852cb5806df837dc79b790 Mon Sep 17 00:00:00 2001
From: Edward Thomson <ethomson@edwardthomson.com>
Date: Sun, 10 Dec 2017 17:26:43 +0000
Subject: hash: win32 hash mechanism should support large files

Teach the win32 hash mechanisms to support large files.  The hash
primitives take at most `ULONG_MAX` bytes at a time.  Loop, giving the
hash function the maximum supported number of bytes, until we have
hashed the entire file.
---
 src/hash/hash_win32.c | 30 ++++++++++++++++++++++++------
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/src/hash/hash_win32.c b/src/hash/hash_win32.c
index 4d53a57bd..20ba9a5fe 100644
--- a/src/hash/hash_win32.c
+++ b/src/hash/hash_win32.c
@@ -136,12 +136,21 @@ GIT_INLINE(int) hash_cryptoapi_init(git_hash_ctx *ctx)
 	return 0;
 }
 
-GIT_INLINE(int) hash_cryptoapi_update(git_hash_ctx *ctx, const void *data, size_t len)
+GIT_INLINE(int) hash_cryptoapi_update(git_hash_ctx *ctx, const void *_data, size_t len)
 {
+	const BYTE *data = (BYTE *)_data;
+
 	assert(ctx->ctx.cryptoapi.valid);
 
-	if (!CryptHashData(ctx->ctx.cryptoapi.hash_handle, (const BYTE *)data, (DWORD)len, 0))
-		return -1;
+	while (len > 0) {
+		DWORD chunk = (len > MAXDWORD) ? MAXDWORD : (DWORD)len;
+
+		if (!CryptHashData(ctx->ctx.cryptoapi.hash_handle, data, chunk, 0))
+			return -1;
+
+		data += chunk;
+		len -= chunk;
+	}
 
 	return 0;
 }
@@ -202,10 +211,19 @@ GIT_INLINE(int) hash_cng_init(git_hash_ctx *ctx)
 	return 0;
 }
 
-GIT_INLINE(int) hash_cng_update(git_hash_ctx *ctx, const void *data, size_t len)
+GIT_INLINE(int) hash_cng_update(git_hash_ctx *ctx, const void *_data, size_t len)
 {
-	if (ctx->prov->prov.cng.hash_data(ctx->ctx.cng.hash_handle, (PBYTE)data, (ULONG)len, 0) < 0)
-		return -1;
+	PBYTE data = (PBYTE)_data;
+
+	while (len > 0) {
+		ULONG chunk = (len > ULONG_MAX) ? ULONG_MAX : (ULONG)len;
+
+		if (ctx->prov->prov.cng.hash_data(ctx->ctx.cng.hash_handle, data, chunk, 0) < 0)
+			return -1;
+
+		data += chunk;
+		len -= chunk;
+	}
 
 	return 0;
 }
-- 
cgit v1.2.1


From bdb542143909fc278c8ba89b0c64cdf72fcaf7d2 Mon Sep 17 00:00:00 2001
From: Edward Thomson <ethomson@edwardthomson.com>
Date: Mon, 11 Dec 2017 16:46:05 +0000
Subject: hash: commoncrypto hash should support large files

Teach the CommonCrypto hash mechanisms to support large files.  The hash
primitives take a `CC_LONG` (aka `uint32_t`) length, so loop, giving the
hash function at most an unsigned 32-bit integer's worth of bytes at a
time, until we have hashed the entire file.
---
 src/hash/hash_common_crypto.h | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/src/hash/hash_common_crypto.h b/src/hash/hash_common_crypto.h
index eeeddd0cc..4cd229d3c 100644
--- a/src/hash/hash_common_crypto.h
+++ b/src/hash/hash_common_crypto.h
@@ -16,6 +16,8 @@ struct git_hash_ctx {
 	CC_SHA1_CTX c;
 };
 
+#define CC_LONG_MAX ((CC_LONG)-1)
+
 #define git_hash_global_init() 0
 #define git_hash_ctx_init(ctx) git_hash_init(ctx)
 #define git_hash_ctx_cleanup(ctx)
@@ -27,10 +29,21 @@ GIT_INLINE(int) git_hash_init(git_hash_ctx *ctx)
 	return 0;
 }
 
-GIT_INLINE(int) git_hash_update(git_hash_ctx *ctx, const void *data, size_t len)
+GIT_INLINE(int) git_hash_update(git_hash_ctx *ctx, const void *_data, size_t len)
 {
+	const unsigned char *data = _data;
+
 	assert(ctx);
-	CC_SHA1_Update(&ctx->c, data, len);
+
+	while (len > 0) {
+		CC_LONG chunk = (len > CC_LONG_MAX) ? CC_LONG_MAX : (CC_LONG)len;
+
+		CC_SHA1_Update(&ctx->c, data, chunk);
+
+		data += chunk;
+		len -= chunk;
+	}
+
 	return 0;
 }
 
-- 
cgit v1.2.1


From 456e52189c95315028d668f9e508798d490765e2 Mon Sep 17 00:00:00 2001
From: Edward Thomson <ethomson@edwardthomson.com>
Date: Wed, 20 Dec 2017 16:13:31 +0000
Subject: tests: add GITTEST_SLOW env var check

Writing very large files may be slow, particularly on inefficient
filesystems and when running instrumented code to detect invalid memory
accesses (eg within valgrind or similar tools).

Introduce `GITTEST_SLOW` so that tests that are slow can be skipped by
the CI system.
---
 tests/odb/largefiles.c | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/tests/odb/largefiles.c b/tests/odb/largefiles.c
index dc987c473..22f136df5 100644
--- a/tests/odb/largefiles.c
+++ b/tests/odb/largefiles.c
@@ -45,10 +45,9 @@ void test_odb_largefiles__write_from_memory(void)
 	cl_skip();
 #endif
 
-	if (!cl_is_env_set("GITTEST_INVASIVE_FS_SIZE"))
-		cl_skip();
-
-	if (!cl_is_env_set("GITTEST_INVASIVE_MEMORY"))
+	if (!cl_is_env_set("GITTEST_INVASIVE_FS_SIZE") ||
+		!cl_is_env_set("GITTEST_INVASIVE_MEMORY") ||
+		!cl_is_env_set("GITTEST_SLOW"))
 		cl_skip();
 
 	for (i = 0; i < (3041*126103); i++)
@@ -64,7 +63,8 @@ void test_odb_largefiles__streamwrite(void)
 {
 	git_oid expected, oid;
 
-	if (!cl_is_env_set("GITTEST_INVASIVE_FS_SIZE"))
+	if (!cl_is_env_set("GITTEST_INVASIVE_FS_SIZE") ||
+		!cl_is_env_set("GITTEST_SLOW"))
 		cl_skip();
 
 	git_oid_fromstr(&expected, "3fb56989cca483b21ba7cb0a6edb229d10e1c26c");
@@ -82,10 +82,9 @@ void test_odb_largefiles__read_into_memory(void)
 	cl_skip();
 #endif
 
-	if (!cl_is_env_set("GITTEST_INVASIVE_FS_SIZE"))
-		cl_skip();
-
-	if (!cl_is_env_set("GITTEST_INVASIVE_MEMORY"))
+	if (!cl_is_env_set("GITTEST_INVASIVE_FS_SIZE") ||
+		!cl_is_env_set("GITTEST_INVASIVE_MEMORY") ||
+		!cl_is_env_set("GITTEST_SLOW"))
 		cl_skip();
 
 	writefile(&oid);
@@ -103,10 +102,9 @@ void test_odb_largefiles__read_into_memory_rejected_on_32bit(void)
 	cl_skip();
 #endif
 
-	if (!cl_is_env_set("GITTEST_INVASIVE_FS_SIZE"))
-		cl_skip();
-
-	if (!cl_is_env_set("GITTEST_INVASIVE_MEMORY"))
+	if (!cl_is_env_set("GITTEST_INVASIVE_FS_SIZE") ||
+		!cl_is_env_set("GITTEST_INVASIVE_MEMORY") ||
+		!cl_is_env_set("GITTEST_SLOW"))
 		cl_skip();
 
 	writefile(&oid);
-- 
cgit v1.2.1