summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Junio C Hamano <gitster@pobox.com> 2014-09-11 10:33:32 -0700
committer: Junio C Hamano <gitster@pobox.com> 2014-09-11 10:33:33 -0700
commit: bedd3b4b7b25df0b994abf444ee2136995dfeffa (patch)
tree: 1f5866f73cce9cf92d602f8d61fb6a52d1c3d3c9
parent: 08ad26a63deaa7eaaaa987ed2cc84ac0f83c080b (diff)
parent: 1aaf69e669b7fd67073d3024b386ac25ac77d0f8 (diff)
download: git-bedd3b4b7b25df0b994abf444ee2136995dfeffa.tar.gz
Merge branch 'nd/large-blobs'

Teach a few codepaths to punt (instead of dying) when large blobs
that would not fit in core are involved in the operation.

* nd/large-blobs:
  diff: shortcut for diff'ing two binary SHA-1 objects
  diff --stat: mark any file larger than core.bigfilethreshold binary
  diff.c: allow to pass more flags to diff_populate_filespec
  sha1_file.c: do not die failing to malloc in unpack_compressed_entry
  wrapper.c: introduce gentle xmallocz that does not die()

-rw-r--r-- Documentation/config.txt | 3
-rw-r--r-- Documentation/gitattributes.txt | 4
-rw-r--r-- diff.c | 52
-rw-r--r-- diffcore-rename.c | 6
-rw-r--r-- diffcore.h | 4
-rw-r--r-- git-compat-util.h | 1
-rw-r--r-- sha1_file.c | 4
-rwxr-xr-x t/t1050-large.sh | 20
-rw-r--r-- wrapper.c | 68
9 files changed, 125 insertions, 37 deletions
diff --git a/Documentation/config.txt b/Documentation/config.txt
index c55c22ab7b..3b5b24aeb7 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -499,7 +499,8 @@ core.bigFileThreshold::
Files larger than this size are stored deflated, without
attempting delta compression. Storing large files without
delta compression avoids excessive memory usage, at the
- slight expense of increased disk usage.
+ slight expense of increased disk usage. Additionally files
+ larger than this size are always treated as binary.
+
Default is 512 MiB on all platforms. This should be reasonable
for most projects as source code and other text files can still
diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt
index 643c1ba929..9b45bda748 100644
--- a/Documentation/gitattributes.txt
+++ b/Documentation/gitattributes.txt
@@ -440,8 +440,8 @@ Unspecified::
A path to which the `diff` attribute is unspecified
first gets its contents inspected, and if it looks like
- text, it is treated as text. Otherwise it would
- generate `Binary files differ`.
+ text and is smaller than core.bigFileThreshold, it is treated
+ as text. Otherwise it would generate `Binary files differ`.
String::
diff --git a/diff.c b/diff.c
index e7d4d4200f..d7a5c81bb8 100644
--- a/diff.c
+++ b/diff.c
@@ -376,7 +376,7 @@ static unsigned long diff_filespec_size(struct diff_filespec *one)
{
if (!DIFF_FILE_VALID(one))
return 0;
- diff_populate_filespec(one, 1);
+ diff_populate_filespec(one, CHECK_SIZE_ONLY);
return one->size;
}
@@ -1910,11 +1910,11 @@ static void show_dirstat(struct diff_options *options)
diff_free_filespec_data(p->one);
diff_free_filespec_data(p->two);
} else if (DIFF_FILE_VALID(p->one)) {
- diff_populate_filespec(p->one, 1);
+ diff_populate_filespec(p->one, CHECK_SIZE_ONLY);
copied = added = 0;
diff_free_filespec_data(p->one);
} else if (DIFF_FILE_VALID(p->two)) {
- diff_populate_filespec(p->two, 1);
+ diff_populate_filespec(p->two, CHECK_SIZE_ONLY);
copied = 0;
added = p->two->size;
diff_free_filespec_data(p->two);
@@ -2188,8 +2188,8 @@ int diff_filespec_is_binary(struct diff_filespec *one)
one->is_binary = one->driver->binary;
else {
if (!one->data && DIFF_FILE_VALID(one))
- diff_populate_filespec(one, 0);
- if (one->data)
+ diff_populate_filespec(one, CHECK_BINARY);
+ if (one->is_binary == -1 && one->data)
one->is_binary = buffer_is_binary(one->data,
one->size);
if (one->is_binary == -1)
@@ -2324,6 +2324,19 @@ static void builtin_diff(const char *name_a,
} else if (!DIFF_OPT_TST(o, TEXT) &&
( (!textconv_one && diff_filespec_is_binary(one)) ||
(!textconv_two && diff_filespec_is_binary(two)) )) {
+ if (!one->data && !two->data &&
+ S_ISREG(one->mode) && S_ISREG(two->mode) &&
+ !DIFF_OPT_TST(o, BINARY)) {
+ if (!hashcmp(one->sha1, two->sha1)) {
+ if (must_show_header)
+ fprintf(o->file, "%s", header.buf);
+ goto free_ab_and_return;
+ }
+ fprintf(o->file, "%s", header.buf);
+ fprintf(o->file, "%sBinary files %s and %s differ\n",
+ line_prefix, lbl[0], lbl[1]);
+ goto free_ab_and_return;
+ }
if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0)
die("unable to read files to diff");
/* Quite common confusing case */
@@ -2668,8 +2681,9 @@ static int diff_populate_gitlink(struct diff_filespec *s, int size_only)
* grab the data for the blob (or file) for our own in-core comparison.
* diff_filespec has data and size fields for this purpose.
*/
-int diff_populate_filespec(struct diff_filespec *s, int size_only)
+int diff_populate_filespec(struct diff_filespec *s, unsigned int flags)
{
+ int size_only = flags & CHECK_SIZE_ONLY;
int err = 0;
/*
* demote FAIL to WARN to allow inspecting the situation
@@ -2724,6 +2738,11 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
}
if (size_only)
return 0;
+ if ((flags & CHECK_BINARY) &&
+ s->size > big_file_threshold && s->is_binary == -1) {
+ s->is_binary = 1;
+ return 0;
+ }
fd = open(s->path, O_RDONLY);
if (fd < 0)
goto err_empty;
@@ -2745,16 +2764,21 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
}
else {
enum object_type type;
- if (size_only) {
+ if (size_only || (flags & CHECK_BINARY)) {
type = sha1_object_info(s->sha1, &s->size);
if (type < 0)
die("unable to read %s", sha1_to_hex(s->sha1));
- } else {
- s->data = read_sha1_file(s->sha1, &type, &s->size);
- if (!s->data)
- die("unable to read %s", sha1_to_hex(s->sha1));
- s->should_free = 1;
+ if (size_only)
+ return 0;
+ if (s->size > big_file_threshold && s->is_binary == -1) {
+ s->is_binary = 1;
+ return 0;
+ }
}
+ s->data = read_sha1_file(s->sha1, &type, &s->size);
+ if (!s->data)
+ die("unable to read %s", sha1_to_hex(s->sha1));
+ s->should_free = 1;
}
return 0;
}
@@ -4688,8 +4712,8 @@ static int diff_filespec_check_stat_unmatch(struct diff_filepair *p)
!DIFF_FILE_VALID(p->two) ||
(p->one->sha1_valid && p->two->sha1_valid) ||
(p->one->mode != p->two->mode) ||
- diff_populate_filespec(p->one, 1) ||
- diff_populate_filespec(p->two, 1) ||
+ diff_populate_filespec(p->one, CHECK_SIZE_ONLY) ||
+ diff_populate_filespec(p->two, CHECK_SIZE_ONLY) ||
(p->one->size != p->two->size) ||
!diff_filespec_is_identical(p->one, p->two)) /* (2) */
p->skip_stat_unmatch_result = 1;
diff --git a/diffcore-rename.c b/diffcore-rename.c
index 2e44a37459..4e132f1fdb 100644
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -147,9 +147,11 @@ static int estimate_similarity(struct diff_filespec *src,
* is a possible size - we really should have a flag to
* say whether the size is valid or not!)
*/
- if (!src->cnt_data && diff_populate_filespec(src, 1))
+ if (!src->cnt_data &&
+ diff_populate_filespec(src, CHECK_SIZE_ONLY))
return 0;
- if (!dst->cnt_data && diff_populate_filespec(dst, 1))
+ if (!dst->cnt_data &&
+ diff_populate_filespec(dst, CHECK_SIZE_ONLY))
return 0;
max_size = ((src->size > dst->size) ? src->size : dst->size);
diff --git a/diffcore.h b/diffcore.h
index c876dac71a..33ea2de348 100644
--- a/diffcore.h
+++ b/diffcore.h
@@ -55,7 +55,9 @@ extern void free_filespec(struct diff_filespec *);
extern void fill_filespec(struct diff_filespec *, const unsigned char *,
int, unsigned short);
-extern int diff_populate_filespec(struct diff_filespec *, int);
+#define CHECK_SIZE_ONLY 1
+#define CHECK_BINARY 2
+extern int diff_populate_filespec(struct diff_filespec *, unsigned int);
extern void diff_free_filespec_data(struct diff_filespec *);
extern void diff_free_filespec_blob(struct diff_filespec *);
extern int diff_filespec_is_binary(struct diff_filespec *);
diff --git a/git-compat-util.h b/git-compat-util.h
index d675c89603..4e7e3f8726 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -609,6 +609,7 @@ extern try_to_free_t set_try_to_free_routine(try_to_free_t);
extern char *xstrdup(const char *str);
extern void *xmalloc(size_t size);
extern void *xmallocz(size_t size);
+extern void *xmallocz_gently(size_t size);
extern void *xmemdupz(const void *data, size_t len);
extern char *xstrndup(const char *str, size_t len);
extern void *xrealloc(void *ptr, size_t size);
diff --git a/sha1_file.c b/sha1_file.c
index 95afd20910..c08c0cbea8 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1923,7 +1923,9 @@ static void *unpack_compressed_entry(struct packed_git *p,
git_zstream stream;
unsigned char *buffer, *in;
- buffer = xmallocz(size);
+ buffer = xmallocz_gently(size);
+ if (!buffer)
+ return NULL;
memset(&stream, 0, sizeof(stream));
stream.next_out = buffer;
stream.avail_out = size + 1;
diff --git a/t/t1050-large.sh b/t/t1050-large.sh
index aea493646e..05a1e1d270 100755
--- a/t/t1050-large.sh
+++ b/t/t1050-large.sh
@@ -112,6 +112,20 @@ test_expect_success 'diff --raw' '
git diff --raw HEAD^
'
+test_expect_success 'diff --stat' '
+ git diff --stat HEAD^ HEAD
+'
+
+test_expect_success 'diff' '
+ git diff HEAD^ HEAD >actual &&
+ grep "Binary files.*differ" actual
+'
+
+test_expect_success 'diff --cached' '
+ git diff --cached HEAD^ >actual &&
+ grep "Binary files.*differ" actual
+'
+
test_expect_success 'hash-object' '
git hash-object large1
'
@@ -163,4 +177,10 @@ test_expect_success 'zip achiving, deflate' '
git archive --format=zip HEAD >/dev/null
'
+test_expect_success 'fsck' '
+ test_must_fail git fsck 2>err &&
+ n=$(grep "error: attempting to allocate .* over limit" err | wc -l) &&
+ test "$n" -gt 1
+'
+
test_done
diff --git a/wrapper.c b/wrapper.c
index bd24cdabfb..25074d71b6 100644
--- a/wrapper.c
+++ b/wrapper.c
@@ -9,16 +9,23 @@ static void do_nothing(size_t size)
static void (*try_to_free_routine)(size_t size) = do_nothing;
-static void memory_limit_check(size_t size)
+static int memory_limit_check(size_t size, int gentle)
{
static int limit = -1;
if (limit == -1) {
const char *env = getenv("GIT_ALLOC_LIMIT");
limit = env ? atoi(env) * 1024 : 0;
}
- if (limit && size > limit)
- die("attempting to allocate %"PRIuMAX" over limit %d",
- (intmax_t)size, limit);
+ if (limit && size > limit) {
+ if (gentle) {
+ error("attempting to allocate %"PRIuMAX" over limit %d",
+ (intmax_t)size, limit);
+ return -1;
+ } else
+ die("attempting to allocate %"PRIuMAX" over limit %d",
+ (intmax_t)size, limit);
+ }
+ return 0;
}
try_to_free_t set_try_to_free_routine(try_to_free_t routine)
@@ -42,11 +49,12 @@ char *xstrdup(const char *str)
return ret;
}
-void *xmalloc(size_t size)
+static void *do_xmalloc(size_t size, int gentle)
{
void *ret;
- memory_limit_check(size);
+ if (memory_limit_check(size, gentle))
+ return NULL;
ret = malloc(size);
if (!ret && !size)
ret = malloc(1);
@@ -55,9 +63,16 @@ void *xmalloc(size_t size)
ret = malloc(size);
if (!ret && !size)
ret = malloc(1);
- if (!ret)
- die("Out of memory, malloc failed (tried to allocate %lu bytes)",
- (unsigned long)size);
+ if (!ret) {
+ if (!gentle)
+ die("Out of memory, malloc failed (tried to allocate %lu bytes)",
+ (unsigned long)size);
+ else {
+ error("Out of memory, malloc failed (tried to allocate %lu bytes)",
+ (unsigned long)size);
+ return NULL;
+ }
+ }
}
#ifdef XMALLOC_POISON
memset(ret, 0xA5, size);
@@ -65,16 +80,37 @@ void *xmalloc(size_t size)
return ret;
}
-void *xmallocz(size_t size)
+void *xmalloc(size_t size)
+{
+ return do_xmalloc(size, 0);
+}
+
+static void *do_xmallocz(size_t size, int gentle)
{
void *ret;
- if (unsigned_add_overflows(size, 1))
- die("Data too large to fit into virtual memory space.");
- ret = xmalloc(size + 1);
- ((char*)ret)[size] = 0;
+ if (unsigned_add_overflows(size, 1)) {
+ if (gentle) {
+ error("Data too large to fit into virtual memory space.");
+ return NULL;
+ } else
+ die("Data too large to fit into virtual memory space.");
+ }
+ ret = do_xmalloc(size + 1, gentle);
+ if (ret)
+ ((char*)ret)[size] = 0;
return ret;
}
+void *xmallocz(size_t size)
+{
+ return do_xmallocz(size, 0);
+}
+
+void *xmallocz_gently(size_t size)
+{
+ return do_xmallocz(size, 1);
+}
+
/*
* xmemdupz() allocates (len + 1) bytes of memory, duplicates "len" bytes of
* "data" to the allocated memory, zero terminates the allocated memory,
@@ -96,7 +132,7 @@ void *xrealloc(void *ptr, size_t size)
{
void *ret;
- memory_limit_check(size);
+ memory_limit_check(size, 0);
ret = realloc(ptr, size);
if (!ret && !size)
ret = realloc(ptr, 1);
@@ -115,7 +151,7 @@ void *xcalloc(size_t nmemb, size_t size)
{
void *ret;
- memory_limit_check(size * nmemb);
+ memory_limit_check(size * nmemb, 0);
ret = calloc(nmemb, size);
if (!ret && (!nmemb || !size))
ret = calloc(1, 1);