diff options
author | Nguyễn Thái Ngọc Duy <pclouds@gmail.com> | 2014-08-16 10:08:05 +0700 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2014-08-18 10:16:45 -0700 |
commit | 6bf3b813486b4528feca39d599c256f662defc14 (patch) | |
tree | af16bbb30e82a1beff126bb686d6d478c2f9eaa7 | |
parent | 8e5dd3d654ebb9e86e06b9ef5ca86bd6ebf44de1 (diff) | |
download | git-6bf3b813486b4528feca39d599c256f662defc14.tar.gz |
diff --stat: mark any file larger than core.bigfilethreshold binary
Too large files may lead to failure to allocate memory. If it happens
here, it could impact quite a few commands that involve
diff. Moreover, too large files are inefficient to compare anyway (and
most likely non-text), so mark them binary and skip looking at their
content.
Noticed-by: Dale R. Worley <worley@alum.mit.edu>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
-rw-r--r-- | Documentation/config.txt | 3 | ||||
-rw-r--r-- | Documentation/gitattributes.txt | 4 | ||||
-rw-r--r-- | diff.c | 26 | ||||
-rw-r--r-- | diffcore.h | 1 | ||||
-rwxr-xr-x | t/t1050-large.sh | 4 |
5 files changed, 27 insertions, 11 deletions
diff --git a/Documentation/config.txt b/Documentation/config.txt index c55c22ab7b..3b5b24aeb7 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -499,7 +499,8 @@ core.bigFileThreshold:: Files larger than this size are stored deflated, without attempting delta compression. Storing large files without delta compression avoids excessive memory usage, at the - slight expense of increased disk usage. + slight expense of increased disk usage. Additionally files + larger than this size are always treated as binary. + Default is 512 MiB on all platforms. This should be reasonable for most projects as source code and other text files can still diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt index 643c1ba929..9b45bda748 100644 --- a/Documentation/gitattributes.txt +++ b/Documentation/gitattributes.txt @@ -440,8 +440,8 @@ Unspecified:: A path to which the `diff` attribute is unspecified first gets its contents inspected, and if it looks like - text, it is treated as text. Otherwise it would - generate `Binary files differ`. + text and is smaller than core.bigFileThreshold, it is treated + as text. Otherwise it would generate `Binary files differ`. String:: @@ -2188,8 +2188,8 @@ int diff_filespec_is_binary(struct diff_filespec *one) one->is_binary = one->driver->binary; else { if (!one->data && DIFF_FILE_VALID(one)) - diff_populate_filespec(one, 0); - if (one->data) + diff_populate_filespec(one, CHECK_BINARY); + if (one->is_binary == -1 && one->data) one->is_binary = buffer_is_binary(one->data, one->size); if (one->is_binary == -1) @@ -2725,6 +2725,11 @@ int diff_populate_filespec(struct diff_filespec *s, unsigned int flags) } if (size_only) return 0; + if ((flags & CHECK_BINARY) && + s->size > big_file_threshold && s->is_binary == -1) { + s->is_binary = 1; + return 0; + } fd = open(s->path, O_RDONLY); if (fd < 0) goto err_empty; @@ -2746,16 +2751,21 @@ int diff_populate_filespec(struct diff_filespec *s, unsigned int flags) } else { enum object_type type; - if (size_only) { + if (size_only || (flags & CHECK_BINARY)) { type = sha1_object_info(s->sha1, &s->size); if (type < 0) die("unable to read %s", sha1_to_hex(s->sha1)); - } else { - s->data = read_sha1_file(s->sha1, &type, &s->size); - if (!s->data) - die("unable to read %s", sha1_to_hex(s->sha1)); - s->should_free = 1; + if (size_only) + return 0; + if (s->size > big_file_threshold && s->is_binary == -1) { + s->is_binary = 1; + return 0; + } } + s->data = read_sha1_file(s->sha1, &type, &s->size); + if (!s->data) + die("unable to read %s", sha1_to_hex(s->sha1)); + s->should_free = 1; } return 0; } diff --git a/diffcore.h b/diffcore.h index c80df18f3c..33ea2de348 100644 --- a/diffcore.h +++ b/diffcore.h @@ -56,6 +56,7 @@ extern void fill_filespec(struct diff_filespec *, const unsigned char *, int, unsigned short); #define CHECK_SIZE_ONLY 1 +#define CHECK_BINARY 2 extern int diff_populate_filespec(struct diff_filespec *, unsigned int); extern void diff_free_filespec_data(struct diff_filespec *); extern void diff_free_filespec_blob(struct diff_filespec *); diff --git a/t/t1050-large.sh b/t/t1050-large.sh index 5642f84b83..00d2f33df0 100755 --- a/t/t1050-large.sh +++ b/t/t1050-large.sh @@ -112,6 +112,10 @@ test_expect_success 'diff --raw' ' git diff --raw HEAD^ ' +test_expect_success 'diff --stat' ' + git diff --stat HEAD^ HEAD +' + test_expect_success 'hash-object' ' git hash-object large1 ' |