summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNguyễn Thái Ngọc Duy <pclouds@gmail.com>2014-08-16 10:08:05 +0700
committerJunio C Hamano <gitster@pobox.com>2014-08-18 10:16:45 -0700
commit6bf3b813486b4528feca39d599c256f662defc14 (patch)
treeaf16bbb30e82a1beff126bb686d6d478c2f9eaa7
parent8e5dd3d654ebb9e86e06b9ef5ca86bd6ebf44de1 (diff)
downloadgit-6bf3b813486b4528feca39d599c256f662defc14.tar.gz
diff --stat: mark any file larger than core.bigfilethreshold binary
Too large files may lead to failure to allocate memory. If it happens here, it could impact quite a few commands that involve diff. Moreover, too large files are inefficient to compare anyway (and most likely non-text), so mark them binary and skip looking at their content. Noticed-by: Dale R. Worley <worley@alum.mit.edu> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
-rw-r--r--Documentation/config.txt3
-rw-r--r--Documentation/gitattributes.txt4
-rw-r--r--diff.c26
-rw-r--r--diffcore.h1
-rwxr-xr-xt/t1050-large.sh4
5 files changed, 27 insertions, 11 deletions
diff --git a/Documentation/config.txt b/Documentation/config.txt
index c55c22ab7b..3b5b24aeb7 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -499,7 +499,8 @@ core.bigFileThreshold::
Files larger than this size are stored deflated, without
attempting delta compression. Storing large files without
delta compression avoids excessive memory usage, at the
- slight expense of increased disk usage.
+ slight expense of increased disk usage. Additionally files
+ larger than this size are always treated as binary.
+
Default is 512 MiB on all platforms. This should be reasonable
for most projects as source code and other text files can still
diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt
index 643c1ba929..9b45bda748 100644
--- a/Documentation/gitattributes.txt
+++ b/Documentation/gitattributes.txt
@@ -440,8 +440,8 @@ Unspecified::
A path to which the `diff` attribute is unspecified
first gets its contents inspected, and if it looks like
- text, it is treated as text. Otherwise it would
- generate `Binary files differ`.
+ text and is smaller than core.bigFileThreshold, it is treated
+ as text. Otherwise it would generate `Binary files differ`.
String::
diff --git a/diff.c b/diff.c
index f4b7421fa6..d381a6f446 100644
--- a/diff.c
+++ b/diff.c
@@ -2188,8 +2188,8 @@ int diff_filespec_is_binary(struct diff_filespec *one)
one->is_binary = one->driver->binary;
else {
if (!one->data && DIFF_FILE_VALID(one))
- diff_populate_filespec(one, 0);
- if (one->data)
+ diff_populate_filespec(one, CHECK_BINARY);
+ if (one->is_binary == -1 && one->data)
one->is_binary = buffer_is_binary(one->data,
one->size);
if (one->is_binary == -1)
@@ -2725,6 +2725,11 @@ int diff_populate_filespec(struct diff_filespec *s, unsigned int flags)
}
if (size_only)
return 0;
+ if ((flags & CHECK_BINARY) &&
+ s->size > big_file_threshold && s->is_binary == -1) {
+ s->is_binary = 1;
+ return 0;
+ }
fd = open(s->path, O_RDONLY);
if (fd < 0)
goto err_empty;
@@ -2746,16 +2751,21 @@ int diff_populate_filespec(struct diff_filespec *s, unsigned int flags)
}
else {
enum object_type type;
- if (size_only) {
+ if (size_only || (flags & CHECK_BINARY)) {
type = sha1_object_info(s->sha1, &s->size);
if (type < 0)
die("unable to read %s", sha1_to_hex(s->sha1));
- } else {
- s->data = read_sha1_file(s->sha1, &type, &s->size);
- if (!s->data)
- die("unable to read %s", sha1_to_hex(s->sha1));
- s->should_free = 1;
+ if (size_only)
+ return 0;
+ if (s->size > big_file_threshold && s->is_binary == -1) {
+ s->is_binary = 1;
+ return 0;
+ }
}
+ s->data = read_sha1_file(s->sha1, &type, &s->size);
+ if (!s->data)
+ die("unable to read %s", sha1_to_hex(s->sha1));
+ s->should_free = 1;
}
return 0;
}
diff --git a/diffcore.h b/diffcore.h
index c80df18f3c..33ea2de348 100644
--- a/diffcore.h
+++ b/diffcore.h
@@ -56,6 +56,7 @@ extern void fill_filespec(struct diff_filespec *, const unsigned char *,
int, unsigned short);
#define CHECK_SIZE_ONLY 1
+#define CHECK_BINARY 2
extern int diff_populate_filespec(struct diff_filespec *, unsigned int);
extern void diff_free_filespec_data(struct diff_filespec *);
extern void diff_free_filespec_blob(struct diff_filespec *);
diff --git a/t/t1050-large.sh b/t/t1050-large.sh
index 5642f84b83..00d2f33df0 100755
--- a/t/t1050-large.sh
+++ b/t/t1050-large.sh
@@ -112,6 +112,10 @@ test_expect_success 'diff --raw' '
git diff --raw HEAD^
'
+test_expect_success 'diff --stat' '
+ git diff --stat HEAD^ HEAD
+'
+
test_expect_success 'hash-object' '
git hash-object large1
'