summaryrefslogtreecommitdiff
path: root/src/util.c
diff options
context:
space:
mode:
author Stan Hu <stanhu@gmail.com> 2018-02-22 22:55:50 -0800
committer Stan Hu <stanhu@gmail.com> 2018-05-05 14:54:27 -0700
commit 9d83a2b08724211e564bffca740cd5fdc93d890e (patch)
tree 769c2369cf8b20156ecc8d2d74791597a3ec271c /src/util.c
parent 0ad2372b4309f511c48c8e293f1eec396468595a (diff)
download libgit2-9d83a2b08724211e564bffca740cd5fdc93d890e.tar.gz
Sanitize the hunk header to ensure it contains UTF-8 valid data
The diff driver truncates the hunk header text to 80 bytes, which can truncate 4-byte Unicode characters and introduce garbage characters in the diff output. This change sanitizes the hunk header before it is displayed. This mirrors the test in git: https://github.com/git/git/blob/master/t/t4025-hunk-header.sh Closes https://github.com/libgit2/rugged/issues/716
Diffstat (limited to 'src/util.c')
-rw-r--r-- src/util.c 16
1 files changed, 16 insertions, 0 deletions
diff --git a/src/util.c b/src/util.c
index 2955b7ca0..bf778a949 100644
--- a/src/util.c
+++ b/src/util.c
@@ -806,6 +806,22 @@ double git_time_monotonic(void)
return git__timer();
}
+size_t git__utf8_valid_buf_length(const uint8_t *str, size_t str_len)
+{ /* Scan str from the start and report how many bytes were stepped over before git__utf8_charlen() first reported a negative length. */
+ size_t offset = 0; /* number of bytes consumed so far */
+
+ while (offset < str_len) {
+ int length = git__utf8_charlen(str + offset, str_len - offset); /* helper is given the remaining bytes; presumably returns the byte length of the character at offset - confirm against its definition */
+
+ if (length < 0) /* negative result: stop scanning and keep the offset reached so far */
+ break;
+
+ offset += length; /* length is non-negative here, so the int -> size_t addition cannot wrap backwards */
+ }
+
+ return offset; /* offset at which the scan stopped (on a negative length or once offset >= str_len) */
+}
+
#ifdef GIT_WIN32
int git__getenv(git_buf *out, const char *name)
{