1 files changed, 35 insertions, 1 deletions
diff --git a/contrib/diff-highlight/t/t9400-diff-highlight.sh b/contrib/diff-highlight/t/t9400-diff-highlight.sh
index e42ff31f3d..b0fe7cc8aa 100755
--- a/contrib/diff-highlight/t/t9400-diff-highlight.sh
+++ b/contrib/diff-highlight/t/t9400-diff-highlight.sh
@@ -207,7 +207,41 @@ test_expect_failure 'diff-highlight highlights mismatched hunk size' '
 	EOF
 '
 
-# TODO add multi-byte test
+# These two code points share the same leading byte in UTF-8 representation;
+# a naive byte-wise diff would highlight only the second byte.
+#
+#   - U+00f3 ("o" with acute)
+o_accent=$(printf '\303\263')
+#   - U+00f8 ("o" with stroke)
+o_stroke=$(printf '\303\270')
+
+test_expect_success 'diff-highlight treats multibyte utf-8 as a unit' '
+	echo "unic${o_accent}de" >a &&
+	echo "unic${o_stroke}de" >b &&
+	dh_test a b <<-EOF
+		@@ -1 +1 @@
+		-unic${CW}${o_accent}${CR}de
+		+unic${CW}${o_stroke}${CR}de
+	EOF
+'
+
+# Unlike the UTF-8 above, these are combining code points which are meant
+# to modify the character preceding them:
+#
+#   - U+0301 (combining acute accent)
+combine_accent=$(printf '\314\201')
+#   - U+0302 (combining circumflex)
+combine_circum=$(printf '\314\202')
+
+test_expect_failure 'diff-highlight treats combining code points as a unit' '
+	echo "unico${combine_accent}de" >a &&
+	echo "unico${combine_circum}de" >b &&
+	dh_test a b <<-EOF
+		@@ -1 +1 @@
+		-unic${CW}o${combine_accent}${CR}de
+		+unic${CW}o${combine_circum}${CR}de
+	EOF
+'
 
 test_expect_success 'diff-highlight works with the --graph option' '
 	dh_test_setup_history &&