summaryrefslogtreecommitdiff
path: root/tests/multibyte-white-space
diff options
context:
space:
mode:
Diffstat (limited to 'tests/multibyte-white-space')
-rwxr-xr-xtests/multibyte-white-space51
1 files changed, 51 insertions, 0 deletions
diff --git a/tests/multibyte-white-space b/tests/multibyte-white-space
new file mode 100755
index 00000000..df2fe1b8
--- /dev/null
+++ b/tests/multibyte-white-space
@@ -0,0 +1,51 @@
+#! /bin/sh
+# Test whether \s matches SP and UTF-8 multi-byte white space characters.
+#
+# Copyright (C) 2013 Free Software Foundation, Inc.
+#
+# Copying and distribution of this file, with or without modification,
+# are permitted in any medium without royalty provided the copyright
+# notice and this notice are preserved.
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+
+require_en_utf8_locale_
+
+LC_ALL=en_US.UTF-8
+export LC_ALL
+
+# FIXME: including any the following in the list below would
+# make this test fail on Fedora 19/glibc-2.17-18.fc19.
+# Restore them to the list once it is fixed.
+these_fail_with_glibc='
+U+00A0 NO-BREAK SPACE: c2 a0
+U+2007 FIGURE SPACE: e2 80 87
+U+200B ZERO WIDTH SPACE: e2 80 8b
+U+202F NARROW NO-BREAK SPACE: e2 80 af
+'
+
+utf8_space_characters=$(sed 's/.*://;s/ /\\x/g' <<\EOF
+U+0020 SPACE: 20
+U+1680 OGHAM SPACE MARK: e1 9a 80
+U+2000 EN QUAD: e2 80 80
+U+2001 EM QUAD: e2 80 81
+U+2002 EN SPACE: e2 80 82
+U+2003 EM SPACE: e2 80 83
+U+2004 THREE-PER-EM SPACE: e2 80 84
+U+2005 FOUR-PER-EM SPACE: e2 80 85
+U+2006 SIX-PER-EM SPACE: e2 80 86
+U+2008 PUNCTUATION SPACE: e2 80 88
+U+2009 THIN SPACE: e2 80 89
+U+200A HAIR SPACE: e2 80 8a
+U+205F MEDIUM MATHEMATICAL SPACE: e2 81 9f
+U+3000 IDEOGRAPHIC SPACE: e3 80 80
+EOF
+)
+
+fail=0
+
+for i in $utf8_space_characters; do
+ printf "$i\n" | grep -q '^\s$' || { warn_ "$i FAILED"; fail=1; }
+done
+
+Exit $fail