diff options
author | Paul Eggert <eggert@cs.ucla.edu> | 2022-05-13 23:46:21 -0700 |
---|---|---|
committer | Paul Eggert <eggert@cs.ucla.edu> | 2022-05-13 23:48:18 -0700 |
commit | 5447010fdbdf3f1a874689dd41a7c916bb262b2a (patch) | |
tree | 9f58605715e7de18e16fcc9bb2d6c99e52e34482 | |
parent | ef6c7768b300678895348ba7c827fa919e3f1d5c (diff) | |
download | grep-5447010fdbdf3f1a874689dd41a7c916bb262b2a.tar.gz |
grep: fix bug with . and some Hangul Syllables
* NEWS: Mention the fix, which comes from the recent Gnulib update.
* tests/hangul-syllable: New file.
* tests/Makefile.am (TESTS): Add it.
-rw-r--r-- | NEWS | 7 | ||||
-rw-r--r-- | tests/Makefile.am | 1 | ||||
-rwxr-xr-x | tests/hangul-syllable | 88 |
3 files changed, 96 insertions, 0 deletions
@@ -13,6 +13,13 @@ GNU grep NEWS                                    -*- outline -*-
 
 ** Bug fixes
 
+  In locales using UTF-8 encoding, the regular expression '.' no
+  longer sometimes fails to match Unicode characters U+D400 through
+  U+D7FF (some Hangul Syllables, and Hangul Jamo Extended-B) and
+  Unicode characters U+108000 through U+10FFFF (half of Supplemental
+  Private Use Area plane B).
+  [bug introduced in grep 3.4]
+
   The -s option no longer suppresses "binary file matches" messages.
   [Bug#51860 introduced in grep 3.5]
 
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 708980df..d72637f7 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -110,6 +110,7 @@ TESTS =						\
   grep-dev-null					\
   grep-dev-null-out				\
   grep-dir					\
+  hangul-syllable				\
   hash-collision-perf				\
   help-version					\
   high-bit-range				\
diff --git a/tests/hangul-syllable b/tests/hangul-syllable
new file mode 100755
index 00000000..9f94d2eb
--- /dev/null
+++ b/tests/hangul-syllable
@@ -0,0 +1,88 @@
+#!/bin/sh
+# grep 3.4 through 3.7 mishandled matching '.' against the valid UTF-8
+# sequences (ED)(90-9F)(80-BF) corresponding to U+D400 through U+D7FF,
+# which are some Hangul Syllables and Hangul Jamo Extended-B. They
+# also mishandled (F4)(88-8F)(80-BF)(80-BF) which correspond to
+# U+108000 through U+10FFFF (Supplemental Private Use Area plane B).
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+
+require_en_utf8_locale_
+
+LC_ALL=en_US.UTF-8
+export LC_ALL
+
+check_char ()  # usage: check_char PRINTF_ESCAPES [GREP_OPTION]
+{
+  printf "$1\\n" >in || framework_failure_  # $1 is used as a printf format
+
+  grep $2 '^.$' in >out || fail=1  # $2 unquoted: it vanishes when absent
+  cmp in out || fail=1  # the matched line must come back unchanged
+}
+
+fail=0
+
+# "." should match U+D45C HANGUL SYLLABLE PYO.
+check_char '\355\221\234'
+
+# Check boundary-condition characters
+# while we are at it.
+
+check_char '\0' -a
+check_char '\177'
+
+for i in 302 337; do
+  for j in 200 277; do
+    check_char "\\$i\\$j"
+  done
+done
+for i in 340; do
+  for j in 240 277; do
+    for k in 200 277; do
+      check_char "\\$i\\$j\\$k"
+    done
+  done
+done
+for i in 341 354 356 357; do
+  for j in 200 277; do
+    for k in 200 277; do
+      check_char "\\$i\\$j\\$k"
+    done
+  done
+done
+for i in 355; do
+  for j in 200 237; do
+    for k in 200 277; do
+      check_char "\\$i\\$j\\$k"
+    done
+  done
+done
+for i in 360; do
+  for j in 220 277; do
+    for k in 200 277; do
+      for l in 200 277; do
+        check_char "\\$i\\$j\\$k\\$l"
+      done
+    done
+  done
+done
+for i in 361 363; do
+  for j in 200 277; do
+    for k in 200 277; do
+      for l in 200 277; do
+        check_char "\\$i\\$j\\$k\\$l"
+      done
+    done
+  done
+done
+for i in 364; do
+  for j in 200 217; do
+    for k in 200 277; do
+      for l in 200 277; do
+        check_char "\\$i\\$j\\$k\\$l"
+      done
+    done
+  done
+done
+
+Exit $fail