diff options
author | Pádraig Brady <P@draigBrady.com> | 2022-02-23 17:50:46 +0000 |
---|---|---|
committer | Pádraig Brady <P@draigBrady.com> | 2022-02-24 13:58:06 +0000 |
commit | 6dc702928ec84b9d511396e756184403fd96cc6c (patch) | |
tree | f1814e946aa8f44d172229f1dc6be0a72d64fa35 | |
parent | 677fb3e4aba37bc5c6c7a13908e1f5d0f0ded743 (diff) | |
download | coreutils-6dc702928ec84b9d511396e756184403fd96cc6c.tar.gz |
fmt: fix invalid multi-byte splitting on macOS
On macOS, isspace(0x85) returns true,
which results in splitting within multi-byte characters.
* src/fmt.c (get_line): s/isspace/c_isspace/.
* tests/fmt/non-space.sh: Add a new test.
* tests/local.mk: Reference new test.
* NEWS: Mention the fix.
Addresses https://bugs.gnu.org/54124
-rw-r--r-- | NEWS | 4 |
-rw-r--r-- | src/fmt.c | 3 |
-rwxr-xr-x | tests/fmt/non-space.sh | 49 |
-rw-r--r-- | tests/local.mk | 3 |
4 files changed, 57 insertions, 2 deletions
diff --git a/NEWS b/NEWS
--- a/NEWS
+++ b/NEWS
@@ -21,6 +21,10 @@ GNU coreutils NEWS -*- outline -*-
   and B is in some other file system.
   [bug introduced in coreutils-9.0]
 
+  On macOS, fmt no longer corrupts multi-byte characters
+  by misdetecting their component bytes as spaces.
+  [This bug was present in "the beginning".]
+
   'id xyz' now uses the name 'xyz' to determine groups, instead of xyz's uid.
   [bug introduced in coreutils-8.22]
 
diff --git a/src/fmt.c b/src/fmt.c
--- a/src/fmt.c
+++ b/src/fmt.c
@@ -26,6 +26,7 @@
    it to be a type get syntax errors for the variable declaration below. */
 #define word unused_word_type
 
+#include "c-ctype.h"
 #include "system.h"
 #include "error.h"
 #include "die.h"
@@ -702,7 +703,7 @@ get_line (FILE *f, int c)
           *wptr++ = c;
           c = getc (f);
         }
-      while (c != EOF && !isspace (c));
+      while (c != EOF && !c_isspace (c));
       in_column += word_limit->length = wptr - word_limit->text;
       check_punctuation (word_limit);
 
diff --git a/tests/fmt/non-space.sh b/tests/fmt/non-space.sh
new file mode 100755
index 000000000..093c9393a
--- /dev/null
+++ b/tests/fmt/non-space.sh
@@ -0,0 +1,49 @@
+#!/bin/sh
+# Test fmt space handling
+
+# Copyright (C) 2022 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
+print_ver_ fmt printf
+
+# Before coreutils 9.1 macOS treated bytes like 0x85
+# as space characters in multi-byte locales (including UTF-8)
+
+check_non_space() {
+  char="$1"
+  test "$(env printf "=$char=" | fmt -s -w1 | wc -l)" = 1 || fail=1
+}
+
+export LC_ALL=en_US.iso8859-1 # only lowercase form works on macOS 10.15.7
+if test "$(locale charmap 2>/dev/null | sed 's/iso/ISO-/')" = ISO-8859-1; then
+  check_non_space '\xA0'
+fi
+
+export LC_ALL=en_US.UTF-8
+if test "$(locale charmap 2>/dev/null)" = UTF-8; then
+  check_non_space '\u00A0' # No break space
+  check_non_space '\u2007' # TODO: should probably split on figure space
+  check_non_space '\u202F' # Narrow no break space
+  check_non_space '\u2060' # zero-width no break space
+  check_non_space '\u0445' # Cyrillic kha, for which macOS isspace(0x85)==true
+fi
+
+export LC_ALL=ru_RU.KOI8-R
+if test "$(locale charmap 2>/dev/null)" = KOI8-R; then
+  check_non_space '\x9A'
+fi
+
+Exit $fail
diff --git a/tests/local.mk b/tests/local.mk
index f1376fb71..f97ddcb98 100644
--- a/tests/local.mk
+++ b/tests/local.mk
@@ -237,8 +237,9 @@ all_tests = \
   tests/chgrp/posix-H.sh \
   tests/chgrp/recurse.sh \
   tests/fmt/base.pl \
-  tests/fmt/long-line.sh \
   tests/fmt/goal-option.sh \
+  tests/fmt/long-line.sh \
+  tests/fmt/non-space.sh \
   tests/misc/echo.sh \
   tests/misc/env.sh \
   tests/misc/env-signal-handler.sh \