diff options
author | Pádraig Brady <P@draigBrady.com> | 2022-03-18 14:52:36 +0000 |
---|---|---|
committer | Pádraig Brady <P@draigBrady.com> | 2022-03-19 16:57:07 +0000 |
commit | 6367cfe5fc513fb0832a3be18be8c9a813383321 (patch) | |
tree | 40ed6dc7c6fad24ae581602a7a80e23d5eef88c4 | |
parent | 28adf9cf58e51ad8711fbbc98fa322ee8d3114d9 (diff) | |
download | coreutils-6367cfe5fc513fb0832a3be18be8c9a813383321.tar.gz |
printf: support printing the numeric value of multi-byte chars
* src/printf.c (STRTOX): Update to support multi-byte chars.
* tests/misc/printf-mb.sh: Add a new test.
* tests/local.mk: Reference the new test.
* NEWS: Mention the improvement.
Fixes https://bugs.gnu.org/54388
-rw-r--r-- | NEWS | 2 |
-rw-r--r-- | src/printf.c | 16 |
-rw-r--r-- | tests/local.mk | 1 |
-rwxr-xr-x | tests/misc/printf-mb.sh | 52 |
4 files changed, 71 insertions, 0 deletions
@@ -108,6 +108,8 @@ GNU coreutils NEWS -*- outline -*- any extra final progress just before synchronizing output data, since synchronizing can take a long time. + printf now supports printing the numeric value of multi-byte characters. + sort --debug now diagnoses issues with --field-separator characters that conflict with characters possibly used in numbers. diff --git a/src/printf.c b/src/printf.c index 5f84475fd..68c388341 100644 --- a/src/printf.c +++ b/src/printf.c @@ -53,6 +53,7 @@ #include <config.h> #include <stdio.h> #include <sys/types.h> +#include <wchar.h> #include "system.h" #include "cl-strtod.h" @@ -170,6 +171,21 @@ FUNC_NAME (char const *s) \ { \ unsigned char ch = *++s; \ val = ch; \ + \ + if (MB_CUR_MAX > 1 && *(s + 1)) \ + { \ + mbstate_t mbstate = { 0, }; \ + wchar_t wc; \ + size_t slen = strlen (s); \ + ssize_t bytes; \ + bytes = mbrtowc (&wc, s, slen, &mbstate); \ + if (0 < bytes) \ + { \ + val = wc; \ + s += bytes - 1; \ + } \ + } \ + \ /* If POSIXLY_CORRECT is not set, then give a warning that there \ are characters following the character constant and that GNU \ printf is ignoring those characters. If POSIXLY_CORRECT *is* \ diff --git a/tests/local.mk b/tests/local.mk index f97ddcb98..0f7778619 100644 --- a/tests/local.mk +++ b/tests/local.mk @@ -344,6 +344,7 @@ all_tests = \ tests/misc/printf.sh \ tests/misc/printf-cov.pl \ tests/misc/printf-hex.sh \ + tests/misc/printf-mb.sh \ tests/misc/printf-surprise.sh \ tests/misc/printf-quote.sh \ tests/misc/pwd-long.sh \ diff --git a/tests/misc/printf-mb.sh b/tests/misc/printf-mb.sh new file mode 100755 index 000000000..ad21dbe67 --- /dev/null +++ b/tests/misc/printf-mb.sh @@ -0,0 +1,52 @@ +#!/bin/sh +# tests for printing multi-byte values of characters + +# Copyright (C) 2022 Free Software Foundation, Inc. 
+ +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src +print_ver_ printf + +prog='env printf' + +unset LC_ALL +f=$LOCALE_FR_UTF8 +: ${LOCALE_FR_UTF8=none} +if test "$LOCALE_FR_UTF8" != "none"; then + ( + #valid multi-byte + LC_ALL=$f $prog '%04x\n' '"á' >>out 2>>err + #invalid multi-byte + LC_ALL=$f $prog '%04x\n' "'$($prog '\xe1')" >>out 2>>err + #uni-byte + LC_ALL=C $prog '%04x\n' "'$($prog '\xe1')" >>out 2>>err + #valid multi-byte, with trailing + LC_ALL=$f $prog '%04x\n' '"á"' >>out 2>>err + ) + cat <<\EOF > exp || framework_failure_ +00e1 +00e1 +00e1 +00e1 +EOF + compare exp out || fail=1 + + cat <<EOF > exp_err +printf: warning: ": character(s) following character constant have been ignored +EOF + compare exp_err err || fail=1 +fi + +Exit $fail |