summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Pádraig Brady <P@draigBrady.com> 2022-03-18 14:52:36 +0000
committer: Pádraig Brady <P@draigBrady.com> 2022-03-19 16:57:07 +0000
commit: 6367cfe5fc513fb0832a3be18be8c9a813383321 (patch)
tree: 40ed6dc7c6fad24ae581602a7a80e23d5eef88c4
parent: 28adf9cf58e51ad8711fbbc98fa322ee8d3114d9 (diff)
download: coreutils-6367cfe5fc513fb0832a3be18be8c9a813383321.tar.gz
printf: support printing the numeric value of multi-byte chars
* src/printf.c (STRTOX): Update to support multi-byte chars.
* tests/misc/printf-mb.sh: Add a new test.
* tests/local.mk: Reference the new test.
* NEWS: Mention the improvement.
Fixes https://bugs.gnu.org/54388
-rw-r--r--  NEWS                     |  2
-rw-r--r--  src/printf.c             | 16
-rw-r--r--  tests/local.mk           |  1
-rwxr-xr-x  tests/misc/printf-mb.sh  | 52
4 files changed, 71 insertions, 0 deletions
diff --git a/NEWS b/NEWS
index ce60bad4a..6d6f204ee 100644
--- a/NEWS
+++ b/NEWS
@@ -108,6 +108,8 @@ GNU coreutils NEWS -*- outline -*-
any extra final progress just before synchronizing output data,
since synchronizing can take a long time.
+ printf now supports printing the numeric value of multi-byte characters.
+
sort --debug now diagnoses issues with --field-separator characters
that conflict with characters possibly used in numbers.
diff --git a/src/printf.c b/src/printf.c
index 5f84475fd..68c388341 100644
--- a/src/printf.c
+++ b/src/printf.c
@@ -53,6 +53,7 @@
#include <config.h>
#include <stdio.h>
#include <sys/types.h>
+#include <wchar.h>
#include "system.h"
#include "cl-strtod.h"
@@ -170,6 +171,21 @@ FUNC_NAME (char const *s) \
{ \
unsigned char ch = *++s; \
val = ch; \
+ \
+ if (MB_CUR_MAX > 1 && *(s + 1)) \
+ { \
+ mbstate_t mbstate = { 0, }; \
+ wchar_t wc; \
+ size_t slen = strlen (s); \
+ ssize_t bytes; \
+ bytes = mbrtowc (&wc, s, slen, &mbstate); \
+ if (0 < bytes) \
+ { \
+ val = wc; \
+ s += bytes - 1; \
+ } \
+ } \
+ \
/* If POSIXLY_CORRECT is not set, then give a warning that there \
are characters following the character constant and that GNU \
printf is ignoring those characters. If POSIXLY_CORRECT *is* \
diff --git a/tests/local.mk b/tests/local.mk
index f97ddcb98..0f7778619 100644
--- a/tests/local.mk
+++ b/tests/local.mk
@@ -344,6 +344,7 @@ all_tests = \
tests/misc/printf.sh \
tests/misc/printf-cov.pl \
tests/misc/printf-hex.sh \
+ tests/misc/printf-mb.sh \
tests/misc/printf-surprise.sh \
tests/misc/printf-quote.sh \
tests/misc/pwd-long.sh \
diff --git a/tests/misc/printf-mb.sh b/tests/misc/printf-mb.sh
new file mode 100755
index 000000000..ad21dbe67
--- /dev/null
+++ b/tests/misc/printf-mb.sh
@@ -0,0 +1,52 @@
+#!/bin/sh
+# tests for printing multi-byte values of characters
+
+# Copyright (C) 2022 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
+print_ver_ printf
+
+prog='env printf'
+
+unset LC_ALL
+f=$LOCALE_FR_UTF8
+: ${LOCALE_FR_UTF8=none}
+if test "$LOCALE_FR_UTF8" != "none"; then
+ (
+ #valid multi-byte
+ LC_ALL=$f $prog '%04x\n' '"á' >>out 2>>err
+ #invalid multi-byte
+ LC_ALL=$f $prog '%04x\n' "'$($prog '\xe1')" >>out 2>>err
+ #uni-byte
+ LC_ALL=C $prog '%04x\n' "'$($prog '\xe1')" >>out 2>>err
+ #valid multi-byte, with trailing
+ LC_ALL=$f $prog '%04x\n' '"á"' >>out 2>>err
+ )
+ cat <<\EOF > exp || framework_failure_
+00e1
+00e1
+00e1
+00e1
+EOF
+ compare exp out || fail=1
+
+ cat <<EOF > exp_err
+printf: warning: ": character(s) following character constant have been ignored
+EOF
+ compare exp_err err || fail=1
+fi
+
+Exit $fail