summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog5
-rw-r--r--localedata/ChangeLog4
-rw-r--r--localedata/Makefile6
-rw-r--r--localedata/tst-sscanf.c56
-rw-r--r--stdio-common/vfscanf.c242
5 files changed, 296 insertions, 17 deletions
diff --git a/ChangeLog b/ChangeLog
index d898823f51..be2556b4f2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
2007-02-18 Ulrich Drepper <drepper@redhat.com>
+ [BZ #2211]
+ * stdio-common/vfscanf.c: Handle localized digits etc for floating
+ point numbers.
+ Patch mostly by Hamed Malek <hamed@farsiweb.info>.
+
* stdio-common/vfscanf.c: Fix problems in width accounting.
* stdio-common/tst-sscanf.c (double_tests): New tests.
(main): Hook them up.
diff --git a/localedata/ChangeLog b/localedata/ChangeLog
index 9dfe469503..e1bc145ec3 100644
--- a/localedata/ChangeLog
+++ b/localedata/ChangeLog
@@ -1,5 +1,9 @@
2007-02-18 Ulrich Drepper <drepper@redhat.com>
+ * Makefile (tests): Add tst-sscanf.
+ (LOCALES): Add fa_IR.UTF-8.
+ * tst-sscanf.c: New file.
+
* da_DK.in: Adjust for unified collation.
* locales/vi_VN: Don't define HOK here as well.
diff --git a/localedata/Makefile b/localedata/Makefile
index db7094fabe..e518175b98 100644
--- a/localedata/Makefile
+++ b/localedata/Makefile
@@ -1,4 +1,4 @@
-# Copyright (C) 1996-2002, 2003, 2005 Free Software Foundation, Inc.
+# Copyright (C) 1996-2002, 2003, 2005, 2007 Free Software Foundation, Inc.
# This file is part of the GNU C Library.
# The GNU C Library is free software; you can redistribute it and/or
@@ -93,7 +93,7 @@ locale_test_suite := tst_iswalnum tst_iswalpha tst_iswcntrl \
tests = $(locale_test_suite) tst-digits tst-setlocale bug-iconv-trans \
tst-leaks tst-mbswcs6 tst-xlocale1 tst-xlocale2 bug-usesetlocale \
- tst-strfmon1
+ tst-strfmon1 tst-sscanf
ifeq (yes,$(build-shared))
ifneq (no,$(PERL))
tests: $(objpfx)mtrace-tst-leaks
@@ -133,7 +133,7 @@ LOCALES := de_DE.ISO-8859-1 de_DE.UTF-8 en_US.ANSI_X3.4-1968 \
en_US.ISO-8859-1 ja_JP.EUC-JP da_DK.ISO-8859-1 \
hr_HR.ISO-8859-2 sv_SE.ISO-8859-1 ja_JP.SJIS fr_FR.ISO-8859-1 \
vi_VN.TCVN5712-1 nb_NO.ISO-8859-1 nn_NO.ISO-8859-1 \
- tr_TR.UTF-8 cs_CZ.UTF-8 zh_TW.EUC-TW
+ tr_TR.UTF-8 cs_CZ.UTF-8 zh_TW.EUC-TW fa_IR.UTF-8
LOCALE_SRCS := $(shell echo "$(LOCALES)"|sed 's/\([^ .]*\)[^ ]*/\1/g')
CHARMAPS := $(shell echo "$(LOCALES)" | \
sed -e 's/[^ .]*[.]\([^ ]*\)/\1/g' -e s/SJIS/SHIFT_JIS/g)
diff --git a/localedata/tst-sscanf.c b/localedata/tst-sscanf.c
new file mode 100644
index 0000000000..89a77a14f8
--- /dev/null
+++ b/localedata/tst-sscanf.c
@@ -0,0 +1,56 @@
+#include <stdio.h>
+#include <locale.h>
+#include <assert.h>
+
+#define P0 "\xDB\xB0"
+#define P1 "\xDB\xB1"
+#define P2 "\xDB\xB2"
+#define P3 "\xDB\xB3"
+#define P4 "\xDB\xB4"
+#define P5 "\xDB\xB5"
+#define P6 "\xDB\xB6"
+#define P7 "\xDB\xB7"
+#define P8 "\xDB\xB8"
+#define P9 "\xDB\xB9"
+#define PD "\xd9\xab"
+#define PT "\xd9\xac"
+
+static int
+check_sscanf (const char *s, const char *format, const float n)
+{
+  /* Scan one float from S with FORMAT; return 0 if it equals N, else 1.  */
+  float parsed;
+  int status = 1;
+
+  /* PARSED is only meaningful once sscanf reports one conversion.  */
+  if (sscanf (s, format, &parsed) != 1)
+    printf ("nothing found for \"%s\"\n", s);
+  else if (parsed != n)
+    printf ("got %f expected %f from \"%s\"\n", parsed, n, s);
+  else
+    status = 0;
+
+  return status;
+}
+
+static int
+do_test (void)
+{
+  /* Run every case in the UTF-8 Persian locale; return 0 iff all pass.  */
+  if (setlocale (LC_ALL, "fa_IR.UTF-8") == NULL)
+    {
+      puts ("cannot set fa_IR.UTF-8 locale");
+      return 1;
+    }
+  /* The inputs are UTF-8 byte strings; OR results so every case runs.  */
+  int r = check_sscanf (P3 PD P1 P4, "%I8f", 3.14);
+  r |= check_sscanf (P3 PT P1 P4 P5, "%I'f", 3145);
+  r |= check_sscanf (P3 PD P1 P4 P1 P5 P9, "%If", 3.14159);
+  r |= check_sscanf ("-" P3 PD P1 P4 P1 P5, "%If", -3.1415);
+  r |= check_sscanf ("+" PD P1 P4 P1 P5, "%If", +.1415);
+  r |= check_sscanf (P3 PD P1 P4 P1 P5 "e+" P2, "%Ie", 3.1415e+2);
+  return r;
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../test-skeleton.c"
diff --git a/stdio-common/vfscanf.c b/stdio-common/vfscanf.c
index 90e7e36a5f..cdb610dc0c 100644
--- a/stdio-common/vfscanf.c
+++ b/stdio-common/vfscanf.c
@@ -1,5 +1,4 @@
-/* Copyright (C) 1991-2002, 2003, 2004, 2005, 2006, 2007
- Free Software Foundation, Inc.
+/* Copyright (C) 1991-2006, 2007 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -1264,13 +1263,13 @@ _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr,
mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
cmpp = mbdigits[n];
- while ((unsigned char) *cmpp == c && avail > 0)
+ while ((unsigned char) *cmpp == c && avail >= 0)
{
if (*++cmpp == '\0')
break;
else
{
- if ((c = inchar ()) == EOF)
+ if (avail == 0 || inchar () == EOF)
break;
--avail;
}
@@ -1317,13 +1316,13 @@ _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr,
int avail = width > 0 ? width : INT_MAX;
cmpp = mbdigits[n];
- while ((unsigned char) *cmpp == c && avail > 0)
+ while ((unsigned char) *cmpp == c && avail >= 0)
{
if (*++cmpp == '\0')
break;
else
{
- if ((c = inchar ()) == EOF)
+ if (avail == 0 || inchar () == EOF)
break;
--avail;
}
@@ -1378,14 +1377,14 @@ _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr,
const char *cmpp = thousands;
int avail = width > 0 ? width : INT_MAX;
- while ((unsigned char) *cmpp == c && avail > 0)
+ while ((unsigned char) *cmpp == c && avail >= 0)
{
ADDW (c);
if (*++cmpp == '\0')
break;
else
{
- if ((c = inchar ()) == EOF)
+ if (avail == 0 || inchar () == EOF)
break;
--avail;
}
@@ -1450,14 +1449,14 @@ _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr,
const char *cmpp = thousands;
int avail = width > 0 ? width : INT_MAX;
- while ((unsigned char) *cmpp == c && avail > 0)
+ while ((unsigned char) *cmpp == c && avail >= 0)
{
ADDW (c);
if (*++cmpp == '\0')
break;
else
{
- if ((c = inchar ()) == EOF)
+ if (avail == 0 || inchar () == EOF)
break;
--avail;
}
@@ -1753,12 +1752,12 @@ _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr,
if (! got_dot)
{
- while ((unsigned char) *cmpp == c && avail > 0)
+ while ((unsigned char) *cmpp == c && avail >= 0)
if (*++cmpp == '\0')
break;
else
{
- if (inchar () == EOF)
+ if (avail == 0 || inchar () == EOF)
break;
--avail;
}
@@ -1790,12 +1789,12 @@ _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr,
++cmp2p;
if (cmp2p - thousands == cmpp - decimal)
{
- while ((unsigned char) *cmp2p == c && avail > 0)
+ while ((unsigned char) *cmp2p == c && avail >= 0)
if (*++cmp2p == '\0')
break;
else
{
- if (inchar () == EOF)
+ if (avail == 0 || inchar () == EOF)
break;
--avail;
}
@@ -1828,6 +1827,221 @@ _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr,
--width;
}
+ wctrans_t map;
+ if (__builtin_expect ((flags & I18N) != 0, 0)
+ /* Hexadecimal floats make no sense, mixing localized
+ digits with ASCII letters. */
+ && !is_hexa
+ /* Minimum requirement. */
+ && (wpsize == 0 || got_dot)
+ && (map = __wctrans ("to_inpunct")) != NULL)
+ {
+ /* Reget the first character. */
+ inchar ();
+
+ /* Localized digits, decimal points, and thousands
+ separator. */
+ wint_t wcdigits[12];
+
+ /* First get decimal equivalent to check if we read it
+ or not. */
+ wcdigits[11] = __towctrans (L'.', map);
+
+ /* If we have not read any character or have just read
+ locale decimal point which matches the decimal point
+ for localized FP numbers, then we may have localized
+ digits. Note, we test GOT_DOT above. */
+#ifdef COMPILE_WSCANF
+ if (wpsize == 0 || (wpsize == 1 && wcdigits[11] == decimal))
+#else
+ char mbdigits[12][MB_LEN_MAX + 1];
+
+ mbstate_t state;
+ memset (&state, '\0', sizeof (state));
+
+ bool match_so_far = wpsize == 0;
+ size_t mblen = __wcrtomb (mbdigits[11], wcdigits[11], &state);
+ if (mblen != (size_t) -1)
+ {
+ mbdigits[11][mblen] = '\0';
+ match_so_far |= (wpsize == strlen (decimal)
+ && strcmp (decimal, mbdigits[11]) == 0);
+ }
+ else
+ {
+ size_t decimal_len = strlen (decimal);
+ /* This should always be the case but the data comes
+ from a file. */
+ if (decimal_len <= MB_LEN_MAX)
+ {
+ match_so_far |= wpsize == decimal_len;
+ memcpy (mbdigits[11], decimal, decimal_len + 1);
+ }
+ else
+ match_so_far = false;
+ }
+
+ if (match_so_far)
+#endif
+ {
+ int have_locthousands = true;
+ /* Now get the digits and the thousands-sep equivalents. */
+ for (int n = 0; n < 11; ++n)
+ {
+ if (n < 10)
+ wcdigits[n] = __towctrans (L'0' + n, map);
+ else if (n == 10)
+ wcdigits[10] = __towctrans (L',', map);
+
+#ifndef COMPILE_WSCANF
+ memset (&state, '\0', sizeof (state));
+
+ size_t mblen = __wcrtomb (mbdigits[n], wcdigits[n],
+ &state);
+ if (mblen == (size_t) -1)
+ {
+ if (n == 10)
+ {
+ if (thousands == NULL || (flags & GROUP) == 0)
+ have_locthousands = false;
+ else
+ {
+ size_t thousands_len = strlen (thousands);
+ if (thousands_len <= MB_LEN_MAX)
+ memcpy (mbdigits[10], thousands,
+ thousands_len + 1);
+ else
+ have_locthousands = false;
+ }
+ }
+ else
+ /* Ignore checking against localized digits. */
+ goto no_i18nflt;
+ }
+ else
+ mbdigits[n][mblen] = '\0';
+#endif
+ }
+
+ /* Start checking against localized digits, if
+ conversion is done correctly. */
+ while (1)
+ {
+ if (got_e && wp[wpsize - 1] == exp_char
+ && (c == L_('-') || c == L_('+')))
+ ADDW (c);
+ else if (wpsize > 0 && !got_e
+ && (CHAR_T) TOLOWER (c) == exp_char)
+ {
+ ADDW (exp_char);
+ got_e = got_dot = 1;
+ }
+ else
+ {
+ /* Check against localized digits, decimal point,
+ and thousands separator. */
+ int n;
+ for (n = 0; n < 12; ++n)
+ {
+#ifdef COMPILE_WSCANF
+ if (c == wcdigits[n])
+ {
+ if (n < 10)
+ ADDW (L_('0') + n);
+ else if (n == 11 && !got_dot)
+ {
+ ADDW (decimal);
+ got_dot = 1;
+ }
+ else if (n == 10 && have_locthousands
+ && ! got_dot)
+ ADDW (thousands);
+ else
+ /* The last read character is not part
+ of the number anymore. */
+ n = 12;
+
+ break;
+ }
+#else
+ const char *cmpp = mbdigits[n];
+ int avail = width > 0 ? width : INT_MAX;
+
+ while ((unsigned char) *cmpp == c && avail >= 0)
+ if (*++cmpp == '\0')
+ break;
+ else
+ {
+ if (avail == 0 || inchar () == EOF)
+ break;
+ --avail;
+ }
+ if (*cmpp == '\0')
+ {
+ if (width > 0)
+ width = avail;
+
+ if (n < 10)
+ ADDW (L_('0') + n);
+ else if (n == 11 && !got_dot)
+ {
+ /* Add all the characters. */
+ for (cmpp = decimal; *cmpp != '\0';
+ ++cmpp)
+ ADDW ((unsigned char) *cmpp);
+
+ got_dot = 1;
+ }
+ else if (n == 10 && (flags & GROUP) != 0
+ && thousands != NULL && ! got_dot)
+ {
+ /* Add all the characters. */
+ for (cmpp = thousands; *cmpp != '\0';
+ ++cmpp)
+ ADDW ((unsigned char) *cmpp);
+ }
+ else
+ /* The last read character is not part
+ of the number anymore. */
+ n = 12;
+
+ break;
+ }
+
+ /* We are pushing all read characters back. */
+ if (cmpp > mbdigits[n])
+ {
+ ungetc (c, s);
+ while (--cmpp > mbdigits[n])
+ ungetc_not_eof ((unsigned char) *cmpp, s);
+ c = (unsigned char) *cmpp;
+ }
+#endif
+ }
+
+ if (n >= 12)
+ {
+ /* The last read character is not part
+ of the number anymore. */
+ ungetc (c, s);
+ break;
+ }
+ }
+
+ if (width == 0 || inchar () == EOF)
+ break;
+
+ if (width > 0)
+ --width;
+ }
+ }
+
+#ifndef COMPILE_WSCANF
+ no_i18nflt:
+ ;
+#endif
+ }
+
/* Have we read any character? If we try to read a number
in hexadecimal notation and we have read only the `0x'
prefix or no exponent this is an error. */