summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>1998-04-15 17:02:23 +0000
committerUlrich Drepper <drepper@redhat.com>1998-04-15 17:02:23 +0000
commitf1fa8b68f3e7623a3ef86dcd0c7d090ccf0389f5 (patch)
tree6779500e7e6e0d2dae115fc25d6b199efd1a76ee
parent479e9b3f2135707d4bfd13bf6c2ad1a242ea6cfc (diff)
downloadglibc-f1fa8b68f3e7623a3ef86dcd0c7d090ccf0389f5.tar.gz
Update.
1998-04-15 16:41 Ulrich Drepper <drepper@cygnus.com> Don't name internal representation since it might be different from the external form (namely on little endian machines). * iconv/gconv_builtin.h: Add UCS4 support. Change references to UCS4 into references to INTERNAL. * iconv/gconv_simple.c: Implement UCS4<->INTERNAL converters. Add endianess support to UCS functions. Change references to UCS4 into references to INTERNAL. * iconv/gconv_int.h: Change references to UCS4 into references to INTERNAL. * iconv/iconv_prog.c: Don't mention INTERNAL in --list output. * iconvdata/gconv-modules: Change accordingly. * wcsmbs/wcsmbsload.c: Change names to use INTERNAL. * iconv/gconv_simple.c: Adjust input buffer pointer for output buffer overflow. * iconvdata/8bit-gap.c: Likewise. * iconvdata/8bit-generic.c: Likewise. * iconvdata/big5.c: Likewise. * iconvdata/euccn.c: Likewise. * iconvdata/eucjp.c: Likewise. * iconvdata/euckr.c: Likewise. * iconvdata/euctw.c: Likewise. * iconvdata/iso646.c: Likewise. * iconvdata/iso6937.c: Likewise. * iconvdata/iso8859-1.c: Likewise. * iconvdata/johab.c: Likewise. * iconvdata/sjis.c: Likewise. * iconvdata/t61.c: Likewise. * iconvdata/uhc.c: Likewise. * iconvdata/8bit-gap.c: Correct access to to_ucs4 array. * iconvdata/8bit-generic.c: Likewise. * iconvdata/TESTS: Add more tests. * sysdeps/i386/bits/byteswap.h: Change to use "=r" when ror is used. 1998-04-15 11:47 Ulrich Drepper <drepper@cygnus.com> * iconvdata/Makefile: Better rules to run tests. * iconvdata/testdata/ISO-8859-1..UTF8: New file. * iconvdata/testdata/ISO-8859-10: Likewise. * iconvdata/testdata/ISO-8859-10..UCS2: Likewise. * iconvdata/testdata/ISO-8859-2: Likewise. * iconvdata/testdata/ISO-8859-2..UCS4: Likewise. * iconvdata/testdata/ISO-8859-2..UTF8: Likewise. * iconvdata/testdata/ISO-8859-3: Likewise. * iconvdata/testdata/ISO-8859-4: Likewise. * iconvdata/testdata/ISO-8859-5: Likewise. * iconvdata/testdata/ISO-8859-6: Likewise. * iconvdata/testdata/ISO-8859-7: Likewise. 
* iconvdata/testdata/ISO-8859-8: Likewise. * iconvdata/testdata/ISO-8859-9: Likewise. * iconvdata/run-iconv-test.sh: Handle $from..$t file to compare intermediate result (if available). * iconvdata/Makefile: Add rules to run run-iconv-test.sh. (distribute): Add run-iconv-test.sh and testdata/*. * stdlib/testmb.c (main): Simplify mbc array handling. * iconvdata/testdata/ISO-8859-1: New file.
-rw-r--r--ChangeLog66
-rw-r--r--iconv/gconv_builtin.h36
-rw-r--r--iconv/gconv_int.h13
-rw-r--r--iconv/gconv_simple.c268
-rw-r--r--iconv/iconv_prog.c15
-rw-r--r--iconvdata/8bit-gap.c4
-rw-r--r--iconvdata/8bit-generic.c4
-rw-r--r--iconvdata/Makefile14
-rw-r--r--iconvdata/TESTS9
-rw-r--r--iconvdata/big5.c2
-rw-r--r--iconvdata/euccn.c2
-rw-r--r--iconvdata/eucjp.c2
-rw-r--r--iconvdata/euckr.c2
-rw-r--r--iconvdata/euctw.c2
-rw-r--r--iconvdata/gconv-modules125
-rw-r--r--iconvdata/iso646.c2
-rw-r--r--iconvdata/iso6937.c2
-rw-r--r--iconvdata/iso8859-1.c2
-rw-r--r--iconvdata/johab.c2
-rwxr-xr-xiconvdata/run-iconv-test.sh8
-rw-r--r--iconvdata/sjis.c2
-rw-r--r--iconvdata/t61.c2
-rw-r--r--iconvdata/testdata/ISO-8859-1..UTF812
-rw-r--r--iconvdata/testdata/ISO-8859-1012
-rw-r--r--iconvdata/testdata/ISO-8859-10..UCS2bin0 -> 788 bytes
-rw-r--r--iconvdata/testdata/ISO-8859-212
-rw-r--r--iconvdata/testdata/ISO-8859-2..UCS4bin0 -> 1576 bytes
-rw-r--r--iconvdata/testdata/ISO-8859-2..UTF812
-rw-r--r--iconvdata/testdata/ISO-8859-312
-rw-r--r--iconvdata/testdata/ISO-8859-412
-rw-r--r--iconvdata/testdata/ISO-8859-512
-rw-r--r--iconvdata/testdata/ISO-8859-612
-rw-r--r--iconvdata/testdata/ISO-8859-712
-rw-r--r--iconvdata/testdata/ISO-8859-811
-rw-r--r--iconvdata/testdata/ISO-8859-912
-rw-r--r--iconvdata/uhc.c2
-rw-r--r--stdlib/testmb.c6
-rw-r--r--sysdeps/i386/bits/byteswap.h6
-rw-r--r--wcsmbs/wcsmbsload.c12
39 files changed, 565 insertions, 176 deletions
diff --git a/ChangeLog b/ChangeLog
index 0068610dc3..65210542a1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,8 +1,73 @@
+1998-04-15 16:41 Ulrich Drepper <drepper@cygnus.com>
+
+ Don't name internal representation since it might be different from
+ the external form (namely on little endian machines).
+ * iconv/gconv_builtin.h: Add UCS4 support. Change references to
+ UCS4 into references to INTERNAL.
+ * iconv/gconv_simple.c: Implement UCS4<->INTERNAL converters.
+ Add endianess support to UCS functions. Change references to
+ UCS4 into references to INTERNAL.
+ * iconv/gconv_int.h: Change references to UCS4 into references to
+ INTERNAL.
+ * iconv/iconv_prog.c: Don't mention INTERNAL in --list output.
+ * iconvdata/gconv-modules: Change accordingly.
+ * wcsmbs/wcsmbsload.c: Change names to use INTERNAL.
+
+ * iconv/gconv_simple.c: Adjust input buffer pointer for output buffer
+ overflow.
+ * iconvdata/8bit-gap.c: Likewise.
+ * iconvdata/8bit-generic.c: Likewise.
+ * iconvdata/big5.c: Likewise.
+ * iconvdata/euccn.c: Likewise.
+ * iconvdata/eucjp.c: Likewise.
+ * iconvdata/euckr.c: Likewise.
+ * iconvdata/euctw.c: Likewise.
+ * iconvdata/iso646.c: Likewise.
+ * iconvdata/iso6937.c: Likewise.
+ * iconvdata/iso8859-1.c: Likewise.
+ * iconvdata/johab.c: Likewise.
+ * iconvdata/sjis.c: Likewise.
+ * iconvdata/t61.c: Likewise.
+ * iconvdata/uhc.c: Likewise.
+
+ * iconvdata/8bit-gap.c: Correct access to to_ucs4 array.
+ * iconvdata/8bit-generic.c: Likewise.
+
+ * iconvdata/TESTS: Add more tests.
+
+ * sysdeps/i386/bits/byteswap.h: Change to use "=r" when ror is used.
+
+1998-04-15 11:47 Ulrich Drepper <drepper@cygnus.com>
+
+ * iconvdata/Makefile: Better rules to run tests.
+
+ * iconvdata/testdata/ISO-8859-1..UTF8: New file.
+ * iconvdata/testdata/ISO-8859-10: Likewise.
+ * iconvdata/testdata/ISO-8859-10..UCS2: Likewise.
+ * iconvdata/testdata/ISO-8859-2: Likewise.
+ * iconvdata/testdata/ISO-8859-2..UCS4: Likewise.
+ * iconvdata/testdata/ISO-8859-2..UTF8: Likewise.
+ * iconvdata/testdata/ISO-8859-3: Likewise.
+ * iconvdata/testdata/ISO-8859-4: Likewise.
+ * iconvdata/testdata/ISO-8859-5: Likewise.
+ * iconvdata/testdata/ISO-8859-6: Likewise.
+ * iconvdata/testdata/ISO-8859-7: Likewise.
+ * iconvdata/testdata/ISO-8859-8: Likewise.
+ * iconvdata/testdata/ISO-8859-9: Likewise.
+
1998-04-15 Ulrich Drepper <drepper@cygnus.com>
+ * iconvdata/run-iconv-test.sh: Handle $from..$t file to compare
+ intermediate result (if available).
+
* iconv/gconv_simple.c (__gconv_transform_ucs4_ascii): Fix typo in
last change.
+ * iconvdata/Makefile: Add rules to run run-iconv-test.sh.
+ (distribute): Add run-iconv-test.sh and testdata/*.
+
+ * stdlib/testmb.c (main): Simplify mbc array handling.
+
1998-04-14 Andreas Schwab <schwab@issan.informatik.uni-dortmund.de>
* elf/dl-minimal.c (__strtol_internal): Correct range check. Fix
@@ -27,6 +92,7 @@
* iconvdata/Makefile: Add rules to run tests.
* iconvdata/TESTS: New file.
* iconvdata/run-iconv-test.sh: New file.
+ * iconvdata/testdata/ISO-8859-1: New file.
* iconv/iconv_prog.c (main): Call process_block with OUTPUT stream,
not stdout.
diff --git a/iconv/gconv_builtin.h b/iconv/gconv_builtin.h
index 9c98c3513d..265dca1f01 100644
--- a/iconv/gconv_builtin.h
+++ b/iconv/gconv_builtin.h
@@ -18,27 +18,41 @@
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
+BUILTIN_ALIAS ("UCS4//", "ISO-10646/UCS4/")
+BUILTIN_ALIAS ("UCS-4//", "ISO-10646/UCS4/")
+BUILTIN_ALIAS ("ISO-10646//", "ISO-10646/UCS4/")
+BUILTIN_ALIAS ("10646-1:1993//", "ISO-10646/UCS4/")
+BUILTIN_ALIAS ("10646-1:1993/UCS4/", "ISO-10646/UCS4/")
+
+BUILTIN_TRANSFORMATION (NULL, "INTERNAL", 8,
+ "ISO-10646/UCS4/", 1, "=INTERNAL->ucs4",
+ __gconv_transform_internal_ucs4, NULL, NULL)
+BUILTIN_TRANSFORMATION (NULL, "ISO-10646/UCS4/", 15,
+ "INTERNAL", 1, "=ucs4->INTERNAL",
+ __gconv_transform_internal_ucs4, NULL, NULL)
+/* Please note that we need only one function for both directions. */
+
BUILTIN_ALIAS ("UTF8//", "ISO-10646/UTF8/")
BUILTIN_ALIAS ("UTF-8//", "ISO-10646/UTF8/")
-BUILTIN_TRANSFORMATION (NULL, "ISO-10646/UCS4/", 15,
- "ISO-10646/UTF8/", 1, "=ucs4->utf8",
- __gconv_transform_ucs4_utf8, NULL, NULL)
+BUILTIN_TRANSFORMATION (NULL, "INTERNAL", 8,
+ "ISO-10646/UTF8/", 1, "=INTERNAL->utf8",
+ __gconv_transform_internal_utf8, NULL, NULL)
BUILTIN_TRANSFORMATION ("ISO-10646/UTF-?8/", "ISO-10646/UTF", 13,
- "ISO-10646/UCS4/", 1, "=utf8->ucs4",
- __gconv_transform_utf8_ucs4, NULL, NULL)
+ "INTERNAL", 1, "=utf8->INTERNAL",
+ __gconv_transform_utf8_internal, NULL, NULL)
BUILTIN_ALIAS ("UCS2//", "ISO-10646/UCS2/")
BUILTIN_ALIAS ("UCS-2//", "ISO-10646/UCS2/")
-BUILTIN_TRANSFORMATION (NULL, "ISO-10646/UCS2/", 15, "ISO-10646/UCS4/",
- 1, "=ucs2->ucs4",
- __gconv_transform_ucs2_ucs4, NULL, NULL)
+BUILTIN_TRANSFORMATION (NULL, "ISO-10646/UCS2/", 15, "INTERNAL",
+ 1, "=ucs2->INTERNAL",
+ __gconv_transform_ucs2_internal, NULL, NULL)
-BUILTIN_TRANSFORMATION (NULL, "ISO-10646/UCS4/", 15, "ISO-10646/UCS2/",
- 1, "=ucs4->ucs2",
- __gconv_transform_ucs4_ucs2, NULL, NULL)
+BUILTIN_TRANSFORMATION (NULL, "INTERNAL", 8, "ISO-10646/UCS2/",
+ 1, "=INTERNAL->ucs2",
+ __gconv_transform_internal_ucs2, NULL, NULL)
BUILTIN_TRANSFORMATION ("(.*)", NULL, 0, "\\1", 1, "=dummy",
__gconv_transform_dummy, NULL, NULL)
diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h
index 35ec31a7b8..a1475f8508 100644
--- a/iconv/gconv_int.h
+++ b/iconv/gconv_int.h
@@ -153,12 +153,13 @@ extern void __gconv_get_builtin_trans (const char *__name,
int __do_flush)
__BUILTIN_TRANS (__gconv_transform_dummy);
-__BUILTIN_TRANS (__gconv_transform_ascii_ucs4);
-__BUILTIN_TRANS (__gconv_transform_ucs4_ascii);
-__BUILTIN_TRANS (__gconv_transform_ucs4_utf8);
-__BUILTIN_TRANS (__gconv_transform_utf8_ucs4);
-__BUILTIN_TRANS (__gconv_transform_ucs2_ucs4);
-__BUILTIN_TRANS (__gconv_transform_ucs4_ucs2);
+__BUILTIN_TRANS (__gconv_transform_ascii_internal);
+__BUILTIN_TRANS (__gconv_transform_internal_ascii);
+__BUILTIN_TRANS (__gconv_transform_utf8_internal);
+__BUILTIN_TRANS (__gconv_transform_internal_utf8);
+__BUILTIN_TRANS (__gconv_transform_ucs2_internal);
+__BUILTIN_TRANS (__gconv_transform_internal_ucs2);
+__BUILTIN_TRANS (__gconv_transform_internal_ucs4);
# undef __BUITLIN_TRANS
#endif
diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c
index 38b6b56adb..b72e61edcc 100644
--- a/iconv/gconv_simple.c
+++ b/iconv/gconv_simple.c
@@ -18,6 +18,8 @@
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
+#include <byteswap.h>
+#include <endian.h>
#include <errno.h>
#include <gconv.h>
#include <stdint.h>
@@ -76,16 +78,21 @@ __gconv_transform_dummy (struct gconv_step *step, struct gconv_step_data *data,
}
-/* Convert from ISO 646-IRV to ISO 10646/UCS4. */
+/* Transform from the internal, UCS4-like format, to UCS4. The
+ difference between the internal ucs4 format and the real UCS4
+ format is, if any, the endianness. The Unicode/ISO 10646 standard says
+ that unless some higher protocol specifies it differently, the byte
+ order is big endian. */
int
-__gconv_transform_ascii_ucs4 (struct gconv_step *step,
- struct gconv_step_data *data, const char *inbuf,
- size_t *inlen, size_t *written, int do_flush)
+__gconv_transform_internal_ucs4 (struct gconv_step *step,
+ struct gconv_step_data *data,
+ const char *inbuf, size_t *inlen,
+ size_t *written, int do_flush)
{
struct gconv_step *next_step = step + 1;
struct gconv_step_data *next_data = data + 1;
gconv_fct fct = next_step->fct;
- size_t do_write;
+ size_t do_write = 0;
int result;
/* If the function is called with no input this means we have to reset
@@ -95,7 +102,6 @@ __gconv_transform_ascii_ucs4 (struct gconv_step *step,
{
/* Clear the state. */
memset (data->statep, '\0', sizeof (mbstate_t));
- do_write = 0;
/* Call the steps down the chain if there are any. */
if (data->is_last)
@@ -114,12 +120,126 @@ __gconv_transform_ascii_ucs4 (struct gconv_step *step,
else
{
int save_errno = errno;
- do_write = 0;
result = GCONV_OK;
do
{
- const unsigned char *newinbuf = inbuf;
+ size_t n_convert = (MIN (*inlen,
+ (data->outbufsize - data->outbufavail))
+ / sizeof (wchar_t));
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ /* Sigh, we have to do some real work. */
+ wchar_t *outbuf = (wchar_t *) &data->outbuf[data->outbufavail];
+ size_t cnt;
+
+ for (cnt = 0; cnt < n_convert; ++cnt)
+ outbuf[cnt] = bswap_32 (((wchar_t *) inbuf)[cnt]);
+
+#elif __BYTE_ORDER == __BIG_ENDIAN
+ /* Simply copy the data. */
+ memcpy (&data->outbuf[data->outbufsize], inbuf,
+ n_convert * sizeof (wchar_t));
+#else
+# error "This endianess is not supported."
+#endif
+
+ *inlen -= n_convert * sizeof (wchar_t);
+ inbuf += n_convert * sizeof (wchar_t);
+ data->outbufavail += n_convert * sizeof (wchar_t);
+ do_write += n_convert;
+
+ if (*inlen > 0 && *inlen < sizeof (wchar_t))
+ {
+ /* We have an incomplete character at the end. */
+ result = GCONV_INCOMPLETE_INPUT;
+ break;
+ }
+
+ if (data->is_last)
+ {
+ /* This is the last step. */
+ result = (*inlen < sizeof (wchar_t)
+ ? GCONV_EMPTY_INPUT : GCONV_FULL_OUTPUT);
+ break;
+ }
+
+ /* Status so far. */
+ result = GCONV_EMPTY_INPUT;
+
+ if (data->outbufavail > 0)
+ {
+ /* Call the functions below in the chain. */
+ size_t newavail = data->outbufavail;
+
+ result = (*fct) (next_step, next_data, data->outbuf, &newavail,
+ written, 0);
+
+ /* Correct the output buffer. */
+ if (newavail != data->outbufavail && newavail > 0)
+ {
+ memmove (data->outbuf,
+ &data->outbuf[data->outbufavail - newavail],
+ newavail);
+ data->outbufavail = newavail;
+ }
+ }
+ }
+ while (*inlen >= sizeof (wchar_t) && result == GCONV_EMPTY_INPUT);
+
+ __set_errno (save_errno);
+ }
+
+ if (written != NULL && data->is_last)
+ *written = do_write;
+
+ return result;
+}
+
+
+/* Convert from ISO 646-IRV to the internal (UCS4-like) format. */
+int
+__gconv_transform_ascii_internal (struct gconv_step *step,
+ struct gconv_step_data *data,
+ const char *inbuf, size_t *inlen,
+ size_t *written, int do_flush)
+{
+ struct gconv_step *next_step = step + 1;
+ struct gconv_step_data *next_data = data + 1;
+ gconv_fct fct = next_step->fct;
+ size_t do_write = 0;
+ int result;
+
+ /* If the function is called with no input this means we have to reset
+ to the initial state. The possibly partly converted input is
+ dropped. */
+ if (do_flush)
+ {
+ /* Clear the state. */
+ memset (data->statep, '\0', sizeof (mbstate_t));
+
+ /* Call the steps down the chain if there are any. */
+ if (data->is_last)
+ result = GCONV_OK;
+ else
+ {
+ struct gconv_step *next_step = step + 1;
+ struct gconv_step_data *next_data = data + 1;
+
+ result = (*fct) (next_step, next_data, NULL, 0, written, 1);
+
+ /* Clear output buffer. */
+ data->outbufavail = 0;
+ }
+ }
+ else
+ {
+ const unsigned char *newinbuf = inbuf;
+ int save_errno = errno;
+
+ result = GCONV_OK;
+ do
+ {
size_t actually = 0;
size_t cnt = 0;
@@ -193,9 +313,10 @@ __gconv_transform_ascii_ucs4 (struct gconv_step *step,
/* Convert from ISO 10646/UCS to ISO 646-IRV. */
int
-__gconv_transform_ucs4_ascii (struct gconv_step *step,
- struct gconv_step_data *data, const char *inbuf,
- size_t *inlen, size_t *written, int do_flush)
+__gconv_transform_internal_ascii (struct gconv_step *step,
+ struct gconv_step_data *data,
+ const char *inbuf, size_t *inlen,
+ size_t *written, int do_flush)
{
struct gconv_step *next_step = step + 1;
struct gconv_step_data *next_data = data + 1;
@@ -228,13 +349,13 @@ __gconv_transform_ucs4_ascii (struct gconv_step *step,
}
else
{
+ const wchar_t *newinbuf = (const wchar_t *) inbuf;
int save_errno = errno;
do_write = 0;
result = GCONV_OK;
do
{
- const wchar_t *newinbuf = (const wchar_t *) inbuf;
size_t actually = 0;
size_t cnt = 0;
@@ -264,11 +385,18 @@ __gconv_transform_ucs4_ascii (struct gconv_step *step,
if (result != GCONV_OK)
break;
+ /* Check for incomplete input. */
+ if (*inlen > 0 && *inlen < sizeof (wchar_t))
+ {
+ /* We have an incomplete character at the end. */
+ result = GCONV_INCOMPLETE_INPUT;
+ break;
+ }
+
if (data->is_last)
{
/* This is the last step. */
- result = (*inlen < sizeof (wchar_t)
- ? GCONV_EMPTY_INPUT : GCONV_FULL_OUTPUT);
+ result = *inlen == 0 ? GCONV_EMPTY_INPUT : GCONV_FULL_OUTPUT;
break;
}
@@ -306,9 +434,10 @@ __gconv_transform_ucs4_ascii (struct gconv_step *step,
int
-__gconv_transform_ucs4_utf8 (struct gconv_step *step,
- struct gconv_step_data *data, const char *inbuf,
- size_t *inlen, size_t *written, int do_flush)
+__gconv_transform_internal_utf8 (struct gconv_step *step,
+ struct gconv_step_data *data,
+ const char *inbuf, size_t *inlen,
+ size_t *written, int do_flush)
{
struct gconv_step *next_step = step + 1;
struct gconv_step_data *next_data = data + 1;
@@ -341,13 +470,13 @@ __gconv_transform_ucs4_utf8 (struct gconv_step *step,
}
else
{
+ const wchar_t *newinbuf = (const wchar_t *) inbuf;
int save_errno = errno;
do_write = 0;
result = GCONV_OK;
do
{
- const wchar_t *newinbuf = (const wchar_t *) inbuf;
size_t cnt = 0;
while (data->outbufavail < data->outbufsize
@@ -397,16 +526,24 @@ __gconv_transform_ucs4_utf8 (struct gconv_step *step,
/* Remember how much we converted. */
do_write += cnt;
*inlen -= cnt * sizeof (wchar_t);
+ newinbuf += cnt;
/* Check whether an illegal character appeared. */
if (result != GCONV_OK)
break;
+ /* Check for incomplete input. */
+ if (*inlen > 0 && *inlen < sizeof (wchar_t))
+ {
+ /* We have an incomplete character at the end. */
+ result = GCONV_INCOMPLETE_INPUT;
+ break;
+ }
+
if (data->is_last)
{
/* This is the last step. */
- result = (*inlen < sizeof (wchar_t)
- ? GCONV_EMPTY_INPUT : GCONV_FULL_OUTPUT);
+ result = *inlen == 0 ? GCONV_EMPTY_INPUT : GCONV_FULL_OUTPUT;
break;
}
@@ -444,9 +581,10 @@ __gconv_transform_ucs4_utf8 (struct gconv_step *step,
int
-__gconv_transform_utf8_ucs4 (struct gconv_step *step,
- struct gconv_step_data *data, const char *inbuf,
- size_t *inlen, size_t *written, int do_flush)
+__gconv_transform_utf8_internal (struct gconv_step *step,
+ struct gconv_step_data *data,
+ const char *inbuf, size_t *inlen,
+ size_t *written, int do_flush)
{
struct gconv_step *next_step = step + 1;
struct gconv_step_data *next_data = data + 1;
@@ -578,6 +716,7 @@ __gconv_transform_utf8_ucs4 (struct gconv_step *step,
/* Remember how much we converted. */
do_write += actually;
*inlen -= cnt;
+ inbuf += cnt;
data->outbufavail += actually * sizeof (wchar_t);
@@ -588,7 +727,7 @@ __gconv_transform_utf8_ucs4 (struct gconv_step *step,
break;
}
- if (*inlen < extra)
+ if (*inlen > 0 && *inlen < extra)
{
/* We have an incomplete character at the end. */
result = GCONV_INCOMPLETE_INPUT;
@@ -637,9 +776,10 @@ __gconv_transform_utf8_ucs4 (struct gconv_step *step,
int
-__gconv_transform_ucs2_ucs4 (struct gconv_step *step,
- struct gconv_step_data *data, const char *inbuf,
- size_t *inlen, size_t *written, int do_flush)
+__gconv_transform_ucs2_internal (struct gconv_step *step,
+ struct gconv_step_data *data,
+ const char *inbuf, size_t *inlen,
+ size_t *written, int do_flush)
{
struct gconv_step *next_step = step + 1;
struct gconv_step_data *next_data = data + 1;
@@ -669,12 +809,12 @@ __gconv_transform_ucs2_ucs4 (struct gconv_step *step,
}
else
{
+ const uint16_t *newinbuf = (const uint16_t *) inbuf;
int save_errno = errno;
do_write = 0;
do
{
- const uint16_t *newinbuf = (const uint16_t *) inbuf;
wchar_t *outbuf = (wchar_t *) &data->outbuf[data->outbufavail];
size_t actually = 0;
@@ -683,34 +823,29 @@ __gconv_transform_ucs2_ucs4 (struct gconv_step *step,
while (data->outbufavail + 4 <= data->outbufsize
&& *inlen >= 2)
{
- outbuf[actually++] = *newinbuf++;
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ outbuf[actually++] = (wchar_t) bswap_16 (*newinbuf++);
+#else
+ outbuf[actually++] = (wchar_t) *newinbuf++;
+#endif
data->outbufavail += 4;
*inlen -= 2;
}
- if (*inlen != 1)
- {
- /* We have an incomplete input character. */
- mbstate_t *state = data->statep;
- state->count = 1;
- state->value = *(uint8_t *) newinbuf;
- --*inlen;
- }
-
/* Remember how much we converted. */
do_write += actually * sizeof (wchar_t);
- /* Check whether an illegal character appeared. */
- if (errno != 0)
+ if (*inlen == 1)
{
- result = GCONV_ILLEGAL_INPUT;
+ /* We have an incomplete character at the end. */
+ result = GCONV_INCOMPLETE_INPUT;
break;
}
- if (*inlen == 0 && !__mbsinit (data->statep))
+ /* Check whether an illegal character appeared. */
+ if (errno != 0)
{
- /* We have an incomplete character at the end. */
- result = GCONV_INCOMPLETE_INPUT;
+ result = GCONV_ILLEGAL_INPUT;
break;
}
@@ -756,9 +891,10 @@ __gconv_transform_ucs2_ucs4 (struct gconv_step *step,
int
-__gconv_transform_ucs4_ucs2 (struct gconv_step *step,
- struct gconv_step_data *data, const char *inbuf,
- size_t *inlen, size_t *written, int do_flush)
+__gconv_transform_internal_ucs2 (struct gconv_step *step,
+ struct gconv_step_data *data,
+ const char *inbuf, size_t *inlen,
+ size_t *written, int do_flush)
{
struct gconv_step *next_step = step + 1;
struct gconv_step_data *next_data = data + 1;
@@ -791,12 +927,12 @@ __gconv_transform_ucs4_ucs2 (struct gconv_step *step,
}
else
{
+ const wchar_t *newinbuf = (const wchar_t *) inbuf;
int save_errno = errno;
do_write = 0;
do
{
- const wchar_t *newinbuf = (const wchar_t *) inbuf;
uint16_t *outbuf = (uint16_t *) &data->outbuf[data->outbufavail];
size_t actually = 0;
@@ -810,39 +946,33 @@ __gconv_transform_ucs4_ucs2 (struct gconv_step *step,
__set_errno (EILSEQ);
break;
}
- outbuf[actually++] = (wchar_t) *newinbuf;
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ /* Please note that we use the `uint32_t' pointer as a
+ `uint16_t' pointer which works since we are on a
+ little endian machine. */
+ outbuf[actually++] = bswap_16 (*((uint16_t *) newinbuf));
+ ++newinbuf;
+#else
+ outbuf[actually++] = *newinbuf++;
+#endif
*inlen -= 4;
data->outbufavail += 2;
}
- if (*inlen < 4)
- {
- /* We have an incomplete input character. */
- mbstate_t *state = data->statep;
- state->count = *inlen;
- state->value = 0;
- while (*inlen > 0)
- {
- state->value <<= 8;
- state->value += *(uint8_t *) newinbuf;
- --*inlen;
- }
- }
-
/* Remember how much we converted. */
do_write += (const char *) newinbuf - inbuf;
- /* Check whether an illegal character appeared. */
- if (errno != 0)
+ if (*inlen > 0 && *inlen < 4)
{
- result = GCONV_ILLEGAL_INPUT;
+ /* We have an incomplete input character. */
+ result = GCONV_INCOMPLETE_INPUT;
break;
}
- if (*inlen == 0 && !__mbsinit (data->statep))
+ /* Check whether an illegal character appeared. */
+ if (errno != 0)
{
- /* We have an incomplete character at the end. */
- result = GCONV_INCOMPLETE_INPUT;
+ result = GCONV_ILLEGAL_INPUT;
break;
}
diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c
index 0c1b9d045d..569bd3b3ec 100644
--- a/iconv/iconv_prog.c
+++ b/iconv/iconv_prog.c
@@ -509,14 +509,17 @@ print_known_names (void)
{
if (__gconv_modules_db[cnt]->from_pattern == NULL)
{
- tsearch (__gconv_modules_db[cnt]->from_constpfx, &printlist,
- (__compar_fn_t) strcoll);
- tsearch (__gconv_modules_db[cnt]->to_string, &printlist,
- (__compar_fn_t) strcoll);
+ if (strcmp (__gconv_modules_db[cnt]->from_constpfx, "INTERNAL"))
+ tsearch (__gconv_modules_db[cnt]->from_constpfx, &printlist,
+ (__compar_fn_t) strcoll);
+ if (strcmp (__gconv_modules_db[cnt]->to_string, "INTERNAL"))
+ tsearch (__gconv_modules_db[cnt]->to_string, &printlist,
+ (__compar_fn_t) strcoll);
}
else
- tsearch (__gconv_modules_db[cnt]->from_pattern, &printlist,
- (__compar_fn_t) strcoll);
+ if (strcmp (__gconv_modules_db[cnt]->from_pattern, "INTERNAL"))
+ tsearch (__gconv_modules_db[cnt]->from_pattern, &printlist,
+ (__compar_fn_t) strcoll);
}
fputs (_("\
diff --git a/iconvdata/8bit-gap.c b/iconvdata/8bit-gap.c
index 6c78ce5c24..a8d3c99a68 100644
--- a/iconvdata/8bit-gap.c
+++ b/iconvdata/8bit-gap.c
@@ -110,7 +110,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data,
while (cnt < inchars
&& (outwchars + sizeof (wchar_t) <= data->outbufsize))
{
- wchar_t ch = to_ucs4[(unsigned int) inbuf[cnt]];
+ wchar_t ch = to_ucs4[((unsigned char *) inbuf)[cnt]];
if (ch == L'\0' && inbuf[cnt] != '\0')
{
@@ -125,6 +125,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data,
++cnt;
}
*inbufsize -= cnt;
+ inbuf += cnt;
data->outbufavail = outwchars;
}
else
@@ -158,6 +159,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data,
cnt += sizeof (wchar_t);
}
*inbufsize -= cnt;
+ inbuf += cnt;
data->outbufavail = outchars;
if (outchars < data->outbufsize)
diff --git a/iconvdata/8bit-generic.c b/iconvdata/8bit-generic.c
index 52cd540fb7..19194ad068 100644
--- a/iconvdata/8bit-generic.c
+++ b/iconvdata/8bit-generic.c
@@ -97,7 +97,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data,
while (cnt < inchars
&& (outwchars + sizeof (wchar_t) <= data->outbufsize))
{
- wchar_t ch = to_ucs4[(unsigned int) inbuf[cnt]];
+ wchar_t ch = to_ucs4[((unsigned char *) inbuf)[cnt]];
if (ch == L'\0' && inbuf[cnt] != '\0')
{
@@ -112,6 +112,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data,
++cnt;
}
*inbufsize -= cnt;
+ inbuf += cnt;
data->outbufavail = outwchars;
}
else
@@ -136,6 +137,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data,
cnt += sizeof (wchar_t);
}
*inbufsize -= cnt;
+ inbuf += cnt;
data->outbufavail = outchars;
if (outchars < data->outbufsize)
diff --git a/iconvdata/Makefile b/iconvdata/Makefile
index 48d44956f6..dd1c391c6c 100644
--- a/iconvdata/Makefile
+++ b/iconvdata/Makefile
@@ -94,7 +94,8 @@ distribute := 8bit-generic.c 8bit-gap.c gap.pl gaptab.pl gconv-modules \
ebcdic-at-de-a.c ebcdic-ca-fr.c jis0201.c jis0208.c jis0212.c \
extra-module.mk euckr.c johab.c uhc.c ksc5601.c ksc5601.h \
iso646.c big5.c eucjp.c gb2312.c gb2312.h euccn.c euctw.c \
- cns11643l1.c cns11643l1.h cns11643.h cns11643.c
+ cns11643l1.c cns11643l1.h cns11643.h cns11643.c \
+ run-iconv-test.sh $(wildcard testdata/*)
# We build the transformation modules only when we build shared libs.
ifeq (yes,$(build-shared))
@@ -208,3 +209,14 @@ $(inst_gconvdir)/gconv-modules: gconv-modules $(+force)
endif
include ../Rules
+
+.PHONY: do-iconv-test
+tests: do-iconv-test
+
+do-iconv-test: run-iconv-test.sh $(objpfx)gconv-modules \
+ $(addprefix $(objpfx),$(modules.so)) \
+ $(common-objdir)/iconv/iconv_prog
+ $(SHELL) -e $< $(common-objdir) > $(objpfx)iconv-test.out
+
+$(objpfx)gconv-modules: gconv-modules
+ cp $^ $@
diff --git a/iconvdata/TESTS b/iconvdata/TESTS
index 809104d60f..41d7edcaf1 100644
--- a/iconvdata/TESTS
+++ b/iconvdata/TESTS
@@ -28,3 +28,12 @@
# N. target coded character set.
ISO-8859-1 ISO-8859-1 UTF8
+ISO-8859-2 ISO-8859-2 UCS4 UTF8
+ISO-8859-3 ISO-8859-3 UTF8
+ISO-8859-4 ISO-8859-4 UTF8
+ISO-8859-5 ISO-8859-5 UTF8
+ISO-8859-6 ISO-8859-6 UTF8
+ISO-8859-7 ISO-8859-7 UTF8
+ISO-8859-8 ISO-8859-8 UTF8
+ISO-8859-9 ISO-8859-9 UTF8
+ISO-8859-10 ISO-8859-10 UCS2 UTF8
diff --git a/iconvdata/big5.c b/iconvdata/big5.c
index bfd14fd5bb..a6a2580dd7 100644
--- a/iconvdata/big5.c
+++ b/iconvdata/big5.c
@@ -8540,6 +8540,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data,
++cnt;
}
*inbufsize -= cnt;
+ inbuf += cnt;
data->outbufavail = outwchars;
}
else
@@ -8632,6 +8633,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data,
cnt += sizeof (wchar_t);
}
*inbufsize -= cnt;
+ inbuf += cnt;
data->outbufavail = outchars;
if (outchars + extra < data->outbufsize)
diff --git a/iconvdata/euccn.c b/iconvdata/euccn.c
index e8d909768b..f683836ff7 100644
--- a/iconvdata/euccn.c
+++ b/iconvdata/euccn.c
@@ -163,6 +163,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data,
++cnt;
}
*inbufsize -= cnt;
+ inbuf += cnt;
data->outbufavail = outwchars;
}
else
@@ -212,6 +213,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data,
cnt += sizeof (wchar_t);
}
*inbufsize -= cnt;
+ inbuf += cnt;
data->outbufavail = outchars;
if (outchars + extra < data->outbufsize)
diff --git a/iconvdata/eucjp.c b/iconvdata/eucjp.c
index 4e82904c8f..e6a71cc791 100644
--- a/iconvdata/eucjp.c
+++ b/iconvdata/eucjp.c
@@ -180,6 +180,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data,
++cnt;
}
*inbufsize -= cnt;
+ inbuf += cnt;
data->outbufavail = outwchars;
}
else
@@ -256,6 +257,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data,
cnt += sizeof (wchar_t);
}
*inbufsize -= cnt;
+ inbuf += cnt;
data->outbufavail = outchars;
if (outchars + extra < data->outbufsize)
diff --git a/iconvdata/euckr.c b/iconvdata/euckr.c
index 1dfa42dbcc..2ad9478729 100644
--- a/iconvdata/euckr.c
+++ b/iconvdata/euckr.c
@@ -182,6 +182,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data,
++cnt;
}
*inbufsize -= cnt;
+ inbuf += cnt;
data->outbufavail = outwchars;
}
else
@@ -226,6 +227,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data,
cnt += sizeof (wchar_t);
}
*inbufsize -= cnt;
+ inbuf += cnt;
data->outbufavail = outchars;
if (outchars + extra < data->outbufsize)
diff --git a/iconvdata/euctw.c b/iconvdata/euctw.c
index f38db47946..fd422c1fb0 100644
--- a/iconvdata/euctw.c
+++ b/iconvdata/euctw.c
@@ -180,6 +180,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data,
++cnt;
}
*inbufsize -= cnt;
+ inbuf += cnt;
data->outbufavail = outwchars;
}
else
@@ -252,6 +253,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data,
cnt += sizeof (wchar_t);
}
*inbufsize -= cnt;
+ inbuf += cnt;
data->outbufavail = outchars;
if (outchars + extra < data->outbufsize)
diff --git a/iconvdata/gconv-modules b/iconvdata/gconv-modules
index ffdd8c321f..d5109dcf29 100644
--- a/iconvdata/gconv-modules
+++ b/iconvdata/gconv-modules
@@ -34,11 +34,6 @@
# name: the real name of the character set
# from to module cost
-alias ISO-10646// ISO-10646/UCS4/
-alias 10646-1:1993// ISO-10646/UCS4/
-alias 10646-1:1993/UCS4/ ISO-10646/UCS4/
-
-# from to module cost
alias ISO-IR-6// ANSI_X3.4-1968//
alias ANSI_X3.4-1986// ANSI_X3.4-1968//
alias ISO_646.IRV:1991// ANSI_X3.4-1968//
@@ -48,15 +43,15 @@ alias US-ASCII// ANSI_X3.4-1968//
alias US// ANSI_X3.4-1968//
alias IBM367// ANSI_X3.4-1968//
alias CP367// ANSI_X3.4-1968//
-module ANSI_X3.4-1968// ISO-10646/UCS4/ ISO646 2
-module ISO-10646/UCS4/ ANSI_X3.4-1968// ISO646 2
+module ANSI_X3.4-1968// INTERNAL ISO646 2
+module INTERNAL ANSI_X3.4-1968// ISO646 2
alias ISO-IR-4// BS_4730//
alias ISO646-GB// BS_4730//
alias GB// BS_4730//
alias UK// BS_4730//
-module BS_4730// ISO-10646/UCS4/ ISO646 2
-module ISO-10646/UCS4/ BS_4730// ISO646 2
+module BS_4730// INTERNAL ISO646 2
+module INTERNAL BS_4730// ISO646 2
# from to module cost
alias ISO-IR-100// ISO-8859-1//
@@ -66,8 +61,8 @@ alias LATIN1// ISO-8859-1//
alias L1// ISO-8859-1//
alias IBM819// ISO-8859-1//
alias CP819// ISO-8859-1//
-module ISO-8859-1// ISO-10646/UCS4/ ISO8859-1 1
-module ISO-10646/UCS4/ ISO-8859-1// ISO8859-1 1
+module ISO-8859-1// INTERNAL ISO8859-1 1
+module INTERNAL ISO-8859-1// ISO8859-1 1
# from to module cost
alias ISO-IR-101// ISO-8859-2//
@@ -75,8 +70,8 @@ alias ISO_8859-2:1987// ISO-8859-2//
alias ISO_8859-2// ISO-8859-2//
alias LATIN2// ISO-8859-2//
alias L2// ISO-8859-2//
-module ISO-8859-2// ISO-10646/UCS4/ ISO8859-2 1
-module ISO-10646/UCS4/ ISO-8859-2// ISO8859-2 1
+module ISO-8859-2// INTERNAL ISO8859-2 1
+module INTERNAL ISO-8859-2// ISO8859-2 1
# from to module cost
alias ISO-IR-109// ISO-8859-3//
@@ -84,8 +79,8 @@ alias ISO_8859-3:1988// ISO-8859-3//
alias ISO_8859-3// ISO-8859-3//
alias LATIN3// ISO-8859-3//
alias L3// ISO-8859-3//
-module ISO-8859-3// ISO-10646/UCS4/ ISO8859-3 1
-module ISO-10646/UCS4/ ISO-8859-3// ISO8859-3 1
+module ISO-8859-3// INTERNAL ISO8859-3 1
+module INTERNAL ISO-8859-3// ISO8859-3 1
# from to module cost
alias ISO-IR-110// ISO-8859-4//
@@ -93,16 +88,16 @@ alias ISO_8859-4:1988// ISO-8859-4//
alias ISO_8859-4// ISO-8859-4//
alias LATIN4// ISO-8859-4//
alias L4// ISO-8859-4//
-module ISO-8859-4// ISO-10646/UCS4/ ISO8859-4 1
-module ISO-10646/UCS4/ ISO-8859-4// ISO8859-4 1
+module ISO-8859-4// INTERNAL ISO8859-4 1
+module INTERNAL ISO-8859-4// ISO8859-4 1
# from to module cost
alias ISO-IR-144// ISO-8859-5//
alias ISO_8859-5:1988// ISO-8859-5//
alias ISO_8859-5// ISO-8859-5//
alias CYRILLIC// ISO-8859-5//
-module ISO-8859-5// ISO-10646/UCS4/ ISO8859-5 1
-module ISO-10646/UCS4/ ISO-8859-5// ISO8859-5 1
+module ISO-8859-5// INTERNAL ISO8859-5 1
+module INTERNAL ISO-8859-5// ISO8859-5 1
# from to module cost
alias ISO-IR-127// ISO-8859-6//
@@ -111,8 +106,8 @@ alias ISO_8859-6// ISO-8859-6//
alias ECMA-114// ISO-8859-6//
alias ASMO-708// ISO-8859-6//
alias ARABIC// ISO-8859-6//
-module ISO-8859-6// ISO-10646/UCS4/ ISO8859-6 1
-module ISO-10646/UCS4/ ISO-8859-6// ISO8859-6 1
+module ISO-8859-6// INTERNAL ISO8859-6 1
+module INTERNAL ISO-8859-6// ISO8859-6 1
# from to module cost
alias ISO-IR-126// ISO-8859-7//
@@ -122,16 +117,16 @@ alias ELOT_928// ISO-8859-7//
alias ECMA-118// ISO-8859-7//
alias GREEK// ISO-8859-7//
alias GREEK8// ISO-8859-7//
-module ISO-8859-7// ISO-10646/UCS4/ ISO8859-7 1
-module ISO-10646/UCS4/ ISO-8859-7// ISO8859-7 1
+module ISO-8859-7// INTERNAL ISO8859-7 1
+module INTERNAL ISO-8859-7// ISO8859-7 1
# from to module cost
alias ISO-IR-138// ISO-8859-8//
alias ISO_8859-8:1988// ISO-8859-8//
alias ISO_8859-8// ISO-8859-8//
alias HEBREW// ISO-8859-8//
-module ISO-8859-8// ISO-10646/UCS4/ ISO8859-8 1
-module ISO-10646/UCS4/ ISO-8859-8// ISO8859-8 1
+module ISO-8859-8// INTERNAL ISO8859-8 1
+module INTERNAL ISO-8859-8// ISO8859-8 1
# from to module cost
alias ISO-IR-148// ISO-8859-9//
@@ -139,8 +134,8 @@ alias ISO_8859-9:1989// ISO-8859-9//
alias ISO_8859-9// ISO-8859-9//
alias LATIN5// ISO-8859-9//
alias L5// ISO-8859-9//
-module ISO-8859-9// ISO-10646/UCS4/ ISO8859-9 1
-module ISO-10646/UCS4/ ISO-8859-9// ISO8859-9 1
+module ISO-8859-9// INTERNAL ISO8859-9 1
+module INTERNAL ISO-8859-9// ISO8859-9 1
# from to module cost
alias ISO-IR-157// ISO-8859-10//
@@ -148,96 +143,96 @@ alias ISO_8859-10:1993// ISO-8859-10//
alias ISO_8859-10// ISO-8859-10//
alias LATIN6// ISO-8859-10//
alias L6// ISO-8859-10//
-module ISO-8859-10// ISO-10646/UCS4/ ISO8859-10 1
-module ISO-10646/UCS4/ ISO-8859-10// ISO8859-10 1
+module ISO-8859-10// INTERNAL ISO8859-10 1
+module INTERNAL ISO-8859-10// ISO8859-10 1
# from to module cost
alias T.61// T.61-8BIT//
alias ISO-IR-103// T.61-8BIT//
-module T.61-8BIT// ISO-10646/UCS4/ T.61 1
-module ISO-10646/UCS4/ T.61-8BIT// T.61 1
+module T.61-8BIT// INTERNAL T.61 1
+module INTERNAL T.61-8BIT// T.61 1
# from to module cost
alias ISO-IR-156// ISO_6937//
alias ISO_6937:1992// ISO_6937//
alias ISO6937// ISO_6937//
-module ISO_6937// ISO-10646/UCS4/ ISO_6937 1
-module ISO-10646/UCS4/ ISO_6937// ISO_6937 1
+module ISO_6937// INTERNAL ISO_6937 1
+module INTERNAL ISO_6937// ISO_6937 1
# from to module cost
alias SHIFT-JIS// SJIS//
-module SJIS// ISO-10646/UCS4/ SJIS 1
-module ISO-10646/UCS4/ SJIS// SJIS 1
+module SJIS// INTERNAL SJIS 1
+module INTERNAL SJIS// SJIS 1
# from to module cost
-module KOI-8// ISO-10646/UCS4/ KOI-8 1
-module ISO-10646/UCS4/ KOI-8// KOI-8 1
+module KOI-8// INTERNAL KOI-8 1
+module INTERNAL KOI-8// KOI-8 1
# from to module cost
-module KOI8-R// ISO-10646/UCS4/ KOI8-R 1
-module ISO-10646/UCS4/ KOI8-R// KOI8-R 1
+module KOI8-R// INTERNAL KOI8-R 1
+module INTERNAL KOI8-R// KOI8-R 1
# from to module cost
alias ISO-IR-19// LATIN-GREEK//
-module LATIN-GREEK// ISO-10646/UCS4/ LATIN-GREEK 1
-module ISO-10646/UCS4/ LATIN-GREEK// LATIN-GREEK 1
+module LATIN-GREEK// INTERNAL LATIN-GREEK 1
+module INTERNAL LATIN-GREEK// LATIN-GREEK 1
# from to module cost
alias ISO-IR-27// LATIN-GREEK//
-module LATIN-GREEK-1// ISO-10646/UCS4/ LATIN-GREEK-1 1
-module ISO-10646/UCS4/ LATIN-GREEK-1// LATIN-GREEK-1 1
+module LATIN-GREEK-1// INTERNAL LATIN-GREEK-1 1
+module INTERNAL LATIN-GREEK-1// LATIN-GREEK-1 1
# from to module cost
alias ROMAN8// HP-ROMAN8//
alias R8// HP-ROMAN8//
-module HP-ROMAN8// ISO-10646/UCS4/ HP-ROMAN8 1
-module ISO-10646/UCS4/ HP-ROMAN8// HP-ROMAN8 1
+module HP-ROMAN8// INTERNAL HP-ROMAN8 1
+module INTERNAL HP-ROMAN8// HP-ROMAN8 1
# from to module cost
-module EBCDIC-AT-DE// ISO-10646/UCS4/ EBCDIC-AT-DE 1
-module ISO-10646/UCS4/ EBCDIC-AT-DE// EBCDIC-AT-DE 1
+module EBCDIC-AT-DE// INTERNAL EBCDIC-AT-DE 1
+module INTERNAL EBCDIC-AT-DE// EBCDIC-AT-DE 1
# from to module cost
-module EBCDIC-AT-DE-A// ISO-10646/UCS4/ EBCDIC-AT-DE-A 1
-module ISO-10646/UCS4/ EBCDIC-AT-DE-A// EBCDIC-AT-DE-A 1
+module EBCDIC-AT-DE-A// INTERNAL EBCDIC-AT-DE-A 1
+module INTERNAL EBCDIC-AT-DE-A// EBCDIC-AT-DE-A 1
# from to module cost
-module EBCDIC-CA-FR// ISO-10646/UCS4/ EBCDIC-CA-FR 1
-module ISO-10646/UCS4/ EBCDIC-CA-FR// EBCDIC-CA-FR 1
+module EBCDIC-CA-FR// INTERNAL EBCDIC-CA-FR 1
+module INTERNAL EBCDIC-CA-FR// EBCDIC-CA-FR 1
# from to module cost
alias EUCKR// EUC-KR//
-module EUC-KR// ISO-10646/UCS4/ EUC-KR 1
-module ISO-10646/UCS4/ EUC-KR// EUC-KR 1
+module EUC-KR// INTERNAL EUC-KR 1
+module INTERNAL EUC-KR// EUC-KR 1
# from to module cost
alias MSCP949// UHC//
-module UHC// ISO-10646/UCS4/ UHC 1
-module ISO-10646/UCS4/ UHC// UHC 1
+module UHC// INTERNAL UHC 1
+module INTERNAL UHC// UHC 1
# from to module cost
alias MSCP1361// JOHAB//
-module JOHAB// ISO-10646/UCS4/ JOHAB 1
-module ISO-10646/UCS4/ JOHAB// JOHAB 1
+module JOHAB// INTERNAL JOHAB 1
+module INTERNAL JOHAB// JOHAB 1
# from to module cost
alias BIG-FIVE// BIG5//
alias BIGFIVE// BIG5//
alias BIG-5// BIG5//
-module BIG5// ISO-10646/UCS4/ BIG5 1
-module ISO-10646/UCS4/ BIG5// BIG5 1
+module BIG5// INTERNAL BIG5 1
+module INTERNAL BIG5// BIG5 1
# from to module cost
alias EUCJP// EUC-JP//
-module EUC-JP// ISO-10646/UCS4/ EUC-JP 1
-module ISO-10646/UCS4/ EUC-JP// EUC-JP 1
+module EUC-JP// INTERNAL EUC-JP 1
+module INTERNAL EUC-JP// EUC-JP 1
# from to module cost
alias EUCCN// EUC-CN//
-module EUC-CN// ISO-10646/UCS4/ EUC-CN 1
-module ISO-10646/UCS4/ EUC-CN// EUC-CN 1
+module EUC-CN// INTERNAL EUC-CN 1
+module INTERNAL EUC-CN// EUC-CN 1
# from to module cost
alias EUCTW// EUC-TW//
-module EUC-TW// ISO-10646/UCS4/ EUC-TW 1
-module ISO-10646/UCS4/ EUC-TW// EUC-TW 1
+module EUC-TW// INTERNAL EUC-TW 1
+module INTERNAL EUC-TW// EUC-TW 1
diff --git a/iconvdata/iso646.c b/iconvdata/iso646.c
index a9705f0636..53ca76cb14 100644
--- a/iconvdata/iso646.c
+++ b/iconvdata/iso646.c
@@ -195,6 +195,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data,
}
out_from:
*inbufsize -= cnt;
+ inbuf += cnt;
data->outbufavail = outwchars;
}
else
@@ -243,6 +244,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data,
}
out_to:
*inbufsize -= cnt;
+ inbuf += cnt;
data->outbufavail = outchars;
if (outchars < data->outbufsize)
diff --git a/iconvdata/iso6937.c b/iconvdata/iso6937.c
index 30c3831a29..21e3ab4898 100644
--- a/iconvdata/iso6937.c
+++ b/iconvdata/iso6937.c
@@ -492,6 +492,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data,
++cnt;
}
*inbufsize -= cnt;
+ inbuf += cnt;
data->outbufavail = outwchars;
}
else
@@ -593,6 +594,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data,
cnt += sizeof (wchar_t);
}
*inbufsize -= cnt;
+ inbuf += cnt;
data->outbufavail = outchars;
if (outchars + extra < data->outbufsize)
diff --git a/iconvdata/iso8859-1.c b/iconvdata/iso8859-1.c
index ab69c4d1a4..b9484a06b7 100644
--- a/iconvdata/iso8859-1.c
+++ b/iconvdata/iso8859-1.c
@@ -104,6 +104,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data,
++cnt;
}
*inbufsize -= cnt;
+ inbuf += cnt;
data->outbufavail = outwchars;
}
else
@@ -129,6 +130,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data,
cnt += sizeof (wchar_t);
}
*inbufsize -= cnt;
+ inbuf += cnt;
data->outbufavail = outchars;
if (outchars < data->outbufsize)
diff --git a/iconvdata/johab.c b/iconvdata/johab.c
index 51b235c924..c9912a71d0 100644
--- a/iconvdata/johab.c
+++ b/iconvdata/johab.c
@@ -408,6 +408,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data,
++cnt;
}
*inbufsize -= cnt;
+ inbuf += cnt;
data->outbufavail = outwchars;
}
else
@@ -462,6 +463,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data,
cnt += sizeof (wchar_t);
}
*inbufsize -= cnt;
+ inbuf += cnt;
data->outbufavail = outchars;
if (outchars + extra < data->outbufsize)
diff --git a/iconvdata/run-iconv-test.sh b/iconvdata/run-iconv-test.sh
index 2ef394d4a3..ebafa2f298 100755
--- a/iconvdata/run-iconv-test.sh
+++ b/iconvdata/run-iconv-test.sh
@@ -44,14 +44,18 @@ while read from to targets; do
for t in $targets; do
$ICONV -f $from -t $t testdata/$from > $temp1 ||
{ echo "*** conversion from $from to $t failed"; exit 1; }
+ if test -s testdata/$from..$t; then
+ cmp $temp1 testdata/$from..$t >& /dev/null ||
+ { echo "*** $from -> $t conversion failed"; exit 1; }
+ fi
$ICONV -f $t -t $to -o $temp2 $temp1 ||
{ echo "*** conversion from $t to $to failed"; exit 1; }
test -s $temp1 && cmp testdata/$from $temp2 >& /dev/null ||
- { echo "*** $from -> $t -> $to conversion failed"; exit 1; }
+ { echo "*** $from -> $t -> $to conversion failed"; exit 1; }
# All tests ok.
echo "$from -> $t -> $to ok"
- #rm -f $name1 $name2
+ rm -f $temp1 $temp2
done
done < TESTS
diff --git a/iconvdata/sjis.c b/iconvdata/sjis.c
index 7a81c017e9..33dc2f1d65 100644
--- a/iconvdata/sjis.c
+++ b/iconvdata/sjis.c
@@ -4128,6 +4128,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data,
++cnt;
}
*inbufsize -= cnt;
+ inbuf += cnt;
data->outbufavail = outwchars;
}
else
@@ -4181,6 +4182,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data,
cnt += sizeof (wchar_t);
}
*inbufsize -= cnt;
+ inbuf += cnt;
data->outbufavail = outchars;
if (outchars + extra < data->outbufsize)
diff --git a/iconvdata/t61.c b/iconvdata/t61.c
index d532438bf9..b77ee71248 100644
--- a/iconvdata/t61.c
+++ b/iconvdata/t61.c
@@ -483,6 +483,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data,
++cnt;
}
*inbufsize -= cnt;
+ inbuf += cnt;
data->outbufavail = outwchars;
}
else
@@ -542,6 +543,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data,
cnt += sizeof (wchar_t);
}
*inbufsize -= cnt;
+ inbuf += cnt;
data->outbufavail = outchars;
if (outchars + extra < data->outbufsize)
diff --git a/iconvdata/testdata/ISO-8859-1..UTF8 b/iconvdata/testdata/ISO-8859-1..UTF8
new file mode 100644
index 0000000000..8ad5d2cb1c
--- /dev/null
+++ b/iconvdata/testdata/ISO-8859-1..UTF8
@@ -0,0 +1,12 @@
+ ! " # $ % & ' ( ) * + , - . /
+ 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
+ @ A B C D E F G H I J K L M N O
+ P Q R S T U V W X Y Z [ \ ] ^ _
+ ` a b c d e f g h i j k l m n o
+ p q r s t u v w x y z { | } ~
+   ¡ ¢ £ ¤ ¥ ¦ § ¨ © ª « ¬ ­ ® ¯
+ ° ± ² ³ ´ µ ¶ · ¸ ¹ º » ¼ ½ ¾ ¿
+ À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï
+ Ð Ñ Ò Ó Ô Õ Ö × Ø Ù Ú Û Ü Ý Þ ß
+ à á â ã ä å æ ç è é ê ë ì í î ï
+ ð ñ ò ó ô õ ö ÷ ø ù ú û ü ý þ ÿ
diff --git a/iconvdata/testdata/ISO-8859-10 b/iconvdata/testdata/ISO-8859-10
new file mode 100644
index 0000000000..7d3f9b2d8f
--- /dev/null
+++ b/iconvdata/testdata/ISO-8859-10
@@ -0,0 +1,12 @@
+ ! " # $ % & ' ( ) * + , - . /
+ 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
+ @ A B C D E F G H I J K L M N O
+ P Q R S T U V W X Y Z [ \ ] ^ _
+ ` a b c d e f g h i j k l m n o
+ p q r s t u v w x y z { | } ~
+
+
+
+
+
+
diff --git a/iconvdata/testdata/ISO-8859-10..UCS2 b/iconvdata/testdata/ISO-8859-10..UCS2
new file mode 100644
index 0000000000..0764f068f4
--- /dev/null
+++ b/iconvdata/testdata/ISO-8859-10..UCS2
Binary files differ
diff --git a/iconvdata/testdata/ISO-8859-2 b/iconvdata/testdata/ISO-8859-2
new file mode 100644
index 0000000000..7d3f9b2d8f
--- /dev/null
+++ b/iconvdata/testdata/ISO-8859-2
@@ -0,0 +1,12 @@
+ ! " # $ % & ' ( ) * + , - . /
+ 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
+ @ A B C D E F G H I J K L M N O
+ P Q R S T U V W X Y Z [ \ ] ^ _
+ ` a b c d e f g h i j k l m n o
+ p q r s t u v w x y z { | } ~
+
+
+
+
+
+
diff --git a/iconvdata/testdata/ISO-8859-2..UCS4 b/iconvdata/testdata/ISO-8859-2..UCS4
new file mode 100644
index 0000000000..1795522591
--- /dev/null
+++ b/iconvdata/testdata/ISO-8859-2..UCS4
Binary files differ
diff --git a/iconvdata/testdata/ISO-8859-2..UTF8 b/iconvdata/testdata/ISO-8859-2..UTF8
new file mode 100644
index 0000000000..5428c1fd85
--- /dev/null
+++ b/iconvdata/testdata/ISO-8859-2..UTF8
@@ -0,0 +1,12 @@
+ ! " # $ % & ' ( ) * + , - . /
+ 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
+ @ A B C D E F G H I J K L M N O
+ P Q R S T U V W X Y Z [ \ ] ^ _
+ ` a b c d e f g h i j k l m n o
+ p q r s t u v w x y z { | } ~
+   Ą ˘ Ł ¤ Ľ Ś § ¨ Š Ş Ť Ź ­ Ž Ż
+ ° ą ˛ ł ´ ľ ś ˇ ¸ š ş ť ź ˝ ž ż
+ Ŕ Á Â Ă Ä Ĺ Ć Ç Č É Ę Ë Ě Í Î Ď
+ Đ Ń Ň Ó Ô Ő Ö × Ř Ů Ú Ű Ü Ý Ţ ß
+ ŕ á â ă ä ĺ ć ç č é ę ë ě í î ď
+ đ ń ň ó ô ő ö ÷ ř ů ú ű ü ý ţ ˙
diff --git a/iconvdata/testdata/ISO-8859-3 b/iconvdata/testdata/ISO-8859-3
new file mode 100644
index 0000000000..e85c3bd0ef
--- /dev/null
+++ b/iconvdata/testdata/ISO-8859-3
@@ -0,0 +1,12 @@
+ ! " # $ % & ' ( ) * + , - . /
+ 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
+ @ A B C D E F G H I J K L M N O
+ P Q R S T U V W X Y Z [ \ ] ^ _
+ ` a b c d e f g h i j k l m n o
+ p q r s t u v w x y z { | } ~
+
+
+
+
+
+
diff --git a/iconvdata/testdata/ISO-8859-4 b/iconvdata/testdata/ISO-8859-4
new file mode 100644
index 0000000000..7d3f9b2d8f
--- /dev/null
+++ b/iconvdata/testdata/ISO-8859-4
@@ -0,0 +1,12 @@
+ ! " # $ % & ' ( ) * + , - . /
+ 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
+ @ A B C D E F G H I J K L M N O
+ P Q R S T U V W X Y Z [ \ ] ^ _
+ ` a b c d e f g h i j k l m n o
+ p q r s t u v w x y z { | } ~
+
+
+
+
+
+
diff --git a/iconvdata/testdata/ISO-8859-5 b/iconvdata/testdata/ISO-8859-5
new file mode 100644
index 0000000000..7d3f9b2d8f
--- /dev/null
+++ b/iconvdata/testdata/ISO-8859-5
@@ -0,0 +1,12 @@
+ ! " # $ % & ' ( ) * + , - . /
+ 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
+ @ A B C D E F G H I J K L M N O
+ P Q R S T U V W X Y Z [ \ ] ^ _
+ ` a b c d e f g h i j k l m n o
+ p q r s t u v w x y z { | } ~
+
+
+
+
+
+
diff --git a/iconvdata/testdata/ISO-8859-6 b/iconvdata/testdata/ISO-8859-6
new file mode 100644
index 0000000000..047664e5f7
--- /dev/null
+++ b/iconvdata/testdata/ISO-8859-6
@@ -0,0 +1,12 @@
+ ! " # $ % & ' ( ) * + , - . /
+ 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
+ @ A B C D E F G H I J K L M N O
+ P Q R S T U V W X Y Z [ \ ] ^ _
+ ` a b c d e f g h i j k l m n o
+ p q r s t u v w x y z { | } ~
+
+
+
+
+
+
diff --git a/iconvdata/testdata/ISO-8859-7 b/iconvdata/testdata/ISO-8859-7
new file mode 100644
index 0000000000..c90d1613f0
--- /dev/null
+++ b/iconvdata/testdata/ISO-8859-7
@@ -0,0 +1,12 @@
+ ! " # $ % & ' ( ) * + , - . /
+ 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
+ @ A B C D E F G H I J K L M N O
+ P Q R S T U V W X Y Z [ \ ] ^ _
+ ` a b c d e f g h i j k l m n o
+ p q r s t u v w x y z { | } ~
+
+
+
+
+
+
diff --git a/iconvdata/testdata/ISO-8859-8 b/iconvdata/testdata/ISO-8859-8
new file mode 100644
index 0000000000..42edc071e9
--- /dev/null
+++ b/iconvdata/testdata/ISO-8859-8
@@ -0,0 +1,11 @@
+ ! " # $ % & ' ( ) * + , - . /
+ 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
+ @ A B C D E F G H I J K L M N O
+ P Q R S T U V W X Y Z [ \ ] ^ _
+ ` a b c d e f g h i j k l m n o
+ p q r s t u v w x y z { | } ~
+
+
+
+
+
diff --git a/iconvdata/testdata/ISO-8859-9 b/iconvdata/testdata/ISO-8859-9
new file mode 100644
index 0000000000..7d3f9b2d8f
--- /dev/null
+++ b/iconvdata/testdata/ISO-8859-9
@@ -0,0 +1,12 @@
+ ! " # $ % & ' ( ) * + , - . /
+ 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
+ @ A B C D E F G H I J K L M N O
+ P Q R S T U V W X Y Z [ \ ] ^ _
+ ` a b c d e f g h i j k l m n o
+ p q r s t u v w x y z { | } ~
+
+
+
+
+
+
diff --git a/iconvdata/uhc.c b/iconvdata/uhc.c
index c1d5a40d8e..ed4b7adb70 100644
--- a/iconvdata/uhc.c
+++ b/iconvdata/uhc.c
@@ -2775,6 +2775,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data,
++cnt;
}
*inbufsize -= cnt;
+ inbuf += cnt;
data->outbufavail = outwchars;
}
else
@@ -2816,6 +2817,7 @@ gconv (struct gconv_step *step, struct gconv_step_data *data,
cnt += sizeof (wchar_t);
}
*inbufsize -= cnt;
+ inbuf += cnt;
data->outbufavail = outchars;
if (outchars + extra < data->outbufsize)
diff --git a/stdlib/testmb.c b/stdlib/testmb.c
index 117ade1a5b..45dae7db61 100644
--- a/stdlib/testmb.c
+++ b/stdlib/testmb.c
@@ -40,11 +40,7 @@ main (int argc, char *argv[])
int r;
char c = 'x';
wchar_t wc;
- char *mbc;
-
- mbc = (char *) malloc (MB_CUR_MAX);
- mbc[0] = c;
- mbc[1] = '\0';
+ char mbc[MB_CUR_MAX];
if ((r = mbtowc (&wc, &c, MB_CUR_MAX)) <= 0)
{
diff --git a/sysdeps/i386/bits/byteswap.h b/sysdeps/i386/bits/byteswap.h
index bf55c890ba..1eef351a0c 100644
--- a/sysdeps/i386/bits/byteswap.h
+++ b/sysdeps/i386/bits/byteswap.h
@@ -1,5 +1,5 @@
/* Macros to swap the order of bytes in integer values.
- Copyright (C) 1997 Free Software Foundation, Inc.
+ Copyright (C) 1997, 1998 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -32,7 +32,7 @@
__v = __bswap_constant_16 (x); \
else \
__asm__ __volatile__ ("rorw $8, %w0" \
- : "=q" (__v) \
+ : "=r" (__v) \
: "0" ((unsigned short int) (x)) \
: "cc"); \
__v; })
@@ -59,7 +59,7 @@
__asm__ __volatile__ ("rorw $8, %w0;" \
"rorl $16, %0;" \
"rorw $8, %w0" \
- : "=q" (__v) \
+ : "=r" (__v) \
: "0" ((unsigned int) (x)) \
: "cc"); \
__v; })
diff --git a/wcsmbs/wcsmbsload.c b/wcsmbs/wcsmbsload.c
index cf854d9125..c7e5651fe6 100644
--- a/wcsmbs/wcsmbsload.c
+++ b/wcsmbs/wcsmbsload.c
@@ -38,8 +38,8 @@ static struct gconv_step to_wc =
modname: NULL,
counter: INT_MAX,
from_name: "ANSI_X3.4-1968",
- to_name: "ISO-10646/UCS4/",
- fct: __gconv_transform_ascii_ucs4,
+ to_name: "#INTERNAL#",
+ fct: __gconv_transform_ascii_internal,
init_fct: NULL,
end_fct: NULL,
data: NULL
@@ -50,9 +50,9 @@ static struct gconv_step to_mb =
shlib_handle: NULL,
modname: NULL,
counter: INT_MAX,
- from_name: "ISO-10646/UCS4/",
+ from_name: "#INTERNAL#",
to_name: "ANSI_X3.4-1968",
- fct: __gconv_transform_ucs4_ascii,
+ fct: __gconv_transform_internal_ascii,
init_fct: NULL,
end_fct: NULL,
data: NULL
@@ -113,8 +113,8 @@ __wcsmbs_load_conv (const struct locale_data *new_category)
/* Get name of charset of the locale. */
charset_name = new_category->values[_NL_ITEM_INDEX(CODESET)].string;
- __wcsmbs_gconv_fcts.tomb = getfct (charset_name, "ISO-10646/UCS4/");
- __wcsmbs_gconv_fcts.towc = getfct ("ISO-10646/UCS4/", charset_name);
+ __wcsmbs_gconv_fcts.tomb = getfct (charset_name, "#INTERNAL#");
+ __wcsmbs_gconv_fcts.towc = getfct ("#INTERNAL#", charset_name);
/* If any of the conversion functions is not available we don't
use any since this would mean we cannot convert back and