diff options
author | Nikos Mavrogiannopoulos <nmav@gnutls.org> | 2012-11-21 23:37:30 +0100 |
---|---|---|
committer | Nikos Mavrogiannopoulos <nmav@gnutls.org> | 2012-11-21 23:37:30 +0100 |
commit | a53aa1f9a32e47b465d5b6ef3e3f5ed6b918a1c5 (patch) | |
tree | 2f9f24ac951e6f939b5403af3eae079d6792a0c1 | |
parent | 06fdce3411941982c5665d4152bac396bf648d5f (diff) | |
download | gnutls-a53aa1f9a32e47b465d5b6ef3e3f5ed6b918a1c5.tar.gz |
iconv() will include the UCS2->UTF8 convertion in systems that is not provided.
56 files changed, 11256 insertions, 2 deletions
diff --git a/.gitignore b/.gitignore index 5ef01a8164..a2a407e99e 100644 --- a/.gitignore +++ b/.gitignore @@ -604,3 +604,12 @@ src/libcmd-danetool.la src/danetool tests/key-openssl tests/mini-dtls-srtp +gl/tests/test-iconv +gl/unused-parameter.h +gl/unitypes.h +.dirstamp +gl/unistr.h +gl/tests/unused-parameter.h +gl/tests/locale.h +gl/iconv.h +build-aux/snippet/unused-parameter.h diff --git a/gl/Makefile.am b/gl/Makefile.am index 1e232c3205..13660abb14 100644 --- a/gl/Makefile.am +++ b/gl/Makefile.am @@ -21,7 +21,7 @@ # the same distribution terms as the rest of that program. # # Generated by gnulib-tool. -# Reproduce by: gnulib-tool --import --dir=. --local-dir=gl/override --lib=libgnu --source-base=gl --m4-base=gl/m4 --doc-base=doc --tests-base=gl/tests --aux-dir=build-aux --with-tests --avoid=alignof-tests --avoid=lock-tests --avoid=lseek-tests --no-conditional-dependencies --libtool --macro-prefix=gl --no-vc-files accept alloca alphasort argp base64 bind byteswap c-ctype close connect error extensions func gendocs getaddrinfo getpass getsubopt gettext gettime hash-pjw-bare havelib iconv inet_ntop inet_pton lib-msvc-compat lib-symbol-versions listen maintainer-makefile manywarnings memmem-simple minmax netdb netinet_in pmccabe2html progname read-file recv recvfrom scandir select send sendto servent setsockopt shutdown snprintf socket sockets socklen stdint strcase strndup strtok_r strverscmp sys_socket sys_stat time_r timer-time timespec u64 unistd valgrind-tests vasprintf version-etc version-etc-fsf vfprintf-posix vprintf-posix vsnprintf warnings +# Reproduce by: gnulib-tool --import --dir=. --local-dir=gl/override --lib=libgnu --source-base=gl --m4-base=gl/m4 --doc-base=doc --tests-base=gl/tests --aux-dir=build-aux --with-tests --avoid=alignof-tests --avoid=lock-tests --avoid=lseek-tests --no-conditional-dependencies --libtool --macro-prefix=gl --no-vc-files accept alloca alphasort argp base64 bind byteswap c-ctype close connect error extensions func gendocs getaddrinfo getpass getsubopt gettext gettime hash-pjw-bare havelib iconv iconv_open-utf inet_ntop inet_pton lib-msvc-compat lib-symbol-versions listen maintainer-makefile manywarnings memmem-simple minmax netdb netinet_in pmccabe2html progname read-file recv recvfrom scandir select send sendto servent setsockopt shutdown snprintf socket sockets socklen stdint strcase strndup strtok_r strverscmp sys_socket sys_stat time_r timer-time timespec u64 unistd valgrind-tests vasprintf version-etc version-etc-fsf vfprintf-posix vprintf-posix vsnprintf warnings AUTOMAKE_OPTIONS = 1.5 gnits subdir-objects @@ -205,6 +205,12 @@ libgnu_la_SOURCES += c-ctype.h c-ctype.c ## end gnulib module c-ctype +## begin gnulib module c-strcase + +libgnu_la_SOURCES += c-strcase.h c-strcasecmp.c c-strncasecmp.c + +## end gnulib module c-strcase + ## begin gnulib module close @@ -607,6 +613,12 @@ EXTRA_DIST += $(top_srcdir)/GNUmakefile ## end gnulib module gnumakefile +## begin gnulib module gperf + +GPERF = gperf + +## end gnulib module gperf + ## begin gnulib module hash-pjw-bare libgnu_la_SOURCES += hash-pjw-bare.h hash-pjw-bare.c @@ -620,6 +632,70 @@ EXTRA_DIST += $(top_srcdir)/build-aux/config.rpath ## end gnulib module havelib +## begin gnulib module iconv-h + +BUILT_SOURCES += $(ICONV_H) + +# We need the following in order to create <iconv.h> when the system +# doesn't have one that works with the given compiler. +if GL_GENERATE_ICONV_H +iconv.h: iconv.in.h $(top_builddir)/config.status $(CXXDEFS_H) $(ARG_NONNULL_H) $(WARN_ON_USE_H) + $(AM_V_GEN)rm -f $@-t $@ && \ + { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */' && \ + sed -e 's|@''GUARD_PREFIX''@|GL|g' \ + -e 's|@''INCLUDE_NEXT''@|$(INCLUDE_NEXT)|g' \ + -e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \ + -e 's|@''PRAGMA_COLUMNS''@|@PRAGMA_COLUMNS@|g' \ + -e 's|@''NEXT_ICONV_H''@|$(NEXT_ICONV_H)|g' \ + -e 's/@''GNULIB_ICONV''@/$(GNULIB_ICONV)/g' \ + -e 's|@''ICONV_CONST''@|$(ICONV_CONST)|g' \ + -e 's|@''REPLACE_ICONV''@|$(REPLACE_ICONV)|g' \ + -e 's|@''REPLACE_ICONV_OPEN''@|$(REPLACE_ICONV_OPEN)|g' \ + -e 's|@''REPLACE_ICONV_UTF''@|$(REPLACE_ICONV_UTF)|g' \ + -e '/definitions of _GL_FUNCDECL_RPL/r $(CXXDEFS_H)' \ + -e '/definition of _GL_ARG_NONNULL/r $(ARG_NONNULL_H)' \ + -e '/definition of _GL_WARN_ON_USE/r $(WARN_ON_USE_H)' \ + < $(srcdir)/iconv.in.h; \ + } > $@-t && \ + mv $@-t $@ +else +iconv.h: $(top_builddir)/config.status + rm -f $@ +endif +MOSTLYCLEANFILES += iconv.h iconv.h-t + +EXTRA_DIST += iconv.in.h + +## end gnulib module iconv-h + +## begin gnulib module iconv_open + +iconv_open-aix.h: iconv_open-aix.gperf + $(GPERF) -m 10 $(srcdir)/iconv_open-aix.gperf > $(srcdir)/iconv_open-aix.h-t + mv $(srcdir)/iconv_open-aix.h-t $(srcdir)/iconv_open-aix.h +iconv_open-hpux.h: iconv_open-hpux.gperf + $(GPERF) -m 10 $(srcdir)/iconv_open-hpux.gperf > $(srcdir)/iconv_open-hpux.h-t + mv $(srcdir)/iconv_open-hpux.h-t $(srcdir)/iconv_open-hpux.h +iconv_open-irix.h: iconv_open-irix.gperf + $(GPERF) -m 10 $(srcdir)/iconv_open-irix.gperf > $(srcdir)/iconv_open-irix.h-t + mv $(srcdir)/iconv_open-irix.h-t $(srcdir)/iconv_open-irix.h +iconv_open-osf.h: iconv_open-osf.gperf + $(GPERF) -m 10 $(srcdir)/iconv_open-osf.gperf > $(srcdir)/iconv_open-osf.h-t + mv $(srcdir)/iconv_open-osf.h-t $(srcdir)/iconv_open-osf.h +iconv_open-solaris.h: iconv_open-solaris.gperf + $(GPERF) -m 10 $(srcdir)/iconv_open-solaris.gperf > $(srcdir)/iconv_open-solaris.h-t + mv $(srcdir)/iconv_open-solaris.h-t $(srcdir)/iconv_open-solaris.h +BUILT_SOURCES += iconv_open-aix.h iconv_open-hpux.h iconv_open-irix.h iconv_open-osf.h iconv_open-solaris.h +MOSTLYCLEANFILES += iconv_open-aix.h-t iconv_open-hpux.h-t iconv_open-irix.h-t iconv_open-osf.h-t iconv_open-solaris.h-t +MAINTAINERCLEANFILES += iconv_open-aix.h iconv_open-hpux.h iconv_open-irix.h iconv_open-osf.h iconv_open-solaris.h +EXTRA_DIST += iconv_open-aix.h iconv_open-hpux.h iconv_open-irix.h iconv_open-osf.h iconv_open-solaris.h + +EXTRA_DIST += iconv.c iconv_close.c iconv_open-aix.gperf iconv_open-hpux.gperf iconv_open-irix.gperf iconv_open-osf.gperf iconv_open-solaris.gperf iconv_open.c + +EXTRA_libgnu_la_SOURCES += iconv.c iconv_close.c iconv_open.c + +## end gnulib module iconv_open + ## begin gnulib module inet_ntop @@ -1387,6 +1463,31 @@ EXTRA_DIST += $(top_srcdir)/build-aux/snippet/c++defs.h ## end gnulib module snippet/c++defs +## begin gnulib module snippet/unused-parameter + +# The BUILT_SOURCES created by this Makefile snippet are not used via #include +# statements but through direct file reference. Therefore this snippet must be +# present in all Makefile.am that need it. This is ensured by the applicability +# 'all' defined above. + +BUILT_SOURCES += unused-parameter.h +# The unused-parameter.h that gets inserted into generated .h files is the same +# as build-aux/snippet/unused-parameter.h, except that it has the copyright +# header cut off. +unused-parameter.h: $(top_srcdir)/build-aux/snippet/unused-parameter.h + $(AM_V_GEN)rm -f $@-t $@ && \ + sed -n -e '/GL_UNUSED_PARAMETER/,$$p' \ + < $(top_srcdir)/build-aux/snippet/unused-parameter.h \ + > $@-t && \ + mv $@-t $@ +MOSTLYCLEANFILES += unused-parameter.h unused-parameter.h-t + +UNUSED_PARAMETER_H=unused-parameter.h + +EXTRA_DIST += $(top_srcdir)/build-aux/snippet/unused-parameter.h + +## end gnulib module snippet/unused-parameter + ## begin gnulib module snippet/warn-on-use BUILT_SOURCES += warn-on-use.h @@ -2514,6 +2615,54 @@ EXTRA_DIST += unistd.in.h ## end gnulib module unistd +## begin gnulib module unistr/base + +BUILT_SOURCES += $(LIBUNISTRING_UNISTR_H) + +unistr.h: unistr.in.h + $(AM_V_GEN)rm -f $@-t $@ && \ + { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */'; \ + cat $(srcdir)/unistr.in.h; \ + } > $@-t && \ + mv -f $@-t $@ +MOSTLYCLEANFILES += unistr.h unistr.h-t + +EXTRA_DIST += unistr.in.h + +## end gnulib module unistr/base + +## begin gnulib module unistr/u8-mbtoucr + +if LIBUNISTRING_COMPILE_UNISTR_U8_MBTOUCR +libgnu_la_SOURCES += unistr/u8-mbtoucr.c +endif + +## end gnulib module unistr/u8-mbtoucr + +## begin gnulib module unistr/u8-uctomb + +if LIBUNISTRING_COMPILE_UNISTR_U8_UCTOMB +libgnu_la_SOURCES += unistr/u8-uctomb.c unistr/u8-uctomb-aux.c +endif + +## end gnulib module unistr/u8-uctomb + +## begin gnulib module unitypes + +BUILT_SOURCES += $(LIBUNISTRING_UNITYPES_H) + +unitypes.h: unitypes.in.h + $(AM_V_GEN)rm -f $@-t $@ && \ + { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */'; \ + cat $(srcdir)/unitypes.in.h; \ + } > $@-t && \ + mv -f $@-t $@ +MOSTLYCLEANFILES += unitypes.h unitypes.h-t + +EXTRA_DIST += unitypes.in.h + +## end gnulib module unitypes + ## begin gnulib module useless-if-before-free diff --git a/gl/c-strcase.h b/gl/c-strcase.h new file mode 100644 index 0000000000..fdef2385ea --- /dev/null +++ b/gl/c-strcase.h @@ -0,0 +1,56 @@ +/* Case-insensitive string comparison functions in C locale. + Copyright (C) 1995-1996, 2001, 2003, 2005, 2009-2012 Free Software + Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifndef C_STRCASE_H +#define C_STRCASE_H + +#include <stddef.h> + + +/* The functions defined in this file assume the "C" locale and a character + set without diacritics (ASCII-US or EBCDIC-US or something like that). + Even if the "C" locale on a particular system is an extension of the ASCII + character set (like on BeOS, where it is UTF-8, or on AmigaOS, where it + is ISO-8859-1), the functions in this file recognize only the ASCII + characters. More precisely, one of the string arguments must be an ASCII + string; the other one can also contain non-ASCII characters (but then + the comparison result will be nonzero). */ + + +#ifdef __cplusplus +extern "C" { +#endif + + +/* Compare strings S1 and S2, ignoring case, returning less than, equal to or + greater than zero if S1 is lexicographically less than, equal to or greater + than S2. */ +extern int c_strcasecmp (const char *s1, const char *s2) _GL_ATTRIBUTE_PURE; + +/* Compare no more than N characters of strings S1 and S2, ignoring case, + returning less than, equal to or greater than zero if S1 is + lexicographically less than, equal to or greater than S2. */ +extern int c_strncasecmp (const char *s1, const char *s2, size_t n) + _GL_ATTRIBUTE_PURE; + + +#ifdef __cplusplus +} +#endif + + +#endif /* C_STRCASE_H */ diff --git a/gl/c-strcasecmp.c b/gl/c-strcasecmp.c new file mode 100644 index 0000000000..d8332caf83 --- /dev/null +++ b/gl/c-strcasecmp.c @@ -0,0 +1,56 @@ +/* c-strcasecmp.c -- case insensitive string comparator in C locale + Copyright (C) 1998-1999, 2005-2006, 2009-2012 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> + +/* Specification. */ +#include "c-strcase.h" + +#include <limits.h> + +#include "c-ctype.h" + +int +c_strcasecmp (const char *s1, const char *s2) +{ + register const unsigned char *p1 = (const unsigned char *) s1; + register const unsigned char *p2 = (const unsigned char *) s2; + unsigned char c1, c2; + + if (p1 == p2) + return 0; + + do + { + c1 = c_tolower (*p1); + c2 = c_tolower (*p2); + + if (c1 == '\0') + break; + + ++p1; + ++p2; + } + while (c1 == c2); + + if (UCHAR_MAX <= INT_MAX) + return c1 - c2; + else + /* On machines where 'char' and 'int' are types of the same size, the + difference of two 'unsigned char' values - including the sign bit - + doesn't fit in an 'int'. */ + return (c1 > c2 ? 1 : c1 < c2 ? -1 : 0); +} diff --git a/gl/c-strncasecmp.c b/gl/c-strncasecmp.c new file mode 100644 index 0000000000..47fb5fdb67 --- /dev/null +++ b/gl/c-strncasecmp.c @@ -0,0 +1,56 @@ +/* c-strncasecmp.c -- case insensitive string comparator in C locale + Copyright (C) 1998-1999, 2005-2006, 2009-2012 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> + +/* Specification. */ +#include "c-strcase.h" + +#include <limits.h> + +#include "c-ctype.h" + +int +c_strncasecmp (const char *s1, const char *s2, size_t n) +{ + register const unsigned char *p1 = (const unsigned char *) s1; + register const unsigned char *p2 = (const unsigned char *) s2; + unsigned char c1, c2; + + if (p1 == p2 || n == 0) + return 0; + + do + { + c1 = c_tolower (*p1); + c2 = c_tolower (*p2); + + if (--n == 0 || c1 == '\0') + break; + + ++p1; + ++p2; + } + while (c1 == c2); + + if (UCHAR_MAX <= INT_MAX) + return c1 - c2; + else + /* On machines where 'char' and 'int' are types of the same size, the + difference of two 'unsigned char' values - including the sign bit - + doesn't fit in an 'int'. */ + return (c1 > c2 ? 1 : c1 < c2 ? -1 : 0); +} diff --git a/gl/iconv.c b/gl/iconv.c new file mode 100644 index 0000000000..66966ddf09 --- /dev/null +++ b/gl/iconv.c @@ -0,0 +1,449 @@ +/* Character set conversion. + Copyright (C) 1999-2001, 2007, 2009-2012 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> + +/* Specification. */ +#include <iconv.h> + +#include <stddef.h> + +#if REPLACE_ICONV_UTF +# include <errno.h> +# include <stdint.h> +# include <stdlib.h> +# include "unistr.h" +# ifndef uintptr_t +# define uintptr_t unsigned long +# endif +#endif + +#if REPLACE_ICONV_UTF + +/* UTF-{16,32}{BE,LE} converters taken from GNU libiconv 1.11. */ + +/* Return code if invalid. (xxx_mbtowc) */ +# define RET_ILSEQ -1 +/* Return code if no bytes were read. (xxx_mbtowc) */ +# define RET_TOOFEW -2 + +/* Return code if invalid. (xxx_wctomb) */ +# define RET_ILUNI -1 +/* Return code if output buffer is too small. (xxx_wctomb, xxx_reset) */ +# define RET_TOOSMALL -2 + +/* + * UTF-16BE + */ + +/* Specification: RFC 2781 */ + +static int +utf16be_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n) +{ + if (n >= 2) + { + ucs4_t wc = (s[0] << 8) + s[1]; + if (wc >= 0xd800 && wc < 0xdc00) + { + if (n >= 4) + { + ucs4_t wc2 = (s[2] << 8) + s[3]; + if (!(wc2 >= 0xdc00 && wc2 < 0xe000)) + return RET_ILSEQ; + *pwc = 0x10000 + ((wc - 0xd800) << 10) + (wc2 - 0xdc00); + return 4; + } + } + else if (wc >= 0xdc00 && wc < 0xe000) + { + return RET_ILSEQ; + } + else + { + *pwc = wc; + return 2; + } + } + return RET_TOOFEW; +} + +static int +utf16be_wctomb (unsigned char *r, ucs4_t wc, size_t n) +{ + if (!(wc >= 0xd800 && wc < 0xe000)) + { + if (wc < 0x10000) + { + if (n >= 2) + { + r[0] = (unsigned char) (wc >> 8); + r[1] = (unsigned char) wc; + return 2; + } + else + return RET_TOOSMALL; + } + else if (wc < 0x110000) + { + if (n >= 4) + { + ucs4_t wc1 = 0xd800 + ((wc - 0x10000) >> 10); + ucs4_t wc2 = 0xdc00 + ((wc - 0x10000) & 0x3ff); + r[0] = (unsigned char) (wc1 >> 8); + r[1] = (unsigned char) wc1; + r[2] = (unsigned char) (wc2 >> 8); + r[3] = (unsigned char) wc2; + return 4; + } + else + return RET_TOOSMALL; + } + } + return RET_ILUNI; +} + +/* + * UTF-16LE + */ + +/* Specification: RFC 2781 */ + +static int +utf16le_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n) +{ + if (n >= 2) + { + ucs4_t wc = s[0] + (s[1] << 8); + if (wc >= 0xd800 && wc < 0xdc00) + { + if (n >= 4) + { + ucs4_t wc2 = s[2] + (s[3] << 8); + if (!(wc2 >= 0xdc00 && wc2 < 0xe000)) + return RET_ILSEQ; + *pwc = 0x10000 + ((wc - 0xd800) << 10) + (wc2 - 0xdc00); + return 4; + } + } + else if (wc >= 0xdc00 && wc < 0xe000) + { + return RET_ILSEQ; + } + else + { + *pwc = wc; + return 2; + } + } + return RET_TOOFEW; +} + +static int +utf16le_wctomb (unsigned char *r, ucs4_t wc, size_t n) +{ + if (!(wc >= 0xd800 && wc < 0xe000)) + { + if (wc < 0x10000) + { + if (n >= 2) + { + r[0] = (unsigned char) wc; + r[1] = (unsigned char) (wc >> 8); + return 2; + } + else + return RET_TOOSMALL; + } + else if (wc < 0x110000) + { + if (n >= 4) + { + ucs4_t wc1 = 0xd800 + ((wc - 0x10000) >> 10); + ucs4_t wc2 = 0xdc00 + ((wc - 0x10000) & 0x3ff); + r[0] = (unsigned char) wc1; + r[1] = (unsigned char) (wc1 >> 8); + r[2] = (unsigned char) wc2; + r[3] = (unsigned char) (wc2 >> 8); + return 4; + } + else + return RET_TOOSMALL; + } + } + return RET_ILUNI; +} + +/* + * UTF-32BE + */ + +/* Specification: Unicode 3.1 Standard Annex #19 */ + +static int +utf32be_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n) +{ + if (n >= 4) + { + ucs4_t wc = (s[0] << 24) + (s[1] << 16) + (s[2] << 8) + s[3]; + if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000)) + { + *pwc = wc; + return 4; + } + else + return RET_ILSEQ; + } + return RET_TOOFEW; +} + +static int +utf32be_wctomb (unsigned char *r, ucs4_t wc, size_t n) +{ + if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000)) + { + if (n >= 4) + { + r[0] = 0; + r[1] = (unsigned char) (wc >> 16); + r[2] = (unsigned char) (wc >> 8); + r[3] = (unsigned char) wc; + return 4; + } + else + return RET_TOOSMALL; + } + return RET_ILUNI; +} + +/* + * UTF-32LE + */ + +/* Specification: Unicode 3.1 Standard Annex #19 */ + +static int +utf32le_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n) +{ + if (n >= 4) + { + ucs4_t wc = s[0] + (s[1] << 8) + (s[2] << 16) + (s[3] << 24); + if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000)) + { + *pwc = wc; + return 4; + } + else + return RET_ILSEQ; + } + return RET_TOOFEW; +} + +static int +utf32le_wctomb (unsigned char *r, ucs4_t wc, size_t n) +{ + if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000)) + { + if (n >= 4) + { + r[0] = (unsigned char) wc; + r[1] = (unsigned char) (wc >> 8); + r[2] = (unsigned char) (wc >> 16); + r[3] = 0; + return 4; + } + else + return RET_TOOSMALL; + } + return RET_ILUNI; +} + +#endif + +size_t +rpl_iconv (iconv_t cd, + ICONV_CONST char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft) +#undef iconv +{ +#if REPLACE_ICONV_UTF + switch ((uintptr_t) cd) + { + { + int (*xxx_wctomb) (unsigned char *, ucs4_t, size_t); + + case (uintptr_t) _ICONV_UTF8_UTF16BE: + xxx_wctomb = utf16be_wctomb; + goto loop_from_utf8; + case (uintptr_t) _ICONV_UTF8_UTF16LE: + xxx_wctomb = utf16le_wctomb; + goto loop_from_utf8; + case (uintptr_t) _ICONV_UTF8_UTF32BE: + xxx_wctomb = utf32be_wctomb; + goto loop_from_utf8; + case (uintptr_t) _ICONV_UTF8_UTF32LE: + xxx_wctomb = utf32le_wctomb; + goto loop_from_utf8; + + loop_from_utf8: + if (inbuf == NULL || *inbuf == NULL) + return 0; + { + ICONV_CONST char *inptr = *inbuf; + size_t inleft = *inbytesleft; + char *outptr = *outbuf; + size_t outleft = *outbytesleft; + size_t res = 0; + while (inleft > 0) + { + ucs4_t uc; + int m = u8_mbtoucr (&uc, (const uint8_t *) inptr, inleft); + if (m <= 0) + { + if (m == -1) + { + errno = EILSEQ; + res = (size_t)(-1); + break; + } + if (m == -2) + { + errno = EINVAL; + res = (size_t)(-1); + break; + } + abort (); + } + else + { + int n = xxx_wctomb ((uint8_t *) outptr, uc, outleft); + if (n < 0) + { + if (n == RET_ILUNI) + { + errno = EILSEQ; + res = (size_t)(-1); + break; + } + if (n == RET_TOOSMALL) + { + errno = E2BIG; + res = (size_t)(-1); + break; + } + abort (); + } + else + { + inptr += m; + inleft -= m; + outptr += n; + outleft -= n; + } + } + } + *inbuf = inptr; + *inbytesleft = inleft; + *outbuf = outptr; + *outbytesleft = outleft; + return res; + } + } + + { + int (*xxx_mbtowc) (ucs4_t *, const unsigned char *, size_t); + + case (uintptr_t) _ICONV_UTF16BE_UTF8: + xxx_mbtowc = utf16be_mbtowc; + goto loop_to_utf8; + case (uintptr_t) _ICONV_UTF16LE_UTF8: + xxx_mbtowc = utf16le_mbtowc; + goto loop_to_utf8; + case (uintptr_t) _ICONV_UTF32BE_UTF8: + xxx_mbtowc = utf32be_mbtowc; + goto loop_to_utf8; + case (uintptr_t) _ICONV_UTF32LE_UTF8: + xxx_mbtowc = utf32le_mbtowc; + goto loop_to_utf8; + + loop_to_utf8: + if (inbuf == NULL || *inbuf == NULL) + return 0; + { + ICONV_CONST char *inptr = *inbuf; + size_t inleft = *inbytesleft; + char *outptr = *outbuf; + size_t outleft = *outbytesleft; + size_t res = 0; + while (inleft > 0) + { + ucs4_t uc; + int m = xxx_mbtowc (&uc, (const uint8_t *) inptr, inleft); + if (m <= 0) + { + if (m == RET_ILSEQ) + { + errno = EILSEQ; + res = (size_t)(-1); + break; + } + if (m == RET_TOOFEW) + { + errno = EINVAL; + res = (size_t)(-1); + break; + } + abort (); + } + else + { + int n = u8_uctomb ((uint8_t *) outptr, uc, outleft); + if (n < 0) + { + if (n == -1) + { + errno = EILSEQ; + res = (size_t)(-1); + break; + } + if (n == -2) + { + errno = E2BIG; + res = (size_t)(-1); + break; + } + abort (); + } + else + { + inptr += m; + inleft -= m; + outptr += n; + outleft -= n; + } + } + } + *inbuf = inptr; + *inbytesleft = inleft; + *outbuf = outptr; + *outbytesleft = outleft; + return res; + } + } + } +#endif + return iconv (cd, inbuf, inbytesleft, outbuf, outbytesleft); +} diff --git a/gl/iconv.in.h b/gl/iconv.in.h new file mode 100644 index 0000000000..1ee42398b7 --- /dev/null +++ b/gl/iconv.in.h @@ -0,0 +1,110 @@ +/* A GNU-like <iconv.h>. + + Copyright (C) 2007-2012 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifndef _@GUARD_PREFIX@_ICONV_H + +#if __GNUC__ >= 3 +@PRAGMA_SYSTEM_HEADER@ +#endif +@PRAGMA_COLUMNS@ + +/* The include_next requires a split double-inclusion guard. */ +#@INCLUDE_NEXT@ @NEXT_ICONV_H@ + +#ifndef _@GUARD_PREFIX@_ICONV_H +#define _@GUARD_PREFIX@_ICONV_H + +/* The definitions of _GL_FUNCDECL_RPL etc. are copied here. */ + +/* The definition of _GL_ARG_NONNULL is copied here. */ + +/* The definition of _GL_WARN_ON_USE is copied here. */ + + +#if @GNULIB_ICONV@ +# if @REPLACE_ICONV_OPEN@ +/* An iconv_open wrapper that supports the IANA standardized encoding names + ("ISO-8859-1" etc.) as far as possible. */ +# if !(defined __cplusplus && defined GNULIB_NAMESPACE) +# define iconv_open rpl_iconv_open +# endif +_GL_FUNCDECL_RPL (iconv_open, iconv_t, + (const char *tocode, const char *fromcode) + _GL_ARG_NONNULL ((1, 2))); +_GL_CXXALIAS_RPL (iconv_open, iconv_t, + (const char *tocode, const char *fromcode)); +# else +_GL_CXXALIAS_SYS (iconv_open, iconv_t, + (const char *tocode, const char *fromcode)); +# endif +_GL_CXXALIASWARN (iconv_open); +#endif + +#if @REPLACE_ICONV_UTF@ +/* Special constants for supporting UTF-{16,32}{BE,LE} encodings. + Not public. */ +# define _ICONV_UTF8_UTF16BE (iconv_t)(-161) +# define _ICONV_UTF8_UTF16LE (iconv_t)(-162) +# define _ICONV_UTF8_UTF32BE (iconv_t)(-163) +# define _ICONV_UTF8_UTF32LE (iconv_t)(-164) +# define _ICONV_UTF16BE_UTF8 (iconv_t)(-165) +# define _ICONV_UTF16LE_UTF8 (iconv_t)(-166) +# define _ICONV_UTF32BE_UTF8 (iconv_t)(-167) +# define _ICONV_UTF32LE_UTF8 (iconv_t)(-168) +#endif + +#if @GNULIB_ICONV@ +# if @REPLACE_ICONV@ +# if !(defined __cplusplus && defined GNULIB_NAMESPACE) +# define iconv rpl_iconv +# endif +_GL_FUNCDECL_RPL (iconv, size_t, + (iconv_t cd, + @ICONV_CONST@ char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft)); +_GL_CXXALIAS_RPL (iconv, size_t, + (iconv_t cd, + @ICONV_CONST@ char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft)); +# else +_GL_CXXALIAS_SYS (iconv, size_t, + (iconv_t cd, + @ICONV_CONST@ char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft)); +# endif +_GL_CXXALIASWARN (iconv); +# ifndef ICONV_CONST +# define ICONV_CONST @ICONV_CONST@ +# endif +#endif + +#if @GNULIB_ICONV@ +# if @REPLACE_ICONV@ +# if !(defined __cplusplus && defined GNULIB_NAMESPACE) +# define iconv_close rpl_iconv_close +# endif +_GL_FUNCDECL_RPL (iconv_close, int, (iconv_t cd)); +_GL_CXXALIAS_RPL (iconv_close, int, (iconv_t cd)); +# else +_GL_CXXALIAS_SYS (iconv_close, int, (iconv_t cd)); +# endif +_GL_CXXALIASWARN (iconv_close); +#endif + + +#endif /* _@GUARD_PREFIX@_ICONV_H */ +#endif /* _@GUARD_PREFIX@_ICONV_H */ diff --git a/gl/iconv_close.c b/gl/iconv_close.c new file mode 100644 index 0000000000..c02f76c649 --- /dev/null +++ b/gl/iconv_close.c @@ -0,0 +1,46 @@ +/* Character set conversion. + Copyright (C) 2007, 2009-2012 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> + +/* Specification. */ +#include <iconv.h> + +#include <stdint.h> +#ifndef uintptr_t +# define uintptr_t unsigned long +#endif + +int +rpl_iconv_close (iconv_t cd) +#undef iconv_close +{ +#if REPLACE_ICONV_UTF + switch ((uintptr_t) cd) + { + case (uintptr_t) _ICONV_UTF8_UTF16BE: + case (uintptr_t) _ICONV_UTF8_UTF16LE: + case (uintptr_t) _ICONV_UTF8_UTF32BE: + case (uintptr_t) _ICONV_UTF8_UTF32LE: + case (uintptr_t) _ICONV_UTF16BE_UTF8: + case (uintptr_t) _ICONV_UTF16LE_UTF8: + case (uintptr_t) _ICONV_UTF32BE_UTF8: + case (uintptr_t) _ICONV_UTF32LE_UTF8: + return 0; + } +#endif + return iconv_close (cd); +} diff --git a/gl/iconv_open-aix.gperf b/gl/iconv_open-aix.gperf new file mode 100644 index 0000000000..6782b9956c --- /dev/null +++ b/gl/iconv_open-aix.gperf @@ -0,0 +1,44 @@ +struct mapping { int standard_name; const char vendor_name[10 + 1]; }; +%struct-type +%language=ANSI-C +%define slot-name standard_name +%define hash-function-name mapping_hash +%define lookup-function-name mapping_lookup +%readonly-tables +%global-table +%define word-array-name mappings +%pic +%% +# On AIX 5.1, look in /usr/lib/nls/loc/uconvTable. +ISO-8859-1, "ISO8859-1" +ISO-8859-2, "ISO8859-2" +ISO-8859-3, "ISO8859-3" +ISO-8859-4, "ISO8859-4" +ISO-8859-5, "ISO8859-5" +ISO-8859-6, "ISO8859-6" +ISO-8859-7, "ISO8859-7" +ISO-8859-8, "ISO8859-8" +ISO-8859-9, "ISO8859-9" +ISO-8859-15, "ISO8859-15" +CP437, "IBM-437" +CP850, "IBM-850" +CP852, "IBM-852" +CP856, "IBM-856" +CP857, "IBM-857" +CP861, "IBM-861" +CP865, "IBM-865" +CP869, "IBM-869" +ISO-8859-13, "IBM-921" +CP922, "IBM-922" +CP932, "IBM-932" +CP943, "IBM-943" +CP1046, "IBM-1046" +CP1124, "IBM-1124" +CP1125, "IBM-1125" +CP1129, "IBM-1129" +CP1252, "IBM-1252" +GB2312, "IBM-eucCN" +EUC-JP, "IBM-eucJP" +EUC-KR, "IBM-eucKR" +EUC-TW, "IBM-eucTW" +BIG5, "big5" diff --git a/gl/iconv_open-aix.h b/gl/iconv_open-aix.h new file mode 100644 index 0000000000..0ffc3fef16 --- /dev/null +++ b/gl/iconv_open-aix.h @@ -0,0 +1,256 @@ +/* ANSI-C code produced by gperf version 3.0.3 */ +/* Command-line: gperf -m 10 ./iconv_open-aix.gperf */ +/* Computed positions: -k'4,$' */ + +#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ + && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \ + && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \ + && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \ + && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \ + && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \ + && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \ + && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \ + && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \ + && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \ + && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \ + && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \ + && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \ + && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \ + && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \ + && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \ + && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ + && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ + && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ + && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ + && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ + && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ + && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)) +/* The character set is not based on ISO-646. */ +#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>." +#endif + +#line 1 "./iconv_open-aix.gperf" +struct mapping { int standard_name; const char vendor_name[10 + 1]; }; + +#define TOTAL_KEYWORDS 32 +#define MIN_WORD_LENGTH 4 +#define MAX_WORD_LENGTH 11 +#define MIN_HASH_VALUE 6 +#define MAX_HASH_VALUE 44 +/* maximum key range = 39, duplicates = 0 */ + +#ifdef __GNUC__ +__inline +#else +#ifdef __cplusplus +inline +#endif +#endif +static unsigned int +mapping_hash (register const char *str, register unsigned int len) +{ + static const unsigned char asso_values[] = + { + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 0, 4, 25, + 0, 11, 24, 9, 17, 3, 14, 21, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 3, 45, 1, 45, 45, 45, 45, 0, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45 + }; + return len + asso_values[(unsigned char)str[3]+2] + asso_values[(unsigned char)str[len - 1]]; +} + +struct stringpool_t + { + char stringpool_str6[sizeof("EUC-TW")]; + char stringpool_str7[sizeof("EUC-KR")]; + char stringpool_str8[sizeof("CP852")]; + char stringpool_str9[sizeof("EUC-JP")]; + char stringpool_str10[sizeof("ISO-8859-2")]; + char stringpool_str11[sizeof("CP857")]; + char stringpool_str12[sizeof("CP850")]; + char stringpool_str13[sizeof("ISO-8859-7")]; + char stringpool_str14[sizeof("CP932")]; + char stringpool_str15[sizeof("GB2312")]; + char stringpool_str16[sizeof("BIG5")]; + char stringpool_str17[sizeof("CP437")]; + char stringpool_str19[sizeof("ISO-8859-5")]; + char stringpool_str20[sizeof("ISO-8859-15")]; + char stringpool_str21[sizeof("ISO-8859-3")]; + char stringpool_str22[sizeof("ISO-8859-13")]; + char stringpool_str23[sizeof("CP1046")]; + char stringpool_str24[sizeof("ISO-8859-8")]; + char stringpool_str25[sizeof("CP856")]; + char stringpool_str26[sizeof("CP1125")]; + char stringpool_str27[sizeof("ISO-8859-6")]; + char stringpool_str28[sizeof("CP865")]; + char stringpool_str29[sizeof("CP922")]; + char stringpool_str30[sizeof("CP1252")]; + char stringpool_str31[sizeof("ISO-8859-9")]; + char stringpool_str33[sizeof("CP943")]; + char stringpool_str34[sizeof("ISO-8859-4")]; + char stringpool_str35[sizeof("ISO-8859-1")]; + char stringpool_str38[sizeof("CP1129")]; + char stringpool_str40[sizeof("CP869")]; + char stringpool_str41[sizeof("CP1124")]; + char stringpool_str44[sizeof("CP861")]; + }; +static const struct stringpool_t stringpool_contents = + { + "EUC-TW", + "EUC-KR", + "CP852", + "EUC-JP", + "ISO-8859-2", + "CP857", + "CP850", + "ISO-8859-7", + "CP932", + "GB2312", + "BIG5", + "CP437", + "ISO-8859-5", + "ISO-8859-15", + "ISO-8859-3", + "ISO-8859-13", + "CP1046", + "ISO-8859-8", + "CP856", + "CP1125", + "ISO-8859-6", + "CP865", + "CP922", + "CP1252", + "ISO-8859-9", + "CP943", + "ISO-8859-4", + "ISO-8859-1", + "CP1129", + "CP869", + "CP1124", + "CP861" + }; +#define stringpool ((const char *) &stringpool_contents) + +static const struct mapping mappings[] = + { + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, +#line 43 "./iconv_open-aix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str6, "IBM-eucTW"}, +#line 42 "./iconv_open-aix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str7, "IBM-eucKR"}, +#line 25 "./iconv_open-aix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str8, "IBM-852"}, +#line 41 "./iconv_open-aix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str9, "IBM-eucJP"}, +#line 14 "./iconv_open-aix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str10, "ISO8859-2"}, +#line 27 "./iconv_open-aix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str11, "IBM-857"}, +#line 24 "./iconv_open-aix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str12, "IBM-850"}, +#line 19 "./iconv_open-aix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str13, "ISO8859-7"}, +#line 33 "./iconv_open-aix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str14, "IBM-932"}, +#line 40 "./iconv_open-aix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str15, "IBM-eucCN"}, +#line 44 "./iconv_open-aix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str16, "big5"}, +#line 23 "./iconv_open-aix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str17, "IBM-437"}, + {-1}, +#line 17 "./iconv_open-aix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str19, "ISO8859-5"}, +#line 22 "./iconv_open-aix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str20, "ISO8859-15"}, +#line 15 "./iconv_open-aix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str21, "ISO8859-3"}, +#line 31 "./iconv_open-aix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str22, "IBM-921"}, +#line 35 "./iconv_open-aix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str23, "IBM-1046"}, +#line 20 "./iconv_open-aix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str24, "ISO8859-8"}, +#line 26 "./iconv_open-aix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str25, "IBM-856"}, +#line 37 "./iconv_open-aix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str26, "IBM-1125"}, +#line 18 "./iconv_open-aix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str27, "ISO8859-6"}, +#line 29 "./iconv_open-aix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str28, "IBM-865"}, +#line 32 "./iconv_open-aix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str29, "IBM-922"}, +#line 39 "./iconv_open-aix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str30, "IBM-1252"}, +#line 21 "./iconv_open-aix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str31, "ISO8859-9"}, + {-1}, +#line 34 "./iconv_open-aix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str33, "IBM-943"}, +#line 16 "./iconv_open-aix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str34, "ISO8859-4"}, +#line 13 "./iconv_open-aix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str35, "ISO8859-1"}, + {-1}, {-1}, +#line 38 "./iconv_open-aix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str38, "IBM-1129"}, + {-1}, +#line 30 "./iconv_open-aix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str40, "IBM-869"}, +#line 36 "./iconv_open-aix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str41, "IBM-1124"}, + {-1}, {-1}, +#line 28 "./iconv_open-aix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str44, "IBM-861"} + }; + +#ifdef __GNUC__ +__inline +#ifdef __GNUC_STDC_INLINE__ +__attribute__ ((__gnu_inline__)) +#endif +#endif +const struct mapping * +mapping_lookup (register const char *str, register unsigned int len) +{ + if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) + { + register int key = mapping_hash (str, len); + + if (key <= MAX_HASH_VALUE && key >= 0) + { + register int o = mappings[key].standard_name; + if (o >= 0) + { + register const char *s = o + stringpool; + + if (*str == *s && !strcmp (str + 1, s + 1)) + return &mappings[key]; + } + } + } + return 0; +} diff --git a/gl/iconv_open-hpux.gperf b/gl/iconv_open-hpux.gperf new file mode 100644 index 0000000000..5a35c83e1e --- /dev/null +++ b/gl/iconv_open-hpux.gperf @@ -0,0 +1,56 @@ +struct mapping { int standard_name; const char vendor_name[9 + 1]; }; +%struct-type +%language=ANSI-C +%define slot-name standard_name +%define hash-function-name mapping_hash +%define lookup-function-name mapping_lookup +%readonly-tables +%global-table +%define word-array-name mappings +%pic +%% +# On HP-UX 11.11, look in /usr/lib/nls/iconv. +ISO-8859-1, "iso88591" +ISO-8859-2, "iso88592" +ISO-8859-5, "iso88595" +ISO-8859-6, "iso88596" +ISO-8859-7, "iso88597" +ISO-8859-8, "iso88598" +ISO-8859-9, "iso88599" +ISO-8859-15, "iso885915" +CP437, "cp437" +CP775, "cp775" +CP850, "cp850" +CP852, "cp852" +CP855, "cp855" +CP857, "cp857" +CP861, "cp861" +CP862, "cp862" +CP864, "cp864" +CP865, "cp865" +CP866, "cp866" +CP869, "cp869" +CP874, "cp874" +CP1250, "cp1250" +CP1251, "cp1251" +CP1252, "cp1252" +CP1253, "cp1253" +CP1254, "cp1254" +CP1255, "cp1255" +CP1256, "cp1256" +CP1257, "cp1257" +CP1258, "cp1258" +HP-ROMAN8, "roman8" +HP-ARABIC8, "arabic8" +HP-GREEK8, "greek8" +HP-HEBREW8, "hebrew8" +HP-TURKISH8, "turkish8" +HP-KANA8, "kana8" +TIS-620, "tis620" +GB2312, "hp15CN" +EUC-JP, "eucJP" +EUC-KR, "eucKR" +EUC-TW, "eucTW" +BIG5, "big5" +SHIFT_JIS, "sjis" +UTF-8, "utf8" diff --git a/gl/iconv_open-hpux.h b/gl/iconv_open-hpux.h new file mode 100644 index 0000000000..8f9f0a9ad4 --- /dev/null +++ b/gl/iconv_open-hpux.h @@ -0,0 +1,299 @@ +/* ANSI-C code produced by gperf version 3.0.3 */ +/* Command-line: gperf -m 10 ./iconv_open-hpux.gperf */ +/* Computed positions: -k'4,$' */ + +#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ + && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \ + && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \ + && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \ + && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \ + && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \ + && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \ + && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \ + && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \ + && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \ + && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \ + && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \ + && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \ + && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \ + && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \ + && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \ + && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ + && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ + && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ + && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ + && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ + && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ + && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)) +/* The character set is not based on ISO-646. */ +#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>." +#endif + +#line 1 "./iconv_open-hpux.gperf" +struct mapping { int standard_name; const char vendor_name[9 + 1]; }; + +#define TOTAL_KEYWORDS 44 +#define MIN_WORD_LENGTH 4 +#define MAX_WORD_LENGTH 11 +#define MIN_HASH_VALUE 6 +#define MAX_HASH_VALUE 49 +/* maximum key range = 44, duplicates = 0 */ + +#ifdef __GNUC__ +__inline +#else +#ifdef __cplusplus +inline +#endif +#endif +static unsigned int +mapping_hash (register const char *str, register unsigned int len) +{ + static const unsigned char asso_values[] = + { + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 1, 2, + 24, 43, 5, 10, 0, 13, 32, 3, 19, 18, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 5, + 50, 50, 50, 50, 14, 5, 0, 50, 50, 0, + 27, 50, 12, 14, 50, 50, 0, 5, 2, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50 + }; + return len + asso_values[(unsigned char)str[3]+4] + asso_values[(unsigned char)str[len - 1]]; +} + +struct stringpool_t + { + char stringpool_str6[sizeof("CP1256")]; + char stringpool_str7[sizeof("CP1250")]; + char stringpool_str8[sizeof("CP1251")]; + char stringpool_str9[sizeof("CP850")]; + char stringpool_str10[sizeof("TIS-620")]; + char stringpool_str11[sizeof("CP1254")]; + char stringpool_str12[sizeof("ISO-8859-6")]; + char stringpool_str13[sizeof("EUC-TW")]; + char stringpool_str14[sizeof("ISO-8859-1")]; + char stringpool_str15[sizeof("ISO-8859-9")]; + char stringpool_str16[sizeof("CP1255")]; + char stringpool_str17[sizeof("BIG5")]; + char stringpool_str18[sizeof("CP855")]; + char stringpool_str19[sizeof("CP1257")]; + char stringpool_str20[sizeof("EUC-KR")]; + char stringpool_str21[sizeof("CP857")]; + char stringpool_str22[sizeof("ISO-8859-5")]; + char stringpool_str23[sizeof("ISO-8859-15")]; + char stringpool_str24[sizeof("CP866")]; + char stringpool_str25[sizeof("ISO-8859-7")]; + char stringpool_str26[sizeof("CP861")]; + char stringpool_str27[sizeof("CP869")]; + char stringpool_str28[sizeof("CP874")]; + char stringpool_str29[sizeof("CP864")]; + char stringpool_str30[sizeof("CP1252")]; + char stringpool_str31[sizeof("CP437")]; + char stringpool_str32[sizeof("CP852")]; + char stringpool_str33[sizeof("CP775")]; + char stringpool_str34[sizeof("CP865")]; + char stringpool_str35[sizeof("EUC-JP")]; + char stringpool_str36[sizeof("ISO-8859-2")]; + char stringpool_str37[sizeof("SHIFT_JIS")]; + char stringpool_str38[sizeof("CP1258")]; + char stringpool_str39[sizeof("UTF-8")]; + char stringpool_str40[sizeof("HP-KANA8")]; + char stringpool_str41[sizeof("HP-ROMAN8")]; + char stringpool_str42[sizeof("HP-HEBREW8")]; + char stringpool_str43[sizeof("GB2312")]; + char stringpool_str44[sizeof("ISO-8859-8")]; + char stringpool_str45[sizeof("HP-TURKISH8")]; + char stringpool_str46[sizeof("HP-GREEK8")]; + char stringpool_str47[sizeof("HP-ARABIC8")]; + char stringpool_str48[sizeof("CP862")]; + char stringpool_str49[sizeof("CP1253")]; + }; +static const struct stringpool_t stringpool_contents = + { + "CP1256", + "CP1250", + "CP1251", + "CP850", + "TIS-620", + "CP1254", + "ISO-8859-6", + "EUC-TW", + "ISO-8859-1", + "ISO-8859-9", + "CP1255", + "BIG5", + "CP855", + "CP1257", + "EUC-KR", + "CP857", + "ISO-8859-5", + "ISO-8859-15", + "CP866", + "ISO-8859-7", + "CP861", + "CP869", + "CP874", + "CP864", + "CP1252", + "CP437", + "CP852", + "CP775", + "CP865", + "EUC-JP", + "ISO-8859-2", + "SHIFT_JIS", + "CP1258", + "UTF-8", + "HP-KANA8", + "HP-ROMAN8", + "HP-HEBREW8", + "GB2312", + "ISO-8859-8", + "HP-TURKISH8", + "HP-GREEK8", + "HP-ARABIC8", + "CP862", + "CP1253" + }; +#define stringpool ((const char *) &stringpool_contents) + +static const struct mapping mappings[] = + { + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, +#line 40 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str6, "cp1256"}, +#line 34 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str7, "cp1250"}, +#line 35 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str8, "cp1251"}, +#line 23 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str9, "cp850"}, +#line 49 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str10, "tis620"}, +#line 38 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str11, "cp1254"}, +#line 16 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str12, "iso88596"}, +#line 53 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str13, "eucTW"}, +#line 13 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str14, "iso88591"}, +#line 19 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str15, "iso88599"}, +#line 39 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str16, "cp1255"}, +#line 54 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str17, "big5"}, +#line 25 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str18, "cp855"}, +#line 41 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str19, "cp1257"}, +#line 52 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str20, "eucKR"}, +#line 26 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str21, "cp857"}, +#line 15 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str22, "iso88595"}, +#line 20 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str23, "iso885915"}, +#line 31 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str24, "cp866"}, +#line 17 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str25, "iso88597"}, +#line 27 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str26, "cp861"}, +#line 32 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str27, "cp869"}, +#line 33 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str28, "cp874"}, +#line 29 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str29, "cp864"}, +#line 36 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str30, "cp1252"}, +#line 21 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str31, "cp437"}, +#line 24 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str32, "cp852"}, +#line 22 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str33, "cp775"}, +#line 30 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str34, "cp865"}, +#line 51 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str35, "eucJP"}, +#line 14 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str36, "iso88592"}, +#line 55 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str37, "sjis"}, +#line 42 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str38, "cp1258"}, +#line 56 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str39, "utf8"}, +#line 48 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str40, "kana8"}, +#line 43 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str41, "roman8"}, +#line 46 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str42, "hebrew8"}, +#line 50 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str43, "hp15CN"}, +#line 18 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str44, "iso88598"}, +#line 47 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str45, "turkish8"}, +#line 45 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str46, "greek8"}, +#line 44 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str47, "arabic8"}, +#line 28 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str48, "cp862"}, +#line 37 "./iconv_open-hpux.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str49, "cp1253"} + }; + +#ifdef __GNUC__ +__inline +#ifdef __GNUC_STDC_INLINE__ +__attribute__ ((__gnu_inline__)) +#endif +#endif +const struct mapping * +mapping_lookup (register const char *str, register unsigned int len) +{ + if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) + { + register int key = mapping_hash (str, len); + + if (key <= MAX_HASH_VALUE && key >= 0) + { + register int o = mappings[key].standard_name; + if (o >= 0) + { + register const char *s = o + stringpool; + + if (*str == *s && !strcmp (str + 1, s + 1)) + return &mappings[key]; + } + } + } + return 0; +} diff --git a/gl/iconv_open-irix.gperf b/gl/iconv_open-irix.gperf new file mode 100644 index 0000000000..3672a80137 --- /dev/null +++ b/gl/iconv_open-irix.gperf @@ -0,0 +1,31 @@ +struct mapping { int standard_name; const char vendor_name[10 + 1]; }; +%struct-type +%language=ANSI-C +%define slot-name standard_name +%define hash-function-name mapping_hash +%define lookup-function-name mapping_lookup +%readonly-tables +%global-table +%define word-array-name mappings +%pic +%% +# On IRIX 6.5, look in /usr/lib/iconv and /usr/lib/international/encodings. +ISO-8859-1, "ISO8859-1" +ISO-8859-2, "ISO8859-2" +ISO-8859-3, "ISO8859-3" +ISO-8859-4, "ISO8859-4" +ISO-8859-5, "ISO8859-5" +ISO-8859-6, "ISO8859-6" +ISO-8859-7, "ISO8859-7" +ISO-8859-8, "ISO8859-8" +ISO-8859-9, "ISO8859-9" +ISO-8859-15, "ISO8859-15" +KOI8-R, "KOI8" +CP855, "DOS855" +CP1251, "WIN1251" +GB2312, "eucCN" +EUC-JP, "eucJP" +EUC-KR, "eucKR" +EUC-TW, "eucTW" +SHIFT_JIS, "sjis" +TIS-620, "TIS620" diff --git a/gl/iconv_open-irix.h b/gl/iconv_open-irix.h new file mode 100644 index 0000000000..520582e52e --- /dev/null +++ b/gl/iconv_open-irix.h @@ -0,0 +1,199 @@ +/* ANSI-C code produced by gperf version 3.0.3 */ +/* Command-line: gperf -m 10 ./iconv_open-irix.gperf */ +/* Computed positions: -k'1,$' */ + +#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ + && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \ + && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \ + && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \ + && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \ + && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \ + && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \ + && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \ + && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \ + && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \ + && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \ + && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \ + && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \ + && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \ + && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \ + && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \ + && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ + && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ + && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ + && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ + && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ + && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ + && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)) +/* The character set is not based on ISO-646. */ +#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>." +#endif + +#line 1 "./iconv_open-irix.gperf" +struct mapping { int standard_name; const char vendor_name[10 + 1]; }; + +#define TOTAL_KEYWORDS 19 +#define MIN_WORD_LENGTH 5 +#define MAX_WORD_LENGTH 11 +#define MIN_HASH_VALUE 5 +#define MAX_HASH_VALUE 23 +/* maximum key range = 19, duplicates = 0 */ + +#ifdef __GNUC__ +__inline +#else +#ifdef __cplusplus +inline +#endif +#endif +static unsigned int +mapping_hash (register const char *str, register unsigned int len) +{ + static const unsigned char asso_values[] = + { + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 8, 2, + 5, 12, 11, 0, 10, 9, 8, 7, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 0, 24, 0, + 24, 5, 24, 0, 24, 7, 24, 24, 24, 24, + 7, 24, 1, 0, 8, 24, 24, 0, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24 + }; + return len + asso_values[(unsigned char)str[len - 1]] + asso_values[(unsigned char)str[0]]; +} + +struct stringpool_t + { + char stringpool_str5[sizeof("CP855")]; + char stringpool_str6[sizeof("EUC-TW")]; + char stringpool_str7[sizeof("EUC-KR")]; + char stringpool_str8[sizeof("CP1251")]; + char stringpool_str9[sizeof("SHIFT_JIS")]; + char stringpool_str10[sizeof("ISO-8859-5")]; + char stringpool_str11[sizeof("ISO-8859-15")]; + char stringpool_str12[sizeof("ISO-8859-1")]; + char stringpool_str13[sizeof("EUC-JP")]; + char stringpool_str14[sizeof("KOI8-R")]; + char stringpool_str15[sizeof("ISO-8859-2")]; + char stringpool_str16[sizeof("GB2312")]; + char stringpool_str17[sizeof("ISO-8859-9")]; + char stringpool_str18[sizeof("ISO-8859-8")]; + char stringpool_str19[sizeof("ISO-8859-7")]; + char stringpool_str20[sizeof("ISO-8859-6")]; + char stringpool_str21[sizeof("ISO-8859-4")]; + char stringpool_str22[sizeof("ISO-8859-3")]; + char stringpool_str23[sizeof("TIS-620")]; + }; +static const struct stringpool_t stringpool_contents = + { + "CP855", + "EUC-TW", + "EUC-KR", + "CP1251", + "SHIFT_JIS", + "ISO-8859-5", + "ISO-8859-15", + "ISO-8859-1", + "EUC-JP", + "KOI8-R", + "ISO-8859-2", + "GB2312", + "ISO-8859-9", + "ISO-8859-8", + "ISO-8859-7", + "ISO-8859-6", + "ISO-8859-4", + "ISO-8859-3", + "TIS-620" + }; +#define stringpool ((const char *) &stringpool_contents) + +static const struct mapping mappings[] = + { + {-1}, {-1}, {-1}, {-1}, {-1}, +#line 24 "./iconv_open-irix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str5, "DOS855"}, +#line 29 "./iconv_open-irix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str6, "eucTW"}, +#line 28 "./iconv_open-irix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str7, "eucKR"}, +#line 25 "./iconv_open-irix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str8, "WIN1251"}, +#line 30 "./iconv_open-irix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str9, "sjis"}, +#line 17 "./iconv_open-irix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str10, "ISO8859-5"}, +#line 22 "./iconv_open-irix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str11, "ISO8859-15"}, +#line 13 "./iconv_open-irix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str12, "ISO8859-1"}, +#line 27 "./iconv_open-irix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str13, "eucJP"}, +#line 23 "./iconv_open-irix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str14, "KOI8"}, +#line 14 "./iconv_open-irix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str15, "ISO8859-2"}, +#line 26 "./iconv_open-irix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str16, "eucCN"}, +#line 21 "./iconv_open-irix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str17, "ISO8859-9"}, +#line 20 "./iconv_open-irix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str18, "ISO8859-8"}, +#line 19 "./iconv_open-irix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str19, "ISO8859-7"}, +#line 18 "./iconv_open-irix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str20, "ISO8859-6"}, +#line 16 "./iconv_open-irix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str21, "ISO8859-4"}, +#line 15 "./iconv_open-irix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str22, "ISO8859-3"}, +#line 31 "./iconv_open-irix.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str23, "TIS620"} + }; + +#ifdef __GNUC__ +__inline +#ifdef __GNUC_STDC_INLINE__ +__attribute__ ((__gnu_inline__)) +#endif +#endif +const struct mapping * +mapping_lookup (register const char *str, register unsigned int len) +{ + if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) + { + register int key = mapping_hash (str, len); + + if (key <= MAX_HASH_VALUE && key >= 0) + { + register int o = mappings[key].standard_name; + if (o >= 0) + { + register const char *s = o + stringpool; + + if (*str == *s && !strcmp (str + 1, s + 1)) + return &mappings[key]; + } + } + } + return 0; +} diff --git a/gl/iconv_open-osf.gperf b/gl/iconv_open-osf.gperf new file mode 100644 index 0000000000..f468ff6093 --- /dev/null +++ b/gl/iconv_open-osf.gperf @@ -0,0 +1,50 @@ +struct mapping { int standard_name; const char vendor_name[10 + 1]; }; +%struct-type +%language=ANSI-C +%define slot-name standard_name +%define hash-function-name mapping_hash +%define lookup-function-name mapping_lookup +%readonly-tables +%global-table +%define word-array-name mappings +%pic +%% +# On OSF/1 5.1, look in /usr/lib/nls/loc/iconv. +ISO-8859-1, "ISO8859-1" +ISO-8859-2, "ISO8859-2" +ISO-8859-3, "ISO8859-3" +ISO-8859-4, "ISO8859-4" +ISO-8859-5, "ISO8859-5" +ISO-8859-6, "ISO8859-6" +ISO-8859-7, "ISO8859-7" +ISO-8859-8, "ISO8859-8" +ISO-8859-9, "ISO8859-9" +ISO-8859-15, "ISO8859-15" +CP437, "cp437" +CP775, "cp775" +CP850, "cp850" +CP852, "cp852" +CP855, "cp855" +CP857, "cp857" +CP861, "cp861" +CP862, "cp862" +CP865, "cp865" +CP866, "cp866" +CP869, "cp869" +CP874, "cp874" +CP949, "KSC5601" +CP1250, "cp1250" +CP1251, "cp1251" +CP1252, "cp1252" +CP1253, "cp1253" +CP1254, "cp1254" +CP1255, "cp1255" +CP1256, "cp1256" +CP1257, "cp1257" +CP1258, "cp1258" +EUC-JP, "eucJP" +EUC-KR, "eucKR" +EUC-TW, "eucTW" +BIG5, "big5" +SHIFT_JIS, "SJIS" +TIS-620, "TACTIS" diff --git a/gl/iconv_open-osf.h b/gl/iconv_open-osf.h new file mode 100644 index 0000000000..85e4c0f8fd --- /dev/null +++ b/gl/iconv_open-osf.h @@ -0,0 +1,278 @@ +/* ANSI-C code produced by gperf version 3.0.3 */ +/* Command-line: gperf -m 10 ./iconv_open-osf.gperf */ +/* Computed positions: -k'4,$' */ + +#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ + && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \ + && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \ + && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \ + && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \ + && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \ + && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \ + && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \ + && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \ + && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \ + && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \ + && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \ + && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \ + && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \ + && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \ + && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \ + && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ + && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ + && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ + && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ + && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ + && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ + && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)) +/* The character set is not based on ISO-646. */ +#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>." +#endif + +#line 1 "./iconv_open-osf.gperf" +struct mapping { int standard_name; const char vendor_name[10 + 1]; }; + +#define TOTAL_KEYWORDS 38 +#define MIN_WORD_LENGTH 4 +#define MAX_WORD_LENGTH 11 +#define MIN_HASH_VALUE 6 +#define MAX_HASH_VALUE 47 +/* maximum key range = 42, duplicates = 0 */ + +#ifdef __GNUC__ +__inline +#else +#ifdef __cplusplus +inline +#endif +#endif +static unsigned int +mapping_hash (register const char *str, register unsigned int len) +{ + static const unsigned char asso_values[] = + { + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 2, 29, + 24, 34, 31, 0, 15, 14, 10, 13, 2, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 7, 48, 48, 48, 48, 48, 48, + 11, 48, 2, 7, 48, 48, 48, 1, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48 + }; + return len + asso_values[(unsigned char)str[3]+3] + asso_values[(unsigned char)str[len - 1]]; +} + +struct stringpool_t + { + char stringpool_str6[sizeof("CP1255")]; + char stringpool_str7[sizeof("CP775")]; + char stringpool_str8[sizeof("CP1250")]; + char stringpool_str9[sizeof("EUC-TW")]; + char stringpool_str10[sizeof("EUC-KR")]; + char stringpool_str11[sizeof("TIS-620")]; + char stringpool_str12[sizeof("ISO-8859-5")]; + char stringpool_str13[sizeof("ISO-8859-15")]; + char stringpool_str14[sizeof("BIG5")]; + char stringpool_str15[sizeof("CP855")]; + char stringpool_str16[sizeof("CP1258")]; + char stringpool_str17[sizeof("CP850")]; + char stringpool_str18[sizeof("CP865")]; + char stringpool_str19[sizeof("EUC-JP")]; + char stringpool_str20[sizeof("CP1257")]; + char stringpool_str21[sizeof("CP1256")]; + char stringpool_str22[sizeof("ISO-8859-8")]; + char stringpool_str23[sizeof("SHIFT_JIS")]; + char stringpool_str25[sizeof("ISO-8859-9")]; + char stringpool_str26[sizeof("ISO-8859-7")]; + char stringpool_str27[sizeof("ISO-8859-6")]; + char stringpool_str29[sizeof("CP857")]; + char stringpool_str30[sizeof("CP1252")]; + char stringpool_str31[sizeof("CP869")]; + char stringpool_str32[sizeof("CP949")]; + char stringpool_str33[sizeof("CP866")]; + char stringpool_str34[sizeof("CP437")]; + char stringpool_str35[sizeof("CP1251")]; + char stringpool_str36[sizeof("ISO-8859-2")]; + char stringpool_str37[sizeof("CP1254")]; + char stringpool_str38[sizeof("CP874")]; + char stringpool_str39[sizeof("CP852")]; + char stringpool_str40[sizeof("CP1253")]; + char stringpool_str41[sizeof("ISO-8859-1")]; + char stringpool_str42[sizeof("CP862")]; + char stringpool_str43[sizeof("ISO-8859-4")]; + char stringpool_str46[sizeof("ISO-8859-3")]; + char stringpool_str47[sizeof("CP861")]; + }; +static const struct stringpool_t stringpool_contents = + { + "CP1255", + "CP775", + "CP1250", + "EUC-TW", + "EUC-KR", + "TIS-620", + "ISO-8859-5", + "ISO-8859-15", + "BIG5", + "CP855", + "CP1258", + "CP850", + "CP865", + "EUC-JP", + "CP1257", + "CP1256", + "ISO-8859-8", + "SHIFT_JIS", + "ISO-8859-9", + "ISO-8859-7", + "ISO-8859-6", + "CP857", + "CP1252", + "CP869", + "CP949", + "CP866", + "CP437", + "CP1251", + "ISO-8859-2", + "CP1254", + "CP874", + "CP852", + "CP1253", + "ISO-8859-1", + "CP862", + "ISO-8859-4", + "ISO-8859-3", + "CP861" + }; +#define stringpool ((const char *) &stringpool_contents) + +static const struct mapping mappings[] = + { + {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, +#line 41 "./iconv_open-osf.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str6, "cp1255"}, +#line 24 "./iconv_open-osf.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str7, "cp775"}, +#line 36 "./iconv_open-osf.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str8, "cp1250"}, +#line 47 "./iconv_open-osf.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str9, "eucTW"}, +#line 46 "./iconv_open-osf.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str10, "eucKR"}, +#line 50 "./iconv_open-osf.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str11, "TACTIS"}, +#line 17 "./iconv_open-osf.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str12, "ISO8859-5"}, +#line 22 "./iconv_open-osf.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str13, "ISO8859-15"}, +#line 48 "./iconv_open-osf.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str14, "big5"}, +#line 27 "./iconv_open-osf.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str15, "cp855"}, +#line 44 "./iconv_open-osf.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str16, "cp1258"}, +#line 25 "./iconv_open-osf.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str17, "cp850"}, +#line 31 "./iconv_open-osf.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str18, "cp865"}, +#line 45 "./iconv_open-osf.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str19, "eucJP"}, +#line 43 "./iconv_open-osf.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str20, "cp1257"}, +#line 42 "./iconv_open-osf.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str21, "cp1256"}, +#line 20 "./iconv_open-osf.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str22, "ISO8859-8"}, +#line 49 "./iconv_open-osf.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str23, "SJIS"}, + {-1}, +#line 21 "./iconv_open-osf.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str25, "ISO8859-9"}, +#line 19 "./iconv_open-osf.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str26, "ISO8859-7"}, +#line 18 "./iconv_open-osf.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str27, "ISO8859-6"}, + {-1}, +#line 28 "./iconv_open-osf.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str29, "cp857"}, +#line 38 "./iconv_open-osf.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str30, "cp1252"}, +#line 33 "./iconv_open-osf.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str31, "cp869"}, +#line 35 "./iconv_open-osf.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str32, "KSC5601"}, +#line 32 "./iconv_open-osf.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str33, "cp866"}, +#line 23 "./iconv_open-osf.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str34, "cp437"}, +#line 37 "./iconv_open-osf.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str35, "cp1251"}, +#line 14 "./iconv_open-osf.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str36, "ISO8859-2"}, +#line 40 "./iconv_open-osf.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str37, "cp1254"}, +#line 34 "./iconv_open-osf.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str38, "cp874"}, +#line 26 "./iconv_open-osf.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str39, "cp852"}, +#line 39 "./iconv_open-osf.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str40, "cp1253"}, +#line 13 "./iconv_open-osf.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str41, "ISO8859-1"}, +#line 30 "./iconv_open-osf.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str42, "cp862"}, +#line 16 "./iconv_open-osf.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str43, "ISO8859-4"}, + {-1}, {-1}, +#line 15 "./iconv_open-osf.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str46, "ISO8859-3"}, +#line 29 "./iconv_open-osf.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str47, "cp861"} + }; + +#ifdef __GNUC__ +__inline +#ifdef __GNUC_STDC_INLINE__ +__attribute__ ((__gnu_inline__)) +#endif +#endif +const struct mapping * +mapping_lookup (register const char *str, register unsigned int len) +{ + if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) + { + register int key = mapping_hash (str, len); + + if (key <= MAX_HASH_VALUE && key >= 0) + { + register int o = mappings[key].standard_name; + if (o >= 0) + { + register const char *s = o + stringpool; + + if (*str == *s && !strcmp (str + 1, s + 1)) + return &mappings[key]; + } + } + } + return 0; +} diff --git a/gl/iconv_open-solaris.gperf b/gl/iconv_open-solaris.gperf new file mode 100644 index 0000000000..7d7da38e6e --- /dev/null +++ b/gl/iconv_open-solaris.gperf @@ -0,0 +1,30 @@ +struct mapping { int standard_name; const char vendor_name[10 + 1]; }; +%struct-type +%language=ANSI-C +%define slot-name standard_name +%define hash-function-name mapping_hash +%define lookup-function-name mapping_lookup +%readonly-tables +%global-table +%define word-array-name mappings +%pic +%% +# On Solaris 10, look in the "iconv -l" output. Some aliases are advertised but +# not actually supported by the iconv() function and by the 'iconv' program. +# For example: +# $ echo abc | iconv -f 646 -t ISO-8859-1 +# Not supported 646 to ISO-8859-1 +# $ echo abc | iconv -f 646 -t ISO8859-1 +$ abc +ASCII, "646" +ISO-8859-1, "ISO8859-1" +ISO-8859-2, "ISO8859-2" +ISO-8859-3, "ISO8859-3" +ISO-8859-4, "ISO8859-4" +ISO-8859-5, "ISO8859-5" +ISO-8859-6, "ISO8859-6" +ISO-8859-7, "ISO8859-7" +ISO-8859-8, "ISO8859-8" +ISO-8859-9, "ISO8859-9" +ISO-8859-15, "ISO8859-15" +CP1251, "ansi-1251" diff --git a/gl/iconv_open-solaris.h b/gl/iconv_open-solaris.h new file mode 100644 index 0000000000..2963145be8 --- /dev/null +++ b/gl/iconv_open-solaris.h @@ -0,0 +1,190 @@ +/* ANSI-C code produced by gperf version 3.0.3 */ +/* Command-line: gperf -m 10 ./iconv_open-solaris.gperf */ +/* Computed positions: -k'10' */ + +#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ + && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \ + && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \ + && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \ + && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \ + && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \ + && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \ + && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \ + && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \ + && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \ + && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \ + && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \ + && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \ + && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \ + && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \ + && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \ + && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ + && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ + && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ + && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ + && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ + && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ + && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)) +/* The character set is not based on ISO-646. */ +#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>." +#endif + +#line 1 "./iconv_open-solaris.gperf" +struct mapping { int standard_name; const char vendor_name[10 + 1]; }; + +#define TOTAL_KEYWORDS 13 +#define MIN_WORD_LENGTH 5 +#define MAX_WORD_LENGTH 11 +#define MIN_HASH_VALUE 5 +#define MAX_HASH_VALUE 19 +/* maximum key range = 15, duplicates = 0 */ + +#ifdef __GNUC__ +__inline +#else +#ifdef __cplusplus +inline +#endif +#endif +static unsigned int +mapping_hash (register const char *str, register unsigned int len) +{ + static const unsigned char asso_values[] = + { + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, + 9, 8, 7, 6, 5, 4, 3, 2, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20 + }; + register int hval = len; + + switch (hval) + { + default: + hval += asso_values[(unsigned char)str[9]]; + /*FALLTHROUGH*/ + case 9: + case 8: + case 7: + case 6: + case 5: + break; + } + return hval; +} + +struct stringpool_t + { + char stringpool_str5[sizeof("ASCII")]; + char stringpool_str6[sizeof("CP1251")]; + char stringpool_str7[sizeof("$ abc")]; + char stringpool_str10[sizeof("ISO-8859-1")]; + char stringpool_str11[sizeof("ISO-8859-15")]; + char stringpool_str12[sizeof("ISO-8859-9")]; + char stringpool_str13[sizeof("ISO-8859-8")]; + char stringpool_str14[sizeof("ISO-8859-7")]; + char stringpool_str15[sizeof("ISO-8859-6")]; + char stringpool_str16[sizeof("ISO-8859-5")]; + char stringpool_str17[sizeof("ISO-8859-4")]; + char stringpool_str18[sizeof("ISO-8859-3")]; + char stringpool_str19[sizeof("ISO-8859-2")]; + }; +static const struct stringpool_t stringpool_contents = + { + "ASCII", + "CP1251", + "$ abc", + "ISO-8859-1", + "ISO-8859-15", + "ISO-8859-9", + "ISO-8859-8", + "ISO-8859-7", + "ISO-8859-6", + "ISO-8859-5", + "ISO-8859-4", + "ISO-8859-3", + "ISO-8859-2" + }; +#define stringpool ((const char *) &stringpool_contents) + +static const struct mapping mappings[] = + { + {-1}, {-1}, {-1}, {-1}, {-1}, +#line 19 "./iconv_open-solaris.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str5, "646"}, +#line 30 "./iconv_open-solaris.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str6, "ansi-1251"}, +#line 18 "./iconv_open-solaris.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str7}, + {-1}, {-1}, +#line 20 "./iconv_open-solaris.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str10, "ISO8859-1"}, +#line 29 "./iconv_open-solaris.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str11, "ISO8859-15"}, +#line 28 "./iconv_open-solaris.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str12, "ISO8859-9"}, +#line 27 "./iconv_open-solaris.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str13, "ISO8859-8"}, +#line 26 "./iconv_open-solaris.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str14, "ISO8859-7"}, +#line 25 "./iconv_open-solaris.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str15, "ISO8859-6"}, +#line 24 "./iconv_open-solaris.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str16, "ISO8859-5"}, +#line 23 "./iconv_open-solaris.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str17, "ISO8859-4"}, +#line 22 "./iconv_open-solaris.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str18, "ISO8859-3"}, +#line 21 "./iconv_open-solaris.gperf" + {(int)(long)&((struct stringpool_t *)0)->stringpool_str19, "ISO8859-2"} + }; + +#ifdef __GNUC__ +__inline +#ifdef __GNUC_STDC_INLINE__ +__attribute__ ((__gnu_inline__)) +#endif +#endif +const struct mapping * +mapping_lookup (register const char *str, register unsigned int len) +{ + if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) + { + register int key = mapping_hash (str, len); + + if (key <= MAX_HASH_VALUE && key >= 0) + { + register int o = mappings[key].standard_name; + if (o >= 0) + { + register const char *s = o + stringpool; + + if (*str == *s && !strcmp (str + 1, s + 1)) + return &mappings[key]; + } + } + } + return 0; +} diff --git a/gl/iconv_open.c b/gl/iconv_open.c new file mode 100644 index 0000000000..76536176b0 --- /dev/null +++ b/gl/iconv_open.c @@ -0,0 +1,172 @@ +/* Character set conversion. + Copyright (C) 2007, 2009-2012 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> + +/* Specification. */ +#include <iconv.h> + +#include <errno.h> +#include <string.h> +#include "c-ctype.h" +#include "c-strcase.h" + +#define SIZEOF(a) (sizeof(a) / sizeof(a[0])) + +/* Namespace cleanliness. */ +#define mapping_lookup rpl_iconv_open_mapping_lookup + +/* The macro ICONV_FLAVOR is defined to one of these or undefined. */ + +#define ICONV_FLAVOR_AIX "iconv_open-aix.h" +#define ICONV_FLAVOR_HPUX "iconv_open-hpux.h" +#define ICONV_FLAVOR_IRIX "iconv_open-irix.h" +#define ICONV_FLAVOR_OSF "iconv_open-osf.h" +#define ICONV_FLAVOR_SOLARIS "iconv_open-solaris.h" + +#ifdef ICONV_FLAVOR +# include ICONV_FLAVOR +#endif + +iconv_t +rpl_iconv_open (const char *tocode, const char *fromcode) +#undef iconv_open +{ + char fromcode_upper[32]; + char tocode_upper[32]; + char *fromcode_upper_end; + char *tocode_upper_end; + +#if REPLACE_ICONV_UTF + /* Special handling of conversion between UTF-8 and UTF-{16,32}{BE,LE}. + Do this here, before calling the real iconv_open(), because OSF/1 5.1 + iconv() to these encoding inserts a BOM, which is wrong. + We do not need to handle conversion between arbitrary encodings and + UTF-{16,32}{BE,LE}, because the 'striconveh' module implements two-step + conversion through UTF-8. + The _ICONV_* constants are chosen to be disjoint from any iconv_t + returned by the system's iconv_open() functions. Recall that iconv_t + is a scalar type. */ + if (c_toupper (fromcode[0]) == 'U' + && c_toupper (fromcode[1]) == 'T' + && c_toupper (fromcode[2]) == 'F' + && fromcode[3] == '-') + { + if (c_toupper (tocode[0]) == 'U' + && c_toupper (tocode[1]) == 'T' + && c_toupper (tocode[2]) == 'F' + && tocode[3] == '-') + { + if (strcmp (fromcode + 4, "8") == 0) + { + if (c_strcasecmp (tocode + 4, "16BE") == 0) + return _ICONV_UTF8_UTF16BE; + if (c_strcasecmp (tocode + 4, "16LE") == 0) + return _ICONV_UTF8_UTF16LE; + if (c_strcasecmp (tocode + 4, "32BE") == 0) + return _ICONV_UTF8_UTF32BE; + if (c_strcasecmp (tocode + 4, "32LE") == 0) + return _ICONV_UTF8_UTF32LE; + } + else if (strcmp (tocode + 4, "8") == 0) + { + if (c_strcasecmp (fromcode + 4, "16BE") == 0) + return _ICONV_UTF16BE_UTF8; + if (c_strcasecmp (fromcode + 4, "16LE") == 0) + return _ICONV_UTF16LE_UTF8; + if (c_strcasecmp (fromcode + 4, "32BE") == 0) + return _ICONV_UTF32BE_UTF8; + if (c_strcasecmp (fromcode + 4, "32LE") == 0) + return _ICONV_UTF32LE_UTF8; + } + } + } +#endif + + /* Do *not* add special support for 8-bit encodings like ASCII or ISO-8859-1 + here. This would lead to programs that work in some locales (such as the + "C" or "en_US" locales) but do not work in East Asian locales. It is + better if programmers make their programs depend on GNU libiconv (except + on glibc systems), e.g. by using the AM_ICONV macro and documenting the + dependency in an INSTALL or DEPENDENCIES file. */ + + /* Try with the original names first. + This covers the case when fromcode or tocode is a lowercase encoding name + that is understood by the system's iconv_open but not listed in our + mappings table. */ + { + iconv_t cd = iconv_open (tocode, fromcode); + if (cd != (iconv_t)(-1)) + return cd; + } + + /* Convert the encodings to upper case, because + 1. in the arguments of iconv_open() on AIX, HP-UX, and OSF/1 the case + matters, + 2. it makes searching in the table faster. */ + { + const char *p = fromcode; + char *q = fromcode_upper; + while ((*q = c_toupper (*p)) != '\0') + { + p++; + q++; + if (q == &fromcode_upper[SIZEOF (fromcode_upper)]) + { + errno = EINVAL; + return (iconv_t)(-1); + } + } + fromcode_upper_end = q; + } + + { + const char *p = tocode; + char *q = tocode_upper; + while ((*q = c_toupper (*p)) != '\0') + { + p++; + q++; + if (q == &tocode_upper[SIZEOF (tocode_upper)]) + { + errno = EINVAL; + return (iconv_t)(-1); + } + } + tocode_upper_end = q; + } + +#ifdef ICONV_FLAVOR + /* Apply the mappings. */ + { + const struct mapping *m = + mapping_lookup (fromcode_upper, fromcode_upper_end - fromcode_upper); + + fromcode = (m != NULL ? m->vendor_name : fromcode_upper); + } + { + const struct mapping *m = + mapping_lookup (tocode_upper, tocode_upper_end - tocode_upper); + + tocode = (m != NULL ? m->vendor_name : tocode_upper); + } +#else + fromcode = fromcode_upper; + tocode = tocode_upper; +#endif + + return iconv_open (tocode, fromcode); +} diff --git a/gl/m4/gnulib-cache.m4 b/gl/m4/gnulib-cache.m4 index 0a3d49ba9f..2bb7a9ac42 100644 --- a/gl/m4/gnulib-cache.m4 +++ b/gl/m4/gnulib-cache.m4 @@ -27,7 +27,7 @@ # Specification in the form of a command-line invocation: -# gnulib-tool --import --dir=. --local-dir=gl/override --lib=libgnu --source-base=gl --m4-base=gl/m4 --doc-base=doc --tests-base=gl/tests --aux-dir=build-aux --with-tests --avoid=alignof-tests --avoid=lock-tests --avoid=lseek-tests --no-conditional-dependencies --libtool --macro-prefix=gl --no-vc-files accept alloca alphasort argp base64 bind byteswap c-ctype close connect error extensions func gendocs getaddrinfo getpass getsubopt gettext gettime hash-pjw-bare havelib iconv inet_ntop inet_pton lib-msvc-compat lib-symbol-versions listen maintainer-makefile manywarnings memmem-simple minmax netdb netinet_in pmccabe2html progname read-file recv recvfrom scandir select send sendto servent setsockopt shutdown snprintf socket sockets socklen stdint strcase strndup strtok_r strverscmp sys_socket sys_stat time_r timer-time timespec u64 unistd valgrind-tests vasprintf version-etc version-etc-fsf vfprintf-posix vprintf-posix vsnprintf warnings +# gnulib-tool --import --dir=. --local-dir=gl/override --lib=libgnu --source-base=gl --m4-base=gl/m4 --doc-base=doc --tests-base=gl/tests --aux-dir=build-aux --with-tests --avoid=alignof-tests --avoid=lock-tests --avoid=lseek-tests --no-conditional-dependencies --libtool --macro-prefix=gl --no-vc-files accept alloca alphasort argp base64 bind byteswap c-ctype close connect error extensions func gendocs getaddrinfo getpass getsubopt gettext gettime hash-pjw-bare havelib iconv iconv_open-utf inet_ntop inet_pton lib-msvc-compat lib-symbol-versions listen maintainer-makefile manywarnings memmem-simple minmax netdb netinet_in pmccabe2html progname read-file recv recvfrom scandir select send sendto servent setsockopt shutdown snprintf socket sockets socklen stdint strcase strndup strtok_r strverscmp sys_socket sys_stat time_r timer-time timespec u64 unistd valgrind-tests vasprintf version-etc version-etc-fsf vfprintf-posix vprintf-posix vsnprintf warnings # Specification in the form of a few gnulib-tool.m4 macro invocations: gl_LOCAL_DIR([gl/override]) @@ -54,6 +54,7 @@ gl_MODULES([ hash-pjw-bare havelib iconv + iconv_open-utf inet_ntop inet_pton lib-msvc-compat diff --git a/gl/m4/gnulib-comp.m4 b/gl/m4/gnulib-comp.m4 index afef0fb8b7..d3703caa09 100644 --- a/gl/m4/gnulib-comp.m4 +++ b/gl/m4/gnulib-comp.m4 @@ -59,6 +59,8 @@ AC_DEFUN([gl_EARLY], # Code from module byteswap-tests: # Code from module c-ctype: # Code from module c-ctype-tests: + # Code from module c-strcase: + # Code from module c-strcase-tests: # Code from module clock-time: # Code from module close: # Code from module close-tests: @@ -140,11 +142,17 @@ AC_DEFUN([gl_EARLY], # Code from module gettimeofday: # Code from module gettimeofday-tests: # Code from module gnumakefile: + # Code from module gperf: # Code from module hash-pjw-bare: # Code from module havelib: # Code from module hostent: # Code from module iconv: + # Code from module iconv-h: + # Code from module iconv-h-tests: # Code from module iconv-tests: + # Code from module iconv_open: + # Code from module iconv_open-utf: + # Code from module iconv_open-utf-tests: # Code from module ignore-value: # Code from module ignore-value-tests: # Code from module include_next: @@ -152,6 +160,7 @@ AC_DEFUN([gl_EARLY], # Code from module inet_ntop-tests: # Code from module inet_pton: # Code from module inet_pton-tests: + # Code from module inline: # Code from module intprops: # Code from module intprops-tests: # Code from module inttypes: @@ -171,6 +180,10 @@ AC_DEFUN([gl_EARLY], # Code from module lib-symbol-versions: # Code from module listen: # Code from module listen-tests: + # Code from module locale: + # Code from module locale-tests: + # Code from module localename: + # Code from module localename-tests: # Code from module lock: # Code from module lseek: # Code from module lstat: @@ -235,6 +248,8 @@ AC_DEFUN([gl_EARLY], # Code from module servent: # Code from module setenv: # Code from module setenv-tests: + # Code from module setlocale: + # Code from module setlocale-tests: # Code from module setsockopt: # Code from module setsockopt-tests: # Code from module shutdown: @@ -249,6 +264,7 @@ AC_DEFUN([gl_EARLY], # Code from module snippet/_Noreturn: # Code from module snippet/arg-nonnull: # Code from module snippet/c++defs: + # Code from module snippet/unused-parameter: # Code from module snippet/warn-on-use: # Code from module snprintf: # Code from module snprintf-tests: @@ -328,6 +344,12 @@ AC_DEFUN([gl_EARLY], # Code from module u64-tests: # Code from module unistd: # Code from module unistd-tests: + # Code from module unistr/base: + # Code from module unistr/u8-mbtoucr: + # Code from module unistr/u8-mbtoucr-tests: + # Code from module unistr/u8-uctomb: + # Code from module unistr/u8-uctomb-tests: + # Code from module unitypes: # Code from module unsetenv: # Code from module unsetenv-tests: # Code from module useless-if-before-free: @@ -563,6 +585,16 @@ AC_SUBST([LTALLOCA]) AM_ICONV m4_ifdef([gl_ICONV_MODULE_INDICATOR], [gl_ICONV_MODULE_INDICATOR([iconv])]) + gl_ICONV_H + gl_FUNC_ICONV_OPEN + if test $REPLACE_ICONV_OPEN = 1; then + AC_LIBOBJ([iconv_open]) + fi + if test $REPLACE_ICONV = 1; then + AC_LIBOBJ([iconv]) + AC_LIBOBJ([iconv_close]) + fi + gl_FUNC_ICONV_OPEN_UTF gl_FUNC_INET_NTOP if test $HAVE_INET_NTOP = 0 || test $REPLACE_INET_NTOP = 1; then AC_LIBOBJ([inet_ntop]) @@ -575,6 +607,7 @@ AC_SUBST([LTALLOCA]) gl_PREREQ_INET_PTON fi gl_ARPA_INET_MODULE_INDICATOR([inet_pton]) + gl_INLINE gl_FUNC_ISNAND_NO_LIBM if test $gl_func_isnand_no_libm != yes; then AC_LIBOBJ([isnand]) @@ -840,6 +873,12 @@ AC_SUBST([LTALLOCA]) gl_TIMER_TIME gl_TIMESPEC gl_UNISTD_H + gl_LIBUNISTRING_LIBHEADER([0.9.2], [unistr.h]) + gl_MODULE_INDICATOR([unistr/u8-mbtoucr]) + gl_LIBUNISTRING_MODULE([0.9], [unistr/u8-mbtoucr]) + gl_MODULE_INDICATOR([unistr/u8-uctomb]) + gl_LIBUNISTRING_MODULE([0.9], [unistr/u8-uctomb]) + gl_LIBUNISTRING_LIBHEADER([0.9], [unitypes.h]) gl_VALGRIND_TESTS gl_FUNC_VASNPRINTF gl_FUNC_VASPRINTF @@ -902,6 +941,8 @@ changequote([, ])dnl AC_SUBST([gltests_WITNESS]) gl_module_indicator_condition=$gltests_WITNESS m4_pushdef([gl_MODULE_INDICATOR_CONDITION], [$gl_module_indicator_condition]) + gt_LOCALE_FR + gt_LOCALE_TR_UTF8 gl_ENVIRON gl_UNISTD_MODULE_INDICATOR([environ]) gl_FCNTL_H @@ -944,6 +985,10 @@ changequote([, ])dnl gl_FLOAT_EXPONENT_LOCATION gl_LONG_DOUBLE_EXPONENT_LOCATION AC_REQUIRE([gl_LONG_DOUBLE_VS_DOUBLE]) + gl_LOCALE_H + AC_CHECK_FUNCS_ONCE([newlocale]) + gl_LOCALENAME + AC_CHECK_FUNCS_ONCE([newlocale]) gl_LOCK gl_FUNC_LSTAT if test $REPLACE_LSTAT = 1; then @@ -988,6 +1033,16 @@ changequote([, ])dnl AC_LIBOBJ([setenv]) fi gl_STDLIB_MODULE_INDICATOR([setenv]) + gl_FUNC_SETLOCALE + if test $REPLACE_SETLOCALE = 1; then + AC_LIBOBJ([setlocale]) + gl_PREREQ_SETLOCALE + fi + gl_LOCALE_MODULE_INDICATOR([setlocale]) + gt_LOCALE_FR + gt_LOCALE_FR_UTF8 + gt_LOCALE_JA + gt_LOCALE_ZH_CN AC_REQUIRE([gl_FLOAT_EXPONENT_LOCATION]) AC_REQUIRE([gl_DOUBLE_EXPONENT_LOCATION]) AC_REQUIRE([gl_LONG_DOUBLE_EXPONENT_LOCATION]) @@ -1126,6 +1181,7 @@ AC_DEFUN([gl_FILE_LIST], [ build-aux/snippet/_Noreturn.h build-aux/snippet/arg-nonnull.h build-aux/snippet/c++defs.h + build-aux/snippet/unused-parameter.h build-aux/snippet/warn-on-use.h build-aux/useless-if-before-free build-aux/vc-list-files @@ -1157,6 +1213,9 @@ AC_DEFUN([gl_FILE_LIST], [ lib/byteswap.in.h lib/c-ctype.c lib/c-ctype.h + lib/c-strcase.h + lib/c-strcasecmp.c + lib/c-strncasecmp.c lib/close.c lib/closedir.c lib/connect.c @@ -1203,6 +1262,15 @@ AC_DEFUN([gl_FILE_LIST], [ lib/glthread/threadlib.c lib/hash-pjw-bare.c lib/hash-pjw-bare.h + lib/iconv.c + lib/iconv.in.h + lib/iconv_close.c + lib/iconv_open-aix.gperf + lib/iconv_open-hpux.gperf + lib/iconv_open-irix.gperf + lib/iconv_open-osf.gperf + lib/iconv_open-solaris.gperf + lib/iconv_open.c lib/inet_ntop.c lib/inet_pton.c lib/intprops.h @@ -1302,6 +1370,11 @@ AC_DEFUN([gl_FILE_LIST], [ lib/u64.c lib/u64.h lib/unistd.in.h + lib/unistr.in.h + lib/unistr/u8-mbtoucr.c + lib/unistr/u8-uctomb-aux.c + lib/unistr/u8-uctomb.c + lib/unitypes.in.h lib/vasnprintf.c lib/vasnprintf.h lib/vasprintf.c @@ -1371,9 +1444,13 @@ AC_DEFUN([gl_FILE_LIST], [ m4/gnulib-common.m4 m4/hostent.m4 m4/iconv.m4 + m4/iconv_h.m4 + m4/iconv_open-utf.m4 + m4/iconv_open.m4 m4/include_next.m4 m4/inet_ntop.m4 m4/inet_pton.m4 + m4/inline.m4 m4/intdiv0.m4 m4/intl.m4 m4/intldir.m4 @@ -1395,6 +1472,13 @@ AC_DEFUN([gl_FILE_LIST], [ m4/lib-ld.m4 m4/lib-link.m4 m4/lib-prefix.m4 + m4/libunistring-base.m4 + m4/locale-fr.m4 + m4/locale-ja.m4 + m4/locale-tr.m4 + m4/locale-zh.m4 + m4/locale_h.m4 + m4/localename.m4 m4/lock.m4 m4/longlong.m4 m4/lseek.m4 @@ -1437,6 +1521,7 @@ AC_DEFUN([gl_FILE_LIST], [ m4/select.m4 m4/servent.m4 m4/setenv.m4 + m4/setlocale.m4 m4/signal_h.m4 m4/signbit.m4 m4/size_max.m4 @@ -1517,6 +1602,9 @@ AC_DEFUN([gl_FILE_LIST], [ tests/test-bind.c tests/test-byteswap.c tests/test-c-ctype.c + tests/test-c-strcase.sh + tests/test-c-strcasecmp.c + tests/test-c-strncasecmp.c tests/test-close.c tests/test-connect.c tests/test-dirent.c @@ -1568,6 +1656,8 @@ AC_DEFUN([gl_FILE_LIST], [ tests/test-getopt_long.h tests/test-getpeername.c tests/test-gettimeofday.c + tests/test-iconv-h.c + tests/test-iconv-utf.c tests/test-iconv.c tests/test-ignore-value.c tests/test-inet_ntop.c @@ -1583,6 +1673,8 @@ AC_DEFUN([gl_FILE_LIST], [ tests/test-isnanl-nolibm.c tests/test-isnanl.h tests/test-listen.c + tests/test-locale.c + tests/test-localename.c tests/test-lstat.c tests/test-lstat.h tests/test-malloc-gnu.c @@ -1615,6 +1707,10 @@ AC_DEFUN([gl_FILE_LIST], [ tests/test-send.c tests/test-sendto.c tests/test-setenv.c + tests/test-setlocale1.c + tests/test-setlocale1.sh + tests/test-setlocale2.c + tests/test-setlocale2.sh tests/test-setsockopt.c tests/test-shutdown.c tests/test-signal-h.c @@ -1666,6 +1762,8 @@ AC_DEFUN([gl_FILE_LIST], [ tests/test-vprintf-posix.sh tests/test-vsnprintf.c tests/test-wchar.c + tests/unistr/test-u8-mbtoucr.c + tests/unistr/test-u8-uctomb.c tests/zerosize-ptr.h tests=lib/binary-io.c tests=lib/binary-io.h @@ -1679,6 +1777,9 @@ AC_DEFUN([gl_FILE_LIST], [ tests=lib/ignore-value.h tests=lib/inttypes.in.h tests=lib/ioctl.c + tests=lib/locale.in.h + tests=lib/localename.c + tests=lib/localename.h tests=lib/lstat.c tests=lib/malloca.c tests=lib/malloca.h @@ -1690,6 +1791,7 @@ AC_DEFUN([gl_FILE_LIST], [ tests=lib/putenv.c tests=lib/same-inode.h tests=lib/setenv.c + tests=lib/setlocale.c tests=lib/stat.c tests=lib/strerror_r.c tests=lib/symlink.c diff --git a/gl/m4/iconv_h.m4 b/gl/m4/iconv_h.m4 new file mode 100644 index 0000000000..aa86cf8fee --- /dev/null +++ b/gl/m4/iconv_h.m4 @@ -0,0 +1,41 @@ +# iconv_h.m4 serial 8 +dnl Copyright (C) 2007-2012 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +AC_DEFUN([gl_ICONV_H], +[ + AC_REQUIRE([gl_ICONV_H_DEFAULTS]) + + dnl Execute this unconditionally, because ICONV_H may be set by other + dnl modules, after this code is executed. + gl_CHECK_NEXT_HEADERS([iconv.h]) +]) + +dnl Unconditionally enables the replacement of <iconv.h>. +AC_DEFUN([gl_REPLACE_ICONV_H], +[ + AC_REQUIRE([gl_ICONV_H_DEFAULTS]) + ICONV_H='iconv.h' + AM_CONDITIONAL([GL_GENERATE_ICONV_H], [test -n "$ICONV_H"]) +]) + +AC_DEFUN([gl_ICONV_MODULE_INDICATOR], +[ + dnl Use AC_REQUIRE here, so that the default settings are expanded once only. + AC_REQUIRE([gl_ICONV_H_DEFAULTS]) + gl_MODULE_INDICATOR_SET_VARIABLE([$1]) +]) + +AC_DEFUN([gl_ICONV_H_DEFAULTS], +[ + GNULIB_ICONV=0; AC_SUBST([GNULIB_ICONV]) + dnl Assume proper GNU behavior unless another module says otherwise. + ICONV_CONST=; AC_SUBST([ICONV_CONST]) + REPLACE_ICONV=0; AC_SUBST([REPLACE_ICONV]) + REPLACE_ICONV_OPEN=0; AC_SUBST([REPLACE_ICONV_OPEN]) + REPLACE_ICONV_UTF=0; AC_SUBST([REPLACE_ICONV_UTF]) + ICONV_H=''; AC_SUBST([ICONV_H]) + AM_CONDITIONAL([GL_GENERATE_ICONV_H], [test -n "$ICONV_H"]) +]) diff --git a/gl/m4/iconv_open-utf.m4 b/gl/m4/iconv_open-utf.m4 new file mode 100644 index 0000000000..f450f55436 --- /dev/null +++ b/gl/m4/iconv_open-utf.m4 @@ -0,0 +1,231 @@ +# iconv_open-utf.m4 serial 1 +dnl Copyright (C) 2007-2012 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +# A placeholder to ensure that this m4 file gets included by aclocal. +AC_DEFUN([gl_FUNC_ICONV_OPEN_UTF], []) + +AC_DEFUN([gl_FUNC_ICONV_OPEN_UTF_SUPPORT], +[ + dnl This macro relies on am_cv_func_iconv and gl_func_iconv_gnu from + dnl gl_FUNC_ICONV_OPEN, but is called from within gl_FUNC_ICONV_OPEN. + dnl *Not* AC_REQUIRE([gl_FUNC_ICONV_OPEN]). + AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles + AC_REQUIRE([gl_ICONV_H_DEFAULTS]) + if test "$am_cv_func_iconv" = yes; then + AC_CACHE_CHECK([whether iconv supports conversion between UTF-8 and UTF-{16,32}{BE,LE}], + [gl_cv_func_iconv_supports_utf], + [ + save_LIBS="$LIBS" + LIBS="$LIBS $LIBICONV" + AC_RUN_IFELSE( + [AC_LANG_SOURCE([[ +#include <iconv.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +int main () +{ + int result = 0; + /* Test conversion from UTF-8 to UTF-16BE with no errors. */ + { + static const char input[] = + "Japanese (\346\227\245\346\234\254\350\252\236) [\360\235\224\215\360\235\224\236\360\235\224\255]"; + static const char expected[] = + "\000J\000a\000p\000a\000n\000e\000s\000e\000 \000(\145\345\147\054\212\236\000)\000 \000[\330\065\335\015\330\065\335\036\330\065\335\055\000]"; + iconv_t cd; + cd = iconv_open ("UTF-16BE", "UTF-8"); + if (cd == (iconv_t)(-1)) + result |= 1; + else + { + char buf[100]; + const char *inptr; + size_t inbytesleft; + char *outptr; + size_t outbytesleft; + size_t res; + inptr = input; + inbytesleft = sizeof (input) - 1; + outptr = buf; + outbytesleft = sizeof (buf); + res = iconv (cd, + (ICONV_CONST char **) &inptr, &inbytesleft, + &outptr, &outbytesleft); + if (!(res == 0 && inbytesleft == 0)) + result |= 1; + else if (!(outptr == buf + (sizeof (expected) - 1))) + result |= 1; + else if (!(memcmp (buf, expected, sizeof (expected) - 1) == 0)) + result |= 1; + else if (!(iconv_close (cd) == 0)) + result |= 1; + } + } + /* Test conversion from UTF-8 to UTF-16LE with no errors. */ + { + static const char input[] = + "Japanese (\346\227\245\346\234\254\350\252\236) [\360\235\224\215\360\235\224\236\360\235\224\255]"; + static const char expected[] = + "J\000a\000p\000a\000n\000e\000s\000e\000 \000(\000\345\145\054\147\236\212)\000 \000[\000\065\330\015\335\065\330\036\335\065\330\055\335]\000"; + iconv_t cd; + cd = iconv_open ("UTF-16LE", "UTF-8"); + if (cd == (iconv_t)(-1)) + result |= 2; + else + { + char buf[100]; + const char *inptr; + size_t inbytesleft; + char *outptr; + size_t outbytesleft; + size_t res; + inptr = input; + inbytesleft = sizeof (input) - 1; + outptr = buf; + outbytesleft = sizeof (buf); + res = iconv (cd, + (ICONV_CONST char **) &inptr, &inbytesleft, + &outptr, &outbytesleft); + if (!(res == 0 && inbytesleft == 0)) + result |= 2; + else if (!(outptr == buf + (sizeof (expected) - 1))) + result |= 2; + else if (!(memcmp (buf, expected, sizeof (expected) - 1) == 0)) + result |= 2; + else if (!(iconv_close (cd) == 0)) + result |= 2; + } + } + /* Test conversion from UTF-8 to UTF-32BE with no errors. */ + { + static const char input[] = + "Japanese (\346\227\245\346\234\254\350\252\236) [\360\235\224\215\360\235\224\236\360\235\224\255]"; + static const char expected[] = + "\000\000\000J\000\000\000a\000\000\000p\000\000\000a\000\000\000n\000\000\000e\000\000\000s\000\000\000e\000\000\000 \000\000\000(\000\000\145\345\000\000\147\054\000\000\212\236\000\000\000)\000\000\000 \000\000\000[\000\001\325\015\000\001\325\036\000\001\325\055\000\000\000]"; + iconv_t cd; + cd = iconv_open ("UTF-32BE", "UTF-8"); + if (cd == (iconv_t)(-1)) + result |= 4; + else + { + char buf[100]; + const char *inptr; + size_t inbytesleft; + char *outptr; + size_t outbytesleft; + size_t res; + inptr = input; + inbytesleft = sizeof (input) - 1; + outptr = buf; + outbytesleft = sizeof (buf); + res = iconv (cd, + (ICONV_CONST char **) &inptr, &inbytesleft, + &outptr, &outbytesleft); + if (!(res == 0 && inbytesleft == 0)) + result |= 4; + else if (!(outptr == buf + (sizeof (expected) - 1))) + result |= 4; + else if (!(memcmp (buf, expected, sizeof (expected) - 1) == 0)) + result |= 4; + else if (!(iconv_close (cd) == 0)) + result |= 4; + } + } + /* Test conversion from UTF-8 to UTF-32LE with no errors. */ + { + static const char input[] = + "Japanese (\346\227\245\346\234\254\350\252\236) [\360\235\224\215\360\235\224\236\360\235\224\255]"; + static const char expected[] = + "J\000\000\000a\000\000\000p\000\000\000a\000\000\000n\000\000\000e\000\000\000s\000\000\000e\000\000\000 \000\000\000(\000\000\000\345\145\000\000\054\147\000\000\236\212\000\000)\000\000\000 \000\000\000[\000\000\000\015\325\001\000\036\325\001\000\055\325\001\000]\000\000\000"; + iconv_t cd; + cd = iconv_open ("UTF-32LE", "UTF-8"); + if (cd == (iconv_t)(-1)) + result |= 8; + else + { + char buf[100]; + const char *inptr; + size_t inbytesleft; + char *outptr; + size_t outbytesleft; + size_t res; + inptr = input; + inbytesleft = sizeof (input) - 1; + outptr = buf; + outbytesleft = sizeof (buf); + res = iconv (cd, + (ICONV_CONST char **) &inptr, &inbytesleft, + &outptr, &outbytesleft); + if (!(res == 0 && inbytesleft == 0)) + result |= 8; + else if (!(outptr == buf + (sizeof (expected) - 1))) + result |= 8; + else if (!(memcmp (buf, expected, sizeof (expected) - 1) == 0)) + result |= 8; + else if (!(iconv_close (cd) == 0)) + result |= 8; + } + } + /* Test conversion from UTF-16BE to UTF-8 with no errors. + This test fails on NetBSD 3.0. */ + { + static const char input[] = + "\000J\000a\000p\000a\000n\000e\000s\000e\000 \000(\145\345\147\054\212\236\000)\000 \000[\330\065\335\015\330\065\335\036\330\065\335\055\000]"; + static const char expected[] = + "Japanese (\346\227\245\346\234\254\350\252\236) [\360\235\224\215\360\235\224\236\360\235\224\255]"; + iconv_t cd; + cd = iconv_open ("UTF-8", "UTF-16BE"); + if (cd == (iconv_t)(-1)) + result |= 16; + else + { + char buf[100]; + const char *inptr; + size_t inbytesleft; + char *outptr; + size_t outbytesleft; + size_t res; + inptr = input; + inbytesleft = sizeof (input) - 1; + outptr = buf; + outbytesleft = sizeof (buf); + res = iconv (cd, + (ICONV_CONST char **) &inptr, &inbytesleft, + &outptr, &outbytesleft); + if (!(res == 0 && inbytesleft == 0)) + result |= 16; + else if (!(outptr == buf + (sizeof (expected) - 1))) + result |= 16; + else if (!(memcmp (buf, expected, sizeof (expected) - 1) == 0)) + result |= 16; + else if (!(iconv_close (cd) == 0)) + result |= 16; + } + } + return result; +}]])], + [gl_cv_func_iconv_supports_utf=yes], + [gl_cv_func_iconv_supports_utf=no], + [ + dnl We know that GNU libiconv, GNU libc, and Solaris >= 9 do. + dnl OSF/1 5.1 has these encodings, but inserts a BOM in the "to" + dnl direction. + gl_cv_func_iconv_supports_utf=no + if test $gl_func_iconv_gnu = yes; then + gl_cv_func_iconv_supports_utf=yes + else +changequote(,)dnl + case "$host_os" in + solaris2.9 | solaris2.1[0-9]) gl_cv_func_iconv_supports_utf=yes ;; + esac +changequote([,])dnl + fi + ]) + LIBS="$save_LIBS" + ]) + fi +]) diff --git a/gl/m4/iconv_open.m4 b/gl/m4/iconv_open.m4 new file mode 100644 index 0000000000..07f4849189 --- /dev/null +++ b/gl/m4/iconv_open.m4 @@ -0,0 +1,56 @@ +# iconv_open.m4 serial 14 +dnl Copyright (C) 2007-2012 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +AC_DEFUN([gl_FUNC_ICONV_OPEN], +[ + AC_REQUIRE([AM_ICONV]) + AC_REQUIRE([AC_CANONICAL_HOST]) + AC_REQUIRE([gl_ICONV_H_DEFAULTS]) + if test "$am_cv_func_iconv" = yes; then + dnl Provide the <iconv.h> override, for the sake of the C++ aliases. + gl_REPLACE_ICONV_H + dnl Test whether iconv_open accepts standardized encoding names. + dnl We know that GNU libiconv and GNU libc do. + AC_EGREP_CPP([gnu_iconv], [ + #include <iconv.h> + #if defined _LIBICONV_VERSION || (defined __GLIBC__ && !defined __UCLIBC__) + gnu_iconv + #endif + ], [gl_func_iconv_gnu=yes], [gl_func_iconv_gnu=no]) + if test $gl_func_iconv_gnu = no; then + iconv_flavor= + case "$host_os" in + aix*) iconv_flavor=ICONV_FLAVOR_AIX ;; + irix*) iconv_flavor=ICONV_FLAVOR_IRIX ;; + hpux*) iconv_flavor=ICONV_FLAVOR_HPUX ;; + osf*) iconv_flavor=ICONV_FLAVOR_OSF ;; + solaris*) iconv_flavor=ICONV_FLAVOR_SOLARIS ;; + esac + if test -n "$iconv_flavor"; then + AC_DEFINE_UNQUOTED([ICONV_FLAVOR], [$iconv_flavor], + [Define to a symbolic name denoting the flavor of iconv_open() + implementation.]) + gl_REPLACE_ICONV_OPEN + fi + fi + m4_ifdef([gl_FUNC_ICONV_OPEN_UTF_SUPPORT], [ + gl_FUNC_ICONV_OPEN_UTF_SUPPORT + if test $gl_cv_func_iconv_supports_utf = no; then + REPLACE_ICONV_UTF=1 + AC_DEFINE([REPLACE_ICONV_UTF], [1], + [Define if the iconv() functions are enhanced to handle the UTF-{16,32}{BE,LE} encodings.]) + REPLACE_ICONV=1 + gl_REPLACE_ICONV_OPEN + fi + ]) + fi +]) + +AC_DEFUN([gl_REPLACE_ICONV_OPEN], +[ + gl_REPLACE_ICONV_H + REPLACE_ICONV_OPEN=1 +]) diff --git a/gl/m4/inline.m4 b/gl/m4/inline.m4 new file mode 100644 index 0000000000..6fa997246c --- /dev/null +++ b/gl/m4/inline.m4 @@ -0,0 +1,40 @@ +# inline.m4 serial 4 +dnl Copyright (C) 2006, 2009-2012 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +dnl Test for the 'inline' keyword or equivalent. +dnl Define 'inline' to a supported equivalent, or to nothing if not supported, +dnl like AC_C_INLINE does. Also, define HAVE_INLINE if 'inline' or an +dnl equivalent is effectively supported, i.e. if the compiler is likely to +dnl drop unused 'static inline' functions. +AC_DEFUN([gl_INLINE], +[ + AC_REQUIRE([AC_C_INLINE]) + AC_CACHE_CHECK([whether the compiler generally respects inline], + [gl_cv_c_inline_effective], + [if test $ac_cv_c_inline = no; then + gl_cv_c_inline_effective=no + else + dnl GCC defines __NO_INLINE__ if not optimizing or if -fno-inline is + dnl specified. + dnl Use AC_COMPILE_IFELSE here, not AC_EGREP_CPP, because the result + dnl depends on optimization flags, which can be in CFLAGS. + dnl (AC_EGREP_CPP looks only at the CPPFLAGS.) + AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM([[]], + [[#ifdef __NO_INLINE__ + #error "inline is not effective" + #endif]])], + [gl_cv_c_inline_effective=yes], + [gl_cv_c_inline_effective=no]) + fi + ]) + if test $gl_cv_c_inline_effective = yes; then + AC_DEFINE([HAVE_INLINE], [1], + [Define to 1 if the compiler supports one of the keywords + 'inline', '__inline__', '__inline' and effectively inlines + functions marked as such.]) + fi +]) diff --git a/gl/m4/libunistring-base.m4 b/gl/m4/libunistring-base.m4 new file mode 100644 index 0000000000..d91c42b9f0 --- /dev/null +++ b/gl/m4/libunistring-base.m4 @@ -0,0 +1,141 @@ +# libunistring-base.m4 serial 5 +dnl Copyright (C) 2010-2012 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +dnl From Paolo Bonzini and Bruno Haible. + +dnl gl_LIBUNISTRING_MODULE([VERSION], [Module]) +dnl Declares that the source files of Module should be compiled, unless we +dnl are linking with libunistring and its version is >= the given VERSION. +dnl Defines an automake conditional LIBUNISTRING_COMPILE_$MODULE that is +dnl true if the source files of Module should be compiled. +dnl This macro is to be used for public libunistring API, not for +dnl undocumented API. +dnl +dnl You have to bump the VERSION argument to the next projected version +dnl number each time you make a change that affects the behaviour of the +dnl functions defined in Module (even if the sources of Module itself do not +dnl change). + +AC_DEFUN([gl_LIBUNISTRING_MODULE], +[ + AC_REQUIRE([gl_LIBUNISTRING_LIB_PREPARE]) + dnl Use the variables HAVE_LIBUNISTRING, LIBUNISTRING_VERSION from + dnl gl_LIBUNISTRING_CORE if that macro has been run. + AM_CONDITIONAL(AS_TR_CPP([LIBUNISTRING_COMPILE_$2]), + [gl_LIBUNISTRING_VERSION_CMP([$1])]) +]) + +dnl gl_LIBUNISTRING_LIBHEADER([VERSION], [HeaderFile]) +dnl Declares that HeaderFile should be created, unless we are linking +dnl with libunistring and its version is >= the given VERSION. +dnl HeaderFile should be relative to the lib directory and end in '.h'. +dnl Prepares for substituting LIBUNISTRING_HEADERFILE (to HeaderFile or empty). +dnl +dnl When we are linking with the already installed libunistring and its version +dnl is < VERSION, we create HeaderFile here, because we may compile functions +dnl (via gl_LIBUNISTRING_MODULE above) that are not contained in the installed +dnl version. +dnl When we are linking with the already installed libunistring and its version +dnl is > VERSION, we don't create HeaderFile here: it could cause compilation +dnl errors in other libunistring header files if some types are missing. +dnl +dnl You have to bump the VERSION argument to the next projected version +dnl number each time you make a non-comment change to the HeaderFile. + +AC_DEFUN([gl_LIBUNISTRING_LIBHEADER], +[ + AC_REQUIRE([gl_LIBUNISTRING_LIB_PREPARE]) + dnl Use the variables HAVE_LIBUNISTRING, LIBUNISTRING_VERSION from + dnl gl_LIBUNISTRING_CORE if that macro has been run. + if gl_LIBUNISTRING_VERSION_CMP([$1]); then + LIBUNISTRING_[]AS_TR_CPP([$2])='$2' + else + LIBUNISTRING_[]AS_TR_CPP([$2])= + fi + AC_SUBST([LIBUNISTRING_]AS_TR_CPP([$2])) +]) + +dnl Miscellaneous preparations/initializations. + +AC_DEFUN([gl_LIBUNISTRING_LIB_PREPARE], +[ + dnl Ensure that HAVE_LIBUNISTRING is fully determined at this point. + m4_ifdef([gl_LIBUNISTRING], [AC_REQUIRE([gl_LIBUNISTRING])]) + + AC_REQUIRE([AC_PROG_AWK]) + +dnl Sed expressions to extract the parts of a version number. +changequote(,) +gl_libunistring_sed_extract_major='/^[0-9]/{s/^\([0-9]*\).*/\1/p;q;} +i\ +0 +q +' +gl_libunistring_sed_extract_minor='/^[0-9][0-9]*[.][0-9]/{s/^[0-9]*[.]\([0-9]*\).*/\1/p;q;} +i\ +0 +q +' +gl_libunistring_sed_extract_subminor='/^[0-9][0-9]*[.][0-9][0-9]*[.][0-9]/{s/^[0-9]*[.][0-9]*[.]\([0-9]*\).*/\1/p;q;} +i\ +0 +q +' +changequote([,]) + + if test "$HAVE_LIBUNISTRING" = yes; then + LIBUNISTRING_VERSION_MAJOR=`echo "$LIBUNISTRING_VERSION" | sed -n -e "$gl_libunistring_sed_extract_major"` + LIBUNISTRING_VERSION_MINOR=`echo "$LIBUNISTRING_VERSION" | sed -n -e "$gl_libunistring_sed_extract_minor"` + LIBUNISTRING_VERSION_SUBMINOR=`echo "$LIBUNISTRING_VERSION" | sed -n -e "$gl_libunistring_sed_extract_subminor"` + fi +]) + +dnl gl_LIBUNISTRING_VERSION_CMP([VERSION]) +dnl Expands to a shell statement that evaluates to true if LIBUNISTRING_VERSION +dnl is less than the VERSION argument. +AC_DEFUN([gl_LIBUNISTRING_VERSION_CMP], +[ { test "$HAVE_LIBUNISTRING" != yes \ + || { + dnl AS_LITERAL_IF exists and works fine since autoconf-2.59 at least. + AS_LITERAL_IF([$1], + [dnl This is the optimized variant, that assumes the argument is a literal: + m4_pushdef([requested_version_major], + [gl_LIBUNISTRING_ARG_OR_ZERO(m4_bpatsubst([$1], [^\([0-9]*\).*], [\1]), [])]) + m4_pushdef([requested_version_minor], + [gl_LIBUNISTRING_ARG_OR_ZERO(m4_bpatsubst([$1], [^[0-9]*[.]\([0-9]*\).*], [\1]), [$1])]) + m4_pushdef([requested_version_subminor], + [gl_LIBUNISTRING_ARG_OR_ZERO(m4_bpatsubst([$1], [^[0-9]*[.][0-9]*[.]\([0-9]*\).*], [\1]), [$1])]) + test $LIBUNISTRING_VERSION_MAJOR -lt requested_version_major \ + || { test $LIBUNISTRING_VERSION_MAJOR -eq requested_version_major \ + && { test $LIBUNISTRING_VERSION_MINOR -lt requested_version_minor \ + || { test $LIBUNISTRING_VERSION_MINOR -eq requested_version_minor \ + && test $LIBUNISTRING_VERSION_SUBMINOR -lt requested_version_subminor + } + } + } + m4_popdef([requested_version_subminor]) + m4_popdef([requested_version_minor]) + m4_popdef([requested_version_major]) + ], + [dnl This is the unoptimized variant: + requested_version_major=`echo '$1' | sed -n -e "$gl_libunistring_sed_extract_major"` + requested_version_minor=`echo '$1' | sed -n -e "$gl_libunistring_sed_extract_minor"` + requested_version_subminor=`echo '$1' | sed -n -e "$gl_libunistring_sed_extract_subminor"` + test $LIBUNISTRING_VERSION_MAJOR -lt $requested_version_major \ + || { test $LIBUNISTRING_VERSION_MAJOR -eq $requested_version_major \ + && { test $LIBUNISTRING_VERSION_MINOR -lt $requested_version_minor \ + || { test $LIBUNISTRING_VERSION_MINOR -eq $requested_version_minor \ + && test $LIBUNISTRING_VERSION_SUBMINOR -lt $requested_version_subminor + } + } + } + ]) + } + }]) + +dnl gl_LIBUNISTRING_ARG_OR_ZERO([ARG], [ORIG]) expands to ARG if it is not the +dnl same as ORIG, otherwise to 0. +m4_define([gl_LIBUNISTRING_ARG_OR_ZERO], [m4_if([$1], [$2], [0], [$1])]) diff --git a/gl/m4/locale-fr.m4 b/gl/m4/locale-fr.m4 new file mode 100644 index 0000000000..71b68476c6 --- /dev/null +++ b/gl/m4/locale-fr.m4 @@ -0,0 +1,250 @@ +# locale-fr.m4 serial 17 +dnl Copyright (C) 2003, 2005-2012 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +dnl From Bruno Haible. + +dnl Determine the name of a french locale with traditional encoding. +AC_DEFUN([gt_LOCALE_FR], +[ + AC_REQUIRE([AC_CANONICAL_HOST]) + AC_REQUIRE([AM_LANGINFO_CODESET]) + AC_CACHE_CHECK([for a traditional french locale], [gt_cv_locale_fr], [ + AC_LANG_CONFTEST([AC_LANG_SOURCE([ +changequote(,)dnl +#include <locale.h> +#include <time.h> +#if HAVE_LANGINFO_CODESET +# include <langinfo.h> +#endif +#include <stdlib.h> +#include <string.h> +struct tm t; +char buf[16]; +int main () { + /* Check whether the given locale name is recognized by the system. */ +#if (defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__ + /* On native Windows, setlocale(category, "") looks at the system settings, + not at the environment variables. Also, when an encoding suffix such + as ".65001" or ".54936" is specified, it succeeds but sets the LC_CTYPE + category of the locale to "C". */ + if (setlocale (LC_ALL, getenv ("LC_ALL")) == NULL + || strcmp (setlocale (LC_CTYPE, NULL), "C") == 0) + return 1; +#else + if (setlocale (LC_ALL, "") == NULL) return 1; +#endif + /* Check whether nl_langinfo(CODESET) is nonempty and not "ASCII" or "646". + On Mac OS X 10.3.5 (Darwin 7.5) in the fr_FR locale, nl_langinfo(CODESET) + is empty, and the behaviour of Tcl 8.4 in this locale is not useful. + On OpenBSD 4.0, when an unsupported locale is specified, setlocale() + succeeds but then nl_langinfo(CODESET) is "646". In this situation, + some unit tests fail. + On MirBSD 10, when an unsupported locale is specified, setlocale() + succeeds but then nl_langinfo(CODESET) is "UTF-8". */ +#if HAVE_LANGINFO_CODESET + { + const char *cs = nl_langinfo (CODESET); + if (cs[0] == '\0' || strcmp (cs, "ASCII") == 0 || strcmp (cs, "646") == 0 + || strcmp (cs, "UTF-8") == 0) + return 1; + } +#endif +#ifdef __CYGWIN__ + /* On Cygwin, avoid locale names without encoding suffix, because the + locale_charset() function relies on the encoding suffix. Note that + LC_ALL is set on the command line. */ + if (strchr (getenv ("LC_ALL"), '.') == NULL) return 1; +#endif + /* Check whether in the abbreviation of the second month, the second + character (should be U+00E9: LATIN SMALL LETTER E WITH ACUTE) is only + one byte long. This excludes the UTF-8 encoding. */ + t.tm_year = 1975 - 1900; t.tm_mon = 2 - 1; t.tm_mday = 4; + if (strftime (buf, sizeof (buf), "%b", &t) < 3 || buf[2] != 'v') return 1; +#if !defined __BIONIC__ /* Bionic libc's 'struct lconv' is just a dummy. */ + /* Check whether the decimal separator is a comma. + On NetBSD 3.0 in the fr_FR.ISO8859-1 locale, localeconv()->decimal_point + are nl_langinfo(RADIXCHAR) are both ".". */ + if (localeconv () ->decimal_point[0] != ',') return 1; +#endif + return 0; +} +changequote([,])dnl + ])]) + if AC_TRY_EVAL([ac_link]) && test -s conftest$ac_exeext; then + case "$host_os" in + # Handle native Windows specially, because there setlocale() interprets + # "ar" as "Arabic" or "Arabic_Saudi Arabia.1256", + # "fr" or "fra" as "French" or "French_France.1252", + # "ge"(!) or "deu"(!) as "German" or "German_Germany.1252", + # "ja" as "Japanese" or "Japanese_Japan.932", + # and similar. + mingw*) + # Test for the native Windows locale name. + if (LC_ALL=French_France.1252 LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then + gt_cv_locale_fr=French_France.1252 + else + # None found. + gt_cv_locale_fr=none + fi + ;; + *) + # Setting LC_ALL is not enough. Need to set LC_TIME to empty, because + # otherwise on Mac OS X 10.3.5 the LC_TIME=C from the beginning of the + # configure script would override the LC_ALL setting. Likewise for + # LC_CTYPE, which is also set at the beginning of the configure script. + # Test for the usual locale name. + if (LC_ALL=fr_FR LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then + gt_cv_locale_fr=fr_FR + else + # Test for the locale name with explicit encoding suffix. + if (LC_ALL=fr_FR.ISO-8859-1 LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then + gt_cv_locale_fr=fr_FR.ISO-8859-1 + else + # Test for the AIX, OSF/1, FreeBSD, NetBSD, OpenBSD locale name. + if (LC_ALL=fr_FR.ISO8859-1 LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then + gt_cv_locale_fr=fr_FR.ISO8859-1 + else + # Test for the HP-UX locale name. + if (LC_ALL=fr_FR.iso88591 LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then + gt_cv_locale_fr=fr_FR.iso88591 + else + # Test for the Solaris 7 locale name. + if (LC_ALL=fr LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then + gt_cv_locale_fr=fr + else + # None found. + gt_cv_locale_fr=none + fi + fi + fi + fi + fi + ;; + esac + fi + rm -fr conftest* + ]) + LOCALE_FR=$gt_cv_locale_fr + AC_SUBST([LOCALE_FR]) +]) + +dnl Determine the name of a french locale with UTF-8 encoding. +AC_DEFUN([gt_LOCALE_FR_UTF8], +[ + AC_REQUIRE([AM_LANGINFO_CODESET]) + AC_CACHE_CHECK([for a french Unicode locale], [gt_cv_locale_fr_utf8], [ + AC_LANG_CONFTEST([AC_LANG_SOURCE([ +changequote(,)dnl +#include <locale.h> +#include <time.h> +#if HAVE_LANGINFO_CODESET +# include <langinfo.h> +#endif +#include <stdlib.h> +#include <string.h> +struct tm t; +char buf[16]; +int main () { + /* On BeOS and Haiku, locales are not implemented in libc. Rather, libintl + imitates locale dependent behaviour by looking at the environment + variables, and all locales use the UTF-8 encoding. */ +#if !(defined __BEOS__ || defined __HAIKU__) + /* Check whether the given locale name is recognized by the system. */ +# if (defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__ + /* On native Windows, setlocale(category, "") looks at the system settings, + not at the environment variables. Also, when an encoding suffix such + as ".65001" or ".54936" is specified, it succeeds but sets the LC_CTYPE + category of the locale to "C". */ + if (setlocale (LC_ALL, getenv ("LC_ALL")) == NULL + || strcmp (setlocale (LC_CTYPE, NULL), "C") == 0) + return 1; +# else + if (setlocale (LC_ALL, "") == NULL) return 1; +# endif + /* Check whether nl_langinfo(CODESET) is nonempty and not "ASCII" or "646". + On Mac OS X 10.3.5 (Darwin 7.5) in the fr_FR locale, nl_langinfo(CODESET) + is empty, and the behaviour of Tcl 8.4 in this locale is not useful. + On OpenBSD 4.0, when an unsupported locale is specified, setlocale() + succeeds but then nl_langinfo(CODESET) is "646". In this situation, + some unit tests fail. */ +# if HAVE_LANGINFO_CODESET + { + const char *cs = nl_langinfo (CODESET); + if (cs[0] == '\0' || strcmp (cs, "ASCII") == 0 || strcmp (cs, "646") == 0) + return 1; + } +# endif +# ifdef __CYGWIN__ + /* On Cygwin, avoid locale names without encoding suffix, because the + locale_charset() function relies on the encoding suffix. Note that + LC_ALL is set on the command line. */ + if (strchr (getenv ("LC_ALL"), '.') == NULL) return 1; +# endif + /* Check whether in the abbreviation of the second month, the second + character (should be U+00E9: LATIN SMALL LETTER E WITH ACUTE) is + two bytes long, with UTF-8 encoding. */ + t.tm_year = 1975 - 1900; t.tm_mon = 2 - 1; t.tm_mday = 4; + if (strftime (buf, sizeof (buf), "%b", &t) < 4 + || buf[1] != (char) 0xc3 || buf[2] != (char) 0xa9 || buf[3] != 'v') + return 1; +#endif +#if !defined __BIONIC__ /* Bionic libc's 'struct lconv' is just a dummy. */ + /* Check whether the decimal separator is a comma. + On NetBSD 3.0 in the fr_FR.ISO8859-1 locale, localeconv()->decimal_point + are nl_langinfo(RADIXCHAR) are both ".". */ + if (localeconv () ->decimal_point[0] != ',') return 1; +#endif + return 0; +} +changequote([,])dnl + ])]) + if AC_TRY_EVAL([ac_link]) && test -s conftest$ac_exeext; then + case "$host_os" in + # Handle native Windows specially, because there setlocale() interprets + # "ar" as "Arabic" or "Arabic_Saudi Arabia.1256", + # "fr" or "fra" as "French" or "French_France.1252", + # "ge"(!) or "deu"(!) as "German" or "German_Germany.1252", + # "ja" as "Japanese" or "Japanese_Japan.932", + # and similar. + mingw*) + # Test for the hypothetical native Windows locale name. + if (LC_ALL=French_France.65001 LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then + gt_cv_locale_fr_utf8=French_France.65001 + else + # None found. + gt_cv_locale_fr_utf8=none + fi + ;; + *) + # Setting LC_ALL is not enough. Need to set LC_TIME to empty, because + # otherwise on Mac OS X 10.3.5 the LC_TIME=C from the beginning of the + # configure script would override the LC_ALL setting. Likewise for + # LC_CTYPE, which is also set at the beginning of the configure script. + # Test for the usual locale name. + if (LC_ALL=fr_FR LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then + gt_cv_locale_fr_utf8=fr_FR + else + # Test for the locale name with explicit encoding suffix. + if (LC_ALL=fr_FR.UTF-8 LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then + gt_cv_locale_fr_utf8=fr_FR.UTF-8 + else + # Test for the Solaris 7 locale name. + if (LC_ALL=fr.UTF-8 LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then + gt_cv_locale_fr_utf8=fr.UTF-8 + else + # None found. + gt_cv_locale_fr_utf8=none + fi + fi + fi + ;; + esac + fi + rm -fr conftest* + ]) + LOCALE_FR_UTF8=$gt_cv_locale_fr_utf8 + AC_SUBST([LOCALE_FR_UTF8]) +]) diff --git a/gl/m4/locale-ja.m4 b/gl/m4/locale-ja.m4 new file mode 100644 index 0000000000..5ba0e4381a --- /dev/null +++ b/gl/m4/locale-ja.m4 @@ -0,0 +1,136 @@ +# locale-ja.m4 serial 12 +dnl Copyright (C) 2003, 2005-2012 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +dnl From Bruno Haible. + +dnl Determine the name of a japanese locale with EUC-JP encoding. +AC_DEFUN([gt_LOCALE_JA], +[ + AC_REQUIRE([AC_CANONICAL_HOST]) + AC_REQUIRE([AM_LANGINFO_CODESET]) + AC_CACHE_CHECK([for a traditional japanese locale], [gt_cv_locale_ja], [ + AC_LANG_CONFTEST([AC_LANG_SOURCE([ +changequote(,)dnl +#include <locale.h> +#include <time.h> +#if HAVE_LANGINFO_CODESET +# include <langinfo.h> +#endif +#include <stdlib.h> +#include <string.h> +struct tm t; +char buf[16]; +int main () +{ + const char *p; + /* Check whether the given locale name is recognized by the system. */ +#if (defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__ + /* On native Windows, setlocale(category, "") looks at the system settings, + not at the environment variables. Also, when an encoding suffix such + as ".65001" or ".54936" is specified, it succeeds but sets the LC_CTYPE + category of the locale to "C". */ + if (setlocale (LC_ALL, getenv ("LC_ALL")) == NULL + || strcmp (setlocale (LC_CTYPE, NULL), "C") == 0) + return 1; +#else + if (setlocale (LC_ALL, "") == NULL) return 1; +#endif + /* Check whether nl_langinfo(CODESET) is nonempty and not "ASCII" or "646". + On Mac OS X 10.3.5 (Darwin 7.5) in the fr_FR locale, nl_langinfo(CODESET) + is empty, and the behaviour of Tcl 8.4 in this locale is not useful. + On OpenBSD 4.0, when an unsupported locale is specified, setlocale() + succeeds but then nl_langinfo(CODESET) is "646". In this situation, + some unit tests fail. + On MirBSD 10, when an unsupported locale is specified, setlocale() + succeeds but then nl_langinfo(CODESET) is "UTF-8". */ +#if HAVE_LANGINFO_CODESET + { + const char *cs = nl_langinfo (CODESET); + if (cs[0] == '\0' || strcmp (cs, "ASCII") == 0 || strcmp (cs, "646") == 0 + || strcmp (cs, "UTF-8") == 0) + return 1; + } +#endif +#ifdef __CYGWIN__ + /* On Cygwin, avoid locale names without encoding suffix, because the + locale_charset() function relies on the encoding suffix. Note that + LC_ALL is set on the command line. */ + if (strchr (getenv ("LC_ALL"), '.') == NULL) return 1; +#endif + /* Check whether MB_CUR_MAX is > 1. This excludes the dysfunctional locales + on Cygwin 1.5.x. */ + if (MB_CUR_MAX == 1) + return 1; + /* Check whether in a month name, no byte in the range 0x80..0x9F occurs. + This excludes the UTF-8 encoding (except on MirBSD). */ + t.tm_year = 1975 - 1900; t.tm_mon = 2 - 1; t.tm_mday = 4; + if (strftime (buf, sizeof (buf), "%B", &t) < 2) return 1; + for (p = buf; *p != '\0'; p++) + if ((unsigned char) *p >= 0x80 && (unsigned char) *p < 0xa0) + return 1; + return 0; +} +changequote([,])dnl + ])]) + if AC_TRY_EVAL([ac_link]) && test -s conftest$ac_exeext; then + case "$host_os" in + # Handle native Windows specially, because there setlocale() interprets + # "ar" as "Arabic" or "Arabic_Saudi Arabia.1256", + # "fr" or "fra" as "French" or "French_France.1252", + # "ge"(!) or "deu"(!) as "German" or "German_Germany.1252", + # "ja" as "Japanese" or "Japanese_Japan.932", + # and similar. + mingw*) + # Note that on native Windows, the Japanese locale is + # Japanese_Japan.932, and CP932 is very different from EUC-JP, so we + # cannot use it here. + gt_cv_locale_ja=none + ;; + *) + # Setting LC_ALL is not enough. Need to set LC_TIME to empty, because + # otherwise on Mac OS X 10.3.5 the LC_TIME=C from the beginning of the + # configure script would override the LC_ALL setting. Likewise for + # LC_CTYPE, which is also set at the beginning of the configure script. + # Test for the AIX locale name. + if (LC_ALL=ja_JP LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then + gt_cv_locale_ja=ja_JP + else + # Test for the locale name with explicit encoding suffix. + if (LC_ALL=ja_JP.EUC-JP LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then + gt_cv_locale_ja=ja_JP.EUC-JP + else + # Test for the HP-UX, OSF/1, NetBSD locale name. + if (LC_ALL=ja_JP.eucJP LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then + gt_cv_locale_ja=ja_JP.eucJP + else + # Test for the IRIX, FreeBSD locale name. + if (LC_ALL=ja_JP.EUC LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then + gt_cv_locale_ja=ja_JP.EUC + else + # Test for the Solaris 7 locale name. + if (LC_ALL=ja LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then + gt_cv_locale_ja=ja + else + # Special test for NetBSD 1.6. + if test -f /usr/share/locale/ja_JP.eucJP/LC_CTYPE; then + gt_cv_locale_ja=ja_JP.eucJP + else + # None found. + gt_cv_locale_ja=none + fi + fi + fi + fi + fi + fi + ;; + esac + fi + rm -fr conftest* + ]) + LOCALE_JA=$gt_cv_locale_ja + AC_SUBST([LOCALE_JA]) +]) diff --git a/gl/m4/locale-tr.m4 b/gl/m4/locale-tr.m4 new file mode 100644 index 0000000000..5f3bf80302 --- /dev/null +++ b/gl/m4/locale-tr.m4 @@ -0,0 +1,127 @@ +# locale-tr.m4 serial 10 +dnl Copyright (C) 2003, 2005-2012 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +dnl From Bruno Haible. + +dnl Determine the name of a turkish locale with UTF-8 encoding. +AC_DEFUN([gt_LOCALE_TR_UTF8], +[ + AC_REQUIRE([AC_CANONICAL_HOST]) + AC_REQUIRE([AM_LANGINFO_CODESET]) + AC_CACHE_CHECK([for a turkish Unicode locale], [gt_cv_locale_tr_utf8], [ + AC_LANG_CONFTEST([AC_LANG_SOURCE([ +changequote(,)dnl +#include <locale.h> +#include <time.h> +#if HAVE_LANGINFO_CODESET +# include <langinfo.h> +#endif +#include <stdlib.h> +#include <string.h> +struct tm t; +char buf[16]; +int main () { + /* On BeOS, locales are not implemented in libc. Rather, libintl + imitates locale dependent behaviour by looking at the environment + variables, and all locales use the UTF-8 encoding. But BeOS does not + implement the Turkish upper-/lowercase mappings. Therefore, let this + program return 1 on BeOS. */ + /* Check whether the given locale name is recognized by the system. */ +#if (defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__ + /* On native Windows, setlocale(category, "") looks at the system settings, + not at the environment variables. Also, when an encoding suffix such + as ".65001" or ".54936" is specified, it succeeds but sets the LC_CTYPE + category of the locale to "C". */ + if (setlocale (LC_ALL, getenv ("LC_ALL")) == NULL + || strcmp (setlocale (LC_CTYPE, NULL), "C") == 0) + return 1; +#else + if (setlocale (LC_ALL, "") == NULL) return 1; +#endif + /* Check whether nl_langinfo(CODESET) is nonempty and not "ASCII" or "646". + On Mac OS X 10.3.5 (Darwin 7.5) in the tr_TR locale, nl_langinfo(CODESET) + is empty, and the behaviour of Tcl 8.4 in this locale is not useful. + On OpenBSD 4.0, when an unsupported locale is specified, setlocale() + succeeds but then nl_langinfo(CODESET) is "646". In this situation, + some unit tests fail. */ +#if HAVE_LANGINFO_CODESET + { + const char *cs = nl_langinfo (CODESET); + if (cs[0] == '\0' || strcmp (cs, "ASCII") == 0 || strcmp (cs, "646") == 0) + return 1; + } +#endif +#ifdef __CYGWIN__ + /* On Cygwin, avoid locale names without encoding suffix, because the + locale_charset() function relies on the encoding suffix. Note that + LC_ALL is set on the command line. */ + if (strchr (getenv ("LC_ALL"), '.') == NULL) return 1; +#endif + /* Check whether in the abbreviation of the eighth month, the second + character (should be U+011F: LATIN SMALL LETTER G WITH BREVE) is + two bytes long, with UTF-8 encoding. */ + t.tm_year = 1992 - 1900; t.tm_mon = 8 - 1; t.tm_mday = 19; + if (strftime (buf, sizeof (buf), "%b", &t) < 4 + || buf[1] != (char) 0xc4 || buf[2] != (char) 0x9f) + return 1; + /* Check whether the upper-/lowercase mappings are as expected for + Turkish. */ + if (towupper ('i') != 0x0130 || towlower (0x0130) != 'i' + || towupper(0x0131) != 'I' || towlower ('I') != 0x0131) + return 1; + return 0; +} +changequote([,])dnl + ])]) + if AC_TRY_EVAL([ac_link]) && test -s conftest$ac_exeext; then + case "$host_os" in + # Handle native Windows specially, because there setlocale() interprets + # "ar" as "Arabic" or "Arabic_Saudi Arabia.1256", + # "fr" or "fra" as "French" or "French_France.1252", + # "ge"(!) or "deu"(!) as "German" or "German_Germany.1252", + # "ja" as "Japanese" or "Japanese_Japan.932", + # and similar. + mingw*) + # Test for the hypothetical native Windows locale name. + if (LC_ALL=Turkish_Turkey.65001 LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then + gt_cv_locale_tr_utf8=Turkish_Turkey.65001 + else + # None found. + gt_cv_locale_tr_utf8=none + fi + ;; + *) + # Setting LC_ALL is not enough. Need to set LC_TIME to empty, because + # otherwise on Mac OS X 10.3.5 the LC_TIME=C from the beginning of the + # configure script would override the LC_ALL setting. Likewise for + # LC_CTYPE, which is also set at the beginning of the configure script. + # Test for the usual locale name. + if (LC_ALL=tr_TR LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then + gt_cv_locale_tr_utf8=tr_TR + else + # Test for the locale name with explicit encoding suffix. + if (LC_ALL=tr_TR.UTF-8 LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then + gt_cv_locale_tr_utf8=tr_TR.UTF-8 + else + # Test for the Solaris 7 locale name. + if (LC_ALL=tr.UTF-8 LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then + gt_cv_locale_tr_utf8=tr.UTF-8 + else + # None found. + gt_cv_locale_tr_utf8=none + fi + fi + fi + ;; + esac + else + gt_cv_locale_tr_utf8=none + fi + rm -fr conftest* + ]) + LOCALE_TR_UTF8=$gt_cv_locale_tr_utf8 + AC_SUBST([LOCALE_TR_UTF8]) +]) diff --git a/gl/m4/locale-zh.m4 b/gl/m4/locale-zh.m4 new file mode 100644 index 0000000000..e5502b29e1 --- /dev/null +++ b/gl/m4/locale-zh.m4 @@ -0,0 +1,130 @@ +# locale-zh.m4 serial 12 +dnl Copyright (C) 2003, 2005-2012 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +dnl From Bruno Haible. + +dnl Determine the name of a chinese locale with GB18030 encoding. +AC_DEFUN([gt_LOCALE_ZH_CN], +[ + AC_REQUIRE([AC_CANONICAL_HOST]) + AC_REQUIRE([AM_LANGINFO_CODESET]) + AC_CACHE_CHECK([for a transitional chinese locale], [gt_cv_locale_zh_CN], [ + AC_LANG_CONFTEST([AC_LANG_SOURCE([ +changequote(,)dnl +#include <locale.h> +#include <stdlib.h> +#include <time.h> +#if HAVE_LANGINFO_CODESET +# include <langinfo.h> +#endif +#include <stdlib.h> +#include <string.h> +struct tm t; +char buf[16]; +int main () +{ + const char *p; + /* Check whether the given locale name is recognized by the system. */ +#if (defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__ + /* On native Windows, setlocale(category, "") looks at the system settings, + not at the environment variables. Also, when an encoding suffix such + as ".65001" or ".54936" is specified, it succeeds but sets the LC_CTYPE + category of the locale to "C". */ + if (setlocale (LC_ALL, getenv ("LC_ALL")) == NULL + || strcmp (setlocale (LC_CTYPE, NULL), "C") == 0) + return 1; +#else + if (setlocale (LC_ALL, "") == NULL) return 1; +#endif + /* Check whether nl_langinfo(CODESET) is nonempty and not "ASCII" or "646". + On Mac OS X 10.3.5 (Darwin 7.5) in the fr_FR locale, nl_langinfo(CODESET) + is empty, and the behaviour of Tcl 8.4 in this locale is not useful. + On OpenBSD 4.0, when an unsupported locale is specified, setlocale() + succeeds but then nl_langinfo(CODESET) is "646". In this situation, + some unit tests fail. + On MirBSD 10, when an unsupported locale is specified, setlocale() + succeeds but then nl_langinfo(CODESET) is "UTF-8". */ +#if HAVE_LANGINFO_CODESET + { + const char *cs = nl_langinfo (CODESET); + if (cs[0] == '\0' || strcmp (cs, "ASCII") == 0 || strcmp (cs, "646") == 0 + || strcmp (cs, "UTF-8") == 0) + return 1; + } +#endif +#ifdef __CYGWIN__ + /* On Cygwin, avoid locale names without encoding suffix, because the + locale_charset() function relies on the encoding suffix. Note that + LC_ALL is set on the command line. */ + if (strchr (getenv ("LC_ALL"), '.') == NULL) return 1; +#endif + /* Check whether in a month name, no byte in the range 0x80..0x9F occurs. + This excludes the UTF-8 encoding (except on MirBSD). */ + t.tm_year = 1975 - 1900; t.tm_mon = 2 - 1; t.tm_mday = 4; + if (strftime (buf, sizeof (buf), "%B", &t) < 2) return 1; + for (p = buf; *p != '\0'; p++) + if ((unsigned char) *p >= 0x80 && (unsigned char) *p < 0xa0) + return 1; + /* Check whether a typical GB18030 multibyte sequence is recognized as a + single wide character. This excludes the GB2312 and GBK encodings. */ + if (mblen ("\203\062\332\066", 5) != 4) + return 1; + return 0; +} +changequote([,])dnl + ])]) + if AC_TRY_EVAL([ac_link]) && test -s conftest$ac_exeext; then + case "$host_os" in + # Handle native Windows specially, because there setlocale() interprets + # "ar" as "Arabic" or "Arabic_Saudi Arabia.1256", + # "fr" or "fra" as "French" or "French_France.1252", + # "ge"(!) or "deu"(!) as "German" or "German_Germany.1252", + # "ja" as "Japanese" or "Japanese_Japan.932", + # and similar. + mingw*) + # Test for the hypothetical native Windows locale name. + if (LC_ALL=Chinese_China.54936 LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then + gt_cv_locale_zh_CN=Chinese_China.54936 + else + # None found. + gt_cv_locale_zh_CN=none + fi + ;; + solaris2.8) + # On Solaris 8, the locales zh_CN.GB18030, zh_CN.GBK, zh.GBK are + # broken. One witness is the test case in gl_MBRTOWC_SANITYCHECK. + # Another witness is that "LC_ALL=zh_CN.GB18030 bash -c true" dumps core. + gt_cv_locale_zh_CN=none + ;; + *) + # Setting LC_ALL is not enough. Need to set LC_TIME to empty, because + # otherwise on Mac OS X 10.3.5 the LC_TIME=C from the beginning of the + # configure script would override the LC_ALL setting. Likewise for + # LC_CTYPE, which is also set at the beginning of the configure script. + # Test for the locale name without encoding suffix. + if (LC_ALL=zh_CN LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then + gt_cv_locale_zh_CN=zh_CN + else + # Test for the locale name with explicit encoding suffix. + if (LC_ALL=zh_CN.GB18030 LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then + gt_cv_locale_zh_CN=zh_CN.GB18030 + else + # None found. + gt_cv_locale_zh_CN=none + fi + fi + ;; + esac + else + # If there was a link error, due to mblen(), the system is so old that + # it certainly doesn't have a chinese locale. + gt_cv_locale_zh_CN=none + fi + rm -fr conftest* + ]) + LOCALE_ZH_CN=$gt_cv_locale_zh_CN + AC_SUBST([LOCALE_ZH_CN]) +]) diff --git a/gl/m4/locale_h.m4 b/gl/m4/locale_h.m4 new file mode 100644 index 0000000000..c0f4d524ad --- /dev/null +++ b/gl/m4/locale_h.m4 @@ -0,0 +1,122 @@ +# locale_h.m4 serial 19 +dnl Copyright (C) 2007, 2009-2012 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +AC_DEFUN([gl_LOCALE_H], +[ + dnl Use AC_REQUIRE here, so that the default behavior below is expanded + dnl once only, before all statements that occur in other macros. + AC_REQUIRE([gl_LOCALE_H_DEFAULTS]) + + dnl Persuade glibc <locale.h> to define locale_t and the int_p_*, int_n_* + dnl members of 'struct lconv'. + AC_REQUIRE([gl_USE_SYSTEM_EXTENSIONS]) + + dnl If <stddef.h> is replaced, then <locale.h> must also be replaced. + AC_REQUIRE([gl_STDDEF_H]) + + dnl Solaris 11 2011-11 defines the int_p_*, int_n_* members of 'struct lconv' + dnl only if _LCONV_C99 is defined. + AC_REQUIRE([AC_CANONICAL_HOST]) + case "$host_os" in + solaris*) + AC_DEFINE([_LCONV_C99], [1], [Define to 1 on Solaris.]) + ;; + esac + + AC_CACHE_CHECK([whether locale.h conforms to POSIX:2001], + [gl_cv_header_locale_h_posix2001], + [AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM( + [[#include <locale.h> + int x = LC_MESSAGES; + int y = sizeof (((struct lconv *) 0)->decimal_point);]], + [[]])], + [gl_cv_header_locale_h_posix2001=yes], + [gl_cv_header_locale_h_posix2001=no])]) + + dnl Check for <xlocale.h>. + AC_CHECK_HEADERS_ONCE([xlocale.h]) + if test $ac_cv_header_xlocale_h = yes; then + HAVE_XLOCALE_H=1 + dnl Check whether use of locale_t requires inclusion of <xlocale.h>, + dnl e.g. on Mac OS X 10.5. If <locale.h> does not define locale_t by + dnl itself, we assume that <xlocale.h> will do so. + AC_CACHE_CHECK([whether locale.h defines locale_t], + [gl_cv_header_locale_has_locale_t], + [AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM( + [[#include <locale.h> + locale_t x;]], + [[]])], + [gl_cv_header_locale_has_locale_t=yes], + [gl_cv_header_locale_has_locale_t=no]) + ]) + if test $gl_cv_header_locale_has_locale_t = yes; then + gl_cv_header_locale_h_needs_xlocale_h=no + else + gl_cv_header_locale_h_needs_xlocale_h=yes + fi + else + HAVE_XLOCALE_H=0 + gl_cv_header_locale_h_needs_xlocale_h=no + fi + AC_SUBST([HAVE_XLOCALE_H]) + + dnl Check whether 'struct lconv' is complete. + dnl Bionic libc's 'struct lconv' is just a dummy. + dnl On OpenBSD 4.9, HP-UX 11, IRIX 6.5, OSF/1 5.1, Solaris 9, Cygwin 1.5.x, + dnl mingw, MSVC 9, it lacks the int_p_* and int_n_* members. + AC_CACHE_CHECK([whether struct lconv is properly defined], + [gl_cv_sys_struct_lconv_ok], + [AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM( + [[#include <locale.h> + struct lconv l; + int x = sizeof (l.decimal_point); + int y = sizeof (l.int_p_cs_precedes);]], + [[]])], + [gl_cv_sys_struct_lconv_ok=yes], + [gl_cv_sys_struct_lconv_ok=no]) + ]) + if test $gl_cv_sys_struct_lconv_ok = no; then + REPLACE_STRUCT_LCONV=1 + fi + + dnl <locale.h> is always overridden, because of GNULIB_POSIXCHECK. + gl_NEXT_HEADERS([locale.h]) + + dnl Check for declarations of anything we want to poison if the + dnl corresponding gnulib module is not in use. + gl_WARN_ON_USE_PREPARE([[#include <locale.h> +/* Some systems provide declarations in a non-standard header. */ +#if HAVE_XLOCALE_H +# include <xlocale.h> +#endif + ]], + [setlocale duplocale]) +]) + +AC_DEFUN([gl_LOCALE_MODULE_INDICATOR], +[ + dnl Use AC_REQUIRE here, so that the default settings are expanded once only. + AC_REQUIRE([gl_LOCALE_H_DEFAULTS]) + gl_MODULE_INDICATOR_SET_VARIABLE([$1]) + dnl Define it also as a C macro, for the benefit of the unit tests. + gl_MODULE_INDICATOR_FOR_TESTS([$1]) +]) + +AC_DEFUN([gl_LOCALE_H_DEFAULTS], +[ + GNULIB_LOCALECONV=0; AC_SUBST([GNULIB_LOCALECONV]) + GNULIB_SETLOCALE=0; AC_SUBST([GNULIB_SETLOCALE]) + GNULIB_DUPLOCALE=0; AC_SUBST([GNULIB_DUPLOCALE]) + dnl Assume proper GNU behavior unless another module says otherwise. + HAVE_DUPLOCALE=1; AC_SUBST([HAVE_DUPLOCALE]) + REPLACE_LOCALECONV=0; AC_SUBST([REPLACE_LOCALECONV]) + REPLACE_SETLOCALE=0; AC_SUBST([REPLACE_SETLOCALE]) + REPLACE_DUPLOCALE=0; AC_SUBST([REPLACE_DUPLOCALE]) + REPLACE_STRUCT_LCONV=0; AC_SUBST([REPLACE_STRUCT_LCONV]) +]) diff --git a/gl/m4/localename.m4 b/gl/m4/localename.m4 new file mode 100644 index 0000000000..2ba295ebc6 --- /dev/null +++ b/gl/m4/localename.m4 @@ -0,0 +1,12 @@ +# localename.m4 serial 2 +dnl Copyright (C) 2007, 2009-2012 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +AC_DEFUN([gl_LOCALENAME], +[ + AC_REQUIRE([gt_LC_MESSAGES]) + AC_REQUIRE([gt_INTL_MACOSX]) + AC_CHECK_FUNCS([setlocale uselocale]) +]) diff --git a/gl/m4/setlocale.m4 b/gl/m4/setlocale.m4 new file mode 100644 index 0000000000..c605241aa1 --- /dev/null +++ b/gl/m4/setlocale.m4 @@ -0,0 +1,29 @@ +# setlocale.m4 serial 4 +dnl Copyright (C) 2011-2012 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +AC_DEFUN([gl_FUNC_SETLOCALE], +[ + AC_REQUIRE([gl_LOCALE_H_DEFAULTS]) + AC_REQUIRE([AC_CANONICAL_HOST]) + case "$host_os" in + dnl On native Windows systems, setlocale(category,NULL) does not look at + dnl the environment variables LC_ALL, category, and LANG. + mingw*) REPLACE_SETLOCALE=1 ;; + dnl On Cygwin 1.5.x, setlocale always succeeds but setlocale(LC_CTYPE,NULL) + dnl is then still "C". + cygwin*) + case `uname -r` in + 1.5.*) REPLACE_SETLOCALE=1 ;; + esac + ;; + esac +]) + +# Prerequisites of lib/setlocale.c. +AC_DEFUN([gl_PREREQ_SETLOCALE], +[ + : +]) diff --git a/gl/tests/Makefile.am b/gl/tests/Makefile.am index 70cc6d6f32..30cfa0c10f 100644 --- a/gl/tests/Makefile.am +++ b/gl/tests/Makefile.am @@ -142,6 +142,15 @@ EXTRA_DIST += test-c-ctype.c macros.h ## end gnulib module c-ctype-tests +## begin gnulib module c-strcase-tests + +TESTS += test-c-strcase.sh +TESTS_ENVIRONMENT += LOCALE_FR='@LOCALE_FR@' LOCALE_TR_UTF8='@LOCALE_TR_UTF8@' +check_PROGRAMS += test-c-strcasecmp test-c-strncasecmp +EXTRA_DIST += test-c-strcase.sh test-c-strcasecmp.c test-c-strncasecmp.c macros.h + +## end gnulib module c-strcase-tests + ## begin gnulib module close-tests TESTS += test-close @@ -468,6 +477,14 @@ EXTRA_DIST += signature.h test-gettimeofday.c ## end gnulib module gettimeofday-tests +## begin gnulib module iconv-h-tests + +TESTS += test-iconv-h +check_PROGRAMS += test-iconv-h +EXTRA_DIST += test-iconv-h.c + +## end gnulib module iconv-h-tests + ## begin gnulib module iconv-tests TESTS += test-iconv @@ -478,6 +495,16 @@ EXTRA_DIST += test-iconv.c signature.h macros.h ## end gnulib module iconv-tests +## begin gnulib module iconv_open-utf-tests + +TESTS += test-iconv-utf +check_PROGRAMS += test-iconv-utf +test_iconv_utf_LDADD = $(LDADD) @LIBICONV@ + +EXTRA_DIST += test-iconv-utf.c macros.h + +## end gnulib module iconv_open-utf-tests + ## begin gnulib module ignore-value @@ -624,6 +651,67 @@ EXTRA_DIST += test-listen.c signature.h macros.h ## end gnulib module listen-tests +## begin gnulib module locale + +BUILT_SOURCES += locale.h + +# We need the following in order to create <locale.h> when the system +# doesn't have one that provides all definitions. +locale.h: locale.in.h $(top_builddir)/config.status $(CXXDEFS_H) $(ARG_NONNULL_H) $(WARN_ON_USE_H) + $(AM_V_GEN)rm -f $@-t $@ && \ + { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */' && \ + sed -e 's|@''GUARD_PREFIX''@|GL|g' \ + -e 's|@''INCLUDE_NEXT''@|$(INCLUDE_NEXT)|g' \ + -e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \ + -e 's|@''PRAGMA_COLUMNS''@|@PRAGMA_COLUMNS@|g' \ + -e 's|@''NEXT_LOCALE_H''@|$(NEXT_LOCALE_H)|g' \ + -e 's/@''GNULIB_LOCALECONV''@/$(GNULIB_LOCALECONV)/g' \ + -e 's/@''GNULIB_SETLOCALE''@/$(GNULIB_SETLOCALE)/g' \ + -e 's/@''GNULIB_DUPLOCALE''@/$(GNULIB_DUPLOCALE)/g' \ + -e 's|@''HAVE_DUPLOCALE''@|$(HAVE_DUPLOCALE)|g' \ + -e 's|@''HAVE_XLOCALE_H''@|$(HAVE_XLOCALE_H)|g' \ + -e 's|@''REPLACE_LOCALECONV''@|$(REPLACE_LOCALECONV)|g' \ + -e 's|@''REPLACE_SETLOCALE''@|$(REPLACE_SETLOCALE)|g' \ + -e 's|@''REPLACE_DUPLOCALE''@|$(REPLACE_DUPLOCALE)|g' \ + -e 's|@''REPLACE_STRUCT_LCONV''@|$(REPLACE_STRUCT_LCONV)|g' \ + -e '/definitions of _GL_FUNCDECL_RPL/r $(CXXDEFS_H)' \ + -e '/definition of _GL_ARG_NONNULL/r $(ARG_NONNULL_H)' \ + -e '/definition of _GL_WARN_ON_USE/r $(WARN_ON_USE_H)' \ + < $(srcdir)/locale.in.h; \ + } > $@-t && \ + mv $@-t $@ +MOSTLYCLEANFILES += locale.h locale.h-t + +EXTRA_DIST += locale.in.h + +## end gnulib module locale + +## begin gnulib module locale-tests + +TESTS += test-locale +check_PROGRAMS += test-locale +EXTRA_DIST += test-locale.c + +## end gnulib module locale-tests + +## begin gnulib module localename + +libtests_a_SOURCES += localename.c + +EXTRA_DIST += localename.h + +## end gnulib module localename + +## begin gnulib module localename-tests + +TESTS += test-localename +check_PROGRAMS += test-localename +test_localename_LDADD = $(LDADD) @INTL_MACOSX_LIBS@ $(LIBTHREAD) + +EXTRA_DIST += test-localename.c macros.h + +## end gnulib module localename-tests + ## begin gnulib module lock libtests_a_SOURCES += glthread/lock.h glthread/lock.c @@ -883,6 +971,28 @@ EXTRA_DIST += test-setenv.c signature.h macros.h ## end gnulib module setenv-tests +## begin gnulib module setlocale + + +EXTRA_DIST += setlocale.c + +EXTRA_libtests_a_SOURCES += setlocale.c + +## end gnulib module setlocale + +## begin gnulib module setlocale-tests + +TESTS += test-setlocale1.sh test-setlocale2.sh +TESTS_ENVIRONMENT += \ + LOCALE_FR='@LOCALE_FR@' \ + LOCALE_FR_UTF8='@LOCALE_FR_UTF8@' \ + LOCALE_JA='@LOCALE_JA@' \ + LOCALE_ZH_CN='@LOCALE_ZH_CN@' +check_PROGRAMS += test-setlocale1 test-setlocale2 +EXTRA_DIST += test-setlocale1.sh test-setlocale1.c test-setlocale2.sh test-setlocale2.c signature.h macros.h + +## end gnulib module setlocale-tests + ## begin gnulib module setsockopt-tests TESTS += test-setsockopt @@ -987,6 +1097,31 @@ EXTRA_DIST += $(top_srcdir)/build-aux/snippet/c++defs.h ## end gnulib module snippet/c++defs +## begin gnulib module snippet/unused-parameter + +# The BUILT_SOURCES created by this Makefile snippet are not used via #include +# statements but through direct file reference. Therefore this snippet must be +# present in all Makefile.am that need it. This is ensured by the applicability +# 'all' defined above. + +BUILT_SOURCES += unused-parameter.h +# The unused-parameter.h that gets inserted into generated .h files is the same +# as build-aux/snippet/unused-parameter.h, except that it has the copyright +# header cut off. +unused-parameter.h: $(top_srcdir)/build-aux/snippet/unused-parameter.h + $(AM_V_GEN)rm -f $@-t $@ && \ + sed -n -e '/GL_UNUSED_PARAMETER/,$$p' \ + < $(top_srcdir)/build-aux/snippet/unused-parameter.h \ + > $@-t && \ + mv $@-t $@ +MOSTLYCLEANFILES += unused-parameter.h unused-parameter.h-t + +UNUSED_PARAMETER_H=unused-parameter.h + +EXTRA_DIST += $(top_srcdir)/build-aux/snippet/unused-parameter.h + +## end gnulib module snippet/unused-parameter + ## begin gnulib module snippet/warn-on-use BUILT_SOURCES += warn-on-use.h @@ -1301,6 +1436,26 @@ EXTRA_DIST += test-unistd.c ## end gnulib module unistd-tests +## begin gnulib module unistr/u8-mbtoucr-tests + +TESTS += test-u8-mbtoucr +check_PROGRAMS += test-u8-mbtoucr +test_u8_mbtoucr_SOURCES = unistr/test-u8-mbtoucr.c +test_u8_mbtoucr_LDADD = $(LDADD) $(LIBUNISTRING) +EXTRA_DIST += unistr/test-u8-mbtoucr.c macros.h + +## end gnulib module unistr/u8-mbtoucr-tests + +## begin gnulib module unistr/u8-uctomb-tests + +TESTS += test-u8-uctomb +check_PROGRAMS += test-u8-uctomb +test_u8_uctomb_SOURCES = unistr/test-u8-uctomb.c +test_u8_uctomb_LDADD = $(LDADD) $(LIBUNISTRING) +EXTRA_DIST += unistr/test-u8-uctomb.c macros.h + +## end gnulib module unistr/u8-uctomb-tests + ## begin gnulib module unsetenv diff --git a/gl/tests/locale.in.h b/gl/tests/locale.in.h new file mode 100644 index 0000000000..4d3c543750 --- /dev/null +++ b/gl/tests/locale.in.h @@ -0,0 +1,201 @@ +/* A POSIX <locale.h>. + Copyright (C) 2007-2012 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#ifndef _@GUARD_PREFIX@_LOCALE_H + +#if __GNUC__ >= 3 +@PRAGMA_SYSTEM_HEADER@ +#endif +@PRAGMA_COLUMNS@ + +/* The include_next requires a split double-inclusion guard. */ +#@INCLUDE_NEXT@ @NEXT_LOCALE_H@ + +#ifndef _@GUARD_PREFIX@_LOCALE_H +#define _@GUARD_PREFIX@_LOCALE_H + +/* NetBSD 5.0 mis-defines NULL. */ +#include <stddef.h> + +/* Mac OS X 10.5 defines the locale_t type in <xlocale.h>. */ +#if @HAVE_XLOCALE_H@ +# include <xlocale.h> +#endif + +/* The definitions of _GL_FUNCDECL_RPL etc. are copied here. */ + +/* The definition of _GL_ARG_NONNULL is copied here. */ + +/* The definition of _GL_WARN_ON_USE is copied here. */ + +/* The LC_MESSAGES locale category is specified in POSIX, but not in ISO C. + On systems that don't define it, use the same value as GNU libintl. */ +#if !defined LC_MESSAGES +# define LC_MESSAGES 1729 +#endif + +/* Bionic libc's 'struct lconv' is just a dummy. */ +#if @REPLACE_STRUCT_LCONV@ +# define lconv rpl_lconv +struct lconv +{ + /* All 'char *' are actually 'const char *'. */ + + /* Members that depend on the LC_NUMERIC category of the locale. See + <http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html#tag_07_03_04> */ + + /* Symbol used as decimal point. */ + char *decimal_point; + /* Symbol used to separate groups of digits to the left of the decimal + point. */ + char *thousands_sep; + /* Definition of the size of groups of digits to the left of the decimal + point. */ + char *grouping; + + /* Members that depend on the LC_MONETARY category of the locale. See + <http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html#tag_07_03_03> */ + + /* Symbol used as decimal point. */ + char *mon_decimal_point; + /* Symbol used to separate groups of digits to the left of the decimal + point. */ + char *mon_thousands_sep; + /* Definition of the size of groups of digits to the left of the decimal + point. */ + char *mon_grouping; + /* Sign used to indicate a value >= 0. */ + char *positive_sign; + /* Sign used to indicate a value < 0. */ + char *negative_sign; + + /* For formatting local currency. */ + /* Currency symbol (3 characters) followed by separator (1 character). */ + char *currency_symbol; + /* Number of digits after the decimal point. */ + char frac_digits; + /* For values >= 0: 1 if the currency symbol precedes the number, 0 if it + comes after the number. */ + char p_cs_precedes; + /* For values >= 0: Position of the sign. */ + char p_sign_posn; + /* For values >= 0: Placement of spaces between currency symbol, sign, and + number. */ + char p_sep_by_space; + /* For values < 0: 1 if the currency symbol precedes the number, 0 if it + comes after the number. */ + char n_cs_precedes; + /* For values < 0: Position of the sign. */ + char n_sign_posn; + /* For values < 0: Placement of spaces between currency symbol, sign, and + number. */ + char n_sep_by_space; + + /* For formatting international currency. */ + /* Currency symbol (3 characters) followed by separator (1 character). */ + char *int_curr_symbol; + /* Number of digits after the decimal point. */ + char int_frac_digits; + /* For values >= 0: 1 if the currency symbol precedes the number, 0 if it + comes after the number. */ + char int_p_cs_precedes; + /* For values >= 0: Position of the sign. */ + char int_p_sign_posn; + /* For values >= 0: Placement of spaces between currency symbol, sign, and + number. */ + char int_p_sep_by_space; + /* For values < 0: 1 if the currency symbol precedes the number, 0 if it + comes after the number. */ + char int_n_cs_precedes; + /* For values < 0: Position of the sign. */ + char int_n_sign_posn; + /* For values < 0: Placement of spaces between currency symbol, sign, and + number. */ + char int_n_sep_by_space; +}; +#endif + +#if @GNULIB_LOCALECONV@ +# if @REPLACE_LOCALECONV@ +# if !(defined __cplusplus && defined GNULIB_NAMESPACE) +# undef localeconv +# define localeconv rpl_localeconv +# endif +_GL_FUNCDECL_RPL (localeconv, struct lconv *, (void)); +_GL_CXXALIAS_RPL (localeconv, struct lconv *, (void)); +# else +_GL_CXXALIAS_SYS (localeconv, struct lconv *, (void)); +# endif +_GL_CXXALIASWARN (localeconv); +#elif @REPLACE_STRUCT_LCONV@ +# undef localeconv +# define localeconv localeconv_used_without_requesting_gnulib_module_localeconv +#elif defined GNULIB_POSIXCHECK +# undef localeconv +# if HAVE_RAW_DECL_LOCALECONV +_GL_WARN_ON_USE (localeconv, + "localeconv returns too few information on some platforms - " + "use gnulib module localeconv for portability"); +# endif +#endif + +#if @GNULIB_SETLOCALE@ +# if @REPLACE_SETLOCALE@ +# if !(defined __cplusplus && defined GNULIB_NAMESPACE) +# undef setlocale +# define setlocale rpl_setlocale +# define GNULIB_defined_setlocale 1 +# endif +_GL_FUNCDECL_RPL (setlocale, char *, (int category, const char *locale)); +_GL_CXXALIAS_RPL (setlocale, char *, (int category, const char *locale)); +# else +_GL_CXXALIAS_SYS (setlocale, char *, (int category, const char *locale)); +# endif +_GL_CXXALIASWARN (setlocale); +#elif defined GNULIB_POSIXCHECK +# undef setlocale +# if HAVE_RAW_DECL_SETLOCALE +_GL_WARN_ON_USE (setlocale, "setlocale works differently on native Windows - " + "use gnulib module setlocale for portability"); +# endif +#endif + +#if @GNULIB_DUPLOCALE@ +# if @REPLACE_DUPLOCALE@ +# if !(defined __cplusplus && defined GNULIB_NAMESPACE) +# undef duplocale +# define duplocale rpl_duplocale +# endif +_GL_FUNCDECL_RPL (duplocale, locale_t, (locale_t locale) _GL_ARG_NONNULL ((1))); +_GL_CXXALIAS_RPL (duplocale, locale_t, (locale_t locale)); +# else +# if @HAVE_DUPLOCALE@ +_GL_CXXALIAS_SYS (duplocale, locale_t, (locale_t locale)); +# endif +# endif +# if @HAVE_DUPLOCALE@ +_GL_CXXALIASWARN (duplocale); +# endif +#elif defined GNULIB_POSIXCHECK +# undef duplocale +# if HAVE_RAW_DECL_DUPLOCALE +_GL_WARN_ON_USE (duplocale, "duplocale is buggy on some glibc systems - " + "use gnulib module duplocale for portability"); +# endif +#endif + +#endif /* _@GUARD_PREFIX@_LOCALE_H */ +#endif /* _@GUARD_PREFIX@_LOCALE_H */ diff --git a/gl/tests/localename.c b/gl/tests/localename.c new file mode 100644 index 0000000000..4803eaffe4 --- /dev/null +++ b/gl/tests/localename.c @@ -0,0 +1,2858 @@ +/* Determine name of the currently selected locale. + Copyright (C) 1995-2012 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Ulrich Drepper <drepper@gnu.org>, 1995. */ +/* Native Windows code written by Tor Lillqvist <tml@iki.fi>. */ +/* Mac OS X code written by Bruno Haible <bruno@clisp.org>. */ + +#include <config.h> + +/* Specification. */ +#ifdef IN_LIBINTL +# include "gettextP.h" +#else +# include "localename.h" +#endif + +#include <limits.h> +#include <stddef.h> +#include <stdlib.h> +#include <locale.h> +#include <string.h> + +#if HAVE_USELOCALE +/* Mac OS X 10.5 defines the locale_t type in <xlocale.h>. */ +# if defined __APPLE__ && defined __MACH__ +# include <xlocale.h> +# endif +# include <langinfo.h> +# if !defined IN_LIBINTL +# include "glthread/lock.h" +# endif +#endif + +#if HAVE_CFLOCALECOPYCURRENT || HAVE_CFPREFERENCESCOPYAPPVALUE +# include <CoreFoundation/CFString.h> +# if HAVE_CFLOCALECOPYCURRENT +# include <CoreFoundation/CFLocale.h> +# elif HAVE_CFPREFERENCESCOPYAPPVALUE +# include <CoreFoundation/CFPreferences.h> +# endif +#endif + +#if defined _WIN32 || defined __WIN32__ +# define WINDOWS_NATIVE +#endif + +#if defined WINDOWS_NATIVE || defined __CYGWIN__ /* Native Windows or Cygwin */ +# define WIN32_LEAN_AND_MEAN +# include <windows.h> +/* List of language codes, sorted by value: + 0x01 LANG_ARABIC + 0x02 LANG_BULGARIAN + 0x03 LANG_CATALAN + 0x04 LANG_CHINESE + 0x05 LANG_CZECH + 0x06 LANG_DANISH + 0x07 LANG_GERMAN + 0x08 LANG_GREEK + 0x09 LANG_ENGLISH + 0x0a LANG_SPANISH + 0x0b LANG_FINNISH + 0x0c LANG_FRENCH + 0x0d LANG_HEBREW + 0x0e LANG_HUNGARIAN + 0x0f LANG_ICELANDIC + 0x10 LANG_ITALIAN + 0x11 LANG_JAPANESE + 0x12 LANG_KOREAN + 0x13 LANG_DUTCH + 0x14 LANG_NORWEGIAN + 0x15 LANG_POLISH + 0x16 LANG_PORTUGUESE + 0x17 LANG_ROMANSH + 0x18 LANG_ROMANIAN + 0x19 LANG_RUSSIAN + 0x1a LANG_CROATIAN == LANG_SERBIAN + 0x1b LANG_SLOVAK + 0x1c LANG_ALBANIAN + 0x1d LANG_SWEDISH + 0x1e LANG_THAI + 0x1f LANG_TURKISH + 0x20 LANG_URDU + 0x21 LANG_INDONESIAN + 0x22 LANG_UKRAINIAN + 0x23 LANG_BELARUSIAN + 0x24 LANG_SLOVENIAN + 0x25 LANG_ESTONIAN + 0x26 LANG_LATVIAN + 0x27 LANG_LITHUANIAN + 0x28 LANG_TAJIK + 0x29 LANG_FARSI + 0x2a LANG_VIETNAMESE + 0x2b LANG_ARMENIAN + 0x2c LANG_AZERI + 0x2d LANG_BASQUE + 0x2e LANG_SORBIAN + 0x2f LANG_MACEDONIAN + 0x30 LANG_SUTU + 0x31 LANG_TSONGA + 0x32 LANG_TSWANA + 0x33 LANG_VENDA + 0x34 LANG_XHOSA + 0x35 LANG_ZULU + 0x36 LANG_AFRIKAANS + 0x37 LANG_GEORGIAN + 0x38 LANG_FAEROESE + 0x39 LANG_HINDI + 0x3a LANG_MALTESE + 0x3b LANG_SAMI + 0x3c LANG_GAELIC + 0x3d LANG_YIDDISH + 0x3e LANG_MALAY + 0x3f LANG_KAZAK + 0x40 LANG_KYRGYZ + 0x41 LANG_SWAHILI + 0x42 LANG_TURKMEN + 0x43 LANG_UZBEK + 0x44 LANG_TATAR + 0x45 LANG_BENGALI + 0x46 LANG_PUNJABI + 0x47 LANG_GUJARATI + 0x48 LANG_ORIYA + 0x49 LANG_TAMIL + 0x4a LANG_TELUGU + 0x4b LANG_KANNADA + 0x4c LANG_MALAYALAM + 0x4d LANG_ASSAMESE + 0x4e LANG_MARATHI + 0x4f LANG_SANSKRIT + 0x50 LANG_MONGOLIAN + 0x51 LANG_TIBETAN + 0x52 LANG_WELSH + 0x53 LANG_CAMBODIAN + 0x54 LANG_LAO + 0x55 LANG_BURMESE + 0x56 LANG_GALICIAN + 0x57 LANG_KONKANI + 0x58 LANG_MANIPURI + 0x59 LANG_SINDHI + 0x5a LANG_SYRIAC + 0x5b LANG_SINHALESE + 0x5c LANG_CHEROKEE + 0x5d LANG_INUKTITUT + 0x5e LANG_AMHARIC + 0x5f LANG_TAMAZIGHT + 0x60 LANG_KASHMIRI + 0x61 LANG_NEPALI + 0x62 LANG_FRISIAN + 0x63 LANG_PASHTO + 0x64 LANG_TAGALOG + 0x65 LANG_DIVEHI + 0x66 LANG_EDO + 0x67 LANG_FULFULDE + 0x68 LANG_HAUSA + 0x69 LANG_IBIBIO + 0x6a LANG_YORUBA + 0x6d LANG_BASHKIR + 0x6e LANG_LUXEMBOURGISH + 0x6f LANG_GREENLANDIC + 0x70 LANG_IGBO + 0x71 LANG_KANURI + 0x72 LANG_OROMO + 0x73 LANG_TIGRINYA + 0x74 LANG_GUARANI + 0x75 LANG_HAWAIIAN + 0x76 LANG_LATIN + 0x77 LANG_SOMALI + 0x78 LANG_YI + 0x79 LANG_PAPIAMENTU + 0x7a LANG_MAPUDUNGUN + 0x7c LANG_MOHAWK + 0x7e LANG_BRETON + 0x82 LANG_OCCITAN + 0x83 LANG_CORSICAN + 0x84 LANG_ALSATIAN + 0x85 LANG_YAKUT + 0x86 LANG_KICHE + 0x87 LANG_KINYARWANDA + 0x88 LANG_WOLOF + 0x8c LANG_DARI + 0x91 LANG_SCOTTISH_GAELIC +*/ +/* Mingw headers don't have latest language and sublanguage codes. */ +# ifndef LANG_AFRIKAANS +# define LANG_AFRIKAANS 0x36 +# endif +# ifndef LANG_ALBANIAN +# define LANG_ALBANIAN 0x1c +# endif +# ifndef LANG_ALSATIAN +# define LANG_ALSATIAN 0x84 +# endif +# ifndef LANG_AMHARIC +# define LANG_AMHARIC 0x5e +# endif +# ifndef LANG_ARABIC +# define LANG_ARABIC 0x01 +# endif +# ifndef LANG_ARMENIAN +# define LANG_ARMENIAN 0x2b +# endif +# ifndef LANG_ASSAMESE +# define LANG_ASSAMESE 0x4d +# endif +# ifndef LANG_AZERI +# define LANG_AZERI 0x2c +# endif +# ifndef LANG_BASHKIR +# define LANG_BASHKIR 0x6d +# endif +# ifndef LANG_BASQUE +# define LANG_BASQUE 0x2d +# endif +# ifndef LANG_BELARUSIAN +# define LANG_BELARUSIAN 0x23 +# endif +# ifndef LANG_BENGALI +# define LANG_BENGALI 0x45 +# endif +# ifndef LANG_BRETON +# define LANG_BRETON 0x7e +# endif +# ifndef LANG_BURMESE +# define LANG_BURMESE 0x55 +# endif +# ifndef LANG_CAMBODIAN +# define LANG_CAMBODIAN 0x53 +# endif +# ifndef LANG_CATALAN +# define LANG_CATALAN 0x03 +# endif +# ifndef LANG_CHEROKEE +# define LANG_CHEROKEE 0x5c +# endif +# ifndef LANG_CORSICAN +# define LANG_CORSICAN 0x83 +# endif +# ifndef LANG_DARI +# define LANG_DARI 0x8c +# endif +# ifndef LANG_DIVEHI +# define LANG_DIVEHI 0x65 +# endif +# ifndef LANG_EDO +# define LANG_EDO 0x66 +# endif +# ifndef LANG_ESTONIAN +# define LANG_ESTONIAN 0x25 +# endif +# ifndef LANG_FAEROESE +# define LANG_FAEROESE 0x38 +# endif +# ifndef LANG_FARSI +# define LANG_FARSI 0x29 +# endif +# ifndef LANG_FRISIAN +# define LANG_FRISIAN 0x62 +# endif +# ifndef LANG_FULFULDE +# define LANG_FULFULDE 0x67 +# endif +# ifndef LANG_GAELIC +# define LANG_GAELIC 0x3c +# endif +# ifndef LANG_GALICIAN +# define LANG_GALICIAN 0x56 +# endif +# ifndef LANG_GEORGIAN +# define LANG_GEORGIAN 0x37 +# endif +# ifndef LANG_GREENLANDIC +# define LANG_GREENLANDIC 0x6f +# endif +# ifndef LANG_GUARANI +# define LANG_GUARANI 0x74 +# endif +# ifndef LANG_GUJARATI +# define LANG_GUJARATI 0x47 +# endif +# ifndef LANG_HAUSA +# define LANG_HAUSA 0x68 +# endif +# ifndef LANG_HAWAIIAN +# define LANG_HAWAIIAN 0x75 +# endif +# ifndef LANG_HEBREW +# define LANG_HEBREW 0x0d +# endif +# ifndef LANG_HINDI +# define LANG_HINDI 0x39 +# endif +# ifndef LANG_IBIBIO +# define LANG_IBIBIO 0x69 +# endif +# ifndef LANG_IGBO +# define LANG_IGBO 0x70 +# endif +# ifndef LANG_INDONESIAN +# define LANG_INDONESIAN 0x21 +# endif +# ifndef LANG_INUKTITUT +# define LANG_INUKTITUT 0x5d +# endif +# ifndef LANG_KANNADA +# define LANG_KANNADA 0x4b +# endif +# ifndef LANG_KANURI +# define LANG_KANURI 0x71 +# endif +# ifndef LANG_KASHMIRI +# define LANG_KASHMIRI 0x60 +# endif +# ifndef LANG_KAZAK +# define LANG_KAZAK 0x3f +# endif +# ifndef LANG_KICHE +# define LANG_KICHE 0x86 +# endif +# ifndef LANG_KINYARWANDA +# define LANG_KINYARWANDA 0x87 +# endif +# ifndef LANG_KONKANI +# define LANG_KONKANI 0x57 +# endif +# ifndef LANG_KYRGYZ +# define LANG_KYRGYZ 0x40 +# endif +# ifndef LANG_LAO +# define LANG_LAO 0x54 +# endif +# ifndef LANG_LATIN +# define LANG_LATIN 0x76 +# endif +# ifndef LANG_LATVIAN +# define LANG_LATVIAN 0x26 +# endif +# ifndef LANG_LITHUANIAN +# define LANG_LITHUANIAN 0x27 +# endif +# ifndef LANG_LUXEMBOURGISH +# define LANG_LUXEMBOURGISH 0x6e +# endif +# ifndef LANG_MACEDONIAN +# define LANG_MACEDONIAN 0x2f +# endif +# ifndef LANG_MALAY +# define LANG_MALAY 0x3e +# endif +# ifndef LANG_MALAYALAM +# define LANG_MALAYALAM 0x4c +# endif +# ifndef LANG_MALTESE +# define LANG_MALTESE 0x3a +# endif +# ifndef LANG_MANIPURI +# define LANG_MANIPURI 0x58 +# endif +# ifndef LANG_MAORI +# define LANG_MAORI 0x81 +# endif +# ifndef LANG_MAPUDUNGUN +# define LANG_MAPUDUNGUN 0x7a +# endif +# ifndef LANG_MARATHI +# define LANG_MARATHI 0x4e +# endif +# ifndef LANG_MOHAWK +# define LANG_MOHAWK 0x7c +# endif +# ifndef LANG_MONGOLIAN +# define LANG_MONGOLIAN 0x50 +# endif +# ifndef LANG_NEPALI +# define LANG_NEPALI 0x61 +# endif +# ifndef LANG_OCCITAN +# define LANG_OCCITAN 0x82 +# endif +# ifndef LANG_ORIYA +# define LANG_ORIYA 0x48 +# endif +# ifndef LANG_OROMO +# define LANG_OROMO 0x72 +# endif +# ifndef LANG_PAPIAMENTU +# define LANG_PAPIAMENTU 0x79 +# endif +# ifndef LANG_PASHTO +# define LANG_PASHTO 0x63 +# endif +# ifndef LANG_PUNJABI +# define LANG_PUNJABI 0x46 +# endif +# ifndef LANG_QUECHUA +# define LANG_QUECHUA 0x6b +# endif +# ifndef LANG_ROMANSH +# define LANG_ROMANSH 0x17 +# endif +# ifndef LANG_SAMI +# define LANG_SAMI 0x3b +# endif +# ifndef LANG_SANSKRIT +# define LANG_SANSKRIT 0x4f +# endif +# ifndef LANG_SCOTTISH_GAELIC +# define LANG_SCOTTISH_GAELIC 0x91 +# endif +# ifndef LANG_SERBIAN +# define LANG_SERBIAN 0x1a +# endif +# ifndef LANG_SINDHI +# define LANG_SINDHI 0x59 +# endif +# ifndef LANG_SINHALESE +# define LANG_SINHALESE 0x5b +# endif +# ifndef LANG_SLOVAK +# define LANG_SLOVAK 0x1b +# endif +# ifndef LANG_SOMALI +# define LANG_SOMALI 0x77 +# endif +# ifndef LANG_SORBIAN +# define LANG_SORBIAN 0x2e +# endif +# ifndef LANG_SOTHO +# define LANG_SOTHO 0x6c +# endif +# ifndef LANG_SUTU +# define LANG_SUTU 0x30 +# endif +# ifndef LANG_SWAHILI +# define LANG_SWAHILI 0x41 +# endif +# ifndef LANG_SYRIAC +# define LANG_SYRIAC 0x5a +# endif +# ifndef LANG_TAGALOG +# define LANG_TAGALOG 0x64 +# endif +# ifndef LANG_TAJIK +# define LANG_TAJIK 0x28 +# endif +# ifndef LANG_TAMAZIGHT +# define LANG_TAMAZIGHT 0x5f +# endif +# ifndef LANG_TAMIL +# define LANG_TAMIL 0x49 +# endif +# ifndef LANG_TATAR +# define LANG_TATAR 0x44 +# endif +# ifndef LANG_TELUGU +# define LANG_TELUGU 0x4a +# endif +# ifndef LANG_THAI +# define LANG_THAI 0x1e +# endif +# ifndef LANG_TIBETAN +# define LANG_TIBETAN 0x51 +# endif +# ifndef LANG_TIGRINYA +# define LANG_TIGRINYA 0x73 +# endif +# ifndef LANG_TSONGA +# define LANG_TSONGA 0x31 +# endif +# ifndef LANG_TSWANA +# define LANG_TSWANA 0x32 +# endif +# ifndef LANG_TURKMEN +# define LANG_TURKMEN 0x42 +# endif +# ifndef LANG_UIGHUR +# define LANG_UIGHUR 0x80 +# endif +# ifndef LANG_UKRAINIAN +# define LANG_UKRAINIAN 0x22 +# endif +# ifndef LANG_URDU +# define LANG_URDU 0x20 +# endif +# ifndef LANG_UZBEK +# define LANG_UZBEK 0x43 +# endif +# ifndef LANG_VENDA +# define LANG_VENDA 0x33 +# endif +# ifndef LANG_VIETNAMESE +# define LANG_VIETNAMESE 0x2a +# endif +# ifndef LANG_WELSH +# define LANG_WELSH 0x52 +# endif +# ifndef LANG_WOLOF +# define LANG_WOLOF 0x88 +# endif +# ifndef LANG_XHOSA +# define LANG_XHOSA 0x34 +# endif +# ifndef LANG_YAKUT +# define LANG_YAKUT 0x85 +# endif +# ifndef LANG_YI +# define LANG_YI 0x78 +# endif +# ifndef LANG_YIDDISH +# define LANG_YIDDISH 0x3d +# endif +# ifndef LANG_YORUBA +# define LANG_YORUBA 0x6a +# endif +# ifndef LANG_ZULU +# define LANG_ZULU 0x35 +# endif +# ifndef SUBLANG_AFRIKAANS_SOUTH_AFRICA +# define SUBLANG_AFRIKAANS_SOUTH_AFRICA 0x01 +# endif +# ifndef SUBLANG_ALBANIAN_ALBANIA +# define SUBLANG_ALBANIAN_ALBANIA 0x01 +# endif +# ifndef SUBLANG_ALSATIAN_FRANCE +# define SUBLANG_ALSATIAN_FRANCE 0x01 +# endif +# ifndef SUBLANG_AMHARIC_ETHIOPIA +# define SUBLANG_AMHARIC_ETHIOPIA 0x01 +# endif +# ifndef SUBLANG_ARABIC_SAUDI_ARABIA +# define SUBLANG_ARABIC_SAUDI_ARABIA 0x01 +# endif +# ifndef SUBLANG_ARABIC_IRAQ +# define SUBLANG_ARABIC_IRAQ 0x02 +# endif +# ifndef SUBLANG_ARABIC_EGYPT +# define SUBLANG_ARABIC_EGYPT 0x03 +# endif +# ifndef SUBLANG_ARABIC_LIBYA +# define SUBLANG_ARABIC_LIBYA 0x04 +# endif +# ifndef SUBLANG_ARABIC_ALGERIA +# define SUBLANG_ARABIC_ALGERIA 0x05 +# endif +# ifndef SUBLANG_ARABIC_MOROCCO +# define SUBLANG_ARABIC_MOROCCO 0x06 +# endif +# ifndef SUBLANG_ARABIC_TUNISIA +# define SUBLANG_ARABIC_TUNISIA 0x07 +# endif +# ifndef SUBLANG_ARABIC_OMAN +# define SUBLANG_ARABIC_OMAN 0x08 +# endif +# ifndef SUBLANG_ARABIC_YEMEN +# define SUBLANG_ARABIC_YEMEN 0x09 +# endif +# ifndef SUBLANG_ARABIC_SYRIA +# define SUBLANG_ARABIC_SYRIA 0x0a +# endif +# ifndef SUBLANG_ARABIC_JORDAN +# define SUBLANG_ARABIC_JORDAN 0x0b +# endif +# ifndef SUBLANG_ARABIC_LEBANON +# define SUBLANG_ARABIC_LEBANON 0x0c +# endif +# ifndef SUBLANG_ARABIC_KUWAIT +# define SUBLANG_ARABIC_KUWAIT 0x0d +# endif +# ifndef SUBLANG_ARABIC_UAE +# define SUBLANG_ARABIC_UAE 0x0e +# endif +# ifndef SUBLANG_ARABIC_BAHRAIN +# define SUBLANG_ARABIC_BAHRAIN 0x0f +# endif +# ifndef SUBLANG_ARABIC_QATAR +# define SUBLANG_ARABIC_QATAR 0x10 +# endif +# ifndef SUBLANG_ARMENIAN_ARMENIA +# define SUBLANG_ARMENIAN_ARMENIA 0x01 +# endif +# ifndef SUBLANG_ASSAMESE_INDIA +# define SUBLANG_ASSAMESE_INDIA 0x01 +# endif +# ifndef SUBLANG_AZERI_LATIN +# define SUBLANG_AZERI_LATIN 0x01 +# endif +# ifndef SUBLANG_AZERI_CYRILLIC +# define SUBLANG_AZERI_CYRILLIC 0x02 +# endif +# ifndef SUBLANG_BASHKIR_RUSSIA +# define SUBLANG_BASHKIR_RUSSIA 0x01 +# endif +# ifndef SUBLANG_BASQUE_BASQUE +# define SUBLANG_BASQUE_BASQUE 0x01 +# endif +# ifndef SUBLANG_BELARUSIAN_BELARUS +# define SUBLANG_BELARUSIAN_BELARUS 0x01 +# endif +# ifndef SUBLANG_BENGALI_INDIA +# define SUBLANG_BENGALI_INDIA 0x01 +# endif +# ifndef SUBLANG_BENGALI_BANGLADESH +# define SUBLANG_BENGALI_BANGLADESH 0x02 +# endif +# ifndef SUBLANG_BOSNIAN_BOSNIA_HERZEGOVINA_LATIN +# define SUBLANG_BOSNIAN_BOSNIA_HERZEGOVINA_LATIN 0x05 +# endif +# ifndef SUBLANG_BOSNIAN_BOSNIA_HERZEGOVINA_CYRILLIC +# define SUBLANG_BOSNIAN_BOSNIA_HERZEGOVINA_CYRILLIC 0x08 +# endif +# ifndef SUBLANG_BRETON_FRANCE +# define SUBLANG_BRETON_FRANCE 0x01 +# endif +# ifndef SUBLANG_BULGARIAN_BULGARIA +# define SUBLANG_BULGARIAN_BULGARIA 0x01 +# endif +# ifndef SUBLANG_CAMBODIAN_CAMBODIA +# define SUBLANG_CAMBODIAN_CAMBODIA 0x01 +# endif +# ifndef SUBLANG_CATALAN_SPAIN +# define SUBLANG_CATALAN_SPAIN 0x01 +# endif +# ifndef SUBLANG_CORSICAN_FRANCE +# define SUBLANG_CORSICAN_FRANCE 0x01 +# endif +# ifndef SUBLANG_CROATIAN_CROATIA +# define SUBLANG_CROATIAN_CROATIA 0x01 +# endif +# ifndef SUBLANG_CROATIAN_BOSNIA_HERZEGOVINA_LATIN +# define SUBLANG_CROATIAN_BOSNIA_HERZEGOVINA_LATIN 0x04 +# endif +# ifndef SUBLANG_CHINESE_MACAU +# define SUBLANG_CHINESE_MACAU 0x05 +# endif +# ifndef SUBLANG_CZECH_CZECH_REPUBLIC +# define SUBLANG_CZECH_CZECH_REPUBLIC 0x01 +# endif +# ifndef SUBLANG_DANISH_DENMARK +# define SUBLANG_DANISH_DENMARK 0x01 +# endif +# ifndef SUBLANG_DARI_AFGHANISTAN +# define SUBLANG_DARI_AFGHANISTAN 0x01 +# endif +# ifndef SUBLANG_DIVEHI_MALDIVES +# define SUBLANG_DIVEHI_MALDIVES 0x01 +# endif +# ifndef SUBLANG_DUTCH_SURINAM +# define SUBLANG_DUTCH_SURINAM 0x03 +# endif +# ifndef SUBLANG_ENGLISH_SOUTH_AFRICA +# define SUBLANG_ENGLISH_SOUTH_AFRICA 0x07 +# endif +# ifndef SUBLANG_ENGLISH_JAMAICA +# define SUBLANG_ENGLISH_JAMAICA 0x08 +# endif +# ifndef SUBLANG_ENGLISH_CARIBBEAN +# define SUBLANG_ENGLISH_CARIBBEAN 0x09 +# endif +# ifndef SUBLANG_ENGLISH_BELIZE +# define SUBLANG_ENGLISH_BELIZE 0x0a +# endif +# ifndef SUBLANG_ENGLISH_TRINIDAD +# define SUBLANG_ENGLISH_TRINIDAD 0x0b +# endif +# ifndef SUBLANG_ENGLISH_ZIMBABWE +# define SUBLANG_ENGLISH_ZIMBABWE 0x0c +# endif +# ifndef SUBLANG_ENGLISH_PHILIPPINES +# define SUBLANG_ENGLISH_PHILIPPINES 0x0d +# endif +# ifndef SUBLANG_ENGLISH_INDONESIA +# define SUBLANG_ENGLISH_INDONESIA 0x0e +# endif +# ifndef SUBLANG_ENGLISH_HONGKONG +# define SUBLANG_ENGLISH_HONGKONG 0x0f +# endif +# ifndef SUBLANG_ENGLISH_INDIA +# define SUBLANG_ENGLISH_INDIA 0x10 +# endif +# ifndef SUBLANG_ENGLISH_MALAYSIA +# define SUBLANG_ENGLISH_MALAYSIA 0x11 +# endif +# ifndef SUBLANG_ENGLISH_SINGAPORE +# define SUBLANG_ENGLISH_SINGAPORE 0x12 +# endif +# ifndef SUBLANG_ESTONIAN_ESTONIA +# define SUBLANG_ESTONIAN_ESTONIA 0x01 +# endif +# ifndef SUBLANG_FAEROESE_FAROE_ISLANDS +# define SUBLANG_FAEROESE_FAROE_ISLANDS 0x01 +# endif +# ifndef SUBLANG_FARSI_IRAN +# define SUBLANG_FARSI_IRAN 0x01 +# endif +# ifndef SUBLANG_FINNISH_FINLAND +# define SUBLANG_FINNISH_FINLAND 0x01 +# endif +# ifndef SUBLANG_FRENCH_LUXEMBOURG +# define SUBLANG_FRENCH_LUXEMBOURG 0x05 +# endif +# ifndef SUBLANG_FRENCH_MONACO +# define SUBLANG_FRENCH_MONACO 0x06 +# endif +# ifndef SUBLANG_FRENCH_WESTINDIES +# define SUBLANG_FRENCH_WESTINDIES 0x07 +# endif +# ifndef SUBLANG_FRENCH_REUNION +# define SUBLANG_FRENCH_REUNION 0x08 +# endif +# ifndef SUBLANG_FRENCH_CONGO +# define SUBLANG_FRENCH_CONGO 0x09 +# endif +# ifndef SUBLANG_FRENCH_SENEGAL +# define SUBLANG_FRENCH_SENEGAL 0x0a +# endif +# ifndef SUBLANG_FRENCH_CAMEROON +# define SUBLANG_FRENCH_CAMEROON 0x0b +# endif +# ifndef SUBLANG_FRENCH_COTEDIVOIRE +# define SUBLANG_FRENCH_COTEDIVOIRE 0x0c +# endif +# ifndef SUBLANG_FRENCH_MALI +# define SUBLANG_FRENCH_MALI 0x0d +# endif +# ifndef SUBLANG_FRENCH_MOROCCO +# define SUBLANG_FRENCH_MOROCCO 0x0e +# endif +# ifndef SUBLANG_FRENCH_HAITI +# define SUBLANG_FRENCH_HAITI 0x0f +# endif +# ifndef SUBLANG_FRISIAN_NETHERLANDS +# define SUBLANG_FRISIAN_NETHERLANDS 0x01 +# endif +# ifndef SUBLANG_GALICIAN_SPAIN +# define SUBLANG_GALICIAN_SPAIN 0x01 +# endif +# ifndef SUBLANG_GEORGIAN_GEORGIA +# define SUBLANG_GEORGIAN_GEORGIA 0x01 +# endif +# ifndef SUBLANG_GERMAN_LUXEMBOURG +# define SUBLANG_GERMAN_LUXEMBOURG 0x04 +# endif +# ifndef SUBLANG_GERMAN_LIECHTENSTEIN +# define SUBLANG_GERMAN_LIECHTENSTEIN 0x05 +# endif +# ifndef SUBLANG_GREEK_GREECE +# define SUBLANG_GREEK_GREECE 0x01 +# endif +# ifndef SUBLANG_GREENLANDIC_GREENLAND +# define SUBLANG_GREENLANDIC_GREENLAND 0x01 +# endif +# ifndef SUBLANG_GUJARATI_INDIA +# define SUBLANG_GUJARATI_INDIA 0x01 +# endif +# ifndef SUBLANG_HAUSA_NIGERIA_LATIN +# define SUBLANG_HAUSA_NIGERIA_LATIN 0x01 +# endif +# ifndef SUBLANG_HEBREW_ISRAEL +# define SUBLANG_HEBREW_ISRAEL 0x01 +# endif +# ifndef SUBLANG_HINDI_INDIA +# define SUBLANG_HINDI_INDIA 0x01 +# endif +# ifndef SUBLANG_HUNGARIAN_HUNGARY +# define SUBLANG_HUNGARIAN_HUNGARY 0x01 +# endif +# ifndef SUBLANG_ICELANDIC_ICELAND +# define SUBLANG_ICELANDIC_ICELAND 0x01 +# endif +# ifndef SUBLANG_IGBO_NIGERIA +# define SUBLANG_IGBO_NIGERIA 0x01 +# endif +# ifndef SUBLANG_INDONESIAN_INDONESIA +# define SUBLANG_INDONESIAN_INDONESIA 0x01 +# endif +# ifndef SUBLANG_INUKTITUT_CANADA +# define SUBLANG_INUKTITUT_CANADA 0x01 +# endif +# undef SUBLANG_INUKTITUT_CANADA_LATIN +# define SUBLANG_INUKTITUT_CANADA_LATIN 0x02 +# undef SUBLANG_IRISH_IRELAND +# define SUBLANG_IRISH_IRELAND 0x02 +# ifndef SUBLANG_JAPANESE_JAPAN +# define SUBLANG_JAPANESE_JAPAN 0x01 +# endif +# ifndef SUBLANG_KANNADA_INDIA +# define SUBLANG_KANNADA_INDIA 0x01 +# endif +# ifndef SUBLANG_KASHMIRI_INDIA +# define SUBLANG_KASHMIRI_INDIA 0x02 +# endif +# ifndef SUBLANG_KAZAK_KAZAKHSTAN +# define SUBLANG_KAZAK_KAZAKHSTAN 0x01 +# endif +# ifndef SUBLANG_KICHE_GUATEMALA +# define SUBLANG_KICHE_GUATEMALA 0x01 +# endif +# ifndef SUBLANG_KINYARWANDA_RWANDA +# define SUBLANG_KINYARWANDA_RWANDA 0x01 +# endif +# ifndef SUBLANG_KONKANI_INDIA +# define SUBLANG_KONKANI_INDIA 0x01 +# endif +# ifndef SUBLANG_KYRGYZ_KYRGYZSTAN +# define SUBLANG_KYRGYZ_KYRGYZSTAN 0x01 +# endif +# ifndef SUBLANG_LAO_LAOS +# define SUBLANG_LAO_LAOS 0x01 +# endif +# ifndef SUBLANG_LATVIAN_LATVIA +# define SUBLANG_LATVIAN_LATVIA 0x01 +# endif +# ifndef SUBLANG_LITHUANIAN_LITHUANIA +# define SUBLANG_LITHUANIAN_LITHUANIA 0x01 +# endif +# undef SUBLANG_LOWER_SORBIAN_GERMANY +# define SUBLANG_LOWER_SORBIAN_GERMANY 0x02 +# ifndef SUBLANG_LUXEMBOURGISH_LUXEMBOURG +# define SUBLANG_LUXEMBOURGISH_LUXEMBOURG 0x01 +# endif +# ifndef SUBLANG_MACEDONIAN_MACEDONIA +# define SUBLANG_MACEDONIAN_MACEDONIA 0x01 +# endif +# ifndef SUBLANG_MALAY_MALAYSIA +# define SUBLANG_MALAY_MALAYSIA 0x01 +# endif +# ifndef SUBLANG_MALAY_BRUNEI_DARUSSALAM +# define SUBLANG_MALAY_BRUNEI_DARUSSALAM 0x02 +# endif +# ifndef SUBLANG_MALAYALAM_INDIA +# define SUBLANG_MALAYALAM_INDIA 0x01 +# endif +# ifndef SUBLANG_MALTESE_MALTA +# define SUBLANG_MALTESE_MALTA 0x01 +# endif +# ifndef SUBLANG_MAORI_NEW_ZEALAND +# define SUBLANG_MAORI_NEW_ZEALAND 0x01 +# endif +# ifndef SUBLANG_MAPUDUNGUN_CHILE +# define SUBLANG_MAPUDUNGUN_CHILE 0x01 +# endif +# ifndef SUBLANG_MARATHI_INDIA +# define SUBLANG_MARATHI_INDIA 0x01 +# endif +# ifndef SUBLANG_MOHAWK_CANADA +# define SUBLANG_MOHAWK_CANADA 0x01 +# endif +# ifndef SUBLANG_MONGOLIAN_CYRILLIC_MONGOLIA +# define SUBLANG_MONGOLIAN_CYRILLIC_MONGOLIA 0x01 +# endif +# ifndef SUBLANG_MONGOLIAN_PRC +# define SUBLANG_MONGOLIAN_PRC 0x02 +# endif +# ifndef SUBLANG_NEPALI_NEPAL +# define SUBLANG_NEPALI_NEPAL 0x01 +# endif +# ifndef SUBLANG_NEPALI_INDIA +# define SUBLANG_NEPALI_INDIA 0x02 +# endif +# ifndef SUBLANG_OCCITAN_FRANCE +# define SUBLANG_OCCITAN_FRANCE 0x01 +# endif +# ifndef SUBLANG_ORIYA_INDIA +# define SUBLANG_ORIYA_INDIA 0x01 +# endif +# ifndef SUBLANG_PASHTO_AFGHANISTAN +# define SUBLANG_PASHTO_AFGHANISTAN 0x01 +# endif +# ifndef SUBLANG_POLISH_POLAND +# define SUBLANG_POLISH_POLAND 0x01 +# endif +# ifndef SUBLANG_PUNJABI_INDIA +# define SUBLANG_PUNJABI_INDIA 0x01 +# endif +# ifndef SUBLANG_PUNJABI_PAKISTAN +# define SUBLANG_PUNJABI_PAKISTAN 0x02 +# endif +# ifndef SUBLANG_QUECHUA_BOLIVIA +# define SUBLANG_QUECHUA_BOLIVIA 0x01 +# endif +# ifndef SUBLANG_QUECHUA_ECUADOR +# define SUBLANG_QUECHUA_ECUADOR 0x02 +# endif +# ifndef SUBLANG_QUECHUA_PERU +# define SUBLANG_QUECHUA_PERU 0x03 +# endif +# ifndef SUBLANG_ROMANIAN_ROMANIA +# define SUBLANG_ROMANIAN_ROMANIA 0x01 +# endif +# ifndef SUBLANG_ROMANIAN_MOLDOVA +# define SUBLANG_ROMANIAN_MOLDOVA 0x02 +# endif +# ifndef SUBLANG_ROMANSH_SWITZERLAND +# define SUBLANG_ROMANSH_SWITZERLAND 0x01 +# endif +# ifndef SUBLANG_RUSSIAN_RUSSIA +# define SUBLANG_RUSSIAN_RUSSIA 0x01 +# endif +# ifndef SUBLANG_RUSSIAN_MOLDAVIA +# define SUBLANG_RUSSIAN_MOLDAVIA 0x02 +# endif +# ifndef SUBLANG_SAMI_NORTHERN_NORWAY +# define SUBLANG_SAMI_NORTHERN_NORWAY 0x01 +# endif +# ifndef SUBLANG_SAMI_NORTHERN_SWEDEN +# define SUBLANG_SAMI_NORTHERN_SWEDEN 0x02 +# endif +# ifndef SUBLANG_SAMI_NORTHERN_FINLAND +# define SUBLANG_SAMI_NORTHERN_FINLAND 0x03 +# endif +# ifndef SUBLANG_SAMI_LULE_NORWAY +# define SUBLANG_SAMI_LULE_NORWAY 0x04 +# endif +# ifndef SUBLANG_SAMI_LULE_SWEDEN +# define SUBLANG_SAMI_LULE_SWEDEN 0x05 +# endif +# ifndef SUBLANG_SAMI_SOUTHERN_NORWAY +# define SUBLANG_SAMI_SOUTHERN_NORWAY 0x06 +# endif +# ifndef SUBLANG_SAMI_SOUTHERN_SWEDEN +# define SUBLANG_SAMI_SOUTHERN_SWEDEN 0x07 +# endif +# undef SUBLANG_SAMI_SKOLT_FINLAND +# define SUBLANG_SAMI_SKOLT_FINLAND 0x08 +# undef SUBLANG_SAMI_INARI_FINLAND +# define SUBLANG_SAMI_INARI_FINLAND 0x09 +# ifndef SUBLANG_SANSKRIT_INDIA +# define SUBLANG_SANSKRIT_INDIA 0x01 +# endif +# ifndef SUBLANG_SERBIAN_LATIN +# define SUBLANG_SERBIAN_LATIN 0x02 +# endif +# ifndef SUBLANG_SERBIAN_CYRILLIC +# define SUBLANG_SERBIAN_CYRILLIC 0x03 +# endif +# ifndef SUBLANG_SINDHI_INDIA +# define SUBLANG_SINDHI_INDIA 0x01 +# endif +# undef SUBLANG_SINDHI_PAKISTAN +# define SUBLANG_SINDHI_PAKISTAN 0x02 +# ifndef SUBLANG_SINDHI_AFGHANISTAN +# define SUBLANG_SINDHI_AFGHANISTAN 0x02 +# endif +# ifndef SUBLANG_SINHALESE_SRI_LANKA +# define SUBLANG_SINHALESE_SRI_LANKA 0x01 +# endif +# ifndef SUBLANG_SLOVAK_SLOVAKIA +# define SUBLANG_SLOVAK_SLOVAKIA 0x01 +# endif +# ifndef SUBLANG_SLOVENIAN_SLOVENIA +# define SUBLANG_SLOVENIAN_SLOVENIA 0x01 +# endif +# ifndef SUBLANG_SOTHO_SOUTH_AFRICA +# define SUBLANG_SOTHO_SOUTH_AFRICA 0x01 +# endif +# ifndef SUBLANG_SPANISH_GUATEMALA +# define SUBLANG_SPANISH_GUATEMALA 0x04 +# endif +# ifndef SUBLANG_SPANISH_COSTA_RICA +# define SUBLANG_SPANISH_COSTA_RICA 0x05 +# endif +# ifndef SUBLANG_SPANISH_PANAMA +# define SUBLANG_SPANISH_PANAMA 0x06 +# endif +# ifndef SUBLANG_SPANISH_DOMINICAN_REPUBLIC +# define SUBLANG_SPANISH_DOMINICAN_REPUBLIC 0x07 +# endif +# ifndef SUBLANG_SPANISH_VENEZUELA +# define SUBLANG_SPANISH_VENEZUELA 0x08 +# endif +# ifndef SUBLANG_SPANISH_COLOMBIA +# define SUBLANG_SPANISH_COLOMBIA 0x09 +# endif +# ifndef SUBLANG_SPANISH_PERU +# define SUBLANG_SPANISH_PERU 0x0a +# endif +# ifndef SUBLANG_SPANISH_ARGENTINA +# define SUBLANG_SPANISH_ARGENTINA 0x0b +# endif +# ifndef SUBLANG_SPANISH_ECUADOR +# define SUBLANG_SPANISH_ECUADOR 0x0c +# endif +# ifndef SUBLANG_SPANISH_CHILE +# define SUBLANG_SPANISH_CHILE 0x0d +# endif +# ifndef SUBLANG_SPANISH_URUGUAY +# define SUBLANG_SPANISH_URUGUAY 0x0e +# endif +# ifndef SUBLANG_SPANISH_PARAGUAY +# define SUBLANG_SPANISH_PARAGUAY 0x0f +# endif +# ifndef SUBLANG_SPANISH_BOLIVIA +# define SUBLANG_SPANISH_BOLIVIA 0x10 +# endif +# ifndef SUBLANG_SPANISH_EL_SALVADOR +# define SUBLANG_SPANISH_EL_SALVADOR 0x11 +# endif +# ifndef SUBLANG_SPANISH_HONDURAS +# define SUBLANG_SPANISH_HONDURAS 0x12 +# endif +# ifndef SUBLANG_SPANISH_NICARAGUA +# define SUBLANG_SPANISH_NICARAGUA 0x13 +# endif +# ifndef SUBLANG_SPANISH_PUERTO_RICO +# define SUBLANG_SPANISH_PUERTO_RICO 0x14 +# endif +# ifndef SUBLANG_SPANISH_US +# define SUBLANG_SPANISH_US 0x15 +# endif +# ifndef SUBLANG_SWAHILI_KENYA +# define SUBLANG_SWAHILI_KENYA 0x01 +# endif +# ifndef SUBLANG_SWEDISH_SWEDEN +# define SUBLANG_SWEDISH_SWEDEN 0x01 +# endif +# ifndef SUBLANG_SWEDISH_FINLAND +# define SUBLANG_SWEDISH_FINLAND 0x02 +# endif +# ifndef SUBLANG_SYRIAC_SYRIA +# define SUBLANG_SYRIAC_SYRIA 0x01 +# endif +# ifndef SUBLANG_TAGALOG_PHILIPPINES +# define SUBLANG_TAGALOG_PHILIPPINES 0x01 +# endif +# ifndef SUBLANG_TAJIK_TAJIKISTAN +# define SUBLANG_TAJIK_TAJIKISTAN 0x01 +# endif +# ifndef SUBLANG_TAMAZIGHT_ARABIC +# define SUBLANG_TAMAZIGHT_ARABIC 0x01 +# endif +# ifndef SUBLANG_TAMAZIGHT_ALGERIA_LATIN +# define SUBLANG_TAMAZIGHT_ALGERIA_LATIN 0x02 +# endif +# ifndef SUBLANG_TAMIL_INDIA +# define SUBLANG_TAMIL_INDIA 0x01 +# endif +# ifndef SUBLANG_TATAR_RUSSIA +# define SUBLANG_TATAR_RUSSIA 0x01 +# endif +# ifndef SUBLANG_TELUGU_INDIA +# define SUBLANG_TELUGU_INDIA 0x01 +# endif +# ifndef SUBLANG_THAI_THAILAND +# define SUBLANG_THAI_THAILAND 0x01 +# endif +# ifndef SUBLANG_TIBETAN_PRC +# define SUBLANG_TIBETAN_PRC 0x01 +# endif +# undef SUBLANG_TIBETAN_BHUTAN +# define SUBLANG_TIBETAN_BHUTAN 0x02 +# ifndef SUBLANG_TIGRINYA_ETHIOPIA +# define SUBLANG_TIGRINYA_ETHIOPIA 0x01 +# endif +# ifndef SUBLANG_TIGRINYA_ERITREA +# define SUBLANG_TIGRINYA_ERITREA 0x02 +# endif +# ifndef SUBLANG_TSWANA_SOUTH_AFRICA +# define SUBLANG_TSWANA_SOUTH_AFRICA 0x01 +# endif +# ifndef SUBLANG_TURKISH_TURKEY +# define SUBLANG_TURKISH_TURKEY 0x01 +# endif +# ifndef SUBLANG_TURKMEN_TURKMENISTAN +# define SUBLANG_TURKMEN_TURKMENISTAN 0x01 +# endif +# ifndef SUBLANG_UIGHUR_PRC +# define SUBLANG_UIGHUR_PRC 0x01 +# endif +# ifndef SUBLANG_UKRAINIAN_UKRAINE +# define SUBLANG_UKRAINIAN_UKRAINE 0x01 +# endif +# ifndef SUBLANG_UPPER_SORBIAN_GERMANY +# define SUBLANG_UPPER_SORBIAN_GERMANY 0x01 +# endif +# ifndef SUBLANG_URDU_PAKISTAN +# define SUBLANG_URDU_PAKISTAN 0x01 +# endif +# ifndef SUBLANG_URDU_INDIA +# define SUBLANG_URDU_INDIA 0x02 +# endif +# ifndef SUBLANG_UZBEK_LATIN +# define SUBLANG_UZBEK_LATIN 0x01 +# endif +# ifndef SUBLANG_UZBEK_CYRILLIC +# define SUBLANG_UZBEK_CYRILLIC 0x02 +# endif +# ifndef SUBLANG_VIETNAMESE_VIETNAM +# define SUBLANG_VIETNAMESE_VIETNAM 0x01 +# endif +# ifndef SUBLANG_WELSH_UNITED_KINGDOM +# define SUBLANG_WELSH_UNITED_KINGDOM 0x01 +# endif +# ifndef SUBLANG_WOLOF_SENEGAL +# define SUBLANG_WOLOF_SENEGAL 0x01 +# endif +# ifndef SUBLANG_XHOSA_SOUTH_AFRICA +# define SUBLANG_XHOSA_SOUTH_AFRICA 0x01 +# endif +# ifndef SUBLANG_YAKUT_RUSSIA +# define SUBLANG_YAKUT_RUSSIA 0x01 +# endif +# ifndef SUBLANG_YI_PRC +# define SUBLANG_YI_PRC 0x01 +# endif +# ifndef SUBLANG_YORUBA_NIGERIA +# define SUBLANG_YORUBA_NIGERIA 0x01 +# endif +# ifndef SUBLANG_ZULU_SOUTH_AFRICA +# define SUBLANG_ZULU_SOUTH_AFRICA 0x01 +# endif +/* GetLocaleInfoA operations. */ +# ifndef LOCALE_SNAME +# define LOCALE_SNAME 0x5c +# endif +#endif + + +#if HAVE_CFLOCALECOPYCURRENT || HAVE_CFPREFERENCESCOPYAPPVALUE +/* Mac OS X 10.2 or newer */ + +/* Canonicalize a Mac OS X locale name to a Unix locale name. + NAME is a sufficiently large buffer. + On input, it contains the Mac OS X locale name. + On output, it contains the Unix locale name. */ +# if !defined IN_LIBINTL +static +# endif +void +gl_locale_name_canonicalize (char *name) +{ + /* This conversion is based on a posting by + Deborah GoldSmith <goldsmit@apple.com> on 2005-03-08, + http://lists.apple.com/archives/carbon-dev/2005/Mar/msg00293.html */ + + /* Convert legacy (NeXTstep inherited) English names to Unix (ISO 639 and + ISO 3166) names. Prior to Mac OS X 10.3, there is no API for doing this. + Therefore we do it ourselves, using a table based on the results of the + Mac OS X 10.3.8 function + CFLocaleCreateCanonicalLocaleIdentifierFromString(). */ + typedef struct { const char legacy[21+1]; const char unixy[5+1]; } + legacy_entry; + static const legacy_entry legacy_table[] = { + { "Afrikaans", "af" }, + { "Albanian", "sq" }, + { "Amharic", "am" }, + { "Arabic", "ar" }, + { "Armenian", "hy" }, + { "Assamese", "as" }, + { "Aymara", "ay" }, + { "Azerbaijani", "az" }, + { "Basque", "eu" }, + { "Belarusian", "be" }, + { "Belorussian", "be" }, + { "Bengali", "bn" }, + { "Brazilian Portugese", "pt_BR" }, + { "Brazilian Portuguese", "pt_BR" }, + { "Breton", "br" }, + { "Bulgarian", "bg" }, + { "Burmese", "my" }, + { "Byelorussian", "be" }, + { "Catalan", "ca" }, + { "Chewa", "ny" }, + { "Chichewa", "ny" }, + { "Chinese", "zh" }, + { "Chinese, Simplified", "zh_CN" }, + { "Chinese, Traditional", "zh_TW" }, + { "Chinese, Tradtional", "zh_TW" }, + { "Croatian", "hr" }, + { "Czech", "cs" }, + { "Danish", "da" }, + { "Dutch", "nl" }, + { "Dzongkha", "dz" }, + { "English", "en" }, + { "Esperanto", "eo" }, + { "Estonian", "et" }, + { "Faroese", "fo" }, + { "Farsi", "fa" }, + { "Finnish", "fi" }, + { "Flemish", "nl_BE" }, + { "French", "fr" }, + { "Galician", "gl" }, + { "Gallegan", "gl" }, + { "Georgian", "ka" }, + { "German", "de" }, + { "Greek", "el" }, + { "Greenlandic", "kl" }, + { "Guarani", "gn" }, + { "Gujarati", "gu" }, + { "Hawaiian", "haw" }, /* Yes, "haw", not "cpe". */ + { "Hebrew", "he" }, + { "Hindi", "hi" }, + { "Hungarian", "hu" }, + { "Icelandic", "is" }, + { "Indonesian", "id" }, + { "Inuktitut", "iu" }, + { "Irish", "ga" }, + { "Italian", "it" }, + { "Japanese", "ja" }, + { "Javanese", "jv" }, + { "Kalaallisut", "kl" }, + { "Kannada", "kn" }, + { "Kashmiri", "ks" }, + { "Kazakh", "kk" }, + { "Khmer", "km" }, + { "Kinyarwanda", "rw" }, + { "Kirghiz", "ky" }, + { "Korean", "ko" }, + { "Kurdish", "ku" }, + { "Latin", "la" }, + { "Latvian", "lv" }, + { "Lithuanian", "lt" }, + { "Macedonian", "mk" }, + { "Malagasy", "mg" }, + { "Malay", "ms" }, + { "Malayalam", "ml" }, + { "Maltese", "mt" }, + { "Manx", "gv" }, + { "Marathi", "mr" }, + { "Moldavian", "mo" }, + { "Mongolian", "mn" }, + { "Nepali", "ne" }, + { "Norwegian", "nb" }, /* Yes, "nb", not the obsolete "no". */ + { "Nyanja", "ny" }, + { "Nynorsk", "nn" }, + { "Oriya", "or" }, + { "Oromo", "om" }, + { "Panjabi", "pa" }, + { "Pashto", "ps" }, + { "Persian", "fa" }, + { "Polish", "pl" }, + { "Portuguese", "pt" }, + { "Portuguese, Brazilian", "pt_BR" }, + { "Punjabi", "pa" }, + { "Pushto", "ps" }, + { "Quechua", "qu" }, + { "Romanian", "ro" }, + { "Ruanda", "rw" }, + { "Rundi", "rn" }, + { "Russian", "ru" }, + { "Sami", "se_NO" }, /* Not just "se". */ + { "Sanskrit", "sa" }, + { "Scottish", "gd" }, + { "Serbian", "sr" }, + { "Simplified Chinese", "zh_CN" }, + { "Sindhi", "sd" }, + { "Sinhalese", "si" }, + { "Slovak", "sk" }, + { "Slovenian", "sl" }, + { "Somali", "so" }, + { "Spanish", "es" }, + { "Sundanese", "su" }, + { "Swahili", "sw" }, + { "Swedish", "sv" }, + { "Tagalog", "tl" }, + { "Tajik", "tg" }, + { "Tajiki", "tg" }, + { "Tamil", "ta" }, + { "Tatar", "tt" }, + { "Telugu", "te" }, + { "Thai", "th" }, + { "Tibetan", "bo" }, + { "Tigrinya", "ti" }, + { "Tongan", "to" }, + { "Traditional Chinese", "zh_TW" }, + { "Turkish", "tr" }, + { "Turkmen", "tk" }, + { "Uighur", "ug" }, + { "Ukrainian", "uk" }, + { "Urdu", "ur" }, + { "Uzbek", "uz" }, + { "Vietnamese", "vi" }, + { "Welsh", "cy" }, + { "Yiddish", "yi" } + }; + + /* Convert new-style locale names with language tags (ISO 639 and ISO 15924) + to Unix (ISO 639 and ISO 3166) names. */ + typedef struct { const char langtag[7+1]; const char unixy[12+1]; } + langtag_entry; + static const langtag_entry langtag_table[] = { + /* Mac OS X has "az-Arab", "az-Cyrl", "az-Latn". + The default script for az on Unix is Latin. */ + { "az-Latn", "az" }, + /* Mac OS X has "ga-dots". Does not yet exist on Unix. */ + { "ga-dots", "ga" }, + /* Mac OS X has "kk-Cyrl". Does not yet exist on Unix. */ + /* Mac OS X has "mn-Cyrl", "mn-Mong". + The default script for mn on Unix is Cyrillic. */ + { "mn-Cyrl", "mn" }, + /* Mac OS X has "ms-Arab", "ms-Latn". + The default script for ms on Unix is Latin. */ + { "ms-Latn", "ms" }, + /* Mac OS X has "tg-Cyrl". + The default script for tg on Unix is Cyrillic. */ + { "tg-Cyrl", "tg" }, + /* Mac OS X has "tk-Cyrl". Does not yet exist on Unix. */ + /* Mac OS X has "tt-Cyrl". + The default script for tt on Unix is Cyrillic. */ + { "tt-Cyrl", "tt" }, + /* Mac OS X has "zh-Hans", "zh-Hant". + Country codes are used to distinguish these on Unix. */ + { "zh-Hans", "zh_CN" }, + { "zh-Hant", "zh_TW" } + }; + + /* Convert script names (ISO 15924) to Unix conventions. + See http://www.unicode.org/iso15924/iso15924-codes.html */ + typedef struct { const char script[4+1]; const char unixy[9+1]; } + script_entry; + static const script_entry script_table[] = { + { "Arab", "arabic" }, + { "Cyrl", "cyrillic" }, + { "Mong", "mongolian" } + }; + + /* Step 1: Convert using legacy_table. */ + if (name[0] >= 'A' && name[0] <= 'Z') + { + unsigned int i1, i2; + i1 = 0; + i2 = sizeof (legacy_table) / sizeof (legacy_entry); + while (i2 - i1 > 1) + { + /* At this point we know that if name occurs in legacy_table, + its index must be >= i1 and < i2. */ + unsigned int i = (i1 + i2) >> 1; + const legacy_entry *p = &legacy_table[i]; + if (strcmp (name, p->legacy) < 0) + i2 = i; + else + i1 = i; + } + if (strcmp (name, legacy_table[i1].legacy) == 0) + { + strcpy (name, legacy_table[i1].unixy); + return; + } + } + + /* Step 2: Convert using langtag_table and script_table. */ + if (strlen (name) == 7 && name[2] == '-') + { + unsigned int i1, i2; + i1 = 0; + i2 = sizeof (langtag_table) / sizeof (langtag_entry); + while (i2 - i1 > 1) + { + /* At this point we know that if name occurs in langtag_table, + its index must be >= i1 and < i2. */ + unsigned int i = (i1 + i2) >> 1; + const langtag_entry *p = &langtag_table[i]; + if (strcmp (name, p->langtag) < 0) + i2 = i; + else + i1 = i; + } + if (strcmp (name, langtag_table[i1].langtag) == 0) + { + strcpy (name, langtag_table[i1].unixy); + return; + } + + i1 = 0; + i2 = sizeof (script_table) / sizeof (script_entry); + while (i2 - i1 > 1) + { + /* At this point we know that if (name + 3) occurs in script_table, + its index must be >= i1 and < i2. */ + unsigned int i = (i1 + i2) >> 1; + const script_entry *p = &script_table[i]; + if (strcmp (name + 3, p->script) < 0) + i2 = i; + else + i1 = i; + } + if (strcmp (name + 3, script_table[i1].script) == 0) + { + name[2] = '@'; + strcpy (name + 3, script_table[i1].unixy); + return; + } + } + + /* Step 3: Convert new-style dash to Unix underscore. */ + { + char *p; + for (p = name; *p != '\0'; p++) + if (*p == '-') + *p = '_'; + } +} + +#endif + + +#if defined WINDOWS_NATIVE || defined __CYGWIN__ /* Native Windows or Cygwin */ + +/* Canonicalize a Windows native locale name to a Unix locale name. + NAME is a sufficiently large buffer. + On input, it contains the Windows locale name. + On output, it contains the Unix locale name. */ +# if !defined IN_LIBINTL +static +# endif +void +gl_locale_name_canonicalize (char *name) +{ + /* FIXME: This is probably incomplete: it does not handle "zh-Hans" and + "zh-Hant". */ + char *p; + + for (p = name; *p != '\0'; p++) + if (*p == '-') + { + *p = '_'; + p++; + for (; *p != '\0'; p++) + { + if (*p >= 'a' && *p <= 'z') + *p += 'A' - 'a'; + if (*p == '-') + { + *p = '\0'; + return; + } + } + return; + } +} + +# if !defined IN_LIBINTL +static +# endif +const char * +gl_locale_name_from_win32_LANGID (LANGID langid) +{ + /* Activate the new code only when the GETTEXT_MUI environment variable is + set, for the time being, since the new code is not well tested. */ + if (getenv ("GETTEXT_MUI") != NULL) + { + static char namebuf[256]; + + /* Query the system's notion of locale name. + On Windows95/98/ME, GetLocaleInfoA returns some incorrect results. + But we don't need to support systems that are so old. */ + if (GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT), LOCALE_SNAME, + namebuf, sizeof (namebuf) - 1)) + { + /* Convert it to a Unix locale name. */ + gl_locale_name_canonicalize (namebuf); + return namebuf; + } + } + /* Internet Explorer has an LCID to RFC3066 name mapping stored in + HKEY_CLASSES_ROOT\Mime\Database\Rfc1766. But we better don't use that + since IE's i18n subsystem is known to be inconsistent with the native + Windows base (e.g. they have different character conversion facilities + that produce different results). */ + /* Use our own table. */ + { + int primary, sub; + + /* Split into language and territory part. */ + primary = PRIMARYLANGID (langid); + sub = SUBLANGID (langid); + + /* Dispatch on language. + See also http://www.unicode.org/unicode/onlinedat/languages.html . + For details about languages, see http://www.ethnologue.com/ . */ + switch (primary) + { + case LANG_AFRIKAANS: + switch (sub) + { + case SUBLANG_AFRIKAANS_SOUTH_AFRICA: return "af_ZA"; + } + return "af"; + case LANG_ALBANIAN: + switch (sub) + { + case SUBLANG_ALBANIAN_ALBANIA: return "sq_AL"; + } + return "sq"; + case LANG_ALSATIAN: + switch (sub) + { + case SUBLANG_ALSATIAN_FRANCE: return "gsw_FR"; + } + return "gsw"; + case LANG_AMHARIC: + switch (sub) + { + case SUBLANG_AMHARIC_ETHIOPIA: return "am_ET"; + } + return "am"; + case LANG_ARABIC: + switch (sub) + { + case SUBLANG_ARABIC_SAUDI_ARABIA: return "ar_SA"; + case SUBLANG_ARABIC_IRAQ: return "ar_IQ"; + case SUBLANG_ARABIC_EGYPT: return "ar_EG"; + case SUBLANG_ARABIC_LIBYA: return "ar_LY"; + case SUBLANG_ARABIC_ALGERIA: return "ar_DZ"; + case SUBLANG_ARABIC_MOROCCO: return "ar_MA"; + case SUBLANG_ARABIC_TUNISIA: return "ar_TN"; + case SUBLANG_ARABIC_OMAN: return "ar_OM"; + case SUBLANG_ARABIC_YEMEN: return "ar_YE"; + case SUBLANG_ARABIC_SYRIA: return "ar_SY"; + case SUBLANG_ARABIC_JORDAN: return "ar_JO"; + case SUBLANG_ARABIC_LEBANON: return "ar_LB"; + case SUBLANG_ARABIC_KUWAIT: return "ar_KW"; + case SUBLANG_ARABIC_UAE: return "ar_AE"; + case SUBLANG_ARABIC_BAHRAIN: return "ar_BH"; + case SUBLANG_ARABIC_QATAR: return "ar_QA"; + } + return "ar"; + case LANG_ARMENIAN: + switch (sub) + { + case SUBLANG_ARMENIAN_ARMENIA: return "hy_AM"; + } + return "hy"; + case LANG_ASSAMESE: + switch (sub) + { + case SUBLANG_ASSAMESE_INDIA: return "as_IN"; + } + return "as"; + case LANG_AZERI: + switch (sub) + { + /* FIXME: Adjust this when Azerbaijani locales appear on Unix. */ + case 0x1e: return "az@latin"; + case SUBLANG_AZERI_LATIN: return "az_AZ@latin"; + case 0x1d: return "az@cyrillic"; + case SUBLANG_AZERI_CYRILLIC: return "az_AZ@cyrillic"; + } + return "az"; + case LANG_BASHKIR: + switch (sub) + { + case SUBLANG_BASHKIR_RUSSIA: return "ba_RU"; + } + return "ba"; + case LANG_BASQUE: + switch (sub) + { + case SUBLANG_BASQUE_BASQUE: return "eu_ES"; + } + return "eu"; /* Ambiguous: could be "eu_ES" or "eu_FR". */ + case LANG_BELARUSIAN: + switch (sub) + { + case SUBLANG_BELARUSIAN_BELARUS: return "be_BY"; + } + return "be"; + case LANG_BENGALI: + switch (sub) + { + case SUBLANG_BENGALI_INDIA: return "bn_IN"; + case SUBLANG_BENGALI_BANGLADESH: return "bn_BD"; + } + return "bn"; + case LANG_BRETON: + switch (sub) + { + case SUBLANG_BRETON_FRANCE: return "br_FR"; + } + return "br"; + case LANG_BULGARIAN: + switch (sub) + { + case SUBLANG_BULGARIAN_BULGARIA: return "bg_BG"; + } + return "bg"; + case LANG_BURMESE: + switch (sub) + { + case SUBLANG_DEFAULT: return "my_MM"; + } + return "my"; + case LANG_CAMBODIAN: + switch (sub) + { + case SUBLANG_CAMBODIAN_CAMBODIA: return "km_KH"; + } + return "km"; + case LANG_CATALAN: + switch (sub) + { + case SUBLANG_CATALAN_SPAIN: return "ca_ES"; + } + return "ca"; + case LANG_CHEROKEE: + switch (sub) + { + case SUBLANG_DEFAULT: return "chr_US"; + } + return "chr"; + case LANG_CHINESE: + switch (sub) + { + case SUBLANG_CHINESE_TRADITIONAL: case 0x1f: return "zh_TW"; + case SUBLANG_CHINESE_SIMPLIFIED: case 0x00: return "zh_CN"; + case SUBLANG_CHINESE_HONGKONG: return "zh_HK"; /* traditional */ + case SUBLANG_CHINESE_SINGAPORE: return "zh_SG"; /* simplified */ + case SUBLANG_CHINESE_MACAU: return "zh_MO"; /* traditional */ + } + return "zh"; + case LANG_CORSICAN: + switch (sub) + { + case SUBLANG_CORSICAN_FRANCE: return "co_FR"; + } + return "co"; + case LANG_CROATIAN: /* LANG_CROATIAN == LANG_SERBIAN == LANG_BOSNIAN + * What used to be called Serbo-Croatian + * should really now be two separate + * languages because of political reasons. + * (Says tml, who knows nothing about Serbian + * or Croatian.) + * (I can feel those flames coming already.) + */ + switch (sub) + { + /* Croatian */ + case 0x00: return "hr"; + case SUBLANG_CROATIAN_CROATIA: return "hr_HR"; + case SUBLANG_CROATIAN_BOSNIA_HERZEGOVINA_LATIN: return "hr_BA"; + /* Serbian */ + case 0x1f: return "sr"; + case 0x1c: return "sr"; /* latin */ + case SUBLANG_SERBIAN_LATIN: return "sr_CS"; /* latin */ + case 0x09: return "sr_RS"; /* latin */ + case 0x0b: return "sr_ME"; /* latin */ + case 0x06: return "sr_BA"; /* latin */ + case 0x1b: return "sr@cyrillic"; + case SUBLANG_SERBIAN_CYRILLIC: return "sr_CS@cyrillic"; + case 0x0a: return "sr_RS@cyrillic"; + case 0x0c: return "sr_ME@cyrillic"; + case 0x07: return "sr_BA@cyrillic"; + /* Bosnian */ + case 0x1e: return "bs"; + case 0x1a: return "bs"; /* latin */ + case SUBLANG_BOSNIAN_BOSNIA_HERZEGOVINA_LATIN: return "bs_BA"; /* latin */ + case 0x19: return "bs@cyrillic"; + case SUBLANG_BOSNIAN_BOSNIA_HERZEGOVINA_CYRILLIC: return "bs_BA@cyrillic"; + } + return "hr"; + case LANG_CZECH: + switch (sub) + { + case SUBLANG_CZECH_CZECH_REPUBLIC: return "cs_CZ"; + } + return "cs"; + case LANG_DANISH: + switch (sub) + { + case SUBLANG_DANISH_DENMARK: return "da_DK"; + } + return "da"; + case LANG_DARI: + /* FIXME: Adjust this when such locales appear on Unix. */ + switch (sub) + { + case SUBLANG_DARI_AFGHANISTAN: return "prs_AF"; + } + return "prs"; + case LANG_DIVEHI: + switch (sub) + { + case SUBLANG_DIVEHI_MALDIVES: return "dv_MV"; + } + return "dv"; + case LANG_DUTCH: + switch (sub) + { + case SUBLANG_DUTCH: return "nl_NL"; + case SUBLANG_DUTCH_BELGIAN: /* FLEMISH, VLAAMS */ return "nl_BE"; + case SUBLANG_DUTCH_SURINAM: return "nl_SR"; + } + return "nl"; + case LANG_EDO: + switch (sub) + { + case SUBLANG_DEFAULT: return "bin_NG"; + } + return "bin"; + case LANG_ENGLISH: + switch (sub) + { + /* SUBLANG_ENGLISH_US == SUBLANG_DEFAULT. Heh. I thought + * English was the language spoken in England. + * Oh well. + */ + case SUBLANG_ENGLISH_US: return "en_US"; + case SUBLANG_ENGLISH_UK: return "en_GB"; + case SUBLANG_ENGLISH_AUS: return "en_AU"; + case SUBLANG_ENGLISH_CAN: return "en_CA"; + case SUBLANG_ENGLISH_NZ: return "en_NZ"; + case SUBLANG_ENGLISH_EIRE: return "en_IE"; + case SUBLANG_ENGLISH_SOUTH_AFRICA: return "en_ZA"; + case SUBLANG_ENGLISH_JAMAICA: return "en_JM"; + case SUBLANG_ENGLISH_CARIBBEAN: return "en_GD"; /* Grenada? */ + case SUBLANG_ENGLISH_BELIZE: return "en_BZ"; + case SUBLANG_ENGLISH_TRINIDAD: return "en_TT"; + case SUBLANG_ENGLISH_ZIMBABWE: return "en_ZW"; + case SUBLANG_ENGLISH_PHILIPPINES: return "en_PH"; + case SUBLANG_ENGLISH_INDONESIA: return "en_ID"; + case SUBLANG_ENGLISH_HONGKONG: return "en_HK"; + case SUBLANG_ENGLISH_INDIA: return "en_IN"; + case SUBLANG_ENGLISH_MALAYSIA: return "en_MY"; + case SUBLANG_ENGLISH_SINGAPORE: return "en_SG"; + } + return "en"; + case LANG_ESTONIAN: + switch (sub) + { + case SUBLANG_ESTONIAN_ESTONIA: return "et_EE"; + } + return "et"; + case LANG_FAEROESE: + switch (sub) + { + case SUBLANG_FAEROESE_FAROE_ISLANDS: return "fo_FO"; + } + return "fo"; + case LANG_FARSI: + switch (sub) + { + case SUBLANG_FARSI_IRAN: return "fa_IR"; + } + return "fa"; + case LANG_FINNISH: + switch (sub) + { + case SUBLANG_FINNISH_FINLAND: return "fi_FI"; + } + return "fi"; + case LANG_FRENCH: + switch (sub) + { + case SUBLANG_FRENCH: return "fr_FR"; + case SUBLANG_FRENCH_BELGIAN: /* WALLOON */ return "fr_BE"; + case SUBLANG_FRENCH_CANADIAN: return "fr_CA"; + case SUBLANG_FRENCH_SWISS: return "fr_CH"; + case SUBLANG_FRENCH_LUXEMBOURG: return "fr_LU"; + case SUBLANG_FRENCH_MONACO: return "fr_MC"; + case SUBLANG_FRENCH_WESTINDIES: return "fr"; /* Caribbean? */ + case SUBLANG_FRENCH_REUNION: return "fr_RE"; + case SUBLANG_FRENCH_CONGO: return "fr_CG"; + case SUBLANG_FRENCH_SENEGAL: return "fr_SN"; + case SUBLANG_FRENCH_CAMEROON: return "fr_CM"; + case SUBLANG_FRENCH_COTEDIVOIRE: return "fr_CI"; + case SUBLANG_FRENCH_MALI: return "fr_ML"; + case SUBLANG_FRENCH_MOROCCO: return "fr_MA"; + case SUBLANG_FRENCH_HAITI: return "fr_HT"; + } + return "fr"; + case LANG_FRISIAN: + switch (sub) + { + case SUBLANG_FRISIAN_NETHERLANDS: return "fy_NL"; + } + return "fy"; + case LANG_FULFULDE: + /* Spoken in Nigeria, Guinea, Senegal, Mali, Niger, Cameroon, Benin. */ + switch (sub) + { + case SUBLANG_DEFAULT: return "ff_NG"; + } + return "ff"; + case LANG_GAELIC: + switch (sub) + { + case 0x01: /* SCOTTISH */ + /* old, superseded by LANG_SCOTTISH_GAELIC */ + return "gd_GB"; + case SUBLANG_IRISH_IRELAND: return "ga_IE"; + } + return "ga"; + case LANG_GALICIAN: + switch (sub) + { + case SUBLANG_GALICIAN_SPAIN: return "gl_ES"; + } + return "gl"; + case LANG_GEORGIAN: + switch (sub) + { + case SUBLANG_GEORGIAN_GEORGIA: return "ka_GE"; + } + return "ka"; + case LANG_GERMAN: + switch (sub) + { + case SUBLANG_GERMAN: return "de_DE"; + case SUBLANG_GERMAN_SWISS: return "de_CH"; + case SUBLANG_GERMAN_AUSTRIAN: return "de_AT"; + case SUBLANG_GERMAN_LUXEMBOURG: return "de_LU"; + case SUBLANG_GERMAN_LIECHTENSTEIN: return "de_LI"; + } + return "de"; + case LANG_GREEK: + switch (sub) + { + case SUBLANG_GREEK_GREECE: return "el_GR"; + } + return "el"; + case LANG_GREENLANDIC: + switch (sub) + { + case SUBLANG_GREENLANDIC_GREENLAND: return "kl_GL"; + } + return "kl"; + case LANG_GUARANI: + switch (sub) + { + case SUBLANG_DEFAULT: return "gn_PY"; + } + return "gn"; + case LANG_GUJARATI: + switch (sub) + { + case SUBLANG_GUJARATI_INDIA: return "gu_IN"; + } + return "gu"; + case LANG_HAUSA: + switch (sub) + { + case 0x1f: return "ha"; + case SUBLANG_HAUSA_NIGERIA_LATIN: return "ha_NG"; + } + return "ha"; + case LANG_HAWAIIAN: + /* FIXME: Do they mean Hawaiian ("haw_US", 1000 speakers) + or Hawaii Creole English ("cpe_US", 600000 speakers)? */ + switch (sub) + { + case SUBLANG_DEFAULT: return "cpe_US"; + } + return "cpe"; + case LANG_HEBREW: + switch (sub) + { + case SUBLANG_HEBREW_ISRAEL: return "he_IL"; + } + return "he"; + case LANG_HINDI: + switch (sub) + { + case SUBLANG_HINDI_INDIA: return "hi_IN"; + } + return "hi"; + case LANG_HUNGARIAN: + switch (sub) + { + case SUBLANG_HUNGARIAN_HUNGARY: return "hu_HU"; + } + return "hu"; + case LANG_IBIBIO: + switch (sub) + { + case SUBLANG_DEFAULT: return "nic_NG"; + } + return "nic"; + case LANG_ICELANDIC: + switch (sub) + { + case SUBLANG_ICELANDIC_ICELAND: return "is_IS"; + } + return "is"; + case LANG_IGBO: + switch (sub) + { + case SUBLANG_IGBO_NIGERIA: return "ig_NG"; + } + return "ig"; + case LANG_INDONESIAN: + switch (sub) + { + case SUBLANG_INDONESIAN_INDONESIA: return "id_ID"; + } + return "id"; + case LANG_INUKTITUT: + switch (sub) + { + case 0x1e: return "iu"; /* syllabic */ + case SUBLANG_INUKTITUT_CANADA: return "iu_CA"; /* syllabic */ + case 0x1f: return "iu@latin"; + case SUBLANG_INUKTITUT_CANADA_LATIN: return "iu_CA@latin"; + } + return "iu"; + case LANG_ITALIAN: + switch (sub) + { + case SUBLANG_ITALIAN: return "it_IT"; + case SUBLANG_ITALIAN_SWISS: return "it_CH"; + } + return "it"; + case LANG_JAPANESE: + switch (sub) + { + case SUBLANG_JAPANESE_JAPAN: return "ja_JP"; + } + return "ja"; + case LANG_KANNADA: + switch (sub) + { + case SUBLANG_KANNADA_INDIA: return "kn_IN"; + } + return "kn"; + case LANG_KANURI: + switch (sub) + { + case SUBLANG_DEFAULT: return "kr_NG"; + } + return "kr"; + case LANG_KASHMIRI: + switch (sub) + { + case SUBLANG_DEFAULT: return "ks_PK"; + case SUBLANG_KASHMIRI_INDIA: return "ks_IN"; + } + return "ks"; + case LANG_KAZAK: + switch (sub) + { + case SUBLANG_KAZAK_KAZAKHSTAN: return "kk_KZ"; + } + return "kk"; + case LANG_KICHE: + /* FIXME: Adjust this when such locales appear on Unix. */ + switch (sub) + { + case SUBLANG_KICHE_GUATEMALA: return "qut_GT"; + } + return "qut"; + case LANG_KINYARWANDA: + switch (sub) + { + case SUBLANG_KINYARWANDA_RWANDA: return "rw_RW"; + } + return "rw"; + case LANG_KONKANI: + /* FIXME: Adjust this when such locales appear on Unix. */ + switch (sub) + { + case SUBLANG_KONKANI_INDIA: return "kok_IN"; + } + return "kok"; + case LANG_KOREAN: + switch (sub) + { + case SUBLANG_DEFAULT: return "ko_KR"; + } + return "ko"; + case LANG_KYRGYZ: + switch (sub) + { + case SUBLANG_KYRGYZ_KYRGYZSTAN: return "ky_KG"; + } + return "ky"; + case LANG_LAO: + switch (sub) + { + case SUBLANG_LAO_LAOS: return "lo_LA"; + } + return "lo"; + case LANG_LATIN: + switch (sub) + { + case SUBLANG_DEFAULT: return "la_VA"; + } + return "la"; + case LANG_LATVIAN: + switch (sub) + { + case SUBLANG_LATVIAN_LATVIA: return "lv_LV"; + } + return "lv"; + case LANG_LITHUANIAN: + switch (sub) + { + case SUBLANG_LITHUANIAN_LITHUANIA: return "lt_LT"; + } + return "lt"; + case LANG_LUXEMBOURGISH: + switch (sub) + { + case SUBLANG_LUXEMBOURGISH_LUXEMBOURG: return "lb_LU"; + } + return "lb"; + case LANG_MACEDONIAN: + switch (sub) + { + case SUBLANG_MACEDONIAN_MACEDONIA: return "mk_MK"; + } + return "mk"; + case LANG_MALAY: + switch (sub) + { + case SUBLANG_MALAY_MALAYSIA: return "ms_MY"; + case SUBLANG_MALAY_BRUNEI_DARUSSALAM: return "ms_BN"; + } + return "ms"; + case LANG_MALAYALAM: + switch (sub) + { + case SUBLANG_MALAYALAM_INDIA: return "ml_IN"; + } + return "ml"; + case LANG_MALTESE: + switch (sub) + { + case SUBLANG_MALTESE_MALTA: return "mt_MT"; + } + return "mt"; + case LANG_MANIPURI: + /* FIXME: Adjust this when such locales appear on Unix. */ + switch (sub) + { + case SUBLANG_DEFAULT: return "mni_IN"; + } + return "mni"; + case LANG_MAORI: + switch (sub) + { + case SUBLANG_MAORI_NEW_ZEALAND: return "mi_NZ"; + } + return "mi"; + case LANG_MAPUDUNGUN: + switch (sub) + { + case SUBLANG_MAPUDUNGUN_CHILE: return "arn_CL"; + } + return "arn"; + case LANG_MARATHI: + switch (sub) + { + case SUBLANG_MARATHI_INDIA: return "mr_IN"; + } + return "mr"; + case LANG_MOHAWK: + switch (sub) + { + case SUBLANG_MOHAWK_CANADA: return "moh_CA"; + } + return "moh"; + case LANG_MONGOLIAN: + switch (sub) + { + case SUBLANG_MONGOLIAN_CYRILLIC_MONGOLIA: case 0x1e: return "mn_MN"; + case SUBLANG_MONGOLIAN_PRC: case 0x1f: return "mn_CN"; + } + return "mn"; /* Ambiguous: could be "mn_CN" or "mn_MN". */ + case LANG_NEPALI: + switch (sub) + { + case SUBLANG_NEPALI_NEPAL: return "ne_NP"; + case SUBLANG_NEPALI_INDIA: return "ne_IN"; + } + return "ne"; + case LANG_NORWEGIAN: + switch (sub) + { + case 0x1f: return "nb"; + case SUBLANG_NORWEGIAN_BOKMAL: return "nb_NO"; + case 0x1e: return "nn"; + case SUBLANG_NORWEGIAN_NYNORSK: return "nn_NO"; + } + return "no"; + case LANG_OCCITAN: + switch (sub) + { + case SUBLANG_OCCITAN_FRANCE: return "oc_FR"; + } + return "oc"; + case LANG_ORIYA: + switch (sub) + { + case SUBLANG_ORIYA_INDIA: return "or_IN"; + } + return "or"; + case LANG_OROMO: + switch (sub) + { + case SUBLANG_DEFAULT: return "om_ET"; + } + return "om"; + case LANG_PAPIAMENTU: + switch (sub) + { + case SUBLANG_DEFAULT: return "pap_AN"; + } + return "pap"; + case LANG_PASHTO: + switch (sub) + { + case SUBLANG_PASHTO_AFGHANISTAN: return "ps_AF"; + } + return "ps"; /* Ambiguous: could be "ps_PK" or "ps_AF". */ + case LANG_POLISH: + switch (sub) + { + case SUBLANG_POLISH_POLAND: return "pl_PL"; + } + return "pl"; + case LANG_PORTUGUESE: + switch (sub) + { + /* Hmm. SUBLANG_PORTUGUESE_BRAZILIAN == SUBLANG_DEFAULT. + Same phenomenon as SUBLANG_ENGLISH_US == SUBLANG_DEFAULT. */ + case SUBLANG_PORTUGUESE_BRAZILIAN: return "pt_BR"; + case SUBLANG_PORTUGUESE: return "pt_PT"; + } + return "pt"; + case LANG_PUNJABI: + switch (sub) + { + case SUBLANG_PUNJABI_INDIA: return "pa_IN"; /* Gurmukhi script */ + case SUBLANG_PUNJABI_PAKISTAN: return "pa_PK"; /* Arabic script */ + } + return "pa"; + case LANG_QUECHUA: + /* Note: Microsoft uses the non-ISO language code "quz". */ + switch (sub) + { + case SUBLANG_QUECHUA_BOLIVIA: return "qu_BO"; + case SUBLANG_QUECHUA_ECUADOR: return "qu_EC"; + case SUBLANG_QUECHUA_PERU: return "qu_PE"; + } + return "qu"; + case LANG_ROMANIAN: + switch (sub) + { + case SUBLANG_ROMANIAN_ROMANIA: return "ro_RO"; + case SUBLANG_ROMANIAN_MOLDOVA: return "ro_MD"; + } + return "ro"; + case LANG_ROMANSH: + switch (sub) + { + case SUBLANG_ROMANSH_SWITZERLAND: return "rm_CH"; + } + return "rm"; + case LANG_RUSSIAN: + switch (sub) + { + case SUBLANG_RUSSIAN_RUSSIA: return "ru_RU"; + case SUBLANG_RUSSIAN_MOLDAVIA: return "ru_MD"; + } + return "ru"; /* Ambiguous: could be "ru_RU" or "ru_UA" or "ru_MD". */ + case LANG_SAMI: + switch (sub) + { + /* Northern Sami */ + case 0x00: return "se"; + case SUBLANG_SAMI_NORTHERN_NORWAY: return "se_NO"; + case SUBLANG_SAMI_NORTHERN_SWEDEN: return "se_SE"; + case SUBLANG_SAMI_NORTHERN_FINLAND: return "se_FI"; + /* Lule Sami */ + case 0x1f: return "smj"; + case SUBLANG_SAMI_LULE_NORWAY: return "smj_NO"; + case SUBLANG_SAMI_LULE_SWEDEN: return "smj_SE"; + /* Southern Sami */ + case 0x1e: return "sma"; + case SUBLANG_SAMI_SOUTHERN_NORWAY: return "sma_NO"; + case SUBLANG_SAMI_SOUTHERN_SWEDEN: return "sma_SE"; + /* Skolt Sami */ + case 0x1d: return "sms"; + case SUBLANG_SAMI_SKOLT_FINLAND: return "sms_FI"; + /* Inari Sami */ + case 0x1c: return "smn"; + case SUBLANG_SAMI_INARI_FINLAND: return "smn_FI"; + } + return "se"; /* or "smi"? */ + case LANG_SANSKRIT: + switch (sub) + { + case SUBLANG_SANSKRIT_INDIA: return "sa_IN"; + } + return "sa"; + case LANG_SCOTTISH_GAELIC: + switch (sub) + { + case SUBLANG_DEFAULT: return "gd_GB"; + } + return "gd"; + case LANG_SINDHI: + switch (sub) + { + case SUBLANG_SINDHI_INDIA: return "sd_IN"; + case SUBLANG_SINDHI_PAKISTAN: return "sd_PK"; + /*case SUBLANG_SINDHI_AFGHANISTAN: return "sd_AF";*/ + } + return "sd"; + case LANG_SINHALESE: + switch (sub) + { + case SUBLANG_SINHALESE_SRI_LANKA: return "si_LK"; + } + return "si"; + case LANG_SLOVAK: + switch (sub) + { + case SUBLANG_SLOVAK_SLOVAKIA: return "sk_SK"; + } + return "sk"; + case LANG_SLOVENIAN: + switch (sub) + { + case SUBLANG_SLOVENIAN_SLOVENIA: return "sl_SI"; + } + return "sl"; + case LANG_SOMALI: + switch (sub) + { + case SUBLANG_DEFAULT: return "so_SO"; + } + return "so"; + case LANG_SORBIAN: + /* FIXME: Adjust this when such locales appear on Unix. */ + switch (sub) + { + /* Upper Sorbian */ + case 0x00: return "hsb"; + case SUBLANG_UPPER_SORBIAN_GERMANY: return "hsb_DE"; + /* Lower Sorbian */ + case 0x1f: return "dsb"; + case SUBLANG_LOWER_SORBIAN_GERMANY: return "dsb_DE"; + } + return "wen"; + case LANG_SOTHO: + /* <http://www.microsoft.com/globaldev/reference/lcid-all.mspx> calls + it "Sepedi"; according to + <http://www.ethnologue.com/show_language.asp?code=nso> + <http://www.ethnologue.com/show_language.asp?code=sot> + it's the same as Northern Sotho. */ + switch (sub) + { + case SUBLANG_SOTHO_SOUTH_AFRICA: return "nso_ZA"; + } + return "nso"; + case LANG_SPANISH: + switch (sub) + { + case SUBLANG_SPANISH: return "es_ES"; + case SUBLANG_SPANISH_MEXICAN: return "es_MX"; + case SUBLANG_SPANISH_MODERN: + return "es_ES@modern"; /* not seen on Unix */ + case SUBLANG_SPANISH_GUATEMALA: return "es_GT"; + case SUBLANG_SPANISH_COSTA_RICA: return "es_CR"; + case SUBLANG_SPANISH_PANAMA: return "es_PA"; + case SUBLANG_SPANISH_DOMINICAN_REPUBLIC: return "es_DO"; + case SUBLANG_SPANISH_VENEZUELA: return "es_VE"; + case SUBLANG_SPANISH_COLOMBIA: return "es_CO"; + case SUBLANG_SPANISH_PERU: return "es_PE"; + case SUBLANG_SPANISH_ARGENTINA: return "es_AR"; + case SUBLANG_SPANISH_ECUADOR: return "es_EC"; + case SUBLANG_SPANISH_CHILE: return "es_CL"; + case SUBLANG_SPANISH_URUGUAY: return "es_UY"; + case SUBLANG_SPANISH_PARAGUAY: return "es_PY"; + case SUBLANG_SPANISH_BOLIVIA: return "es_BO"; + case SUBLANG_SPANISH_EL_SALVADOR: return "es_SV"; + case SUBLANG_SPANISH_HONDURAS: return "es_HN"; + case SUBLANG_SPANISH_NICARAGUA: return "es_NI"; + case SUBLANG_SPANISH_PUERTO_RICO: return "es_PR"; + case SUBLANG_SPANISH_US: return "es_US"; + } + return "es"; + case LANG_SUTU: + switch (sub) + { + case SUBLANG_DEFAULT: return "bnt_TZ"; /* or "st_LS" or "nso_ZA"? */ + } + return "bnt"; + case LANG_SWAHILI: + switch (sub) + { + case SUBLANG_SWAHILI_KENYA: return "sw_KE"; + } + return "sw"; + case LANG_SWEDISH: + switch (sub) + { + case SUBLANG_SWEDISH_SWEDEN: return "sv_SE"; + case SUBLANG_SWEDISH_FINLAND: return "sv_FI"; + } + return "sv"; + case LANG_SYRIAC: + switch (sub) + { + case SUBLANG_SYRIAC_SYRIA: return "syr_SY"; /* An extinct language. */ + } + return "syr"; + case LANG_TAGALOG: + switch (sub) + { + case SUBLANG_TAGALOG_PHILIPPINES: return "tl_PH"; /* or "fil_PH"? */ + } + return "tl"; /* or "fil"? */ + case LANG_TAJIK: + switch (sub) + { + case 0x1f: return "tg"; + case SUBLANG_TAJIK_TAJIKISTAN: return "tg_TJ"; + } + return "tg"; + case LANG_TAMAZIGHT: + /* Note: Microsoft uses the non-ISO language code "tmz". */ + switch (sub) + { + /* FIXME: Adjust this when Tamazight locales appear on Unix. */ + case SUBLANG_TAMAZIGHT_ARABIC: return "ber_MA@arabic"; + case 0x1f: return "ber@latin"; + case SUBLANG_TAMAZIGHT_ALGERIA_LATIN: return "ber_DZ@latin"; + } + return "ber"; + case LANG_TAMIL: + switch (sub) + { + case SUBLANG_TAMIL_INDIA: return "ta_IN"; + } + return "ta"; /* Ambiguous: could be "ta_IN" or "ta_LK" or "ta_SG". */ + case LANG_TATAR: + switch (sub) + { + case SUBLANG_TATAR_RUSSIA: return "tt_RU"; + } + return "tt"; + case LANG_TELUGU: + switch (sub) + { + case SUBLANG_TELUGU_INDIA: return "te_IN"; + } + return "te"; + case LANG_THAI: + switch (sub) + { + case SUBLANG_THAI_THAILAND: return "th_TH"; + } + return "th"; + case LANG_TIBETAN: + switch (sub) + { + case SUBLANG_TIBETAN_PRC: + /* Most Tibetans would not like "bo_CN". But Tibet does not yet + have a country code of its own. */ + return "bo"; + case SUBLANG_TIBETAN_BHUTAN: return "bo_BT"; + } + return "bo"; + case LANG_TIGRINYA: + switch (sub) + { + case SUBLANG_TIGRINYA_ETHIOPIA: return "ti_ET"; + case SUBLANG_TIGRINYA_ERITREA: return "ti_ER"; + } + return "ti"; + case LANG_TSONGA: + switch (sub) + { + case SUBLANG_DEFAULT: return "ts_ZA"; + } + return "ts"; + case LANG_TSWANA: + /* Spoken in South Africa, Botswana. */ + switch (sub) + { + case SUBLANG_TSWANA_SOUTH_AFRICA: return "tn_ZA"; + } + return "tn"; + case LANG_TURKISH: + switch (sub) + { + case SUBLANG_TURKISH_TURKEY: return "tr_TR"; + } + return "tr"; + case LANG_TURKMEN: + switch (sub) + { + case SUBLANG_TURKMEN_TURKMENISTAN: return "tk_TM"; + } + return "tk"; + case LANG_UIGHUR: + switch (sub) + { + case SUBLANG_UIGHUR_PRC: return "ug_CN"; + } + return "ug"; + case LANG_UKRAINIAN: + switch (sub) + { + case SUBLANG_UKRAINIAN_UKRAINE: return "uk_UA"; + } + return "uk"; + case LANG_URDU: + switch (sub) + { + case SUBLANG_URDU_PAKISTAN: return "ur_PK"; + case SUBLANG_URDU_INDIA: return "ur_IN"; + } + return "ur"; + case LANG_UZBEK: + switch (sub) + { + case 0x1f: return "uz"; + case SUBLANG_UZBEK_LATIN: return "uz_UZ"; + case 0x1e: return "uz@cyrillic"; + case SUBLANG_UZBEK_CYRILLIC: return "uz_UZ@cyrillic"; + } + return "uz"; + case LANG_VENDA: + switch (sub) + { + case SUBLANG_DEFAULT: return "ve_ZA"; + } + return "ve"; + case LANG_VIETNAMESE: + switch (sub) + { + case SUBLANG_VIETNAMESE_VIETNAM: return "vi_VN"; + } + return "vi"; + case LANG_WELSH: + switch (sub) + { + case SUBLANG_WELSH_UNITED_KINGDOM: return "cy_GB"; + } + return "cy"; + case LANG_WOLOF: + switch (sub) + { + case SUBLANG_WOLOF_SENEGAL: return "wo_SN"; + } + return "wo"; + case LANG_XHOSA: + switch (sub) + { + case SUBLANG_XHOSA_SOUTH_AFRICA: return "xh_ZA"; + } + return "xh"; + case LANG_YAKUT: + switch (sub) + { + case SUBLANG_YAKUT_RUSSIA: return "sah_RU"; + } + return "sah"; + case LANG_YI: + switch (sub) + { + case SUBLANG_YI_PRC: return "ii_CN"; + } + return "ii"; + case LANG_YIDDISH: + switch (sub) + { + case SUBLANG_DEFAULT: return "yi_IL"; + } + return "yi"; + case LANG_YORUBA: + switch (sub) + { + case SUBLANG_YORUBA_NIGERIA: return "yo_NG"; + } + return "yo"; + case LANG_ZULU: + switch (sub) + { + case SUBLANG_ZULU_SOUTH_AFRICA: return "zu_ZA"; + } + return "zu"; + default: return "C"; + } + } +} + +# if !defined IN_LIBINTL +static +# endif +const char * +gl_locale_name_from_win32_LCID (LCID lcid) +{ + LANGID langid; + + /* Strip off the sorting rules, keep only the language part. */ + langid = LANGIDFROMLCID (lcid); + + return gl_locale_name_from_win32_LANGID (langid); +} + +#endif + + +#if HAVE_USELOCALE /* glibc or Mac OS X */ + +/* Simple hash set of strings. We don't want to drag in lots of hash table + code here. */ + +# define SIZE_BITS (sizeof (size_t) * CHAR_BIT) + +/* A hash function for NUL-terminated char* strings using + the method described by Bruno Haible. + See http://www.haible.de/bruno/hashfunc.html. */ +static size_t +string_hash (const void *x) +{ + const char *s = (const char *) x; + size_t h = 0; + + for (; *s; s++) + h = *s + ((h << 9) | (h >> (SIZE_BITS - 9))); + + return h; +} + +/* A hash table of fixed size. Multiple threads can access it read-only + simultaneously, but only one thread can insert into it at the same time. */ + +/* A node in a hash bucket collision list. */ +struct hash_node + { + struct hash_node * volatile next; + char contents[100]; /* has variable size */ + }; + +# define HASH_TABLE_SIZE 257 +static struct hash_node * volatile struniq_hash_table[HASH_TABLE_SIZE] + /* = { NULL, ..., NULL } */; + +/* This lock protects the struniq_hash_table against multiple simultaneous + insertions. */ +gl_lock_define_initialized(static, struniq_lock) + +/* Store a copy of the given string in a string pool with indefinite extent. + Return a pointer to this copy. */ +static const char * +struniq (const char *string) +{ + size_t hashcode = string_hash (string); + size_t slot = hashcode % HASH_TABLE_SIZE; + size_t size; + struct hash_node *new_node; + struct hash_node *p; + for (p = struniq_hash_table[slot]; p != NULL; p = p->next) + if (strcmp (p->contents, string) == 0) + return p->contents; + size = strlen (string) + 1; + new_node = + (struct hash_node *) + malloc (offsetof (struct hash_node, contents[0]) + size); + if (new_node == NULL) + /* Out of memory. Return a statically allocated string. */ + return "C"; + memcpy (new_node->contents, string, size); + /* Lock while inserting new_node. */ + gl_lock_lock (struniq_lock); + /* Check whether another thread already added the string while we were + waiting on the lock. */ + for (p = struniq_hash_table[slot]; p != NULL; p = p->next) + if (strcmp (p->contents, string) == 0) + { + free (new_node); + new_node = p; + goto done; + } + /* Really insert new_node into the hash table. Fill new_node entirely first, + because other threads may be iterating over the linked list. */ + new_node->next = struniq_hash_table[slot]; + struniq_hash_table[slot] = new_node; + done: + /* Unlock after new_node is inserted. */ + gl_lock_unlock (struniq_lock); + return new_node->contents; +} + +#endif + + +#if defined IN_LIBINTL || HAVE_USELOCALE + +/* Like gl_locale_name_thread, except that the result is not in storage of + indefinite extent. */ +# if !defined IN_LIBINTL +static +# endif +const char * +gl_locale_name_thread_unsafe (int category, const char *categoryname) +{ +# if HAVE_USELOCALE + { + locale_t thread_locale = uselocale (NULL); + if (thread_locale != LC_GLOBAL_LOCALE) + { +# if __GLIBC__ >= 2 && !defined __UCLIBC__ + /* Work around an incorrect definition of the _NL_LOCALE_NAME macro in + glibc < 2.12. + See <http://sourceware.org/bugzilla/show_bug.cgi?id=10968>. */ + const char *name = + nl_langinfo (_NL_ITEM ((category), _NL_ITEM_INDEX (-1))); + if (name[0] == '\0') + /* Fallback code for glibc < 2.4, which did not implement + nl_langinfo (_NL_LOCALE_NAME (category)). */ + name = thread_locale->__names[category]; + return name; +# elif defined __FreeBSD__ || (defined __APPLE__ && defined __MACH__) + /* FreeBSD, Mac OS X */ + int mask; + + switch (category) + { + case LC_CTYPE: + mask = LC_CTYPE_MASK; + break; + case LC_NUMERIC: + mask = LC_NUMERIC_MASK; + break; + case LC_TIME: + mask = LC_TIME_MASK; + break; + case LC_COLLATE: + mask = LC_COLLATE_MASK; + break; + case LC_MONETARY: + mask = LC_MONETARY_MASK; + break; + case LC_MESSAGES: + mask = LC_MESSAGES_MASK; + break; + default: /* We shouldn't get here. */ + return ""; + } + return querylocale (mask, thread_locale); +# endif + } + } +# endif + return NULL; +} + +#endif + +const char * +gl_locale_name_thread (int category, const char *categoryname) +{ +#if HAVE_USELOCALE + const char *name = gl_locale_name_thread_unsafe (category, categoryname); + if (name != NULL) + return struniq (name); +#endif + return NULL; +} + +/* XPG3 defines the result of 'setlocale (category, NULL)' as: + "Directs 'setlocale()' to query 'category' and return the current + setting of 'local'." + However it does not specify the exact format. Neither do SUSV2 and + ISO C 99. So we can use this feature only on selected systems (e.g. + those using GNU C Library). */ +#if defined _LIBC || ((defined __GLIBC__ && __GLIBC__ >= 2) && !defined __UCLIBC__) +# define HAVE_LOCALE_NULL +#endif + +const char * +gl_locale_name_posix (int category, const char *categoryname) +{ + /* Use the POSIX methods of looking to 'LC_ALL', 'LC_xxx', and 'LANG'. + On some systems this can be done by the 'setlocale' function itself. */ +#if defined HAVE_SETLOCALE && defined HAVE_LC_MESSAGES && defined HAVE_LOCALE_NULL + return setlocale (category, NULL); +#else + /* On other systems we ignore what setlocale reports and instead look at the + environment variables directly. This is necessary + 1. on systems which have a facility for customizing the default locale + (Mac OS X, native Windows, Cygwin) and where the system's setlocale() + function ignores this default locale (Mac OS X, Cygwin), in two cases: + a. when the user missed to use the setlocale() override from libintl + (for example by not including <libintl.h>), + b. when setlocale supports only the "C" locale, such as on Cygwin + 1.5.x. In this case even the override from libintl cannot help. + 2. on all systems where setlocale supports only the "C" locale. */ + /* Strictly speaking, it is a POSIX violation to look at the environment + variables regardless whether setlocale has been called or not. POSIX + says: + "For C-language programs, the POSIX locale shall be the + default locale when the setlocale() function is not called." + But we assume that all programs that use internationalized APIs call + setlocale (LC_ALL, ""). */ + return gl_locale_name_environ (category, categoryname); +#endif +} + +const char * +gl_locale_name_environ (int category, const char *categoryname) +{ + const char *retval; + + /* Setting of LC_ALL overrides all other. */ + retval = getenv ("LC_ALL"); + if (retval != NULL && retval[0] != '\0') + return retval; + /* Next comes the name of the desired category. */ + retval = getenv (categoryname); + if (retval != NULL && retval[0] != '\0') + return retval; + /* Last possibility is the LANG environment variable. */ + retval = getenv ("LANG"); + if (retval != NULL && retval[0] != '\0') + { +#if HAVE_CFLOCALECOPYCURRENT || HAVE_CFPREFERENCESCOPYAPPVALUE + /* Mac OS X 10.2 or newer. + Ignore invalid LANG value set by the Terminal application. */ + if (strcmp (retval, "UTF-8") != 0) +#endif +#if defined __CYGWIN__ + /* Cygwin. + Ignore dummy LANG value set by ~/.profile. */ + if (strcmp (retval, "C.UTF-8") != 0) +#endif + return retval; + } + + return NULL; +} + +const char * +gl_locale_name_default (void) +{ + /* POSIX:2001 says: + "All implementations shall define a locale as the default locale, to be + invoked when no environment variables are set, or set to the empty + string. This default locale can be the POSIX locale or any other + implementation-defined locale. Some implementations may provide + facilities for local installation administrators to set the default + locale, customizing it for each location. POSIX:2001 does not require + such a facility. + + The systems with such a facility are Mac OS X and Windows: They provide a + GUI that allows the user to choose a locale. + - On Mac OS X, by default, none of LC_* or LANG are set. Starting with + Mac OS X 10.4 or 10.5, LANG is set for processes launched by the + 'Terminal' application (but sometimes to an incorrect value "UTF-8"). + When no environment variable is set, setlocale (LC_ALL, "") uses the + "C" locale. + - On native Windows, by default, none of LC_* or LANG are set. + When no environment variable is set, setlocale (LC_ALL, "") uses the + locale chosen by the user. + - On Cygwin 1.5.x, by default, none of LC_* or LANG are set. + When no environment variable is set, setlocale (LC_ALL, "") uses the + "C" locale. + - On Cygwin 1.7, by default, LANG is set to "C.UTF-8" when the default + ~/.profile is executed. + When no environment variable is set, setlocale (LC_ALL, "") uses the + "C.UTF-8" locale, which operates in the same way as the "C" locale. + */ + +#if !(HAVE_CFLOCALECOPYCURRENT || HAVE_CFPREFERENCESCOPYAPPVALUE || defined WINDOWS_NATIVE || defined __CYGWIN__) + + /* The system does not have a way of setting the locale, other than the + POSIX specified environment variables. We use C as default locale. */ + return "C"; + +#else + + /* Return an XPG style locale name language[_territory][@modifier]. + Don't even bother determining the codeset; it's not useful in this + context, because message catalogs are not specific to a single + codeset. */ + +# if HAVE_CFLOCALECOPYCURRENT || HAVE_CFPREFERENCESCOPYAPPVALUE + /* Mac OS X 10.2 or newer */ + { + /* Cache the locale name, since CoreFoundation calls are expensive. */ + static const char *cached_localename; + + if (cached_localename == NULL) + { + char namebuf[256]; +# if HAVE_CFLOCALECOPYCURRENT /* Mac OS X 10.3 or newer */ + CFLocaleRef locale = CFLocaleCopyCurrent (); + CFStringRef name = CFLocaleGetIdentifier (locale); + + if (CFStringGetCString (name, namebuf, sizeof (namebuf), + kCFStringEncodingASCII)) + { + gl_locale_name_canonicalize (namebuf); + cached_localename = strdup (namebuf); + } + CFRelease (locale); +# elif HAVE_CFPREFERENCESCOPYAPPVALUE /* Mac OS X 10.2 or newer */ + CFTypeRef value = + CFPreferencesCopyAppValue (CFSTR ("AppleLocale"), + kCFPreferencesCurrentApplication); + if (value != NULL + && CFGetTypeID (value) == CFStringGetTypeID () + && CFStringGetCString ((CFStringRef)value, + namebuf, sizeof (namebuf), + kCFStringEncodingASCII)) + { + gl_locale_name_canonicalize (namebuf); + cached_localename = strdup (namebuf); + } +# endif + if (cached_localename == NULL) + cached_localename = "C"; + } + return cached_localename; + } + +# endif + +# if defined WINDOWS_NATIVE || defined __CYGWIN__ /* Native Windows or Cygwin */ + { + LCID lcid; + + /* Use native Windows API locale ID. */ + lcid = GetThreadLocale (); + + return gl_locale_name_from_win32_LCID (lcid); + } +# endif +#endif +} + +/* Determine the current locale's name, and canonicalize it into XPG syntax + language[_territory][.codeset][@modifier] + The codeset part in the result is not reliable; the locale_charset() + should be used for codeset information instead. + The result must not be freed; it is statically allocated. */ + +const char * +gl_locale_name (int category, const char *categoryname) +{ + const char *retval; + + retval = gl_locale_name_thread (category, categoryname); + if (retval != NULL) + return retval; + + retval = gl_locale_name_posix (category, categoryname); + if (retval != NULL) + return retval; + + return gl_locale_name_default (); +} diff --git a/gl/tests/localename.h b/gl/tests/localename.h new file mode 100644 index 0000000000..330932dd39 --- /dev/null +++ b/gl/tests/localename.h @@ -0,0 +1,95 @@ +/* Determine name of the currently selected locale. + Copyright (C) 2007, 2009-2012 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#ifndef _GL_LOCALENAME_H +#define _GL_LOCALENAME_H + +#ifdef __cplusplus +extern "C" { +#endif + + +/* Determine the current locale's name. + It considers both the POSIX notion of locale name (see functions + gl_locale_name_thread and gl_locale_name_posix) and the system notion + of locale name (see function gl_locale_name_default). + CATEGORY is a locale category abbreviation, as defined in <locale.h>, + but not LC_ALL. E.g. LC_MESSAGES. + CATEGORYNAME is the name of CATEGORY as a string, e.g. "LC_MESSAGES". + Return the locale category's name, canonicalized into XPG syntax + language[_territory][.codeset][@modifier] + The codeset part in the result is not reliable; the locale_charset() + should be used for codeset information instead. + The result must not be freed; it is statically allocated. */ +extern const char * gl_locale_name (int category, const char *categoryname); + +/* Determine the current per-thread locale's name, as specified by uselocale() + calls. + CATEGORY is a locale category abbreviation, as defined in <locale.h>, + but not LC_ALL. E.g. LC_MESSAGES. + CATEGORYNAME is the name of CATEGORY as a string, e.g. "LC_MESSAGES". + Return the locale category's name, canonicalized into XPG syntax + language[_territory][.codeset][@modifier] + or NULL if no locale has been specified for the current thread. + The codeset part in the result is not reliable; the locale_charset() + should be used for codeset information instead. + The result must not be freed; it is statically allocated. */ +extern const char * gl_locale_name_thread (int category, const char *categoryname); + +/* Determine the thread-independent current locale's name, as specified by + setlocale() calls or by environment variables. + CATEGORY is a locale category abbreviation, as defined in <locale.h>, + but not LC_ALL. E.g. LC_MESSAGES. + CATEGORYNAME is the name of CATEGORY as a string, e.g. "LC_MESSAGES". + Return the locale category's name, canonicalized into XPG syntax + language[_territory][.codeset][@modifier] + or NULL if no locale has been specified to setlocale() or by environment + variables. + The codeset part in the result is not reliable; the locale_charset() + should be used for codeset information instead. + The result must not be freed; it is statically allocated. */ +extern const char * gl_locale_name_posix (int category, const char *categoryname); + +/* Determine the default locale's name, as specified by environment + variables. + Return the locale category's name, or NULL if no locale has been specified + by environment variables. + The result must not be freed; it is statically allocated. */ +extern const char * gl_locale_name_environ (int category, const char *categoryname); + +/* Determine the default locale's name. This is the current locale's name, + if not specified by uselocale() calls, by setlocale() calls, or by + environment variables. This locale name is usually determined by systems + settings that the user can manipulate through a GUI. + + Quoting POSIX:2001: + "All implementations shall define a locale as the default locale, + to be invoked when no environment variables are set, or set to the + empty string. This default locale can be the C locale or any other + implementation-defined locale. Some implementations may provide + facilities for local installation administrators to set the default + locale, customizing it for each location. IEEE Std 1003.1-2001 does + not require such a facility." + + The result must not be freed; it is statically allocated. */ +extern const char * gl_locale_name_default (void); + + +#ifdef __cplusplus +} +#endif + +#endif /* _GL_LOCALENAME_H */ diff --git a/gl/tests/setlocale.c b/gl/tests/setlocale.c new file mode 100644 index 0000000000..8a47cdefcb --- /dev/null +++ b/gl/tests/setlocale.c @@ -0,0 +1,938 @@ +/* Set the current locale. + Copyright (C) 2009, 2011-2012 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>, 2009. */ + +#include <config.h> + +/* Override setlocale() so that when the default locale is requested + (locale = ""), the environment variables LC_ALL, LC_*, and LANG are + considered. + Also include all the functionality from libintl's setlocale() override. */ + +/* Please keep this file in sync with + gettext/gettext-runtime/intl/setlocale.c ! */ + +/* Specification. */ +#include <locale.h> + +#include <stdlib.h> +#include <string.h> + +#include "localename.h" + +#if 1 + +# undef setlocale + +/* Return string representation of locale category CATEGORY. */ +static const char * +category_to_name (int category) +{ + const char *retval; + + switch (category) + { + case LC_COLLATE: + retval = "LC_COLLATE"; + break; + case LC_CTYPE: + retval = "LC_CTYPE"; + break; + case LC_MONETARY: + retval = "LC_MONETARY"; + break; + case LC_NUMERIC: + retval = "LC_NUMERIC"; + break; + case LC_TIME: + retval = "LC_TIME"; + break; + case LC_MESSAGES: + retval = "LC_MESSAGES"; + break; + default: + /* If you have a better idea for a default value let me know. */ + retval = "LC_XXX"; + } + + return retval; +} + +# if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__ + +/* The native Windows setlocale() function expects locale names of the form + "German" or "German_Germany" or "DEU", but not "de" or "de_DE". We need + to convert the names from the form with ISO 639 language code and ISO 3166 + country code to the form with English names or with three-letter identifier. + The three-letter identifiers known by a Windows XP SP2 or SP3 are: + AFK Afrikaans_South Africa.1252 + ARA Arabic_Saudi Arabia.1256 + ARB Arabic_Lebanon.1256 + ARE Arabic_Egypt.1256 + ARG Arabic_Algeria.1256 + ARH Arabic_Bahrain.1256 + ARI Arabic_Iraq.1256 + ARJ Arabic_Jordan.1256 + ARK Arabic_Kuwait.1256 + ARL Arabic_Libya.1256 + ARM Arabic_Morocco.1256 + ARO Arabic_Oman.1256 + ARQ Arabic_Qatar.1256 + ARS Arabic_Syria.1256 + ART Arabic_Tunisia.1256 + ARU Arabic_U.A.E..1256 + ARY Arabic_Yemen.1256 + AZE Azeri (Latin)_Azerbaijan.1254 + BEL Belarusian_Belarus.1251 + BGR Bulgarian_Bulgaria.1251 + BSB Bosnian_Bosnia and Herzegovina.1250 + BSC Bosnian (Cyrillic)_Bosnia and Herzegovina.1250 (wrong encoding!) + CAT Catalan_Spain.1252 + CHH Chinese_Hong Kong S.A.R..950 + CHI Chinese_Singapore.936 + CHS Chinese_People's Republic of China.936 + CHT Chinese_Taiwan.950 + CSY Czech_Czech Republic.1250 + CYM Welsh_United Kingdom.1252 + DAN Danish_Denmark.1252 + DEA German_Austria.1252 + DEC German_Liechtenstein.1252 + DEL German_Luxembourg.1252 + DES German_Switzerland.1252 + DEU German_Germany.1252 + ELL Greek_Greece.1253 + ENA English_Australia.1252 + ENB English_Caribbean.1252 + ENC English_Canada.1252 + ENG English_United Kingdom.1252 + ENI English_Ireland.1252 + ENJ English_Jamaica.1252 + ENL English_Belize.1252 + ENP English_Republic of the Philippines.1252 + ENS English_South Africa.1252 + ENT English_Trinidad and Tobago.1252 + ENU English_United States.1252 + ENW English_Zimbabwe.1252 + ENZ English_New Zealand.1252 + ESA Spanish_Panama.1252 + ESB Spanish_Bolivia.1252 + ESC Spanish_Costa Rica.1252 + ESD Spanish_Dominican Republic.1252 + ESE Spanish_El Salvador.1252 + ESF Spanish_Ecuador.1252 + ESG Spanish_Guatemala.1252 + ESH Spanish_Honduras.1252 + ESI Spanish_Nicaragua.1252 + ESL Spanish_Chile.1252 + ESM Spanish_Mexico.1252 + ESN Spanish_Spain.1252 + ESO Spanish_Colombia.1252 + ESP Spanish_Spain.1252 + ESR Spanish_Peru.1252 + ESS Spanish_Argentina.1252 + ESU Spanish_Puerto Rico.1252 + ESV Spanish_Venezuela.1252 + ESY Spanish_Uruguay.1252 + ESZ Spanish_Paraguay.1252 + ETI Estonian_Estonia.1257 + EUQ Basque_Spain.1252 + FAR Farsi_Iran.1256 + FIN Finnish_Finland.1252 + FOS Faroese_Faroe Islands.1252 + FPO Filipino_Philippines.1252 + FRA French_France.1252 + FRB French_Belgium.1252 + FRC French_Canada.1252 + FRL French_Luxembourg.1252 + FRM French_Principality of Monaco.1252 + FRS French_Switzerland.1252 + FYN Frisian_Netherlands.1252 + GLC Galician_Spain.1252 + HEB Hebrew_Israel.1255 + HRB Croatian_Bosnia and Herzegovina.1250 + HRV Croatian_Croatia.1250 + HUN Hungarian_Hungary.1250 + IND Indonesian_Indonesia.1252 + IRE Irish_Ireland.1252 + ISL Icelandic_Iceland.1252 + ITA Italian_Italy.1252 + ITS Italian_Switzerland.1252 + IUK Inuktitut (Latin)_Canada.1252 + JPN Japanese_Japan.932 + KKZ Kazakh_Kazakhstan.1251 + KOR Korean_Korea.949 + KYR Kyrgyz_Kyrgyzstan.1251 + LBX Luxembourgish_Luxembourg.1252 + LTH Lithuanian_Lithuania.1257 + LVI Latvian_Latvia.1257 + MKI FYRO Macedonian_Former Yugoslav Republic of Macedonia.1251 + MON Mongolian_Mongolia.1251 + MPD Mapudungun_Chile.1252 + MSB Malay_Brunei Darussalam.1252 + MSL Malay_Malaysia.1252 + MWK Mohawk_Canada.1252 + NLB Dutch_Belgium.1252 + NLD Dutch_Netherlands.1252 + NON Norwegian-Nynorsk_Norway.1252 + NOR Norwegian (Bokmål)_Norway.1252 + NSO Northern Sotho_South Africa.1252 + PLK Polish_Poland.1250 + PTB Portuguese_Brazil.1252 + PTG Portuguese_Portugal.1252 + QUB Quechua_Bolivia.1252 + QUE Quechua_Ecuador.1252 + QUP Quechua_Peru.1252 + RMC Romansh_Switzerland.1252 + ROM Romanian_Romania.1250 + RUS Russian_Russia.1251 + SKY Slovak_Slovakia.1250 + SLV Slovenian_Slovenia.1250 + SMA Sami (Southern)_Norway.1252 + SMB Sami (Southern)_Sweden.1252 + SME Sami (Northern)_Norway.1252 + SMF Sami (Northern)_Sweden.1252 + SMG Sami (Northern)_Finland.1252 + SMJ Sami (Lule)_Norway.1252 + SMK Sami (Lule)_Sweden.1252 + SMN Sami (Inari)_Finland.1252 + SMS Sami (Skolt)_Finland.1252 + SQI Albanian_Albania.1250 + SRB Serbian (Cyrillic)_Serbia and Montenegro.1251 + SRL Serbian (Latin)_Serbia and Montenegro.1250 + SRN Serbian (Cyrillic)_Bosnia and Herzegovina.1251 + SRS Serbian (Latin)_Bosnia and Herzegovina.1250 + SVE Swedish_Sweden.1252 + SVF Swedish_Finland.1252 + SWK Swahili_Kenya.1252 + THA Thai_Thailand.874 + TRK Turkish_Turkey.1254 + TSN Tswana_South Africa.1252 + TTT Tatar_Russia.1251 + UKR Ukrainian_Ukraine.1251 + URD Urdu_Islamic Republic of Pakistan.1256 + USA English_United States.1252 + UZB Uzbek (Latin)_Uzbekistan.1254 + VIT Vietnamese_Viet Nam.1258 + XHO Xhosa_South Africa.1252 + ZHH Chinese_Hong Kong S.A.R..950 + ZHI Chinese_Singapore.936 + ZHM Chinese_Macau S.A.R..950 + ZUL Zulu_South Africa.1252 + */ + +/* Table from ISO 639 language code, optionally with country or script suffix, + to English name. + Keep in sync with the gl_locale_name_from_win32_LANGID function in + localename.c! */ +struct table_entry +{ + const char *code; + const char *english; +}; +static const struct table_entry language_table[] = + { + { "af", "Afrikaans" }, + { "am", "Amharic" }, + { "ar", "Arabic" }, + { "arn", "Mapudungun" }, + { "as", "Assamese" }, + { "az@cyrillic", "Azeri (Cyrillic)" }, + { "az@latin", "Azeri (Latin)" }, + { "ba", "Bashkir" }, + { "be", "Belarusian" }, + { "ber", "Tamazight" }, + { "ber@arabic", "Tamazight (Arabic)" }, + { "ber@latin", "Tamazight (Latin)" }, + { "bg", "Bulgarian" }, + { "bin", "Edo" }, + { "bn", "Bengali" }, + { "bn_BD", "Bengali (Bangladesh)" }, + { "bn_IN", "Bengali (India)" }, + { "bnt", "Sutu" }, + { "bo", "Tibetan" }, + { "br", "Breton" }, + { "bs", "BSB" }, /* "Bosnian (Latin)" */ + { "bs@cyrillic", "BSC" }, /* Bosnian (Cyrillic) */ + { "ca", "Catalan" }, + { "chr", "Cherokee" }, + { "co", "Corsican" }, + { "cpe", "Hawaiian" }, + { "cs", "Czech" }, + { "cy", "Welsh" }, + { "da", "Danish" }, + { "de", "German" }, + { "dsb", "Lower Sorbian" }, + { "dv", "Divehi" }, + { "el", "Greek" }, + { "en", "English" }, + { "es", "Spanish" }, + { "et", "Estonian" }, + { "eu", "Basque" }, + { "fa", "Farsi" }, + { "ff", "Fulfulde" }, + { "fi", "Finnish" }, + { "fo", "Faroese" }, /* "Faeroese" does not work */ + { "fr", "French" }, + { "fy", "Frisian" }, + { "ga", "IRE" }, /* Gaelic (Ireland) */ + { "gd", "Gaelic (Scotland)" }, + { "gd", "Scottish Gaelic" }, + { "gl", "Galician" }, + { "gn", "Guarani" }, + { "gsw", "Alsatian" }, + { "gu", "Gujarati" }, + { "ha", "Hausa" }, + { "he", "Hebrew" }, + { "hi", "Hindi" }, + { "hr", "Croatian" }, + { "hsb", "Upper Sorbian" }, + { "hu", "Hungarian" }, + { "hy", "Armenian" }, + { "id", "Indonesian" }, + { "ig", "Igbo" }, + { "ii", "Yi" }, + { "is", "Icelandic" }, + { "it", "Italian" }, + { "iu", "IUK" }, /* Inuktitut */ + { "ja", "Japanese" }, + { "ka", "Georgian" }, + { "kk", "Kazakh" }, + { "kl", "Greenlandic" }, + { "km", "Cambodian" }, + { "km", "Khmer" }, + { "kn", "Kannada" }, + { "ko", "Korean" }, + { "kok", "Konkani" }, + { "kr", "Kanuri" }, + { "ks", "Kashmiri" }, + { "ks_IN", "Kashmiri_India" }, + { "ks_PK", "Kashmiri (Arabic)_Pakistan" }, + { "ky", "Kyrgyz" }, + { "la", "Latin" }, + { "lb", "Luxembourgish" }, + { "lo", "Lao" }, + { "lt", "Lithuanian" }, + { "lv", "Latvian" }, + { "mi", "Maori" }, + { "mk", "FYRO Macedonian" }, + { "mk", "Macedonian" }, + { "ml", "Malayalam" }, + { "mn", "Mongolian" }, + { "mni", "Manipuri" }, + { "moh", "Mohawk" }, + { "mr", "Marathi" }, + { "ms", "Malay" }, + { "mt", "Maltese" }, + { "my", "Burmese" }, + { "nb", "NOR" }, /* Norwegian Bokmål */ + { "ne", "Nepali" }, + { "nic", "Ibibio" }, + { "nl", "Dutch" }, + { "nn", "NON" }, /* Norwegian Nynorsk */ + { "no", "Norwegian" }, + { "nso", "Northern Sotho" }, + { "nso", "Sepedi" }, + { "oc", "Occitan" }, + { "om", "Oromo" }, + { "or", "Oriya" }, + { "pa", "Punjabi" }, + { "pap", "Papiamentu" }, + { "pl", "Polish" }, + { "prs", "Dari" }, + { "ps", "Pashto" }, + { "pt", "Portuguese" }, + { "qu", "Quechua" }, + { "qut", "K'iche'" }, + { "rm", "Romansh" }, + { "ro", "Romanian" }, + { "ru", "Russian" }, + { "rw", "Kinyarwanda" }, + { "sa", "Sanskrit" }, + { "sah", "Yakut" }, + { "sd", "Sindhi" }, + { "se", "Sami (Northern)" }, + { "se", "Northern Sami" }, + { "si", "Sinhalese" }, + { "sk", "Slovak" }, + { "sl", "Slovenian" }, + { "sma", "Sami (Southern)" }, + { "sma", "Southern Sami" }, + { "smj", "Sami (Lule)" }, + { "smj", "Lule Sami" }, + { "smn", "Sami (Inari)" }, + { "smn", "Inari Sami" }, + { "sms", "Sami (Skolt)" }, + { "sms", "Skolt Sami" }, + { "so", "Somali" }, + { "sq", "Albanian" }, + { "sr", "Serbian (Latin)" }, + { "sr@cyrillic", "SRB" }, /* Serbian (Cyrillic) */ + { "sw", "Swahili" }, + { "syr", "Syriac" }, + { "ta", "Tamil" }, + { "te", "Telugu" }, + { "tg", "Tajik" }, + { "th", "Thai" }, + { "ti", "Tigrinya" }, + { "tk", "Turkmen" }, + { "tl", "Filipino" }, + { "tn", "Tswana" }, + { "tr", "Turkish" }, + { "ts", "Tsonga" }, + { "tt", "Tatar" }, + { "ug", "Uighur" }, + { "uk", "Ukrainian" }, + { "ur", "Urdu" }, + { "uz", "Uzbek" }, + { "uz", "Uzbek (Latin)" }, + { "uz@cyrillic", "Uzbek (Cyrillic)" }, + { "ve", "Venda" }, + { "vi", "Vietnamese" }, + { "wen", "Sorbian" }, + { "wo", "Wolof" }, + { "xh", "Xhosa" }, + { "yi", "Yiddish" }, + { "yo", "Yoruba" }, + { "zh", "Chinese" }, + { "zu", "Zulu" } + }; + +/* Table from ISO 3166 country code to English name. + Keep in sync with the gl_locale_name_from_win32_LANGID function in + localename.c! */ +static const struct table_entry country_table[] = + { + { "AE", "U.A.E." }, + { "AF", "Afghanistan" }, + { "AL", "Albania" }, + { "AM", "Armenia" }, + { "AN", "Netherlands Antilles" }, + { "AR", "Argentina" }, + { "AT", "Austria" }, + { "AU", "Australia" }, + { "AZ", "Azerbaijan" }, + { "BA", "Bosnia and Herzegovina" }, + { "BD", "Bangladesh" }, + { "BE", "Belgium" }, + { "BG", "Bulgaria" }, + { "BH", "Bahrain" }, + { "BN", "Brunei Darussalam" }, + { "BO", "Bolivia" }, + { "BR", "Brazil" }, + { "BT", "Bhutan" }, + { "BY", "Belarus" }, + { "BZ", "Belize" }, + { "CA", "Canada" }, + { "CG", "Congo" }, + { "CH", "Switzerland" }, + { "CI", "Cote d'Ivoire" }, + { "CL", "Chile" }, + { "CM", "Cameroon" }, + { "CN", "People's Republic of China" }, + { "CO", "Colombia" }, + { "CR", "Costa Rica" }, + { "CS", "Serbia and Montenegro" }, + { "CZ", "Czech Republic" }, + { "DE", "Germany" }, + { "DK", "Denmark" }, + { "DO", "Dominican Republic" }, + { "DZ", "Algeria" }, + { "EC", "Ecuador" }, + { "EE", "Estonia" }, + { "EG", "Egypt" }, + { "ER", "Eritrea" }, + { "ES", "Spain" }, + { "ET", "Ethiopia" }, + { "FI", "Finland" }, + { "FO", "Faroe Islands" }, + { "FR", "France" }, + { "GB", "United Kingdom" }, + { "GD", "Caribbean" }, + { "GE", "Georgia" }, + { "GL", "Greenland" }, + { "GR", "Greece" }, + { "GT", "Guatemala" }, + { "HK", "Hong Kong" }, + { "HK", "Hong Kong S.A.R." }, + { "HN", "Honduras" }, + { "HR", "Croatia" }, + { "HT", "Haiti" }, + { "HU", "Hungary" }, + { "ID", "Indonesia" }, + { "IE", "Ireland" }, + { "IL", "Israel" }, + { "IN", "India" }, + { "IQ", "Iraq" }, + { "IR", "Iran" }, + { "IS", "Iceland" }, + { "IT", "Italy" }, + { "JM", "Jamaica" }, + { "JO", "Jordan" }, + { "JP", "Japan" }, + { "KE", "Kenya" }, + { "KG", "Kyrgyzstan" }, + { "KH", "Cambodia" }, + { "KR", "South Korea" }, + { "KW", "Kuwait" }, + { "KZ", "Kazakhstan" }, + { "LA", "Laos" }, + { "LB", "Lebanon" }, + { "LI", "Liechtenstein" }, + { "LK", "Sri Lanka" }, + { "LT", "Lithuania" }, + { "LU", "Luxembourg" }, + { "LV", "Latvia" }, + { "LY", "Libya" }, + { "MA", "Morocco" }, + { "MC", "Principality of Monaco" }, + { "MD", "Moldava" }, + { "MD", "Moldova" }, + { "ME", "Montenegro" }, + { "MK", "Former Yugoslav Republic of Macedonia" }, + { "ML", "Mali" }, + { "MM", "Myanmar" }, + { "MN", "Mongolia" }, + { "MO", "Macau S.A.R." }, + { "MT", "Malta" }, + { "MV", "Maldives" }, + { "MX", "Mexico" }, + { "MY", "Malaysia" }, + { "NG", "Nigeria" }, + { "NI", "Nicaragua" }, + { "NL", "Netherlands" }, + { "NO", "Norway" }, + { "NP", "Nepal" }, + { "NZ", "New Zealand" }, + { "OM", "Oman" }, + { "PA", "Panama" }, + { "PE", "Peru" }, + { "PH", "Philippines" }, + { "PK", "Islamic Republic of Pakistan" }, + { "PL", "Poland" }, + { "PR", "Puerto Rico" }, + { "PT", "Portugal" }, + { "PY", "Paraguay" }, + { "QA", "Qatar" }, + { "RE", "Reunion" }, + { "RO", "Romania" }, + { "RS", "Serbia" }, + { "RU", "Russia" }, + { "RW", "Rwanda" }, + { "SA", "Saudi Arabia" }, + { "SE", "Sweden" }, + { "SG", "Singapore" }, + { "SI", "Slovenia" }, + { "SK", "Slovak" }, + { "SN", "Senegal" }, + { "SO", "Somalia" }, + { "SR", "Suriname" }, + { "SV", "El Salvador" }, + { "SY", "Syria" }, + { "TH", "Thailand" }, + { "TJ", "Tajikistan" }, + { "TM", "Turkmenistan" }, + { "TN", "Tunisia" }, + { "TR", "Turkey" }, + { "TT", "Trinidad and Tobago" }, + { "TW", "Taiwan" }, + { "TZ", "Tanzania" }, + { "UA", "Ukraine" }, + { "US", "United States" }, + { "UY", "Uruguay" }, + { "VA", "Vatican" }, + { "VE", "Venezuela" }, + { "VN", "Viet Nam" }, + { "YE", "Yemen" }, + { "ZA", "South Africa" }, + { "ZW", "Zimbabwe" } + }; + +/* Given a string STRING, find the set of indices i such that TABLE[i].code is + the given STRING. It is a range [lo,hi-1]. */ +typedef struct { size_t lo; size_t hi; } range_t; +static void +search (const struct table_entry *table, size_t table_size, const char *string, + range_t *result) +{ + /* The table is sorted. Perform a binary search. */ + size_t hi = table_size; + size_t lo = 0; + while (lo < hi) + { + /* Invariant: + for i < lo, strcmp (table[i].code, string) < 0, + for i >= hi, strcmp (table[i].code, string) > 0. */ + size_t mid = (hi + lo) >> 1; /* >= lo, < hi */ + int cmp = strcmp (table[mid].code, string); + if (cmp < 0) + lo = mid + 1; + else if (cmp > 0) + hi = mid; + else + { + /* Found an i with + strcmp (language_table[i].code, string) == 0. + Find the entire interval of such i. */ + { + size_t i; + + for (i = mid; i > lo; ) + { + i--; + if (strcmp (table[i].code, string) < 0) + { + lo = i + 1; + break; + } + } + } + { + size_t i; + + for (i = mid; i < hi; i++) + { + if (strcmp (table[i].code, string) > 0) + { + hi = i; + break; + } + } + } + /* The set of i with + strcmp (language_table[i].code, string) == 0 + is the interval [lo, hi-1]. */ + break; + } + } + result->lo = lo; + result->hi = hi; +} + +/* Like setlocale, but accept also locale names in the form ll or ll_CC, + where ll is an ISO 639 language code and CC is an ISO 3166 country code. */ +static char * +setlocale_unixlike (int category, const char *locale) +{ + char *result; + char llCC_buf[64]; + char ll_buf[64]; + char CC_buf[64]; + + /* First, try setlocale with the original argument unchanged. */ + result = setlocale (category, locale); + if (result != NULL) + return result; + + /* Otherwise, assume the argument is in the form + language[_territory][.codeset][@modifier] + and try to map it using the tables. */ + if (strlen (locale) < sizeof (llCC_buf)) + { + /* Second try: Remove the codeset part. */ + { + const char *p = locale; + char *q = llCC_buf; + + /* Copy the part before the dot. */ + for (; *p != '\0' && *p != '.'; p++, q++) + *q = *p; + if (*p == '.') + /* Skip the part up to the '@', if any. */ + for (; *p != '\0' && *p != '@'; p++) + ; + /* Copy the part starting with '@', if any. */ + for (; *p != '\0'; p++, q++) + *q = *p; + *q = '\0'; + } + /* llCC_buf now contains + language[_territory][@modifier] + */ + if (strcmp (llCC_buf, locale) != 0) + { + result = setlocale (category, llCC_buf); + if (result != NULL) + return result; + } + /* Look it up in language_table. */ + { + range_t range; + size_t i; + + search (language_table, + sizeof (language_table) / sizeof (language_table[0]), + llCC_buf, + &range); + + for (i = range.lo; i < range.hi; i++) + { + /* Try the replacement in language_table[i]. */ + result = setlocale (category, language_table[i].english); + if (result != NULL) + return result; + } + } + /* Split language[_territory][@modifier] + into ll_buf = language[@modifier] + and CC_buf = territory + */ + { + const char *underscore = strchr (llCC_buf, '_'); + if (underscore != NULL) + { + const char *territory_start = underscore + 1; + const char *territory_end = strchr (territory_start, '@'); + if (territory_end == NULL) + territory_end = territory_start + strlen (territory_start); + + memcpy (ll_buf, llCC_buf, underscore - llCC_buf); + strcpy (ll_buf + (underscore - llCC_buf), territory_end); + + memcpy (CC_buf, territory_start, territory_end - territory_start); + CC_buf[territory_end - territory_start] = '\0'; + + { + /* Look up ll_buf in language_table + and CC_buf in country_table. */ + range_t language_range; + + search (language_table, + sizeof (language_table) / sizeof (language_table[0]), + ll_buf, + &language_range); + if (language_range.lo < language_range.hi) + { + range_t country_range; + + search (country_table, + sizeof (country_table) / sizeof (country_table[0]), + CC_buf, + &country_range); + if (country_range.lo < country_range.hi) + { + size_t i; + size_t j; + + for (i = language_range.lo; i < language_range.hi; i++) + for (j = country_range.lo; j < country_range.hi; j++) + { + /* Concatenate the replacements. */ + const char *part1 = language_table[i].english; + size_t part1_len = strlen (part1); + const char *part2 = country_table[j].english; + size_t part2_len = strlen (part2) + 1; + char buf[64+64]; + + if (!(part1_len + 1 + part2_len <= sizeof (buf))) + abort (); + memcpy (buf, part1, part1_len); + buf[part1_len] = '_'; + memcpy (buf + part1_len + 1, part2, part2_len); + + /* Try the concatenated replacements. */ + result = setlocale (category, buf); + if (result != NULL) + return result; + } + } + + /* Try omitting the country entirely. This may set a locale + corresponding to the wrong country, but is better than + failing entirely. */ + { + size_t i; + + for (i = language_range.lo; i < language_range.hi; i++) + { + /* Try only the language replacement. */ + result = + setlocale (category, language_table[i].english); + if (result != NULL) + return result; + } + } + } + } + } + } + } + + /* Failed. */ + return NULL; +} + +# else +# define setlocale_unixlike setlocale +# endif + +# if LC_MESSAGES == 1729 + +/* The system does not store an LC_MESSAGES locale category. Do it here. */ +static char lc_messages_name[64] = "C"; + +/* Like setlocale, but support also LC_MESSAGES. */ +static char * +setlocale_single (int category, const char *locale) +{ + if (category == LC_MESSAGES) + { + if (locale != NULL) + { + lc_messages_name[sizeof (lc_messages_name) - 1] = '\0'; + strncpy (lc_messages_name, locale, sizeof (lc_messages_name) - 1); + } + return lc_messages_name; + } + else + return setlocale_unixlike (category, locale); +} + +# else +# define setlocale_single setlocale_unixlike +# endif + +char * +rpl_setlocale (int category, const char *locale) +{ + if (locale != NULL && locale[0] == '\0') + { + /* A request to the set the current locale to the default locale. */ + if (category == LC_ALL) + { + /* Set LC_CTYPE first. Then the other categories. */ + static int const categories[] = + { + LC_NUMERIC, + LC_TIME, + LC_COLLATE, + LC_MONETARY, + LC_MESSAGES + }; + char *saved_locale; + const char *base_name; + unsigned int i; + + /* Back up the old locale, in case one of the steps fails. */ + saved_locale = setlocale (LC_ALL, NULL); + if (saved_locale == NULL) + return NULL; + saved_locale = strdup (saved_locale); + if (saved_locale == NULL) + return NULL; + + /* Set LC_CTYPE category. Set all other categories (except possibly + LC_MESSAGES) to the same value in the same call; this is likely to + save calls. */ + base_name = + gl_locale_name_environ (LC_CTYPE, category_to_name (LC_CTYPE)); + if (base_name == NULL) + base_name = gl_locale_name_default (); + + if (setlocale_unixlike (LC_ALL, base_name) == NULL) + goto fail; +# if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__ + /* On native Windows, setlocale(LC_ALL,...) may succeed but set the + LC_CTYPE category to an invalid value ("C") when it does not + support the specified encoding. Report a failure instead. */ + if (strchr (base_name, '.') != NULL + && strcmp (setlocale (LC_CTYPE, NULL), "C") == 0) + goto fail; +# endif + + for (i = 0; i < sizeof (categories) / sizeof (categories[0]); i++) + { + int cat = categories[i]; + const char *name; + + name = gl_locale_name_environ (cat, category_to_name (cat)); + if (name == NULL) + name = gl_locale_name_default (); + + /* If name is the same as base_name, it has already been set + through the setlocale call before the loop. */ + if (strcmp (name, base_name) != 0 +# if LC_MESSAGES == 1729 + || cat == LC_MESSAGES +# endif + ) + if (setlocale_single (cat, name) == NULL) + goto fail; + } + + /* All steps were successful. */ + free (saved_locale); + return setlocale (LC_ALL, NULL); + + fail: + if (saved_locale[0] != '\0') /* don't risk an endless recursion */ + setlocale (LC_ALL, saved_locale); + free (saved_locale); + return NULL; + } + else + { + const char *name = + gl_locale_name_environ (category, category_to_name (category)); + if (name == NULL) + name = gl_locale_name_default (); + + return setlocale_single (category, name); + } + } + else + { +# if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__ + if (category == LC_ALL && locale != NULL && strchr (locale, '.') != NULL) + { + char *saved_locale; + + /* Back up the old locale. */ + saved_locale = setlocale (LC_ALL, NULL); + if (saved_locale == NULL) + return NULL; + saved_locale = strdup (saved_locale); + if (saved_locale == NULL) + return NULL; + + if (setlocale_unixlike (LC_ALL, locale) == NULL) + { + free (saved_locale); + return NULL; + } + + /* On native Windows, setlocale(LC_ALL,...) may succeed but set the + LC_CTYPE category to an invalid value ("C") when it does not + support the specified encoding. Report a failure instead. */ + if (strcmp (setlocale (LC_CTYPE, NULL), "C") == 0) + { + if (saved_locale[0] != '\0') /* don't risk an endless recursion */ + setlocale (LC_ALL, saved_locale); + free (saved_locale); + return NULL; + } + + /* It was really successful. */ + free (saved_locale); + return setlocale (LC_ALL, NULL); + } + else +# endif + return setlocale_single (category, locale); + } +} + +#endif diff --git a/gl/tests/test-c-strcase.sh b/gl/tests/test-c-strcase.sh new file mode 100755 index 0000000000..5fcf906b66 --- /dev/null +++ b/gl/tests/test-c-strcase.sh @@ -0,0 +1,21 @@ +#!/bin/sh + +# Test in the C locale. +./test-c-strcasecmp${EXEEXT} || exit 1 +./test-c-strncasecmp${EXEEXT} || exit 1 + +# Test in an ISO-8859-1 or ISO-8859-15 locale. +: ${LOCALE_FR=fr_FR} +if test $LOCALE_FR != none; then + LC_ALL=$LOCALE_FR ./test-c-strcasecmp${EXEEXT} locale || exit 1 + LC_ALL=$LOCALE_FR ./test-c-strncasecmp${EXEEXT} locale || exit 1 +fi + +# Test in a Turkish UTF-8 locale. +: ${LOCALE_TR_UTF8=tr_TR.UTF-8} +if test $LOCALE_TR_UTF8 != none; then + LC_ALL=$LOCALE_TR_UTF8 ./test-c-strcasecmp${EXEEXT} locale || exit 1 + LC_ALL=$LOCALE_TR_UTF8 ./test-c-strncasecmp${EXEEXT} locale || exit 1 +fi + +exit 0 diff --git a/gl/tests/test-c-strcasecmp.c b/gl/tests/test-c-strcasecmp.c new file mode 100644 index 0000000000..1eb595887c --- /dev/null +++ b/gl/tests/test-c-strcasecmp.c @@ -0,0 +1,65 @@ +/* Test of case-insensitive string comparison function. + Copyright (C) 2007-2012 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>, 2007. */ + +#include <config.h> + +#include "c-strcase.h" + +#include <locale.h> +#include <string.h> + +#include "macros.h" + +int +main (int argc, char *argv[]) +{ + if (argc > 1) + { + /* configure should already have checked that the locale is supported. */ + if (setlocale (LC_ALL, "") == NULL) + return 1; + } + + ASSERT (c_strcasecmp ("paragraph", "Paragraph") == 0); + + ASSERT (c_strcasecmp ("paragrapH", "parAgRaph") == 0); + + ASSERT (c_strcasecmp ("paragraph", "paraLyzed") < 0); + ASSERT (c_strcasecmp ("paraLyzed", "paragraph") > 0); + + ASSERT (c_strcasecmp ("para", "paragraph") < 0); + ASSERT (c_strcasecmp ("paragraph", "para") > 0); + + /* The following tests shows how c_strcasecmp() is different from + strcasecmp(). */ + + ASSERT (c_strcasecmp ("\311mile", "\351mile") < 0); + ASSERT (c_strcasecmp ("\351mile", "\311mile") > 0); + + /* The following tests shows how c_strcasecmp() is different from + mbscasecmp(). */ + + ASSERT (c_strcasecmp ("\303\266zg\303\274r", "\303\226ZG\303\234R") > 0); /* özgür */ + ASSERT (c_strcasecmp ("\303\226ZG\303\234R", "\303\266zg\303\274r") < 0); /* özgür */ + + /* This test shows how strings of different size cannot compare equal. */ + ASSERT (c_strcasecmp ("turkish", "TURK\304\260SH") < 0); + ASSERT (c_strcasecmp ("TURK\304\260SH", "turkish") > 0); + + return 0; +} diff --git a/gl/tests/test-c-strncasecmp.c b/gl/tests/test-c-strncasecmp.c new file mode 100644 index 0000000000..479528b37f --- /dev/null +++ b/gl/tests/test-c-strncasecmp.c @@ -0,0 +1,79 @@ +/* Test of case-insensitive string comparison function. + Copyright (C) 2007-2012 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>, 2007. */ + +#include <config.h> + +#include "c-strcase.h" + +#include <locale.h> +#include <string.h> + +#include "macros.h" + +int +main (int argc, char *argv[]) +{ + if (argc > 1) + { + /* configure should already have checked that the locale is supported. */ + if (setlocale (LC_ALL, "") == NULL) + return 1; + } + + ASSERT (c_strncasecmp ("paragraph", "Paragraph", 1000000) == 0); + ASSERT (c_strncasecmp ("paragraph", "Paragraph", 9) == 0); + + ASSERT (c_strncasecmp ("paragrapH", "parAgRaph", 1000000) == 0); + ASSERT (c_strncasecmp ("paragrapH", "parAgRaph", 9) == 0); + + ASSERT (c_strncasecmp ("paragraph", "paraLyzed", 10) < 0); + ASSERT (c_strncasecmp ("paragraph", "paraLyzed", 9) < 0); + ASSERT (c_strncasecmp ("paragraph", "paraLyzed", 5) < 0); + ASSERT (c_strncasecmp ("paragraph", "paraLyzed", 4) == 0); + ASSERT (c_strncasecmp ("paraLyzed", "paragraph", 10) > 0); + ASSERT (c_strncasecmp ("paraLyzed", "paragraph", 9) > 0); + ASSERT (c_strncasecmp ("paraLyzed", "paragraph", 5) > 0); + ASSERT (c_strncasecmp ("paraLyzed", "paragraph", 4) == 0); + + ASSERT (c_strncasecmp ("para", "paragraph", 10) < 0); + ASSERT (c_strncasecmp ("para", "paragraph", 9) < 0); + ASSERT (c_strncasecmp ("para", "paragraph", 5) < 0); + ASSERT (c_strncasecmp ("para", "paragraph", 4) == 0); + ASSERT (c_strncasecmp ("paragraph", "para", 10) > 0); + ASSERT (c_strncasecmp ("paragraph", "para", 9) > 0); + ASSERT (c_strncasecmp ("paragraph", "para", 5) > 0); + ASSERT (c_strncasecmp ("paragraph", "para", 4) == 0); + + /* The following tests shows how c_strncasecmp() is different from + strncasecmp(). */ + + ASSERT (c_strncasecmp ("\311mily", "\351mile", 4) < 0); + ASSERT (c_strncasecmp ("\351mile", "\311mily", 4) > 0); + + /* The following tests shows how c_strncasecmp() is different from + mbsncasecmp(). */ + + ASSERT (c_strncasecmp ("\303\266zg\303\274r", "\303\226ZG\303\234R", 99) > 0); /* özgür */ + ASSERT (c_strncasecmp ("\303\226ZG\303\234R", "\303\266zg\303\274r", 99) < 0); /* özgür */ + + /* This test shows how strings of different size cannot compare equal. */ + ASSERT (c_strncasecmp ("turkish", "TURK\304\260SH", 7) < 0); + ASSERT (c_strncasecmp ("TURK\304\260SH", "turkish", 7) > 0); + + return 0; +} diff --git a/gl/tests/test-iconv-h.c b/gl/tests/test-iconv-h.c new file mode 100644 index 0000000000..bc2bfb8f1e --- /dev/null +++ b/gl/tests/test-iconv-h.c @@ -0,0 +1,27 @@ +/* Test of <iconv.h> substitute. + Copyright (C) 2007-2012 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> + +#if HAVE_ICONV +# include <iconv.h> +#endif + +int +main () +{ + return 0; +} diff --git a/gl/tests/test-iconv-utf.c b/gl/tests/test-iconv-utf.c new file mode 100644 index 0000000000..c02735e172 --- /dev/null +++ b/gl/tests/test-iconv-utf.c @@ -0,0 +1,288 @@ +/* Test of character set conversion. + Copyright (C) 2007-2012 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>, 2007. */ + +#include <config.h> + +#if HAVE_ICONV +# include <iconv.h> +#endif + +#include <errno.h> +#include <string.h> + +#include "macros.h" + +int +main () +{ +#if HAVE_ICONV + /* Assume that iconv() supports at least the encoding UTF-8. */ + + /* The text is "Japanese (日本語) [\U0001D50D\U0001D51E\U0001D52D]". */ + + /* Test conversion from UTF-8 to UTF-16BE with no errors. */ + { + static const char input[] = + "Japanese (\346\227\245\346\234\254\350\252\236) [\360\235\224\215\360\235\224\236\360\235\224\255]"; + static const char expected[] = + "\000J\000a\000p\000a\000n\000e\000s\000e\000 \000(\145\345\147\054\212\236\000)\000 \000[\330\065\335\015\330\065\335\036\330\065\335\055\000]"; + iconv_t cd; + char buf[100]; + const char *inptr; + size_t inbytesleft; + char *outptr; + size_t outbytesleft; + size_t res; + + cd = iconv_open ("UTF-16BE", "UTF-8"); + ASSERT (cd != (iconv_t)(-1)); + + inptr = input; + inbytesleft = sizeof (input) - 1; + outptr = buf; + outbytesleft = sizeof (buf); + res = iconv (cd, + (ICONV_CONST char **) &inptr, &inbytesleft, + &outptr, &outbytesleft); + ASSERT (res == 0 && inbytesleft == 0); + ASSERT (outptr == buf + (sizeof (expected) - 1)); + ASSERT (memcmp (buf, expected, sizeof (expected) - 1) == 0); + + ASSERT (iconv_close (cd) == 0); + } + + /* Test conversion from UTF-8 to UTF-16LE with no errors. */ + { + static const char input[] = + "Japanese (\346\227\245\346\234\254\350\252\236) [\360\235\224\215\360\235\224\236\360\235\224\255]"; + static const char expected[] = + "J\000a\000p\000a\000n\000e\000s\000e\000 \000(\000\345\145\054\147\236\212)\000 \000[\000\065\330\015\335\065\330\036\335\065\330\055\335]\000"; + iconv_t cd; + char buf[100]; + const char *inptr; + size_t inbytesleft; + char *outptr; + size_t outbytesleft; + size_t res; + + cd = iconv_open ("UTF-16LE", "UTF-8"); + ASSERT (cd != (iconv_t)(-1)); + + inptr = input; + inbytesleft = sizeof (input) - 1; + outptr = buf; + outbytesleft = sizeof (buf); + res = iconv (cd, + (ICONV_CONST char **) &inptr, &inbytesleft, + &outptr, &outbytesleft); + ASSERT (res == 0 && inbytesleft == 0); + ASSERT (outptr == buf + (sizeof (expected) - 1)); + ASSERT (memcmp (buf, expected, sizeof (expected) - 1) == 0); + + ASSERT (iconv_close (cd) == 0); + } + + /* Test conversion from UTF-8 to UTF-32BE with no errors. */ + { + static const char input[] = + "Japanese (\346\227\245\346\234\254\350\252\236) [\360\235\224\215\360\235\224\236\360\235\224\255]"; + static const char expected[] = + "\000\000\000J\000\000\000a\000\000\000p\000\000\000a\000\000\000n\000\000\000e\000\000\000s\000\000\000e\000\000\000 \000\000\000(\000\000\145\345\000\000\147\054\000\000\212\236\000\000\000)\000\000\000 \000\000\000[\000\001\325\015\000\001\325\036\000\001\325\055\000\000\000]"; + iconv_t cd; + char buf[100]; + const char *inptr; + size_t inbytesleft; + char *outptr; + size_t outbytesleft; + size_t res; + + cd = iconv_open ("UTF-32BE", "UTF-8"); + ASSERT (cd != (iconv_t)(-1)); + + inptr = input; + inbytesleft = sizeof (input) - 1; + outptr = buf; + outbytesleft = sizeof (buf); + res = iconv (cd, + (ICONV_CONST char **) &inptr, &inbytesleft, + &outptr, &outbytesleft); + ASSERT (res == 0 && inbytesleft == 0); + ASSERT (outptr == buf + (sizeof (expected) - 1)); + ASSERT (memcmp (buf, expected, sizeof (expected) - 1) == 0); + + ASSERT (iconv_close (cd) == 0); + } + + /* Test conversion from UTF-8 to UTF-32LE with no errors. */ + { + static const char input[] = + "Japanese (\346\227\245\346\234\254\350\252\236) [\360\235\224\215\360\235\224\236\360\235\224\255]"; + static const char expected[] = + "J\000\000\000a\000\000\000p\000\000\000a\000\000\000n\000\000\000e\000\000\000s\000\000\000e\000\000\000 \000\000\000(\000\000\000\345\145\000\000\054\147\000\000\236\212\000\000)\000\000\000 \000\000\000[\000\000\000\015\325\001\000\036\325\001\000\055\325\001\000]\000\000\000"; + iconv_t cd; + char buf[100]; + const char *inptr; + size_t inbytesleft; + char *outptr; + size_t outbytesleft; + size_t res; + + cd = iconv_open ("UTF-32LE", "UTF-8"); + ASSERT (cd != (iconv_t)(-1)); + + inptr = input; + inbytesleft = sizeof (input) - 1; + outptr = buf; + outbytesleft = sizeof (buf); + res = iconv (cd, + (ICONV_CONST char **) &inptr, &inbytesleft, + &outptr, &outbytesleft); + ASSERT (res == 0 && inbytesleft == 0); + ASSERT (outptr == buf + (sizeof (expected) - 1)); + ASSERT (memcmp (buf, expected, sizeof (expected) - 1) == 0); + + ASSERT (iconv_close (cd) == 0); + } + + /* Test conversion from UTF-16BE to UTF-8 with no errors. */ + { + static const char input[] = + "\000J\000a\000p\000a\000n\000e\000s\000e\000 \000(\145\345\147\054\212\236\000)\000 \000[\330\065\335\015\330\065\335\036\330\065\335\055\000]"; + static const char expected[] = + "Japanese (\346\227\245\346\234\254\350\252\236) [\360\235\224\215\360\235\224\236\360\235\224\255]"; + iconv_t cd; + char buf[100]; + const char *inptr; + size_t inbytesleft; + char *outptr; + size_t outbytesleft; + size_t res; + + cd = iconv_open ("UTF-8", "UTF-16BE"); + ASSERT (cd != (iconv_t)(-1)); + + inptr = input; + inbytesleft = sizeof (input) - 1; + outptr = buf; + outbytesleft = sizeof (buf); + res = iconv (cd, + (ICONV_CONST char **) &inptr, &inbytesleft, + &outptr, &outbytesleft); + ASSERT (res == 0 && inbytesleft == 0); + ASSERT (outptr == buf + (sizeof (expected) - 1)); + ASSERT (memcmp (buf, expected, sizeof (expected) - 1) == 0); + + ASSERT (iconv_close (cd) == 0); + } + + /* Test conversion from UTF-16LE to UTF-8 with no errors. */ + { + static const char input[] = + "J\000a\000p\000a\000n\000e\000s\000e\000 \000(\000\345\145\054\147\236\212)\000 \000[\000\065\330\015\335\065\330\036\335\065\330\055\335]\000"; + static const char expected[] = + "Japanese (\346\227\245\346\234\254\350\252\236) [\360\235\224\215\360\235\224\236\360\235\224\255]"; + iconv_t cd; + char buf[100]; + const char *inptr; + size_t inbytesleft; + char *outptr; + size_t outbytesleft; + size_t res; + + cd = iconv_open ("UTF-8", "UTF-16LE"); + ASSERT (cd != (iconv_t)(-1)); + + inptr = input; + inbytesleft = sizeof (input) - 1; + outptr = buf; + outbytesleft = sizeof (buf); + res = iconv (cd, + (ICONV_CONST char **) &inptr, &inbytesleft, + &outptr, &outbytesleft); + ASSERT (res == 0 && inbytesleft == 0); + ASSERT (outptr == buf + (sizeof (expected) - 1)); + ASSERT (memcmp (buf, expected, sizeof (expected) - 1) == 0); + + ASSERT (iconv_close (cd) == 0); + } + + /* Test conversion from UTF-32BE to UTF-8 with no errors. */ + { + static const char input[] = + "\000\000\000J\000\000\000a\000\000\000p\000\000\000a\000\000\000n\000\000\000e\000\000\000s\000\000\000e\000\000\000 \000\000\000(\000\000\145\345\000\000\147\054\000\000\212\236\000\000\000)\000\000\000 \000\000\000[\000\001\325\015\000\001\325\036\000\001\325\055\000\000\000]"; + static const char expected[] = + "Japanese (\346\227\245\346\234\254\350\252\236) [\360\235\224\215\360\235\224\236\360\235\224\255]"; + iconv_t cd; + char buf[100]; + const char *inptr; + size_t inbytesleft; + char *outptr; + size_t outbytesleft; + size_t res; + + cd = iconv_open ("UTF-8", "UTF-32BE"); + ASSERT (cd != (iconv_t)(-1)); + + inptr = input; + inbytesleft = sizeof (input) - 1; + outptr = buf; + outbytesleft = sizeof (buf); + res = iconv (cd, + (ICONV_CONST char **) &inptr, &inbytesleft, + &outptr, &outbytesleft); + ASSERT (res == 0 && inbytesleft == 0); + ASSERT (outptr == buf + (sizeof (expected) - 1)); + ASSERT (memcmp (buf, expected, sizeof (expected) - 1) == 0); + + ASSERT (iconv_close (cd) == 0); + } + + /* Test conversion from UTF-32LE to UTF-8 with no errors. */ + { + static const char input[] = + "J\000\000\000a\000\000\000p\000\000\000a\000\000\000n\000\000\000e\000\000\000s\000\000\000e\000\000\000 \000\000\000(\000\000\000\345\145\000\000\054\147\000\000\236\212\000\000)\000\000\000 \000\000\000[\000\000\000\015\325\001\000\036\325\001\000\055\325\001\000]\000\000\000"; + static const char expected[] = + "Japanese (\346\227\245\346\234\254\350\252\236) [\360\235\224\215\360\235\224\236\360\235\224\255]"; + iconv_t cd; + char buf[100]; + const char *inptr; + size_t inbytesleft; + char *outptr; + size_t outbytesleft; + size_t res; + + cd = iconv_open ("UTF-8", "UTF-32LE"); + ASSERT (cd != (iconv_t)(-1)); + + inptr = input; + inbytesleft = sizeof (input) - 1; + outptr = buf; + outbytesleft = sizeof (buf); + res = iconv (cd, + (ICONV_CONST char **) &inptr, &inbytesleft, + &outptr, &outbytesleft); + ASSERT (res == 0 && inbytesleft == 0); + ASSERT (outptr == buf + (sizeof (expected) - 1)); + ASSERT (memcmp (buf, expected, sizeof (expected) - 1) == 0); + + ASSERT (iconv_close (cd) == 0); + } +#endif + + return 0; +} diff --git a/gl/tests/test-locale.c b/gl/tests/test-locale.c new file mode 100644 index 0000000000..e884ea1001 --- /dev/null +++ b/gl/tests/test-locale.c @@ -0,0 +1,79 @@ +/* Test of <locale.h> substitute. + Copyright (C) 2007, 2009-2012 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>, 2007. */ + +#include <config.h> + +#include <locale.h> + +#include "verify.h" + +int a[] = + { + LC_ALL, + LC_COLLATE, + LC_CTYPE, + LC_MESSAGES, + LC_MONETARY, + LC_NUMERIC, + LC_TIME + }; + +#if HAVE_NEWLOCALE +/* Check that the locale_t type and the LC_GLOBAL_LOCALE macro are defined. */ +locale_t b = LC_GLOBAL_LOCALE; +#endif + +/* Check that the 'struct lconv' type is defined. */ +struct lconv l; +int ls; + +/* Check that NULL can be passed through varargs as a pointer type, + per POSIX 2008. */ +verify (sizeof NULL == sizeof (void *)); + +int +main () +{ + /* Check that 'struct lconv' has the ISO C and POSIX specified members. */ + ls += sizeof (*l.decimal_point); + ls += sizeof (*l.thousands_sep); + ls += sizeof (*l.grouping); + ls += sizeof (*l.mon_decimal_point); + ls += sizeof (*l.mon_thousands_sep); + ls += sizeof (*l.mon_grouping); + ls += sizeof (*l.positive_sign); + ls += sizeof (*l.negative_sign); + ls += sizeof (*l.currency_symbol); + ls += sizeof (l.frac_digits); + ls += sizeof (l.p_cs_precedes); + ls += sizeof (l.p_sign_posn); + ls += sizeof (l.p_sep_by_space); + ls += sizeof (l.n_cs_precedes); + ls += sizeof (l.n_sign_posn); + ls += sizeof (l.n_sep_by_space); + ls += sizeof (*l.int_curr_symbol); + ls += sizeof (l.int_frac_digits); + ls += sizeof (l.int_p_cs_precedes); + ls += sizeof (l.int_p_sign_posn); + ls += sizeof (l.int_p_sep_by_space); + ls += sizeof (l.int_n_cs_precedes); + ls += sizeof (l.int_n_sign_posn); + ls += sizeof (l.int_n_sep_by_space); + + return 0; +} diff --git a/gl/tests/test-localename.c b/gl/tests/test-localename.c new file mode 100644 index 0000000000..37b76a385b --- /dev/null +++ b/gl/tests/test-localename.c @@ -0,0 +1,747 @@ +/* Test of gl_locale_name function and its variants. + Copyright (C) 2007-2012 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>, 2007. */ + +#include <config.h> + +#include "localename.h" + +#include <locale.h> +#include <stdlib.h> +#include <string.h> + +#include "macros.h" + + +#if HAVE_NEWLOCALE + +static struct { int cat; int mask; const char *string; } const categories[] = + { + { LC_CTYPE, LC_CTYPE_MASK, "LC_CTYPE" }, + { LC_NUMERIC, LC_NUMERIC_MASK, "LC_NUMERIC" }, + { LC_TIME, LC_TIME_MASK, "LC_TIME" }, + { LC_COLLATE, LC_COLLATE_MASK, "LC_COLLATE" }, + { LC_MONETARY, LC_MONETARY_MASK, "LC_MONETARY" }, + { LC_MESSAGES, LC_MESSAGES_MASK, "LC_MESSAGES" } +# ifdef LC_PAPER + , { LC_PAPER, LC_PAPER_MASK, "LC_PAPER" } +# endif +# ifdef LC_NAME + , { LC_NAME, LC_NAME_MASK, "LC_NAME" } +# endif +# ifdef LC_ADDRESS + , { LC_ADDRESS, LC_ADDRESS_MASK, "LC_ADDRESS" } +# endif +# ifdef LC_TELEPHONE + , { LC_TELEPHONE, LC_TELEPHONE_MASK, "LC_TELEPHONE" } +# endif +# ifdef LC_MEASUREMENT + , { LC_MEASUREMENT, LC_MEASUREMENT_MASK, "LC_MEASUREMENT" } +# endif +# ifdef LC_IDENTIFICATION + , { LC_IDENTIFICATION, LC_IDENTIFICATION_MASK, "LC_IDENTIFICATION" } +# endif + }; + +#endif + +/* Test the gl_locale_name() function. */ +static void +test_locale_name (void) +{ + const char *name; + + /* Check that gl_locale_name returns non-NULL. */ + ASSERT (gl_locale_name (LC_MESSAGES, "LC_MESSAGES") != NULL); + + /* Get into a defined state, */ + setlocale (LC_ALL, "en_US.UTF-8"); +#if HAVE_NEWLOCALE + uselocale (LC_GLOBAL_LOCALE); +#endif + + /* Check that when all environment variables are unset, + gl_locale_name returns the default locale. */ + unsetenv ("LC_ALL"); + unsetenv ("LC_CTYPE"); + unsetenv ("LC_MESSAGES"); + unsetenv ("LC_NUMERIC"); + unsetenv ("LANG"); + setlocale (LC_ALL, ""); + ASSERT (strcmp (gl_locale_name (LC_MESSAGES, "LC_MESSAGES"), + gl_locale_name_default ()) == 0); + ASSERT (strcmp (gl_locale_name (LC_NUMERIC, "LC_NUMERIC"), + gl_locale_name_default ()) == 0); + + /* Check that an empty environment variable is treated like an unset + environment variable. */ + + setenv ("LC_ALL", "", 1); + unsetenv ("LC_CTYPE"); + unsetenv ("LC_MESSAGES"); + unsetenv ("LANG"); + setlocale (LC_ALL, ""); + ASSERT (strcmp (gl_locale_name (LC_MESSAGES, "LC_MESSAGES"), + gl_locale_name_default ()) == 0); + + unsetenv ("LC_ALL"); + setenv ("LC_CTYPE", "", 1); + unsetenv ("LC_MESSAGES"); + unsetenv ("LANG"); + setlocale (LC_ALL, ""); + ASSERT (strcmp (gl_locale_name (LC_MESSAGES, "LC_MESSAGES"), + gl_locale_name_default ()) == 0); + + unsetenv ("LC_ALL"); + unsetenv ("LC_CTYPE"); + setenv ("LC_MESSAGES", "", 1); + unsetenv ("LANG"); + setlocale (LC_ALL, ""); + ASSERT (strcmp (gl_locale_name (LC_MESSAGES, "LC_MESSAGES"), + gl_locale_name_default ()) == 0); + + unsetenv ("LC_ALL"); + unsetenv ("LC_CTYPE"); + unsetenv ("LC_MESSAGES"); + setenv ("LANG", "", 1); + setlocale (LC_ALL, ""); + ASSERT (strcmp (gl_locale_name (LC_MESSAGES, "LC_MESSAGES"), + gl_locale_name_default ()) == 0); + + /* Check that LC_ALL overrides the others, and LANG is overridden by the + others. */ + + setenv ("LC_ALL", "C", 1); + unsetenv ("LC_CTYPE"); + unsetenv ("LC_MESSAGES"); + unsetenv ("LANG"); + setlocale (LC_ALL, ""); + ASSERT (strcmp (gl_locale_name (LC_MESSAGES, "LC_MESSAGES"), "C") == 0); + + unsetenv ("LC_ALL"); + setenv ("LC_CTYPE", "C", 1); + setenv ("LC_MESSAGES", "C", 1); + unsetenv ("LANG"); + setlocale (LC_ALL, ""); + ASSERT (strcmp (gl_locale_name (LC_MESSAGES, "LC_MESSAGES"), "C") == 0); + + unsetenv ("LC_ALL"); + unsetenv ("LC_CTYPE"); + unsetenv ("LC_MESSAGES"); + setenv ("LANG", "C", 1); + setlocale (LC_ALL, ""); + ASSERT (strcmp (gl_locale_name (LC_MESSAGES, "LC_MESSAGES"), "C") == 0); + + /* Check mixed situations. */ + + unsetenv ("LC_ALL"); + unsetenv ("LC_CTYPE"); + setenv ("LC_MESSAGES", "fr_FR.UTF-8", 1); + setenv ("LANG", "de_DE.UTF-8", 1); + if (setlocale (LC_ALL, "") != NULL) + { + name = gl_locale_name (LC_CTYPE, "LC_CTYPE"); + ASSERT (strcmp (name, "de_DE.UTF-8") == 0); + name = gl_locale_name (LC_MESSAGES, "LC_MESSAGES"); + ASSERT (strcmp (name, "fr_FR.UTF-8") == 0); + } + + unsetenv ("LC_ALL"); + unsetenv ("LC_CTYPE"); + setenv ("LC_MESSAGES", "fr_FR.UTF-8", 1); + unsetenv ("LANG"); + if (setlocale (LC_ALL, "") != NULL) + { + name = gl_locale_name (LC_CTYPE, "LC_CTYPE"); + ASSERT (strcmp (name, gl_locale_name_default ()) == 0); + name = gl_locale_name (LC_MESSAGES, "LC_MESSAGES"); + ASSERT (strcmp (name, "fr_FR.UTF-8") == 0); + } + +#if HAVE_NEWLOCALE + /* Check that gl_locale_name considers the thread locale. */ + { + locale_t locale = newlocale (LC_ALL_MASK, "fr_FR.UTF-8", NULL); + if (locale != NULL) + { + uselocale (locale); + name = gl_locale_name (LC_CTYPE, "LC_CTYPE"); + ASSERT (strcmp (name, "fr_FR.UTF-8") == 0); + name = gl_locale_name (LC_MESSAGES, "LC_MESSAGES"); + ASSERT (strcmp (name, "fr_FR.UTF-8") == 0); + } + } + + /* Check that gl_locale_name distinguishes different categories of the + thread locale, and that the name is the right one for each. */ + { + unsigned int i; + + for (i = 0; i < SIZEOF (categories); i++) + { + int category_mask = categories[i].mask; + locale_t locale = newlocale (LC_ALL_MASK, "fr_FR.UTF-8", NULL); + if (locale != NULL) + { + locale = newlocale (category_mask, "de_DE.UTF-8", locale); + if (locale != NULL) + { + unsigned int j; + + uselocale (locale); + for (j = 0; j < SIZEOF (categories); j++) + { + const char *name_j = + gl_locale_name (categories[j].cat, categories[j].string); + if (j == i) + ASSERT (strcmp (name_j, "de_DE.UTF-8") == 0); + else + ASSERT (strcmp (name_j, "fr_FR.UTF-8") == 0); + } + } + } + } + } +#endif +} + +/* Test the gl_locale_name_thread() function. */ +static void +test_locale_name_thread (void) +{ + /* Get into a defined state, */ + setlocale (LC_ALL, "en_US.UTF-8"); + +#if HAVE_NEWLOCALE + /* Check that gl_locale_name_thread returns NULL when no thread locale is + set. */ + uselocale (LC_GLOBAL_LOCALE); + ASSERT (gl_locale_name_thread (LC_CTYPE, "LC_CTYPE") == NULL); + ASSERT (gl_locale_name_thread (LC_MESSAGES, "LC_MESSAGES") == NULL); + + /* Check that gl_locale_name_thread considers the thread locale. */ + { + locale_t locale = newlocale (LC_ALL_MASK, "fr_FR.UTF-8", NULL); + if (locale != NULL) + { + const char *name; + + uselocale (locale); + name = gl_locale_name_thread (LC_CTYPE, "LC_CTYPE"); + ASSERT (strcmp (name, "fr_FR.UTF-8") == 0); + name = gl_locale_name_thread (LC_MESSAGES, "LC_MESSAGES"); + ASSERT (strcmp (name, "fr_FR.UTF-8") == 0); + } + } + + /* Check that gl_locale_name_thread distinguishes different categories of the + thread locale, and that the name is the right one for each. */ + { + unsigned int i; + + for (i = 0; i < SIZEOF (categories); i++) + { + int category_mask = categories[i].mask; + locale_t locale = newlocale (LC_ALL_MASK, "fr_FR.UTF-8", NULL); + if (locale != NULL) + { + locale = newlocale (category_mask, "de_DE.UTF-8", locale); + if (locale != NULL) + { + unsigned int j; + + uselocale (locale); + for (j = 0; j < SIZEOF (categories); j++) + { + const char *name_j = + gl_locale_name_thread (categories[j].cat, + categories[j].string); + if (j == i) + ASSERT (strcmp (name_j, "de_DE.UTF-8") == 0); + else + ASSERT (strcmp (name_j, "fr_FR.UTF-8") == 0); + } + } + } + } + } + + /* Check that gl_locale_name_thread returns a string that is allocated with + indefinite extent. */ + { + /* Try many locale names in turn, in order to defeat possible caches. */ + static const char * const choices[] = + { + "C", + "POSIX", + "af_ZA", + "af_ZA.UTF-8", + "am_ET", + "am_ET.UTF-8", + "be_BY", + "be_BY.UTF-8", + "bg_BG", + "bg_BG.UTF-8", + "ca_ES", + "ca_ES.UTF-8", + "cs_CZ", + "cs_CZ.UTF-8", + "da_DK", + "da_DK.UTF-8", + "de_AT", + "de_AT.UTF-8", + "de_CH", + "de_CH.UTF-8", + "de_DE", + "de_DE.UTF-8", + "el_GR", + "el_GR.UTF-8", + "en_AU", + "en_AU.UTF-8", + "en_CA", + "en_CA.UTF-8", + "en_GB", + "en_GB.UTF-8", + "en_IE", + "en_IE.UTF-8", + "en_NZ", + "en_NZ.UTF-8", + "en_US", + "en_US.UTF-8", + "es_ES", + "es_ES.UTF-8", + "et_EE", + "et_EE.UTF-8", + "eu_ES", + "eu_ES.UTF-8", + "fi_FI", + "fi_FI.UTF-8", + "fr_BE", + "fr_BE.UTF-8", + "fr_CA", + "fr_CA.UTF-8", + "fr_CH", + "fr_CH.UTF-8", + "fr_FR", + "fr_FR.UTF-8", + "he_IL", + "he_IL.UTF-8", + "hr_HR", + "hr_HR.UTF-8", + "hu_HU", + "hu_HU.UTF-8", + "hy_AM", + "is_IS", + "is_IS.UTF-8", + "it_CH", + "it_CH.UTF-8", + "it_IT", + "it_IT.UTF-8", + "ja_JP.UTF-8", + "kk_KZ", + "kk_KZ.UTF-8", + "ko_KR.UTF-8", + "lt_LT", + "lt_LT.UTF-8", + "nl_BE", + "nl_BE.UTF-8", + "nl_NL", + "nl_NL.UTF-8", + "no_NO", + "no_NO.UTF-8", + "pl_PL", + "pl_PL.UTF-8", + "pt_BR", + "pt_BR.UTF-8", + "pt_PT", + "pt_PT.UTF-8", + "ro_RO", + "ro_RO.UTF-8", + "ru_RU", + "ru_RU.UTF-8", + "sk_SK", + "sk_SK.UTF-8", + "sl_SI", + "sl_SI.UTF-8", + "sv_SE", + "sv_SE.UTF-8", + "tr_TR", + "tr_TR.UTF-8", + "uk_UA", + "uk_UA.UTF-8", + "zh_CN", + "zh_CN.UTF-8", + "zh_HK", + "zh_HK.UTF-8", + "zh_TW", + "zh_TW.UTF-8" + }; + /* Remember which locales are available. */ + unsigned char /* bool */ available[SIZEOF (choices)]; + /* Array of remembered results of gl_locale_name_thread. */ + const char *unsaved_names[SIZEOF (choices)][SIZEOF (categories)]; + /* Array of remembered results of gl_locale_name_thread, stored in safe + memory. */ + char *saved_names[SIZEOF (choices)][SIZEOF (categories)]; + unsigned int j; + + for (j = 0; j < SIZEOF (choices); j++) + { + locale_t locale = newlocale (LC_ALL_MASK, choices[j], NULL); + available[j] = (locale != NULL); + if (locale != NULL) + { + unsigned int i; + + uselocale (locale); + for (i = 0; i < SIZEOF (categories); i++) + { + unsaved_names[j][i] = gl_locale_name_thread (categories[i].cat, categories[i].string); + saved_names[j][i] = strdup (unsaved_names[j][i]); + } + uselocale (LC_GLOBAL_LOCALE); + freelocale (locale); + } + } + /* Verify the unsaved_names are still valid. */ + for (j = 0; j < SIZEOF (choices); j++) + if (available[j]) + { + unsigned int i; + + for (i = 0; i < SIZEOF (categories); i++) + ASSERT (strcmp (unsaved_names[j][i], saved_names[j][i]) == 0); + } + /* Allocate many locales, without freeing them. This is an attempt at + overwriting as much of the previously allocated memory as possible. */ + for (j = SIZEOF (choices); j > 0; ) + { + j--; + if (available[j]) + { + locale_t locale = newlocale (LC_ALL_MASK, choices[j], NULL); + unsigned int i; + + ASSERT (locale != NULL); + uselocale (locale); + for (i = 0; i < SIZEOF (categories); i++) + { + const char *name = gl_locale_name_thread (categories[i].cat, categories[i].string); + ASSERT (strcmp (unsaved_names[j][i], name) == 0); + } + uselocale (LC_GLOBAL_LOCALE); + } + } + /* Verify the unsaved_names are still valid. */ + for (j = 0; j < SIZEOF (choices); j++) + if (available[j]) + { + unsigned int i; + + for (i = 0; i < SIZEOF (categories); i++) + ASSERT (strcmp (unsaved_names[j][i], saved_names[j][i]) == 0); + } + } +#else + /* Check that gl_locale_name_thread always returns NULL. */ + ASSERT (gl_locale_name_thread (LC_CTYPE, "LC_CTYPE") == NULL); + ASSERT (gl_locale_name_thread (LC_MESSAGES, "LC_MESSAGES") == NULL); +#endif +} + +/* Test the gl_locale_name_posix() function. */ +static void +test_locale_name_posix (void) +{ + const char *name; + + /* Get into a defined state, */ + setlocale (LC_ALL, "en_US.UTF-8"); +#if HAVE_NEWLOCALE + uselocale (LC_GLOBAL_LOCALE); +#endif + + /* Check that when all environment variables are unset, + gl_locale_name_posix returns either NULL or the default locale. */ + unsetenv ("LC_ALL"); + unsetenv ("LC_CTYPE"); + unsetenv ("LC_MESSAGES"); + unsetenv ("LC_NUMERIC"); + unsetenv ("LANG"); + setlocale (LC_ALL, ""); + name = gl_locale_name_posix (LC_MESSAGES, "LC_MESSAGES"); + ASSERT (name == NULL || strcmp (name, gl_locale_name_default ()) == 0); + name = gl_locale_name_posix (LC_NUMERIC, "LC_NUMERIC"); + ASSERT (name == NULL || strcmp (name, gl_locale_name_default ()) == 0); + + /* Check that an empty environment variable is treated like an unset + environment variable. */ + + setenv ("LC_ALL", "", 1); + unsetenv ("LC_CTYPE"); + unsetenv ("LC_MESSAGES"); + unsetenv ("LANG"); + setlocale (LC_ALL, ""); + name = gl_locale_name_posix (LC_MESSAGES, "LC_MESSAGES"); + ASSERT (name == NULL || strcmp (name, gl_locale_name_default ()) == 0); + + unsetenv ("LC_ALL"); + setenv ("LC_CTYPE", "", 1); + unsetenv ("LC_MESSAGES"); + unsetenv ("LANG"); + setlocale (LC_ALL, ""); + name = gl_locale_name_posix (LC_MESSAGES, "LC_MESSAGES"); + ASSERT (name == NULL || strcmp (name, gl_locale_name_default ()) == 0); + + unsetenv ("LC_ALL"); + unsetenv ("LC_CTYPE"); + setenv ("LC_MESSAGES", "", 1); + unsetenv ("LANG"); + setlocale (LC_ALL, ""); + name = gl_locale_name_posix (LC_MESSAGES, "LC_MESSAGES"); + ASSERT (name == NULL || strcmp (name, gl_locale_name_default ()) == 0); + + unsetenv ("LC_ALL"); + unsetenv ("LC_CTYPE"); + unsetenv ("LC_MESSAGES"); + setenv ("LANG", "", 1); + setlocale (LC_ALL, ""); + name = gl_locale_name_posix (LC_MESSAGES, "LC_MESSAGES"); + ASSERT (name == NULL || strcmp (name, gl_locale_name_default ()) == 0); + + /* Check that LC_ALL overrides the others, and LANG is overridden by the + others. */ + + setenv ("LC_ALL", "C", 1); + unsetenv ("LC_CTYPE"); + unsetenv ("LC_MESSAGES"); + unsetenv ("LANG"); + setlocale (LC_ALL, ""); + name = gl_locale_name_posix (LC_MESSAGES, "LC_MESSAGES"); + ASSERT (strcmp (name, "C") == 0); + + unsetenv ("LC_ALL"); + setenv ("LC_CTYPE", "C", 1); + setenv ("LC_MESSAGES", "C", 1); + unsetenv ("LANG"); + setlocale (LC_ALL, ""); + name = gl_locale_name_posix (LC_MESSAGES, "LC_MESSAGES"); + ASSERT (strcmp (name, "C") == 0); + + unsetenv ("LC_ALL"); + unsetenv ("LC_CTYPE"); + unsetenv ("LC_MESSAGES"); + setenv ("LANG", "C", 1); + setlocale (LC_ALL, ""); + name = gl_locale_name_posix (LC_MESSAGES, "LC_MESSAGES"); + ASSERT (strcmp (name, "C") == 0); + + /* Check mixed situations. */ + + unsetenv ("LC_ALL"); + unsetenv ("LC_CTYPE"); + setenv ("LC_MESSAGES", "fr_FR.UTF-8", 1); + setenv ("LANG", "de_DE.UTF-8", 1); + if (setlocale (LC_ALL, "") != NULL) + { + name = gl_locale_name_posix (LC_CTYPE, "LC_CTYPE"); + ASSERT (strcmp (name, "de_DE.UTF-8") == 0); + name = gl_locale_name_posix (LC_MESSAGES, "LC_MESSAGES"); + ASSERT (strcmp (name, "fr_FR.UTF-8") == 0); + } + + unsetenv ("LC_ALL"); + unsetenv ("LC_CTYPE"); + setenv ("LC_MESSAGES", "fr_FR.UTF-8", 1); + unsetenv ("LANG"); + if (setlocale (LC_ALL, "") != NULL) + { + name = gl_locale_name_posix (LC_CTYPE, "LC_CTYPE"); + ASSERT (name == NULL || strcmp (name, gl_locale_name_default ()) == 0); + name = gl_locale_name_posix (LC_MESSAGES, "LC_MESSAGES"); + ASSERT (strcmp (name, "fr_FR.UTF-8") == 0); + } + +#if HAVE_NEWLOCALE + /* Check that gl_locale_name_posix ignores the thread locale. */ + { + locale_t locale = newlocale (LC_ALL_MASK, "fr_FR.UTF-8", NULL); + if (locale != NULL) + { + unsetenv ("LC_ALL"); + unsetenv ("LC_CTYPE"); + unsetenv ("LC_MESSAGES"); + setenv ("LANG", "C", 1); + setlocale (LC_ALL, ""); + uselocale (locale); + name = gl_locale_name_posix (LC_MESSAGES, "LC_MESSAGES"); + ASSERT (strcmp (name, "C") == 0); + } + } +#endif +} + +/* Test the gl_locale_name_environ() function. */ +static void +test_locale_name_environ (void) +{ + const char *name; + + /* Get into a defined state, */ + setlocale (LC_ALL, "en_US.UTF-8"); +#if HAVE_NEWLOCALE + uselocale (LC_GLOBAL_LOCALE); +#endif + + /* Check that when all environment variables are unset, + gl_locale_name_environ returns NULL. */ + unsetenv ("LC_ALL"); + unsetenv ("LC_CTYPE"); + unsetenv ("LC_MESSAGES"); + unsetenv ("LC_NUMERIC"); + unsetenv ("LANG"); + ASSERT (gl_locale_name_environ (LC_MESSAGES, "LC_MESSAGES") == NULL); + ASSERT (gl_locale_name_environ (LC_NUMERIC, "LC_NUMERIC") == NULL); + + /* Check that an empty environment variable is treated like an unset + environment variable. */ + + setenv ("LC_ALL", "", 1); + unsetenv ("LC_CTYPE"); + unsetenv ("LC_MESSAGES"); + unsetenv ("LANG"); + ASSERT (gl_locale_name_environ (LC_MESSAGES, "LC_MESSAGES") == NULL); + + unsetenv ("LC_ALL"); + setenv ("LC_CTYPE", "", 1); + unsetenv ("LC_MESSAGES"); + unsetenv ("LANG"); + ASSERT (gl_locale_name_environ (LC_MESSAGES, "LC_MESSAGES") == NULL); + + unsetenv ("LC_ALL"); + unsetenv ("LC_CTYPE"); + setenv ("LC_MESSAGES", "", 1); + unsetenv ("LANG"); + ASSERT (gl_locale_name_environ (LC_MESSAGES, "LC_MESSAGES") == NULL); + + unsetenv ("LC_ALL"); + unsetenv ("LC_CTYPE"); + unsetenv ("LC_MESSAGES"); + setenv ("LANG", "", 1); + ASSERT (gl_locale_name_environ (LC_MESSAGES, "LC_MESSAGES") == NULL); + + /* Check that LC_ALL overrides the others, and LANG is overridden by the + others. */ + + setenv ("LC_ALL", "C", 1); + unsetenv ("LC_CTYPE"); + unsetenv ("LC_MESSAGES"); + unsetenv ("LANG"); + name = gl_locale_name_environ (LC_MESSAGES, "LC_MESSAGES"); + ASSERT (strcmp (name, "C") == 0); + + unsetenv ("LC_ALL"); + setenv ("LC_CTYPE", "C", 1); + setenv ("LC_MESSAGES", "C", 1); + unsetenv ("LANG"); + name = gl_locale_name_environ (LC_MESSAGES, "LC_MESSAGES"); + ASSERT (strcmp (name, "C") == 0); + + unsetenv ("LC_ALL"); + unsetenv ("LC_CTYPE"); + unsetenv ("LC_MESSAGES"); + setenv ("LANG", "C", 1); + name = gl_locale_name_environ (LC_MESSAGES, "LC_MESSAGES"); + ASSERT (strcmp (name, "C") == 0); + + /* Check mixed situations. */ + + unsetenv ("LC_ALL"); + unsetenv ("LC_CTYPE"); + setenv ("LC_MESSAGES", "fr_FR.UTF-8", 1); + setenv ("LANG", "de_DE.UTF-8", 1); + name = gl_locale_name_environ (LC_CTYPE, "LC_CTYPE"); + ASSERT (strcmp (name, "de_DE.UTF-8") == 0); + name = gl_locale_name_environ (LC_MESSAGES, "LC_MESSAGES"); + ASSERT (strcmp (name, "fr_FR.UTF-8") == 0); + + unsetenv ("LC_ALL"); + unsetenv ("LC_CTYPE"); + setenv ("LC_MESSAGES", "fr_FR.UTF-8", 1); + unsetenv ("LANG"); + name = gl_locale_name_environ (LC_CTYPE, "LC_CTYPE"); + ASSERT (name == NULL); + name = gl_locale_name_environ (LC_MESSAGES, "LC_MESSAGES"); + ASSERT (strcmp (name, "fr_FR.UTF-8") == 0); + +#if HAVE_NEWLOCALE + /* Check that gl_locale_name_environ ignores the thread locale. */ + { + locale_t locale = newlocale (LC_ALL_MASK, "fr_FR.UTF-8", NULL); + if (locale != NULL) + { + unsetenv ("LC_ALL"); + unsetenv ("LC_CTYPE"); + unsetenv ("LC_MESSAGES"); + setenv ("LANG", "C", 1); + setlocale (LC_ALL, ""); + uselocale (locale); + name = gl_locale_name_environ (LC_MESSAGES, "LC_MESSAGES"); + ASSERT (strcmp (name, "C") == 0); + } + } +#endif +} + +/* Test the gl_locale_name_default() function. */ +static void +test_locale_name_default (void) +{ + const char *name = gl_locale_name_default (); + + ASSERT (name != NULL); + + /* Only Mac OS X and Windows have a facility for the user to set the default + locale. */ +#if !((defined __APPLE__ && defined __MACH__) || (defined _WIN32 || defined __WIN32__ || defined __CYGWIN__)) + ASSERT (strcmp (name, "C") == 0); +#endif + +#if HAVE_NEWLOCALE + /* Check that gl_locale_name_default ignores the thread locale. */ + { + locale_t locale = newlocale (LC_ALL_MASK, "fr_FR.UTF-8", NULL); + if (locale != NULL) + { + uselocale (locale); + ASSERT (strcmp (gl_locale_name_default (), name) == 0); + } + } +#endif +} + +int +main () +{ + test_locale_name (); + test_locale_name_thread (); + test_locale_name_posix (); + test_locale_name_environ (); + test_locale_name_default (); + + return 0; +} diff --git a/gl/tests/test-setlocale1.c b/gl/tests/test-setlocale1.c new file mode 100644 index 0000000000..08f9224f8c --- /dev/null +++ b/gl/tests/test-setlocale1.c @@ -0,0 +1,59 @@ +/* Test of setting the current locale. + Copyright (C) 2011-2012 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> + +#include <locale.h> + +#include "signature.h" +SIGNATURE_CHECK (setlocale, char *, (int, const char *)); + +#include <stdlib.h> +#include <string.h> + +#include "macros.h" + +int +main (int argc, char *argv[]) +{ + char *name1; + char *name2; + + /* Try to set the locale by implicitly looking at the LC_ALL environment + variable. + configure should already have checked that the locale is supported. */ + if (setlocale (LC_ALL, "") == NULL) + return 1; + + name1 = strdup (setlocale (LC_ALL, NULL)); + + /* Reset the locale. */ + if (setlocale (LC_ALL, "C") == NULL) + return 1; + + /* Try to set the locale by explicitly looking at the LC_ALL environment + variable. + configure should already have checked that the locale is supported. */ + if (setlocale (LC_ALL, getenv ("LC_ALL")) == NULL) + return 1; + + name2 = strdup (setlocale (LC_ALL, NULL)); + + /* Test that the two results are the same. */ + ASSERT (strcmp (name1, name2) == 0); + + return 0; +} diff --git a/gl/tests/test-setlocale1.sh b/gl/tests/test-setlocale1.sh new file mode 100755 index 0000000000..59a0532dde --- /dev/null +++ b/gl/tests/test-setlocale1.sh @@ -0,0 +1,34 @@ +#!/bin/sh + +: ${LOCALE_FR=fr_FR} +: ${LOCALE_FR_UTF8=fr_FR.UTF-8} +: ${LOCALE_JA=ja_JP} +: ${LOCALE_ZH_CN=zh_CN.GB18030} + +if test $LOCALE_FR = none && test $LOCALE_FR_UTF8 = none \ + && test $LOCALE_JA = none && test $LOCALE_ZH_CN = none; then + if test -f /usr/bin/localedef; then + echo "Skipping test: no locale for testing is installed" + else + echo "Skipping test: no locale for testing is supported" + fi + exit 77 +fi + +if test $LOCALE_FR != none; then + LC_ALL=$LOCALE_FR ./test-setlocale1${EXEEXT} || exit 1 +fi + +if test $LOCALE_FR_UTF8 != none; then + LC_ALL=$LOCALE_FR_UTF8 ./test-setlocale1${EXEEXT} || exit 1 +fi + +if test $LOCALE_JA != none; then + LC_ALL=$LOCALE_JA ./test-setlocale1${EXEEXT} || exit 1 +fi + +if test $LOCALE_ZH_CN != none; then + LC_ALL=$LOCALE_ZH_CN ./test-setlocale1${EXEEXT} || exit 1 +fi + +exit 0 diff --git a/gl/tests/test-setlocale2.c b/gl/tests/test-setlocale2.c new file mode 100644 index 0000000000..74166477b6 --- /dev/null +++ b/gl/tests/test-setlocale2.c @@ -0,0 +1,55 @@ +/* Test of setting the current locale. + Copyright (C) 2011-2012 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> + +#include <locale.h> + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +int +main () +{ + /* Try to set the locale by implicitly looking at the LC_ALL environment + variable. */ + if (setlocale (LC_ALL, "") != NULL) + /* It was successful. Check whether LC_CTYPE is non-trivial. */ + if (strcmp (setlocale (LC_CTYPE, NULL), "C") == 0) + { + fprintf (stderr, "setlocale did not fail for implicit %s\n", + getenv ("LC_ALL")); + return 1; + } + + /* Reset the locale. */ + if (setlocale (LC_ALL, "C") == NULL) + return 1; + + /* Try to set the locale by explicitly looking at the LC_ALL environment + variable. */ + if (setlocale (LC_ALL, getenv ("LC_ALL")) != NULL) + /* It was successful. Check whether LC_CTYPE is non-trivial. */ + if (strcmp (setlocale (LC_CTYPE, NULL), "C") == 0) + { + fprintf (stderr, "setlocale did not fail for explicit %s\n", + getenv ("LC_ALL")); + return 1; + } + + return 0; +} diff --git a/gl/tests/test-setlocale2.sh b/gl/tests/test-setlocale2.sh new file mode 100755 index 0000000000..904e147f4a --- /dev/null +++ b/gl/tests/test-setlocale2.sh @@ -0,0 +1,17 @@ +#!/bin/sh + +# Test locale names with likely unsupported encoding in Unix syntax. +for name in ar_SA.ISO-8859-1 fr_FR.CP1251 zh_TW.GB18030 zh_CN.BIG5; do + env LC_ALL=$name ./test-setlocale2${EXEEXT} 1 || exit 1 +done + +# Test locale names with likely unsupported encoding in native Windows syntax. +for name in "Arabic_Saudi Arabia.1252" "Arabic_Saudi Arabia.65001" \ + French_France.65001 Japanese_Japan.65001 Turkish_Turkey.65001 \ + Chinese_Taiwan.65001 Chinese_China.54936 Chinese_China.65001; do + # Here we use 'env' to set the LC_ALL environment variable, because on + # Solaris 11 2011-11, the /bin/sh refuses to do it for Turkish_Turkey.65001. + env LC_ALL="$name" ./test-setlocale2${EXEEXT} 1 || exit 1 +done + +exit 0 diff --git a/gl/tests/unistr/test-u8-mbtoucr.c b/gl/tests/unistr/test-u8-mbtoucr.c new file mode 100644 index 0000000000..9e38885e3a --- /dev/null +++ b/gl/tests/unistr/test-u8-mbtoucr.c @@ -0,0 +1,187 @@ +/* Test of u8_mbtoucr() function. + Copyright (C) 2010-2012 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>, 2010. */ + +#include <config.h> + +#include "unistr.h" + +#include "macros.h" + +int +main () +{ + ucs4_t uc; + int ret; + + /* Test NUL unit input. */ + { + static const uint8_t input[] = ""; + uc = 0xBADFACE; + ret = u8_mbtoucr (&uc, input, 1); + ASSERT (ret == 1); + ASSERT (uc == 0); + } + + /* Test ISO 646 unit input. */ + { + ucs4_t c; + uint8_t buf[1]; + + for (c = 0; c < 0x80; c++) + { + buf[0] = c; + uc = 0xBADFACE; + ret = u8_mbtoucr (&uc, buf, 1); + ASSERT (ret == 1); + ASSERT (uc == c); + } + } + + /* Test 2-byte character input. */ + { + static const uint8_t input[] = { 0xC3, 0x97 }; + uc = 0xBADFACE; + ret = u8_mbtoucr (&uc, input, 2); + ASSERT (ret == 2); + ASSERT (uc == 0x00D7); + } + + /* Test 3-byte character input. */ + { + static const uint8_t input[] = { 0xE2, 0x82, 0xAC }; + uc = 0xBADFACE; + ret = u8_mbtoucr (&uc, input, 3); + ASSERT (ret == 3); + ASSERT (uc == 0x20AC); + } + + /* Test 4-byte character input. */ + { + static const uint8_t input[] = { 0xF4, 0x8F, 0xBF, 0xBD }; + uc = 0xBADFACE; + ret = u8_mbtoucr (&uc, input, 4); + ASSERT (ret == 4); + ASSERT (uc == 0x10FFFD); + } + + /* Test incomplete/invalid 1-byte input. */ + { + static const uint8_t input[] = { 0xC1 }; + uc = 0xBADFACE; + ret = u8_mbtoucr (&uc, input, 1); + ASSERT (ret == -1); + ASSERT (uc == 0xFFFD); + } + { + static const uint8_t input[] = { 0xC3 }; + uc = 0xBADFACE; + ret = u8_mbtoucr (&uc, input, 1); + ASSERT (ret == -2); + ASSERT (uc == 0xFFFD); + } + { + static const uint8_t input[] = { 0xE2 }; + uc = 0xBADFACE; + ret = u8_mbtoucr (&uc, input, 1); + ASSERT (ret == -2); + ASSERT (uc == 0xFFFD); + } + { + static const uint8_t input[] = { 0xF4 }; + uc = 0xBADFACE; + ret = u8_mbtoucr (&uc, input, 1); + ASSERT (ret == -2); + ASSERT (uc == 0xFFFD); + } + { + static const uint8_t input[] = { 0xFE }; + uc = 0xBADFACE; + ret = u8_mbtoucr (&uc, input, 1); + ASSERT (ret == -1); + ASSERT (uc == 0xFFFD); + } + + /* Test incomplete/invalid 2-byte input. */ + { + static const uint8_t input[] = { 0xE0, 0x9F }; + uc = 0xBADFACE; + ret = u8_mbtoucr (&uc, input, 2); + ASSERT (ret == -1); + ASSERT (uc == 0xFFFD); + } + { + static const uint8_t input[] = { 0xE2, 0x82 }; + uc = 0xBADFACE; + ret = u8_mbtoucr (&uc, input, 2); + ASSERT (ret == -2); + ASSERT (uc == 0xFFFD); + } + { + static const uint8_t input[] = { 0xE2, 0xD0 }; + uc = 0xBADFACE; + ret = u8_mbtoucr (&uc, input, 2); + ASSERT (ret == -1); + ASSERT (uc == 0xFFFD); + } + { + static const uint8_t input[] = { 0xF0, 0x8F }; + uc = 0xBADFACE; + ret = u8_mbtoucr (&uc, input, 2); + ASSERT (ret == -1); + ASSERT (uc == 0xFFFD); + } + { + static const uint8_t input[] = { 0xF3, 0x8F }; + uc = 0xBADFACE; + ret = u8_mbtoucr (&uc, input, 2); + ASSERT (ret == -2); + ASSERT (uc == 0xFFFD); + } + { + static const uint8_t input[] = { 0xF3, 0xD0 }; + uc = 0xBADFACE; + ret = u8_mbtoucr (&uc, input, 2); + ASSERT (ret == -1); + ASSERT (uc == 0xFFFD); + } + + /* Test incomplete/invalid 3-byte input. */ + { + static const uint8_t input[] = { 0xF3, 0x8F, 0xBF }; + uc = 0xBADFACE; + ret = u8_mbtoucr (&uc, input, 3); + ASSERT (ret == -2); + ASSERT (uc == 0xFFFD); + } + { + static const uint8_t input[] = { 0xF3, 0xD0, 0xBF }; + uc = 0xBADFACE; + ret = u8_mbtoucr (&uc, input, 3); + ASSERT (ret == -1); + ASSERT (uc == 0xFFFD); + } + { + static const uint8_t input[] = { 0xF3, 0x8F, 0xD0 }; + uc = 0xBADFACE; + ret = u8_mbtoucr (&uc, input, 3); + ASSERT (ret == -1); + ASSERT (uc == 0xFFFD); + } + + return 0; +} diff --git a/gl/tests/unistr/test-u8-uctomb.c b/gl/tests/unistr/test-u8-uctomb.c new file mode 100644 index 0000000000..988808e48d --- /dev/null +++ b/gl/tests/unistr/test-u8-uctomb.c @@ -0,0 +1,157 @@ +/* Test of u8_uctomb() function. + Copyright (C) 2010-2012 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>, 2010. */ + +#include <config.h> + +#include "unistr.h" + +#include "macros.h" + +#define MAGIC 0xBA + +int +main () +{ + /* Test ISO 646 character, in particular the NUL character. */ + { + ucs4_t uc; + + for (uc = 0; uc < 0x80; uc++) + { + uint8_t buf[5] = { MAGIC, MAGIC, MAGIC, MAGIC, MAGIC }; + int ret; + + ret = u8_uctomb (buf, uc, 0); + ASSERT (ret == -2); + ASSERT (buf[0] == MAGIC); + + ret = u8_uctomb (buf, uc, 1); + ASSERT (ret == 1); + ASSERT (buf[0] == uc); + ASSERT (buf[1] == MAGIC); + } + } + + /* Test 2-byte character. */ + { + ucs4_t uc = 0x00D7; + uint8_t buf[5] = { MAGIC, MAGIC, MAGIC, MAGIC, MAGIC }; + int ret; + + ret = u8_uctomb (buf, uc, 0); + ASSERT (ret == -2); + ASSERT (buf[0] == MAGIC); + + ret = u8_uctomb (buf, uc, 1); + ASSERT (ret == -2); + ASSERT (buf[0] == MAGIC); + + ret = u8_uctomb (buf, uc, 2); + ASSERT (ret == 2); + ASSERT (buf[0] == 0xC3); + ASSERT (buf[1] == 0x97); + ASSERT (buf[2] == MAGIC); + } + + /* Test 3-byte character. */ + { + ucs4_t uc = 0x20AC; + uint8_t buf[5] = { MAGIC, MAGIC, MAGIC, MAGIC, MAGIC }; + int ret; + + ret = u8_uctomb (buf, uc, 0); + ASSERT (ret == -2); + ASSERT (buf[0] == MAGIC); + + ret = u8_uctomb (buf, uc, 1); + ASSERT (ret == -2); + ASSERT (buf[0] == MAGIC); + + ret = u8_uctomb (buf, uc, 2); + ASSERT (ret == -2); + ASSERT (buf[0] == MAGIC); + ASSERT (buf[1] == MAGIC); + + ret = u8_uctomb (buf, uc, 3); + ASSERT (ret == 3); + ASSERT (buf[0] == 0xE2); + ASSERT (buf[1] == 0x82); + ASSERT (buf[2] == 0xAC); + ASSERT (buf[3] == MAGIC); + } + + /* Test 4-byte character. */ + { + ucs4_t uc = 0x10FFFD; + uint8_t buf[5] = { MAGIC, MAGIC, MAGIC, MAGIC, MAGIC }; + int ret; + + ret = u8_uctomb (buf, uc, 0); + ASSERT (ret == -2); + ASSERT (buf[0] == MAGIC); + + ret = u8_uctomb (buf, uc, 1); + ASSERT (ret == -2); + ASSERT (buf[0] == MAGIC); + + ret = u8_uctomb (buf, uc, 2); + ASSERT (ret == -2); + ASSERT (buf[0] == MAGIC); + ASSERT (buf[1] == MAGIC); + + ret = u8_uctomb (buf, uc, 3); + ASSERT (ret == -2); + ASSERT (buf[0] == MAGIC); + ASSERT (buf[1] == MAGIC); + ASSERT (buf[2] == MAGIC); + + ret = u8_uctomb (buf, uc, 4); + ASSERT (ret == 4); + ASSERT (buf[0] == 0xF4); + ASSERT (buf[1] == 0x8F); + ASSERT (buf[2] == 0xBF); + ASSERT (buf[3] == 0xBD); + ASSERT (buf[4] == MAGIC); + } + + /* Test invalid characters. */ + { + ucs4_t invalid[] = { 0x110000, 0xD800, 0xDBFF, 0xDC00, 0xDFFF }; + uint8_t buf[5] = { MAGIC, MAGIC, MAGIC, MAGIC, MAGIC }; + size_t i; + + for (i = 0; i < SIZEOF (invalid); i++) + { + ucs4_t uc = invalid[i]; + int n; + + for (n = 0; n <= 4; n++) + { + int ret = u8_uctomb (buf, uc, n); + ASSERT (ret == -1); + ASSERT (buf[0] == MAGIC); + ASSERT (buf[1] == MAGIC); + ASSERT (buf[2] == MAGIC); + ASSERT (buf[3] == MAGIC); + ASSERT (buf[4] == MAGIC); + } + } + } + + return 0; +} diff --git a/gl/unistr.in.h b/gl/unistr.in.h new file mode 100644 index 0000000000..5cda3f5a84 --- /dev/null +++ b/gl/unistr.in.h @@ -0,0 +1,750 @@ +/* Elementary Unicode string functions. + Copyright (C) 2001-2002, 2005-2012 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#ifndef _UNISTR_H +#define _UNISTR_H + +#include "unitypes.h" + +/* Get common macros for C. */ +#include "unused-parameter.h" + +/* Get bool. */ +#include <stdbool.h> + +/* Get size_t. */ +#include <stddef.h> + +#ifdef __cplusplus +extern "C" { +#endif + + +/* Conventions: + + All functions prefixed with u8_ operate on UTF-8 encoded strings. + Their unit is an uint8_t (1 byte). + + All functions prefixed with u16_ operate on UTF-16 encoded strings. + Their unit is an uint16_t (a 2-byte word). + + All functions prefixed with u32_ operate on UCS-4 encoded strings. + Their unit is an uint32_t (a 4-byte word). + + All argument pairs (s, n) denote a Unicode string s[0..n-1] with exactly + n units. + + All arguments starting with "str" and the arguments of functions starting + with u8_str/u16_str/u32_str denote a NUL terminated string, i.e. a string + which terminates at the first NUL unit. This termination unit is + considered part of the string for all memory allocation purposes, but + is not considered part of the string for all other logical purposes. + + Functions returning a string result take a (resultbuf, lengthp) argument + pair. If resultbuf is not NULL and the result fits into *lengthp units, + it is put in resultbuf, and resultbuf is returned. Otherwise, a freshly + allocated string is returned. In both cases, *lengthp is set to the + length (number of units) of the returned string. In case of error, + NULL is returned and errno is set. */ + + +/* Elementary string checks. */ + +/* Check whether an UTF-8 string is well-formed. + Return NULL if valid, or a pointer to the first invalid unit otherwise. */ +extern const uint8_t * + u8_check (const uint8_t *s, size_t n) + _UC_ATTRIBUTE_PURE; + +/* Check whether an UTF-16 string is well-formed. + Return NULL if valid, or a pointer to the first invalid unit otherwise. */ +extern const uint16_t * + u16_check (const uint16_t *s, size_t n) + _UC_ATTRIBUTE_PURE; + +/* Check whether an UCS-4 string is well-formed. + Return NULL if valid, or a pointer to the first invalid unit otherwise. */ +extern const uint32_t * + u32_check (const uint32_t *s, size_t n) + _UC_ATTRIBUTE_PURE; + + +/* Elementary string conversions. */ + +/* Convert an UTF-8 string to an UTF-16 string. */ +extern uint16_t * + u8_to_u16 (const uint8_t *s, size_t n, uint16_t *resultbuf, + size_t *lengthp); + +/* Convert an UTF-8 string to an UCS-4 string. */ +extern uint32_t * + u8_to_u32 (const uint8_t *s, size_t n, uint32_t *resultbuf, + size_t *lengthp); + +/* Convert an UTF-16 string to an UTF-8 string. */ +extern uint8_t * + u16_to_u8 (const uint16_t *s, size_t n, uint8_t *resultbuf, + size_t *lengthp); + +/* Convert an UTF-16 string to an UCS-4 string. */ +extern uint32_t * + u16_to_u32 (const uint16_t *s, size_t n, uint32_t *resultbuf, + size_t *lengthp); + +/* Convert an UCS-4 string to an UTF-8 string. */ +extern uint8_t * + u32_to_u8 (const uint32_t *s, size_t n, uint8_t *resultbuf, + size_t *lengthp); + +/* Convert an UCS-4 string to an UTF-16 string. */ +extern uint16_t * + u32_to_u16 (const uint32_t *s, size_t n, uint16_t *resultbuf, + size_t *lengthp); + + +/* Elementary string functions. */ + +/* Return the length (number of units) of the first character in S, which is + no longer than N. Return 0 if it is the NUL character. Return -1 upon + failure. */ +/* Similar to mblen(), except that s must not be NULL. */ +extern int + u8_mblen (const uint8_t *s, size_t n) + _UC_ATTRIBUTE_PURE; +extern int + u16_mblen (const uint16_t *s, size_t n) + _UC_ATTRIBUTE_PURE; +extern int + u32_mblen (const uint32_t *s, size_t n) + _UC_ATTRIBUTE_PURE; + +/* Return the length (number of units) of the first character in S, putting + its 'ucs4_t' representation in *PUC. Upon failure, *PUC is set to 0xfffd, + and an appropriate number of units is returned. + The number of available units, N, must be > 0. */ +/* Similar to mbtowc(), except that puc and s must not be NULL, n must be > 0, + and the NUL character is not treated specially. */ +/* The variants with _safe suffix are safe, even if the library is compiled + without --enable-safety. */ + +#if GNULIB_UNISTR_U8_MBTOUC_UNSAFE || HAVE_LIBUNISTRING +# if !HAVE_INLINE +extern int + u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n); +# else +extern int + u8_mbtouc_unsafe_aux (ucs4_t *puc, const uint8_t *s, size_t n); +static inline int +u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n) +{ + uint8_t c = *s; + + if (c < 0x80) + { + *puc = c; + return 1; + } + else + return u8_mbtouc_unsafe_aux (puc, s, n); +} +# endif +#endif + +#if GNULIB_UNISTR_U16_MBTOUC_UNSAFE || HAVE_LIBUNISTRING +# if !HAVE_INLINE +extern int + u16_mbtouc_unsafe (ucs4_t *puc, const uint16_t *s, size_t n); +# else +extern int + u16_mbtouc_unsafe_aux (ucs4_t *puc, const uint16_t *s, size_t n); +static inline int +u16_mbtouc_unsafe (ucs4_t *puc, const uint16_t *s, size_t n) +{ + uint16_t c = *s; + + if (c < 0xd800 || c >= 0xe000) + { + *puc = c; + return 1; + } + else + return u16_mbtouc_unsafe_aux (puc, s, n); +} +# endif +#endif + +#if GNULIB_UNISTR_U32_MBTOUC_UNSAFE || HAVE_LIBUNISTRING +# if !HAVE_INLINE +extern int + u32_mbtouc_unsafe (ucs4_t *puc, const uint32_t *s, size_t n); +# else +static inline int +u32_mbtouc_unsafe (ucs4_t *puc, + const uint32_t *s, size_t n _GL_UNUSED_PARAMETER) +{ + uint32_t c = *s; + +# if CONFIG_UNICODE_SAFETY + if (c < 0xd800 || (c >= 0xe000 && c < 0x110000)) +# endif + *puc = c; +# if CONFIG_UNICODE_SAFETY + else + /* invalid multibyte character */ + *puc = 0xfffd; +# endif + return 1; +} +# endif +#endif + +#if GNULIB_UNISTR_U8_MBTOUC || HAVE_LIBUNISTRING +# if !HAVE_INLINE +extern int + u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n); +# else +extern int + u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n); +static inline int +u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n) +{ + uint8_t c = *s; + + if (c < 0x80) + { + *puc = c; + return 1; + } + else + return u8_mbtouc_aux (puc, s, n); +} +# endif +#endif + +#if GNULIB_UNISTR_U16_MBTOUC || HAVE_LIBUNISTRING +# if !HAVE_INLINE +extern int + u16_mbtouc (ucs4_t *puc, const uint16_t *s, size_t n); +# else +extern int + u16_mbtouc_aux (ucs4_t *puc, const uint16_t *s, size_t n); +static inline int +u16_mbtouc (ucs4_t *puc, const uint16_t *s, size_t n) +{ + uint16_t c = *s; + + if (c < 0xd800 || c >= 0xe000) + { + *puc = c; + return 1; + } + else + return u16_mbtouc_aux (puc, s, n); +} +# endif +#endif + +#if GNULIB_UNISTR_U32_MBTOUC || HAVE_LIBUNISTRING +# if !HAVE_INLINE +extern int + u32_mbtouc (ucs4_t *puc, const uint32_t *s, size_t n); +# else +static inline int +u32_mbtouc (ucs4_t *puc, const uint32_t *s, size_t n _GL_UNUSED_PARAMETER) +{ + uint32_t c = *s; + + if (c < 0xd800 || (c >= 0xe000 && c < 0x110000)) + *puc = c; + else + /* invalid multibyte character */ + *puc = 0xfffd; + return 1; +} +# endif +#endif + +/* Return the length (number of units) of the first character in S, putting + its 'ucs4_t' representation in *PUC. Upon failure, *PUC is set to 0xfffd, + and -1 is returned for an invalid sequence of units, -2 is returned for an + incomplete sequence of units. + The number of available units, N, must be > 0. */ +/* Similar to u*_mbtouc(), except that the return value gives more details + about the failure, similar to mbrtowc(). */ + +#if GNULIB_UNISTR_U8_MBTOUCR || HAVE_LIBUNISTRING +extern int + u8_mbtoucr (ucs4_t *puc, const uint8_t *s, size_t n); +#endif + +#if GNULIB_UNISTR_U16_MBTOUCR || HAVE_LIBUNISTRING +extern int + u16_mbtoucr (ucs4_t *puc, const uint16_t *s, size_t n); +#endif + +#if GNULIB_UNISTR_U32_MBTOUCR || HAVE_LIBUNISTRING +extern int + u32_mbtoucr (ucs4_t *puc, const uint32_t *s, size_t n); +#endif + +/* Put the multibyte character represented by UC in S, returning its + length. Return -1 upon failure, -2 if the number of available units, N, + is too small. The latter case cannot occur if N >= 6/2/1, respectively. */ +/* Similar to wctomb(), except that s must not be NULL, and the argument n + must be specified. */ + +#if GNULIB_UNISTR_U8_UCTOMB || HAVE_LIBUNISTRING +/* Auxiliary function, also used by u8_chr, u8_strchr, u8_strrchr. */ +extern int + u8_uctomb_aux (uint8_t *s, ucs4_t uc, int n); +# if !HAVE_INLINE +extern int + u8_uctomb (uint8_t *s, ucs4_t uc, int n); +# else +static inline int +u8_uctomb (uint8_t *s, ucs4_t uc, int n) +{ + if (uc < 0x80 && n > 0) + { + s[0] = uc; + return 1; + } + else + return u8_uctomb_aux (s, uc, n); +} +# endif +#endif + +#if GNULIB_UNISTR_U16_UCTOMB || HAVE_LIBUNISTRING +/* Auxiliary function, also used by u16_chr, u16_strchr, u16_strrchr. */ +extern int + u16_uctomb_aux (uint16_t *s, ucs4_t uc, int n); +# if !HAVE_INLINE +extern int + u16_uctomb (uint16_t *s, ucs4_t uc, int n); +# else +static inline int +u16_uctomb (uint16_t *s, ucs4_t uc, int n) +{ + if (uc < 0xd800 && n > 0) + { + s[0] = uc; + return 1; + } + else + return u16_uctomb_aux (s, uc, n); +} +# endif +#endif + +#if GNULIB_UNISTR_U32_UCTOMB || HAVE_LIBUNISTRING +# if !HAVE_INLINE +extern int + u32_uctomb (uint32_t *s, ucs4_t uc, int n); +# else +static inline int +u32_uctomb (uint32_t *s, ucs4_t uc, int n) +{ + if (uc < 0xd800 || (uc >= 0xe000 && uc < 0x110000)) + { + if (n > 0) + { + *s = uc; + return 1; + } + else + return -2; + } + else + return -1; +} +# endif +#endif + +/* Copy N units from SRC to DEST. */ +/* Similar to memcpy(). */ +extern uint8_t * + u8_cpy (uint8_t *dest, const uint8_t *src, size_t n); +extern uint16_t * + u16_cpy (uint16_t *dest, const uint16_t *src, size_t n); +extern uint32_t * + u32_cpy (uint32_t *dest, const uint32_t *src, size_t n); + +/* Copy N units from SRC to DEST, guaranteeing correct behavior for + overlapping memory areas. */ +/* Similar to memmove(). */ +extern uint8_t * + u8_move (uint8_t *dest, const uint8_t *src, size_t n); +extern uint16_t * + u16_move (uint16_t *dest, const uint16_t *src, size_t n); +extern uint32_t * + u32_move (uint32_t *dest, const uint32_t *src, size_t n); + +/* Set the first N characters of S to UC. UC should be a character that + occupies only 1 unit. */ +/* Similar to memset(). */ +extern uint8_t * + u8_set (uint8_t *s, ucs4_t uc, size_t n); +extern uint16_t * + u16_set (uint16_t *s, ucs4_t uc, size_t n); +extern uint32_t * + u32_set (uint32_t *s, ucs4_t uc, size_t n); + +/* Compare S1 and S2, each of length N. */ +/* Similar to memcmp(). */ +extern int + u8_cmp (const uint8_t *s1, const uint8_t *s2, size_t n) + _UC_ATTRIBUTE_PURE; +extern int + u16_cmp (const uint16_t *s1, const uint16_t *s2, size_t n) + _UC_ATTRIBUTE_PURE; +extern int + u32_cmp (const uint32_t *s1, const uint32_t *s2, size_t n) + _UC_ATTRIBUTE_PURE; + +/* Compare S1 and S2. */ +/* Similar to the gnulib function memcmp2(). */ +extern int + u8_cmp2 (const uint8_t *s1, size_t n1, const uint8_t *s2, size_t n2) + _UC_ATTRIBUTE_PURE; +extern int + u16_cmp2 (const uint16_t *s1, size_t n1, const uint16_t *s2, size_t n2) + _UC_ATTRIBUTE_PURE; +extern int + u32_cmp2 (const uint32_t *s1, size_t n1, const uint32_t *s2, size_t n2) + _UC_ATTRIBUTE_PURE; + +/* Search the string at S for UC. */ +/* Similar to memchr(). */ +extern uint8_t * + u8_chr (const uint8_t *s, size_t n, ucs4_t uc) + _UC_ATTRIBUTE_PURE; +extern uint16_t * + u16_chr (const uint16_t *s, size_t n, ucs4_t uc) + _UC_ATTRIBUTE_PURE; +extern uint32_t * + u32_chr (const uint32_t *s, size_t n, ucs4_t uc) + _UC_ATTRIBUTE_PURE; + +/* Count the number of Unicode characters in the N units from S. */ +/* Similar to mbsnlen(). */ +extern size_t + u8_mbsnlen (const uint8_t *s, size_t n) + _UC_ATTRIBUTE_PURE; +extern size_t + u16_mbsnlen (const uint16_t *s, size_t n) + _UC_ATTRIBUTE_PURE; +extern size_t + u32_mbsnlen (const uint32_t *s, size_t n) + _UC_ATTRIBUTE_PURE; + +/* Elementary string functions with memory allocation. */ + +/* Make a freshly allocated copy of S, of length N. */ +extern uint8_t * + u8_cpy_alloc (const uint8_t *s, size_t n); +extern uint16_t * + u16_cpy_alloc (const uint16_t *s, size_t n); +extern uint32_t * + u32_cpy_alloc (const uint32_t *s, size_t n); + +/* Elementary string functions on NUL terminated strings. */ + +/* Return the length (number of units) of the first character in S. + Return 0 if it is the NUL character. Return -1 upon failure. */ +extern int + u8_strmblen (const uint8_t *s) + _UC_ATTRIBUTE_PURE; +extern int + u16_strmblen (const uint16_t *s) + _UC_ATTRIBUTE_PURE; +extern int + u32_strmblen (const uint32_t *s) + _UC_ATTRIBUTE_PURE; + +/* Return the length (number of units) of the first character in S, putting + its 'ucs4_t' representation in *PUC. Return 0 if it is the NUL + character. Return -1 upon failure. */ +extern int + u8_strmbtouc (ucs4_t *puc, const uint8_t *s); +extern int + u16_strmbtouc (ucs4_t *puc, const uint16_t *s); +extern int + u32_strmbtouc (ucs4_t *puc, const uint32_t *s); + +/* Forward iteration step. Advances the pointer past the next character, + or returns NULL if the end of the string has been reached. Puts the + character's 'ucs4_t' representation in *PUC. */ +extern const uint8_t * + u8_next (ucs4_t *puc, const uint8_t *s); +extern const uint16_t * + u16_next (ucs4_t *puc, const uint16_t *s); +extern const uint32_t * + u32_next (ucs4_t *puc, const uint32_t *s); + +/* Backward iteration step. Advances the pointer to point to the previous + character, or returns NULL if the beginning of the string had been reached. + Puts the character's 'ucs4_t' representation in *PUC. */ +extern const uint8_t * + u8_prev (ucs4_t *puc, const uint8_t *s, const uint8_t *start); +extern const uint16_t * + u16_prev (ucs4_t *puc, const uint16_t *s, const uint16_t *start); +extern const uint32_t * + u32_prev (ucs4_t *puc, const uint32_t *s, const uint32_t *start); + +/* Return the number of units in S. */ +/* Similar to strlen(), wcslen(). */ +extern size_t + u8_strlen (const uint8_t *s) + _UC_ATTRIBUTE_PURE; +extern size_t + u16_strlen (const uint16_t *s) + _UC_ATTRIBUTE_PURE; +extern size_t + u32_strlen (const uint32_t *s) + _UC_ATTRIBUTE_PURE; + +/* Return the number of units in S, but at most MAXLEN. */ +/* Similar to strnlen(), wcsnlen(). */ +extern size_t + u8_strnlen (const uint8_t *s, size_t maxlen) + _UC_ATTRIBUTE_PURE; +extern size_t + u16_strnlen (const uint16_t *s, size_t maxlen) + _UC_ATTRIBUTE_PURE; +extern size_t + u32_strnlen (const uint32_t *s, size_t maxlen) + _UC_ATTRIBUTE_PURE; + +/* Copy SRC to DEST. */ +/* Similar to strcpy(), wcscpy(). */ +extern uint8_t * + u8_strcpy (uint8_t *dest, const uint8_t *src); +extern uint16_t * + u16_strcpy (uint16_t *dest, const uint16_t *src); +extern uint32_t * + u32_strcpy (uint32_t *dest, const uint32_t *src); + +/* Copy SRC to DEST, returning the address of the terminating NUL in DEST. */ +/* Similar to stpcpy(). */ +extern uint8_t * + u8_stpcpy (uint8_t *dest, const uint8_t *src); +extern uint16_t * + u16_stpcpy (uint16_t *dest, const uint16_t *src); +extern uint32_t * + u32_stpcpy (uint32_t *dest, const uint32_t *src); + +/* Copy no more than N units of SRC to DEST. */ +/* Similar to strncpy(), wcsncpy(). */ +extern uint8_t * + u8_strncpy (uint8_t *dest, const uint8_t *src, size_t n); +extern uint16_t * + u16_strncpy (uint16_t *dest, const uint16_t *src, size_t n); +extern uint32_t * + u32_strncpy (uint32_t *dest, const uint32_t *src, size_t n); + +/* Copy no more than N units of SRC to DEST. Return a pointer past the last + non-NUL unit written into DEST. */ +/* Similar to stpncpy(). */ +extern uint8_t * + u8_stpncpy (uint8_t *dest, const uint8_t *src, size_t n); +extern uint16_t * + u16_stpncpy (uint16_t *dest, const uint16_t *src, size_t n); +extern uint32_t * + u32_stpncpy (uint32_t *dest, const uint32_t *src, size_t n); + +/* Append SRC onto DEST. */ +/* Similar to strcat(), wcscat(). */ +extern uint8_t * + u8_strcat (uint8_t *dest, const uint8_t *src); +extern uint16_t * + u16_strcat (uint16_t *dest, const uint16_t *src); +extern uint32_t * + u32_strcat (uint32_t *dest, const uint32_t *src); + +/* Append no more than N units of SRC onto DEST. */ +/* Similar to strncat(), wcsncat(). */ +extern uint8_t * + u8_strncat (uint8_t *dest, const uint8_t *src, size_t n); +extern uint16_t * + u16_strncat (uint16_t *dest, const uint16_t *src, size_t n); +extern uint32_t * + u32_strncat (uint32_t *dest, const uint32_t *src, size_t n); + +/* Compare S1 and S2. */ +/* Similar to strcmp(), wcscmp(). */ +#ifdef __sun +/* Avoid a collision with the u8_strcmp() function in Solaris 11 libc. */ +extern int + u8_strcmp_gnu (const uint8_t *s1, const uint8_t *s2) + _UC_ATTRIBUTE_PURE; +# define u8_strcmp u8_strcmp_gnu +#else +extern int + u8_strcmp (const uint8_t *s1, const uint8_t *s2) + _UC_ATTRIBUTE_PURE; +#endif +extern int + u16_strcmp (const uint16_t *s1, const uint16_t *s2) + _UC_ATTRIBUTE_PURE; +extern int + u32_strcmp (const uint32_t *s1, const uint32_t *s2) + _UC_ATTRIBUTE_PURE; + +/* Compare S1 and S2 using the collation rules of the current locale. + Return -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2. + Upon failure, set errno and return any value. */ +/* Similar to strcoll(), wcscoll(). */ +extern int + u8_strcoll (const uint8_t *s1, const uint8_t *s2); +extern int + u16_strcoll (const uint16_t *s1, const uint16_t *s2); +extern int + u32_strcoll (const uint32_t *s1, const uint32_t *s2); + +/* Compare no more than N units of S1 and S2. */ +/* Similar to strncmp(), wcsncmp(). */ +extern int + u8_strncmp (const uint8_t *s1, const uint8_t *s2, size_t n) + _UC_ATTRIBUTE_PURE; +extern int + u16_strncmp (const uint16_t *s1, const uint16_t *s2, size_t n) + _UC_ATTRIBUTE_PURE; +extern int + u32_strncmp (const uint32_t *s1, const uint32_t *s2, size_t n) + _UC_ATTRIBUTE_PURE; + +/* Duplicate S, returning an identical malloc'd string. */ +/* Similar to strdup(), wcsdup(). */ +extern uint8_t * + u8_strdup (const uint8_t *s); +extern uint16_t * + u16_strdup (const uint16_t *s); +extern uint32_t * + u32_strdup (const uint32_t *s); + +/* Find the first occurrence of UC in STR. */ +/* Similar to strchr(), wcschr(). */ +extern uint8_t * + u8_strchr (const uint8_t *str, ucs4_t uc) + _UC_ATTRIBUTE_PURE; +extern uint16_t * + u16_strchr (const uint16_t *str, ucs4_t uc) + _UC_ATTRIBUTE_PURE; +extern uint32_t * + u32_strchr (const uint32_t *str, ucs4_t uc) + _UC_ATTRIBUTE_PURE; + +/* Find the last occurrence of UC in STR. */ +/* Similar to strrchr(), wcsrchr(). */ +extern uint8_t * + u8_strrchr (const uint8_t *str, ucs4_t uc) + _UC_ATTRIBUTE_PURE; +extern uint16_t * + u16_strrchr (const uint16_t *str, ucs4_t uc) + _UC_ATTRIBUTE_PURE; +extern uint32_t * + u32_strrchr (const uint32_t *str, ucs4_t uc) + _UC_ATTRIBUTE_PURE; + +/* Return the length of the initial segment of STR which consists entirely + of Unicode characters not in REJECT. */ +/* Similar to strcspn(), wcscspn(). */ +extern size_t + u8_strcspn (const uint8_t *str, const uint8_t *reject) + _UC_ATTRIBUTE_PURE; +extern size_t + u16_strcspn (const uint16_t *str, const uint16_t *reject) + _UC_ATTRIBUTE_PURE; +extern size_t + u32_strcspn (const uint32_t *str, const uint32_t *reject) + _UC_ATTRIBUTE_PURE; + +/* Return the length of the initial segment of STR which consists entirely + of Unicode characters in ACCEPT. */ +/* Similar to strspn(), wcsspn(). */ +extern size_t + u8_strspn (const uint8_t *str, const uint8_t *accept) + _UC_ATTRIBUTE_PURE; +extern size_t + u16_strspn (const uint16_t *str, const uint16_t *accept) + _UC_ATTRIBUTE_PURE; +extern size_t + u32_strspn (const uint32_t *str, const uint32_t *accept) + _UC_ATTRIBUTE_PURE; + +/* Find the first occurrence in STR of any character in ACCEPT. */ +/* Similar to strpbrk(), wcspbrk(). */ +extern uint8_t * + u8_strpbrk (const uint8_t *str, const uint8_t *accept) + _UC_ATTRIBUTE_PURE; +extern uint16_t * + u16_strpbrk (const uint16_t *str, const uint16_t *accept) + _UC_ATTRIBUTE_PURE; +extern uint32_t * + u32_strpbrk (const uint32_t *str, const uint32_t *accept) + _UC_ATTRIBUTE_PURE; + +/* Find the first occurrence of NEEDLE in HAYSTACK. */ +/* Similar to strstr(), wcsstr(). */ +extern uint8_t * + u8_strstr (const uint8_t *haystack, const uint8_t *needle) + _UC_ATTRIBUTE_PURE; +extern uint16_t * + u16_strstr (const uint16_t *haystack, const uint16_t *needle) + _UC_ATTRIBUTE_PURE; +extern uint32_t * + u32_strstr (const uint32_t *haystack, const uint32_t *needle) + _UC_ATTRIBUTE_PURE; + +/* Test whether STR starts with PREFIX. */ +extern bool + u8_startswith (const uint8_t *str, const uint8_t *prefix) + _UC_ATTRIBUTE_PURE; +extern bool + u16_startswith (const uint16_t *str, const uint16_t *prefix) + _UC_ATTRIBUTE_PURE; +extern bool + u32_startswith (const uint32_t *str, const uint32_t *prefix) + _UC_ATTRIBUTE_PURE; + +/* Test whether STR ends with SUFFIX. */ +extern bool + u8_endswith (const uint8_t *str, const uint8_t *suffix) + _UC_ATTRIBUTE_PURE; +extern bool + u16_endswith (const uint16_t *str, const uint16_t *suffix) + _UC_ATTRIBUTE_PURE; +extern bool + u32_endswith (const uint32_t *str, const uint32_t *suffix) + _UC_ATTRIBUTE_PURE; + +/* Divide STR into tokens separated by characters in DELIM. + This interface is actually more similar to wcstok than to strtok. */ +/* Similar to strtok_r(), wcstok(). */ +extern uint8_t * + u8_strtok (uint8_t *str, const uint8_t *delim, uint8_t **ptr); +extern uint16_t * + u16_strtok (uint16_t *str, const uint16_t *delim, uint16_t **ptr); +extern uint32_t * + u32_strtok (uint32_t *str, const uint32_t *delim, uint32_t **ptr); + + +#ifdef __cplusplus +} +#endif + +#endif /* _UNISTR_H */ diff --git a/gl/unistr/u8-mbtoucr.c b/gl/unistr/u8-mbtoucr.c new file mode 100644 index 0000000000..8bc59d7ce7 --- /dev/null +++ b/gl/unistr/u8-mbtoucr.c @@ -0,0 +1,285 @@ +/* Look at first character in UTF-8 string, returning an error code. + Copyright (C) 1999-2002, 2006-2007, 2009-2012 Free Software Foundation, Inc. + Written by Bruno Haible <bruno@clisp.org>, 2001. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> + +/* Specification. */ +#include "unistr.h" + +int +u8_mbtoucr (ucs4_t *puc, const uint8_t *s, size_t n) +{ + uint8_t c = *s; + + if (c < 0x80) + { + *puc = c; + return 1; + } + else if (c >= 0xc2) + { + if (c < 0xe0) + { + if (n >= 2) + { + if ((s[1] ^ 0x80) < 0x40) + { + *puc = ((unsigned int) (c & 0x1f) << 6) + | (unsigned int) (s[1] ^ 0x80); + return 2; + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return -2; + } + } + else if (c < 0xf0) + { + if (n >= 2) + { + if ((s[1] ^ 0x80) < 0x40 + && (c >= 0xe1 || s[1] >= 0xa0) + && (c != 0xed || s[1] < 0xa0)) + { + if (n >= 3) + { + if ((s[2] ^ 0x80) < 0x40) + { + *puc = ((unsigned int) (c & 0x0f) << 12) + | ((unsigned int) (s[1] ^ 0x80) << 6) + | (unsigned int) (s[2] ^ 0x80); + return 3; + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return -2; + } + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return -2; + } + } + else if (c < 0xf8) + { + if (n >= 2) + { + if ((s[1] ^ 0x80) < 0x40 + && (c >= 0xf1 || s[1] >= 0x90) +#if 1 + && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90)) +#endif + ) + { + if (n >= 3) + { + if ((s[2] ^ 0x80) < 0x40) + { + if (n >= 4) + { + if ((s[3] ^ 0x80) < 0x40) + { + *puc = ((unsigned int) (c & 0x07) << 18) + | ((unsigned int) (s[1] ^ 0x80) << 12) + | ((unsigned int) (s[2] ^ 0x80) << 6) + | (unsigned int) (s[3] ^ 0x80); + return 4; + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return -2; + } + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return -2; + } + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return -2; + } + } +#if 0 + else if (c < 0xfc) + { + if (n >= 2) + { + if ((s[1] ^ 0x80) < 0x40 + && (c >= 0xf9 || s[1] >= 0x88)) + { + if (n >= 3) + { + if ((s[2] ^ 0x80) < 0x40) + { + if (n >= 4) + { + if ((s[3] ^ 0x80) < 0x40) + { + if (n >= 5) + { + if ((s[4] ^ 0x80) < 0x40) + { + *puc = ((unsigned int) (c & 0x03) << 24) + | ((unsigned int) (s[1] ^ 0x80) << 18) + | ((unsigned int) (s[2] ^ 0x80) << 12) + | ((unsigned int) (s[3] ^ 0x80) << 6) + | (unsigned int) (s[4] ^ 0x80); + return 5; + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return -2; + } + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return -2; + } + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return -2; + } + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return -2; + } + } + else if (c < 0xfe) + { + if (n >= 2) + { + if ((s[1] ^ 0x80) < 0x40 + && (c >= 0xfd || s[1] >= 0x84)) + { + if (n >= 3) + { + if ((s[2] ^ 0x80) < 0x40) + { + if (n >= 4) + { + if ((s[3] ^ 0x80) < 0x40) + { + if (n >= 5) + { + if ((s[4] ^ 0x80) < 0x40) + { + if (n >= 6) + { + if ((s[5] ^ 0x80) < 0x40) + { + *puc = ((unsigned int) (c & 0x01) << 30) + | ((unsigned int) (s[1] ^ 0x80) << 24) + | ((unsigned int) (s[2] ^ 0x80) << 18) + | ((unsigned int) (s[3] ^ 0x80) << 12) + | ((unsigned int) (s[4] ^ 0x80) << 6) + | (unsigned int) (s[5] ^ 0x80); + return 6; + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return -2; + } + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return -2; + } + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return -2; + } + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return -2; + } + } + /* invalid multibyte character */ + } + else + { + /* incomplete multibyte character */ + *puc = 0xfffd; + return -2; + } + } +#endif + } + /* invalid multibyte character */ + *puc = 0xfffd; + return -1; +} diff --git a/gl/unistr/u8-uctomb-aux.c b/gl/unistr/u8-uctomb-aux.c new file mode 100644 index 0000000000..5ee212b7ee --- /dev/null +++ b/gl/unistr/u8-uctomb-aux.c @@ -0,0 +1,69 @@ +/* Conversion UCS-4 to UTF-8. + Copyright (C) 2002, 2006-2007, 2009-2012 Free Software Foundation, Inc. + Written by Bruno Haible <bruno@clisp.org>, 2002. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> + +/* Specification. */ +#include "unistr.h" + +int +u8_uctomb_aux (uint8_t *s, ucs4_t uc, int n) +{ + int count; + + if (uc < 0x80) + /* The case n >= 1 is already handled by the caller. */ + return -2; + else if (uc < 0x800) + count = 2; + else if (uc < 0x10000) + { + if (uc < 0xd800 || uc >= 0xe000) + count = 3; + else + return -1; + } +#if 0 + else if (uc < 0x200000) + count = 4; + else if (uc < 0x4000000) + count = 5; + else if (uc <= 0x7fffffff) + count = 6; +#else + else if (uc < 0x110000) + count = 4; +#endif + else + return -1; + + if (n < count) + return -2; + + switch (count) /* note: code falls through cases! */ + { +#if 0 + case 6: s[5] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x4000000; + case 5: s[4] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x200000; +#endif + case 4: s[3] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x10000; + case 3: s[2] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x800; + case 2: s[1] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0xc0; + /*case 1:*/ s[0] = uc; + } + return count; +} diff --git a/gl/unistr/u8-uctomb.c b/gl/unistr/u8-uctomb.c new file mode 100644 index 0000000000..e56e4a2386 --- /dev/null +++ b/gl/unistr/u8-uctomb.c @@ -0,0 +1,88 @@ +/* Store a character in UTF-8 string. + Copyright (C) 2002, 2005-2006, 2009-2012 Free Software Foundation, Inc. + Written by Bruno Haible <bruno@clisp.org>, 2002. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> + +#if defined IN_LIBUNISTRING +/* Tell unistr.h to declare u8_uctomb as 'extern', not 'static inline'. */ +# include "unistring-notinline.h" +#endif + +/* Specification. */ +#include "unistr.h" + +#if !HAVE_INLINE + +int +u8_uctomb (uint8_t *s, ucs4_t uc, int n) +{ + if (uc < 0x80) + { + if (n > 0) + { + s[0] = uc; + return 1; + } + /* else return -2, below. */ + } + else + { + int count; + + if (uc < 0x800) + count = 2; + else if (uc < 0x10000) + { + if (uc < 0xd800 || uc >= 0xe000) + count = 3; + else + return -1; + } +#if 0 + else if (uc < 0x200000) + count = 4; + else if (uc < 0x4000000) + count = 5; + else if (uc <= 0x7fffffff) + count = 6; +#else + else if (uc < 0x110000) + count = 4; +#endif + else + return -1; + + if (n >= count) + { + switch (count) /* note: code falls through cases! */ + { +#if 0 + case 6: s[5] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x4000000; + case 5: s[4] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x200000; +#endif + case 4: s[3] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x10000; + case 3: s[2] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x800; + case 2: s[1] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0xc0; + /*case 1:*/ s[0] = uc; + } + return count; + } + } + return -2; +} + +#endif diff --git a/gl/unitypes.in.h b/gl/unitypes.in.h new file mode 100644 index 0000000000..e642b7a3be --- /dev/null +++ b/gl/unitypes.in.h @@ -0,0 +1,46 @@ +/* Elementary types and macros for the GNU UniString library. + Copyright (C) 2002, 2005-2006, 2009-2012 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#ifndef _UNITYPES_H +#define _UNITYPES_H + +/* Get uint8_t, uint16_t, uint32_t. */ +#include <stdint.h> + +/* Type representing a Unicode character. */ +typedef uint32_t ucs4_t; + +/* Attribute of a function whose result depends only on the arguments + (not pointers!) and which has no side effects. */ +#ifndef _UC_ATTRIBUTE_CONST +# if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 95) +# define _UC_ATTRIBUTE_CONST __attribute__ ((__const__)) +# else +# define _UC_ATTRIBUTE_CONST +# endif +#endif + +/* Attribute of a function whose result depends only on the arguments + (possibly pointers) and global memory, and which has no side effects. */ +#ifndef _UC_ATTRIBUTE_PURE +# if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96) +# define _UC_ATTRIBUTE_PURE __attribute__ ((__pure__)) +# else +# define _UC_ATTRIBUTE_PURE +# endif +#endif + +#endif /* _UNITYPES_H */ |