diff options
author | Ulrich Drepper <drepper@gmail.com> | 2012-01-08 07:19:21 -0500 |
---|---|---|
committer | Ulrich Drepper <drepper@gmail.com> | 2012-01-08 07:19:21 -0500 |
commit | d3ed722566f42d3f614b1221a8e4f19092976531 (patch) | |
tree | 4a63e059ef599167cf407311188551fe72221d8d /wcsmbs | |
parent | a0da5fe1e49b819b4d90b77915e21cddd397d064 (diff) | |
download | glibc-d3ed722566f42d3f614b1221a8e4f19092976531.tar.gz |
Simplify char16_t implementation
Diffstat (limited to 'wcsmbs')
-rw-r--r-- | wcsmbs/c16rtomb.c | 97 | ||||
-rw-r--r-- | wcsmbs/mbrtoc16.c | 75 | ||||
-rw-r--r-- | wcsmbs/mbrtowc.c | 9 | ||||
-rw-r--r-- | wcsmbs/wcsmbsload.c | 85 | ||||
-rw-r--r-- | wcsmbs/wcsmbsload.h | 5 |
5 files changed, 43 insertions, 228 deletions
diff --git a/wcsmbs/c16rtomb.c b/wcsmbs/c16rtomb.c index 3fed0b5d63..5374c755cc 100644 --- a/wcsmbs/c16rtomb.c +++ b/wcsmbs/c16rtomb.c @@ -17,25 +17,8 @@ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. */ -#include <assert.h> -#include <dlfcn.h> -#include <errno.h> -#include <gconv.h> -#include <stdlib.h> #include <uchar.h> -#include <wcsmbsload.h> - -#include <sysdep.h> - -#ifndef EILSEQ -# define EILSEQ EINVAL -#endif - -#if __STDC__ >= 201000L -# define u(c) U##c -#else -# define u(c) L##c -#endif +#include <wchar.h> /* This is the private state used if PS is NULL. */ @@ -44,85 +27,7 @@ static mbstate_t state; size_t c16rtomb (char *s, char16_t c16, mbstate_t *ps) { -#if 1 // XXX The ISO C 11 spec I have does not say anything about handling // XXX surrogates in this interface. return wcrtomb (s, c16, ps ?: &state); -#else - char buf[MB_LEN_MAX]; - struct __gconv_step_data data; - int status; - size_t result; - size_t dummy; - const struct gconv_fcts *fcts; - - /* Set information for this step. */ - data.__invocation_counter = 0; - data.__internal_use = 1; - data.__flags = __GCONV_IS_LAST; - data.__statep = ps ?: &state; - data.__trans = NULL; - - /* A first special case is if S is NULL. This means put PS in the - initial state. */ - if (s == NULL) - { - s = buf; - c16 = u('\0'); - } - - /* Tell where we want to have the result. */ - data.__outbuf = (unsigned char *) s; - data.__outbufend = (unsigned char *) s + MB_CUR_MAX; - - /* Get the conversion functions. */ - fcts = get_gconv_fcts (_NL_CURRENT_DATA (LC_CTYPE)); - __gconv_fct fct = fcts->fromc16->__fct; -#ifdef PTR_DEMANGLE - if (fcts->tomb->__shlib_handle != NULL) - PTR_DEMANGLE (fct); -#endif - - /* If C16 is the NUL character we write into the output buffer - the byte sequence necessary for PS to get into the initial - state, followed by a NUL byte. */ - if (c16 == L'\0') - { - status = DL_CALL_FCT (fct, (fcts->fromc16, &data, NULL, NULL, - NULL, &dummy, 1, 1)); - - if (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT) - *data.__outbuf++ = '\0'; - } - else - { - /* Do a normal conversion. */ - const unsigned char *inbuf = (const unsigned char *) &c16; - - status = DL_CALL_FCT (fct, - (fcts->fromc16, &data, &inbuf, - inbuf + sizeof (char16_t), NULL, &dummy, - 0, 1)); - } - - /* There must not be any problems with the conversion but illegal input - characters. The output buffer must be large enough, otherwise the - definition of MB_CUR_MAX is not correct. All the other possible - errors also must not happen. */ - assert (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT - || status == __GCONV_ILLEGAL_INPUT - || status == __GCONV_INCOMPLETE_INPUT - || status == __GCONV_FULL_OUTPUT); - - if (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT - || status == __GCONV_FULL_OUTPUT) - result = data.__outbuf - (unsigned char *) s; - else - { - result = (size_t) -1; - __set_errno (EILSEQ); - } - - return result; -#endif } diff --git a/wcsmbs/mbrtoc16.c b/wcsmbs/mbrtoc16.c index df970fba4f..f5ed2b4ac9 100644 --- a/wcsmbs/mbrtoc16.c +++ b/wcsmbs/mbrtoc16.c @@ -30,12 +30,6 @@ # define EILSEQ EINVAL #endif -#if __STDC__ >= 201000L -# define U(c) U##c -#else -# define U(c) L##c -#endif - /* This is the private state used if PS is NULL. */ static mbstate_t state; @@ -46,6 +40,11 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps) if (ps == NULL) ps = &state; + /* The standard text does not say that S being NULL means the state + is reset even if the second half of a surrogate still have to be + returned. In fact, the error code description indicates + otherwise. Therefore always first try to return a second + half. */ if (ps->__count & 0x80000000) { /* We have to return the second word for a surrogate. */ @@ -55,13 +54,13 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps) return (size_t) -3; } - char16_t buf[2]; + wchar_t wc; struct __gconv_step_data data; int status; size_t result; size_t dummy; const unsigned char *inbuf, *endbuf; - unsigned char *outbuf = (unsigned char *) buf; + unsigned char *outbuf = (unsigned char *) &wc; const struct gconv_fcts *fcts; /* Set information for this step. */ @@ -75,14 +74,14 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps) initial state. */ if (s == NULL) { - outbuf = (unsigned char *) buf; + pc16 = NULL; s = ""; n = 1; } /* Tell where we want the result. */ data.__outbuf = outbuf; - data.__outbufend = outbuf + sizeof (char16_t); + data.__outbufend = outbuf + sizeof (wchar_t); /* Get the conversion functions. */ fcts = get_gconv_fcts (_NL_CURRENT_DATA (LC_CTYPE)); @@ -91,28 +90,20 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps) inbuf = (const unsigned char *) s; endbuf = inbuf + n; if (__builtin_expect (endbuf < inbuf, 0)) - endbuf = (const unsigned char *) ~(uintptr_t) 0; - __gconv_fct fct = fcts->toc16->__fct; + { + endbuf = (const unsigned char *) ~(uintptr_t) 0; + if (endbuf == inbuf) + goto ilseq; + } + __gconv_fct fct = fcts->towc->__fct; #ifdef PTR_DEMANGLE - if (fcts->toc16->__shlib_handle != NULL) + if (fcts->towc->__shlib_handle != NULL) PTR_DEMANGLE (fct); #endif - /* We first have to check whether the character can be represented - without a surrogate. If we immediately pass in a buffer large - enough to hold two char16_t values and the first character does - not require a surrogate the routine will try to convert more - input if N is larger then needed for the first character. */ - status = DL_CALL_FCT (fct, (fcts->toc16, &data, &inbuf, endbuf, + status = DL_CALL_FCT (fct, (fcts->towc, &data, &inbuf, endbuf, NULL, &dummy, 0, 1)); - if (status == __GCONV_FULL_OUTPUT && data.__outbuf == outbuf) - { - data.__outbufend = outbuf + 2 * sizeof (char16_t); - status = DL_CALL_FCT (fct, (fcts->toc16, &data, &inbuf, endbuf, - NULL, &dummy, 0, 1)); - } - /* There must not be any problems with the conversion but illegal input characters. The output buffer must be large enough, otherwise the definition of MB_CUR_MAX is not correct. All the other possible @@ -125,33 +116,35 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps) if (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT || status == __GCONV_FULL_OUTPUT) { - if (pc16 != NULL) - *pc16 = buf[0]; + result = inbuf - (const unsigned char *) s; - if (data.__outbuf != outbuf && *(char16_t *) outbuf == U('\0')) + if (wc < 0x10000) { - /* The converted character is the NUL character. */ - assert (__mbsinit (data.__statep)); - result = 0; + if (pc16 != NULL) + *pc16 = wc; + + if (data.__outbuf != outbuf && wc == L'\0') + { + /* The converted character is the NUL character. */ + assert (__mbsinit (data.__statep)); + result = 0; + } } else { - result = inbuf - (const unsigned char *) s; + /* This is a surrogate. */ + if (pc16 != NULL) + *pc16 = 0xd7c0 + (wc >> 10); - if (data.__outbuf != outbuf + 2) - { - /* This is a surrogate. */ - assert (buf[0] >= 0xd800 && buf[0] <= 0xdfff); - assert (buf[1] >= 0xdc00 && buf[1] <= 0xdfff); - ps->__count |= 0x80000000; - ps->__value.__wch = buf[1]; - } + ps->__count |= 0x80000000; + ps->__value.__wch = 0xdc00 + (wc & 0x3ff); } } else if (status == __GCONV_INCOMPLETE_INPUT) result = (size_t) -2; else { + ilseq: result = (size_t) -1; __set_errno (EILSEQ); } diff --git a/wcsmbs/mbrtowc.c b/wcsmbs/mbrtowc.c index 03b8348d30..0c99b7401d 100644 --- a/wcsmbs/mbrtowc.c +++ b/wcsmbs/mbrtowc.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1996, 1997, 1998, 1999, 2000, 2002, 2004, 2005, 2011 +/* Copyright (C) 1996, 1997, 1998, 1999, 2000, 2002, 2004, 2005, 2011, 2012 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@gnu.org>, 1996. @@ -73,7 +73,11 @@ __mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps) inbuf = (const unsigned char *) s; endbuf = inbuf + n; if (__builtin_expect (endbuf < inbuf, 0)) - endbuf = (const unsigned char *) ~(uintptr_t) 0; + { + endbuf = (const unsigned char *) ~(uintptr_t) 0; + if (endbuf == inbuf) + goto ilseq; + } __gconv_fct fct = fcts->towc->__fct; #ifdef PTR_DEMANGLE if (fcts->towc->__shlib_handle != NULL) @@ -108,6 +112,7 @@ __mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps) result = (size_t) -2; else { + ilseq: result = (size_t) -1; __set_errno (EILSEQ); } diff --git a/wcsmbs/wcsmbsload.c b/wcsmbs/wcsmbsload.c index 9ce26f1dc0..27ea442d40 100644 --- a/wcsmbs/wcsmbsload.c +++ b/wcsmbs/wcsmbsload.c @@ -68,44 +68,6 @@ static const struct __gconv_step to_mb = .__data = NULL }; -static const struct __gconv_step to_c16 = -{ - .__shlib_handle = NULL, - .__modname = NULL, - .__counter = INT_MAX, - .__from_name = (char *) "ANSI_X3.4-1968//TRANSLIT", - .__to_name = (char *) "UTF-16//", - .__fct = __gconv_transform_ascii_char16, - .__btowc_fct = NULL, - .__init_fct = NULL, - .__end_fct = NULL, - .__min_needed_from = 1, - .__max_needed_from = 1, - .__min_needed_to = 4, - .__max_needed_to = 4, - .__stateful = 0, - .__data = NULL -}; - -static const struct __gconv_step from_c16 = -{ - .__shlib_handle = NULL, - .__modname = NULL, - .__counter = INT_MAX, - .__from_name = (char *) "UTF-16//", - .__to_name = (char *) "ANSI_X3.4-1968//TRANSLIT", - .__fct = __gconv_transform_char16_ascii, - .__btowc_fct = NULL, - .__init_fct = NULL, - .__end_fct = NULL, - .__min_needed_from = 4, - .__max_needed_from = 4, - .__min_needed_to = 1, - .__max_needed_to = 1, - .__stateful = 0, - .__data = NULL -}; - /* For the default locale we only have to handle ANSI_X3.4-1968. */ const struct gconv_fcts __wcsmbs_gconv_fcts_c = @@ -114,11 +76,6 @@ const struct gconv_fcts __wcsmbs_gconv_fcts_c = .towc_nsteps = 1, .tomb = (struct __gconv_step *) &to_mb, .tomb_nsteps = 1, - - .toc16 = (struct __gconv_step *) &to_c16, - .toc16_nsteps = 1, - .fromc16 = (struct __gconv_step *) &from_c16, - .fromc16_nsteps = 1, }; @@ -234,24 +191,9 @@ __wcsmbs_load_conv (struct __locale_data *new_category) new_fcts->tomb = __wcsmbs_getfct (complete_name, "INTERNAL", &new_fcts->tomb_nsteps); - if (new_fcts->tomb != NULL) - { - new_fcts->toc16 = __wcsmbs_getfct ("CHAR16", complete_name, - &new_fcts->toc16_nsteps); - - if (new_fcts->toc16 != NULL) - new_fcts->fromc16 = __wcsmbs_getfct (complete_name, "CHAR16", - &new_fcts->fromc16_nsteps); - else - { - __gconv_close_transform (new_fcts->toc16, new_fcts->toc16_nsteps); - new_fcts->toc16 = NULL; - } - } - /* If any of the conversion functions is not available we don't use any since this would mean we cannot convert back and - forth.*/ + forth. NB: NEW_FCTS was allocated with calloc. */ if (new_fcts->tomb == NULL) { if (new_fcts->towc != NULL) @@ -264,12 +206,6 @@ __wcsmbs_load_conv (struct __locale_data *new_category) } else { - // XXX At least for now we live with the CHAR16 not being available. - if (new_fcts->toc16 == NULL) - new_fcts->toc16 = __wcsmbs_gconv_fcts_c.toc16; - if (new_fcts->fromc16 == NULL) - new_fcts->fromc16 = __wcsmbs_gconv_fcts_c.fromc16; - new_category->private.ctype = new_fcts; new_category->private.cleanup = &_nl_cleanup_ctype; } @@ -297,10 +233,6 @@ __wcsmbs_clone_conv (struct gconv_fcts *copy) ++copy->towc->__counter; if (copy->tomb->__shlib_handle != NULL) ++copy->tomb->__counter; - if (copy->toc16->__shlib_handle != NULL) - ++copy->toc16->__counter; - if (copy->fromc16->__shlib_handle != NULL) - ++copy->fromc16->__counter; } @@ -320,19 +252,6 @@ __wcsmbs_named_conv (struct gconv_fcts *copy, const char *name) return 1; } - copy->fromc16 = __wcsmbs_getfct (name, "CHAR16", ©->fromc16_nsteps); - if (copy->fromc16 == NULL) - copy->toc16 = NULL; - else - { - copy->toc16 = __wcsmbs_getfct ("CHAR16", name, ©->toc16_nsteps); - if (copy->toc16 == NULL) - { - __gconv_close_transform (copy->fromc16, copy->fromc16_nsteps); - copy->fromc16 = NULL; - } - } - return 0; } @@ -348,8 +267,6 @@ _nl_cleanup_ctype (struct __locale_data *locale) /* Free the old conversions. */ __gconv_close_transform (data->tomb, data->tomb_nsteps); __gconv_close_transform (data->towc, data->towc_nsteps); - __gconv_close_transform (data->fromc16, data->fromc16_nsteps); - __gconv_close_transform (data->toc16, data->toc16_nsteps); free ((char *) data); } } diff --git a/wcsmbs/wcsmbsload.h b/wcsmbs/wcsmbsload.h index 064c41c82f..98f53bcc4c 100644 --- a/wcsmbs/wcsmbsload.h +++ b/wcsmbs/wcsmbsload.h @@ -32,11 +32,6 @@ struct gconv_fcts size_t towc_nsteps; struct __gconv_step *tomb; size_t tomb_nsteps; - - struct __gconv_step *toc16; - size_t toc16_nsteps; - struct __gconv_step *fromc16; - size_t fromc16_nsteps; }; /* Set of currently active conversion functions. */ |