summaryrefslogtreecommitdiff
path: root/wcsmbs/mbrtoc16.c
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@gmail.com>2012-01-08 07:19:21 -0500
committerUlrich Drepper <drepper@gmail.com>2012-01-08 07:19:21 -0500
commitd3ed722566f42d3f614b1221a8e4f19092976531 (patch)
tree4a63e059ef599167cf407311188551fe72221d8d /wcsmbs/mbrtoc16.c
parenta0da5fe1e49b819b4d90b77915e21cddd397d064 (diff)
downloadglibc-d3ed722566f42d3f614b1221a8e4f19092976531.tar.gz
Simplify char16_t implementation
Diffstat (limited to 'wcsmbs/mbrtoc16.c')
-rw-r--r--wcsmbs/mbrtoc16.c75
1 files changed, 34 insertions, 41 deletions
diff --git a/wcsmbs/mbrtoc16.c b/wcsmbs/mbrtoc16.c
index df970fba4f..f5ed2b4ac9 100644
--- a/wcsmbs/mbrtoc16.c
+++ b/wcsmbs/mbrtoc16.c
@@ -30,12 +30,6 @@
# define EILSEQ EINVAL
#endif
-#if __STDC__ >= 201000L
-# define U(c) U##c
-#else
-# define U(c) L##c
-#endif
-
/* This is the private state used if PS is NULL. */
static mbstate_t state;
@@ -46,6 +40,11 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps)
if (ps == NULL)
ps = &state;
+ /* The standard text does not say that S being NULL means the state
+ is reset even if the second half of a surrogate still have to be
+ returned. In fact, the error code description indicates
+ otherwise. Therefore always first try to return a second
+ half. */
if (ps->__count & 0x80000000)
{
/* We have to return the second word for a surrogate. */
@@ -55,13 +54,13 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps)
return (size_t) -3;
}
- char16_t buf[2];
+ wchar_t wc;
struct __gconv_step_data data;
int status;
size_t result;
size_t dummy;
const unsigned char *inbuf, *endbuf;
- unsigned char *outbuf = (unsigned char *) buf;
+ unsigned char *outbuf = (unsigned char *) &wc;
const struct gconv_fcts *fcts;
/* Set information for this step. */
@@ -75,14 +74,14 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps)
initial state. */
if (s == NULL)
{
- outbuf = (unsigned char *) buf;
+ pc16 = NULL;
s = "";
n = 1;
}
/* Tell where we want the result. */
data.__outbuf = outbuf;
- data.__outbufend = outbuf + sizeof (char16_t);
+ data.__outbufend = outbuf + sizeof (wchar_t);
/* Get the conversion functions. */
fcts = get_gconv_fcts (_NL_CURRENT_DATA (LC_CTYPE));
@@ -91,28 +90,20 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps)
inbuf = (const unsigned char *) s;
endbuf = inbuf + n;
if (__builtin_expect (endbuf < inbuf, 0))
- endbuf = (const unsigned char *) ~(uintptr_t) 0;
- __gconv_fct fct = fcts->toc16->__fct;
+ {
+ endbuf = (const unsigned char *) ~(uintptr_t) 0;
+ if (endbuf == inbuf)
+ goto ilseq;
+ }
+ __gconv_fct fct = fcts->towc->__fct;
#ifdef PTR_DEMANGLE
- if (fcts->toc16->__shlib_handle != NULL)
+ if (fcts->towc->__shlib_handle != NULL)
PTR_DEMANGLE (fct);
#endif
- /* We first have to check whether the character can be represented
- without a surrogate. If we immediately pass in a buffer large
- enough to hold two char16_t values and the first character does
- not require a surrogate the routine will try to convert more
- input if N is larger then needed for the first character. */
- status = DL_CALL_FCT (fct, (fcts->toc16, &data, &inbuf, endbuf,
+ status = DL_CALL_FCT (fct, (fcts->towc, &data, &inbuf, endbuf,
NULL, &dummy, 0, 1));
- if (status == __GCONV_FULL_OUTPUT && data.__outbuf == outbuf)
- {
- data.__outbufend = outbuf + 2 * sizeof (char16_t);
- status = DL_CALL_FCT (fct, (fcts->toc16, &data, &inbuf, endbuf,
- NULL, &dummy, 0, 1));
- }
-
/* There must not be any problems with the conversion but illegal input
characters. The output buffer must be large enough, otherwise the
definition of MB_CUR_MAX is not correct. All the other possible
@@ -125,33 +116,35 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps)
if (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT
|| status == __GCONV_FULL_OUTPUT)
{
- if (pc16 != NULL)
- *pc16 = buf[0];
+ result = inbuf - (const unsigned char *) s;
- if (data.__outbuf != outbuf && *(char16_t *) outbuf == U('\0'))
+ if (wc < 0x10000)
{
- /* The converted character is the NUL character. */
- assert (__mbsinit (data.__statep));
- result = 0;
+ if (pc16 != NULL)
+ *pc16 = wc;
+
+ if (data.__outbuf != outbuf && wc == L'\0')
+ {
+ /* The converted character is the NUL character. */
+ assert (__mbsinit (data.__statep));
+ result = 0;
+ }
}
else
{
- result = inbuf - (const unsigned char *) s;
+ /* This is a surrogate. */
+ if (pc16 != NULL)
+ *pc16 = 0xd7c0 + (wc >> 10);
- if (data.__outbuf != outbuf + 2)
- {
- /* This is a surrogate. */
- assert (buf[0] >= 0xd800 && buf[0] <= 0xdfff);
- assert (buf[1] >= 0xdc00 && buf[1] <= 0xdfff);
- ps->__count |= 0x80000000;
- ps->__value.__wch = buf[1];
- }
+ ps->__count |= 0x80000000;
+ ps->__value.__wch = 0xdc00 + (wc & 0x3ff);
}
}
else if (status == __GCONV_INCOMPLETE_INPUT)
result = (size_t) -2;
else
{
+ ilseq:
result = (size_t) -1;
__set_errno (EILSEQ);
}