diff options
author | Bruno Haible <bruno@clisp.org> | 2009-03-07 16:37:25 +0100 |
---|---|---|
committer | Bruno Haible <bruno@clisp.org> | 2009-03-07 16:37:25 +0100 |
commit | 401aeb2d65f0dc85ddf318bec7454dc403c0bf8e (patch) | |
tree | 069d9dee978438eabe92628e148228100ee8edd6 /lib | |
parent | 2cf1b44c580bc7f6854d52231a86f688096f129b (diff) | |
download | gnulib-401aeb2d65f0dc85ddf318bec7454dc403c0bf8e.tar.gz |
New module 'uninorm/u8-normxfrm'.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/uninorm.h | 15 | ||||
-rw-r--r-- | lib/uninorm/u-normxfrm.h | 89 | ||||
-rw-r--r-- | lib/uninorm/u8-normxfrm.c | 34 |
3 files changed, 138 insertions, 0 deletions
diff --git a/lib/uninorm.h b/lib/uninorm.h index 1750779fe6..6680f819ca 100644 --- a/lib/uninorm.h +++ b/lib/uninorm.h @@ -164,6 +164,21 @@ extern int uninorm_t nf, int *result); +/* Converts the string S of length N to a string in locale encoding, in such a + way that comparing uN_normxfrm (S1) and uN_normxfrm (S2) with memcmp2() is + equivalent to comparing S1 and S2 with uN_normcoll(). + NF must be either UNINORM_NFC or UNINORM_NFKC. */ +extern char * + u8_normxfrm (const uint8_t *s, size_t n, uninorm_t nf, + char *resultbuf, size_t *lengthp); +extern char * + u16_normxfrm (const uint16_t *s, size_t n, uninorm_t nf, + char *resultbuf, size_t *lengthp); +extern char * + u32_normxfrm (const uint32_t *s, size_t n, uninorm_t nf, + char *resultbuf, size_t *lengthp); + + /* Normalization of a stream of Unicode characters. A "stream of Unicode characters" is essentially a function that accepts an diff --git a/lib/uninorm/u-normxfrm.h b/lib/uninorm/u-normxfrm.h new file mode 100644 index 0000000000..60036b1058 --- /dev/null +++ b/lib/uninorm/u-normxfrm.h @@ -0,0 +1,89 @@ +/* Locale dependent transformation for comparison of Unicode strings. + Copyright (C) 2009 Free Software Foundation, Inc. + Written by Bruno Haible <bruno@clisp.org>, 2009. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ + /* FUNC: shared body of the uN_normxfrm functions. The including .c file defines FUNC, UNIT, U_NORMALIZE and U_CONV_TO_ENCODING before including this header (u8-normxfrm.c uses u8_normxfrm, uint8_t, u8_normalize and u8_conv_to_encoding). Steps: normalize the N units at S with normalization form NF, convert the normalized string to the encoding named by locale_charset (), then pass the converted bytes together with RESULTBUF and LENGTHP to memxfrm. Returns the value returned by memxfrm, or NULL if any step fails; errno as left by the failing call is preserved across the cleanup, and a failed realloc reports ENOMEM. Both intermediate strings start out in 2048-byte stack buffers and are freed only when the callee handed back a pointer other than that stack buffer. NOTE(review): the meaning of RESULTBUF and LENGTHP and the ownership of the returned pointer are defined by memxfrm, which is not visible here; confirm in memxfrm.h. */ +char * +FUNC (const UNIT *s, size_t n, uninorm_t nf, + char *resultbuf, size_t *lengthp) +{ + UNIT normsbuf[2048 / sizeof (UNIT)]; + UNIT *norms; + size_t norms_length; + char convsbuf[2048]; + char *convs; + size_t convs_length; + int ret; + char *result; + + /* Normalize the Unicode string. normsbuf is offered as the initial result buffer; norms_length holds its capacity in units going in and is used as the normalized length afterwards. */ + norms_length = sizeof (normsbuf) / sizeof (UNIT); + norms = U_NORMALIZE (nf, s, n, normsbuf, &norms_length); + if (norms == NULL) + /* errno is set here. */ + return NULL; + + /* Convert it to locale encoding. The capacity passed is one less than sizeof (convsbuf), so a spare byte stays available past the converted data whenever the stack buffer is used. */ + convs = convsbuf; + convs_length = sizeof (convsbuf) - 1; + ret = U_CONV_TO_ENCODING (locale_charset (), + iconveh_error, + norms, norms_length, + NULL, + &convs, &convs_length); + if (ret < 0) + { + if (norms != normsbuf) + { + int saved_errno = errno; /* Keep the conversion error code across free (). */ + free (norms); + errno = saved_errno; + } + return NULL; + } + + /* The normalized string is no longer needed once it has been converted. */ + if (norms != normsbuf) + free (norms); + + /* Ensure one more byte is available. Only needed when the converter handed back a buffer other than convsbuf, since convsbuf already reserves that byte. NOTE(review): presumably memxfrm accesses convs[convs_length]; confirm against memxfrm. */ + if (convs != convsbuf) + { + char *memory = (char *) realloc (convs, convs_length + 1); + if (memory == NULL) + { + /* realloc failed, so the old block is still allocated and must be released here. */ + free (convs); + errno = ENOMEM; + return NULL; + } + convs = memory; + } + + /* Apply locale dependent transformations for comparison. */ + result = memxfrm (convs, convs_length, resultbuf, lengthp); + if (result == NULL) + { + if (convs != convsbuf) + { + int saved_errno = errno; /* Keep the memxfrm error code across free (). */ + free (convs); + errno = saved_errno; + } + return NULL; + } + + /* Success: release the converted string if it is not the stack buffer. */ + if (convs != convsbuf) + free (convs); + return result; +} diff --git a/lib/uninorm/u8-normxfrm.c b/lib/uninorm/u8-normxfrm.c new file mode 100644 index 0000000000..31da05d187 --- /dev/null +++ b/lib/uninorm/u8-normxfrm.c @@ -0,0 +1,34 @@ +/* Locale dependent transformation for comparison of UTF-8 strings. + Copyright (C) 2009 Free Software Foundation, Inc. + Written by Bruno Haible <bruno@clisp.org>, 2009. 
+ + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> + +/* Specification. */ +#include "uninorm.h" + +#include <errno.h> +#include <stdlib.h> + +#include "localcharset.h" +#include "uniconv.h" +#include "memxfrm.h" + +#define FUNC u8_normxfrm +#define UNIT uint8_t +#define U_NORMALIZE u8_normalize +#define U_CONV_TO_ENCODING u8_conv_to_encoding +#include "u-normxfrm.h" |