summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author: Bruno Haible <bruno@clisp.org> 2009-03-07 16:37:25 +0100
committer: Bruno Haible <bruno@clisp.org> 2009-03-07 16:37:25 +0100
commit: 401aeb2d65f0dc85ddf318bec7454dc403c0bf8e (patch)
tree: 069d9dee978438eabe92628e148228100ee8edd6 /lib
parent: 2cf1b44c580bc7f6854d52231a86f688096f129b (diff)
download: gnulib-401aeb2d65f0dc85ddf318bec7454dc403c0bf8e.tar.gz
New module 'uninorm/u8-normxfrm'.
Diffstat (limited to 'lib')
-rw-r--r-- lib/uninorm.h 15
-rw-r--r-- lib/uninorm/u-normxfrm.h 89
-rw-r--r-- lib/uninorm/u8-normxfrm.c 34
3 files changed, 138 insertions, 0 deletions
diff --git a/lib/uninorm.h b/lib/uninorm.h
index 1750779fe6..6680f819ca 100644
--- a/lib/uninorm.h
+++ b/lib/uninorm.h
@@ -164,6 +164,21 @@ extern int
uninorm_t nf, int *result);
+/* Converts the string S of length N to a string in locale encoding, in such a
+ way that comparing uN_normxfrm (S1) and uN_normxfrm (S2) with memcmp2() is
+ equivalent to comparing S1 and S2 with uN_normcoll().
+ NF must be either UNINORM_NFC or UNINORM_NFKC. */
+extern char *
+ u8_normxfrm (const uint8_t *s, size_t n, uninorm_t nf,
+ char *resultbuf, size_t *lengthp);
+extern char *
+ u16_normxfrm (const uint16_t *s, size_t n, uninorm_t nf,
+ char *resultbuf, size_t *lengthp);
+extern char *
+ u32_normxfrm (const uint32_t *s, size_t n, uninorm_t nf,
+ char *resultbuf, size_t *lengthp);
+
+
/* Normalization of a stream of Unicode characters.
A "stream of Unicode characters" is essentially a function that accepts an
diff --git a/lib/uninorm/u-normxfrm.h b/lib/uninorm/u-normxfrm.h
new file mode 100644
index 0000000000..60036b1058
--- /dev/null
+++ b/lib/uninorm/u-normxfrm.h
@@ -0,0 +1,89 @@
+/* Locale dependent transformation for comparison of Unicode strings.
+ Copyright (C) 2009 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2009.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+char *
+FUNC (const UNIT *s, size_t n, uninorm_t nf,
+ char *resultbuf, size_t *lengthp)
+{
+ UNIT normsbuf[2048 / sizeof (UNIT)]; /* stack scratch space for the normalized string */
+ UNIT *norms;
+ size_t norms_length;
+ char convsbuf[2048]; /* stack scratch space for the locale-encoded string */
+ char *convs;
+ size_t convs_length;
+ int ret;
+ char *result;
+
+ /* Normalize the Unicode string S (N units) according to NF. */
+ norms_length = sizeof (normsbuf) / sizeof (UNIT); /* capacity in units, not bytes */
+ norms = U_NORMALIZE (nf, s, n, normsbuf, &norms_length);
+ if (norms == NULL)
+ /* errno is set here. */
+ return NULL;
+
+ /* Convert the normalized string to the encoding named by locale_charset (). */
+ convs = convsbuf;
+ convs_length = sizeof (convsbuf) - 1; /* hold back one byte; see "one more byte" below */
+ ret = U_CONV_TO_ENCODING (locale_charset (),
+ iconveh_error,
+ norms, norms_length,
+ NULL,
+ &convs, &convs_length);
+ if (ret < 0)
+ {
+ if (norms != normsbuf) /* norms was not placed in the stack buffer, so it must be freed */
+ {
+ int saved_errno = errno; /* keep the conversion's errno across free () */
+ free (norms);
+ errno = saved_errno;
+ }
+ return NULL;
+ }
+
+ if (norms != normsbuf) /* the normalized string is no longer needed from here on */
+ free (norms);
+
+ /* Ensure one more byte is available after convs_length (convsbuf was sized with a spare byte above); presumably memxfrm needs it - confirm in memxfrm.h. */
+ if (convs != convsbuf)
+ {
+ char *memory = (char *) realloc (convs, convs_length + 1);
+ if (memory == NULL)
+ {
+ free (convs); /* a failed realloc leaves the old block allocated */
+ errno = ENOMEM;
+ return NULL;
+ }
+ convs = memory;
+ }
+
+ /* Apply locale dependent transformations for comparison; RESULTBUF and LENGTHP are handed to memxfrm unchanged. */
+ result = memxfrm (convs, convs_length, resultbuf, lengthp);
+ if (result == NULL)
+ {
+ if (convs != convsbuf)
+ {
+ int saved_errno = errno; /* keep memxfrm's errno across free () */
+ free (convs);
+ errno = saved_errno;
+ }
+ return NULL;
+ }
+
+ if (convs != convsbuf) /* release the temporary before returning */
+ free (convs);
+ return result;
+}
diff --git a/lib/uninorm/u8-normxfrm.c b/lib/uninorm/u8-normxfrm.c
new file mode 100644
index 0000000000..31da05d187
--- /dev/null
+++ b/lib/uninorm/u8-normxfrm.c
@@ -0,0 +1,34 @@
+/* Locale dependent transformation for comparison of UTF-8 strings.
+ Copyright (C) 2009 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2009.
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "uninorm.h"
+
+#include <errno.h>
+#include <stdlib.h>
+
+#include "localcharset.h"
+#include "uniconv.h"
+#include "memxfrm.h"
+
+#define FUNC u8_normxfrm /* name of the function that u-normxfrm.h defines */
+#define UNIT uint8_t /* element type of the input string and of the normalization buffer */
+#define U_NORMALIZE u8_normalize /* normalization routine called by the template */
+#define U_CONV_TO_ENCODING u8_conv_to_encoding /* conversion to locale encoding called by the template */
+#include "u-normxfrm.h" /* expands to the definition of FUNC using the macros above */