summary refs log tree commit diff
diff options
context:
space:
mode:
author Bruno Haible <bruno@clisp.org> 2009-06-30 00:42:04 +0200
committer Bruno Haible <bruno@clisp.org> 2009-06-30 00:42:04 +0200
commit 116c15a81610cdeab05883d1a8e5149c1964ab20 (patch)
tree ebb18d5f7e078ba2c8c9d7097db5cc05c1c45057
parent 668e2b571d78b8efa276475170b345ab79c9713f (diff)
download libunistring-116c15a81610cdeab05883d1a8e5149c1964ab20.tar.gz
New functions for case mapping of substrings.
-rw-r--r-- ChangeLog 10
-rw-r--r-- NEWS 4
-rwxr-xr-x autogen.sh 18
-rw-r--r-- doc/libunistring.texi 1
-rw-r--r-- doc/unicase.texi 94
5 files changed, 127 insertions, 0 deletions
diff --git a/ChangeLog b/ChangeLog
index 8042dfd..6e35ccd 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+2009-06-29 Bruno Haible <bruno@clisp.org>
+
+ New functions for case mapping of substrings.
+ * autogen.sh (GNULIB_MODULES): Add unicase/* modules relating to
+ context sensitive case mapping. Preprocess also unicase.h.
+ * doc/unicase.texi (Case mappings of substrings): New section.
+ (Case insensitive comparison): Mention u*_ct_casefold functions.
+ * doc/libunistring.texi: Update menu.
+ Reported by Paolo Bonzini.
+
2009-06-11 Thien-Thi Nguyen <ttn@gnuvola.org>
* doc/libunistring.texi (Autoconf macro): Small fixes.
diff --git a/NEWS b/NEWS
index a4b8aa2..bc52e97 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,7 @@
+New in 0.9.1:
+* In the include file unicase.h, functions for case mapping of substrings have
+ been added.
+
New in 0.9:
* The new include file unicase.h implements case folding.
* The new include file uninorm.h implements normalization.
diff --git a/autogen.sh b/autogen.sh
index 42a3564..e062b33 100755
--- a/autogen.sh
+++ b/autogen.sh
@@ -355,6 +355,8 @@ if test $skip_gnulib = false; then
uninorm/u32-normcoll
uninorm/u32-normxfrm
unicase/base
+ unicase/empty-prefix-context
+ unicase/empty-suffix-context
unicase/locale-language
unicase/tolower
unicase/totitle
@@ -363,6 +365,10 @@ if test $skip_gnulib = false; then
unicase/u8-casecoll
unicase/u8-casefold
unicase/u8-casexfrm
+ unicase/u8-ct-casefold
+ unicase/u8-ct-tolower
+ unicase/u8-ct-totitle
+ unicase/u8-ct-toupper
unicase/u8-is-cased
unicase/u8-is-casefolded
unicase/u8-is-lowercase
@@ -375,6 +381,10 @@ if test $skip_gnulib = false; then
unicase/u16-casecoll
unicase/u16-casefold
unicase/u16-casexfrm
+ unicase/u16-ct-casefold
+ unicase/u16-ct-tolower
+ unicase/u16-ct-totitle
+ unicase/u16-ct-toupper
unicase/u16-is-cased
unicase/u16-is-casefolded
unicase/u16-is-lowercase
@@ -387,6 +397,10 @@ if test $skip_gnulib = false; then
unicase/u32-casecoll
unicase/u32-casefold
unicase/u32-casexfrm
+ unicase/u32-ct-casefold
+ unicase/u32-ct-tolower
+ unicase/u32-ct-totitle
+ unicase/u32-ct-toupper
unicase/u32-is-cased
unicase/u32-is-casefolded
unicase/u32-is-lowercase
@@ -419,6 +433,10 @@ if test $skip_gnulib = false; then
< lib/uninorm.h \
> lib/uninorm.h.tmp \
&& mv lib/uninorm.h.tmp lib/uninorm.h
+ sed -e 's/extern const casing_/extern LIBUNISTRING_DLL_VARIABLE const casing_/' \
+ < lib/unicase.h \
+ > lib/unicase.h.tmp \
+ && mv lib/unicase.h.tmp lib/unicase.h
$GNULIB_TOOL --copy-file build-aux/config.guess; chmod a+x build-aux/config.guess
$GNULIB_TOOL --copy-file build-aux/config.sub; chmod a+x build-aux/config.sub
# If we got no texinfo.tex so far, take the snapshot from gnulib.
diff --git a/doc/libunistring.texi b/doc/libunistring.texi
index 5694a27..8eb8061 100644
--- a/doc/libunistring.texi
+++ b/doc/libunistring.texi
@@ -232,6 +232,7 @@ unicase,h
* Case mappings of characters::
* Case mappings of strings::
+* Case mappings of substrings::
* Case insensitive comparison::
* Case detection::
diff --git a/doc/unicase.texi b/doc/unicase.texi
index 5b39901..89df977 100644
--- a/doc/unicase.texi
+++ b/doc/unicase.texi
@@ -12,6 +12,7 @@ Greek sigma and the Lithuanian i correctly.
@menu
* Case mappings of characters::
* Case mappings of strings::
+* Case mappings of substrings::
* Case insensitive comparison::
* Case detection::
@end menu
@@ -129,6 +130,92 @@ The @var{nf} argument identifies the normalization form to apply after the
case-mapping. It can also be NULL, for no normalization.
@end deftypefun
+@node Case mappings of substrings
+@section Case mappings of substrings
+
+Case mapping of a substring cannot simply be performed by extracting the
+substring and then applying the case mapping function to it. This does not
+work because case mapping requires some information about the surrounding
+characters. The following functions allow you to apply case mappings to a
+substring of a given string, while taking into account the characters that
+precede it (the ``prefix'') and the characters that follow it (the ``suffix'').
+
+@deftp Type casing_prefix_context_t
+This data type denotes the case-mapping context that is given by a prefix
+string. It is an immediate type that can be copied by simple assignment,
+without involving memory allocation. It is not an array type.
+@end deftp
+
+@deftypevr Constant casing_prefix_context_t unicase_empty_prefix_context
+This constant is the case-mapping context that corresponds to an empty prefix
+string.
+@end deftypevr
+
+The following functions return @code{casing_prefix_context_t} objects:
+
+@deftypefun casing_prefix_context_t u8_casing_prefix_context (const uint8_t *@var{s}, size_t @var{n})
+@deftypefunx casing_prefix_context_t u16_casing_prefix_context (const uint16_t *@var{s}, size_t @var{n})
+@deftypefunx casing_prefix_context_t u32_casing_prefix_context (const uint32_t *@var{s}, size_t @var{n})
+Returns the case-mapping context of a given prefix string.
+@end deftypefun
+
+@deftypefun casing_prefix_context_t u8_casing_prefixes_context (const uint8_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{a_context})
+@deftypefunx casing_prefix_context_t u16_casing_prefixes_context (const uint16_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{a_context})
+@deftypefunx casing_prefix_context_t u32_casing_prefixes_context (const uint32_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{a_context})
+Returns the case-mapping context of the prefix concat(@var{a}, @var{s}),
+given the case-mapping context of the prefix @var{a}.
+@end deftypefun
+
+@deftp Type casing_suffix_context_t
+This data type denotes the case-mapping context that is given by a suffix
+string. It is an immediate type that can be copied by simple assignment,
+without involving memory allocation. It is not an array type.
+@end deftp
+
+@deftypevr Constant casing_suffix_context_t unicase_empty_suffix_context
+This constant is the case-mapping context that corresponds to an empty suffix
+string.
+@end deftypevr
+
+The following functions return @code{casing_suffix_context_t} objects:
+
+@deftypefun casing_suffix_context_t u8_casing_suffix_context (const uint8_t *@var{s}, size_t @var{n})
+@deftypefunx casing_suffix_context_t u16_casing_suffix_context (const uint16_t *@var{s}, size_t @var{n})
+@deftypefunx casing_suffix_context_t u32_casing_suffix_context (const uint32_t *@var{s}, size_t @var{n})
+Returns the case-mapping context of a given suffix string.
+@end deftypefun
+
+@deftypefun casing_suffix_context_t u8_casing_suffixes_context (const uint8_t *@var{s}, size_t @var{n}, casing_suffix_context_t @var{a_context})
+@deftypefunx casing_suffix_context_t u16_casing_suffixes_context (const uint16_t *@var{s}, size_t @var{n}, casing_suffix_context_t @var{a_context})
+@deftypefunx casing_suffix_context_t u32_casing_suffixes_context (const uint32_t *@var{s}, size_t @var{n}, casing_suffix_context_t @var{a_context})
+Returns the case-mapping context of the suffix concat(@var{s}, @var{a}),
+given the case-mapping context of the suffix @var{a}.
+@end deftypefun
+
+The following functions perform a case mapping, considering the
+prefix context and the suffix context.
+
+@deftypefun {uint8_t *} u8_ct_toupper (const uint8_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint8_t *@var{resultbuf}, size_t *@var{lengthp})
+@deftypefunx {uint16_t *} u16_ct_toupper (const uint16_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint16_t *@var{resultbuf}, size_t *@var{lengthp})
+@deftypefunx {uint32_t *} u32_ct_toupper (const uint32_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint32_t *@var{resultbuf}, size_t *@var{lengthp})
+Returns the uppercase mapping of a string that is surrounded by a prefix
+and a suffix.
+@end deftypefun
+
+@deftypefun {uint8_t *} u8_ct_tolower (const uint8_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint8_t *@var{resultbuf}, size_t *@var{lengthp})
+@deftypefunx {uint16_t *} u16_ct_tolower (const uint16_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint16_t *@var{resultbuf}, size_t *@var{lengthp})
+@deftypefunx {uint32_t *} u32_ct_tolower (const uint32_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint32_t *@var{resultbuf}, size_t *@var{lengthp})
+Returns the lowercase mapping of a string that is surrounded by a prefix
+and a suffix.
+@end deftypefun
+
+@deftypefun {uint8_t *} u8_ct_totitle (const uint8_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint8_t *@var{resultbuf}, size_t *@var{lengthp})
+@deftypefunx {uint16_t *} u16_ct_totitle (const uint16_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint16_t *@var{resultbuf}, size_t *@var{lengthp})
+@deftypefunx {uint32_t *} u32_ct_totitle (const uint32_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint32_t *@var{resultbuf}, size_t *@var{lengthp})
+Returns the titlecase mapping of a string that is surrounded by a prefix
+and a suffix.
+@end deftypefun
+
@node Case insensitive comparison
@section Case insensitive comparison
@@ -150,6 +237,13 @@ The @var{nf} argument identifies the normalization form to apply after the
case-mapping. It can also be NULL, for no normalization.
@end deftypefun
+@deftypefun {uint8_t *} u8_ct_casefold (const uint8_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint8_t *@var{resultbuf}, size_t *@var{lengthp})
+@deftypefunx {uint16_t *} u16_ct_casefold (const uint16_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint16_t *@var{resultbuf}, size_t *@var{lengthp})
+@deftypefunx {uint32_t *} u32_ct_casefold (const uint32_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint32_t *@var{resultbuf}, size_t *@var{lengthp})
+Returns the case folded string. The case folding takes into account the
+case mapping contexts of the prefix and suffix strings.
+@end deftypefun
+
@deftypefun int u8_casecmp (const uint8_t *@var{s1}, size_t @var{n1}, const uint8_t *@var{s2}, size_t @var{n2}, const char *@var{iso639_language}, uninorm_t @var{nf}, int *@var{resultp})
@deftypefunx int u16_casecmp (const uint16_t *@var{s1}, size_t @var{n1}, const uint16_t *@var{s2}, size_t @var{n2}, const char *@var{iso639_language}, uninorm_t @var{nf}, int *@var{resultp})
@deftypefunx int u32_casecmp (const uint32_t *@var{s1}, size_t @var{n1}, const uint32_t *@var{s2}, size_t @var{n2}, const char *@var{iso639_language}, uninorm_t @var{nf}, int *@var{resultp})