diff options
author | Bruno Haible <bruno@clisp.org> | 2009-06-30 00:42:04 +0200 |
---|---|---|
committer | Bruno Haible <bruno@clisp.org> | 2009-06-30 00:42:04 +0200 |
commit | 116c15a81610cdeab05883d1a8e5149c1964ab20 (patch) | |
tree | ebb18d5f7e078ba2c8c9d7097db5cc05c1c45057 | |
parent | 668e2b571d78b8efa276475170b345ab79c9713f (diff) | |
download | libunistring-116c15a81610cdeab05883d1a8e5149c1964ab20.tar.gz |
New functions for case mapping of substrings.
-rw-r--r-- | ChangeLog | 10 | ||||
-rw-r--r-- | NEWS | 4 | ||||
-rwxr-xr-x | autogen.sh | 18 | ||||
-rw-r--r-- | doc/libunistring.texi | 1 | ||||
-rw-r--r-- | doc/unicase.texi | 94 |
5 files changed, 127 insertions, 0 deletions
@@ -1,3 +1,13 @@ +2009-06-29 Bruno Haible <bruno@clisp.org> + + New functions for case mapping of substrings. + * autogen.sh (GNULIB_MODULES): Add unicase/* modules relating to + context sensitive case mapping. Preprocess also unicase.h. + * doc/unicase.texi (Case mappings of substrings): New section. + (Case insensitive comparison): Mention u*_ct_casefold functions. + * doc/libunistring.texi: Update menu. + Reported by Paolo Bonzini. + 2009-06-11 Thien-Thi Nguyen <ttn@gnuvola.org> * doc/libunistring.texi (Autoconf macro): Small fixes. @@ -1,3 +1,7 @@ +New in 0.9.1: +* In the include file unicase.h, functions for case mapping of substrings have + been added. + New in 0.9: * The new include file unicase.h implements case folding. * The new include file uninorm.h implements normalization. @@ -355,6 +355,8 @@ if test $skip_gnulib = false; then uninorm/u32-normcoll uninorm/u32-normxfrm unicase/base + unicase/empty-prefix-context + unicase/empty-suffix-context unicase/locale-language unicase/tolower unicase/totitle @@ -363,6 +365,10 @@ if test $skip_gnulib = false; then unicase/u8-casecoll unicase/u8-casefold unicase/u8-casexfrm + unicase/u8-ct-casefold + unicase/u8-ct-tolower + unicase/u8-ct-totitle + unicase/u8-ct-toupper unicase/u8-is-cased unicase/u8-is-casefolded unicase/u8-is-lowercase @@ -375,6 +381,10 @@ if test $skip_gnulib = false; then unicase/u16-casecoll unicase/u16-casefold unicase/u16-casexfrm + unicase/u16-ct-casefold + unicase/u16-ct-tolower + unicase/u16-ct-totitle + unicase/u16-ct-toupper unicase/u16-is-cased unicase/u16-is-casefolded unicase/u16-is-lowercase @@ -387,6 +397,10 @@ if test $skip_gnulib = false; then unicase/u32-casecoll unicase/u32-casefold unicase/u32-casexfrm + unicase/u32-ct-casefold + unicase/u32-ct-tolower + unicase/u32-ct-totitle + unicase/u32-ct-toupper unicase/u32-is-cased unicase/u32-is-casefolded unicase/u32-is-lowercase @@ -419,6 +433,10 @@ if test $skip_gnulib = false; then < lib/uninorm.h \ > lib/uninorm.h.tmp \ && mv 
lib/uninorm.h.tmp lib/uninorm.h + sed -e 's/extern const casing_/extern LIBUNISTRING_DLL_VARIABLE const casing_/' \ + < lib/unicase.h \ + > lib/unicase.h.tmp \ + && mv lib/unicase.h.tmp lib/unicase.h $GNULIB_TOOL --copy-file build-aux/config.guess; chmod a+x build-aux/config.guess $GNULIB_TOOL --copy-file build-aux/config.sub; chmod a+x build-aux/config.sub # If we got no texinfo.tex so far, take the snapshot from gnulib. diff --git a/doc/libunistring.texi b/doc/libunistring.texi index 5694a27..8eb8061 100644 --- a/doc/libunistring.texi +++ b/doc/libunistring.texi @@ -232,6 +232,7 @@ unicase,h * Case mappings of characters:: * Case mappings of strings:: +* Case mappings of substrings:: * Case insensitive comparison:: * Case detection:: diff --git a/doc/unicase.texi b/doc/unicase.texi index 5b39901..89df977 100644 --- a/doc/unicase.texi +++ b/doc/unicase.texi @@ -12,6 +12,7 @@ Greek sigma and the Lithuanian i correctly. @menu * Case mappings of characters:: * Case mappings of strings:: +* Case mappings of substrings:: * Case insensitive comparison:: * Case detection:: @end menu @@ -129,6 +130,92 @@ The @var{nf} argument identifies the normalization form to apply after the case-mapping. It can also be NULL, for no normalization. @end deftypefun +@node Case mappings of substrings +@section Case mappings of substrings + +Case mapping of a substring cannot simply be performed by extracting the +substring and then applying the case mapping function to it. This does not +work because case mapping requires some information about the surrounding +characters. The following functions allow applying case mappings to +substrings of a given string, while taking into account the characters that +precede it (the ``prefix'') and the characters that follow it (the ``suffix''). + +@deftp Type casing_prefix_context_t +This data type denotes the case-mapping context that is given by a prefix +string. 
It is an immediate type that can be copied by simple assignment, +without involving memory allocation. It is not an array type. +@end deftp + +@deftypevr Constant casing_prefix_context_t unicase_empty_prefix_context +This constant is the case-mapping context that corresponds to an empty prefix +string. +@end deftypevr + +The following functions return @code{casing_prefix_context_t} objects: + +@deftypefun casing_prefix_context_t u8_casing_prefix_context (const uint8_t *@var{s}, size_t @var{n}) +@deftypefunx casing_prefix_context_t u16_casing_prefix_context (const uint16_t *@var{s}, size_t @var{n}) +@deftypefunx casing_prefix_context_t u32_casing_prefix_context (const uint32_t *@var{s}, size_t @var{n}) +Returns the case-mapping context of a given prefix string. +@end deftypefun + +@deftypefun casing_prefix_context_t u8_casing_prefixes_context (const uint8_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{a_context}) +@deftypefunx casing_prefix_context_t u16_casing_prefixes_context (const uint16_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{a_context}) +@deftypefunx casing_prefix_context_t u32_casing_prefixes_context (const uint32_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{a_context}) +Returns the case-mapping context of the prefix concat(@var{a}, @var{s}), +given the case-mapping context of the prefix @var{a}. +@end deftypefun + +@deftp Type casing_suffix_context_t +This data type denotes the case-mapping context that is given by a suffix +string. It is an immediate type that can be copied by simple assignment, +without involving memory allocation. It is not an array type. +@end deftp + +@deftypevr Constant casing_suffix_context_t unicase_empty_suffix_context +This constant is the case-mapping context that corresponds to an empty suffix +string. 
+@end deftypevr + +The following functions return @code{casing_suffix_context_t} objects: + +@deftypefun casing_suffix_context_t u8_casing_suffix_context (const uint8_t *@var{s}, size_t @var{n}) +@deftypefunx casing_suffix_context_t u16_casing_suffix_context (const uint16_t *@var{s}, size_t @var{n}) +@deftypefunx casing_suffix_context_t u32_casing_suffix_context (const uint32_t *@var{s}, size_t @var{n}) +Returns the case-mapping context of a given suffix string. +@end deftypefun + +@deftypefun casing_suffix_context_t u8_casing_suffixes_context (const uint8_t *@var{s}, size_t @var{n}, casing_suffix_context_t @var{a_context}) +@deftypefunx casing_suffix_context_t u16_casing_suffixes_context (const uint16_t *@var{s}, size_t @var{n}, casing_suffix_context_t @var{a_context}) +@deftypefunx casing_suffix_context_t u32_casing_suffixes_context (const uint32_t *@var{s}, size_t @var{n}, casing_suffix_context_t @var{a_context}) +Returns the case-mapping context of the suffix concat(@var{s}, @var{a}), +given the case-mapping context of the suffix @var{a}. +@end deftypefun + +The following functions perform a case mapping, considering the +prefix context and the suffix context. 
+ +@deftypefun {uint8_t *} u8_ct_toupper (const uint8_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint8_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint16_t *} u16_ct_toupper (const uint16_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint16_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint32_t *} u32_ct_toupper (const uint32_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint32_t *@var{resultbuf}, size_t *@var{lengthp}) +Returns the uppercase mapping of a string that is surrounded by a prefix +and a suffix. +@end deftypefun + +@deftypefun {uint8_t *} u8_ct_tolower (const uint8_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint8_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint16_t *} u16_ct_tolower (const uint16_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint16_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint32_t *} u32_ct_tolower (const uint32_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint32_t *@var{resultbuf}, size_t *@var{lengthp}) +Returns the lowercase mapping of a string that is surrounded by a prefix +and a suffix. 
+@end deftypefun + +@deftypefun {uint8_t *} u8_ct_totitle (const uint8_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint8_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint16_t *} u16_ct_totitle (const uint16_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint16_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint32_t *} u32_ct_totitle (const uint32_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint32_t *@var{resultbuf}, size_t *@var{lengthp}) +Returns the titlecase mapping of a string that is surrounded by a prefix +and a suffix. +@end deftypefun + @node Case insensitive comparison @section Case insensitive comparison @@ -150,6 +237,13 @@ The @var{nf} argument identifies the normalization form to apply after the case-mapping. It can also be NULL, for no normalization. 
@end deftypefun +@deftypefun {uint8_t *} u8_ct_casefold (const uint8_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint8_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint16_t *} u16_ct_casefold (const uint16_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint16_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint32_t *} u32_ct_casefold (const uint32_t *@var{s}, size_t @var{n}, casing_prefix_context_t @var{prefix_context}, casing_suffix_context_t @var{suffix_context}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint32_t *@var{resultbuf}, size_t *@var{lengthp}) +Returns the case folded string. The case folding takes into account the +case mapping contexts of the prefix and suffix strings. +@end deftypefun + @deftypefun int u8_casecmp (const uint8_t *@var{s1}, size_t @var{n1}, const uint8_t *@var{s2}, size_t @var{n2}, const char *@var{iso639_language}, uninorm_t @var{nf}, int *@var{resultp}) @deftypefunx int u16_casecmp (const uint16_t *@var{s1}, size_t @var{n1}, const uint16_t *@var{s2}, size_t @var{n2}, const char *@var{iso639_language}, uninorm_t @var{nf}, int *@var{resultp}) @deftypefunx int u32_casecmp (const uint32_t *@var{s1}, size_t @var{n1}, const uint32_t *@var{s2}, size_t @var{n2}, const char *@var{iso639_language}, uninorm_t @var{nf}, int *@var{resultp}) |