summaryrefslogtreecommitdiff
path: root/doc
diff options
context:
space:
mode:
authorBruno Haible <bruno@clisp.org>2009-04-05 13:42:37 +0200
committerBruno Haible <bruno@clisp.org>2009-04-05 13:42:37 +0200
commit15f8fa4dc366d2c2374ed6d7c0973d02adf02821 (patch)
tree280a8ec0be447e7de6a03e12f283bcb4d6e1d180 /doc
parent16b787e19ccaa0339f001f12b3f27def20d2dde9 (diff)
downloadlibunistring-15f8fa4dc366d2c2374ed6d7c0973d02adf02821.tar.gz
Documentation of <unicase.h>.
Diffstat (limited to 'doc')
-rw-r--r--doc/unicase.texi211
1 files changed, 211 insertions, 0 deletions
diff --git a/doc/unicase.texi b/doc/unicase.texi
new file mode 100644
index 0000000..6fa86c7
--- /dev/null
+++ b/doc/unicase.texi
@@ -0,0 +1,211 @@
+@node unicase.h
+@chapter Case mappings @code{<unicase.h>}
+
+This include file defines functions for case mapping for Unicode strings and
+case insensitive comparison of Unicode strings and C strings.
+
+These string functions fix the problems that were mentioned in
+@ref{char * strings}, namely, they handle the Croatian
+@sc{LETTER DZ WITH CARON}, the German @sc{LATIN SMALL LETTER SHARP S}, the
+Greek sigma and the Lithuanian i correctly.
+
+@menu
+* Case mappings of characters::
+* Case mappings of strings::
+* Case insensitive comparison::
+* Case detection::
+@end menu
+
+@node Case mappings of characters
+@section Case mappings of characters
+
+The following functions implement case mappings on Unicode characters ---
+for those cases only where the result of the mapping is again a single
+Unicode character.
+
+These mappings are locale and context independent.
+
+@cartouche
+@strong{WARNING!} These functions are not sufficient for languages such as
+German, Greek and Lithuanian. Better use the functions below that treat an
+entire string at once and are language aware.
+@end cartouche
+
+@deftypefun ucs4_t uc_toupper (ucs4_t @var{uc})
+Returns the uppercase mapping of the Unicode character @var{uc}.
+@end deftypefun
+
+@deftypefun ucs4_t uc_tolower (ucs4_t @var{uc})
+Returns the lowercase mapping of the Unicode character @var{uc}.
+@end deftypefun
+
+@deftypefun ucs4_t uc_totitle (ucs4_t @var{uc})
+Returns the titlecase mapping of the Unicode character @var{uc}.
+@end deftypefun
+
+@node Case mappings of strings
+@section Case mappings of strings
+
+Case mapping should always be performed on entire strings, not on individual
+characters. The functions in this section do so.
+
+These functions allow you to apply a normalization after the case mapping. The
+reason is that if you want to treat @samp{@"{a}} and @samp{@"{A}} the same,
+you most often also want to treat the composed and decomposed forms of such
+a character, U+00C4 @sc{LATIN CAPITAL LETTER A WITH DIAERESIS} and
+U+0041 @sc{LATIN CAPITAL LETTER A} U+0308 @sc{COMBINING DIAERESIS} the same.
+The @var{nf} argument designates the normalization.
+
+These functions are locale dependent. The @var{iso639_language} argument
+identifies the language (e.g. @code{"tr"} for Turkish). NULL means to use
+locale independent case mappings.
+
+@deftypefun {const char *} uc_locale_language ()
+Returns the ISO 639 language code of the current locale.
+Returns @code{""} if it is unknown, or in the "C" locale.
+@end deftypefun
+
+@deftypefun {uint8_t *} u8_toupper (const uint8_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint8_t *@var{resultbuf}, size_t *@var{lengthp})
+@deftypefunx {uint16_t *} u16_toupper (const uint16_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint16_t *@var{resultbuf}, size_t *@var{lengthp})
+@deftypefunx {uint32_t *} u32_toupper (const uint32_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint32_t *@var{resultbuf}, size_t *@var{lengthp})
+Returns the uppercase mapping of a string.
+
+The @var{nf} argument identifies the normalization form to apply after the
+case-mapping. It can also be NULL, for no normalization.
+@end deftypefun
+
+@deftypefun {uint8_t *} u8_tolower (const uint8_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint8_t *@var{resultbuf}, size_t *@var{lengthp})
+@deftypefunx {uint16_t *} u16_tolower (const uint16_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint16_t *@var{resultbuf}, size_t *@var{lengthp})
+@deftypefunx {uint32_t *} u32_tolower (const uint32_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint32_t *@var{resultbuf}, size_t *@var{lengthp})
+Returns the lowercase mapping of a string.
+
+The @var{nf} argument identifies the normalization form to apply after the
+case-mapping. It can also be NULL, for no normalization.
+@end deftypefun
+
+@deftypefun {uint8_t *} u8_totitle (const uint8_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint8_t *@var{resultbuf}, size_t *@var{lengthp})
+@deftypefunx {uint16_t *} u16_totitle (const uint16_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint16_t *@var{resultbuf}, size_t *@var{lengthp})
+@deftypefunx {uint32_t *} u32_totitle (const uint32_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint32_t *@var{resultbuf}, size_t *@var{lengthp})
+Returns the titlecase mapping of a string.
+
+The @var{nf} argument identifies the normalization form to apply after the
+case-mapping. It can also be NULL, for no normalization.
+@end deftypefun
+
+@node Case insensitive comparison
+@section Case insensitive comparison
+
+The following functions implement comparison that ignores differences in case
+and normalization.
+
+@deftypefun {uint8_t *} u8_casefold (const uint8_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint8_t *@var{resultbuf}, size_t *@var{lengthp})
+@deftypefunx {uint16_t *} u16_casefold (const uint16_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint16_t *@var{resultbuf}, size_t *@var{lengthp})
+@deftypefunx {uint32_t *} u32_casefold (const uint32_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint32_t *@var{resultbuf}, size_t *@var{lengthp})
+Returns the case folded string.
+
+Comparing @code{u8_casefold (@var{s1})} and @code{u8_casefold (@var{s2})}
+with the @code{u8_cmp2} function is equivalent to comparing @var{s1} and
+@var{s2} with @code{u8_casecmp}.
+
+The @var{nf} argument identifies the normalization form to apply after the
+case-mapping. It can also be NULL, for no normalization.
+@end deftypefun
+
+@deftypefun int u8_casecmp (const uint8_t *@var{s1}, size_t @var{n1}, const uint8_t *@var{s2}, size_t @var{n2}, const char *@var{iso639_language}, uninorm_t @var{nf}, int *@var{resultp})
+@deftypefunx int u16_casecmp (const uint16_t *@var{s1}, size_t @var{n1}, const uint16_t *@var{s2}, size_t @var{n2}, const char *@var{iso639_language}, uninorm_t @var{nf}, int *@var{resultp})
+@deftypefunx int u32_casecmp (const uint32_t *@var{s1}, size_t @var{n1}, const uint32_t *@var{s2}, size_t @var{n2}, const char *@var{iso639_language}, uninorm_t @var{nf}, int *@var{resultp})
+@deftypefunx int ulc_casecmp (const char *@var{s1}, size_t @var{n1}, const char *@var{s2}, size_t @var{n2}, const char *@var{iso639_language}, uninorm_t @var{nf}, int *@var{resultp})
+Compares @var{s1} and @var{s2}, ignoring differences in case and normalization.
+
+The @var{nf} argument identifies the normalization form to apply after the
+case-mapping. It can also be NULL, for no normalization.
+
+If successful, sets @code{*@var{resultp}} to -1 if @var{s1} < @var{s2},
+0 if @var{s1} = @var{s2}, 1 if @var{s1} > @var{s2}, and returns 0.
+Upon failure, returns -1 with @code{errno} set.
+@end deftypefun
+
+The following functions additionally take into account the sorting rules of the
+current locale.
+
+@deftypefun {char *} u8_casexfrm (const uint8_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, char *@var{resultbuf}, size_t *@var{lengthp})
+@deftypefunx {char *} u16_casexfrm (const uint16_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, char *@var{resultbuf}, size_t *@var{lengthp})
+@deftypefunx {char *} u32_casexfrm (const uint32_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, char *@var{resultbuf}, size_t *@var{lengthp})
+@deftypefunx {char *} ulc_casexfrm (const char *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, char *@var{resultbuf}, size_t *@var{lengthp})
+Converts the string @var{s} of length @var{n} to a string in locale encoding,
+in such a way that comparing @code{u8_casexfrm (@var{s1})} and
+@code{u8_casexfrm (@var{s2})} with @code{memcmp2} is equivalent to comparing
+@var{s1} and @var{s2} with @code{u8_casecoll}.
+
+@var{nf} must be either @code{UNINORM_NFC}, @code{UNINORM_NFKC}, or NULL for
+no normalization.
+@end deftypefun
+
+@deftypefun int u8_casecoll (const uint8_t *@var{s1}, size_t @var{n1}, const uint8_t *@var{s2}, size_t @var{n2}, const char *@var{iso639_language}, uninorm_t @var{nf}, int *@var{resultp})
+@deftypefunx int u16_casecoll (const uint16_t *@var{s1}, size_t @var{n1}, const uint16_t *@var{s2}, size_t @var{n2}, const char *@var{iso639_language}, uninorm_t @var{nf}, int *@var{resultp})
+@deftypefunx int u32_casecoll (const uint32_t *@var{s1}, size_t @var{n1}, const uint32_t *@var{s2}, size_t @var{n2}, const char *@var{iso639_language}, uninorm_t @var{nf}, int *@var{resultp})
+@deftypefunx int ulc_casecoll (const char *@var{s1}, size_t @var{n1}, const char *@var{s2}, size_t @var{n2}, const char *@var{iso639_language}, uninorm_t @var{nf}, int *@var{resultp})
+Compares @var{s1} and @var{s2}, ignoring differences in case and normalization,
+using the collation rules of the current locale.
+
+The @var{nf} argument identifies the normalization form to apply after the
+case-mapping. It must be either @code{UNINORM_NFC} or @code{UNINORM_NFKC}.
+It can also be NULL, for no normalization.
+
+If successful, sets @code{*@var{resultp}} to -1 if @var{s1} < @var{s2},
+0 if @var{s1} = @var{s2}, 1 if @var{s1} > @var{s2}, and returns 0.
+Upon failure, returns -1 with @code{errno} set.
+@end deftypefun
+
+@node Case detection
+@section Case detection
+
+The following functions determine whether a Unicode string is entirely in
+upper case, or entirely in lower case, or entirely in title case, or already
+case-folded.
+
+@deftypefun int u8_is_uppercase (const uint8_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp})
+@deftypefunx int u16_is_uppercase (const uint16_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp})
+@deftypefunx int u32_is_uppercase (const uint32_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp})
+Sets @code{*@var{resultp}} to true if mapping NFD(@var{s}) to upper case is
+a no-op, or to false otherwise, and returns 0. Upon failure, returns -1 with
+@code{errno} set.
+@end deftypefun
+
+@deftypefun int u8_is_lowercase (const uint8_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp})
+@deftypefunx int u16_is_lowercase (const uint16_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp})
+@deftypefunx int u32_is_lowercase (const uint32_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp})
+Sets @code{*@var{resultp}} to true if mapping NFD(@var{s}) to lower case is
+a no-op, or to false otherwise, and returns 0. Upon failure, returns -1 with
+@code{errno} set.
+@end deftypefun
+
+@deftypefun int u8_is_titlecase (const uint8_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp})
+@deftypefunx int u16_is_titlecase (const uint16_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp})
+@deftypefunx int u32_is_titlecase (const uint32_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp})
+Sets @code{*@var{resultp}} to true if mapping NFD(@var{s}) to title case is
+a no-op, or to false otherwise, and returns 0. Upon failure, returns -1 with
+@code{errno} set.
+@end deftypefun
+
+@deftypefun int u8_is_casefolded (const uint8_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp})
+@deftypefunx int u16_is_casefolded (const uint16_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp})
+@deftypefunx int u32_is_casefolded (const uint32_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp})
+Sets @code{*@var{resultp}} to true if applying case folding to NFD(@var{s}) is
+a no-op, or to false otherwise, and returns 0. Upon failure, returns -1 with
+@code{errno} set.
+@end deftypefun
+
+The following functions determine whether case mappings have any effect on a
+Unicode string.
+
+@deftypefun int u8_is_cased (const uint8_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp})
+@deftypefunx int u16_is_cased (const uint16_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp})
+@deftypefunx int u32_is_cased (const uint32_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp})
+Sets @code{*@var{resultp}} to true if case matters for @var{s}, that is, if
+mapping NFD(@var{s}) to either upper case or lower case or title case is not
+a no-op. Sets @code{*@var{resultp}} to false if NFD(@var{s}) maps to itself
+under the upper case mapping, under the lower case mapping, and under the title
+case mapping; in other words, when NFD(@var{s}) consists entirely of caseless
+characters. Upon failure, returns -1 with @code{errno} set.
+@end deftypefun