From 15f8fa4dc366d2c2374ed6d7c0973d02adf02821 Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Sun, 5 Apr 2009 13:42:37 +0200 Subject: Documentation of <unicase.h>. --- doc/unicase.texi | 211 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 211 insertions(+) create mode 100644 doc/unicase.texi (limited to 'doc') diff --git a/doc/unicase.texi b/doc/unicase.texi new file mode 100644 index 0000000..6fa86c7 --- /dev/null +++ b/doc/unicase.texi @@ -0,0 +1,211 @@ +@node unicase.h +@chapter Case mappings @code{<unicase.h>} + +This include file defines functions for case mapping for Unicode strings and +case insensitive comparison of Unicode strings and C strings. + +These string functions fix the problems that were mentioned in +@ref{char * strings}, namely, they handle the Croatian +@sc{LETTER DZ WITH CARON}, the German @sc{LATIN SMALL LETTER SHARP S}, the +Greek sigma and the Lithuanian i correctly. + +@menu +* Case mappings of characters:: +* Case mappings of strings:: +* Case insensitive comparison:: +* Case detection:: +@end menu + +@node Case mappings of characters +@section Case mappings of characters + +The following functions implement case mappings on Unicode characters --- +for those cases only where the result of the mapping is again a single +Unicode character. + +These mappings are locale and context independent. + +@cartouche +@strong{WARNING!} These functions are not sufficient for languages such as +German, Greek and Lithuanian. Better use the functions below that treat an +entire string at once and are language aware. +@end cartouche + +@deftypefun ucs4_t uc_toupper (ucs4_t @var{uc}) +Returns the uppercase mapping of the Unicode character @var{uc}. +@end deftypefun + +@deftypefun ucs4_t uc_tolower (ucs4_t @var{uc}) +Returns the lowercase mapping of the Unicode character @var{uc}. +@end deftypefun + +@deftypefun ucs4_t uc_totitle (ucs4_t @var{uc}) +Returns the titlecase mapping of the Unicode character @var{uc}. 
+@end deftypefun + +@node Case mappings of strings +@section Case mappings of strings + +Case mapping should always be performed on entire strings, not on individual +characters. The functions in this section do so. + +These functions allow you to apply a normalization after the case mapping. The +reason is that if you want to treat @samp{@"{a}} and @samp{@"{A}} the same, +you most often also want to treat the composed and decomposed forms of such +a character, U+00C4 @sc{LATIN CAPITAL LETTER A WITH DIAERESIS} and +U+0041 @sc{LATIN CAPITAL LETTER A} U+0308 @sc{COMBINING DIAERESIS} the same. +The @var{nf} argument designates the normalization. + +These functions are locale dependent. The @var{iso639_language} argument +identifies the language (e.g. @code{"tr"} for Turkish). NULL means to use +locale independent case mappings. + +@deftypefun {const char *} uc_locale_language () +Returns the ISO 639 language code of the current locale. +Returns @code{""} if it is unknown, or in the "C" locale. +@end deftypefun + +@deftypefun {uint8_t *} u8_toupper (const uint8_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint8_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint16_t *} u16_toupper (const uint16_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint16_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint32_t *} u32_toupper (const uint32_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint32_t *@var{resultbuf}, size_t *@var{lengthp}) +Returns the uppercase mapping of a string. + +The @var{nf} argument identifies the normalization form to apply after the +case-mapping. It can also be NULL, for no normalization. 
+@end deftypefun + +@deftypefun {uint8_t *} u8_tolower (const uint8_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint8_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint16_t *} u16_tolower (const uint16_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint16_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint32_t *} u32_tolower (const uint32_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint32_t *@var{resultbuf}, size_t *@var{lengthp}) +Returns the lowercase mapping of a string. + +The @var{nf} argument identifies the normalization form to apply after the +case-mapping. It can also be NULL, for no normalization. +@end deftypefun + +@deftypefun {uint8_t *} u8_totitle (const uint8_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint8_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint16_t *} u16_totitle (const uint16_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint16_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint32_t *} u32_totitle (const uint32_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint32_t *@var{resultbuf}, size_t *@var{lengthp}) +Returns the titlecase mapping of a string. + +The @var{nf} argument identifies the normalization form to apply after the +case-mapping. It can also be NULL, for no normalization. +@end deftypefun + +@node Case insensitive comparison +@section Case insensitive comparison + +The following functions implement comparison that ignores differences in case +and normalization. 
+ +@deftypefun {uint8_t *} u8_casefold (const uint8_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint8_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint16_t *} u16_casefold (const uint16_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint16_t *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {uint32_t *} u32_casefold (const uint32_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, uint32_t *@var{resultbuf}, size_t *@var{lengthp}) +Returns the case folded string. + +Comparing @code{u8_casefold (@var{s1})} and @code{u8_casefold (@var{s2})} +with the @code{u8_cmp2} function is equivalent to comparing @var{s1} and +@var{s2} with @code{u8_casecmp}. + +The @var{nf} argument identifies the normalization form to apply after the +case-mapping. It can also be NULL, for no normalization. +@end deftypefun + +@deftypefun int u8_casecmp (const uint8_t *@var{s1}, size_t @var{n1}, const uint8_t *@var{s2}, size_t @var{n2}, const char *@var{iso639_language}, uninorm_t @var{nf}, int *@var{resultp}) +@deftypefunx int u16_casecmp (const uint16_t *@var{s1}, size_t @var{n1}, const uint16_t *@var{s2}, size_t @var{n2}, const char *@var{iso639_language}, uninorm_t @var{nf}, int *@var{resultp}) +@deftypefunx int u32_casecmp (const uint32_t *@var{s1}, size_t @var{n1}, const uint32_t *@var{s2}, size_t @var{n2}, const char *@var{iso639_language}, uninorm_t @var{nf}, int *@var{resultp}) +@deftypefunx int ulc_casecmp (const char *@var{s1}, size_t @var{n1}, const char *@var{s2}, size_t @var{n2}, const char *@var{iso639_language}, uninorm_t @var{nf}, int *@var{resultp}) +Compares @var{s1} and @var{s2}, ignoring differences in case and normalization. + +The @var{nf} argument identifies the normalization form to apply after the +case-mapping. It can also be NULL, for no normalization. 
+ +If successful, sets @code{*@var{resultp}} to -1 if @var{s1} < @var{s2}, +0 if @var{s1} = @var{s2}, 1 if @var{s1} > @var{s2}, and returns 0. +Upon failure, returns -1 with @code{errno} set. +@end deftypefun + +The following functions additionally take into account the sorting rules of the +current locale. + +@deftypefun {char *} u8_casexfrm (const uint8_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, char *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {char *} u16_casexfrm (const uint16_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, char *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {char *} u32_casexfrm (const uint32_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, char *@var{resultbuf}, size_t *@var{lengthp}) +@deftypefunx {char *} ulc_casexfrm (const char *@var{s}, size_t @var{n}, const char *@var{iso639_language}, uninorm_t @var{nf}, char *@var{resultbuf}, size_t *@var{lengthp}) +Converts the string @var{s} of length @var{n} to a string in locale encoding, +in such a way that comparing @code{u8_casexfrm (@var{s1})} and +@code{u8_casexfrm (@var{s2})} with @code{memcmp2} is equivalent to comparing +@var{s1} and @var{s2} with @code{u8_casecoll}. + +@var{nf} must be either @code{UNINORM_NFC}, @code{UNINORM_NFKC}, or NULL for +no normalization. 
+@end deftypefun + +@deftypefun int u8_casecoll (const uint8_t *@var{s1}, size_t @var{n1}, const uint8_t *@var{s2}, size_t @var{n2}, const char *@var{iso639_language}, uninorm_t @var{nf}, int *@var{resultp}) +@deftypefunx int u16_casecoll (const uint16_t *@var{s1}, size_t @var{n1}, const uint16_t *@var{s2}, size_t @var{n2}, const char *@var{iso639_language}, uninorm_t @var{nf}, int *@var{resultp}) +@deftypefunx int u32_casecoll (const uint32_t *@var{s1}, size_t @var{n1}, const uint32_t *@var{s2}, size_t @var{n2}, const char *@var{iso639_language}, uninorm_t @var{nf}, int *@var{resultp}) +@deftypefunx int ulc_casecoll (const char *@var{s1}, size_t @var{n1}, const char *@var{s2}, size_t @var{n2}, const char *@var{iso639_language}, uninorm_t @var{nf}, int *@var{resultp}) +Compares @var{s1} and @var{s2}, ignoring differences in case and normalization, +using the collation rules of the current locale. + +The @var{nf} argument identifies the normalization form to apply after the +case-mapping. It must be either @code{UNINORM_NFC} or @code{UNINORM_NFKC}. +It can also be NULL, for no normalization. + +If successful, sets @code{*@var{resultp}} to -1 if @var{s1} < @var{s2}, +0 if @var{s1} = @var{s2}, 1 if @var{s1} > @var{s2}, and returns 0. +Upon failure, returns -1 with @code{errno} set. +@end deftypefun + +@node Case detection +@section Case detection + +The following functions determine whether a Unicode string is entirely in +upper case, or entirely in lower case, or entirely in title case, or already +case-folded. 
+ +@deftypefun int u8_is_uppercase (const uint8_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +@deftypefunx int u16_is_uppercase (const uint16_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +@deftypefunx int u32_is_uppercase (const uint32_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +Sets @code{*@var{resultp}} to true if mapping NFD(@var{s}) to upper case is +a no-op, or to false otherwise, and returns 0. Upon failure, returns -1 with +@code{errno} set. +@end deftypefun + +@deftypefun int u8_is_lowercase (const uint8_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +@deftypefunx int u16_is_lowercase (const uint16_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +@deftypefunx int u32_is_lowercase (const uint32_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +Sets @code{*@var{resultp}} to true if mapping NFD(@var{s}) to lower case is +a no-op, or to false otherwise, and returns 0. Upon failure, returns -1 with +@code{errno} set. +@end deftypefun + +@deftypefun int u8_is_titlecase (const uint8_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +@deftypefunx int u16_is_titlecase (const uint16_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +@deftypefunx int u32_is_titlecase (const uint32_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +Sets @code{*@var{resultp}} to true if mapping NFD(@var{s}) to title case is +a no-op, or to false otherwise, and returns 0. Upon failure, returns -1 with +@code{errno} set. 
+@end deftypefun + +@deftypefun int u8_is_casefolded (const uint8_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +@deftypefunx int u16_is_casefolded (const uint16_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +@deftypefunx int u32_is_casefolded (const uint32_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +Sets @code{*@var{resultp}} to true if applying case folding to NFD(@var{s}) is +a no-op, or to false otherwise, and returns 0. Upon failure, returns -1 with +@code{errno} set. +@end deftypefun + +The following functions determine whether case mappings have any effect on a +Unicode string. + +@deftypefun int u8_is_cased (const uint8_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +@deftypefunx int u16_is_cased (const uint16_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +@deftypefunx int u32_is_cased (const uint32_t *@var{s}, size_t @var{n}, const char *@var{iso639_language}, bool *@var{resultp}) +Sets @code{*@var{resultp}} to true if case matters for @var{s}, that is, if +mapping NFD(@var{s}) to either upper case or lower case or title case is not +a no-op. Sets @code{*@var{resultp}} to false if NFD(@var{s}) maps to itself +under the upper case mapping, under the lower case mapping, and under the title +case mapping; in other words, when NFD(@var{s}) consists entirely of caseless +characters. Upon failure, returns -1 with @code{errno} set. +@end deftypefun -- cgit v1.2.1