summaryrefslogtreecommitdiff
path: root/doc/unistr.texi
diff options
context:
space:
mode:
Diffstat (limited to 'doc/unistr.texi')
-rw-r--r-- doc/unistr.texi 102
1 files changed, 93 insertions, 9 deletions
diff --git a/doc/unistr.texi b/doc/unistr.texi
index 60f1daa..da0f4da 100644
--- a/doc/unistr.texi
+++ b/doc/unistr.texi
@@ -35,31 +35,61 @@ The following functions perform conversions between the different forms of Unico
@deftypefun {uint16_t *} u8_to_u16 (const uint8_t *@var{s}, size_t @var{n}, uint16_t *@var{resultbuf}, size_t *@var{lengthp})
Converts a UTF-8 string to a UTF-16 string.
+
+The @var{resultbuf} and @var{lengthp} arguments are as described in
+chapter @ref{Conventions}.
@end deftypefun
@deftypefun {uint32_t *} u8_to_u32 (const uint8_t *@var{s}, size_t @var{n}, uint32_t *@var{resultbuf}, size_t *@var{lengthp})
Converts a UTF-8 string to a UTF-32 string.
+
+The @var{resultbuf} and @var{lengthp} arguments are as described in
+chapter @ref{Conventions}.
@end deftypefun
@deftypefun {uint8_t *} u16_to_u8 (const uint16_t *@var{s}, size_t @var{n}, uint8_t *@var{resultbuf}, size_t *@var{lengthp})
Converts a UTF-16 string to a UTF-8 string.
+
+The @var{resultbuf} and @var{lengthp} arguments are as described in
+chapter @ref{Conventions}.
@end deftypefun
@deftypefun {uint32_t *} u16_to_u32 (const uint16_t *@var{s}, size_t @var{n}, uint32_t *@var{resultbuf}, size_t *@var{lengthp})
Converts a UTF-16 string to a UTF-32 string.
+
+The @var{resultbuf} and @var{lengthp} arguments are as described in
+chapter @ref{Conventions}.
@end deftypefun
@deftypefun {uint8_t *} u32_to_u8 (const uint32_t *@var{s}, size_t @var{n}, uint8_t *@var{resultbuf}, size_t *@var{lengthp})
Converts a UTF-32 string to a UTF-8 string.
+
+The @var{resultbuf} and @var{lengthp} arguments are as described in
+chapter @ref{Conventions}.
@end deftypefun
@deftypefun {uint16_t *} u32_to_u16 (const uint32_t *@var{s}, size_t @var{n}, uint16_t *@var{resultbuf}, size_t *@var{lengthp})
Converts a UTF-32 string to a UTF-16 string.
+
+The @var{resultbuf} and @var{lengthp} arguments are as described in
+chapter @ref{Conventions}.
@end deftypefun
@node Elementary string functions
@section Elementary string functions
+@menu
+* Iterating::
+* Creating Unicode strings::
+* Copying Unicode strings::
+* Comparing Unicode strings::
+* Searching for a character::
+* Counting characters::
+@end menu
+
+@node Iterating
+@subsection Iterating over a Unicode string
+
@cindex iterating
The following functions inspect and return details about the first character
in a Unicode string.
@@ -75,9 +105,9 @@ This function is similar to @posixfunc{mblen}, except that it operates on a
Unicode string and that @var{s} must not be NULL.
@end deftypefun
-@deftypefun int u8_mbtouc_unsafe (ucs4_t *@var{puc}, const uint8_t *@var{s}, size_t @var{n})
-@deftypefunx int u16_mbtouc_unsafe (ucs4_t *@var{puc}, const uint16_t *@var{s}, size_t @var{n})
-@deftypefunx int u32_mbtouc_unsafe (ucs4_t *@var{puc}, const uint32_t *@var{s}, size_t @var{n})
+@deftypefun int u8_mbtouc (ucs4_t *@var{puc}, const uint8_t *@var{s}, size_t @var{n})
+@deftypefunx int u16_mbtouc (ucs4_t *@var{puc}, const uint16_t *@var{s}, size_t @var{n})
+@deftypefunx int u32_mbtouc (ucs4_t *@var{puc}, const uint32_t *@var{s}, size_t @var{n})
Returns the length (number of units) of the first character in @var{s},
putting its @code{ucs4_t} representation in @code{*@var{puc}}. Upon failure,
@code{*@var{puc}} is set to @code{0xfffd}, and an appropriate number of units
@@ -85,17 +115,21 @@ is returned.
The number of available units, @var{n}, must be > 0.
+This function fails if an invalid sequence of units is encountered at the
+beginning of @var{s}, or if additional units (after the @var{n} provided units)
+would be needed to form a character.
+
This function is similar to @posixfunc{mbtowc}, except that it operates on a
Unicode string, @var{puc} and @var{s} must not be NULL, @var{n} must be > 0,
and the NUL character is not treated specially.
@end deftypefun
-@deftypefun int u8_mbtouc (ucs4_t *@var{puc}, const uint8_t *@var{s}, size_t @var{n})
-@deftypefunx int u16_mbtouc (ucs4_t *@var{puc}, const uint16_t *@var{s}, size_t @var{n})
-@deftypefunx int u32_mbtouc (ucs4_t *@var{puc}, const uint32_t *@var{s}, size_t @var{n})
-This function is like @code{u8_mbtouc_unsafe}, except that it will detect an
-invalid UTF-8 character, even if the library is compiled without
-@option{--enable-safety}.
+@deftypefun int u8_mbtouc_unsafe (ucs4_t *@var{puc}, const uint8_t *@var{s}, size_t @var{n})
+@deftypefunx int u16_mbtouc_unsafe (ucs4_t *@var{puc}, const uint16_t *@var{s}, size_t @var{n})
+@deftypefunx int u32_mbtouc_unsafe (ucs4_t *@var{puc}, const uint32_t *@var{s}, size_t @var{n})
+This function is identical to @code{u8_mbtouc}/@code{u16_mbtouc}/@code{u32_mbtouc}.
+Earlier versions of this function performed fewer range-checks on the sequence
+of units.
@end deftypefun
@deftypefun int u8_mbtoucr (ucs4_t *@var{puc}, const uint8_t *@var{s}, size_t @var{n})
@@ -112,6 +146,9 @@ This function is similar to @code{u8_mbtouc}, except that the return value
gives more details about the failure, similar to @posixfunc{mbrtowc}.
@end deftypefun
+@node Creating Unicode strings
+@subsection Creating Unicode strings one character at a time
+
The following function stores a Unicode character as a Unicode string in
memory.
@@ -127,6 +164,9 @@ Unicode strings, @var{s} must not be NULL, and the argument @var{n} must be
specified.
@end deftypefun
+@node Copying Unicode strings
+@subsection Copying Unicode strings
+
@cindex copying
The following functions copy Unicode strings in memory.
@@ -161,6 +201,9 @@ This function is similar to @posixfunc{memset}, except that it operates on
Unicode strings.
@end deftypefun
+@node Comparing Unicode strings
+@subsection Comparing Unicode strings
+
@cindex comparing
The following function compares two Unicode strings of the same length.
@@ -191,6 +234,9 @@ This function is similar to the gnulib function @func{memcmp2}, except that it
operates on Unicode strings.
@end deftypefun
+@node Searching for a character
+@subsection Searching for a character in a Unicode string
+
@cindex searching, for a character
The following function searches for a given Unicode character.
@@ -205,6 +251,9 @@ This function is similar to @posixfunc{memchr}, except that it operates on
Unicode strings.
@end deftypefun
+@node Counting characters
+@subsection Counting the characters in a Unicode string
+
@cindex counting
The following function counts the number of Unicode characters.
@@ -233,6 +282,20 @@ Makes a freshly allocated copy of @var{s}, of length @var{n}.
@node Elementary string functions on NUL terminated strings
@section Elementary string functions on NUL terminated strings
+@menu
+* Iterating over a NUL terminated Unicode string::
+* Length::
+* Copying a NUL terminated Unicode string::
+* Comparing NUL terminated Unicode strings::
+* Duplicating a NUL terminated Unicode string::
+* Searching for a character in a NUL terminated Unicode string::
+* Searching for a substring::
+* Tokenizing::
+@end menu
+
+@node Iterating over a NUL terminated Unicode string
+@subsection Iterating over a NUL terminated Unicode string
+
The following functions inspect and return details about the first character
in a Unicode string.
@@ -273,6 +336,9 @@ Puts the character's @code{ucs4_t} representation in @code{*@var{puc}}.
Note that this function works only on well-formed Unicode strings.
@end deftypefun
+@node Length
+@subsection Length of a NUL terminated Unicode string
+
The following functions determine the length of a Unicode string.
@deftypefun size_t u8_strlen (const uint8_t *@var{s})
@@ -293,6 +359,9 @@ This function is similar to @posixfunc{strnlen} and @posixfunc{wcsnlen}, except
that it operates on Unicode strings.
@end deftypefun
+@node Copying a NUL terminated Unicode string
+@subsection Copying a NUL terminated Unicode string
+
@cindex copying
The following functions copy portions of Unicode strings in memory.
@@ -355,6 +424,9 @@ This function is similar to @posixfunc{strncat} and @posixfunc{wcsncat}, except
that it operates on Unicode strings.
@end deftypefun
+@node Comparing NUL terminated Unicode strings
+@subsection Comparing NUL terminated Unicode strings
+
@cindex comparing
The following functions compare two Unicode strings.
@@ -396,6 +468,9 @@ This function is similar to @posixfunc{strncmp} and @posixfunc{wcsncmp}, except
that it operates on Unicode strings.
@end deftypefun
+@node Duplicating a NUL terminated Unicode string
+@subsection Duplicating a NUL terminated Unicode string
+
@cindex duplicating
The following function allocates a duplicate of a Unicode string.
@@ -408,6 +483,9 @@ This function is similar to @posixfunc{strdup} and @posixfunc{wcsdup}, except
that it operates on Unicode strings.
@end deftypefun
+@node Searching for a character in a NUL terminated Unicode string
+@subsection Searching for a character in a NUL terminated Unicode string
+
@cindex searching, for a character
The following functions search for a given Unicode character.
@@ -461,6 +539,9 @@ This function is similar to @posixfunc{strpbrk} and @posixfunc{wcspbrk}, except
that it operates on Unicode strings.
@end deftypefun
+@node Searching for a substring
+@subsection Searching for a substring in a NUL terminated Unicode string
+
@cindex searching, for a substring
The following functions search whether a given Unicode string is a substring
of another Unicode string.
@@ -486,6 +567,9 @@ Tests whether @var{str} starts with @var{prefix}.
Tests whether @var{str} ends with @var{suffix}.
@end deftypefun
+@node Tokenizing
+@subsection Tokenizing a NUL terminated Unicode string
+
The following function does one step in tokenizing a Unicode string.
@deftypefun {uint8_t *} u8_strtok (uint8_t *@var{str}, const uint8_t *@var{delim}, uint8_t **@var{ptr})