summaryrefslogtreecommitdiff
path: root/utf8.c
diff options
context:
space:
mode:
authorSimon Cozens <simon@netthink.co.uk>2001-01-16 13:42:30 +0000
committerJarkko Hietaniemi <jhi@iki.fi>2001-01-16 15:42:04 +0000
commiteebe148573e5a07582cda46391148ca89c563ade (patch)
treed9e4d82d250996b9854a3b3c7c3a61cde15b9283 /utf8.c
parent77003bb1f5b79e478d4e39dbf22a7d33aacd2fd5 (diff)
downloadperl-eebe148573e5a07582cda46391148ca89c563ade.tar.gz
Re: API Cleanup
To: perl5-porters@perl.org Date: Tue, 16 Jan 2001 13:42:30 +0000 Message-ID: <20010116134230.A13420@pembro26.pmb.ox.ac.uk> Subject: [PATCH] utf8.c documentation Date: Tue, 16 Jan 2001 13:52:48 +0000 Message-ID: <20010116135248.A13496@pembro26.pmb.ox.ac.uk> Subject: Re: API Cleanup From: Simon Cozens <simon@cozens.net> Date: Tue, 16 Jan 2001 14:58:55 +0000 Message-ID: <20010116145855.A13794@pembro26.pmb.ox.ac.uk> UTF-8 doc patches. p4raw-id: //depot/perl@8452
Diffstat (limited to 'utf8.c')
-rw-r--r--utf8.c47
1 files changed, 35 insertions, 12 deletions
diff --git a/utf8.c b/utf8.c
index 65f1096183..156e63f717 100644
--- a/utf8.c
+++ b/utf8.c
@@ -26,8 +26,25 @@
/* Unicode support */
+/*
+=for apidoc A|U8*|uv_to_utf8|U8 *d|UV uv
+
+Adds the UTF8 representation of the Unicode codepoint C<uv> to the end
+of the string C<d>; C<d> should have at least C<UTF8_MAXLEN+1> free
+bytes available. The return value is the pointer to the byte after the
+end of the new character. In other words,
+
+ d = uv_to_utf8(d, uv);
+
+is the recommended Unicode-aware way of saying
+
+ *(d++) = uv;
+
+=cut
+*/
+
U8 *
-Perl_uv_to_utf8(pTHX_ U8 *d, UV uv) /* the d must be UTF8_MAXLEN+1 deep */
+Perl_uv_to_utf8(pTHX_ U8 *d, UV uv)
{
if (uv < 0x80) {
*d++ = uv;
@@ -101,9 +118,15 @@ Perl_uv_to_utf8(pTHX_ U8 *d, UV uv) /* the d must be UTF8_MAXLEN+1 deep */
#endif
}
-/* Tests if some arbitrary number of bytes begins in a valid UTF-8 character.
- * The actual number of bytes in the UTF-8 character will be returned if it
- * is valid, otherwise 0. */
+/*
+=for apidoc A|STRLEN|is_utf8_char|U8 *s
+
+Tests if some arbitrary number of bytes begins in a valid UTF-8 character.
+The actual number of bytes in the UTF-8 character will be returned if it
+is valid, otherwise 0.
+
+=cut
+*/
STRLEN
Perl_is_utf8_char(pTHX_ U8 *s)
{
@@ -143,7 +166,7 @@ Perl_is_utf8_char(pTHX_ U8 *s)
}
/*
-=for apidoc Am|is_utf8_string|U8 *s|STRLEN len
+=for apidoc A|bool|is_utf8_string|U8 *s|STRLEN len
Returns true if the first C<len> bytes of the given string form a valid UTF8
string, false otherwise.
@@ -175,7 +198,7 @@ Perl_is_utf8_string(pTHX_ U8 *s, STRLEN len)
}
/*
-=for apidoc Am|U8* s|utf8_to_uv|STRLEN curlen|STRLEN *retlen|U32 flags
+=for apidoc A|UV|utf8_to_uv|U8 *s|STRLEN curlen|STRLEN *retlen|U32 flags
Returns the character value of the first character in the string C<s>
which is assumed to be in UTF8 encoding and no longer than C<curlen>;
@@ -390,7 +413,7 @@ malformed:
}
/*
-=for apidoc Am|U8* s|utf8_to_uv_simple|STRLEN *retlen
+=for apidoc A|UV|utf8_to_uv_simple|U8 *s|STRLEN *retlen
Returns the character value of the first character in the string C<s>
which is assumed to be in UTF8 encoding; C<retlen> will be set to the
@@ -409,7 +432,7 @@ Perl_utf8_to_uv_simple(pTHX_ U8* s, STRLEN* retlen)
}
/*
-=for apidoc Am|STRLEN|utf8_length|U8* s|U8 *e
+=for apidoc A|STRLEN|utf8_length|U8* s|U8 *e
Return the length of the UTF-8 char encoded string C<s> in characters.
Stops at C<e> (inclusive). If C<e E<lt> s> or if the scan would end
@@ -442,7 +465,7 @@ Perl_utf8_length(pTHX_ U8* s, U8* e)
}
/*
-=for apidoc Am|IV|utf8_distance|U8 *a|U8 *b
+=for apidoc A|IV|utf8_distance|U8 *a|U8 *b
Returns the number of UTF8 characters between the UTF-8 pointers C<a>
and C<b>.
@@ -486,7 +509,7 @@ Perl_utf8_distance(pTHX_ U8 *a, U8 *b)
}
/*
-=for apidoc Am|U8*|utf8_hop|U8 *s|I32 off
+=for apidoc A|U8*|utf8_hop|U8 *s|I32 off
Return the UTF-8 pointer C<s> displaced by C<off> characters, either
forward or backward.
@@ -519,7 +542,7 @@ Perl_utf8_hop(pTHX_ U8 *s, I32 off)
}
/*
-=for apidoc Am|U8 *|utf8_to_bytes|U8 *s|STRLEN *len
+=for apidoc A|U8 *|utf8_to_bytes|U8 *s|STRLEN *len
Converts a string C<s> of length C<len> from UTF8 into byte encoding.
Unlike C<bytes_to_utf8>, this over-writes the original string, and
@@ -560,7 +583,7 @@ Perl_utf8_to_bytes(pTHX_ U8* s, STRLEN *len)
}
/*
-=for apidoc Am|U8 *|bytes_to_utf8|U8 *s|STRLEN *len
+=for apidoc A|U8 *|bytes_to_utf8|U8 *s|STRLEN *len
Converts a string C<s> of length C<len> from ASCII into UTF8 encoding.
Returns a pointer to the newly-created string, and sets C<len> to