summaryrefslogtreecommitdiff
path: root/utf8.c
diff options
context:
space:
mode:
author Jarkko Hietaniemi <jhi@iki.fi> 2000-11-26 19:01:05 +0000
committer Jarkko Hietaniemi <jhi@iki.fi> 2000-11-26 19:01:05 +0000
commit 02eb7b47b8a6793752e5b001af6e62c374b2c440 (patch)
tree 07c212ac1a760e5468e7b769b861a1fe00d96718 /utf8.c
parent 21477fb41342ef2f0f21af5ef95caf64eee65dee (diff)
download perl-02eb7b47b8a6793752e5b001af6e62c374b2c440.tar.gz
Make utf8_length() and utf8_distance() (the latter of which
is unused at the moment) less forgiving about bad UTF-8. p4raw-id: //depot/perl@7869
Diffstat (limited to 'utf8.c')
-rw-r--r-- utf8.c 28
1 files changed, 19 insertions, 9 deletions
diff --git a/utf8.c b/utf8.c
index fc625dc464..d25b43bbe7 100644
--- a/utf8.c
+++ b/utf8.c
@@ -357,8 +357,8 @@ Perl_utf8_to_uv_simple(pTHX_ U8* s, STRLEN* retlen)
=for apidoc|utf8_length|U8 *s|U8 *e
Return the length of the UTF-8 char encoded string C<s> in characters.
-Stops at string C<e>. If C<e E<lt> s> or if the scan would end up
-past C<e>, return -1.
+Stops at C<e> (inclusive). If C<e E<lt> s> or if the scan would end
+up past C<e>, croaks.
=cut
*/
@@ -369,12 +369,12 @@ Perl_utf8_length(pTHX_ U8* s, U8* e)
STRLEN len = 0;
if (e < s)
- return -1;
+ Perl_croak(aTHX_ "panic: utf8_length: unexpected end");
while (s < e) {
- STRLEN t = UTF8SKIP(s);
+ U8 t = UTF8SKIP(s);
if (e - s < t)
- return -1;
+ Perl_croak(aTHX_ "panic: utf8_length: unaligned end");
s += t;
len++;
}
@@ -385,22 +385,32 @@ Perl_utf8_length(pTHX_ U8* s, U8* e)
/* utf8_distance(a,b) returns the number of UTF8 characters between
the pointers a and b */
-I32
+IV
Perl_utf8_distance(pTHX_ U8 *a, U8 *b)
{
- I32 off = 0;
+ IV off = 0;
+
if (a < b) {
while (a < b) {
- a += UTF8SKIP(a);
+ U8 c = UTF8SKIP(a);
+
+ if (b - a < c)
+ Perl_croak(aTHX_ "panic: utf8_distance: unaligned end");
+ a += c;
off--;
}
}
else {
while (b < a) {
- b += UTF8SKIP(b);
+ U8 c = UTF8SKIP(b);
+
+ if (a - b < c)
+ Perl_croak(aTHX_ "panic: utf8_distance: unaligned end");
+ b += c;
off++;
}
}
+
return off;
}