diff options
author | Gisle Aas <gisle@aas.no> | 2009-11-07 18:44:16 +0100 |
---|---|---|
committer | Gisle Aas <gisle@aas.no> | 2009-11-07 18:44:16 +0100 |
commit | b1a770688af71aab31f77b935b0a5019726caa75 (patch) | |
tree | 9b993afd7d95d0c50fb5577279200e9515d21cd7 | |
parent | 280962b2bcbe0e2bd71b4d90ba6777caa7178361 (diff) | |
download | uri-iri.tar.gz |
Make as_iri leave escapes not forming valid UTF-8 sequencesiri
-rw-r--r-- | URI.pm | 18 | ||||
-rw-r--r-- | t/iri.t | 5 |
2 files changed, 21 insertions, 2 deletions
@@ -270,8 +270,24 @@ sub as_iri } } if ($str =~ s/%([89A-F][0-9A-F])/chr(hex($1))/eg) { + # All this crap because the more obvious: + # + # Encode::decode("UTF-8", $str, sub { sprintf "%%%02X", shift }) + # + # doesn't work. Apparently passing a sub as CHECK only works + # for 'ascii' and similar direct encodings. + require Encode; - return Encode::decode("UTF-8", $str); + my $enc = Encode::find_encoding("UTF-8"); + my $u = ""; + while (length $str) { + $u .= $enc->decode($str, Encode::FB_QUIET()); + if (length $str) { + # escape next char + $u .= URI::Escape::escape_char(substr($str, 0, 1, "")); + } + } + $str = $u; } return $str; } @@ -2,7 +2,7 @@ use utf8; use strict; -use Test::More tests => 10; +use Test::More tests => 11; use URI; @@ -18,6 +18,9 @@ $u = URI->new("http://example.com/Bücher"); is $u, "http://example.com/B%C3%BCcher"; is $u->as_iri, "http://example.com/Bücher"; +$u = URI->new("http://example.com/B%FCcher"); # latin1 encoded stuff +is $u->as_iri, "http://example.com/B%FCcher"; # ...should not be decoded + $u = URI->new("http://➡.ws/"); is $u, "http://xn--hgi.ws/"; is $u->host, "xn--hgi.ws"; |