summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGisle Aas <gisle@aas.no>2009-11-07 18:44:16 +0100
committerGisle Aas <gisle@aas.no>2009-11-07 18:44:16 +0100
commitb1a770688af71aab31f77b935b0a5019726caa75 (patch)
tree9b993afd7d95d0c50fb5577279200e9515d21cd7
parent280962b2bcbe0e2bd71b4d90ba6777caa7178361 (diff)
downloaduri-iri.tar.gz
Make as_iri leave escapes not forming valid UTF-8 sequencesiri
-rw-r--r--URI.pm18
-rw-r--r--t/iri.t5
2 files changed, 21 insertions, 2 deletions
diff --git a/URI.pm b/URI.pm
index 200104e..9eb5395 100644
--- a/URI.pm
+++ b/URI.pm
@@ -270,8 +270,24 @@ sub as_iri
}
}
if ($str =~ s/%([89A-F][0-9A-F])/chr(hex($1))/eg) {
+ # All this crap because the more obvious:
+ #
+ # Encode::decode("UTF-8", $str, sub { sprintf "%%%02X", shift })
+ #
+ # doesn't work. Apparently passing a sub as CHECK only works
+ # for 'ascii' and similar direct encodings.
+
require Encode;
- return Encode::decode("UTF-8", $str);
+ my $enc = Encode::find_encoding("UTF-8");
+ my $u = "";
+ while (length $str) {
+ $u .= $enc->decode($str, Encode::FB_QUIET());
+ if (length $str) {
+ # escape next char
+ $u .= URI::Escape::escape_char(substr($str, 0, 1, ""));
+ }
+ }
+ $str = $u;
}
return $str;
}
diff --git a/t/iri.t b/t/iri.t
index a42317e..876b4a4 100644
--- a/t/iri.t
+++ b/t/iri.t
@@ -2,7 +2,7 @@
use utf8;
use strict;
-use Test::More tests => 10;
+use Test::More tests => 11;
use URI;
@@ -18,6 +18,9 @@ $u = URI->new("http://example.com/Bücher");
is $u, "http://example.com/B%C3%BCcher";
is $u->as_iri, "http://example.com/Bücher";
+$u = URI->new("http://example.com/B%FCcher"); # latin1 encoded stuff
+is $u->as_iri, "http://example.com/B%FCcher"; # ...should not be decoded
+
$u = URI->new("http://➡.ws/");
is $u, "http://xn--hgi.ws/";
is $u->host, "xn--hgi.ws";