Make as_iri leave escapes not forming valid UTF-8 sequencesiri

author: Gisle Aas <gisle@aas.no> 2009-11-07 18:44:16 +0100
committer: Gisle Aas <gisle@aas.no> 2009-11-07 18:44:16 +0100
commit: b1a770688af71aab31f77b935b0a5019726caa75 (patch)
tree: 9b993afd7d95d0c50fb5577279200e9515d21cd7
parent: 280962b2bcbe0e2bd71b4d90ba6777caa7178361 (diff)
download: uri-iri.tar.gz
2 files changed, 21 insertions, 2 deletions
diff --git a/URI.pm b/URI.pm
index 200104e..9eb5395 100644
--- a/URI.pm
+++ b/URI.pm
@@ -270,8 +270,24 @@ sub as_iri
 	}
     }
     if ($str =~ s/%([89A-F][0-9A-F])/chr(hex($1))/eg) {
+	# All this crap because the more obvious:
+	#
+	#   Encode::decode("UTF-8", $str, sub { sprintf "%%%02X", shift })
+	#
+	# doesn't work.  Apparently passing a sub as CHECK only works
+	# for 'ascii' and similar direct encodings.
+
 	require Encode;
-	return Encode::decode("UTF-8", $str);
+	my $enc = Encode::find_encoding("UTF-8");
+	my $u = "";
+	while (length $str) {
+	    $u .= $enc->decode($str, Encode::FB_QUIET());
+	    if (length $str) {
+		# escape next char
+		$u .= URI::Escape::escape_char(substr($str, 0, 1, ""));
+	    }
+	}
+	$str = $u;
     }
     return $str;
 }
diff --git a/t/iri.t b/t/iri.t
index a42317e..876b4a4 100644
--- a/t/iri.t
+++ b/t/iri.t
@@ -2,7 +2,7 @@
 
 use utf8;
 use strict;
-use Test::More tests => 10;
+use Test::More tests => 11;
 
 use URI;
 
@@ -18,6 +18,9 @@ $u = URI->new("http://example.com/Bücher");
 is $u, "http://example.com/B%C3%BCcher";
 is $u->as_iri, "http://example.com/Bücher";
 
+$u = URI->new("http://example.com/B%FCcher");  # latin1 encoded stuff
+is $u->as_iri, "http://example.com/B%FCcher";  # ...should not be decoded
+
 $u = URI->new("http://➡.ws/");
 is $u, "http://xn--hgi.ws/";
 is $u->host, "xn--hgi.ws";
author	Gisle Aas <gisle@aas.no>	2009-11-07 18:44:16 +0100
committer	Gisle Aas <gisle@aas.no>	2009-11-07 18:44:16 +0100
commit	b1a770688af71aab31f77b935b0a5019726caa75 (patch)
tree	9b993afd7d95d0c50fb5577279200e9515d21cd7
parent	280962b2bcbe0e2bd71b4d90ba6777caa7178361 (diff)
download	uri-iri.tar.gz