From b1a770688af71aab31f77b935b0a5019726caa75 Mon Sep 17 00:00:00 2001
From: Gisle Aas <gisle@aas.no>
Date: Sat, 7 Nov 2009 18:44:16 +0100
Subject: Make as_iri leave escapes not forming valid UTF-8 sequences

---
 URI.pm  | 18 +++++++++++++++++-
 t/iri.t |  5 ++++-
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/URI.pm b/URI.pm
index 200104e..9eb5395 100644
--- a/URI.pm
+++ b/URI.pm
@@ -270,8 +270,24 @@ sub as_iri
 	}
     }
     if ($str =~ s/%([89A-F][0-9A-F])/chr(hex($1))/eg) {
+	# All this crap because the more obvious:
+	#
+	#   Encode::decode("UTF-8", $str, sub { sprintf "%%%02X", shift })
+	#
+	# doesn't work.  Apparently passing a sub as CHECK only works
+	# for 'ascii' and similar direct encodings.
+
 	require Encode;
-	return Encode::decode("UTF-8", $str);
+	my $enc = Encode::find_encoding("UTF-8");
+	my $u = "";
+	while (length $str) {
+	    $u .= $enc->decode($str, Encode::FB_QUIET());
+	    if (length $str) {
+		# escape next char
+		$u .= URI::Escape::escape_char(substr($str, 0, 1, ""));
+	    }
+	}
+	$str = $u;
     }
     return $str;
 }
diff --git a/t/iri.t b/t/iri.t
index a42317e..876b4a4 100644
--- a/t/iri.t
+++ b/t/iri.t
@@ -2,7 +2,7 @@
 
 use utf8;
 use strict;
-use Test::More tests => 10;
+use Test::More tests => 11;
 
 use URI;
 
@@ -18,6 +18,9 @@ $u = URI->new("http://example.com/Bücher");
 is $u, "http://example.com/B%C3%BCcher";
 is $u->as_iri, "http://example.com/Bücher";
 
+$u = URI->new("http://example.com/B%FCcher");  # latin1 encoded stuff
+is $u->as_iri, "http://example.com/B%FCcher";  # ...should not be decoded
+
 $u = URI->new("http://➡.ws/");
 is $u, "http://xn--hgi.ws/";
 is $u->host, "xn--hgi.ws";
-- 
cgit v1.2.1