summaryrefslogtreecommitdiff
path: root/ext/Encode/Unicode
diff options
context:
space:
mode:
author: Rafael Garcia-Suarez <rgarciasuarez@gmail.com> 2004-10-23 19:58:07 +0000
committer: Rafael Garcia-Suarez <rgarciasuarez@gmail.com> 2004-10-23 19:58:07 +0000
commit: f9d05ba35dc7d01260b38a6dc93f199c3b1d2c39 (patch)
tree: f357401c49ca2fc654007f03333d3f758fc23e2b /ext/Encode/Unicode
parent: 3b2f3eeb7db34430d0f2a4bbc1b52e70a34987d0 (diff)
download: perl-f9d05ba35dc7d01260b38a6dc93f199c3b1d2c39.tar.gz
Upgrade to Encode 2.07
p4raw-id: //depot/perl@23417
Diffstat (limited to 'ext/Encode/Unicode')
-rw-r--r-- ext/Encode/Unicode/Unicode.pm 20
1 files changed, 19 insertions, 1 deletions
diff --git a/ext/Encode/Unicode/Unicode.pm b/ext/Encode/Unicode/Unicode.pm
index 8c661a457e..bd9c1885d4 100644
--- a/ext/Encode/Unicode/Unicode.pm
+++ b/ext/Encode/Unicode/Unicode.pm
@@ -4,7 +4,7 @@ use strict;
use warnings;
no warnings 'redefine';
-our $VERSION = do { my @r = (q$Revision: 2.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 2.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
use XSLoader;
XSLoader::load(__PACKAGE__,$VERSION);
@@ -234,6 +234,24 @@ every one of \x{0000_0000} up to \x{ffff_ffff} (*) is I<a character>.
(*) or \x{ffff_ffff_ffff_ffff} if your perl is compiled with 64-bit
integer support!
+=head1 Error Checking
+
+Unlike most encodings, which accept various ways to handle errors,
+the Unicode encodings simply croak.
+
+ % perl -MEncode -e '$_ = "\xfe\xff\xd8\xd9\xda\xdb\0\n"' \
+ -e 'Encode::from_to($_, "utf16","shift_jis", 0); print'
+ UTF-16:Malformed LO surrogate d8d9 at /path/to/Encode.pm line 184.
+ % perl -MEncode -e '$a = "BOM missing"' \
+ -e ' Encode::from_to($a, "utf16", "shift_jis", 0); print'
+ UTF-16:Unrecognised BOM 424f at /path/to/Encode.pm line 184.
+
+Unlike other encodings, whose mappings are not one-to-one against
+Unicode, UTFs are supposed to map 100% against one another.  So Encode
+is stricter on UTFs.
+
+Consider that the "division by zero" of Encode :)
+
=head1 SEE ALSO
L<Encode>, L<Encode::Unicode::UTF7>, L<http://www.unicode.org/glossary/>,