diff options
author | Rafael Garcia-Suarez <rgarciasuarez@gmail.com> | 2004-10-23 19:58:07 +0000 |
---|---|---|
committer | Rafael Garcia-Suarez <rgarciasuarez@gmail.com> | 2004-10-23 19:58:07 +0000 |
commit | f9d05ba35dc7d01260b38a6dc93f199c3b1d2c39 (patch) | |
tree | f357401c49ca2fc654007f03333d3f758fc23e2b /ext/Encode/Unicode | |
parent | 3b2f3eeb7db34430d0f2a4bbc1b52e70a34987d0 (diff) | |
download | perl-f9d05ba35dc7d01260b38a6dc93f199c3b1d2c39.tar.gz |
Upgrade to Encode 2.07
p4raw-id: //depot/perl@23417
Diffstat (limited to 'ext/Encode/Unicode')
-rw-r--r-- | ext/Encode/Unicode/Unicode.pm | 20 |
1 files changed, 19 insertions, 1 deletions
```diff
diff --git a/ext/Encode/Unicode/Unicode.pm b/ext/Encode/Unicode/Unicode.pm
index 8c661a457e..bd9c1885d4 100644
--- a/ext/Encode/Unicode/Unicode.pm
+++ b/ext/Encode/Unicode/Unicode.pm
@@ -4,7 +4,7 @@ use strict;
 use warnings;
 no warnings 'redefine';
 
-our $VERSION = do { my @r = (q$Revision: 2.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 2.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
 
 use XSLoader;
 XSLoader::load(__PACKAGE__,$VERSION);
@@ -234,6 +234,24 @@ every one of \x{0000_0000} up to \x{ffff_ffff} (*) is I<a character>.
   (*) or \x{ffff_ffff_ffff_ffff} if your perl is compiled with 64-bit
   integer support!
 
+=head1 Error Checking
+
+Unlike most encodings which accept various ways to handle errors,
+Unicode encodings simply croaks.
+
+  % perl -MEncode -e '$_ = "\xfe\xff\xd8\xd9\xda\xdb\0\n"' \
+         -e 'Encode::from_to($_, "utf16","shift_jis", 0); print'
+  UTF-16:Malformed LO surrogate d8d9 at /path/to/Encode.pm line 184.
+  % perl -MEncode -e '$a = "BOM missing"' \
+         -e ' Encode::from_to($a, "utf16", "shift_jis", 0); print'
+  UTF-16:Unrecognised BOM 424f at /path/to/Encode.pm line 184.
+
+Unlike other encodings where mappings are not one-to-one against
+Unicode, UTFs are supposed to map 100% against one another. So Encode
+is more strict on UTFs.
+
+Consider that "division by zero" of Encode :)
+
 =head1 SEE ALSO
 
 L<Encode>, L<Encode::Unicode::UTF7>, L<http://www.unicode.org/glossary/>,
```