diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/encoding.pm | 10 | ||||
-rw-r--r-- | lib/encoding.t | 23 |
2 files changed, 26 insertions, 7 deletions
diff --git a/lib/encoding.pm b/lib/encoding.pm
index 2f4b059839..6f5970f2ca 100644
--- a/lib/encoding.pm
+++ b/lib/encoding.pm
@@ -43,7 +43,7 @@ encoding - pragma to control the conversion of legacy data into Unicode
 
   print "tera\n" if ord(pack("C", 0xdf)) == 0x3af;
 
-  # but pack/unpack C are not, in case you still
+  # but pack/unpack are not affected, in case you still
   # want back to your native encoding
 
   print "peta\n" if unpack("C", (pack("C", 0xdf))) == 0xdf;
@@ -58,13 +58,13 @@ The pragma is a per script, not a per block lexical. Only the last
 C<use encoding> matters, and it affects B<the whole script>.
 
 If no encoding is specified, the environment variable L<PERL_ENCODING>
-is consulted. If that fails, "latin1" (ISO 8859-1) is assumed.
-If no encoding can be found, C<Unknown encoding '...'> error will be thrown.
+is consulted. If that fails, "latin1" (ISO 8859-1) is assumed. If no
+encoding can be found, C<Unknown encoding '...'> error will be thrown.
 
 =head1 KNOWN PROBLEMS
 
-The C<\x..> and C<\0...> in regular expressions are not affected by
-this pragma. They very probably should.
+Literals in regular expressions are not affected by this pragma.
+They very probably should.
 
 =head1 SEE ALSO
 
diff --git a/lib/encoding.t b/lib/encoding.t
index 923baa7d85..03634410f4 100644
--- a/lib/encoding.t
+++ b/lib/encoding.t
@@ -1,4 +1,4 @@
-print "1..10\n";
+print "1..15\n";
 
 use encoding "latin1"; # ignored (overwritten by the next line)
 use encoding "greek"; # iso 8859-7 (no "latin" alias, surprise...)
@@ -44,8 +44,27 @@ print "ok 8\n";
 
 print "not " unless unpack("C", chr(0xdf)) == 0xce;
 print "ok 9\n";
+print "not " unless unpack("U", pack("U", 0xdf)) == 0xdf;
+print "ok 10\n";
+
+print "not " unless unpack("U", chr(0xdf)) == 0x3af;
+print "ok 11\n";
+
 
 # charnames must still work
 use charnames ':full';
 print "not " unless ord("\N{LATIN SMALL LETTER SHARP S}") == 0xdf;
-print "ok 10\n";
+print "ok 12\n";
+
+# combine
+
+$c = "\xDF\N{LATIN SMALL LETTER SHARP S}" . chr(0xdf);
+
+print "not " unless ord($c) == 0x3af;
+print "ok 13\n";
+
+print "not " unless ord(substr($c, 1, 1)) == 0xdf;
+print "ok 14\n";
+
+print "not " unless ord(substr($c, 2, 1)) == 0x3af;
+print "ok 15\n";