summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jarkko Hietaniemi <jhi@iki.fi> 2001-11-02 15:19:35 +0000
committer Jarkko Hietaniemi <jhi@iki.fi> 2001-11-02 15:19:35 +0000
commit 3de8ed06f96286478ecf8f3810596152fa21b27b (patch)
tree a4869ab5d3cd7affdcf1aa319fc107a621e8a4fb
parent 8be1be9027535daad1bce57b01a65406b28b3a19 (diff)
download perl-3de8ed06f96286478ecf8f3810596152fa21b27b.tar.gz
More encoding testing.
p4raw-id: //depot/perl@12813
-rw-r--r-- lib/encoding.pm 10
-rw-r--r-- lib/encoding.t 23
2 files changed, 26 insertions, 7 deletions
diff --git a/lib/encoding.pm b/lib/encoding.pm
index 2f4b059839..6f5970f2ca 100644
--- a/lib/encoding.pm
+++ b/lib/encoding.pm
@@ -43,7 +43,7 @@ encoding - pragma to control the conversion of legacy data into Unicode
print "tera\n" if ord(pack("C", 0xdf)) == 0x3af;
- # but pack/unpack C are not, in case you still
+ # but pack/unpack are not affected, in case you still
# want back to your native encoding
print "peta\n" if unpack("C", (pack("C", 0xdf))) == 0xdf;
@@ -58,13 +58,13 @@ The pragma is a per script, not a per block lexical. Only the last
C<use encoding> matters, and it affects B<the whole script>.
If no encoding is specified, the environment variable L<PERL_ENCODING>
-is consulted. If that fails, "latin1" (ISO 8859-1) is assumed.
-If no encoding can be found, C<Unknown encoding '...'> error will be thrown.
+is consulted. If that fails, "latin1" (ISO 8859-1) is assumed. If no
+encoding can be found, C<Unknown encoding '...'> error will be thrown.
=head1 KNOWN PROBLEMS
-The C<\x..> and C<\0...> in regular expressions are not affected by
-this pragma. They very probably should.
+Literals in regular expressions are not affected by this pragma.
+They very probably should.
=head1 SEE ALSO
diff --git a/lib/encoding.t b/lib/encoding.t
index 923baa7d85..03634410f4 100644
--- a/lib/encoding.t
+++ b/lib/encoding.t
@@ -1,4 +1,4 @@
-print "1..10\n";
+print "1..15\n";
use encoding "latin1"; # ignored (overwritten by the next line)
use encoding "greek"; # iso 8859-7 (no "latin" alias, surprise...)
@@ -44,8 +44,27 @@ print "ok 8\n";
print "not " unless unpack("C", chr(0xdf)) == 0xce;
print "ok 9\n";
+print "not " unless unpack("U", pack("U", 0xdf)) == 0xdf;
+print "ok 10\n";
+
+print "not " unless unpack("U", chr(0xdf)) == 0x3af;
+print "ok 11\n";
+
# charnames must still work
use charnames ':full';
print "not " unless ord("\N{LATIN SMALL LETTER SHARP S}") == 0xdf;
-print "ok 10\n";
+print "ok 12\n";
+
+# combine
+
+$c = "\xDF\N{LATIN SMALL LETTER SHARP S}" . chr(0xdf);
+
+print "not " unless ord($c) == 0x3af;
+print "ok 13\n";
+
+print "not " unless ord(substr($c, 1, 1)) == 0xdf;
+print "ok 14\n";
+
+print "not " unless ord(substr($c, 2, 1)) == 0x3af;
+print "ok 15\n";