diff options
author | Karl Williamson <public@khwilliamson.com> | 2012-02-15 12:27:23 -0700 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2012-02-15 18:02:35 -0700 |
commit | 20adcf7c80fc3ad8cffa7d52b95a40d87bd781d0 (patch) | |
tree | 42b6ebc3be70e41366f78282be3e658182a06468 | |
parent | 29050de536086566fb4d4a9db818b8068dd118a0 (diff) | |
download | perl-20adcf7c80fc3ad8cffa7d52b95a40d87bd781d0.tar.gz |
pp_quotemeta: in locale, quote all UTF-8 Latin1 non-ASCII
Under locale rules, this commit quotes all non-ASCII Latin1 characters
in UTF-8 encoded strings. This provides consistency between this function
and other functions, such as lc().
-rw-r--r-- | pod/perlfunc.pod | 6 | ||||
-rw-r--r-- | pp.c | 6 | ||||
-rw-r--r-- | t/op/quotemeta.t | 48 |
3 files changed, 58 insertions, 2 deletions
diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod index 7cec3bbcc2..8d97258a8d 100644 --- a/pod/perlfunc.pod +++ b/pod/perlfunc.pod @@ -4976,6 +4976,12 @@ compatibility for old programs which do not use Unicode. (Note that C<unicode_strings> is automatically enabled within the scope of a S<C<use v5.12>> or greater.) +Within the scope of C<use locale>, all non-ASCII Latin1 code points +are quoted whether the string is encoded as UTF-8 or not. As mentioned +above, locale does not affect the quoting of ASCII-range characters. +This protects against those locales where characters such as C<"|"> are +considered to be word characters. + Otherwise, Perl quotes non-ASCII characters using an adaptation from Unicode (see L<http://www.unicode.org/reports/tr31/>.) The only code points that are quoted are those that have any of the @@ -4097,7 +4097,11 @@ PP(pp_quotemeta) } } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) { - if (_isQUOTEMETA(TWO_BYTE_UTF8_TO_UNI(*s, *(s + 1)))) + + /* In locale, we quote all non-ASCII Latin1 chars. 
+ * Otherwise use the quoting rules */ + if (IN_LOCALE_RUNTIME + || _isQUOTEMETA(TWO_BYTE_UTF8_TO_UNI(*s, *(s + 1)))) { to_quote = TRUE; } diff --git a/t/op/quotemeta.t b/t/op/quotemeta.t index 9cec0bdd89..d62563c309 100644 --- a/t/op/quotemeta.t +++ b/t/op/quotemeta.t @@ -7,7 +7,7 @@ BEGIN { require "test.pl"; } -plan tests => 40; +plan tests => 60; if ($Config{ebcdic} eq 'define') { $_ = join "", map chr($_), 129..233; @@ -75,6 +75,25 @@ is(length(quotemeta($char)), 1, "quotemeta '\\N{U+D8}' in UTF-8 length"); is(length(quotemeta("\x{d7}")), 2, "quotemeta Latin1 no unicode_strings quoted length"); is(quotemeta("\x{d8}"), "\\\x{d8}", "quotemeta Latin1 no unicode_strings quoted"); is(length(quotemeta("\x{d8}")), 2, "quotemeta Latin1 no unicode_strings quoted length"); + + use locale; + + my $char = ":"; + is(quotemeta($char), "\\$char", "quotemeta '$char' locale"); + is(length(quotemeta($char)), 2, "quotemeta '$char' locale"); + + $char = "M"; + utf8::upgrade($char); + is(quotemeta($char), "$char", "quotemeta '$char' locale"); + is(length(quotemeta($char)), 1, "quotemeta '$char' locale"); + + my $char = "\x{D7}"; + is(quotemeta($char), "\\$char", "quotemeta '\\x{D7}' locale"); + is(length(quotemeta($char)), 2, "quotemeta '\\x{D7}' locale length"); + + $char = "\x{D8}"; # Every non-ASCII Latin1 is quoted in locale. 
+ is(quotemeta($char), "\\$char", "quotemeta '\\x{D8}' locale"); + is(length(quotemeta($char)), 2, "quotemeta '\\x{D8}' locale length"); } { use feature 'unicode_strings'; @@ -82,6 +101,33 @@ is(length(quotemeta($char)), 1, "quotemeta '\\N{U+D8}' in UTF-8 length"); is(length(quotemeta("\x{d7}")), 2, "quotemeta Latin1 unicode_strings quoted length"); is(quotemeta("\x{d8}"), "\x{d8}", "quotemeta Latin1 unicode_strings nonquoted"); is(length(quotemeta("\x{d8}")), 1, "quotemeta Latin1 unicode_strings nonquoted length"); + + use locale; + + my $char = ":"; + utf8::upgrade($char); + is(quotemeta($char), "\\$char", "quotemeta '$char' locale in UTF-8"); + is(length(quotemeta($char)), 2, "quotemeta '$char' locale in UTF-8 length"); + + $char = "M"; + utf8::upgrade($char); + is(quotemeta($char), "$char", "quotemeta '$char' locale in UTF-8"); + is(length(quotemeta($char)), 1, "quotemeta '$char' locale in UTF-8 length"); + + my $char = "\N{U+D7}"; + utf8::upgrade($char); + is(quotemeta($char), "\\$char", "quotemeta '\\N{U+D7}' locale in UTF-8"); + is(length(quotemeta($char)), 2, "quotemeta '\\N{U+D7}' locale in UTF-8 length"); + + $char = "\N{U+D8}"; # Every non-ASCII Latin1 is quoted in locale. + utf8::upgrade($char); + is(quotemeta($char), "\\$char", "quotemeta '\\N{U+D8}' locale in UTF-8"); + is(length(quotemeta($char)), 2, "quotemeta '\\N{U+D8}' locale in UTF-8 length"); + + is(quotemeta("\x{263a}"), "\\\x{263a}", "quotemeta locale Unicode quoted"); + is(length(quotemeta("\x{263a}")), 2, "quotemeta locale Unicode quoted length"); + is(quotemeta("\x{100}"), "\x{100}", "quotemeta locale Unicode nonquoted"); + is(length(quotemeta("\x{100}")), 1, "quotemeta locale Unicode nonquoted length"); } $a = "foo|bar"; |