summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--dist/Data-Dumper/Dumper.pm2
-rw-r--r--dist/Data-Dumper/Dumper.xs23
-rw-r--r--embed.fnc1
-rw-r--r--embed.h1
-rw-r--r--ext/POSIX/lib/POSIX.pod2
-rw-r--r--lib/locale.pm84
-rw-r--r--locale.c21
-rw-r--r--numeric.c4
-rw-r--r--op.c10
-rw-r--r--perl.h27
-rw-r--r--pod/perldelta.pod6
-rw-r--r--pod/perlembed.pod2
-rw-r--r--pod/perlfunc.pod13
-rw-r--r--pod/perllocale.pod153
-rw-r--r--pp.c36
-rw-r--r--pp_sort.c2
-rw-r--r--pp_sys.c2
-rw-r--r--proto.h1
-rw-r--r--t/run/locale.t49
-rw-r--r--utf8.h7
20 files changed, 318 insertions, 128 deletions
diff --git a/dist/Data-Dumper/Dumper.pm b/dist/Data-Dumper/Dumper.pm
index 6f7a0d8226..9afeac77b7 100644
--- a/dist/Data-Dumper/Dumper.pm
+++ b/dist/Data-Dumper/Dumper.pm
@@ -1398,7 +1398,7 @@ modify it under the same terms as Perl itself.
=head1 VERSION
-Version 2.152 (March 7 2014)
+Version 2.153 (June 5 2014)
=head1 SEE ALSO
diff --git a/dist/Data-Dumper/Dumper.xs b/dist/Data-Dumper/Dumper.xs
index e98c6d70d0..03515aec53 100644
--- a/dist/Data-Dumper/Dumper.xs
+++ b/dist/Data-Dumper/Dumper.xs
@@ -838,15 +838,24 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
av_push(keys, sv);
}
# ifdef USE_LOCALE_NUMERIC
- sortsv(AvARRAY(keys),
- av_len(keys)+1,
- IN_LOCALE ? Perl_sv_cmp_locale : Perl_sv_cmp);
-# else
- sortsv(AvARRAY(keys),
- av_len(keys)+1,
- Perl_sv_cmp);
+# ifdef IN_LC /* Use this if available */
+ if (IN_LC(LC_COLLATE))
+# else
+ if (IN_LOCALE)
+# endif
+ {
+ sortsv(AvARRAY(keys),
+ av_len(keys)+1,
+ Perl_sv_cmp_locale);
+ }
+ else
# endif
#endif
+ {
+ sortsv(AvARRAY(keys),
+ av_len(keys)+1,
+ Perl_sv_cmp);
+ }
}
if (sortkeys != &PL_sv_yes) {
dSP; ENTER; SAVETMPS; PUSHMARK(sp);
diff --git a/embed.fnc b/embed.fnc
index 5284cc9efe..5d98ec8d6a 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1102,6 +1102,7 @@ ApOM |void |new_numeric |NULLOK const char* newcoll
Ap |void |set_numeric_local
Ap |void |set_numeric_radix
Ap |void |set_numeric_standard
+ApM |bool |_is_in_locale_category|const bool compiling|const int category
ApdO |void |require_pv |NN const char* pv
Apd |void |pack_cat |NN SV *cat|NN const char *pat|NN const char *patend \
|NN SV **beglist|NN SV **endlist|NN SV ***next_in_list|U32 flags
diff --git a/embed.h b/embed.h
index 9389c3da4e..06470adf5e 100644
--- a/embed.h
+++ b/embed.h
@@ -27,6 +27,7 @@
/* Hide global symbols */
#define Gv_AMupdate(a,b) Perl_Gv_AMupdate(aTHX_ a,b)
+#define _is_in_locale_category(a,b) Perl__is_in_locale_category(aTHX_ a,b)
#define _is_uni_FOO(a,b) Perl__is_uni_FOO(aTHX_ a,b)
#define _is_uni_perl_idcont(a) Perl__is_uni_perl_idcont(aTHX_ a)
#define _is_uni_perl_idstart(a) Perl__is_uni_perl_idstart(aTHX_ a)
diff --git a/ext/POSIX/lib/POSIX.pod b/ext/POSIX/lib/POSIX.pod
index 59cbd22fc2..61b0f71c45 100644
--- a/ext/POSIX/lib/POSIX.pod
+++ b/ext/POSIX/lib/POSIX.pod
@@ -1186,7 +1186,7 @@ L<a section devoted to this function|perllocale/The setlocale function>.
The discussion here is merely a summary reference for C<setlocale()>.
Note that Perl itself is almost entirely unaffected by the locale
except within the scope of S<C<"use locale">>. (Exceptions are listed
-in L<perllocale/Not within the scope of any "use locale" variant>.)
+in L<perllocale/Not within the scope of "use locale">.)
The following examples assume
diff --git a/lib/locale.pm b/lib/locale.pm
index f7575f5007..52279a1693 100644
--- a/lib/locale.pm
+++ b/lib/locale.pm
@@ -1,6 +1,6 @@
package locale;
-our $VERSION = '1.03';
+our $VERSION = '1.04';
use Config;
$Carp::Internal{ (__PACKAGE__) } = 1;
@@ -35,40 +35,84 @@ to behave as if in the "C" locale; attempts to change the locale will fail.
=cut
-# A separate bit is used for each of the two forms of the pragma, as they are
-# mostly independent, and interact with each other and the unicode_strings
-# feature. This allows for fast determination of which one(s) of the three
-# are to be used at any given point, and no code has to be written to deal
-# with coming in and out of scopes--it falls automatically out from the hint
-# handling
+# A separate bit is used for each of the two forms of the pragma, to save
+# having to look at %^H for the normal case of a plain 'use locale' without an
+# argument.
$locale::hint_bits = 0x4;
-$locale::not_chars_hint_bits = 0x10;
+$locale::partial_hint_bits = 0x10; # If pragma has an argument
+
+# The pseudo-category :characters consists of 2 real ones; but it also is
+# given its own number, -1, because in the complement form it also has the
+# side effect of "use feature 'unicode_strings'"
sub import {
shift; # should be 'locale'; not checked
- my $found_not_chars = 0;
- while (defined (my $arg = shift)) {
- if ($arg eq ":not_characters") {
- $^H |= $locale::not_chars_hint_bits;
+ $^H{locale} = 0 unless defined $^H{locale};
+ if (! @_) { # If no parameter, use the plain form that changes all categories
+ $^H |= $locale::hint_bits;
+
+ }
+ else {
+ my @categories = ( qw(:ctype :collate :messages
+ :numeric :monetary :time) );
+ for (my $i = 0; $i < @_; $i++) {
+ my $arg = $_[$i];
+ $complement = $arg =~ s/ : ( ! | not_ ) /:/x;
+ if (! grep { $arg eq $_ } @categories, ":characters") {
+ require Carp;
+ Carp::croak("Unknown parameter '$_[$i]' to 'use locale'");
+ }
+
+ if ($complement) {
+ if ($i != 0 || $i < @_ - 1) {
+ require Carp;
+ Carp::croak("Only one argument to 'use locale' allowed"
+ . "if is $complement");
+ }
+
+ if ($arg eq ':characters') {
+ push @_, grep { $_ ne ':ctype' && $_ ne ':collate' }
+ @categories;
+ # We add 1 to the category number; This category number
+ # is -1
+ $^H{locale} |= (1 << 0);
+ }
+ else {
+ push @_, grep { $_ ne $arg } @categories;
+ }
+ next;
+ }
+ elsif ($arg eq ':characters') {
+ push @_, ':ctype', ':collate';
+ next;
+ }
+
+ $^H |= $locale::partial_hint_bits;
# This form of the pragma overrides the other
$^H &= ~$locale::hint_bits;
- $found_not_chars = 1;
- }
- else {
- require Carp;
- Carp::croak("Unknown parameter '$arg' to 'use locale'");
+
+ $arg =~ s/^://;
+
+ # Map our names to the ones defined by POSIX
+ $arg = "LC_" . uc($arg);
+ use POSIX 'locale_h';
+ my $bit = eval "&POSIX::$arg";
+ if (defined $bit) {
+ # 1 is added so that the pseudo-category :characters, which is
+ # -1, comes out 0.
+ $^H{locale} |= 1 << ($bit + 1);
+ }
}
}
- # Use the plain form if not doing the :not_characters one.
- $^H |= $locale::hint_bits unless $found_not_chars;
}
sub unimport {
- $^H &= ~($locale::hint_bits|$locale::not_chars_hint_bits);
+ $^H &= ~($locale::hint_bits|$locale::partial_hint_bits);
+ $^H{locale} = 0;
}
1;
diff --git a/locale.c b/locale.c
index 929a249a81..adaa294952 100644
--- a/locale.c
+++ b/locale.c
@@ -1385,6 +1385,27 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
#endif
+
+bool
+Perl__is_in_locale_category(pTHX_ const bool compiling, const int category)
+{
+ /* Internal function which returns if we are in the scope of a pragma that
+ * enables the locale category 'category'. 'compiling' should indicate if
+ * this is during the compilation phase (TRUE) or not (FALSE). */
+
+ const COP * const cop = (compiling) ? &PL_compiling : PL_curcop;
+
+ SV *categories = cop_hints_fetch_pvs(cop, "locale", 0);
+ if (! categories || categories == &PL_sv_placeholder) {
+ return FALSE;
+ }
+
+ /* The pseudo-category 'not_characters' is -1, so just add 1 to each to get
+ * a valid unsigned */
+ assert(category >= -1);
+ return cBOOL(SvUV(categories) & (1U << (category + 1)));
+}
+
/*
* Local variables:
* c-indentation-style: bsd
diff --git a/numeric.c b/numeric.c
index 7e95b4673b..3383cfa190 100644
--- a/numeric.c
+++ b/numeric.c
@@ -528,7 +528,7 @@ Perl_grok_numeric_radix(pTHX_ const char **sp, const char *send)
PERL_ARGS_ASSERT_GROK_NUMERIC_RADIX;
- if (IN_SOME_LOCALE_FORM) {
+ if (IN_LC(LC_NUMERIC)) {
DECLARE_STORE_LC_NUMERIC_SET_TO_NEEDED();
if (PL_numeric_radix_sv) {
STRLEN len;
@@ -860,7 +860,7 @@ Perl_my_atof(pTHX_ const char* s)
{
DECLARE_STORE_LC_NUMERIC_SET_TO_NEEDED();
- if (PL_numeric_radix_sv && IN_SOME_LOCALE_FORM) {
+ if (PL_numeric_radix_sv && IN_LC(LC_NUMERIC)) {
const char *standard = NULL, *local = NULL;
bool use_standard_radix;
diff --git a/op.c b/op.c
index e5122f4721..6ada0385ca 100644
--- a/op.c
+++ b/op.c
@@ -3523,14 +3523,20 @@ S_fold_constants(pTHX_ OP *o)
case OP_UC:
case OP_LC:
case OP_FC:
+ if (IN_LC_COMPILETIME(LC_CTYPE))
+ goto nope;
+ break;
case OP_SLT:
case OP_SGT:
case OP_SLE:
case OP_SGE:
case OP_SCMP:
+ if (IN_LC_COMPILETIME(LC_COLLATE))
+ goto nope;
+ break;
case OP_SPRINTF:
/* XXX what about the numeric ops? */
- if (IN_LOCALE_COMPILETIME)
+ if (IN_LC_COMPILETIME(LC_NUMERIC))
goto nope;
break;
case OP_PACK:
@@ -4731,7 +4737,7 @@ Perl_newPMOP(pTHX_ I32 type, I32 flags)
if (PL_hints & HINT_RE_TAINT)
pmop->op_pmflags |= PMf_RETAINT;
- if (IN_LOCALE_COMPILETIME) {
+ if (IN_LC_COMPILETIME(LC_CTYPE)) {
set_regex_charset(&(pmop->op_pmflags), REGEX_LOCALE_CHARSET);
}
else if (IN_UNI_8_BIT) {
diff --git a/perl.h b/perl.h
index 3ee2cd49af..eaf59115ce 100644
--- a/perl.h
+++ b/perl.h
@@ -4794,12 +4794,18 @@ typedef enum {
However, bitops store HINT_INTEGER in their op_private.
NOTE: The typical module using these has the bit value hard-coded, so don't
- blindly change the values of these */
+ blindly change the values of these.
+
+ If we run out of bits, the 2 locale ones could be combined. The PARTIAL one
+ is for "use locale 'FOO'" which excludes some categories. It requires going
+ to %^H to find out which are in and which are out. This could be extended
+ for the normal case of a plain HINT_LOCALE, so that %^H would be used for
+ any locale form. */
#define HINT_INTEGER 0x00000001 /* integer pragma */
#define HINT_STRICT_REFS 0x00000002 /* strict pragma */
#define HINT_LOCALE 0x00000004 /* locale pragma */
#define HINT_BYTES 0x00000008 /* bytes pragma */
-#define HINT_LOCALE_NOT_CHARS 0x00000010 /* locale ':not_characters' pragma */
+#define HINT_LOCALE_PARTIAL 0x00000010 /* locale, but a subset of categories */
#define HINT_EXPLICIT_STRICT_REFS 0x00000020 /* strict.pm */
#define HINT_EXPLICIT_STRICT_SUBS 0x00000040 /* strict.pm */
@@ -5303,17 +5309,18 @@ typedef struct am_table_short AMTS;
#define PERLDB_SAVESRC_NOSUBS (PL_perldb && (PL_perldb & PERLDBf_SAVESRC_NOSUBS))
#define PERLDB_SAVESRC_INVALID (PL_perldb && (PL_perldb & PERLDBf_SAVESRC_INVALID))
+/* These locale things are all subject to change */
/* Returns TRUE if the plain locale pragma without a parameter is in effect
*/
#define IN_LOCALE_RUNTIME cBOOL(CopHINTS_get(PL_curcop) & HINT_LOCALE)
/* Returns TRUE if either form of the locale pragma is in effect */
#define IN_SOME_LOCALE_FORM_RUNTIME \
- cBOOL(CopHINTS_get(PL_curcop) & (HINT_LOCALE|HINT_LOCALE_NOT_CHARS))
+ cBOOL(CopHINTS_get(PL_curcop) & (HINT_LOCALE|HINT_LOCALE_PARTIAL))
#define IN_LOCALE_COMPILETIME cBOOL(PL_hints & HINT_LOCALE)
#define IN_SOME_LOCALE_FORM_COMPILETIME \
- cBOOL(PL_hints & (HINT_LOCALE|HINT_LOCALE_NOT_CHARS))
+ cBOOL(PL_hints & (HINT_LOCALE|HINT_LOCALE_PARTIAL))
#define IN_LOCALE \
(IN_PERL_COMPILETIME ? IN_LOCALE_COMPILETIME : IN_LOCALE_RUNTIME)
@@ -5321,6 +5328,16 @@ typedef struct am_table_short AMTS;
(IN_PERL_COMPILETIME ? IN_SOME_LOCALE_FORM_COMPILETIME \
: IN_SOME_LOCALE_FORM_RUNTIME)
+#define IN_LC_ALL_COMPILETIME IN_LOCALE_COMPILETIME
+#define IN_LC_ALL_RUNTIME IN_LOCALE_RUNTIME
+
+#define IN_LC_PARTIAL_COMPILETIME cBOOL(PL_hints & HINT_LOCALE_PARTIAL)
+#define IN_LC_PARTIAL_RUNTIME cBOOL(CopHINTS_get(PL_curcop) & HINT_LOCALE_PARTIAL)
+
+#define IN_LC_COMPILETIME(category) (IN_LC_ALL_COMPILETIME || (IN_LC_PARTIAL_COMPILETIME && _is_in_locale_category(TRUE, (category))))
+#define IN_LC_RUNTIME(category) (IN_LC_ALL_RUNTIME || (IN_LC_PARTIAL_RUNTIME && _is_in_locale_category(FALSE, (category))))
+#define IN_LC(category) (IN_LC_COMPILETIME(category) || IN_LC_RUNTIME(category))
+
#ifdef USE_LOCALE_NUMERIC
/* These macros are for toggling between the underlying locale (LOCAL) and the
@@ -5345,7 +5362,7 @@ typedef struct am_table_short AMTS;
void (*_restore_LC_NUMERIC_function)(pTHX) = NULL;
#define STORE_LC_NUMERIC_SET_TO_NEEDED() \
- if (IN_SOME_LOCALE_FORM) { \
+ if (IN_LC(LC_NUMERIC)) { \
if (_NOT_IN_NUMERIC_LOCAL) { \
set_numeric_local(); \
_restore_LC_NUMERIC_function = &Perl_set_numeric_standard; \
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index 21c59b626e..79de1c44db 100644
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -41,6 +41,12 @@ U+2028 LINE SEPARATOR,
and
U+2029 PARAGRAPH SEPARATOR.
+=head2 S<C<use locale>> can restrict which locale categories are affected
+
+It is now possible to pass a parameter to S<C<use locale>> to specify
+a subset of locale categories to be locale-aware, with the remaining
+ones unaffected. See L<perllocale/The "use locale" pragma> for details.
+
=head1 Security
XXX Any security-related notices go here. In particular, any security
diff --git a/pod/perlembed.pod b/pod/perlembed.pod
index 4560ecc786..596f28781a 100644
--- a/pod/perlembed.pod
+++ b/pod/perlembed.pod
@@ -1089,7 +1089,7 @@ When a Perl interpreter normally starts up, it tells the system it wants
to use the system's default locale. This is often, but not necessarily,
the "C" or "POSIX" locale. Absent a S<C<"use locale">> within the perl
code, this mostly has no effect (but see L<perllocale/Not within the
-scope of any "use locale" variant>). Also, there is not a problem if the
+scope of "use locale">). Also, there is not a problem if the
locale you want to use in your embedded Perl is the same as the system
default. However, this doesn't work if you have set up and want to use
a locale that isn't the system default one. Starting in Perl v5.20, you
diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod
index aff2cd5660..5b9c0058d6 100644
--- a/pod/perlfunc.pod
+++ b/pod/perlfunc.pod
@@ -3337,9 +3337,9 @@ What gets returned depends on several factors:
The results follow ASCII rules. Only the characters C<A-Z> change,
to C<a-z> respectively.
-=item Otherwise, if C<use locale> (but not C<use locale ':not_characters'>) is in effect:
+=item Otherwise, if C<use locale> for C<LC_CTYPE> is in effect:
-Respects current LC_CTYPE locale for code points < 256; and uses Unicode
+Respects current C<LC_CTYPE> locale for code points < 256; and uses Unicode
rules for the remaining code points (this last can only happen if
the UTF8 flag is also set). See L<perllocale>.
@@ -5240,10 +5240,11 @@ LIST are actually parsed as a single list. The first argument
of the list will be interpreted as the C<printf> format. This
means that C<printf(@_)> will use C<$_[0]> as the format. See
L<sprintf|/sprintf FORMAT, LIST> for an
-explanation of the format argument. If C<use locale> (including
-C<use locale ':not_characters'>) is in effect and
+explanation of the format argument. If C<use locale> for C<LC_NUMERIC>
+Look for this throught pod
+is in effect and
POSIX::setlocale() has been called, the character used for the decimal
-separator in formatted floating-point numbers is affected by the LC_NUMERIC
+separator in formatted floating-point numbers is affected by the C<LC_NUMERIC>
locale setting. See L<perllocale> and L<POSIX>.
For historical reasons, if you omit the list, C<$_> is used as the format;
@@ -7373,7 +7374,7 @@ index, the C<$> may need escaping:
If C<use locale> (including C<use locale 'not_characters'>) is in effect
and POSIX::setlocale() has been called,
the character used for the decimal separator in formatted floating-point
-numbers is affected by the LC_NUMERIC locale. See L<perllocale>
+numbers is affected by the C<LC_NUMERIC> locale. See L<perllocale>
and L<POSIX>.
=item sqrt EXPR
diff --git a/pod/perllocale.pod b/pod/perllocale.pod
index 19ec397b9e..f371b9b66c 100644
--- a/pod/perllocale.pod
+++ b/pod/perllocale.pod
@@ -91,7 +91,7 @@ This indicates, for example if a character is an uppercase letter.
Some platforms have other categories, dealing with such things as
measurement units and paper sizes. None of these are used directly by
Perl, but outside operations that Perl interacts with may use
-these. See L</Not within the scope of any "use locale" variant> below.
+these. See L</Not within the scope of "use locale"> below.
=back
@@ -140,7 +140,7 @@ C<define>.
If you want a Perl application to process and present your data
according to a particular locale, the application code should include
-the S<C<use locale>> pragma (see L<The use locale pragma>) where
+the S<C<use locale>> pragma (see L<The "use locale" pragma>) where
appropriate, and B<at least one> of the following must be true:
=over 4
@@ -160,24 +160,12 @@ L<The setlocale function>.
=head1 USING LOCALES
-=head2 The use locale pragma
+=head2 The C<"use locale"> pragma
By default, Perl itself ignores the current locale. The S<C<use locale>>
pragma tells Perl to use the current locale for some operations.
-Starting in v5.16, there is an optional parameter to this pragma:
-
- use locale ':not_characters';
-
-This parameter allows better mixing of locales and Unicode (less useful
-in v5.20 and later), and is
-described fully in L</Unicode and UTF-8>, but briefly, it tells Perl to
-not use the character portions of the locale definition, that is
-the C<LC_CTYPE> and C<LC_COLLATE> categories. Instead it will use the
-native character set (extended by Unicode). When using this parameter,
-you are responsible for getting the external character set translated
-into the native/Unicode one (which it already will be if it is one of
-the increasingly popular UTF-8 locales). There are convenient ways of
-doing this, as described in L</Unicode and UTF-8>.
+Starting in v5.16, there are optional parameters to this pragma,
+described below, which restrict which operations are affected by it.
The current locale is set at execution time by
L<setlocale()|/The setlocale function> described below. If that function
@@ -194,9 +182,10 @@ The operations that are affected by locale are:
=over 4
-=item B<Not within the scope of any C<"use locale"> variant>
+=item B<Not within the scope of C<"use locale">>
-Only operations originating outside Perl should be affected, as follows:
+Only certain operations originating outside Perl should be affected, as
+follows:
=over 4
@@ -252,7 +241,7 @@ E<160>
=item B<Lingering effects of C<S<use locale>>>
Certain Perl operations that are set-up within the scope of a
-C<use locale> variant retain that effect even outside the scope.
+C<use locale> retain that effect even outside the scope.
These include:
=over 4
@@ -262,7 +251,7 @@ These include:
The output format of a L<write()|perlfunc/write> is determined by an
earlier format declaration (L<perlfunc/format>), so whether or not the
output is affected by locale is determined by if the C<format()> is
-within the scope of a C<use locale> variant, not whether the C<write()>
+within the scope of a C<use locale>, not whether the C<write()>
is.
=item *
@@ -278,15 +267,16 @@ behavior, not if the matches are done within such a scope or not.
=for comment
The nbsp below makes this look better (though not great)
+
E<160>
-=item B<Under C<"use locale ':not_characters';">>
+=item B<Under C<"use locale";>>
=over 4
=item *
-All the non-Perl operations.
+All the above operations
=item *
@@ -303,21 +293,6 @@ C<say()>,
and
C<sprintf()>.
-=back
-
-=for comment
-The nbsp below makes this look better (though not great)
-
-E<160>
-
-=item B<Under just plain C<"use locale";>>
-
-=over 4
-
-=item *
-
-All the above operations
-
=item *
B<The comparison operators> (C<lt>, C<le>, C<cmp>, C<ge>, and C<gt>) use
@@ -353,6 +328,66 @@ The string result of any operation that uses locale
information is tainted, as it is possible for a locale to be
untrustworthy. See L<"SECURITY">.
+Starting in Perl v5.16 in a very limited way, and more generally in
+v5.22, you can restrict which category or categories are enabled by this
+particular instance of the pragma by adding parameters to it. For
+example,
+
+ use locale qw(:ctype :numeric);
+
+enables locale awareness within its scope of only those operations
+(listed above) that are affected by C<LC_CTYPE> and C<LC_NUMERIC>.
+
+The possible categories are: C<:collate>, C<:ctype>, C<:messages>,
+C<:monetary>, C<:numeric>, C<:time>, and the pseudo category
+C<:characters> (described below).
+
+Thus you can say
+
+ use locale ':messages';
+
+and only L<$!|perlvar/$ERRNO> and L<$^E|perlvar/$EXTENDED_OS_ERROR>
+will be locale aware. Everything else is unaffected.
+
+Since Perl doesn't currently do anything with the C<LC_MONETARY>
+category, specifying C<:monetary> does effectively nothing. Some
+systems have other categories, such as C<LC_PAPER_SIZE>, but Perl
+also doesn't know anything about them, and there is no way to specify
+them in this pragma's arguments.
+
+You can also easily say to use all categories but one, by either, for
+example,
+
+ use locale ':!ctype';
+ use locale ':not_ctype';
+
+both of which mean to enable locale awarness of all categories but
+C<LC_CTYPE>. Only one category argument may be specified in a
+S<C<use locale>> if it is of the negated form.
+
+Prior to v5.22 only one form of the pragma with arguments is available:
+
+ use locale ':not_characters';
+
+(and you have to say C<not_>; you can't use the bang C<!> form). This
+pseudo category is a shorthand for specifying both C<:collate> and
+C<:ctype>. Hence, in the negated form, it is nearly the same thing as
+saying
+
+ use locale qw(:messages :monetary :numeric :time);
+
+We use the term "nearly", because C<:not_characters> also turns on
+S<C<use feature 'unicode_strings'>> within its scope. This form is
+less useful in v5.20 and later, and is described fully in
+L</Unicode and UTF-8>, but briefly, it tells Perl to not use the
+character portions of the locale definition, that is the C<LC_CTYPE> and
+C<LC_COLLATE> categories. Instead it will use the native character set
+(extended by Unicode). When using this parameter, you are responsible
+for getting the external character set translated into the
+native/Unicode one (which it already will be if it is one of the
+increasingly popular UTF-8 locales). There are convenient ways of doing
+this, as described in L</Unicode and UTF-8>.
+
=head2 The setlocale function
You can switch locales as often as you wish at run time with the
@@ -419,8 +454,8 @@ return to the default that was in force when Perl started up: changes
to the environment made by the application after startup may or may not
be noticed, depending on your system's C library.
-Note that Perl ignores the current C<LC_CTYPE> and C<LC_COLLATE> locales
-within the scope of a C<use locale ':not_characters'>.
+Note that when a form of C<use locale> that doesn't include all
+categories is specified, Perl ignores the excluded categories.
If C<set_locale()> fails for some reason (for example, an attempt to set
to a locale unknown to the system), the locale for the category is not
@@ -720,8 +755,8 @@ basic category at a time. See L<"ENVIRONMENT"> for a discussion of these.
=head2 Category C<LC_COLLATE>: Collation
-In the scope of S<C<use locale>> (but not a
-C<use locale ':not_characters'>), Perl looks to the C<LC_COLLATE>
+In the scope of a S<C<use locale>> form that includes collation, Perl
+looks to the C<LC_COLLATE>
environment variable to determine the application's notions on collation
(ordering) of characters. For example, "b" follows "a" in Latin
alphabets, but where do "E<aacute>" and "E<aring>" belong? And while
@@ -807,8 +842,8 @@ always obey the current C<LC_COLLATE> locale.
=head2 Category C<LC_CTYPE>: Character Types
-In the scope of S<C<use locale>> (but not a
-C<use locale ':not_characters'>), Perl obeys the C<LC_CTYPE> locale
+In the scope of a S<C<use locale>> form that includes C<LC_CTYPE>, Perl
+obeys the C<LC_CTYPE> locale
setting. This controls the application's notion of which characters are
alphabetic, numeric, punctuation, I<etc>. This affects Perl's C<\w>
regular expression metanotation,
@@ -866,10 +901,10 @@ should use C<\w> with the C</a> regular expression modifier. See L<"SECURITY">.
=head2 Category C<LC_NUMERIC>: Numeric Formatting
-After a proper C<POSIX::setlocale()> call, and within the scope of one
-of the C<use locale> variants, Perl obeys the C<LC_NUMERIC>
-locale information, which controls an application's idea of how numbers
-should be formatted for human readability.
+After a proper C<POSIX::setlocale()> call, and within the scope of
+of a C<use locale> form that includes numerics, Perl obeys the
+C<LC_NUMERIC> locale information, which controls an application's idea
+of how numbers should be formatted for human readability.
In most implementations the only effect is to
change the character used for the decimal point--perhaps from "." to ",".
The functions aren't aware of such niceties as thousands separation and
@@ -1011,8 +1046,8 @@ Scalar true/false (or less/equal/greater) result is never tainted.
B<Case-mapping interpolation> (with C<\l>, C<\L>, C<\u>, C<\U>, or C<\F>)
-Result string containing interpolated material is tainted if
-C<use locale> (but not S<C<use locale ':not_characters'>>) is in effect.
+The result string containing interpolated material is tainted if
+a C<use locale> form that includes C<LC_CTYPE> is in effect.
=item *
@@ -1021,8 +1056,8 @@ B<Matching operator> (C<m//>):
Scalar true/false result never tainted.
All subpatterns, either delivered as a list-context result or as C<$1>
-I<etc>., are tainted if C<use locale> (but not
-S<C<use locale ':not_characters'>>) is in effect, and the subpattern
+I<etc>., are tainted if a C<use locale> form that includes
+C<LC_CTYPE> is in effect, and the subpattern
regular expression contains a locale-dependent construct. These
constructs include C<\w> (to match an alphanumeric character), C<\W>
(non-alphanumeric character), C<\b> and C<\B> (word-boundary and
@@ -1046,8 +1081,8 @@ The matched-pattern variables, C<$&>, C<$`> (pre-match), C<$'>
B<Substitution operator> (C<s///>):
Has the same behavior as the match operator. Also, the left
-operand of C<=~> becomes tainted when C<use locale>
-(but not S<C<use locale ':not_characters'>>) is in effect if modified as
+operand of C<=~> becomes tainted when a C<use locale>
+form that includes C<LC_CTYPE> is in effect, if modified as
a result of a substitution based on a regular
expression match involving any of the things mentioned in the previous
item, or of case-mapping, such as C<\l>, C<\L>,C<\u>, C<\U>, or C<\F>.
@@ -1064,8 +1099,8 @@ effect.
B<Case-mapping functions> (C<lc()>, C<lcfirst()>, C<uc()>, C<ucfirst()>):
-Results are tainted if C<use locale> (but not
-S<C<use locale ':not_characters'>>) is in effect.
+Results are tainted if a C<use locale> form that includes C<LC_CTYPE> is
+in effect.
=item *
@@ -1274,6 +1309,10 @@ something like:
This prints C<2.7>.
+You could also exclude C<LC_NUMERIC>, if you don't need it, by
+
+ use locale ':!numeric';
+
=head2 Backward compatibility
Versions of Perl prior to 5.004 B<mostly> ignored locale information,
@@ -1282,7 +1321,7 @@ always in force, even if the program environment suggested otherwise
(see L<The setlocale function>). By default, Perl still behaves this
way for backward compatibility. If you want a Perl application to pay
attention to locale information, you B<must> use the S<C<use locale>>
-pragma (see L<The use locale pragma>) or, in the unlikely event
+pragma (see L<The "use locale" pragma>) or, in the unlikely event
that you want to do so for just pattern matching, the
C</l> regular expression modifier (see L<perlre/Character set
modifiers>) to instruct it to do so.
diff --git a/pp.c b/pp.c
index 11119a25b4..e3d601f609 100644
--- a/pp.c
+++ b/pp.c
@@ -2133,7 +2133,7 @@ PP(pp_sle)
tryAMAGICbin_MG(amg_type, AMGf_set);
{
dPOPTOPssrl;
- const int cmp = (IN_LOCALE_RUNTIME
+ const int cmp = (IN_LC_RUNTIME(LC_COLLATE)
? sv_cmp_locale_flags(left, right, 0)
: sv_cmp_flags(left, right, 0));
SETs(boolSV(cmp * multiplier < rhs));
@@ -2169,7 +2169,7 @@ PP(pp_scmp)
tryAMAGICbin_MG(scmp_amg, 0);
{
dPOPTOPssrl;
- const int cmp = (IN_LOCALE_RUNTIME
+ const int cmp = (IN_LC_RUNTIME(LC_COLLATE)
? sv_cmp_locale_flags(left, right, 0)
: sv_cmp_flags(left, right, 0));
SETi( cmp );
@@ -3506,10 +3506,10 @@ PP(pp_ucfirst)
doing_utf8 = TRUE;
ulen = UTF8SKIP(s);
if (op_type == OP_UCFIRST) {
- _to_utf8_title_flags(s, tmpbuf, &tculen, IN_LOCALE_RUNTIME);
+ _to_utf8_title_flags(s, tmpbuf, &tculen, IN_LC_RUNTIME(LC_CTYPE));
}
else {
- _to_utf8_lower_flags(s, tmpbuf, &tculen, IN_LOCALE_RUNTIME);
+ _to_utf8_lower_flags(s, tmpbuf, &tculen, IN_LC_RUNTIME(LC_CTYPE));
}
/* we can't do in-place if the length changes. */
@@ -3527,11 +3527,11 @@ PP(pp_ucfirst)
if (op_type == OP_LCFIRST) {
/* lower case the first letter: no trickiness for any character */
- *tmpbuf = (IN_LOCALE_RUNTIME) ? toLOWER_LC(*s) :
+ *tmpbuf = (IN_LC_RUNTIME(LC_CTYPE)) ? toLOWER_LC(*s) :
((IN_UNI_8_BIT) ? toLOWER_LATIN1(*s) : toLOWER(*s));
}
/* is ucfirst() */
- else if (IN_LOCALE_RUNTIME) {
+ else if (IN_LC_RUNTIME(LC_CTYPE)) {
if (IN_UTF8_CTYPE_LOCALE) {
goto do_uni_rules;
}
@@ -3683,7 +3683,7 @@ PP(pp_ucfirst)
SvCUR_set(dest, need - 1);
}
}
- if (IN_LOCALE_RUNTIME) {
+ if (IN_LC_RUNTIME(LC_CTYPE)) {
TAINT;
SvTAINTED_on(dest);
}
@@ -3714,7 +3714,7 @@ PP(pp_uc)
(SvTEMP(source) && !SvSMAGICAL(source) && SvREFCNT(source) == 1))
&& !SvREADONLY(source) && SvPOK(source)
&& !DO_UTF8(source)
- && ((IN_LOCALE_RUNTIME)
+ && ((IN_LC_RUNTIME(LC_CTYPE))
? ! IN_UTF8_CTYPE_LOCALE
: ! IN_UNI_8_BIT))
{
@@ -3781,7 +3781,7 @@ PP(pp_uc)
* and copy it to the output buffer */
u = UTF8SKIP(s);
- uv = _to_utf8_upper_flags(s, tmpbuf, &ulen, IN_LOCALE_RUNTIME);
+ uv = _to_utf8_upper_flags(s, tmpbuf, &ulen, IN_LC_RUNTIME(LC_CTYPE));
#define GREEK_CAPITAL_LETTER_IOTA 0x0399
#define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
if (uv == GREEK_CAPITAL_LETTER_IOTA
@@ -3824,7 +3824,7 @@ PP(pp_uc)
/* Use locale casing if in locale; regular style if not treating
* latin1 as having case; otherwise the latin1 casing. Do the
* whole thing in a tight loop, for speed, */
- if (IN_LOCALE_RUNTIME) {
+ if (IN_LC_RUNTIME(LC_CTYPE)) {
if (IN_UTF8_CTYPE_LOCALE) {
goto do_uni_rules;
}
@@ -3926,7 +3926,7 @@ PP(pp_uc)
SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
}
} /* End of isn't utf8 */
- if (IN_LOCALE_RUNTIME) {
+ if (IN_LC_RUNTIME(LC_CTYPE)) {
TAINT;
SvTAINTED_on(dest);
}
@@ -3987,7 +3987,7 @@ PP(pp_lc)
const STRLEN u = UTF8SKIP(s);
STRLEN ulen;
- _to_utf8_lower_flags(s, tmpbuf, &ulen, IN_LOCALE_RUNTIME);
+ _to_utf8_lower_flags(s, tmpbuf, &ulen, IN_LC_RUNTIME(LC_CTYPE));
/* Here is where we would do context-sensitive actions. See the
* commit message for 86510fb15 for why there isn't any */
@@ -4024,7 +4024,7 @@ PP(pp_lc)
/* Use locale casing if in locale; regular style if not treating
* latin1 as having case; otherwise the latin1 casing. Do the
* whole thing in a tight loop, for speed, */
- if (IN_LOCALE_RUNTIME) {
+ if (IN_LC_RUNTIME(LC_CTYPE)) {
for (; s < send; d++, s++)
*d = toLOWER_LC(*s);
}
@@ -4044,7 +4044,7 @@ PP(pp_lc)
SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
}
}
- if (IN_LOCALE_RUNTIME) {
+ if (IN_LC_RUNTIME(LC_CTYPE)) {
TAINT;
SvTAINTED_on(dest);
}
@@ -4081,7 +4081,7 @@ PP(pp_quotemeta)
/* In locale, we quote all non-ASCII Latin1 chars.
* Otherwise use the quoting rules */
- if (IN_LOCALE_RUNTIME
+ if (IN_LC_RUNTIME(LC_CTYPE)
|| _isQUOTEMETA(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s + 1))))
{
to_quote = TRUE;
@@ -4143,7 +4143,7 @@ PP(pp_fc)
U8 tmpbuf[UTF8_MAXBYTES_CASE + 1];
const bool full_folding = TRUE;
const U8 flags = ( full_folding ? FOLD_FLAGS_FULL : 0 )
- | ( IN_LOCALE_RUNTIME ? FOLD_FLAGS_LOCALE : 0 );
+ | ( IN_LC_RUNTIME(LC_CTYPE) ? FOLD_FLAGS_LOCALE : 0 );
/* This is a facsimile of pp_lc, but with a thousand bugs thanks to me.
* You are welcome(?) -Hugmeir
@@ -4191,7 +4191,7 @@ PP(pp_fc)
SvUTF8_on(dest);
} /* Unflagged string */
else if (len) {
- if ( IN_LOCALE_RUNTIME ) { /* Under locale */
+ if ( IN_LC_RUNTIME(LC_CTYPE) ) { /* Under locale */
if (IN_UTF8_CTYPE_LOCALE) {
goto do_uni_folding;
}
@@ -4270,7 +4270,7 @@ PP(pp_fc)
*d = '\0';
SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
- if (IN_LOCALE_RUNTIME) {
+ if (IN_LC_RUNTIME(LC_CTYPE)) {
TAINT;
SvTAINTED_on(dest);
}
diff --git a/pp_sort.c b/pp_sort.c
index 0fe0411347..391480be7d 100644
--- a/pp_sort.c
+++ b/pp_sort.c
@@ -1727,7 +1727,7 @@ PP(pp_sort)
? ( ( ( priv & OPpSORT_INTEGER) || all_SIVs)
? ( overloading ? S_amagic_i_ncmp : S_sv_i_ncmp)
: ( overloading ? S_amagic_ncmp : S_sv_ncmp ) )
- : ( IN_LOCALE_RUNTIME
+ : ( IN_LC_RUNTIME(LC_COLLATE)
? ( overloading
? (SVCOMPARE_t)S_amagic_cmp_locale
: (SVCOMPARE_t)sv_cmp_locale_static)
diff --git a/pp_sys.c b/pp_sys.c
index 41a315d73b..c8b340845b 100644
--- a/pp_sys.c
+++ b/pp_sys.c
@@ -3470,7 +3470,7 @@ PP(pp_fttext)
#else
else if (*s & 128) {
#ifdef USE_LOCALE
- if (IN_LOCALE_RUNTIME && isALPHA_LC(*s))
+ if (IN_LC_RUNTIME(LC_CTYPE) && isALPHA_LC(*s))
continue;
#endif
/* utf8 characters don't count as odd */
diff --git a/proto.h b/proto.h
index 4e6c8bec38..526d5c1e34 100644
--- a/proto.h
+++ b/proto.h
@@ -42,6 +42,7 @@ PERL_CALLCONV void Perl_Slab_Free(pTHX_ void *op)
#define PERL_ARGS_ASSERT_SLAB_FREE \
assert(op)
+PERL_CALLCONV bool Perl__is_in_locale_category(pTHX_ const bool compiling, const int category);
PERL_CALLCONV bool Perl__is_uni_FOO(pTHX_ const U8 classnum, const UV c)
__attribute__warn_unused_result__;
diff --git a/t/run/locale.t b/t/run/locale.t
index e662e3ca32..4ecc2b24f1 100644
--- a/t/run/locale.t
+++ b/t/run/locale.t
@@ -111,7 +111,8 @@ EOF
"format() does not look at LC_NUMERIC without 'use locale'");
{
- fresh_perl_is(<<'EOF', $difference, {},
+ fresh_perl_is(<<'EOF', "$difference\n", {},
+use POSIX;
use locale;
format STDOUT =
@.#
@@ -133,15 +134,55 @@ EOF
}
{
+ my $categories = ":collate :characters :collate :ctype :monetary :time";
+ fresh_perl_is(<<"EOF", "4.2", {},
+use locale qw($categories);
+format STDOUT =
+@.#
+4.179
+.
+write;
+EOF
+ "format() does not look at LC_NUMERIC with 'use locale qw($categories)'");
+ }
+
+ {
+ fresh_perl_is(<<'EOF', $difference, {},
+use locale;
+format STDOUT =
+@.#
+4.179
+.
+write;
+EOF
+ "format() looks at LC_NUMERIC with 'use locale'");
+ }
+
+ for my $category (qw(collate characters collate ctype monetary time)) {
+ for my $negation ("!", "not_") {
+ fresh_perl_is(<<"EOF", $difference, {},
+use locale ":$negation$category";
+format STDOUT =
+@.#
+4.179
+.
+write;
+EOF
+ "format() looks at LC_NUMERIC with 'use locale \":"
+ . "$negation$category\"'");
+ }
+ }
+
+ {
fresh_perl_is(<<'EOF', $difference, {},
-use locale ":not_characters";
+use locale ":numeric";
format STDOUT =
@.#
4.179
.
write;
EOF
- "format() looks at LC_NUMERIC with 'use locale \":not_characters\"'");
+ "format() looks at LC_NUMERIC with 'use locale \":numeric\"'");
}
{
@@ -371,4 +412,4 @@ EOF
}
-sub last { 21 }
+sub last { 35 }
diff --git a/utf8.h b/utf8.h
index 8945663e1a..c150d65852 100644
--- a/utf8.h
+++ b/utf8.h
@@ -422,8 +422,11 @@ Perl's extended UTF-8 means we can have start bytes up to FF.
#define IN_BYTES (CopHINTS_get(PL_curcop) & HINT_BYTES)
#define DO_UTF8(sv) (SvUTF8(sv) && !IN_BYTES)
#define IN_UNI_8_BIT \
- (CopHINTS_get(PL_curcop) & (HINT_UNI_8_BIT|HINT_LOCALE_NOT_CHARS) \
- && ! IN_LOCALE_RUNTIME && ! IN_BYTES)
+ (((CopHINTS_get(PL_curcop) & (HINT_UNI_8_BIT)) \
+ || (CopHINTS_get(PL_curcop) & HINT_LOCALE_PARTIAL \
+ /* -1 below is for :not_characters */ \
+ && _is_in_locale_category(FALSE, -1))) \
+ && ! IN_BYTES)
#define UTF8_ALLOW_EMPTY 0x0001 /* Allow a zero length string */