summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2014-12-18 13:29:51 -0700
committerKarl Williamson <khw@cpan.org>2014-12-29 13:52:56 -0700
commit780fcc9fd03dbbd16715e2b6ecd020f9e50b7cc7 (patch)
treee5f04281794fe574a278cbbe72d4718737c1c482
parenta4525e789871d3846f20d0ea7d2d239c6a21a5a4 (diff)
downloadperl-780fcc9fd03dbbd16715e2b6ecd020f9e50b7cc7.tar.gz
Don't raise 'poorly supported' locale warning unnecessarily
Commit 8c6180a91de91a1194f427fc639694f43a903a78 added a warning message for when Perl determines that the program's underlying locale just switched into is poorly supported. At the time it was thought that this would be an extremely rare occurrence. However, a bug in HP-UX - B.11.00/64 causes this message to be raised for the "C" locale. A workaround was done that silenced those. However, before it got fixed, this message would occur gobs of times executing the test suite. It was raised even if the script is not locale-aware, so that the underlying locale was completely irrelevant. There is a good prospect that someone using an older Asian locale as their default would get this message inappropriately, even if they don't use locales, or switch to a supported one before using them. This commit causes the message to be raised only if it actually is relevant. When not in the scope of 'use locale', the message is stored, not raised. Upon the first locale-dependent operation within a bad locale, the saved message is raised, and the storage cleared. I was able to do this without adding extra branching to the main-line non-locale execution code. This was done by adding regnodes which get jumped to by switch statements, and refactoring some existing C tests so they exclude non-locale right off the bat. These changes would have been necessary for another locale warning that I previously agreed to implement, and which is coming a few commits from now. I do not know of any way to add tests in the test suite for this. It is in fact rare for modern locales to have these issues. The way I tested this was to temporarily change the C code so that all locales are viewed as defective, and manually note that the warnings came out where expected, and only where expected. I chose not to try to output this warning on any POSIX functions called. I believe that all that are affected are deprecated or scheduled to be deprecated anyway. And POSIX is closer to the hardware of the machine. For convenience, I also don't output the message for some zero-length pattern matches. If something is going to be matched, the message will likely very soon be raised anyway.
-rw-r--r--embedvar.h1
-rw-r--r--intrpvar.h1
-rw-r--r--locale.c35
-rw-r--r--perl.c2
-rw-r--r--perl.h23
-rw-r--r--pod/perldelta.pod9
-rw-r--r--pod/perldiag.pod20
-rw-r--r--pp.c23
-rw-r--r--regexec.c34
-rw-r--r--sv.c3
-rw-r--r--utf8.c51
11 files changed, 166 insertions, 36 deletions
diff --git a/embedvar.h b/embedvar.h
index 32a8b9b327..da3c331634 100644
--- a/embedvar.h
+++ b/embedvar.h
@@ -352,6 +352,7 @@
#define PL_utf8_xidstart (vTHX->Iutf8_xidstart)
#define PL_utf8cache (vTHX->Iutf8cache)
#define PL_utf8locale (vTHX->Iutf8locale)
+#define PL_warn_locale (vTHX->Iwarn_locale)
#define PL_warnhook (vTHX->Iwarnhook)
#define PL_watchaddr (vTHX->Iwatchaddr)
#define PL_watchok (vTHX->Iwatchok)
diff --git a/intrpvar.h b/intrpvar.h
index 3bb1c9af8b..eb962836ca 100644
--- a/intrpvar.h
+++ b/intrpvar.h
@@ -238,6 +238,7 @@ PERLVAR(I, exit_flags, U8) /* was exit() unexpected, etc. */
PERLVAR(I, utf8locale, bool) /* utf8 locale detected */
PERLVAR(I, in_utf8_CTYPE_locale, bool)
+PERLVAR(I, warn_locale, SV *)
PERLVARA(I, colors,6, char *) /* values from PERL_RE_COLORS env var */
diff --git a/locale.c b/locale.c
index 429fdb7b1f..2577ed2816 100644
--- a/locale.c
+++ b/locale.c
@@ -292,6 +292,8 @@ Perl_new_ctype(pTHX_ const char *newctype)
to start */
unsigned int bad_count = 0; /* Count of bad characters */
+ SvREFCNT_dec(PL_warn_locale); /* We are about to overwrite this */
+
for (i = 0; i < 256; i++) {
if (isUPPER_LC((U8) i))
PL_fold_locale[i] = (U8) toLOWER_LC((U8) i);
@@ -360,17 +362,9 @@ Perl_new_ctype(pTHX_ const char *newctype)
#endif
if (bad_count || multi_byte_locale) {
-
- /* We have to save 'newctype' because the setlocale() just below
- * may destroy it. The next setlocale() further down should
- * restore it properly so that the intermediate change here is
- * transparent to this function's caller */
- const char * const badlocale = savepv(newctype);
-
- setlocale(LC_CTYPE, "C");
- Perl_warner(aTHX_ packWARN(WARN_LOCALE),
+ PL_warn_locale = Perl_newSVpvf(aTHX_
"Locale '%s' may not work well.%s%s%s\n",
- badlocale,
+ newctype,
(multi_byte_locale)
? " Some characters in it are not recognized by"
" Perl."
@@ -384,7 +378,26 @@ Perl_new_ctype(pTHX_ const char *newctype)
? bad_chars_list
: ""
);
- setlocale(LC_CTYPE, badlocale);
+ /* If we are actually in the scope of the locale, output the
+ * message now. Otherwise we save it to be output at the first
+ * operation using this locale, if that actually happens. Most
+ * programs don't use locales, so they are immune to bad ones */
+ if (IN_LC(LC_CTYPE)) {
+
+ /* We have to save 'newctype' because the setlocale() just
+ * below may destroy it. The next setlocale() further down
+ * should restore it properly so that the intermediate change
+ * here is transparent to this function's caller */
+ const char * const badlocale = savepv(newctype);
+
+ setlocale(LC_CTYPE, "C");
+
+ /* The '0' below suppresses a bogus gcc compiler warning */
+ Perl_warner(aTHX_ packWARN(WARN_LOCALE), SvPVX(PL_warn_locale), 0);
+ setlocale(LC_CTYPE, badlocale);
+ SvREFCNT_dec_NN(PL_warn_locale);
+ PL_warn_locale = NULL;
+ }
}
}
diff --git a/perl.c b/perl.c
index be9932d27d..2ebc4f7f77 100644
--- a/perl.c
+++ b/perl.c
@@ -1040,6 +1040,7 @@ perl_destruct(pTHXx)
SvREFCNT_dec(PL_Latin1);
SvREFCNT_dec(PL_NonL1NonFinalFold);
SvREFCNT_dec(PL_HasMultiCharFold);
+ SvREFCNT_dec(PL_warn_locale);
PL_utf8_mark = NULL;
PL_utf8_toupper = NULL;
PL_utf8_totitle = NULL;
@@ -1051,6 +1052,7 @@ perl_destruct(pTHXx)
PL_AboveLatin1 = NULL;
PL_InBitmap = NULL;
PL_HasMultiCharFold = NULL;
+ PL_warn_locale = NULL;
PL_Latin1 = NULL;
PL_NonL1NonFinalFold = NULL;
PL_UpperLatin1 = NULL;
diff --git a/perl.h b/perl.h
index a3f63b0cce..35624b5918 100644
--- a/perl.h
+++ b/perl.h
@@ -5779,6 +5779,27 @@ typedef struct am_table_short AMTS;
# define IN_LC(category) \
(IN_LC_COMPILETIME(category) || IN_LC_RUNTIME(category))
+# if defined (PERL_CORE) || defined (PERL_IN_XSUB_RE)
+
+ /* This internal macro should be called from places that operate under
+ * locale rules. It there is a problem with the current locale that
+ * hasn't been raised yet, it will output a warning this time */
+# define _CHECK_AND_WARN_PROBLEMATIC_LOCALE \
+ STMT_START { \
+ if (PL_warn_locale) { \
+ /*GCC_DIAG_IGNORE(-Wformat-security); Didn't work */ \
+ Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE), \
+ SvPVX(PL_warn_locale), \
+ 0 /* dummy to avoid comp warning */ ); \
+ /* GCC_DIAG_RESTORE; */ \
+ SvREFCNT_dec_NN(PL_warn_locale); \
+ PL_warn_locale = NULL; \
+ } \
+ } STMT_END
+
+
+# endif /* PERL_CORE or PERL_IN_XSUB_RE */
+
#else /* No locale usage */
# define IN_LOCALE_RUNTIME 0
# define IN_SOME_LOCALE_FORM_RUNTIME 0
@@ -5793,6 +5814,8 @@ typedef struct am_table_short AMTS;
# define IN_LC_COMPILETIME(category) 0
# define IN_LC_RUNTIME(category) 0
# define IN_LC(category) 0
+
+# define _CHECK_AND_WARN_PROBLEMATIC_LOCALE
#endif
#ifdef USE_LOCALE_NUMERIC
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index 6eecc008f8..a49456576a 100644
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -221,6 +221,15 @@ XXX Changes (i.e. rewording) of diagnostic messages go here
XXX Describe change here
+The message
+L<Locale '%s' may not work well.%s|perldiag/"Locale '%s' may not work well.%s">
+is no longer raised unless the problemtatic locale is actually used in
+the Perl program. Previously it was raised if it merely was the
+underlying locale. All Perl programs have an underlying locale at all
+times, but something like a C<S<use locale>> is needed for that locale
+to actually have some effect. This message will not be raised when
+the underlying locale is hidden.
+
=back
=head1 Utility Changes
diff --git a/pod/perldiag.pod b/pod/perldiag.pod
index 1c845dde44..63df68d591 100644
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -2982,16 +2982,16 @@ likely fix this error.
=item Locale '%s' may not work well.%s
-(W locale) The named locale that Perl is now trying to use is not fully
-compatible with Perl. The second C<%s> gives a reason.
+(W locale) You are using the named locale, which is a non-UTF-8 one, and
+which Perl has determined is not fully compatible with Perl. The second
+C<%s> gives a reason.
By far the most common reason is that the locale has characters in it
that are represented by more than one byte. The only such locales that
Perl can handle are the UTF-8 locales. Most likely the specified locale
is a non-UTF-8 one for an East Asian language such as Chinese or
Japanese. If the locale is a superset of ASCII, the ASCII portion of it
-may work in Perl. Read on for problems when it isn't a superset of
-ASCII.
+may work in Perl.
Some essentially obsolete locales that aren't supersets of ASCII, mainly
those in ISO 646 or other 7-bit locales, such as ASMO 449, can also have
@@ -2999,6 +2999,18 @@ problems, depending on what portions of the ASCII character set get
changed by the locale and are also used by the program.
The warning message lists the determinable conflicting characters.
+Note that not all incompatibilities are found.
+
+If this happens to you, there's not much you can do except switch to use a
+different locale or use L<Encode> to translate from the locale into
+UTF-8; if that's impracticable, you have been warned that some things
+may break.
+
+This message is output once each time a bad locale is switched into
+within the scope of C<S<use locale>>, or on the first possibly-affected
+operation if the C<S<use locale>> inherits a bad one. It is not raised
+for any operations from the L<POSIX> module.
+
=item localtime(%f) failed
(W overflow) You called C<localtime> with a number that it could not handle:
diff --git a/pp.c b/pp.c
index 182fa7175b..08e0999f3e 100644
--- a/pp.c
+++ b/pp.c
@@ -3588,23 +3588,27 @@ PP(pp_ucfirst)
if (op_type == OP_LCFIRST) {
/* lower case the first letter: no trickiness for any character */
- *tmpbuf =
#ifdef USE_LOCALE_CTYPE
- (IN_LC_RUNTIME(LC_CTYPE))
- ? toLOWER_LC(*s)
- :
+ if (IN_LC_RUNTIME(LC_CTYPE)) {
+ _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
+ *tmpbuf = toLOWER_LC(*s);
+ }
+ else
#endif
- (IN_UNI_8_BIT)
- ? toLOWER_LATIN1(*s)
- : toLOWER(*s);
+ {
+ *tmpbuf = (IN_UNI_8_BIT)
+ ? toLOWER_LATIN1(*s)
+ : toLOWER(*s);
+ }
}
- /* is ucfirst() */
#ifdef USE_LOCALE_CTYPE
+ /* is ucfirst() */
else if (IN_LC_RUNTIME(LC_CTYPE)) {
if (IN_UTF8_CTYPE_LOCALE) {
goto do_uni_rules;
}
+ _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
*tmpbuf = (U8) toUPPER_LC(*s); /* This would be a bug if any
locales have upper and title case
different */
@@ -3909,6 +3913,7 @@ PP(pp_uc)
if (IN_UTF8_CTYPE_LOCALE) {
goto do_uni_rules;
}
+ _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
for (; s < send; d++, s++)
*d = (U8) toUPPER_LC(*s);
}
@@ -4116,6 +4121,7 @@ PP(pp_lc)
* whole thing in a tight loop, for speed, */
#ifdef USE_LOCALE_CTYPE
if (IN_LC_RUNTIME(LC_CTYPE)) {
+ _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
for (; s < send; d++, s++)
*d = toLOWER_LC(*s);
}
@@ -4298,6 +4304,7 @@ PP(pp_fc)
if (IN_UTF8_CTYPE_LOCALE) {
goto do_uni_folding;
}
+ _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
for (; s < send; d++, s++)
*d = (U8) toFOLD_LC(*s);
}
diff --git a/regexec.c b/regexec.c
index 4526d23e14..ec970f94a9 100644
--- a/regexec.c
+++ b/regexec.c
@@ -1456,6 +1456,7 @@ STMT_START {
U8 flags = FOLD_FLAGS_FULL; \
switch (trie_type) { \
case trie_flu8: \
+ _CHECK_AND_WARN_PROBLEMATIC_LOCALE; \
goto do_trie_utf8_fold; \
case trie_utf8_exactfa_fold: \
flags |= FOLD_FLAGS_NOMIX_ASCII; \
@@ -1493,6 +1494,8 @@ STMT_START {
} \
break; \
case trie_utf8l: \
+ _CHECK_AND_WARN_PROBLEMATIC_LOCALE; \
+ /* FALLTHROUGH */ \
case trie_utf8: \
uvc = utf8n_to_uvchr( (const U8*) uc, UTF8_MAXLEN, &len, uniflags ); \
break; \
@@ -1753,6 +1756,8 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
/* We know what class it must start with. */
switch (OP(c)) {
case ANYOFL:
+ _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
+ /* FALLTHROUGH */
case ANYOF:
if (utf8_target) {
REXEC_FBC_UTF8_CLASS_SCAN(
@@ -1794,6 +1799,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
goto do_exactf_non_utf8;
case EXACTFL:
+ _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
if (is_utf8_pat || utf8_target || IN_UTF8_CTYPE_LOCALE) {
utf8_fold_flags = FOLDEQ_LOCALE;
goto do_exactf_utf8;
@@ -1921,9 +1927,11 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
}
case BOUNDL:
+ _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
FBC_BOUND(isWORDCHAR_LC, isWORDCHAR_LC_uvchr, isWORDCHAR_LC_utf8);
break;
case NBOUNDL:
+ _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
FBC_NBOUND(isWORDCHAR_LC, isWORDCHAR_LC_uvchr, isWORDCHAR_LC_utf8);
break;
case BOUND:
@@ -1958,6 +1966,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
/* FALLTHROUGH */
case POSIXL:
+ _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
REXEC_FBC_CSCAN(to_complement ^ cBOOL(isFOO_utf8_lc(FLAGS(c), (U8 *) s)),
to_complement ^ cBOOL(isFOO_lc(FLAGS(c), *s)));
break;
@@ -4174,6 +4183,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
HV * widecharmap = MUTABLE_HV(rexi->data->data[ ARG( scan ) + 1 ]);
U32 state = trie->startstate;
+ if (scan->flags == EXACTL || scan->flags == EXACTFLU8) {
+ _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
+ }
if ( trie->bitmap
&& (NEXTCHR_IS_EOS || !TRIE_BITMAP_TEST(trie, nextchr)))
{
@@ -4448,6 +4460,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
#undef ST
case EXACTL: /* /abc/l */
+ _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
+ /* FALLTHROUGH */
case EXACT: { /* /abc/ */
char *s = STRING(scan);
ln = STR_LEN(scan);
@@ -4534,6 +4548,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
const char * s;
U32 fold_utf8_flags;
+ _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
folder = foldEQ_locale;
fold_array = PL_fold_locale;
fold_utf8_flags = FOLDEQ_LOCALE;
@@ -4615,6 +4630,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
* have to set the FLAGS fields of these */
case BOUNDL: /* /\b/l */
case NBOUNDL: /* /\B/l */
+ _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
+ /* FALLTHROUGH */
case BOUND: /* /\b/ */
case BOUNDU: /* /\b/u */
case BOUNDA: /* /\b/a */
@@ -4694,6 +4711,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
break;
case ANYOFL: /* /[abc]/l */
+ _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
+ /* FALLTHROUGH */
case ANYOF: /* /[abc]/ */
if (NEXTCHR_IS_EOS)
sayNO;
@@ -4718,6 +4737,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
/* FALLTHROUGH */
case POSIXL: /* \w or [:punct:] etc. under /l */
+ _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
if (NEXTCHR_IS_EOS)
sayNO;
@@ -5094,6 +5114,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
const U8 *fold_array;
UV utf8_fold_flags;
+ _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
folder = foldEQ_locale;
fold_array = PL_fold_locale;
type = REFFL;
@@ -5138,6 +5159,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
goto do_nref_ref_common;
case REFFL: /* /\1/il */
+ _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
folder = foldEQ_locale;
fold_array = PL_fold_locale;
utf8_fold_flags = FOLDEQ_LOCALE;
@@ -7208,6 +7230,8 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
}
break;
case EXACTL:
+ _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
+ /* FALLTHROUGH */
case EXACT:
assert(STR_LEN(p) == reginfo->is_utf8_pat ? UTF8SKIP(STRING(p)) : 1);
@@ -7281,6 +7305,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
goto do_exactf;
case EXACTFL:
+ _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
utf8_flags = FOLDEQ_LOCALE;
goto do_exactf;
@@ -7360,6 +7385,8 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
break;
}
case ANYOFL:
+ _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
+ /* FALLTHROUGH */
case ANYOF:
if (utf8_target) {
while (hardcount < max
@@ -7382,6 +7409,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
/* FALLTHROUGH */
case POSIXL:
+ _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
if (! utf8_target) {
while (scan < loceol && to_complement ^ cBOOL(isFOO_lc(FLAGS(p),
*scan)))
@@ -7601,16 +7629,18 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
}
break;
+ case BOUNDL:
+ case NBOUNDL:
+ _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
+ /* FALLTHROUGH */
case BOUND:
case BOUNDA:
- case BOUNDL:
case BOUNDU:
case EOS:
case GPOS:
case KEEPS:
case NBOUND:
case NBOUNDA:
- case NBOUNDL:
case NBOUNDU:
case OPFAIL:
case SBOL:
diff --git a/sv.c b/sv.c
index 1f9ea87c2a..94740d3528 100644
--- a/sv.c
+++ b/sv.c
@@ -14588,6 +14588,9 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
/* Unicode features (see perlrun/-C) */
PL_unicode = proto_perl->Iunicode;
+ /* Should we warn if uses locale? */
+ PL_warn_locale = proto_perl->Iwarn_locale;
+
/* Pre-5.8 signals control */
PL_signals = proto_perl->Isignals;
diff --git a/utf8.c b/utf8.c
index 5ba5517832..7985bc92a9 100644
--- a/utf8.c
+++ b/utf8.c
@@ -1600,9 +1600,14 @@ Perl__to_uni_fold_flags(pTHX_ UV c, U8* p, STRLEN *lenp, U8 flags)
PERL_ARGS_ASSERT__TO_UNI_FOLD_FLAGS;
- /* Treat a UTF-8 locale as not being in locale at all */
- if (IN_UTF8_CTYPE_LOCALE) {
- flags &= ~FOLD_FLAGS_LOCALE;
+ if (flags & FOLD_FLAGS_LOCALE) {
+ /* Treat a UTF-8 locale as not being in locale at all */
+ if (IN_UTF8_CTYPE_LOCALE) {
+ flags &= ~FOLD_FLAGS_LOCALE;
+ }
+ else {
+ _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
+ }
}
if (c < 256) {
@@ -1949,8 +1954,14 @@ Perl__to_utf8_upper_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, bool flags
PERL_ARGS_ASSERT__TO_UTF8_UPPER_FLAGS;
- if (flags && IN_UTF8_CTYPE_LOCALE) {
- flags = FALSE;
+ if (flags) {
+ /* Treat a UTF-8 locale as not being in locale at all */
+ if (IN_UTF8_CTYPE_LOCALE) {
+ flags = FALSE;
+ }
+ else {
+ _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
+ }
}
if (UTF8_IS_INVARIANT(*p)) {
@@ -2014,8 +2025,14 @@ Perl__to_utf8_title_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, bool flags
PERL_ARGS_ASSERT__TO_UTF8_TITLE_FLAGS;
- if (flags && IN_UTF8_CTYPE_LOCALE) {
- flags = FALSE;
+ if (flags) {
+ /* Treat a UTF-8 locale as not being in locale at all */
+ if (IN_UTF8_CTYPE_LOCALE) {
+ flags = FALSE;
+ }
+ else {
+ _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
+ }
}
if (UTF8_IS_INVARIANT(*p)) {
@@ -2078,8 +2095,14 @@ Perl__to_utf8_lower_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, bool flags
PERL_ARGS_ASSERT__TO_UTF8_LOWER_FLAGS;
- if (flags && IN_UTF8_CTYPE_LOCALE) {
- flags = FALSE;
+ if (flags) {
+ /* Treat a UTF-8 locale as not being in locale at all */
+ if (IN_UTF8_CTYPE_LOCALE) {
+ flags = FALSE;
+ }
+ else {
+ _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
+ }
}
if (UTF8_IS_INVARIANT(*p)) {
@@ -2153,8 +2176,14 @@ Perl__to_utf8_fold_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, U8 flags)
assert(p != ustrp); /* Otherwise overwrites */
- if (flags & FOLD_FLAGS_LOCALE && IN_UTF8_CTYPE_LOCALE) {
- flags &= ~FOLD_FLAGS_LOCALE;
+ if (flags & FOLD_FLAGS_LOCALE) {
+ /* Treat a UTF-8 locale as not being in locale at all */
+ if (IN_UTF8_CTYPE_LOCALE) {
+ flags &= ~FOLD_FLAGS_LOCALE;
+ }
+ else {
+ _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
+ }
}
if (UTF8_IS_INVARIANT(*p)) {