summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/open.pm3
-rw-r--r--locale.c49
-rw-r--r--pod/perl58delta.pod12
3 files changed, 32 insertions, 32 deletions
diff --git a/lib/open.pm b/lib/open.pm
index 2dc1d2130c..007b66712d 100644
--- a/lib/open.pm
+++ b/lib/open.pm
@@ -27,6 +27,7 @@ sub _get_locale_encoding {
} elsif ($ENV{LANG} =~ /^([^.]+)\.([^.]+)$/) {
($country_language, $locale_encoding) = ($1, $2);
}
+ # LANGUAGE affects only LC_MESSAGES, and only on glibc
} elsif (not $locale_encoding) {
if ($ENV{LC_ALL} =~ /\butf-?8\b/i ||
$ENV{LANG} =~ /\butf-?8\b/i) {
@@ -250,7 +251,7 @@ pragma.
=back
-If your locale environment variables (LANGUAGE, LC_ALL, LC_CTYPE, LANG)
+If your locale environment variables (LC_ALL, LC_CTYPE, LANG)
contain the strings 'UTF-8' or 'UTF8' (case-insensitive matching),
the default encoding of your STDIN, STDOUT, and STDERR, and of
B<any subsequent file open>, is UTF-8.
diff --git a/locale.c b/locale.c
index 0986614794..c03451ba1d 100644
--- a/locale.c
+++ b/locale.c
@@ -478,10 +478,15 @@ Perl_init_i18nl10n(pTHX_ int printwarn)
/* Set PL_wantutf8 to TRUE if using PerlIO _and_
any of the following are true:
- nl_langinfo(CODESET) contains /^utf-?8/i
- - $ENV{LANGUAGE} contains /^utf-?8/i (only if using glibc)
- - $ENV{LC_CALL} contains /^utf-?8/i
+ - $ENV{LC_ALL} contains /^utf-?8/i
- $ENV{LC_CTYPE} contains /^utf-?8/i
- - $ENV{LANG} contains /^utf-?8/i
+ - $ENV{LANG} contains /^utf-?8/i
+ The LC_ALL, LC_CTYPE, LANG obey the usual override
+ hierarchy of locale environment variables. (LANGUAGE
+ affects only LC_MESSAGES, and only under glibc.) (If present,
+ it overrides LC_MESSAGES for GNU gettext, and it also
+ can have more than one locale, separated by colons,
+ in case you need to know.)
If PL_wantutf8 is true, perl.c:S_parse_body()
will turn on the PerlIO :utf8 discipline on STDIN, STDOUT,
STDERR, _and_ the default open discipline.
@@ -491,32 +496,26 @@ Perl_init_i18nl10n(pTHX_ int printwarn)
#if defined(HAS_NL_LANGINFO) && defined(CODESET)
codeset = nl_langinfo(CODESET);
#endif
- if (codeset &&
- (ibcmp(codeset, "UTF-8", 5) == 0 ||
- ibcmp(codeset, "UTF8", 4) == 0))
- wantutf8 = TRUE;
+ if (codeset)
+ wantutf8 = (ibcmp(codeset, "UTF-8", 5) == 0 ||
+ ibcmp(codeset, "UTF8", 4) == 0);
#if defined(USE_LOCALE)
-#ifdef __GLIBC__
- if (!wantutf8 && language &&
- (ibcmp(language, "UTF-8", 5) == 0 ||
- ibcmp(language, "UTF8", 4) == 0))
- wantutf8 = TRUE;
-#endif
- if (!wantutf8 && lc_all &&
- (ibcmp(lc_all, "UTF-8", 5) == 0 ||
- ibcmp(lc_all, "UTF8", 4) == 0))
- wantutf8 = TRUE;
+ else { /* nl_langinfo(CODESET) is supposed to correctly
+ * interpret the locale environment variables,
+ * but just in case it fails, let's do this manually. */
+ if (lang)
+ wantutf8 = (ibcmp(lang, "UTF-8", 5) == 0 ||
+ ibcmp(lang, "UTF8", 4) == 0);
#ifdef USE_LOCALE_CTYPE
- if (!wantutf8 && curctype &&
- (ibcmp(curctype, "UTF-8", 5) == 0 ||
- ibcmp(curctype, "UTF8", 4) == 0))
- wantutf8 = TRUE;
+ if (curctype)
+ wantutf8 = (ibcmp(curctype, "UTF-8", 5) == 0 ||
+ ibcmp(curctype, "UTF8", 4) == 0);
#endif
- if (!wantutf8 && lang &&
- (ibcmp(lang, "UTF-8", 5) == 0 ||
- ibcmp(lang, "UTF8", 4) == 0))
- wantutf8 = TRUE;
+ if (lc_all)
+ wantutf8 = (ibcmp(lc_all, "UTF-8", 5) == 0 ||
+ ibcmp(lc_all, "UTF8", 4) == 0);
+ } /* closes the else above; kept inside #if USE_LOCALE so braces balance when it is undefined */
#endif /* USE_LOCALE */
if (wantutf8)
PL_wantutf8 = TRUE;
}
diff --git a/pod/perl58delta.pod b/pod/perl58delta.pod
index 9b9e529593..3bef17ba1f 100644
--- a/pod/perl58delta.pod
+++ b/pod/perl58delta.pod
@@ -415,12 +415,12 @@ for more information about UTF-8.
=item *
-If your environment variables (LC_ALL, LC_CTYPE, LANG, LANGUAGE) look
-like you want to use UTF-8 (any of the the variables match C</utf-?8/i>),
-your STDIN, STDOUT, STDERR handles and the default open layer
-(see L<open>) are marked as UTF-8. (This feature, like other new
-features that combine Unicode and I/O, work only if you are using
-PerlIO, but that's the default.)
+If your environment variables (LC_ALL, LC_CTYPE, LANG) look like you
+want to use UTF-8 (any of the variables match C</utf-?8/i>), your
+STDIN, STDOUT, STDERR handles and the default open layer (see L<open>)
+are marked as UTF-8. (This feature, like other new features that
+combine Unicode and I/O, works only if you are using PerlIO, but that's
+the default.)
Note that after this Perl really does assume that everything is UTF-8:
for example if some input handle is not, Perl will probably very soon