summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Changes107
-rw-r--r--MANIFEST12
-rw-r--r--META.json52
-rw-r--r--META.yml29
-rw-r--r--Makefile.PL60
-rw-r--r--README11
-rw-r--r--lib/Encode/Locale.pm373
-rw-r--r--t/alias.t22
-rw-r--r--t/arg.t45
-rw-r--r--t/env.t25
-rw-r--r--t/tain.t19
-rw-r--r--t/warn_once.t20
12 files changed, 775 insertions, 0 deletions
diff --git a/Changes b/Changes
new file mode 100644
index 0000000..dc0366a
--- /dev/null
+++ b/Changes
@@ -0,0 +1,107 @@
+## 2015-06-09 Release 1.05
+
+Mats Peterson (1):
+ Use GetACP() to get the ANSI code page like before
+
+Thomas Sibley (1):
+ Conditionalize the Win32::Console recommendation
+
+
+
+## 2015-01-12 Release 1.04
+
+Ed J (5):
+
+* Actually include all the tests in the MANIFEST
+* use Test::More and warnings
+* Tidy t/alias.t
+* t/arg.t TODO some actual ARGV testing
+* Use Win32::GetConsoleCP/GetConsoleOutputCP if available
+
+Gisle Aas (3):
+
+* Documentation spell fix
+* SEE ALSO Term::Encoding [RT#98138]
+
+David Steinbrunner (1):
+
+* typo fix
+
+
+
+## 2012-01-11 Release 1.03
+
+Documentation spelling fixes and tweaks to improve testing on Windows.
+
+
+## 2011-03-28 Release 1.02
+
+Added supporting hacks for GB18030 and roman8. [RT#66373] [RT#67010]
+
+
+## 2011-02-22 Release 1.01
+
+Avoid compilation warnings (RT#65975). Thanks to Goro Fuji.
+
+
+## 2011-01-23 Release 1.00
+
+Documentation tweaks.
+
+
+## 2010-10-23 Release 0.04
+
+Look up the ANSI code page on Windows and alias this one as the "locale"
+encoding.
+
+Provide the reinit() function to be called if something is changed
+in the locale and you need to re-initialize the encodings set up by
+this module.
+
+Improved documentation.
+
+
+## 2010-10-20 Release 0.03
+
+Changed the default for decode_argv() to match Encode's default.
+It became too strange to tell people to pass FB_DEFAULT to get the
+non-default behaviour.
+
+Changed $ENCODING_FS into $ENCODING_LOCALE_FS (as already documented below),
+but not implemented as such.
+
+Workaround for test failure where the Encode does not know about the "646"
+encoding alias.
+
+Documentation tweaks.
+
+
+## 2010-10-13 Release 0.02
+
+...where I realized that I could not get away with a single locale encoding.
+Now `Encode::Locale` provides 4 encoding names that often will map to the same
+underlying encoding. I've used the following names:
+
+ locale $ENCODING_LOCALE
+ locale_fs $ENCODING_LOCALE_FS
+ console_in $ENCODING_CONSOLE_IN
+ console_out $ENCODING_CONSOLE_OUT
+
+The first one is the encoding specified by the POSIX locale (or the equivalent
+on Windows). This can be set by the user. The second one (`locale_fs`) is the
+encoding that should be used when interfacing with the file system, that is the
+encoding of file names. For some systems (like Mac OS X) this is fixed system
+wide and the same for all users. Last; some systems allow the input and output
+encoding for data aimed at the console to differ so there are separate entries
+for these. For classic POSIX systems all 4 of these will all denote the same
+encoding.
+
+This release also introduces the function env() as a Unicode interface to the
+%ENV hash (the process environment variables). We don't want to decode the
+%ENV values in-place because this also affects what the child processes
+observe. The %ENV hash should always contain byte strings.
+
+
+## 2010-10-11 Release 0.01
+
+Initial release
diff --git a/MANIFEST b/MANIFEST
new file mode 100644
index 0000000..8c5ebfa
--- /dev/null
+++ b/MANIFEST
@@ -0,0 +1,12 @@
+Changes
+lib/Encode/Locale.pm
+Makefile.PL
+MANIFEST This list of files
+README
+t/alias.t
+t/arg.t
+t/env.t
+t/tain.t
+t/warn_once.t
+META.yml Module YAML meta-data (added by MakeMaker)
+META.json Module JSON meta-data (added by MakeMaker)
diff --git a/META.json b/META.json
new file mode 100644
index 0000000..317ad1b
--- /dev/null
+++ b/META.json
@@ -0,0 +1,52 @@
+{
+ "abstract" : "Determine the locale encoding",
+ "author" : [
+ "Gisle Aas <gisle@activestate.com>"
+ ],
+ "dynamic_config" : 1,
+ "generated_by" : "ExtUtils::MakeMaker version 7.0401, CPAN::Meta::Converter version 2.150001",
+ "license" : [
+ "perl_5"
+ ],
+ "meta-spec" : {
+ "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",
+ "version" : "2"
+ },
+ "name" : "Encode-Locale",
+ "no_index" : {
+ "directory" : [
+ "t",
+ "inc"
+ ]
+ },
+ "prereqs" : {
+ "build" : {
+ "requires" : {
+ "ExtUtils::MakeMaker" : "0",
+ "Test::More" : "0"
+ }
+ },
+ "configure" : {
+ "requires" : {
+ "ExtUtils::MakeMaker" : "0"
+ }
+ },
+ "runtime" : {
+ "recommends" : {
+ "I18N::Langinfo" : "0"
+ },
+ "requires" : {
+ "Encode" : "2",
+ "Encode::Alias" : "0",
+ "perl" : "5.008"
+ }
+ }
+ },
+ "release_status" : "stable",
+ "resources" : {
+ "repository" : {
+ "url" : "http://github.com/gisle/encode-locale"
+ }
+ },
+ "version" : "1.05"
+}
diff --git a/META.yml b/META.yml
new file mode 100644
index 0000000..5e730f8
--- /dev/null
+++ b/META.yml
@@ -0,0 +1,29 @@
+---
+abstract: 'Determine the locale encoding'
+author:
+ - 'Gisle Aas <gisle@activestate.com>'
+build_requires:
+ ExtUtils::MakeMaker: '0'
+ Test::More: '0'
+configure_requires:
+ ExtUtils::MakeMaker: '0'
+dynamic_config: 1
+generated_by: 'ExtUtils::MakeMaker version 7.0401, CPAN::Meta::Converter version 2.150001'
+license: perl
+meta-spec:
+ url: http://module-build.sourceforge.net/META-spec-v1.4.html
+ version: '1.4'
+name: Encode-Locale
+no_index:
+ directory:
+ - t
+ - inc
+recommends:
+ I18N::Langinfo: '0'
+requires:
+ Encode: '2'
+ Encode::Alias: '0'
+ perl: '5.008'
+resources:
+ repository: http://github.com/gisle/encode-locale
+version: '1.05'
diff --git a/Makefile.PL b/Makefile.PL
new file mode 100644
index 0000000..8489295
--- /dev/null
+++ b/Makefile.PL
@@ -0,0 +1,60 @@
+require 5.008;
+use ExtUtils::MakeMaker;
+
+WriteMakefile(
+ NAME => 'Encode::Locale',
+ VERSION_FROM => 'lib/Encode/Locale.pm',
+ ABSTRACT_FROM => "lib/Encode/Locale.pm",
+ AUTHOR => 'Gisle Aas <gisle@activestate.com>',
+ LICENSE => "perl",
+ MIN_PERL_VERSION => 5.008,
+ PREREQ_PM => {
+ 'Encode' => 2,
+ 'Encode::Alias' => 0,
+ },
+ META_MERGE => {
+ resources => {
+ repository => 'http://github.com/gisle/encode-locale',
+ },
+ recommends => {
+ 'I18N::Langinfo' => 0,
+ ($^O eq 'MSWin32'
+ ? ('Win32::Console' => 0)
+ : ()),
+ },
+ },
+ TEST_REQUIRES => {
+ 'Test::More' => 0,
+ },
+);
+
+BEGIN {
+    # compatibility with older versions of MakeMaker: drop arguments it does not know
+    my $developer = -d ".git";    # only warn about downgrades in a git checkout
+    my %mm_req = (                # WriteMakefile argument => minimum MakeMaker version for it
+        LICENSE => 6.31,          # was spelled LICENCE, which never matched the LICENSE argument
+        META_MERGE => 6.45,
+        META_ADD => 6.45,
+        MIN_PERL_VERSION => 6.48,
+        BUILD_REQUIRES => 6.56,
+        TEST_REQUIRES => 6.64,
+    );
+    undef(*WriteMakefile);        # clear the imported WriteMakefile before installing the wrapper
+    *WriteMakefile = sub {
+        my %arg = @_;
+        unless (eval { ExtUtils::MakeMaker->VERSION($mm_req{TEST_REQUIRES}) }) {
+            warn "Downgrading TEST_REQUIRES" if $developer;
+            $arg{BUILD_REQUIRES} = {    # fold the test prerequisites into the build prerequisites
+                %{ $arg{BUILD_REQUIRES} || {} },
+                %{ delete($arg{TEST_REQUIRES}) || {} },
+            };
+        }
+        for (keys %mm_req) {
+            unless (eval { ExtUtils::MakeMaker->VERSION($mm_req{$_}) }) {
+                warn "$_ $@" if $developer;
+                delete $arg{$_};
+            }
+        }
+        ExtUtils::MakeMaker::WriteMakefile(%arg);
+    };
+}
diff --git a/README b/README
new file mode 100644
index 0000000..bb99df9
--- /dev/null
+++ b/README
@@ -0,0 +1,11 @@
+Encode-Locale
+==============
+
+The purpose of this Perl module is to try to determine what encodings should be used
+when interfacing to various external interfaces. You will need perl-5.8 or
+better to use this module.
+
+© 2010 Gisle Aas `<gisle@aas.no>`.
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
diff --git a/lib/Encode/Locale.pm b/lib/Encode/Locale.pm
new file mode 100644
index 0000000..1933778
--- /dev/null
+++ b/lib/Encode/Locale.pm
@@ -0,0 +1,373 @@
+package Encode::Locale;
+
+use strict;
+our $VERSION = "1.05";
+
+use base 'Exporter';
+our @EXPORT_OK = qw(
+ decode_argv env
+ $ENCODING_LOCALE $ENCODING_LOCALE_FS
+ $ENCODING_CONSOLE_IN $ENCODING_CONSOLE_OUT
+);
+
+use Encode ();
+use Encode::Alias ();
+
+our $ENCODING_LOCALE;
+our $ENCODING_LOCALE_FS;
+our $ENCODING_CONSOLE_IN;
+our $ENCODING_CONSOLE_OUT;
+
+sub DEBUG () { 0 }
+
+sub _init {    # Fill in the $ENCODING_* variables that are still unset; dies if Encode cannot resolve $ENCODING_LOCALE.
+ if ($^O eq "MSWin32") {    # Windows: look up the ANSI and console code pages
+ unless ($ENCODING_LOCALE) {
+ # Try to obtain what the Windows ANSI code page is
+ eval {
+ unless (defined &GetACP) {
+ require Win32;
+ eval { Win32::GetACP() };    # probe call; $@ is set if it fails
+ *GetACP = sub { &Win32::GetACP } unless $@;
+ }
+ unless (defined &GetACP) {    # still undefined: import GetACP through Win32::API instead
+ require Win32::API;
+ Win32::API->Import('kernel32', 'int GetACP()');
+ }
+ if (defined &GetACP) {
+ my $cp = GetACP();
+ $ENCODING_LOCALE = "cp$cp" if $cp;
+ }
+ };
+ }
+
+ unless ($ENCODING_CONSOLE_IN) {
+ # only test one since set together
+ unless (defined &GetInputCP) {
+ eval {
+ require Win32;
+ eval { Win32::GetConsoleCP() };    # probe call; $@ is set if it fails
+ # manually "import" it since Win32->import refuses
+ *GetInputCP = sub { &Win32::GetConsoleCP } unless $@;
+ *GetOutputCP = sub { &Win32::GetConsoleOutputCP } unless $@;
+ };
+ unless (defined &GetInputCP) {
+ eval {
+ # try Win32::Console module for codepage to use
+ require Win32::Console;
+ eval { Win32::Console::InputCP() };
+ *GetInputCP = sub { &Win32::Console::InputCP }
+ unless $@;
+ *GetOutputCP = sub { &Win32::Console::OutputCP }
+ unless $@;
+ };
+ }
+ unless (defined &GetInputCP) {
+ # final fallback
+ *GetInputCP = *GetOutputCP = sub {    # one shared sub: run chcp and pull the number out of its "Active code page: N" output
+ # another fallback that could work is:
+ # reg query HKLM\System\CurrentControlSet\Control\Nls\CodePage /v ACP
+ ((qx(chcp) || '') =~ /^Active code page: (\d+)/)
+ ? $1 : ();
+ };
+ }
+ }
+ my $cp = GetInputCP();
+ $ENCODING_CONSOLE_IN = "cp$cp" if $cp;
+ $cp = GetOutputCP();
+ $ENCODING_CONSOLE_OUT = "cp$cp" if $cp;
+ }
+ }
+
+ unless ($ENCODING_LOCALE) {    # still unset: ask I18N::Langinfo for the locale's CODESET
+ eval {
+ require I18N::Langinfo;
+ $ENCODING_LOCALE = I18N::Langinfo::langinfo(I18N::Langinfo::CODESET());
+
+ # Workaround of Encode < v2.25. The "646" encoding alias was
+ # introduced in Encode-2.25, but we don't want to require that version
+ # quite yet. Should avoid the CPAN testers failure reported from
+ # openbsd-4.7/perl-5.10.0 combo.
+ $ENCODING_LOCALE = "ascii" if $ENCODING_LOCALE eq "646";
+
+ # https://rt.cpan.org/Ticket/Display.html?id=66373
+ $ENCODING_LOCALE = "hp-roman8" if $^O eq "hpux" && $ENCODING_LOCALE eq "roman8";
+ };
+ $ENCODING_LOCALE ||= $ENCODING_CONSOLE_IN;    # nothing from langinfo: reuse the console input encoding, if set
+ }
+
+ if ($^O eq "darwin") {
+ $ENCODING_LOCALE_FS ||= "UTF-8";    # on darwin, file names default to UTF-8 unless already set
+ }
+
+ # final fallback
+ $ENCODING_LOCALE ||= $^O eq "MSWin32" ? "cp1252" : "UTF-8";
+ $ENCODING_LOCALE_FS ||= $ENCODING_LOCALE;
+ $ENCODING_CONSOLE_IN ||= $ENCODING_LOCALE;
+ $ENCODING_CONSOLE_OUT ||= $ENCODING_CONSOLE_IN;
+
+ unless (Encode::find_encoding($ENCODING_LOCALE)) {    # Encode does not recognise the chosen name
+ my $foundit;
+ if (lc($ENCODING_LOCALE) eq "gb18030") {    # retry after loading Encode::HanExtra
+ eval {
+ require Encode::HanExtra;
+ };
+ if ($@) {
+ die "Need Encode::HanExtra to be installed to support locale codeset ($ENCODING_LOCALE), stopped";
+ }
+ $foundit++ if Encode::find_encoding($ENCODING_LOCALE);
+ }
+ die "The locale codeset ($ENCODING_LOCALE) isn't one that perl can decode, stopped"
+ unless $foundit;
+
+ }
+
+ # use Data::Dump; ddx $ENCODING_LOCALE, $ENCODING_LOCALE_FS, $ENCODING_CONSOLE_IN, $ENCODING_CONSOLE_OUT;
+}
+
+_init();    # determine the encodings once, when the module is loaded
+Encode::Alias::define_alias(sub {
+ no strict 'refs';    # the variable name below is built at run time
+ no warnings 'once';
+ return ${"ENCODING_" . uc(shift)};    # value of $ENCODING_<UPPER-CASED NAME> in this package; undef if no such variable is set
+}, "locale");
+
+sub _flush_aliases {
+    # Drop cached aliases whose upper-cased name matches a defined $ENCODING_* variable.
+    no strict 'refs';
+    foreach my $name (keys %Encode::Alias::Alias) {
+        next unless defined ${"ENCODING_" . uc($name)};
+        delete $Encode::Alias::Alias{$name};
+        warn "Flushed alias cache for $name" if DEBUG;
+    }
+}
+
+sub reinit {
+    my ($locale, $locale_fs) = @_;    # either may be omitted; _init() then fills in what is unset
+    $ENCODING_LOCALE     = $locale;
+    $ENCODING_LOCALE_FS  = $locale_fs;
+    $ENCODING_CONSOLE_IN = $ENCODING_CONSOLE_OUT = $locale;    # both console encodings follow the forced locale
+    _init();
+    _flush_aliases();
+}
+
+sub decode_argv {
+    defined(wantarray) and die;    # void context only: @ARGV is rewritten in place
+    foreach my $arg (@ARGV) {      # $arg aliases the @ARGV element itself
+        $arg = Encode::decode(locale => $arg, @_);    # the caller's arguments are passed on to decode()
+    }
+}
+
+sub env {    # get, set or (with an undef value) delete an environment variable via Unicode strings
+    my $name = Encode::encode(locale => shift);
+    my $previous = $ENV{$name};    # remembered before any change so the old value can be returned
+    if (@_) {
+        my $value = shift;
+        if (!defined $value) {
+            delete $ENV{$name};
+        }
+        else {
+            $ENV{$name} = Encode::encode(locale => $value);
+        }
+    }
+    return Encode::decode(locale => $previous) if defined wantarray;
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Encode::Locale - Determine the locale encoding
+
+=head1 SYNOPSIS
+
+ use Encode::Locale;
+ use Encode;
+
+ $string = decode(locale => $bytes);
+ $bytes = encode(locale => $string);
+
+ if (-t) {
+ binmode(STDIN, ":encoding(console_in)");
+ binmode(STDOUT, ":encoding(console_out)");
+ binmode(STDERR, ":encoding(console_out)");
+ }
+
+ # Processing file names passed in as arguments
+ my $uni_filename = decode(locale => $ARGV[0]);
+ open(my $fh, "<", encode(locale_fs => $uni_filename))
+ || die "Can't open '$uni_filename': $!";
+ binmode($fh, ":encoding(locale)");
+ ...
+
+=head1 DESCRIPTION
+
+In many applications it's wise to let Perl use Unicode for the strings it
+processes. Most of the interfaces Perl has to the outside world are still byte
+based. Programs therefore need to decode byte strings that enter the program
+from the outside and encode them again on the way out.
+
+The POSIX locale system is used to specify both the language conventions
+requested by the user and the preferred character set to consume and
+output. The C<Encode::Locale> module looks up the charset and encoding (called
+a CODESET in the locale jargon) and arranges for the L<Encode> module to know
+this encoding under the name "locale". It means bytes obtained from the
+environment can be converted to Unicode strings by calling C<<
+Encode::decode(locale => $bytes) >> and converted back again with C<<
+Encode::encode(locale => $string) >>.
+
+Where file system interfaces pass file names in and out of the program we also
+need care. The trend is for operating systems to use a fixed file encoding
+that doesn't actually depend on the locale; and this module determines the most
+appropriate encoding for file names. The L<Encode> module will know this
+encoding under the name "locale_fs". For traditional Unix systems this will
+be an alias to the same encoding as "locale".
+
+For programs running in a terminal window (called a "Console" on some systems)
+the "locale" encoding is usually a good choice for what to expect as input and
+output. Some systems allow us to query the encoding set for the terminal and
+C<Encode::Locale> will do that if available and make these encodings known
+under the C<Encode> aliases "console_in" and "console_out". For systems where
+we can't determine the terminal encoding these will be aliased as the same
+encoding as "locale". The advice is to use "console_in" for input known to
+come from the terminal and "console_out" for output to the terminal.
+
+In addition to arranging for various Encode aliases the following functions and
+variables are provided:
+
+=over
+
+=item decode_argv( )
+
+=item decode_argv( Encode::FB_CROAK )
+
+This will decode the command line arguments to perl (the C<@ARGV> array) in-place.
+
+The function will by default replace characters that can't be decoded by
+"\x{FFFD}", the Unicode replacement character.
+
+Any argument provided is passed as CHECK to the underlying Encode::decode() call.
+Pass the value C<Encode::FB_CROAK> to have the decoding croak if not all the
+command line arguments can be decoded. See L<Encode/"Handling Malformed Data">
+for details on other options for CHECK.
+
+=item env( $uni_key )
+
+=item env( $uni_key => $uni_value )
+
+Interface to get/set environment variables. Returns the current value as a
+Unicode string. The $uni_key and $uni_value arguments are expected to be
+Unicode strings as well. Passing C<undef> as $uni_value deletes the
+environment variable named $uni_key.
+
+The returned value will have the characters that can't be decoded replaced by
+"\x{FFFD}", the Unicode replacement character.
+
+There is no interface to request alternative CHECK behavior as for
+decode_argv(). If you need that you need to call encode/decode yourself.
+For example:
+
+ my $key = Encode::encode(locale => $uni_key, Encode::FB_CROAK);
+ my $uni_value = Encode::decode(locale => $ENV{$key}, Encode::FB_CROAK);
+
+=item reinit( )
+
+=item reinit( $encoding )
+
+Reinitialize the encodings from the locale. You want to call this function if
+you changed anything in the environment that might influence the locale.
+
+This function will croak if the determined encoding isn't recognized by
+the Encode module.
+
+With an argument, force the $ENCODING_... variables to be set to the given value.
+
+=item $ENCODING_LOCALE
+
+The encoding name determined to be suitable for the current locale.
+L<Encode> knows this encoding as "locale".
+
+=item $ENCODING_LOCALE_FS
+
+The encoding name determined to be suitable for file system interfaces
+involving file names.
+L<Encode> knows this encoding as "locale_fs".
+
+=item $ENCODING_CONSOLE_IN
+
+=item $ENCODING_CONSOLE_OUT
+
+The encodings to be used for reading input from and writing output to a console.
+L<Encode> knows these encodings as "console_in" and "console_out".
+
+=back
+
+=head1 NOTES
+
+This table summarizes the mapping of the encodings set up
+by the C<Encode::Locale> module:
+
+ Encode | | |
+ Alias | Windows | Mac OS X | POSIX
+ ------------+---------+--------------+------------
+ locale | ANSI | nl_langinfo | nl_langinfo
+ locale_fs | ANSI | UTF-8 | nl_langinfo
+ console_in | OEM | nl_langinfo | nl_langinfo
+ console_out | OEM | nl_langinfo | nl_langinfo
+
+=head2 Windows
+
+Windows has basically 2 sets of APIs. A wide API (based on passing UTF-16
+strings) and a byte based API based on a character set called ANSI. The
+regular Perl interfaces to the OS currently only use the ANSI APIs.
+Unfortunately ANSI is not a single character set.
+
+The encoding that corresponds to ANSI varies between different editions of
+Windows. For many western editions of Windows ANSI corresponds to CP-1252
+which is a character set similar to ISO-8859-1. Conceptually the ANSI
+character set is a similar concept to the POSIX locale CODESET so this module
+figures out what the ANSI code page is and makes this available as
+$ENCODING_LOCALE and the "locale" Encoding alias.
+
+Windows systems also operate with another byte based character set.
+It's called the OEM code page. This is the encoding that the Console
+takes as input and output. It's common for the OEM code page to
+differ from the ANSI code page.
+
+=head2 Mac OS X
+
+On Mac OS X the file system encoding is always UTF-8 while the locale
+can otherwise be set up as normal for POSIX systems.
+
+File names on Mac OS X will at the OS-level be converted to
+NFD-form. A file created by passing a NFC-filename will come
+in NFD-form from readdir(). See L<Unicode::Normalize> for details
+of NFD/NFC.
+
+Actually, Apple does not follow the Unicode NFD standard since not all
+character ranges are decomposed. The claim is that this avoids problems with
+round trip conversions from old Mac text encodings. See L<Encode::UTF8Mac> for
+details.
+
+=head2 POSIX (Linux and other Unixes)
+
+File systems might vary in what encoding is to be used for
+filenames. Since this module has no way to actually figure out
+what is correct it goes with the best guess which is to
+assume filenames are encoded according to the current locale.
+Users are advised to always specify UTF-8 as the locale charset.
+
+=head1 SEE ALSO
+
+L<I18N::Langinfo>, L<Encode>, L<Term::Encoding>
+
+=head1 AUTHOR
+
+Copyright 2010 Gisle Aas <gisle@aas.no>.
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
+=cut
diff --git a/t/alias.t b/t/alias.t
new file mode 100644
index 0000000..c18d027
--- /dev/null
+++ b/t/alias.t
@@ -0,0 +1,22 @@
+#!perl -w
+
+use strict;
+use warnings;
+use Test::More tests => 8;
+
+use Encode::Locale;
+use Encode qw(find_encoding);
+
+sub cmp_encoding {
+    my ($alias, $var) = @_;
+    # The alias must resolve to the encoding named by $Encode::Locale::<var>,
+    # both lower-cased and exactly as given.
+    for my $name (lc($alias), $alias) {
+        is find_encoding($name), find_encoding(${ $Encode::Locale::{$var} }), "$name eq $var";
+    }
+}
+
+cmp_encoding 'Locale', 'ENCODING_LOCALE';
+cmp_encoding 'Locale_FS', 'ENCODING_LOCALE_FS';
+cmp_encoding 'Console_IN', 'ENCODING_CONSOLE_IN';
+cmp_encoding 'Console_OUT', 'ENCODING_CONSOLE_OUT';
diff --git a/t/arg.t b/t/arg.t
new file mode 100644
index 0000000..a754541
--- /dev/null
+++ b/t/arg.t
@@ -0,0 +1,45 @@
+#!perl -w
+
+use strict;
+use warnings;
+use Test::More;
+
+use Encode::Locale qw($ENCODING_LOCALE decode_argv);
+use Encode;
+use utf8;
+
+diag "ENCODING_LOCALE is $ENCODING_LOCALE\n";
+my @chars = qw(funny chars š ™);
+my @octets = map { Encode::encode(locale => $_) } @chars;
+@ARGV = @octets;
+
+plan tests => scalar(@ARGV);
+
+decode_argv();
+
+TODO: {
+ local $TODO = "ARGV decoding";
+ for (my $i = 0; $i < @ARGV; $i++) {
+ is $chars[$i], $ARGV[$i],
+ "chars(" . prettify($chars[$i]) .
+ ") octets(" . prettify($octets[$i]) .
+ ") argv(" . prettify($ARGV[$i]) . ")";
+ }
+}
+
+sub prettify {
+    # Render a string for test diagnostics, one character at a time:
+    # code points 33..127 are kept as they are, other code points below
+    # 256 become \xHH, and everything else becomes \x{HHHH}.
+    # Returns the empty string for empty input.
+    my ($text) = @_;
+    my $pretty = '';
+    foreach my $char (split //, $text) {
+        my $code = ord $char;
+        $pretty .=
+              $code > 32 && $code < 128 ? $char
+            : $code < 256               ? sprintf("\\x%02X", $code)
+            :                             sprintf("\\x{%04X}", $code);
+    }
+    return $pretty;
+}
diff --git a/t/env.t b/t/env.t
new file mode 100644
index 0000000..dfc7203
--- /dev/null
+++ b/t/env.t
@@ -0,0 +1,25 @@
+#!perl -w
+
+use strict;
+use warnings;
+use Test::More tests => 13;
+
+use Encode::Locale qw(env);
+
+$ENV{foo} = "bar";
+is env("foo"), "bar", 'env read';
+is env("foo", "baz"), "bar", 'env write retval old value';
+is env("foo"), "baz", 'env write worked';
+is $ENV{foo}, "baz", 'env affected %ENV';
+is env("foo", undef), "baz", 'env write retval old value';
+is env("foo"), undef, 'env write worked';
+ok !exists $ENV{foo}, 'env write undef deletes from %ENV';
+
+Encode::Locale::reinit("cp1252");
+$ENV{"m\xf6ney"} = "\x80uro";
+is env("m\xf6ney", "\x{20AC}"), "\x{20AC}uro", 'env write retval encoded';
+is env("m\xf6ney"), "\x{20AC}", 'env write worked';
+is $ENV{"m\xf6ney"}, "\x80", 'env affected %ENV';
+is env("\x{20AC}", 1), undef, 'env write retval old value';
+is env("\x{20AC}"), 1, 'env write worked';
+is $ENV{"\x80"}, 1, 'env affected %ENV';
diff --git a/t/tain.t b/t/tain.t
new file mode 100644
index 0000000..f38cacf
--- /dev/null
+++ b/t/tain.t
@@ -0,0 +1,19 @@
+#!perl -Tw
+
+use strict;
+use warnings;
+
+# taint mode testing as seen in WWW::Mechanize
+
+use Test::More tests => 1;
+my @warns;
+BEGIN {
+ $SIG{__WARN__} = sub { push @warns, @_ };
+}
+BEGIN {
+ delete @ENV{qw( PATH IFS CDPATH ENV BASH_ENV )}; # Placates taint-unsafe Cwd.pm in 5.6.1
+}
+
+require Encode::Locale;
+
+is "@warns", "", 'no warnings';
diff --git a/t/warn_once.t b/t/warn_once.t
new file mode 100644
index 0000000..e2dc61c
--- /dev/null
+++ b/t/warn_once.t
@@ -0,0 +1,20 @@
+#!perl -w
+
+use strict;
+use warnings;
+
+use Test::More tests => 2;
+my @warns;
+BEGIN {
+ $SIG{__WARN__} = sub { push @warns, @_ };
+}
+
+use Encode::Locale;
+
+BEGIN {
+    use Encode;
+    my $bytes = encode("UTF-8", "foo\xFF");
+    is $bytes, "foo\xC3\xBF", 'encode to UTF-8';    # ok() took the expected bytes as the test name and compared nothing
+}
+
+is "@warns", "", 'no warnings';