diff options
| author | Nick Coghlan <ncoghlan@gmail.com> | 2017-06-18 12:29:42 +1000 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2017-06-18 12:29:42 +1000 | 
| commit | eb81795d7d3a8c898fa89a376d63fc3bbfb9a081 (patch) | |
| tree | d991860d1d359c87dc4b609c421ad746fbc06d21 /Lib/test | |
| parent | 6a98a04e21fc3209b2170c3930ce4488e89680e3 (diff) | |
| download | cpython-git-eb81795d7d3a8c898fa89a376d63fc3bbfb9a081.tar.gz | |
bpo-30565: Add PYTHONCOERCECLOCALE=warn runtime flag (GH-2260)
- removes the PY_WARN_ON_C_LOCALE build-time flag
- locale coercion and compatibility warnings are now always compiled
  in, but are off by default
- adds the PYTHONCOERCECLOCALE=warn runtime option to aid in
  debugging potentially locale-related compatibility problems
Due to not-yet-resolved test failures on *BSD systems (including
Mac OS X), this also temporarily disables UTF-8 as a locale coercion
target, and skips testing the interpreter's behavior in the POSIX locale.
Diffstat (limited to 'Lib/test')
| -rw-r--r-- | Lib/test/test_c_locale_coercion.py | 208 | 
1 files changed, 130 insertions, 78 deletions
| diff --git a/Lib/test/test_c_locale_coercion.py b/Lib/test/test_c_locale_coercion.py index aa0771c798..a4b4626756 100644 --- a/Lib/test/test_c_locale_coercion.py +++ b/Lib/test/test_c_locale_coercion.py @@ -22,13 +22,23 @@ if sys.platform == "darwin":  else:      C_LOCALE_FS_ENCODING = C_LOCALE_STREAM_ENCODING -# XXX (ncoghlan): The above is probably still wrong for: +# Note that the above is probably still wrong in some cases, such as:  # * Windows when PYTHONLEGACYWINDOWSFSENCODING is set  # * AIX and any other platforms that use latin-1 in the C locale +# +# Options for dealing with this: +# * Don't set PYTHON_COERCE_C_LOCALE on such platforms (e.g. Windows doesn't) +# * Fix the test expectations to match the actual platform behaviour  # In order to get the warning messages to match up as expected, the candidate  # order here must much the target locale order in Python/pylifecycle.c -_C_UTF8_LOCALES = ("C.UTF-8", "C.utf8", "UTF-8") +_C_UTF8_LOCALES = ("C.UTF-8", "C.utf8") #, "UTF-8") + +# XXX (ncoghlan): Using UTF-8 as a target locale is currently disabled due to +#                 problems encountered on *BSD systems with those test cases +# For additional details see: +#     nl_langinfo CODESET error: https://bugs.python.org/issue30647 +#     locale handling differences: https://bugs.python.org/issue30672  # There's no reliable cross-platform way of checking locale alias  # lists, so the only way of knowing which of these locales will work @@ -40,20 +50,24 @@ def _set_locale_in_subprocess(locale_name):      result, py_cmd = run_python_until_end("-c", cmd, __isolated=True)      return result.rc == 0 -_EncodingDetails = namedtuple("EncodingDetails", -                              "fsencoding stdin_info stdout_info stderr_info") +_fields = "fsencoding stdin_info stdout_info stderr_info lang lc_ctype lc_all" +_EncodingDetails = namedtuple("EncodingDetails", _fields)  class EncodingDetails(_EncodingDetails): +    # XXX (ncoghlan): Using JSON for child state 
reporting may be less fragile      CHILD_PROCESS_SCRIPT = ";".join([ -        "import sys", +        "import sys, os",          "print(sys.getfilesystemencoding())",          "print(sys.stdin.encoding + ':' + sys.stdin.errors)",          "print(sys.stdout.encoding + ':' + sys.stdout.errors)",          "print(sys.stderr.encoding + ':' + sys.stderr.errors)", +        "print(os.environ.get('LANG', 'not set'))", +        "print(os.environ.get('LC_CTYPE', 'not set'))", +        "print(os.environ.get('LC_ALL', 'not set'))",      ])      @classmethod -    def get_expected_details(cls, fs_encoding, stream_encoding): +    def get_expected_details(cls, coercion_expected, fs_encoding, stream_encoding, env_vars):          """Returns expected child process details for a given encoding"""          _stream = stream_encoding + ":{}"          # stdin and stdout should use surrogateescape either because the @@ -61,7 +75,14 @@ class EncodingDetails(_EncodingDetails):          stream_info = 2*[_stream.format("surrogateescape")]          # stderr should always use backslashreplace          stream_info.append(_stream.format("backslashreplace")) -        return dict(cls(fs_encoding, *stream_info)._asdict()) +        expected_lang = env_vars.get("LANG", "not set").lower() +        if coercion_expected: +            expected_lc_ctype = CLI_COERCION_TARGET.lower() +        else: +            expected_lc_ctype = env_vars.get("LC_CTYPE", "not set").lower() +        expected_lc_all = env_vars.get("LC_ALL", "not set").lower() +        env_info = expected_lang, expected_lc_ctype, expected_lc_all +        return dict(cls(fs_encoding, *stream_info, *env_info)._asdict())      @staticmethod      def _handle_output_variations(data): @@ -97,64 +118,20 @@ class EncodingDetails(_EncodingDetails):              result.fail(py_cmd)          # All subprocess outputs in this test case should be pure ASCII          adjusted_output = cls._handle_output_variations(result.out) -        stdout_lines = 
adjusted_output.decode("ascii").rstrip().splitlines() +        stdout_lines = adjusted_output.decode("ascii").splitlines()          child_encoding_details = dict(cls(*stdout_lines)._asdict())          stderr_lines = result.err.decode("ascii").rstrip().splitlines()          return child_encoding_details, stderr_lines -class _ChildProcessEncodingTestCase(unittest.TestCase): -    # Base class to check for expected encoding details in a child process - -    def _check_child_encoding_details(self, -                                      env_vars, -                                      expected_fs_encoding, -                                      expected_stream_encoding, -                                      expected_warning): -        """Check the C locale handling for the given process environment - -        Parameters: -            expected_fs_encoding: expected sys.getfilesystemencoding() result -            expected_stream_encoding: expected encoding for standard streams -            expected_warning: stderr output to expect (if any) -        """ -        result = EncodingDetails.get_child_details(env_vars) -        encoding_details, stderr_lines = result -        self.assertEqual(encoding_details, -                         EncodingDetails.get_expected_details( -                             expected_fs_encoding, -                             expected_stream_encoding)) -        self.assertEqual(stderr_lines, expected_warning) -  # Details of the shared library warning emitted at runtime -LIBRARY_C_LOCALE_WARNING = ( +LEGACY_LOCALE_WARNING = (      "Python runtime initialized with LC_CTYPE=C (a locale with default ASCII "      "encoding), which may cause Unicode compatibility problems. Using C.UTF-8, "      "C.utf8, or UTF-8 (if available) as alternative Unicode-compatible "      "locales is recommended."  
) -@unittest.skipUnless(sysconfig.get_config_var("PY_WARN_ON_C_LOCALE"), -                     "C locale runtime warning disabled at build time") -class LocaleWarningTests(_ChildProcessEncodingTestCase): -    # Test warning emitted when running in the C locale - -    def test_library_c_locale_warning(self): -        self.maxDiff = None -        for locale_to_set in ("C", "POSIX", "invalid.ascii"): -            # XXX (ncoghlan): Mac OS X doesn't behave as expected in the -            #                 POSIX locale, so we skip that for now -            if sys.platform == "darwin" and locale_to_set == "POSIX": -                continue -            var_dict = { -                "LC_ALL": locale_to_set -            } -            with self.subTest(forced_locale=locale_to_set): -                self._check_child_encoding_details(var_dict, -                                                   C_LOCALE_FS_ENCODING, -                                                   C_LOCALE_STREAM_ENCODING, -                                                   [LIBRARY_C_LOCALE_WARNING]) -  # Details of the CLI locale coercion warning emitted at runtime  CLI_COERCION_WARNING_FMT = (      "Python detected LC_CTYPE=C: LC_CTYPE coerced to {} (set another locale " @@ -163,9 +140,13 @@ CLI_COERCION_WARNING_FMT = (  AVAILABLE_TARGETS = None +CLI_COERCION_TARGET = None +CLI_COERCION_WARNING = None  def setUpModule():      global AVAILABLE_TARGETS +    global CLI_COERCION_TARGET +    global CLI_COERCION_WARNING      if AVAILABLE_TARGETS is not None:          # initialization already done @@ -177,26 +158,57 @@ def setUpModule():          if _set_locale_in_subprocess(target_locale):              AVAILABLE_TARGETS.append(target_locale) +    if AVAILABLE_TARGETS: +        # Coercion is expected to use the first available target locale +        CLI_COERCION_TARGET = AVAILABLE_TARGETS[0] +        CLI_COERCION_WARNING = CLI_COERCION_WARNING_FMT.format(CLI_COERCION_TARGET) -class 
_LocaleCoercionTargetsTestCase(_ChildProcessEncodingTestCase): -    # Base class for test cases that rely on coercion targets being defined +class _LocaleHandlingTestCase(unittest.TestCase): +    # Base class to check expected locale handling behaviour -    @classmethod -    def setUpClass(cls): -        if not AVAILABLE_TARGETS: -            raise unittest.SkipTest("No C-with-UTF-8 locale available") +    def _check_child_encoding_details(self, +                                      env_vars, +                                      expected_fs_encoding, +                                      expected_stream_encoding, +                                      expected_warnings, +                                      coercion_expected): +        """Check the C locale handling for the given process environment +        Parameters: +            expected_fs_encoding: expected sys.getfilesystemencoding() result +            expected_stream_encoding: expected encoding for standard streams +            expected_warning: stderr output to expect (if any) +        """ +        result = EncodingDetails.get_child_details(env_vars) +        encoding_details, stderr_lines = result +        expected_details = EncodingDetails.get_expected_details( +            coercion_expected, +            expected_fs_encoding, +            expected_stream_encoding, +            env_vars +        ) +        self.assertEqual(encoding_details, expected_details) +        if expected_warnings is None: +            expected_warnings = [] +        self.assertEqual(stderr_lines, expected_warnings) -class LocaleConfigurationTests(_LocaleCoercionTargetsTestCase): + +class LocaleConfigurationTests(_LocaleHandlingTestCase):      # Test explicit external configuration via the process environment +    def setUpClass(): +        # This relies on setupModule() having been run, so it can't be +        # handled via the @unittest.skipUnless decorator +        if not AVAILABLE_TARGETS: +            raise 
unittest.SkipTest("No C-with-UTF-8 locale available") +      def test_external_target_locale_configuration(self): +          # Explicitly setting a target locale should give the same behaviour as          # is seen when implicitly coercing to that target locale          self.maxDiff = None -        expected_warning = []          expected_fs_encoding = "utf-8"          expected_stream_encoding = "utf-8" @@ -209,6 +221,7 @@ class LocaleConfigurationTests(_LocaleCoercionTargetsTestCase):              for locale_to_set in AVAILABLE_TARGETS:                  # XXX (ncoghlan): LANG=UTF-8 doesn't appear to work as                  #                 expected, so skip that combination for now +                # See https://bugs.python.org/issue30672 for discussion                  if env_var == "LANG" and locale_to_set == "UTF-8":                      continue @@ -219,17 +232,23 @@ class LocaleConfigurationTests(_LocaleCoercionTargetsTestCase):                      self._check_child_encoding_details(var_dict,                                                         expected_fs_encoding,                                                         expected_stream_encoding, -                                                       expected_warning) +                                                       expected_warnings=None, +                                                       coercion_expected=False)  @test.support.cpython_only  @unittest.skipUnless(sysconfig.get_config_var("PY_COERCE_C_LOCALE"),                       "C locale coercion disabled at build time") -class LocaleCoercionTests(_LocaleCoercionTargetsTestCase): +class LocaleCoercionTests(_LocaleHandlingTestCase):      # Test implicit reconfiguration of the environment during CLI startup -    def _check_c_locale_coercion(self, fs_encoding, stream_encoding, coerce_c_locale): +    def _check_c_locale_coercion(self, +                                 fs_encoding, stream_encoding, +                                 
coerce_c_locale, +                                 expected_warnings=None, +                                 coercion_expected=True, +                                 **extra_vars):          """Check the C locale handling for various configurations          Parameters: @@ -238,27 +257,31 @@ class LocaleCoercionTests(_LocaleCoercionTargetsTestCase):              coerce_c_locale: setting to use for PYTHONCOERCECLOCALE                None: don't set the variable at all                str: the value set in the child's environment +            expected_warnings: expected warning lines on stderr +            extra_vars: additional environment variables to set in subprocess          """ - -        # Check for expected warning on stderr if C locale is coerced          self.maxDiff = None -        expected_warning = [] -        if coerce_c_locale != "0": -            # Expect coercion to use the first available locale -            warning_msg = CLI_COERCION_WARNING_FMT.format(AVAILABLE_TARGETS[0]) -            expected_warning.append(warning_msg) +        if not AVAILABLE_TARGETS: +            # Locale coercion is disabled when there aren't any target locales +            fs_encoding = C_LOCALE_FS_ENCODING +            stream_encoding = C_LOCALE_STREAM_ENCODING +            coercion_expected = False +            if expected_warnings: +                expected_warnings = [LEGACY_LOCALE_WARNING]          base_var_dict = {              "LANG": "",              "LC_CTYPE": "",              "LC_ALL": "",          } +        base_var_dict.update(extra_vars)          for env_var in ("LANG", "LC_CTYPE"):              for locale_to_set in ("", "C", "POSIX", "invalid.ascii"): -                # XXX (ncoghlan): Mac OS X doesn't behave as expected in the +                # XXX (ncoghlan): *BSD platforms don't behave as expected in the                  #                 POSIX locale, so we skip that for now -                if sys.platform == "darwin" and locale_to_set == "POSIX": +     
           # See https://bugs.python.org/issue30672 for discussion +                if locale_to_set == "POSIX":                      continue                  with self.subTest(env_var=env_var,                                    nominal_locale=locale_to_set, @@ -267,33 +290,62 @@ class LocaleCoercionTests(_LocaleCoercionTargetsTestCase):                      var_dict[env_var] = locale_to_set                      if coerce_c_locale is not None:                          var_dict["PYTHONCOERCECLOCALE"] = coerce_c_locale +                    # Check behaviour on successful coercion                      self._check_child_encoding_details(var_dict,                                                         fs_encoding,                                                         stream_encoding, -                                                       expected_warning) +                                                       expected_warnings, +                                                       coercion_expected)      def test_test_PYTHONCOERCECLOCALE_not_set(self):          # This should coerce to the first available target locale by default          self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale=None)      def test_PYTHONCOERCECLOCALE_not_zero(self): -        # *Any* string other that "0" is considered "set" for our purposes +        # *Any* string other than "0" is considered "set" for our purposes          # and hence should result in the locale coercion being enabled          for setting in ("", "1", "true", "false"):              self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale=setting) +    def test_PYTHONCOERCECLOCALE_set_to_warn(self): +        # PYTHONCOERCECLOCALE=warn enables runtime warnings for legacy locales +        self._check_c_locale_coercion("utf-8", "utf-8", +                                      coerce_c_locale="warn", +                                      expected_warnings=[CLI_COERCION_WARNING]) + +      def 
test_PYTHONCOERCECLOCALE_set_to_zero(self):          # The setting "0" should result in the locale coercion being disabled          self._check_c_locale_coercion(C_LOCALE_FS_ENCODING,                                        C_LOCALE_STREAM_ENCODING, -                                      coerce_c_locale="0") +                                      coerce_c_locale="0", +                                      coercion_expected=False) +        # Setting LC_ALL=C shouldn't make any difference to the behaviour +        self._check_c_locale_coercion(C_LOCALE_FS_ENCODING, +                                      C_LOCALE_STREAM_ENCODING, +                                      coerce_c_locale="0", +                                      LC_ALL="C", +                                      coercion_expected=False) +    def test_LC_ALL_set_to_C(self): +        # Setting LC_ALL should render the locale coercion ineffective +        self._check_c_locale_coercion(C_LOCALE_FS_ENCODING, +                                      C_LOCALE_STREAM_ENCODING, +                                      coerce_c_locale=None, +                                      LC_ALL="C", +                                      coercion_expected=False) +        # And result in a warning about a lack of locale compatibility +        self._check_c_locale_coercion(C_LOCALE_FS_ENCODING, +                                      C_LOCALE_STREAM_ENCODING, +                                      coerce_c_locale="warn", +                                      LC_ALL="C", +                                      expected_warnings=[LEGACY_LOCALE_WARNING], +                                      coercion_expected=False)  def test_main():      test.support.run_unittest(          LocaleConfigurationTests, -        LocaleCoercionTests, -        LocaleWarningTests +        LocaleCoercionTests      )      test.support.reap_children() | 
