diff options
| -rw-r--r-- | Doc/library/codecs.rst | 4 | ||||
| -rw-r--r-- | Doc/library/os.rst | 4 | ||||
| -rw-r--r-- | Lib/test/test_codecs.py | 20 | ||||
| -rw-r--r-- | Lib/test/test_os.py | 4 | ||||
| -rw-r--r-- | Modules/_io/fileio.c | 2 | ||||
| -rw-r--r-- | Modules/posixmodule.c | 10 | ||||
| -rw-r--r-- | Modules/python.c | 2 | ||||
| -rw-r--r-- | Objects/unicodeobject.c | 2 | ||||
| -rw-r--r-- | Python/codecs.c | 12 | 
9 files changed, 30 insertions, 30 deletions
| diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst index b7bd125401..e3f98efc68 100644 --- a/Doc/library/codecs.rst +++ b/Doc/library/codecs.rst @@ -322,7 +322,7 @@ and implemented by all standard Python codecs:  | ``'backslashreplace'``  | Replace with backslashed escape sequences     |  |                         | (only for encoding).                          |  +-------------------------+-----------------------------------------------+ -| ``'utf8b'``             | Replace byte with surrogate U+DCxx.           | +| ``'surrogateescape'``   | Replace byte with surrogate U+DCxx.           |  +-------------------------+-----------------------------------------------+  In addition, the following error handlers are specific to a single codec: @@ -335,7 +335,7 @@ In addition, the following error handlers are specific to a single codec:  +-------------------+---------+-------------------------------------------+  .. versionadded:: 3.1 -   The ``'utf8b'`` and ``'surrogatepass'`` error handlers. +   The ``'surrogateescape'`` and ``'surrogatepass'`` error handlers.  The set of allowed values can be extended via :meth:`register_error`. diff --git a/Doc/library/os.rst b/Doc/library/os.rst index 83f5ee9dc0..221374048c 100644 --- a/Doc/library/os.rst +++ b/Doc/library/os.rst @@ -64,8 +64,8 @@ perform this conversion (see :func:`sys.getfilesystemencoding`).  .. versionchanged:: 3.1     On some systems, conversion using the file system encoding may -   fail. In this case, Python uses the ``utf8b`` encoding error -   handler, which means that undecodable bytes are replaced by a +   fail. In this case, Python uses the ``surrogateescape`` encoding +   error handler, which means that undecodable bytes are replaced by a     Unicode character U+DCxx on decoding, and these are again     translated to the original byte on encoding. diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index 9ca769910b..4ec7b5865c 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -1521,32 +1521,32 @@ class TypesTest(unittest.TestCase):          self.assertEquals(codecs.raw_unicode_escape_decode(r"\u1234"), ("\u1234", 6))          self.assertEquals(codecs.raw_unicode_escape_decode(br"\u1234"), ("\u1234", 6)) -class Utf8bTest(unittest.TestCase): +class SurrogateEscapeTest(unittest.TestCase):      def test_utf8(self):          # Bad byte -        self.assertEqual(b"foo\x80bar".decode("utf-8", "utf8b"), +        self.assertEqual(b"foo\x80bar".decode("utf-8", "surrogateescape"),                           "foo\udc80bar") -        self.assertEqual("foo\udc80bar".encode("utf-8", "utf8b"), +        self.assertEqual("foo\udc80bar".encode("utf-8", "surrogateescape"),                           b"foo\x80bar")          # bad-utf-8 encoded surrogate -        self.assertEqual(b"\xed\xb0\x80".decode("utf-8", "utf8b"), +        self.assertEqual(b"\xed\xb0\x80".decode("utf-8", "surrogateescape"),                           "\udced\udcb0\udc80") -        self.assertEqual("\udced\udcb0\udc80".encode("utf-8", "utf8b"), +        self.assertEqual("\udced\udcb0\udc80".encode("utf-8", "surrogateescape"),                           b"\xed\xb0\x80")      def test_ascii(self):          # bad byte -        self.assertEqual(b"foo\x80bar".decode("ascii", "utf8b"), +        self.assertEqual(b"foo\x80bar".decode("ascii", "surrogateescape"),                           "foo\udc80bar") -        self.assertEqual("foo\udc80bar".encode("ascii", "utf8b"), +        self.assertEqual("foo\udc80bar".encode("ascii", "surrogateescape"),                           b"foo\x80bar")      def test_charmap(self):          # bad byte: \xa5 is unmapped in iso-8859-3 -        self.assertEqual(b"foo\xa5bar".decode("iso-8859-3", "utf8b"), +        self.assertEqual(b"foo\xa5bar".decode("iso-8859-3", "surrogateescape"),                           "foo\udca5bar") -        self.assertEqual("foo\udca5bar".encode("iso-8859-3", "utf8b"), +        self.assertEqual("foo\udca5bar".encode("iso-8859-3", "surrogateescape"),                           b"foo\xa5bar") @@ -1576,7 +1576,7 @@ def test_main():          CharmapTest,          WithStmtTest,          TypesTest, -        Utf8bTest, +        SurrogateEscapeTest,      ) diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index 014d874f4d..c680d8d77a 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -708,13 +708,13 @@ if sys.platform != 'win32':              self.fsencoding = sys.getfilesystemencoding()              sys.setfilesystemencoding("utf-8")              self.dir = support.TESTFN -            self.bdir = self.dir.encode("utf-8", "utf8b") +            self.bdir = self.dir.encode("utf-8", "surrogateescape")              os.mkdir(self.dir)              self.unicodefn = []              for fn in self.filenames:                  f = open(os.path.join(self.bdir, fn), "w")                  f.close() -                self.unicodefn.append(fn.decode("utf-8", "utf8b")) +                self.unicodefn.append(fn.decode("utf-8", "surrogateescape"))          def tearDown(self):              shutil.rmtree(self.dir) diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c index 164f7e46d1..555dc12c69 100644 --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -245,7 +245,7 @@ fileio_init(PyObject *oself, PyObject *args, PyObject *kwds)  				return -1;  			stringobj = PyUnicode_AsEncodedString( -				u, Py_FileSystemDefaultEncoding, "utf8b"); +				u, Py_FileSystemDefaultEncoding, "surrogateescape");  			Py_DECREF(u);  			if (stringobj == NULL)  				return -1; diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 2050d5a1a7..21dcb4d963 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -494,13 +494,13 @@ convertenviron(void)  		if (p == NULL)  			continue;  		k = PyUnicode_Decode(*e, (int)(p-*e), -				     Py_FileSystemDefaultEncoding, "utf8b"); +				     Py_FileSystemDefaultEncoding, "surrogateescape");  		if (k == NULL) {  			PyErr_Clear();  			continue;  		}  		v = PyUnicode_Decode(p+1, strlen(p+1), -				     Py_FileSystemDefaultEncoding, "utf8b"); +				     Py_FileSystemDefaultEncoding, "surrogateescape");  		if (v == NULL) {  			PyErr_Clear();  			Py_DECREF(k); @@ -2167,7 +2167,7 @@ posix_getcwd(int use_bytes)  		return posix_error();  	if (use_bytes)  		return PyBytes_FromStringAndSize(buf, strlen(buf)); -	return PyUnicode_Decode(buf, strlen(buf), Py_FileSystemDefaultEncoding,"utf8b"); +	return PyUnicode_Decode(buf, strlen(buf), Py_FileSystemDefaultEncoding,"surrogateescape");  }  PyDoc_STRVAR(posix_getcwd__doc__, @@ -2513,7 +2513,7 @@ posix_listdir(PyObject *self, PyObject *args)  			w = PyUnicode_FromEncodedObject(v,  					Py_FileSystemDefaultEncoding, -					"utf8b"); +					"surrogateescape");  			Py_DECREF(v);  			if (w != NULL)  				v = w; @@ -4695,7 +4695,7 @@ posix_readlink(PyObject *self, PyObject *args)  		w = PyUnicode_FromEncodedObject(v,  				Py_FileSystemDefaultEncoding, -				"utf8b"); +				"surrogateescape");  		if (w != NULL) {  			Py_DECREF(v);  			v = w; diff --git a/Modules/python.c b/Modules/python.c index 4c0a55bb1f..13c6d5b82a 100644 --- a/Modules/python.c +++ b/Modules/python.c @@ -42,7 +42,7 @@ char2wchar(char* arg)  			return res;  		PyMem_Free(res);  	} -	/* Conversion failed. Fall back to escaping with utf8b. */ +	/* Conversion failed. Fall back to escaping with surrogateescape. */  #ifdef HAVE_MBRTOWC  	/* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */ diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 3740892e67..3bd1efd939 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1549,7 +1549,7 @@ PyUnicode_FSConverter(PyObject* arg, void* addr)              return 0;          output = PyUnicode_AsEncodedObject(arg,                                              Py_FileSystemDefaultEncoding, -                                           "utf8b"); +                                           "surrogateescape");          Py_DECREF(arg);          if (!output)              return 0; diff --git a/Python/codecs.c b/Python/codecs.c index cd6b7f0f60..d1915f181d 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -830,7 +830,7 @@ PyCodec_SurrogatePassErrors(PyObject *exc)  }  static PyObject * -PyCodec_UTF8bErrors(PyObject *exc) +PyCodec_SurrogateEscapeErrors(PyObject *exc)  {      PyObject *restuple;      PyObject *object; @@ -940,9 +940,9 @@ static PyObject *surrogatepass_errors(PyObject *self, PyObject *exc)      return PyCodec_SurrogatePassErrors(exc);  } -static PyObject *utf8b_errors(PyObject *self, PyObject *exc) +static PyObject *surrogateescape_errors(PyObject *self, PyObject *exc)  { -    return PyCodec_UTF8bErrors(exc); +    return PyCodec_SurrogateEscapeErrors(exc);  }  static int _PyCodecRegistry_Init(void) @@ -1001,10 +1001,10 @@ static int _PyCodecRegistry_Init(void)  	    }  	},  	{ -	    "utf8b", +	    "surrogateescape",  	    { -		"utf8b", -		utf8b_errors, +		"surrogateescape", +		surrogateescape_errors,  		METH_O  	    }  	} | 
