From f6066573d25508f5cbbc5c12254086d419bb8828 Mon Sep 17 00:00:00 2001
From: Simon McVittie
Date: Mon, 25 Jun 2012 17:01:51 +0100
Subject: fd.o #40817: validate UTF-8 according to the same rules as libdbus

Strings appended to a message are converted to a UTF-8 bytes object and
then validated. If configure found dbus_validate_utf8(), that function
is used. Otherwise the bytes are decoded strictly, re-encoded as UTF-32,
and every code point is checked for noncharacters (U+nFFFE, U+nFFFF,
U+FDD0..U+FDEF), surrogates and values above U+10FFFF.

The owned reference to the UTF-8 bytes object is released on every
exit path, including the validation failures.

---
 NEWS                            |   8 ++++
 _dbus_bindings/message-append.c | 108 +++++++++++++++++++++++++++++-----------
 configure.ac                    |   4 ++
 test/test-standalone.py         |  59 ++++++++++++++++++++++-
 4 files changed, 149 insertions(+), 30 deletions(-)

diff --git a/NEWS b/NEWS
index 178809f..2d579af 100644
--- a/NEWS
+++ b/NEWS
@@ -1,8 +1,16 @@
 D-Bus Python Bindings 1.1.1 (UNRELEASED)
 ========================================
 
+Dependencies:
+
+* libdbus 1.6 or later is now recommended. It is not strictly required yet.
+
 Fixes:
 
+• Validate UTF-8 according to the rules libdbus uses, falling back to our
+  own (inefficient) implementation if not compiled against dbus >= 1.6
+  (fd.o #40817)
+
 • Under Python 3, in the absence of introspection or signature='...',
   pass dbus.ObjectPath or dbus.Signature arguments with the obvious
   signature 'o' or 'g', not 's'. This previously only worked in Python 2.
diff --git a/_dbus_bindings/message-append.c b/_dbus_bindings/message-append.c
index df3190d..e519ae2 100644
--- a/_dbus_bindings/message-append.c
+++ b/_dbus_bindings/message-append.c
@@ -531,6 +531,7 @@ _message_iter_append_string(DBusMessageIter *appender,
                             dbus_bool_t allow_object_path_attr)
 {
     char *s;
+    PyObject *utf8;
 
     if (sig_type == DBUS_TYPE_OBJECT_PATH && allow_object_path_attr) {
         PyObject *object_path = get_object_path (obj);
@@ -550,44 +551,93 @@ _message_iter_append_string(DBusMessageIter *appender,
     }
 
     if (PyBytes_Check(obj)) {
-        PyObject *unicode;
-
-        /* Raise TypeError if the string has embedded NULs */
-        if (PyBytes_AsStringAndSize(obj, &s, NULL) < 0) return -1;
-        /* Surely there's a faster stdlib way to validate UTF-8... */
-        unicode = PyUnicode_DecodeUTF8(s, PyBytes_GET_SIZE(obj), NULL);
-        if (!unicode) {
-            PyErr_SetString(PyExc_UnicodeError, "String parameters "
-                            "to be sent over D-Bus must be valid UTF-8");
-            return -1;
-        }
-        Py_CLEAR(unicode);
-
-        DBG("Performing actual append: string %s", s);
-        if (!dbus_message_iter_append_basic(appender, sig_type,
-                                            &s)) {
-            PyErr_NoMemory();
-            return -1;
-        }
+        utf8 = obj;
+        Py_INCREF(obj);
     }
     else if (PyUnicode_Check(obj)) {
-        PyObject *utf8 = PyUnicode_AsUTF8String(obj);
+        utf8 = PyUnicode_AsUTF8String(obj);
         if (!utf8) return -1;
-        /* Raise TypeError if the string has embedded NULs */
-        if (PyBytes_AsStringAndSize(utf8, &s, NULL) < 0) return -1;
-        DBG("Performing actual append: string (from unicode) %s", s);
-        if (!dbus_message_iter_append_basic(appender, sig_type, &s)) {
-            Py_CLEAR(utf8);
-            PyErr_NoMemory();
-            return -1;
-        }
-        Py_CLEAR(utf8);
     }
     else {
         PyErr_SetString(PyExc_TypeError,
                         "Expected a string or unicode object");
         return -1;
     }
+
+    /* Raise TypeError if the string has embedded NULs */
+    if (PyBytes_AsStringAndSize(utf8, &s, NULL) < 0) {
+        Py_CLEAR(utf8);
+        return -1;
+    }
+
+    /* Validate UTF-8, strictly */
+#ifdef HAVE_DBUS_VALIDATE_UTF8
+    if (!dbus_validate_utf8(s, NULL)) {
+        Py_CLEAR(utf8);
+        PyErr_SetString(PyExc_UnicodeError, "String parameters "
+                        "to be sent over D-Bus must be valid UTF-8 "
+                        "with no noncharacter code points");
+        return -1;
+    }
+#else
+    {
+        PyObject *back_to_unicode;
+        PyObject *utf32;
+        Py_ssize_t i;
+
+        /* This checks for syntactically valid UTF-8, but does not check
+         * for noncharacters (U+nFFFE, U+nFFFF for any n, or U+FDD0..U+FDEF).
+         */
+        back_to_unicode = PyUnicode_DecodeUTF8(s, PyBytes_GET_SIZE(utf8),
+                                               "strict");
+
+        if (!back_to_unicode) {
+            Py_CLEAR(utf8);
+            return -1;
+        }
+
+        utf32 = PyUnicode_AsUTF32String(back_to_unicode);
+        Py_CLEAR(back_to_unicode);
+
+        if (!utf32) {
+            Py_CLEAR(utf8);
+            return -1;
+        }
+
+        for (i = 0; i < PyBytes_GET_SIZE(utf32) / 4; i++) {
+            dbus_uint32_t *p;
+
+            p = (dbus_uint32_t *) (PyBytes_AS_STRING(utf32)) + i;
+
+            if (/* noncharacters U+nFFFE, U+nFFFF */
+                (*p & 0xFFFF) == 0xFFFE ||
+                (*p & 0xFFFF) == 0xFFFF ||
+                /* noncharacters U+FDD0..U+FDEF */
+                (*p >= 0xFDD0 && *p <= 0xFDEF) ||
+                /* surrogates U+D800..U+DBFF (high), U+DC00..U+DFFF (low) */
+                (*p >= 0xD800 && *p <= 0xDFFF) ||
+                (*p >= 0x110000)) {
+                Py_CLEAR(utf32);
+                Py_CLEAR(utf8);
+                PyErr_SetString(PyExc_UnicodeError, "String parameters "
+                                "to be sent over D-Bus must be valid UTF-8 "
+                                "with no noncharacter code points");
+                return -1;
+            }
+        }
+
+        Py_CLEAR(utf32);
+    }
+#endif
+
+    DBG("Performing actual append: string (from unicode) %s", s);
+    if (!dbus_message_iter_append_basic(appender, sig_type, &s)) {
+        Py_CLEAR(utf8);
+        PyErr_NoMemory();
+        return -1;
+    }
+
+    Py_CLEAR(utf8);
     return 0;
 }
 
diff --git a/configure.ac b/configure.ac
index ea99996..ad644d9 100644
--- a/configure.ac
+++ b/configure.ac
@@ -158,9 +158,13 @@ PKG_CHECK_MODULES(DBUS, [dbus-1 >= 1.4])
 PKG_CHECK_MODULES(DBUS_GLIB, [dbus-glib-1 >= 0.70])
 
 dbuspy_save_CFLAGS="$CFLAGS"
+dbuspy_save_LIBS="$LIBS"
 CFLAGS="$CFLAGS $DBUS_CFLAGS"
+LIBS="$LIBS $DBUS_LIBS"
 AC_CHECK_TYPES([DBusBasicValue], [], [], [#include <dbus/dbus.h>])
+AC_CHECK_FUNCS([dbus_validate_utf8])
 CFLAGS="$dbuspy_save_CFLAGS"
+LIBS="$dbuspy_save_LIBS"
 
 TP_COMPILER_WARNINGS([CFLAGS_WARNINGS], [test] dbus_python_released [= 0],
   [all \
diff --git a/test/test-standalone.py b/test/test-standalone.py
index 6f403ee..584ba4f 100755
--- a/test/test-standalone.py
+++ b/test/test-standalone.py
@@ -423,6 +423,64 @@ class TestMessageMarshalling(unittest.TestCase):
             raise AssertionError('Appending too many things in a struct '
                                  'should fail')
 
+    def test_utf8(self):
+        from _dbus_bindings import SignalMessage
+        if is_py3:
+            def utf8(*xs):
+                return bytes(xs)
+            def uni(x):
+                return chr(x)
+        else:
+            def utf8(*xs):
+                return str('').join(map(chr, xs))
+            def uni(x):
+                return unichr(x)
+        for bad in [
+                uni(0xD800),
+                utf8(0xed, 0xa0, 0x80),
+                uni(0xFDD0),
+                utf8(0xef, 0xb7, 0x90),
+                uni(0xFDD7),
+                utf8(0xef, 0xb7, 0x97),
+                uni(0xFDEF),
+                utf8(0xef, 0xb7, 0xaf),
+                uni(0xFFFE),
+                utf8(0xef, 0xbf, 0xbe),
+                uni(0xFFFF),
+                utf8(0xef, 0xbf, 0xbf),
+                uni(0x0001FFFE),
+                utf8(0xf0, 0x9f, 0xbf, 0xbe),
+                uni(0x0001FFFF),
+                utf8(0xf0, 0x9f, 0xbf, 0xbf),
+                uni(0x0007FFFE),
+                utf8(0xf1, 0xbf, 0xbf, 0xbe),
+                uni(0x0007FFFF),
+                utf8(0xf1, 0xbf, 0xbf, 0xbf),
+                uni(0x0010FFFE),
+                utf8(0xf4, 0x8f, 0xbf, 0xbe),
+                uni(0x0010FFFF),
+                utf8(0xf4, 0x8f, 0xbf, 0xbf),
+                ]:
+            s = SignalMessage('/', 'foo.bar', 'baz')
+            try:
+                s.append(bad, signature='s')
+            except UnicodeError:
+                pass
+            else:
+                raise AssertionError('Appending %r should fail' % bad)
+        for good in [
+                uni(0xfdcf),
+                uni(0xfdf0),
+                uni(0xfeff),
+                uni(0x0001feff),
+                uni(0x00020000),
+                uni(0x0007feff),
+                uni(0x00080000),
+                uni(0x0010feff),
+                ]:
+            s = SignalMessage('/', 'foo.bar', 'baz')
+            s.append(good, signature='s')
+            s.append(good.encode('utf-8'), signature='s')
 
 class TestMatching(unittest.TestCase):
     def setUp(self):
@@ -442,7 +500,6 @@ class TestMatching(unittest.TestCase):
         self._message.append('/', signature='o')
         self.assertFalse(self._match.maybe_handle_message(self._message))
 
-
 if __name__ == '__main__':
     # Python 2.6 doesn't accept a `verbosity` keyword.
     kwargs = {}
-- 
cgit v1.2.1