summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Simon McVittie <simon.mcvittie@collabora.co.uk> 2012-06-25 17:01:51 +0100
committer Simon McVittie <simon.mcvittie@collabora.co.uk> 2012-06-25 17:01:51 +0100
commit f6066573d25508f5cbbc5c12254086d419bb8828 (patch)
tree 470a9a7eb3dc658b2886d5539ba6363792cc7698
parent 4a0f4379d4a5783d576aec90019a39459eff007d (diff)
download dbus-python-f6066573d25508f5cbbc5c12254086d419bb8828.tar.gz
fd.o #40817: validate UTF-8 according to the same rules as libdbus (tag: dbus-python-1.1.1)
-rw-r--r-- NEWS 8
-rw-r--r-- _dbus_bindings/message-append.c 102
-rw-r--r-- configure.ac 4
-rwxr-xr-x test/test-standalone.py 59
4 files changed, 143 insertions, 30 deletions
diff --git a/NEWS b/NEWS
index 178809f..2d579af 100644
--- a/NEWS
+++ b/NEWS
@@ -1,8 +1,16 @@
D-Bus Python Bindings 1.1.1 (UNRELEASED)
========================================
+Dependencies:
+
+* libdbus 1.6 or later is now recommended. It is not strictly required yet.
+
Fixes:
+• Validate UTF-8 according to the rules libdbus uses, falling back to our
+ own (inefficient) implementation if not compiled against dbus >= 1.6
+ (fd.o #40817)
+
• Under Python 3, in the absence of introspection or signature='...',
pass dbus.ObjectPath or dbus.Signature arguments with the obvious
signature 'o' or 'g', not 's'. This previously only worked in Python 2.
diff --git a/_dbus_bindings/message-append.c b/_dbus_bindings/message-append.c
index df3190d..e519ae2 100644
--- a/_dbus_bindings/message-append.c
+++ b/_dbus_bindings/message-append.c
@@ -531,6 +531,7 @@ _message_iter_append_string(DBusMessageIter *appender,
dbus_bool_t allow_object_path_attr)
{
char *s;
+ PyObject *utf8;
if (sig_type == DBUS_TYPE_OBJECT_PATH && allow_object_path_attr) {
PyObject *object_path = get_object_path (obj);
@@ -550,44 +551,87 @@ _message_iter_append_string(DBusMessageIter *appender,
}
if (PyBytes_Check(obj)) {
- PyObject *unicode;
-
- /* Raise TypeError if the string has embedded NULs */
- if (PyBytes_AsStringAndSize(obj, &s, NULL) < 0) return -1;
- /* Surely there's a faster stdlib way to validate UTF-8... */
- unicode = PyUnicode_DecodeUTF8(s, PyBytes_GET_SIZE(obj), NULL);
- if (!unicode) {
- PyErr_SetString(PyExc_UnicodeError, "String parameters "
- "to be sent over D-Bus must be valid UTF-8");
- return -1;
- }
- Py_CLEAR(unicode);
-
- DBG("Performing actual append: string %s", s);
- if (!dbus_message_iter_append_basic(appender, sig_type,
- &s)) {
- PyErr_NoMemory();
- return -1;
- }
+ utf8 = obj;
+ Py_INCREF(obj);
}
else if (PyUnicode_Check(obj)) {
- PyObject *utf8 = PyUnicode_AsUTF8String(obj);
+ utf8 = PyUnicode_AsUTF8String(obj);
if (!utf8) return -1;
- /* Raise TypeError if the string has embedded NULs */
- if (PyBytes_AsStringAndSize(utf8, &s, NULL) < 0) return -1;
- DBG("Performing actual append: string (from unicode) %s", s);
- if (!dbus_message_iter_append_basic(appender, sig_type, &s)) {
- Py_CLEAR(utf8);
- PyErr_NoMemory();
- return -1;
- }
- Py_CLEAR(utf8);
}
else {
PyErr_SetString(PyExc_TypeError,
"Expected a string or unicode object");
return -1;
}
+
+ /* Raise TypeError if the string has embedded NULs */
+ if (PyBytes_AsStringAndSize(utf8, &s, NULL) < 0)
+ return -1;
+
+ /* Validate UTF-8, strictly */
+#ifdef HAVE_DBUS_VALIDATE_UTF8
+ if (!dbus_validate_utf8(s, NULL)) {
+ PyErr_SetString(PyExc_UnicodeError, "String parameters "
+ "to be sent over D-Bus must be valid UTF-8 "
+ "with no noncharacter code points");
+ return -1;
+ }
+#else
+ {
+ PyObject *back_to_unicode;
+ PyObject *utf32;
+ Py_ssize_t i;
+
+ /* This checks for syntactically valid UTF-8, but does not check
+ * for noncharacters (U+nFFFE, U+nFFFF for any n, or U+FDD0..U+FDEF).
+ */
+ back_to_unicode = PyUnicode_DecodeUTF8(s, PyBytes_GET_SIZE(utf8),
+ "strict");
+
+ if (!back_to_unicode) {
+ return -1;
+ }
+
+ utf32 = PyUnicode_AsUTF32String(back_to_unicode);
+ Py_CLEAR(back_to_unicode);
+
+ if (!utf32) {
+ return -1;
+ }
+
+ for (i = 0; i < PyBytes_GET_SIZE(utf32) / 4; i++) {
+ dbus_uint32_t *p;
+
+ p = (dbus_uint32_t *) (PyBytes_AS_STRING(utf32)) + i;
+
+ if (/* noncharacters U+nFFFE, U+nFFFF */
+ (*p & 0xFFFF) == 0xFFFE ||
+ (*p & 0xFFFF) == 0xFFFF ||
+ /* noncharacters U+FDD0..U+FDEF */
+ (*p >= 0xFDD0 && *p <= 0xFDEF) ||
+ /* surrogates U+D800..U+DBFF (high), U+DC00..U+DFFF (low) */
+ (*p >= 0xD800 && *p <= 0xDFFF) ||
+ (*p >= 0x110000)) {
+ Py_CLEAR(utf32);
+ PyErr_SetString(PyExc_UnicodeError, "String parameters "
+ "to be sent over D-Bus must be valid UTF-8 "
+ "with no noncharacter code points");
+ return -1;
+ }
+ }
+
+ Py_CLEAR(utf32);
+ }
+#endif
+
+ DBG("Performing actual append: string (from unicode) %s", s);
+ if (!dbus_message_iter_append_basic(appender, sig_type, &s)) {
+ Py_CLEAR(utf8);
+ PyErr_NoMemory();
+ return -1;
+ }
+
+ Py_CLEAR(utf8);
return 0;
}
diff --git a/configure.ac b/configure.ac
index ea99996..ad644d9 100644
--- a/configure.ac
+++ b/configure.ac
@@ -158,9 +158,13 @@ PKG_CHECK_MODULES(DBUS, [dbus-1 >= 1.4])
PKG_CHECK_MODULES(DBUS_GLIB, [dbus-glib-1 >= 0.70])
+dnl Temporarily add libdbus's compile and link flags so the probes below
+dnl are built against it; the saved values are restored afterwards.
dbuspy_save_CFLAGS="$CFLAGS"
+dbuspy_save_LIBS="$LIBS"
CFLAGS="$CFLAGS $DBUS_CFLAGS"
+LIBS="$LIBS $DBUS_LIBS"
AC_CHECK_TYPES([DBusBasicValue], [], [], [#include <dbus/dbus.h>])
+dnl Defines HAVE_DBUS_VALIDATE_UTF8 when libdbus provides dbus_validate_utf8().
+AC_CHECK_FUNCS([dbus_validate_utf8])
CFLAGS="$dbuspy_save_CFLAGS"
+LIBS="$dbuspy_save_LIBS"
TP_COMPILER_WARNINGS([CFLAGS_WARNINGS], [test] dbus_python_released [= 0],
[all \
diff --git a/test/test-standalone.py b/test/test-standalone.py
index 6f403ee..584ba4f 100755
--- a/test/test-standalone.py
+++ b/test/test-standalone.py
@@ -423,6 +423,64 @@ class TestMessageMarshalling(unittest.TestCase):
raise AssertionError('Appending too many things in a struct '
'should fail')
+ def test_utf8(self):
+ # Check which strings SignalMessage.append() accepts for signature 's':
+ # every entry in the "bad" list must raise UnicodeError, and every entry
+ # in the "good" list must be accepted both as a unicode string and as
+ # its UTF-8 encoding.
+ from _dbus_bindings import SignalMessage
+ # utf8(*xs) builds a byte string from integer byte values; uni(x) builds
+ # a one-character unicode string from a code point. The spelling of
+ # both differs between Python 3 and Python 2.
+ if is_py3:
+ def utf8(*xs):
+ return bytes(xs)
+ def uni(x):
+ return chr(x)
+ else:
+ def utf8(*xs):
+ return str('').join(map(chr, xs))
+ def uni(x):
+ return unichr(x)
+ # Each code point appears twice: once as a unicode string and once as
+ # the corresponding UTF-8 byte sequence. The list covers a surrogate
+ # (U+D800), the U+FDD0..U+FDEF range, and U+nFFFE / U+nFFFF in several
+ # planes.
+ for bad in [
+ uni(0xD800),
+ utf8(0xed, 0xa0, 0x80),
+ uni(0xFDD0),
+ utf8(0xef, 0xb7, 0x90),
+ uni(0xFDD7),
+ utf8(0xef, 0xb7, 0x97),
+ uni(0xFDEF),
+ utf8(0xef, 0xb7, 0xaf),
+ uni(0xFFFE),
+ utf8(0xef, 0xbf, 0xbe),
+ uni(0xFFFF),
+ utf8(0xef, 0xbf, 0xbf),
+ uni(0x0001FFFE),
+ utf8(0xf0, 0x9f, 0xbf, 0xbe),
+ uni(0x0001FFFF),
+ utf8(0xf0, 0x9f, 0xbf, 0xbf),
+ uni(0x0007FFFE),
+ utf8(0xf1, 0xbf, 0xbf, 0xbe),
+ uni(0x0007FFFF),
+ utf8(0xf1, 0xbf, 0xbf, 0xbf),
+ uni(0x0010FFFE),
+ utf8(0xf4, 0x8f, 0xbf, 0xbe),
+ uni(0x0010FFFF),
+ utf8(0xf4, 0x8f, 0xbf, 0xbf),
+ ]:
+ # Use a fresh message for every attempt.
+ s = SignalMessage('/', 'foo.bar', 'baz')
+ try:
+ s.append(bad, signature='s')
+ except UnicodeError:
+ pass
+ else:
+ raise AssertionError('Appending %r should fail' % bad)
+ # Code points just outside the rejected ranges (e.g. U+FDCF, U+FDF0)
+ # and other ordinary code points in several planes; appending these
+ # must not raise.
+ for good in [
+ uni(0xfdcf),
+ uni(0xfdf0),
+ uni(0xfeff),
+ uni(0x0001feff),
+ uni(0x00020000),
+ uni(0x0007feff),
+ uni(0x00080000),
+ uni(0x0010feff),
+ ]:
+ s = SignalMessage('/', 'foo.bar', 'baz')
+ s.append(good, signature='s')
+ s.append(good.encode('utf-8'), signature='s')
class TestMatching(unittest.TestCase):
def setUp(self):
@@ -442,7 +500,6 @@ class TestMatching(unittest.TestCase):
self._message.append('/', signature='o')
self.assertFalse(self._match.maybe_handle_message(self._message))
-
if __name__ == '__main__':
# Python 2.6 doesn't accept a `verbosity` keyword.
kwargs = {}