1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
|
# coding=utf-8
"""
Test that incoming messages containing invalid UTF-8
don't make Idle fall off the bus. This is a regression test for
bugs similar to <https://bugs.freedesktop.org/show_bug.cgi?id=30741>.
"""
from idletest import exec_test
from servicetest import assertEquals
import re
def test(q, bus, conn, stream):
    """Connect, then feed several messages with invalid bytes through Idle.

    q      -- event queue; used to wait for the 'StatusChanged' D-Bus signal.
    bus    -- supplied by the harness; not used in this function.
    conn   -- connection object; Connect() is called on it first.
    stream -- IRC stream handed on to test_with_message().
    """
    conn.Connect()
    q.expect('dbus-signal', signal='StatusChanged', args=[0, 1])

    # Each entry is a list of text fragments; test_with_message() glues the
    # fragments together with the invalid byte sequence. The order of the
    # cases is the order in which the messages are sent.
    cases = (
        ["I'm no ", " Buddhist"],
        ["björk"] * 3,
        ["", "lolllllll"],
        ["hello", ""],
        "I am a stabbing robot".split(" "),
    )
    for fragments in cases:
        test_with_message(q, stream, fragments)
# This is the UTF-8 encoding of U+D800, which is not valid
# (not even as a noncharacter). We previously did this test with
# noncharacters, but Unicode Corrigendum #9 explicitly allows noncharacters
# to be interchanged, GLib 2.36 allows them when validating UTF-8,
# and D-Bus 1.6.10 will do likewise.
#
# test_with_message() uses this value as the separator when joining the
# fragments of each test message.
#
# NOTE(review): this is a plain (non-bytes) string literal. Under Python 3
# that makes it a three-character str (U+00ED, U+00A0, U+0080), not the raw
# bytes ED A0 80 -- confirm that stream.sendMessage() still puts the intended
# invalid byte sequence on the wire.
WELL_FORMED_BUT_INVALID_UTF8_BYTES = "\xed\xa0\x80"
def test_with_message(q, stream, parts):
    """Send one PRIVMSG containing invalid bytes and check what is received.

    The fragments in *parts* are joined with
    WELL_FORMED_BUT_INVALID_UTF8_BYTES and sent as a PRIVMSG addressed to
    stream.nick with prefix 'remoteuser'. The function then waits for a
    'MessageReceived' D-Bus signal and compares the text content, split on
    replacement markers, with the non-empty fragments that were sent.

    q      -- event queue used to wait for the D-Bus signal.
    stream -- IRC stream; provides sendMessage() and the nick attribute.
    parts  -- list of text fragments; must contain at least one element,
              because parts[0] is inspected.
    """
    # Idle's default character set is UTF-8. The payload is basically UTF-8,
    # except for the invalid sequence sitting between the fragments.
    payload = WELL_FORMED_BUT_INVALID_UTF8_BYTES.join(parts)
    stream.sendMessage('PRIVMSG', stream.nick, ':%s' % payload,
                       prefix='remoteuser')

    # Idle should signal that *something* was received. If it hasn't
    # validated & sanitized the message properly, the dbus-daemon will kick
    # it off.
    event = q.expect('dbus-signal', signal='MessageReceived')
    # First signal argument -> second message part -> its 'content' entry.
    content = event.args[0][1]['content']

    # Don't make any assumption about how many U+FFFD REPLACEMENT CHARACTERs
    # (or '?' characters) are used to replace surprising bytes: split on any
    # of them and drop the empty pieces between consecutive markers.
    received_parts = []
    for chunk in re.split("\ufffd|\\?", content):
        if chunk != '':
            received_parts.append(chunk)

    if parts[0] == 'björk':
        # The valid UTF-8 gets lost in transit, because we fall back
        # to assuming ASCII when g_convert() fails (this didn't happen
        # when we tested with noncharacters - oh well).
        expected = ['bj', 'rk', 'bj', 'rk', 'bj', 'rk']
    else:
        expected = [fragment for fragment in parts if fragment != '']

    assertEquals(expected, received_parts)
# When run as a script, hand the scenario to the Idle test harness.
if __name__ == '__main__':
    exec_test(test)
|