From b3ecd02236e58386ac4d7566ef70e751ff0d7e26 Mon Sep 17 00:00:00 2001 From: Sam Thursfield Date: Fri, 16 Oct 2015 23:49:02 +0200 Subject: distbuild: Avoid UnicodeEncodeError when writing build output to log files Text encoding in Python 2 is a total mess so I can only pretend to understand what's going on. The 'stdout' and 'stderr' messages are Python 'unicode' instances, which isn't really important, but I know that because when we try to write them to the log file, and they contain non-ASCII data, we see this error: File "/usr/lib/python2.7/site-packages/distbuild/initiator.py", line 231, in _handle_step_output_message f.write(msg['stdout']) UnicodeEncodeError: 'ascii' codec can't encode character u'\u2022' in position 29: ordinal not in range(128) Who said anything about encoding 'unicode' to 'ascii'? It turns out that when you write to a file, Python implicitly tries to encode the data to the 'default encoding', which happens to be 'ascii'. You lose! The only way to fix this is to tell Python that the file has a different encoding, a nice one like UTF-8. (I tried opening the file with 'b' mode, but that doesn't seem to help). UTF-8 can only encode valid Unicode data, of course, so we need to make sure the data we write is valid UTF-8. You can do this by calling decode('unicode-escape'), which converts *from* Unicode *to* Unicode, but replacing any invalid characters with escape codes so that we don't get any UnicodeDecodeErrors during the conversion, or when we try to write it to the UTF-8 file. See this presentation for more info: http://farmdev.com/talks/unicode/ Or just use Python 3. 
Change-Id: I6316d346f5cca2c75f198b48ec9878ac647ae7e5 --- distbuild/initiator.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/distbuild/initiator.py b/distbuild/initiator.py index f2cb77e9..e02f6404 100644 --- a/distbuild/initiator.py +++ b/distbuild/initiator.py @@ -16,11 +16,13 @@ import cliapp + +import codecs import itertools import logging import os -import uuid import time +import uuid import distbuild @@ -194,7 +196,7 @@ class Initiator(distbuild.StateMachine): path = self._get_step_output_dir() filename = os.path.join(path, 'build-step-%s.log' % msg['step_name']) - f = open(filename, 'a') + f = codecs.open(filename, 'a', encoding='utf-8') self._step_outputs[msg['step_name']] = f def _close_output(self, msg): -- cgit v1.2.1