Make available to log encoded strings as argumentsmitaka-eol stable/mitaka

Pure python logging library allows to log encoded strings but in oslo_log it causes UnicodeDecodeError for arguments which contains unicode symbols. Changes add additional check whether all arguments can be converted to unicode. If it isn't possible then leave it as is. Change-Id: I76e75c35a74b146fae52c0ea60156e1cf78540e7 Closes-Bug: #1563295
author: Andrey Volkov <avolkov@mirantis.com> 2016-05-31 20:14:40 +0300
committer: Andrey Volkov <avolkov@mirantis.com> 2016-09-16 16:43:14 +0300
commit: a2e7f6597ee7f57936cdf2b760068e556cad9a59 (patch)
tree: 2b75883257ad1acef0ebe4edf61489fcb75d5a9a
parent: 213aa9c6f5fa9f7a4ab4cbd2e66af32118676674 (diff)
download: oslo-log-stable/mitaka.tar.gz
3 files changed, 46 insertions, 25 deletions
diff --git a/oslo_log/formatters.py b/oslo_log/formatters.py
index 4ffd444..69f6740 100644
--- a/oslo_log/formatters.py
+++ b/oslo_log/formatters.py
@@ -25,6 +25,7 @@ from six import moves
 
 from oslo_context import context as context_utils
 from oslo_serialization import jsonutils
+from oslo_utils import encodeutils
 
 
 def _dictify_context(context):
@@ -64,6 +65,19 @@ def _update_record_with_context(record):
     return context
 
 
+def _ensure_unicode(msg):
+    """Do our best to turn the input argument into a unicode object.
+    """
+    if isinstance(msg, six.text_type):
+        return msg
+    if not isinstance(msg, six.binary_type):
+        return six.text_type(msg)
+    return encodeutils.safe_decode(
+        msg,
+        incoming='utf-8',
+        errors='xmlcharrefreplace')
+
+
 class _ReplaceFalseValue(dict):
     def __getitem__(self, key):
         return dict.get(self, key, None) or '-'
@@ -179,11 +193,17 @@ class ContextFormatter(logging.Formatter):
     def format(self, record):
         """Uses contextstring if request_id is set, otherwise default."""
 
-        # NOTE(jecarey): If msg is not unicode, coerce it into unicode
-        #                before it can get to the python logging and
-        #                possibly cause string encoding trouble
-        if not isinstance(record.msg, six.text_type):
-            record.msg = six.text_type(record.msg)
+        if six.PY2:
+            should_use_unicode = True
+            for arg in record.args:
+                try:
+                    six.text_type(arg)
+                except UnicodeDecodeError:
+                    should_use_unicode = False
+                    break
+            if (not isinstance(record.msg, six.text_type)
+                    and should_use_unicode):
+                record.msg = _ensure_unicode(record.msg)
 
         # store project info
         record.project = self.project
diff --git a/oslo_log/log.py b/oslo_log/log.py
index 3d06d20..337f17a 100644
--- a/oslo_log/log.py
+++ b/oslo_log/log.py
@@ -40,7 +40,6 @@ except ImportError:
 import traceback
 
 from oslo_config import cfg
-from oslo_utils import encodeutils
 from oslo_utils import importutils
 import six
 from six import moves
@@ -92,21 +91,6 @@ class BaseLoggerAdapter(logging.LoggerAdapter):
         self.log(TRACE, msg, *args, **kwargs)
 
 
-def _ensure_unicode(msg):
-    """Do our best to turn the input argument into a unicode object.
-    """
-    if not isinstance(msg, six.text_type):
-        if isinstance(msg, six.binary_type):
-            msg = encodeutils.safe_decode(
-                msg,
-                incoming='utf-8',
-                errors='xmlcharrefreplace',
-            )
-        else:
-            msg = six.text_type(msg)
-    return msg
-
-
 class KeywordArgumentAdapter(BaseLoggerAdapter):
     """Logger adapter to add keyword arguments to log record's extra data
 
@@ -126,7 +110,6 @@ class KeywordArgumentAdapter(BaseLoggerAdapter):
     """
 
     def process(self, msg, kwargs):
-        msg = _ensure_unicode(msg)
         # Make a new extra dictionary combining the values we were
         # given when we were constructed and anything from kwargs.
         extra = {}
diff --git a/oslo_log/tests/unit/test_log.py b/oslo_log/tests/unit/test_log.py
index 77b9d45..dd61d58 100644
--- a/oslo_log/tests/unit/test_log.py
+++ b/oslo_log/tests/unit/test_log.py
@@ -353,6 +353,14 @@ class JSONFormatterTestCase(LogTestBase):
         self.assertEqual(logging.ERROR, data['levelno'])
         self.assertTrue(data['traceback'])
 
+    def test_can_process_strings(self):
+        expected = b'\\u2622'
+        if six.PY3:
+            # see ContextFormatterTestCase.test_can_process_strings
+            expected = '\\\\xe2\\\\x98\\\\xa2'
+        self.log.info(b'%s', u'\u2622'.encode('utf8'))
+        self.assertIn(expected, self.stream.getvalue())
+
 
 def get_fake_datetime(retval):
     class FakeDateTime(datetime.datetime):
@@ -515,6 +523,16 @@ class ContextFormatterTestCase(LogTestBase):
 
         self.assertEqual(expected, self.stream.getvalue())
 
+    def test_can_process_strings(self):
+        expected = b'\xe2\x98\xa2'
+        if six.PY3:
+            # in PY3 logging format string should be unicode string
+            # or it will fail and inserting byte string in unicode string
+            # causes such formatting
+            expected = '\\xe2\\x98\\xa2'
+        self.log.info(b'%s', u'\u2622'.encode('utf8'))
+        self.assertIn(expected, self.stream.getvalue())
+
 
 class ExceptionLoggingTestCase(LogTestBase):
     """Test that Exceptions are logged."""
@@ -1081,20 +1099,20 @@ class UnicodeConversionTestCase(BaseTestCase):
     def test_ascii_to_unicode(self):
         msg = u'Message with unicode char \ua000 in the middle'
         enc_msg = msg.encode('utf-8')
-        result = log._ensure_unicode(enc_msg)
+        result = formatters._ensure_unicode(enc_msg)
         self.assertEqual(msg, result)
         self.assertIsInstance(result, six.text_type)
 
     def test_unicode_to_unicode(self):
         msg = u'Message with unicode char \ua000 in the middle'
-        result = log._ensure_unicode(msg)
+        result = formatters._ensure_unicode(msg)
         self.assertEqual(msg, result)
         self.assertIsInstance(result, six.text_type)
 
     def test_exception_to_unicode(self):
         msg = u'Message with unicode char \ua000 in the middle'
         exc = Exception(msg)
-        result = log._ensure_unicode(exc)
+        result = formatters._ensure_unicode(exc)
         self.assertEqual(msg, result)
         self.assertIsInstance(result, six.text_type)
author	Andrey Volkov <avolkov@mirantis.com>	2016-05-31 20:14:40 +0300
committer	Andrey Volkov <avolkov@mirantis.com>	2016-09-16 16:43:14 +0300
commit	a2e7f6597ee7f57936cdf2b760068e556cad9a59 (patch)
tree	2b75883257ad1acef0ebe4edf61489fcb75d5a9a
parent	213aa9c6f5fa9f7a4ab4cbd2e66af32118676674 (diff)
download	oslo-log-stable/mitaka.tar.gz