From a5ebf0d558c3a851c0884d30a0051836f7db0c2a Mon Sep 17 00:00:00 2001 From: Adam Hupp Date: Sat, 4 Jan 2014 12:48:39 -0800 Subject: Properly handle unicode filenames given in both byte-str and unicode values, across both python 3 and 2. --- magic.py | 13 ++++++++++++- test.py | 6 +++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/magic.py b/magic.py index 5025842..e7336c3 100644 --- a/magic.py +++ b/magic.py @@ -192,7 +192,18 @@ def errorcheck_negative_one(result, func, args): def coerce_filename(filename): if filename is None: return None - return filename.encode(sys.getfilesystemencoding()) + + # ctypes will implicitly convert unicode strings to bytes with + # .encode('ascii'). A more useful default here is + # getfilesystemencoding(). We need to leave byte-str unchanged. + is_unicode = (sys.version_info.major <= 2 and + isinstance(filename, unicode)) or \ + (sys.version_info.major >= 3 and + isinstance(filename, str)) + if is_unicode: + return filename.encode(sys.getfilesystemencoding()) + else: + return filename magic_open = libmagic.magic_open magic_open.restype = magic_t diff --git a/test.py b/test.py index 6412045..3d922cd 100644 --- a/test.py +++ b/test.py @@ -8,7 +8,10 @@ class MagicTest(unittest.TestCase): def assert_values(self, m, expected_values): for filename, expected_value in expected_values.items(): - filename = os.path.join(self.TESTDATA_DIR, filename) + try: + filename = os.path.join(self.TESTDATA_DIR, filename) + except TypeError: + filename = os.path.join(self.TESTDATA_DIR.encode('utf-8'), filename) value = m.from_buffer(open(filename, 'rb').read()) expected_value_bytes = expected_value.encode('utf-8') @@ -25,6 +28,7 @@ class MagicTest(unittest.TestCase): 'test.gz': 'application/x-gzip', 'text.txt': 'text/plain', b'\xce\xbb'.decode('utf-8'): 'text/plain', + b'\xce\xbb': 'text/plain', }) def test_descriptions(self): -- cgit v1.2.1