From 1718b94353733bb79043a7c6d80efeba8bd0c8d1 Mon Sep 17 00:00:00 2001 From: Philip Thiem Date: Sat, 17 May 2014 04:14:19 -0500 Subject: Starting a unicode_utils module. --HG-- extra : source : 2e47fa11a272ed61f7c1bbf88aae27e81040fe93 --- setuptools/unicode_utils.py | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 setuptools/unicode_utils.py (limited to 'setuptools/unicode_utils.py') diff --git a/setuptools/unicode_utils.py b/setuptools/unicode_utils.py new file mode 100644 index 00000000..d2de941a --- /dev/null +++ b/setuptools/unicode_utils.py @@ -0,0 +1,41 @@ +import unicodedata +import sys +from setuptools.compat import unicode as decoded_string + + +# HFS Plus uses decomposed UTF-8 +def decompose(path): + if isinstance(path, decoded_string): + return unicodedata.normalize('NFD', path) + try: + path = path.decode('utf-8') + path = unicodedata.normalize('NFD', path) + path = path.encode('utf-8') + except UnicodeError: + pass # Not UTF-8 + return path + + +def filesys_decode(path): + """ + Ensure that the given path is decoded, + NONE when no expected encoding works + """ + + fs_enc = sys.getfilesystemencoding() + if isinstance(path, decoded_string): + return path + + for enc in (fs_enc, "utf-8"): + try: + return path.decode(enc) + except UnicodeDecodeError: + continue + + +def try_encode(string, enc): + "turn unicode encoding into a functional routine" + try: + return string.encode(enc) + except UnicodeEncodeError: + return None -- cgit v1.2.1 From b49435397a5094f94678adf3549cc8941aa469b7 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sat, 5 Jul 2014 15:06:51 -0400 Subject: Use six for Python 2 compatibility --HG-- branch : feature/issue-229 extra : source : 7b1997ececc5772798ce33a0f8e77387cb55a977 --- setuptools/unicode_utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'setuptools/unicode_utils.py') diff --git a/setuptools/unicode_utils.py b/setuptools/unicode_utils.py index d2de941a..f028589e 100644 --- a/setuptools/unicode_utils.py +++ b/setuptools/unicode_utils.py @@ -1,11 +1,11 @@ import unicodedata import sys -from setuptools.compat import unicode as decoded_string +import six # HFS Plus uses decomposed UTF-8 def decompose(path): - if isinstance(path, decoded_string): + if isinstance(path, six.text_type): return unicodedata.normalize('NFD', path) try: path = path.decode('utf-8') @@ -23,7 +23,7 @@ def filesys_decode(path): """ fs_enc = sys.getfilesystemencoding() - if isinstance(path, decoded_string): + if isinstance(path, six.text_type): return path for enc in (fs_enc, "utf-8"): -- cgit v1.2.1 From 06872bb0bbbeb953e90bd0941444b0d499056557 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Thu, 31 Dec 2015 11:51:01 -0500 Subject: Update vendoring technique to match that used for packaging. Ref #229. --HG-- branch : feature/issue-229 --- setuptools/unicode_utils.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'setuptools/unicode_utils.py') diff --git a/setuptools/unicode_utils.py b/setuptools/unicode_utils.py index f028589e..1fdb0a91 100644 --- a/setuptools/unicode_utils.py +++ b/setuptools/unicode_utils.py @@ -1,7 +1,13 @@ import unicodedata import sys -import six + +try: + from setuptools._vendor import six +except ImportError: + # fallback to naturally-installed version; allows system packagers to + # omit vendored packages. + import six # HFS Plus uses decomposed UTF-8 def decompose(path): -- cgit v1.2.1 From 952c1bafda1929c74c737646aa025e6ffad6632e Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Thu, 31 Dec 2015 16:30:47 -0500 Subject: Modeling after Astropy's technique for bundling libraries, the imports are now much cleaner. Thanks @embray. Ref #229. --HG-- branch : feature/issue-229 --- setuptools/unicode_utils.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'setuptools/unicode_utils.py') diff --git a/setuptools/unicode_utils.py b/setuptools/unicode_utils.py index 1fdb0a91..18903d9e 100644 --- a/setuptools/unicode_utils.py +++ b/setuptools/unicode_utils.py @@ -1,13 +1,7 @@ import unicodedata import sys - -try: - from setuptools._vendor import six -except ImportError: - # fallback to naturally-installed version; allows system packagers to - # omit vendored packages. - import six +from setuptools.extern import six # HFS Plus uses decomposed UTF-8 def decompose(path): -- cgit v1.2.1 From 9363ee420bd803f333b31466796ff00a183de66e Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sat, 23 Jan 2016 18:49:52 -0500 Subject: Extract variable for candidate encodings --- setuptools/unicode_utils.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'setuptools/unicode_utils.py') diff --git a/setuptools/unicode_utils.py b/setuptools/unicode_utils.py index 18903d9e..6eee6351 100644 --- a/setuptools/unicode_utils.py +++ b/setuptools/unicode_utils.py @@ -22,11 +22,13 @@ def filesys_decode(path): NONE when no expected encoding works """ - fs_enc = sys.getfilesystemencoding() if isinstance(path, six.text_type): return path - for enc in (fs_enc, "utf-8"): + fs_enc = sys.getfilesystemencoding() + candidates = fs_enc, 'utf-8' + + for enc in candidates: try: return path.decode(enc) except UnicodeDecodeError: -- cgit v1.2.1 From 3223234b137e9766a6f4e892a3369b13f57f878b Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sat, 23 Jan 2016 19:08:57 -0500 Subject: Avoid TypeError when getfilesystemencoding returns None. Fixes #486. --- setuptools/unicode_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'setuptools/unicode_utils.py') diff --git a/setuptools/unicode_utils.py b/setuptools/unicode_utils.py index 6eee6351..ffab3e24 100644 --- a/setuptools/unicode_utils.py +++ b/setuptools/unicode_utils.py @@ -25,7 +25,7 @@ def filesys_decode(path): if isinstance(path, six.text_type): return path - fs_enc = sys.getfilesystemencoding() + fs_enc = sys.getfilesystemencoding() or 'utf-8' candidates = fs_enc, 'utf-8' for enc in candidates: -- cgit v1.2.1 From 053a3a12cf0cc902e0f869b8cc4cff997f73fc84 Mon Sep 17 00:00:00 2001 From: stepshal Date: Thu, 14 Jul 2016 06:59:30 +0700 Subject: Add missing blank line. --- setuptools/unicode_utils.py | 1 + 1 file changed, 1 insertion(+) (limited to 'setuptools/unicode_utils.py') diff --git a/setuptools/unicode_utils.py b/setuptools/unicode_utils.py index ffab3e24..7c63efd2 100644 --- a/setuptools/unicode_utils.py +++ b/setuptools/unicode_utils.py @@ -3,6 +3,7 @@ import sys from setuptools.extern import six + # HFS Plus uses decomposed UTF-8 def decompose(path): if isinstance(path, six.text_type): -- cgit v1.2.1 From ff371f18f0076bc63da05334f7e551c1cc29e10d Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sun, 1 Jan 2017 22:34:28 -0500 Subject: Strip out vendored packages and require them instead. Ref #581. --- setuptools/unicode_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'setuptools/unicode_utils.py') diff --git a/setuptools/unicode_utils.py b/setuptools/unicode_utils.py index 7c63efd2..6a84f9be 100644 --- a/setuptools/unicode_utils.py +++ b/setuptools/unicode_utils.py @@ -1,7 +1,7 @@ import unicodedata import sys -from setuptools.extern import six +import six # HFS Plus uses decomposed UTF-8 -- cgit v1.2.1 From 3d0cc355fb5e8012cb8c72f0e25042a5a44f31d6 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Fri, 24 Feb 2017 11:49:51 -0500 Subject: Revert "Merge pull request #933 from pypa/feature/581-depend-not-bundle" This reverts commit 089cdeb489a0fa94d11b7307b54210ef9aa40511, reversing changes made to aaec654d804cb78dbb6391afff721a63f26a71cd. --- setuptools/unicode_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'setuptools/unicode_utils.py') diff --git a/setuptools/unicode_utils.py b/setuptools/unicode_utils.py index 6a84f9be..7c63efd2 100644 --- a/setuptools/unicode_utils.py +++ b/setuptools/unicode_utils.py @@ -1,7 +1,7 @@ import unicodedata import sys -import six +from setuptools.extern import six # HFS Plus uses decomposed UTF-8 -- cgit v1.2.1 From 24be5abd4cbd9d84537c457456f841522d626e14 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Fri, 25 Jan 2019 16:11:07 -0500 Subject: Given that the config file parsing functionality is unlikely to change upstream, just incorporate the functionality directly. --- setuptools/unicode_utils.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'setuptools/unicode_utils.py') diff --git a/setuptools/unicode_utils.py b/setuptools/unicode_utils.py index 7c63efd2..3b8179a8 100644 --- a/setuptools/unicode_utils.py +++ b/setuptools/unicode_utils.py @@ -1,5 +1,6 @@ import unicodedata import sys +import re from setuptools.extern import six @@ -42,3 +43,15 @@ def try_encode(string, enc): return string.encode(enc) except UnicodeEncodeError: return None + + +CODING_RE = re.compile(br'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)') + + +def detect_encoding(fp): + first_line = fp.readline() + fp.seek(0) + m = CODING_RE.match(first_line) + if m is None: + return None + return m.group(1).decode('ascii') -- cgit v1.2.1 From f36781084f8f870ea747d477bd742057ea022421 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Fri, 5 Apr 2019 12:25:03 -0400 Subject: Remove detect_encoding, no longer used. --- setuptools/unicode_utils.py | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'setuptools/unicode_utils.py') diff --git a/setuptools/unicode_utils.py b/setuptools/unicode_utils.py index 3b8179a8..7c63efd2 100644 --- a/setuptools/unicode_utils.py +++ b/setuptools/unicode_utils.py @@ -1,6 +1,5 @@ import unicodedata import sys -import re from setuptools.extern import six @@ -43,15 +42,3 @@ def try_encode(string, enc): return string.encode(enc) except UnicodeEncodeError: return None - - -CODING_RE = re.compile(br'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)') - - -def detect_encoding(fp): - first_line = fp.readline() - fp.seek(0) - m = CODING_RE.match(first_line) - if m is None: - return None - return m.group(1).decode('ascii') -- cgit v1.2.1 From fb7ab81a3d080422687bad71f9ae9d36eeefbee2 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sun, 16 Aug 2020 00:29:24 -0400 Subject: Remove Python 2 compatibility --- setuptools/unicode_utils.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'setuptools/unicode_utils.py') diff --git a/setuptools/unicode_utils.py b/setuptools/unicode_utils.py index 7c63efd2..e84e65e3 100644 --- a/setuptools/unicode_utils.py +++ b/setuptools/unicode_utils.py @@ -1,12 +1,10 @@ import unicodedata import sys -from setuptools.extern import six - # HFS Plus uses decomposed UTF-8 def decompose(path): - if isinstance(path, six.text_type): + if isinstance(path, str): return unicodedata.normalize('NFD', path) try: path = path.decode('utf-8') @@ -23,7 +21,7 @@ def filesys_decode(path): NONE when no expected encoding works """ - if isinstance(path, six.text_type): + if isinstance(path, str): return path fs_enc = sys.getfilesystemencoding() or 'utf-8' -- cgit v1.2.1