summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Richard Ipsum <richard.ipsum@codethink.co.uk> 2015-11-02 20:11:56 +0000
committer: Richard Ipsum <richard.ipsum@codethink.co.uk> 2015-11-03 14:59:59 +0000
commit: 193eb2042c6be1775f4c41f6297fe5c1521828e0 (patch)
tree: d1112d58b688a6cb026e7f37bf63b92feb18dc4e
parent: 8f600f7424c9386adfd084e75be8aed427954e52 (diff)
download: ca-certificates-193eb2042c6be1775f4c41f6297fe5c1521828e0.tar.gz
Fix unicode conversions to work in both python2 and python3
-rw-r--r-- mozilla/certdata2pem.py 29
1 files changed, 24 insertions, 5 deletions
diff --git a/mozilla/certdata2pem.py b/mozilla/certdata2pem.py
index ec48ab6..7bd4d2d 100644
--- a/mozilla/certdata2pem.py
+++ b/mozilla/certdata2pem.py
@@ -25,13 +25,19 @@ import os.path
import re
import sys
import textwrap
+import io
objects = []
# Dirty file parser.
in_data, in_multiline, in_obj = False, False, False
field, type, value, obj = None, None, None, dict()
-for line in open('certdata.txt', 'r'):
+
+# Python 3 will not let us decode non-ascii characters if we
+# have not specified an encoding, but Python 2's open does not
+# have an option to set the encoding. Python 3's open is io.open
+# and io.open has been backported to Python 2.6 and 2.7, so use io.open.
+for line in io.open('certdata.txt', 'rt', encoding='utf8'):
# Ignore the file header.
if not in_data:
if line.startswith('BEGINDATA'):
@@ -123,14 +129,27 @@ for obj in objects:
.replace(',', '_')
# this is the only way to decode the way NSS stores multi-byte UTF-8
+ # and we need an escaped string for checking existence of things
+ # otherwise we're dependent on the user's current locale.
if bytes != str:
- bname = bname.encode('utf-8')
- bname = bname.decode('unicode_escape').encode('latin-1').decode('utf-8')
- fname = bname + '.crt'
+ # We're in python 3, convert the utf-8 string to a
+ # sequence of bytes that represents this utf-8 string
+ # then encode the byte-sequence as an escaped string that
+ # can be passed to open() and os.path.exists()
+ bname = bname.encode('utf-8').decode('unicode_escape').encode('latin-1')
+ else:
+ # Python 2
+ # Convert the unicode string back to its original byte form
+ # (contents of files returned by io.open are returned as
+ # unicode strings)
+ # then to an escaped string that can be passed to open()
+ # and os.path.exists()
+ bname = bname.encode('utf-8').decode('string_escape')
+ fname = bname + b'.crt'
if os.path.exists(fname):
print("Found duplicate certificate name %s, renaming." % bname)
- fname = bname + '_2.crt'
+ fname = bname + b'_2.crt'
f = open(fname, 'w')
f.write("-----BEGIN CERTIFICATE-----\n")
encoded = base64.b64encode(obj['CKA_VALUE']).decode('utf-8')