From 193eb2042c6be1775f4c41f6297fe5c1521828e0 Mon Sep 17 00:00:00 2001 From: Richard Ipsum Date: Mon, 2 Nov 2015 20:11:56 +0000 Subject: Fix unicode conversions Fix unicode conversions to work in both python2 and python3 --- mozilla/certdata2pem.py | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/mozilla/certdata2pem.py b/mozilla/certdata2pem.py index ec48ab6..7bd4d2d 100644 --- a/mozilla/certdata2pem.py +++ b/mozilla/certdata2pem.py @@ -25,13 +25,19 @@ import os.path import re import sys import textwrap +import io objects = [] # Dirty file parser. in_data, in_multiline, in_obj = False, False, False field, type, value, obj = None, None, None, dict() -for line in open('certdata.txt', 'r'): + +# Python 3 will not let us decode non-ascii characters if we +# have not specified an encoding, but Python 2's open does not +# have an option to set the encoding. Python 3's open is io.open +# and io.open has been backported to Python 2.6 and 2.7, so use io.open. +for line in io.open('certdata.txt', 'rt', encoding='utf8'): # Ignore the file header. if not in_data: if line.startswith('BEGINDATA'): @@ -123,14 +129,27 @@ for obj in objects: .replace(',', '_') # this is the only way to decode the way NSS stores multi-byte UTF-8 + # and we need an escaped string for checking existence of things + # otherwise we're dependent on the user's current locale. 
if bytes != str: - bname = bname.encode('utf-8') - bname = bname.decode('unicode_escape').encode('latin-1').decode('utf-8') - fname = bname + '.crt' + # We're in python 3, convert the utf-8 string to a + # sequence of bytes that represents this utf-8 string + # then encode the byte-sequence as an escaped string that + # can be passed to open() and os.path.exists() + bname = bname.encode('utf-8').decode('unicode_escape').encode('latin-1') + else: + # Python 2 + # Convert the unicode string back to its original byte form + # (contents of files returned by io.open are returned as + # unicode strings) + # then to an escaped string that can be passed to open() + # and os.path.exists() + bname = bname.encode('utf-8').decode('string_escape') + fname = bname + b'.crt' if os.path.exists(fname): print("Found duplicate certificate name %s, renaming." % bname) - fname = bname + '_2.crt' + fname = bname + b'_2.crt' f = open(fname, 'w') f.write("-----BEGIN CERTIFICATE-----\n") encoded = base64.b64encode(obj['CKA_VALUE']).decode('utf-8') -- cgit v1.2.1