From 193eb2042c6be1775f4c41f6297fe5c1521828e0 Mon Sep 17 00:00:00 2001
From: Richard Ipsum <richard.ipsum@codethink.co.uk>
Date: Mon, 2 Nov 2015 20:11:56 +0000
Subject: Fix unicode conversions

Fix unicode conversions to work in both python2 and python3
---
 mozilla/certdata2pem.py | 29 ++++++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/mozilla/certdata2pem.py b/mozilla/certdata2pem.py
index ec48ab6..7bd4d2d 100644
--- a/mozilla/certdata2pem.py
+++ b/mozilla/certdata2pem.py
@@ -25,13 +25,19 @@ import os.path
 import re
 import sys
 import textwrap
+import io
 
 objects = []
 
 # Dirty file parser.
 in_data, in_multiline, in_obj = False, False, False
 field, type, value, obj = None, None, None, dict()
-for line in open('certdata.txt', 'r'):
+
+# Without an explicit encoding, Python 3's open() decodes with the
+# locale's default encoding, which can fail on non-ascii characters,
+# and Python 2's open() has no encoding option at all. Python 3's open
+# is io.open, which was backported to Python 2.6 and 2.7, so use io.open.
+for line in io.open('certdata.txt', 'rt', encoding='utf8'):
     # Ignore the file header.
     if not in_data:
         if line.startswith('BEGINDATA'):
@@ -123,14 +129,27 @@ for obj in objects:
                                       .replace(',', '_')
 
         # this is the only way to decode the way NSS stores multi-byte UTF-8
+        # and we need an escaped string for checking existence of things
+        # otherwise we're dependent on the user's current locale.
         if bytes != str:
-            bname = bname.encode('utf-8')
-        bname = bname.decode('unicode_escape').encode('latin-1').decode('utf-8')
-        fname = bname + '.crt'
+            # We're in python 3: bname is a str that still contains
+            # backslash escapes. Resolve the escapes with unicode_escape,
+            # then encode as latin-1 to recover the raw byte sequence,
+            # which can be passed to open() and os.path.exists()
+            bname = bname.encode('utf-8').decode('unicode_escape').encode('latin-1')
+        else:
+            # Python 2
+            # Convert the unicode string back to its original byte form
+            # (contents of files returned by io.open are returned as
+            #  unicode strings)
+            # then resolve its backslash escapes, giving a byte string
+            # that can be passed to open() and os.path.exists()
+            bname = bname.encode('utf-8').decode('string_escape')
 
+        fname = bname + b'.crt'
         if os.path.exists(fname):
             print("Found duplicate certificate name %s, renaming." % bname)
-            fname = bname + '_2.crt'
+            fname = bname + b'_2.crt'
         f = open(fname, 'w')
         f.write("-----BEGIN CERTIFICATE-----\n")
         encoded = base64.b64encode(obj['CKA_VALUE']).decode('utf-8')
-- 
cgit v1.2.1