"""This script generates a Python codec module from a Windows Code Page. It uses the function MultiByteToWideChar to generate a decoding table. """ import ctypes from ctypes import wintypes from gencodec import codegen import unicodedata def genwinmap(codepage): MultiByteToWideChar = ctypes.windll.kernel32.MultiByteToWideChar MultiByteToWideChar.argtypes = [wintypes.UINT, wintypes.DWORD, wintypes.LPCSTR, ctypes.c_int, wintypes.LPWSTR, ctypes.c_int] MultiByteToWideChar.restype = ctypes.c_int enc2uni = {} for i in list(range(32)) + [127]: enc2uni[i] = (i, 'CONTROL CHARACTER') for i in range(256): buf = ctypes.create_unicode_buffer(2) ret = MultiByteToWideChar( codepage, 0, bytes([i]), 1, buf, 2) assert ret == 1, "invalid code page" assert buf[1] == '\x00' try: name = unicodedata.name(buf[0]) except ValueError: try: name = enc2uni[i][1] except KeyError: name = '' enc2uni[i] = (ord(buf[0]), name) return enc2uni def genwincodec(codepage): import platform map = genwinmap(codepage) encodingname = 'cp%d' % codepage code = codegen("", map, encodingname) # Replace first lines with our own docstring code = '''\ """Python Character Mapping Codec %s generated on Windows: %s with the command: python Tools/unicode/genwincodec.py %s """#" ''' % (encodingname, ' '.join(platform.win32_ver()), codepage ) + code.split('"""#"', 1)[1] print(code) if __name__ == '__main__': import sys genwincodec(int(sys.argv[1]))