summary | refs | log | tree | commit | diff
path: root/Tools/unicode/genmap_support.py
diff options
context:
space:
mode:
Diffstat (limited to 'Tools/unicode/genmap_support.py')
-rw-r--r-- Tools/unicode/genmap_support.py 198
1 files changed, 198 insertions, 0 deletions
diff --git a/Tools/unicode/genmap_support.py b/Tools/unicode/genmap_support.py
new file mode 100644
index 0000000000..5e1d9ee77b
--- /dev/null
+++ b/Tools/unicode/genmap_support.py
@@ -0,0 +1,198 @@
+#
+# genmap_support.py: Multibyte Codec Map Generator
+#
+# Original Author: Hye-Shik Chang <perky@FreeBSD.org>
+# Modified Author: Dong-hee Na <donghee.na92@gmail.com>
+#
+
+
class BufferedFiller:
    """Pack string tokens into output lines at most ``column`` characters wide.

    Tokens handed to write() are concatenated with no separator.  When the
    next token would push the current line past ``column`` characters, the
    current line is closed and a new one is started.

    ``len(filler)`` is the total number of tokens written so far.  It is a
    running count: neither flush() nor printout() resets it.
    """

    def __init__(self, column=78):
        self.column = column
        self.buffered = []  # completed lines waiting for printout()
        self.cline = []     # tokens of the line currently being built
        self.clen = 0       # character width of the tokens in ``cline``
        self.count = 0      # tokens written since construction

    def write(self, *data):
        """Append each token in *data*, wrapping to a new line when needed.

        Raises ValueError if a single token is wider than ``column``; tokens
        that preceded it in the same call have already been stored.
        """
        for token in data:
            width = len(token)
            if width > self.column:
                raise ValueError("token is too long")
            if self.clen + width > self.column:
                self.flush()
            self.cline.append(token)
            self.clen += width
            self.count += 1

    def flush(self):
        """Close the line under construction and queue it for printout()."""
        if not self.cline:
            return
        self.buffered.append(''.join(self.cline))
        self.cline.clear()
        self.clen = 0

    def printout(self, fp):
        """Write every queued line to *fp*, newline-terminated, then drop them."""
        self.flush()
        for line in self.buffered:
            fp.write(f'{line}\n')
        self.buffered.clear()

    def __len__(self):
        return self.count
+
+
class DecodeMapWriter:
    """Write the C decode tables for one codec map to ``fp``.

    ``decode_map`` is indexed first by lead byte (c1) and then by trail byte
    (c2).  update_decode_map() queues table data and stores bookkeeping
    entries under the string keys ``'min'``, ``'max'``, ``'midx'`` and
    ``prefix`` inside the per-lead-byte dicts of ``decode_map`` itself, so
    the caller's mapping is modified in place.  generate() then emits the
    data array followed by a 256-entry index array.
    """

    filler_class = BufferedFiller

    def __init__(self, fp, prefix, decode_map):
        self.fp = fp
        self.prefix = prefix
        self.decode_map = decode_map
        self.filler = self.filler_class()

    def update_decode_map(self, c1range, c2range, onlymask=(), wide=0):
        """Queue the table rows for the lead bytes in *c1range*.

        *c1range* and *c2range* are ``(first, last)`` pairs, both inclusive.
        If *onlymask* is non-empty, lead bytes not contained in it are
        skipped.  *wide* is accepted but not used by this method.

        For every lead byte that has at least one trail byte inside
        *c2range*, the row spans its lowest to its highest such trail byte;
        unmapped positions inside that span are written as ``U,``.
        """
        c2values = range(c2range[0], c2range[1] + 1)

        for c1 in range(c1range[0], c1range[1] + 1):
            if c1 not in self.decode_map or (onlymask and c1 not in onlymask):
                continue
            c2map = self.decode_map[c1]
            present = [c2 for c2 in c2values if c2 in c2map]
            if not present:
                continue

            first, last = present[0], present[-1]
            c2map[self.prefix] = True
            c2map['min'] = first
            c2map['max'] = last
            # Offset of this row inside the data array: tokens queued so far.
            c2map['midx'] = len(self.filler)

            for c2 in range(first, last + 1):
                if c2 in c2map:
                    self.filler.write('%d,' % c2map[c2])
                else:
                    self.filler.write('U,')

    def generate(self, wide=False):
        """Write the data array and the 256-entry index array to ``fp``.

        With *wide* false the arrays are declared as ``ucs2_t`` /
        ``struct dbcs_index``; otherwise as ``Py_UCS4`` /
        ``struct widedbcs_index``.
        """
        fp = self.fp
        prefix = self.prefix
        if wide:
            elemtype, indextype = "Py_UCS4", "struct widedbcs_index"
        else:
            elemtype, indextype = "ucs2_t", "struct dbcs_index"

        # The array size is the number of data tokens queued so far; it must
        # be read before the index entries below add to the same filler.
        fp.write(f"static const {elemtype} __{prefix}_decmap[{len(self.filler)}] = {{\n")
        self.filler.printout(fp)
        fp.write("};\n\n")

        fp.write(f"static const {indextype} {prefix}_decmap[256] = {{\n")
        for c1 in range(256):
            if c1 in self.decode_map and prefix in self.decode_map[c1]:
                row = self.decode_map[c1]
                self.filler.write("{", "__%s_decmap" % prefix, "+", "%d" % row['midx'],
                                  ",", "%d," % row['min'], "%d" % row['max'], "},")
            else:
                # Lead byte has no row for this prefix: emit an all-zero entry.
                self.filler.write("{", "0,", "0,", "0", "},")
        self.filler.printout(fp)
        fp.write("};\n\n")
+
+
class EncodeMapWriter:
    """Write the C encode tables for one codec map to ``fp``.

    ``encode_map`` is indexed first by c1 (0..255) and then by an integer c2
    key; each value is either an int or a tuple.  buildmap() stores
    bookkeeping entries under the string keys ``'min'``, ``'max'``,
    ``'midx'`` and ``prefix`` inside the per-c1 dicts of ``encode_map``
    itself, so the caller's mapping is modified in place.

    Subclasses may override ``elemtype`` / ``indextype`` and the ``write_*``
    hooks to change the emitted C types and cell formatting.
    """

    filler_class = BufferedFiller
    elemtype = 'DBCHAR'
    indextype = 'struct unim_index'

    def __init__(self, fp, prefix, encode_map):
        self.fp = fp
        self.prefix = prefix
        self.encode_map = encode_map
        self.filler = self.filler_class()

    def generate(self):
        """Queue the table data, then write both arrays to ``fp``."""
        self.buildmap()
        self.printmap()

    def buildmap(self):
        """Queue one row of cells per populated c1 entry of ``encode_map``.

        Each row spans the lowest to the highest integer key of its dict.
        Raises ValueError if a value is neither an int nor a tuple.
        """
        for c1 in range(256):
            if c1 not in self.encode_map:
                continue
            c2map = self.encode_map[c1]
            # Only integer keys are table positions.  The string keys written
            # below may already be present if this mapping was processed
            # before, and sorting ints together with strs raises TypeError.
            rc2values = sorted(k for k in c2map if isinstance(k, int))
            if not rc2values:
                continue

            first, last = rc2values[0], rc2values[-1]
            c2map[self.prefix] = True
            c2map['min'] = first
            c2map['max'] = last
            # Offset of this row inside the data array: tokens queued so far.
            c2map['midx'] = len(self.filler)

            for v in range(first, last + 1):
                if v not in c2map:
                    self.write_nochar()
                    continue
                value = c2map[v]
                if isinstance(value, int):
                    self.write_char(value)
                elif isinstance(value, tuple):
                    self.write_multic(value)
                else:
                    raise ValueError(
                        f"unsupported encode map value at [{c1}][{v}]: {value!r}")

    def write_nochar(self):
        """Queue the placeholder cell for an unmapped position."""
        self.filler.write('N,')

    def write_multic(self, point):
        """Queue the placeholder cell for a tuple-valued entry."""
        self.filler.write('M,')

    def write_char(self, point):
        """Queue the cell for an int-valued entry."""
        self.filler.write(str(point) + ',')

    def printmap(self):
        """Write the data array and the 256-entry index array to ``fp``."""
        # The array size is the number of data tokens queued so far; it must
        # be read before the index entries below add to the same filler.
        self.fp.write(f"static const {self.elemtype} __{self.prefix}_encmap[{len(self.filler)}] = {{\n")
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")
        self.fp.write(f"static const {self.indextype} {self.prefix}_encmap[256] = {{\n")

        for i in range(256):
            if i in self.encode_map and self.prefix in self.encode_map[i]:
                row = self.encode_map[i]
                self.filler.write("{", "__%s_encmap" % self.prefix, "+",
                                  "%d" % row['midx'], ",",
                                  "%d," % row['min'],
                                  "%d" % row['max'], "},")
            else:
                # No row for this prefix: emit an all-zero entry.
                self.filler.write("{", "0,", "0,", "0", "},")
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")
+
+
def open_mapping_file(path, source):
    """Open the mapping file at *path* for reading and return the file object.

    The file is opened in text mode with the platform default encoding.
    Closing the returned file object is the caller's responsibility.

    Raises SystemExit with the message ``"<source> is needed"`` if the file
    cannot be opened; the underlying OSError is attached as the cause.
    """
    try:
        return open(path)
    except OSError as exc:
        raise SystemExit(f'{source} is needed') from exc
+
+
def print_autogen(fo, source):
    """Write a one-line ``//`` comment to *fo* marking the output as generated.

    *source* is interpolated into the line as the origin of the generated
    file.
    """
    fo.write(f'// AUTO-GENERATED FILE FROM {source}: DO NOT EDIT\n')
+
+
def loadmap(fo, natcol=0, unicol=1, sbcs=0):
    """Parse a whitespace-separated mapping file into a two-level dict.

    *fo* is a seekable text file object; it is rewound before reading.  On
    each line everything from the first ``#`` onward is discarded, and lines
    left with fewer than two fields are skipped.  Column *natcol* holds the
    native code and column *unicol* the value stored for it.

    Returns ``{code >> 8: {code & 0xff: value}}``.  Native codes below 0x100
    are included only when *sbcs* is true.

    A "Loading from ..." progress line is printed to stdout.
    """
    print("Loading from", fo)
    fo.seek(0, 0)
    decmap = {}
    for line in fo:
        fields = line.split('#', 1)[0].split()
        if len(fields) < 2:
            continue

        # SECURITY NOTE(review): eval() executes each field as a Python
        # expression, so this must only be fed trusted mapping files.
        # int(field, 0) would parse plain 0x... tokens; confirm no mapping
        # file relies on expression syntax before switching.
        row = [eval(field) for field in fields]
        loc, uni = row[natcol], row[unicol]
        if loc >= 0x100 or sbcs:
            decmap.setdefault(loc >> 8, {})[loc & 0xff] = uni

    return decmap