diff options
Diffstat (limited to 'Tools/unicode/genmap_support.py')
-rw-r--r-- | Tools/unicode/genmap_support.py | 198 |
1 files changed, 198 insertions, 0 deletions
# diff --git a/Tools/unicode/genmap_support.py b/Tools/unicode/genmap_support.py
# new file mode 100644 (index 0000000000..5e1d9ee77b)
#
# genmap_support.py: Multibyte Codec Map Generator
#
# Original Author: Hye-Shik Chang <perky@FreeBSD.org>
# Modified Author: Dong-hee Na <donghee.na92@gmail.com>
#


class BufferedFiller:
    """Pack short string tokens into output lines of at most ``column`` chars.

    Tokens are never split: when the next token would overflow the current
    line, the line is closed and the token starts a new one.
    """

    def __init__(self, column=78):
        self.column = column    # maximum width of an emitted line
        self.buffered = []      # completed lines awaiting printout()
        self.cline = []         # tokens of the line being assembled
        self.clen = 0           # total length of the tokens in ``cline``
        self.count = 0          # number of write() calls so far

    def write(self, *data):
        """Append the tokens in ``data``; counts as one element for len().

        Raises ValueError if a single token is wider than ``column``.
        """
        for s in data:
            if len(s) > self.column:
                raise ValueError("token is too long")
            if len(s) + self.clen > self.column:
                self.flush()
            self.clen += len(s)
            self.cline.append(s)
        # Counted per call, not per token: the map writers emit each array
        # element with its own single-token call, so len() is the array size.
        self.count += 1

    def flush(self):
        """Close the line being assembled, if it holds any tokens."""
        if not self.cline:
            return
        self.buffered.append(''.join(self.cline))
        self.clen = 0
        del self.cline[:]

    def printout(self, fp):
        """Write every buffered line to ``fp`` and empty the buffer.

        ``count`` is deliberately not reset, so len() keeps growing across
        printouts.
        """
        self.flush()
        for line in self.buffered:
            fp.write(f'{line}\n')
        del self.buffered[:]

    def __len__(self):
        return self.count


class DecodeMapWriter:
    """Emit C source for a decoding table plus its 256-entry index.

    ``decode_map`` maps a lead byte to a dict of ``{trail byte: code point}``.
    ``update_decode_map`` stores bookkeeping keys (``prefix``, ``'min'``,
    ``'max'``, ``'midx'``) into those inner dicts.
    """

    filler_class = BufferedFiller

    def __init__(self, fp, prefix, decode_map):
        self.fp = fp
        self.prefix = prefix
        self.decode_map = decode_map
        self.filler = self.filler_class()

    def update_decode_map(self, c1range, c2range, onlymask=(), wide=0):
        """Queue table data for lead bytes in ``c1range`` (inclusive pair).

        Only trail bytes inside the inclusive ``c2range`` are considered.
        When ``onlymask`` is non-empty, lead bytes not in it are skipped.
        ``wide`` is accepted for interface compatibility and is not used.
        """
        c2values = range(c2range[0], c2range[1] + 1)

        for c1 in range(c1range[0], c1range[1] + 1):
            if c1 not in self.decode_map or (onlymask and c1 not in onlymask):
                continue
            c2map = self.decode_map[c1]
            rc2values = [n for n in c2values if n in c2map]
            if not rc2values:
                continue

            # Bookkeeping read back by generate() when building the index.
            c2map[self.prefix] = True
            c2map['min'] = rc2values[0]
            c2map['max'] = rc2values[-1]
            c2map['midx'] = len(self.filler)

            # The row is dense between min and max; holes are written as 'U'.
            for v in range(rc2values[0], rc2values[-1] + 1):
                if v in c2map:
                    self.filler.write('%d,' % c2map[v])
                else:
                    self.filler.write('U,')

    def generate(self, wide=False):
        """Write the data array and the index array to ``self.fp``.

        ``wide`` selects the ``Py_UCS4``/``widedbcs_index`` C types instead
        of ``ucs2_t``/``dbcs_index``.
        """
        if wide:
            elemtype, indextype = "Py_UCS4", "struct widedbcs_index"
        else:
            elemtype, indextype = "ucs2_t", "struct dbcs_index"

        self.fp.write(f"static const {elemtype} __{self.prefix}_decmap"
                      f"[{len(self.filler)}] = {{\n")
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")

        self.fp.write(
            f"static const {indextype} {self.prefix}_decmap[256] = {{\n")
        for i in range(256):
            if i in self.decode_map and self.prefix in self.decode_map[i]:
                entry = self.decode_map[i]
                self.filler.write(
                    "{", "__%s_decmap" % self.prefix, "+",
                    "%d" % entry['midx'], ",",
                    "%d," % entry['min'], "%d" % entry['max'], "},")
            else:
                # Lead byte with no data queued under this prefix.
                self.filler.write("{", "0,", "0,", "0", "},")
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")


class EncodeMapWriter:
    """Emit C source for an encoding table plus its 256-entry index.

    ``encode_map`` maps a high byte to a dict of ``{low byte: value}`` where
    a value is an int (written as a number) or a tuple (written as ``M``).
    """

    filler_class = BufferedFiller
    elemtype = 'DBCHAR'
    indextype = 'struct unim_index'

    def __init__(self, fp, prefix, encode_map):
        self.fp = fp
        self.prefix = prefix
        self.encode_map = encode_map
        self.filler = self.filler_class()

    def generate(self):
        """Build the table data, then write both arrays to ``self.fp``."""
        self.buildmap()
        self.printmap()

    def buildmap(self):
        """Queue table data for every row present in ``encode_map``.

        Raises ValueError if a value is neither an int nor a tuple.
        """
        for c1 in range(0, 256):
            if c1 not in self.encode_map:
                continue
            c2map = self.encode_map[c1]
            # Only integer keys are code units.  The bookkeeping keys stored
            # below are strings, and sorting them together with ints would
            # raise TypeError when a row is processed a second time.
            rc2values = sorted(k for k in c2map if isinstance(k, int))
            if not rc2values:
                continue

            # Bookkeeping read back by printmap() when building the index.
            c2map[self.prefix] = True
            c2map['min'] = rc2values[0]
            c2map['max'] = rc2values[-1]
            c2map['midx'] = len(self.filler)

            for v in range(rc2values[0], rc2values[-1] + 1):
                if v not in c2map:
                    self.write_nochar()
                elif isinstance(c2map[v], int):
                    self.write_char(c2map[v])
                elif isinstance(c2map[v], tuple):
                    self.write_multic(c2map[v])
                else:
                    raise ValueError(
                        f"unsupported map value {c2map[v]!r} at "
                        f"({c1:#x}, {v:#x})")

    def write_nochar(self):
        """Queue the marker for an unmapped slot."""
        self.filler.write('N,')

    def write_multic(self, point):
        """Queue the marker for a tuple-valued slot; ``point`` is unused."""
        self.filler.write('M,')

    def write_char(self, point):
        """Queue a single mapped value as a decimal number."""
        self.filler.write(str(point) + ',')

    def printmap(self):
        """Write the queued data array and the index array to ``self.fp``."""
        self.fp.write(f"static const {self.elemtype} __{self.prefix}_encmap"
                      f"[{len(self.filler)}] = {{\n")
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")

        self.fp.write(
            f"static const {self.indextype} {self.prefix}_encmap[256] = {{\n")
        for i in range(256):
            if i in self.encode_map and self.prefix in self.encode_map[i]:
                entry = self.encode_map[i]
                self.filler.write(
                    "{", "__%s_encmap" % self.prefix, "+",
                    "%d" % entry['midx'], ",",
                    "%d," % entry['min'], "%d" % entry['max'], "},")
            else:
                # Row with no data queued under this prefix.
                self.filler.write("{", "0,", "0,", "0", "},")
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")


def open_mapping_file(path, source):
    """Open ``path`` for reading, or exit naming ``source`` as required.

    The caller owns the returned file object and must close it.
    """
    try:
        f = open(path)
    except OSError:
        raise SystemExit(f'{source} is needed')
    return f


def print_autogen(fo, source):
    """Write the "auto-generated, do not edit" banner comment to ``fo``."""
    fo.write(f'// AUTO-GENERATED FILE FROM {source}: DO NOT EDIT\n')


def loadmap(fo, natcol=0, unicol=1, sbcs=0):
    """Parse a whitespace-separated mapping file into a two-level dict.

    ``natcol`` and ``unicol`` are the column indexes of the native code and
    of the value stored for it.  Text after ``#`` is ignored, as are lines
    with fewer than two columns.  Codes below 0x100 are kept only when
    ``sbcs`` is true.

    Returns ``{code >> 8: {code & 0xff: value}}``.
    """
    print("Loading from", fo)
    fo.seek(0, 0)
    decmap = {}
    for line in fo:
        fields = line.split('#', 1)[0].split()
        if len(fields) < 2:
            continue

        # NOTE(review): eval() executes whatever expression the mapping file
        # contains, so only trusted tables may be fed in.  int(e, 0) would be
        # safer if every column is a plain literal -- confirm before
        # switching.
        row = [eval(e) for e in fields]
        loc, uni = row[natcol], row[unicol]
        if loc >= 0x100 or sbcs:
            decmap.setdefault(loc >> 8, {})[loc & 0xff] = uni

    return decmap