chromium/third_party/blink/renderer/build/scripts/make_css_tokenizer_codepoints.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87

#!/usr/bin/env python

# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import in_generator
import sys
import os

# File name of this script. The extension is forced to '.py' so that the
# "Auto-generated by" line in the output names the source script even when
# __file__ carries a different extension.
module_basename = os.path.basename(__file__)
module_pyname = '%s.py' % os.path.splitext(module_basename)[0]

# str.format() template for the generated C++ file. Placeholders:
#   {module_pyname} - name of the generating script
#   {array_size}    - number of entries in kCodePoints
#   {token_lines}   - pre-formatted array entries, one per line
# Literal C++ braces are doubled ('{{' / '}}') to escape them from format().
CPP_TEMPLATE = """
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Auto-generated by {module_pyname}

const CSSTokenizer::CodePoint CSSTokenizer::kCodePoints[{array_size}] = {{
{token_lines}
}};
const unsigned codePointsNumber = {array_size};
"""


def token_type(i):
    """Return the CSSTokenizer member name that handles code point ``i``.

    Args:
        i: Integer code point; it is converted to a character with chr().

    Returns:
        The handler name as a string (e.g. 'LeftParenthesis', 'WhiteSpace',
        'NameStart'), or None when no handler is assigned to the code point.
    """
    char = chr(i)

    # Characters that map directly to a dedicated handler. 'u' and 'U' are
    # listed here, so they resolve to 'LetterU' before the generic
    # alphabetic check below can classify them as 'NameStart'.
    dedicated_handlers = {
        '(': 'LeftParenthesis',
        ')': 'RightParenthesis',
        '[': 'LeftBracket',
        ']': 'RightBracket',
        '{': 'LeftBrace',
        '}': 'RightBrace',
        '+': 'PlusOrFullStop',
        '.': 'PlusOrFullStop',
        '-': 'HyphenMinus',
        '*': 'Asterisk',
        '<': 'LessThan',
        ',': 'Comma',
        '/': 'Solidus',
        '\\': 'ReverseSolidus',
        ':': 'Colon',
        ';': 'SemiColon',
        '#': 'Hash',
        '^': 'CircumflexAccent',
        '$': 'DollarSign',
        '|': 'VerticalLine',
        '~': 'Tilde',
        '@': 'CommercialAt',
        'u': 'LetterU',
        'U': 'LetterU',
    }
    handler = dedicated_handlers.get(char)
    if handler is not None:
        return handler

    # Remaining code points are grouped into character classes.
    if char in ('\n', '\r', '\t', '\f', ' '):
        return 'WhiteSpace'
    if char.isdigit():
        return 'AsciiDigit'
    if char == '_' or char.isalpha():
        return 'NameStart'
    if char in ('"', "'"):
        return 'StringStart'
    if i == 0:
        return 'EndOfFile'

    # No handler for this code point.
    return None


class MakeCSSTokenizerCodePointsWriter(in_generator.Writer):
    """Writer that emits the CSSTokenizer::kCodePoints table.

    Registers a single output file, 'css_tokenizer_codepoints.cc', whose
    contents are produced by generate().
    """

    def __init__(self, in_file_path):
        """Initialize the base writer and register the generated output.

        Args:
            in_file_path: Forwarded unchanged to in_generator.Writer.
        """
        super(MakeCSSTokenizerCodePointsWriter, self).__init__(in_file_path)

        # Maps output file name -> callable returning that file's contents.
        self._outputs = {
            'css_tokenizer_codepoints.cc': self.generate,
        }

    def generate(self):
        """Return the C++ source text defining the kCodePoints array.

        One entry is emitted per code point in range(array_size): an
        '&CSSTokenizer::<name>' entry when token_type() yields a name for
        the code point, and '0' otherwise.
        """
        array_size = 128  # SCHAR_MAX + 1
        token_lines = []
        for code_point in range(array_size):
            # Classify each code point once and reuse the result for both
            # the emptiness check and the emitted entry.
            handler = token_type(code_point)
            if handler:
                token_lines.append('    &CSSTokenizer::%s,' % handler)
            else:
                token_lines.append('    0,')
        return CPP_TEMPLATE.format(
            array_size=array_size,
            token_lines='\n'.join(token_lines),
            module_pyname=module_pyname)

if __name__ == '__main__':
    # Script entry point: wrap the writer class in the in_generator driver
    # and run it with the raw command-line arguments.
    maker = in_generator.Maker(MakeCSSTokenizerCodePointsWriter)
    maker.main(sys.argv)