1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
|
#!/usr/bin/env python
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import in_generator
import sys
import os
# File name of this script, normalised to a ".py" extension so the
# "Auto-generated by" banner is the same whether the module was loaded from
# source or from a compiled file.
module_basename = os.path.basename(__file__)
module_pyname = '%s.py' % os.path.splitext(module_basename)[0]

# str.format() template for the generated C++ source. Literal C++ braces are
# doubled ("{{" / "}}"); the single-brace fields are filled in by the writer:
#   {module_pyname} - name of this generator script
#   {array_size}    - number of entries in the table
#   {token_lines}   - one pre-formatted initializer line per table entry
CPP_TEMPLATE = """
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Auto-generated by {module_pyname}
const CSSTokenizer::CodePoint CSSTokenizer::kCodePoints[{array_size}] = {{
{token_lines}
}};
const unsigned codePointsNumber = {array_size};
"""
def token_type(i):
codepoints = {'(': 'LeftParenthesis',
')': 'RightParenthesis',
'[': 'LeftBracket',
']': 'RightBracket',
'{': 'LeftBrace',
'}': 'RightBrace',
'+': 'PlusOrFullStop',
'.': 'PlusOrFullStop',
'-': 'HyphenMinus',
'*': 'Asterisk',
'<': 'LessThan',
',': 'Comma',
'/': 'Solidus',
'\\': 'ReverseSolidus',
':': 'Colon',
';': 'SemiColon',
'#': 'Hash',
'^': 'CircumflexAccent',
'$': 'DollarSign',
'|': 'VerticalLine',
'~': 'Tilde',
'@': 'CommercialAt',
'u': 'LetterU',
'U': 'LetterU',
}
c = chr(i)
if c in codepoints:
return codepoints[c]
whitespace = '\n\r\t\f '
quotes = '"\''
if c in whitespace:
return 'WhiteSpace'
if c.isdigit():
return 'AsciiDigit'
if c.isalpha() or c == '_':
return 'NameStart'
if c in quotes:
return 'StringStart'
if i == 0:
return 'EndOfFile'
class MakeCSSTokenizerCodePointsWriter(in_generator.Writer):
    """Writer that renders the CSSTokenizer code-point dispatch table.

    Registers a single output, 'css_tokenizer_codepoints.cc', whose content
    comes from generate().
    NOTE(review): how in_generator.Writer consumes ``_outputs`` is not
    visible here - presumably it calls each mapped callable and writes the
    returned text to the named file; confirm against in_generator.
    """

    def __init__(self, in_file_path):
        super(MakeCSSTokenizerCodePointsWriter, self).__init__(in_file_path)
        # Output file name -> callable producing that file's text.
        self._outputs = {'css_tokenizer_codepoints.cc': self.generate}

    def generate(self):
        """Return the C++ source text for the kCodePoints table."""
        array_size = 128  # SCHAR_MAX + 1
        entries = []
        for code_point in range(array_size):
            handler = token_type(code_point)
            if handler:
                entries.append(' &CSSTokenizer::%s,' % handler)
            else:
                # No dedicated handler: emit a null table entry.
                entries.append(' 0,')
        return CPP_TEMPLATE.format(
            array_size=array_size,
            token_lines='\n'.join(entries),
            module_pyname=module_pyname)
if __name__ == '__main__':
    # Script entry point: hand the writer class and the raw command line to
    # in_generator's Maker.
    maker = in_generator.Maker(MakeCSSTokenizerCodePointsWriter)
    maker.main(sys.argv)
|