diff options
author | yaroslav-o <29219583+yaroslav-o@users.noreply.github.com> | 2019-09-25 05:44:54 -0700 |
---|---|---|
committer | Eli Bendersky <eliben@users.noreply.github.com> | 2019-09-25 05:44:54 -0700 |
commit | a4a7127dadf79ebf0deacf49f70ed9e588c40596 (patch) | |
tree | 5f2c3d980c653b644f3f357b63a896a91d597847 | |
parent | 62ee4ba5fbe58f469c72e7b5b02e88584577a147 (diff) | |
download | pycparser-a4a7127dadf79ebf0deacf49f70ed9e588c40596.tar.gz |
Recognize integer multicharacter constants like 'ABCD' (#350)
Recognize integer multicharacter constants like 'ABCD'
The feature I am adding is described here (see the 5th case):
https://en.cppreference.com/w/c/language/character_constant
See also section 6.4.4.4, paragraph 10 of the C99 standard.
Put simply, pycparser treated a statement like this as an error:
int a = 'ABCD';
However, it is not.
It would likely be possible to just modify the char_const regular expression in c_lexer.py:240 to allow more characters, but with the approach taken in this PR, multicharacter constants are clearly separated into their own token type. I am also limiting the length of multicharacter integer constants to 4 characters; this matches the VS compiler's behavior (gcc allows any length, with a warning) and lets pycparser NOT treat lengthy single-quoted strings as integers, which would be nonsensical anyway.
-rw-r--r-- | pycparser/c_lexer.py | 7 | ||||
-rw-r--r-- | pycparser/c_parser.py | 1 | ||||
-rw-r--r-- | tests/test_c_lexer.py | 12 |
3 files changed, 15 insertions, 5 deletions
diff --git a/pycparser/c_lexer.py b/pycparser/c_lexer.py index 371e996..045d24e 100644 --- a/pycparser/c_lexer.py +++ b/pycparser/c_lexer.py @@ -130,7 +130,7 @@ class CLexer(object): 'TYPEID', # constants - 'INT_CONST_DEC', 'INT_CONST_OCT', 'INT_CONST_HEX', 'INT_CONST_BIN', + 'INT_CONST_DEC', 'INT_CONST_OCT', 'INT_CONST_HEX', 'INT_CONST_BIN', 'INT_CONST_CHAR', 'FLOAT_CONST', 'HEX_FLOAT_CONST', 'CHAR_CONST', 'WCHAR_CONST', @@ -239,6 +239,7 @@ class CLexer(object): cconst_char = r"""([^'\\\n]|"""+escape_sequence+')' char_const = "'"+cconst_char+"'" wchar_const = 'L'+char_const + multicharacter_constant = "'"+cconst_char+"{2,4}'" unmatched_quote = "('"+cconst_char+"*\\n)|('"+cconst_char+"*$)" bad_char_const = r"""('"""+cconst_char+"""[^'\n]+')|('')|('"""+bad_escape+r"""[^'\n]*')""" @@ -468,6 +469,10 @@ class CLexer(object): # Must come before bad_char_const, to prevent it from # catching valid char constants as invalid # + @TOKEN(multicharacter_constant) + def t_INT_CONST_CHAR(self, t): + return t + @TOKEN(char_const) def t_CHAR_CONST(self, t): return t diff --git a/pycparser/c_parser.py b/pycparser/c_parser.py index 87d6c5e..4cf96fa 100644 --- a/pycparser/c_parser.py +++ b/pycparser/c_parser.py @@ -1766,6 +1766,7 @@ class CParser(PLYParser): | INT_CONST_OCT | INT_CONST_HEX | INT_CONST_BIN + | INT_CONST_CHAR """ uCount = 0 lCount = 0 diff --git a/tests/test_c_lexer.py b/tests/test_c_lexer.py index 3a70c18..d63d6fd 100644 --- a/tests/test_c_lexer.py +++ b/tests/test_c_lexer.py @@ -77,6 +77,10 @@ class TestCLexerNoErrors(unittest.TestCase): self.assertTokensTypes('0xf7', ['INT_CONST_HEX']) self.assertTokensTypes('0b110', ['INT_CONST_BIN']) self.assertTokensTypes('0x01202AAbbf7Ul', ['INT_CONST_HEX']) + self.assertTokensTypes("'12'", ['INT_CONST_CHAR']) + self.assertTokensTypes("'123'", ['INT_CONST_CHAR']) + self.assertTokensTypes("'1AB4'", ['INT_CONST_CHAR']) + self.assertTokensTypes(r"'1A\n4'", ['INT_CONST_CHAR']) # no 0 before x, so ID catches it 
self.assertTokensTypes('xf7', ['ID']) @@ -448,11 +452,11 @@ class TestCLexerErrors(unittest.TestCase): self.assertLexerError("'", ERR_UNMATCHED_QUOTE) self.assertLexerError("'b\n", ERR_UNMATCHED_QUOTE) self.assertLexerError("'\\xaa\n'", ERR_UNMATCHED_QUOTE) - - self.assertLexerError(r"'\12a'", ERR_INVALID_CCONST) - self.assertLexerError(r"'\xabg'", ERR_INVALID_CCONST) + + self.assertLexerError(r"'123\12a'", ERR_INVALID_CCONST) + self.assertLexerError(r"'123\xabg'", ERR_INVALID_CCONST) self.assertLexerError("''", ERR_INVALID_CCONST) - self.assertLexerError("'jx'", ERR_INVALID_CCONST) + self.assertLexerError("'abcjx'", ERR_INVALID_CCONST) self.assertLexerError(r"'\*'", ERR_INVALID_CCONST) def test_string_literals(self): |