From b1dbdc5e21346de0fec7fbb814dedbe1585bcfec Mon Sep 17 00:00:00 2001 From: Vitaly Cheptsov <4348897+vit9696@users.noreply.github.com> Date: Tue, 31 Aug 2021 16:05:01 +0300 Subject: Introduce partial C11 support (#429) * Introduce partial C11 support Implemented _Noreturn, _Static_assert, _Thread_local. Also fixed tests with preprocessor on macOS. * Add more tests Co-authored-by: vit9696 --- .gitignore | 1 + pycparser/_c_ast.cfg | 2 ++ pycparser/c_ast.py | 21 +++++++++++++++++++++ pycparser/c_generator.py | 8 ++++++++ pycparser/c_lexer.py | 18 +++++++++++------- pycparser/c_parser.py | 17 +++++++++++++++++ tests/c_files/c11.c | 31 +++++++++++++++++++++++++++++++ tests/test_c_generator.py | 21 ++++++++++++++++++--- tests/test_c_parser.py | 19 +++++++++++++++++++ tests/test_general.py | 31 ++++++++++++++++--------------- tests/test_util.py | 16 ++++++++++++++++ utils/fake_libc_include/_fake_defines.h | 14 ++++++++++++-- utils/fake_libc_include/stdnoreturn.h | 2 ++ utils/fake_libc_include/threads.h | 2 ++ 14 files changed, 176 insertions(+), 27 deletions(-) create mode 100644 tests/c_files/c11.c create mode 100644 tests/test_util.py create mode 100644 utils/fake_libc_include/stdnoreturn.h create mode 100644 utils/fake_libc_include/threads.h diff --git a/.gitignore b/.gitignore index afa1201..3aaede4 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,4 @@ parser.out utils/z.c *.egg-info *.swp +.DS_Store diff --git a/pycparser/_c_ast.cfg b/pycparser/_c_ast.cfg index 2f5b087..e9b5685 100644 --- a/pycparser/_c_ast.cfg +++ b/pycparser/_c_ast.cfg @@ -152,6 +152,8 @@ PtrDecl: [quals, type*] Return: [expr*] +StaticAssert: [cond*, message*] + # name: struct tag name # decls: declaration of members # diff --git a/pycparser/c_ast.py b/pycparser/c_ast.py index 0ce8fef..192c106 100644 --- a/pycparser/c_ast.py +++ b/pycparser/c_ast.py @@ -863,6 +863,27 @@ class Return(Node): attr_names = () +class StaticAssert(Node): + __slots__ = ('cond', 'message', 'coord', '__weakref__') + 
def __init__(self, cond, message, coord=None): + self.cond = cond + self.message = message + self.coord = coord + + def children(self): + nodelist = [] + if self.cond is not None: nodelist.append(("cond", self.cond)) + if self.message is not None: nodelist.append(("message", self.message)) + return tuple(nodelist) + + def __iter__(self): + if self.cond is not None: + yield self.cond + if self.message is not None: + yield self.message + + attr_names = () + class Struct(Node): __slots__ = ('name', 'decls', 'coord', '__weakref__') def __init__(self, name, decls, coord=None): diff --git a/pycparser/c_generator.py b/pycparser/c_generator.py index 983d578..ded8c65 100644 --- a/pycparser/c_generator.py +++ b/pycparser/c_generator.py @@ -286,6 +286,14 @@ class CGenerator(object): s += ');' return s + def visit_StaticAssert(self, n): + s = '_Static_assert(' + s += self.visit(n.cond) + s += ',' + s += self.visit(n.message) + s += ')' + return s + def visit_Switch(self, n): s = 'switch (' + self.visit(n.cond) + ')\n' s += self._generate_stmt(n.stmt, add_indent=True) diff --git a/pycparser/c_lexer.py b/pycparser/c_lexer.py index 045d24e..96c4aa7 100644 --- a/pycparser/c_lexer.py +++ b/pycparser/c_lexer.py @@ -100,7 +100,7 @@ class CLexer(object): ## Reserved keywords ## keywords = ( - '_BOOL', '_COMPLEX', 'AUTO', 'BREAK', 'CASE', 'CHAR', 'CONST', + 'AUTO', 'BREAK', 'CASE', 'CHAR', 'CONST', 'CONTINUE', 'DEFAULT', 'DO', 'DOUBLE', 'ELSE', 'ENUM', 'EXTERN', 'FLOAT', 'FOR', 'GOTO', 'IF', 'INLINE', 'INT', 'LONG', 'REGISTER', 'OFFSETOF', @@ -109,19 +109,23 @@ class CLexer(object): 'VOLATILE', 'WHILE', '__INT128', ) + keywords_new = ( + '_BOOL', '_COMPLEX', + '_NORETURN', '_THREAD_LOCAL', '_STATIC_ASSERT' + ) + keyword_map = {} + for keyword in keywords: - if keyword == '_BOOL': - keyword_map['_Bool'] = keyword - elif keyword == '_COMPLEX': - keyword_map['_Complex'] = keyword - else: keyword_map[keyword.lower()] = keyword + for keyword in keywords_new: + 
keyword_map[keyword[:2].upper() + keyword[2:].lower()] = keyword + ## ## All the tokens recognized by the lexer ## - tokens = keywords + ( + tokens = keywords + keywords_new + ( # Identifiers 'ID', diff --git a/pycparser/c_parser.py b/pycparser/c_parser.py index d246900..b8ad4b1 100644 --- a/pycparser/c_parser.py +++ b/pycparser/c_parser.py @@ -554,6 +554,16 @@ class CParser(PLYParser): """ p[0] = [] + def p_external_declaration_5(self, p): + """ external_declaration : static_assert + """ + p[0] = p[1] + + def p_static_assert_declaration(self, p): + """ static_assert : _STATIC_ASSERT LPAREN constant_expression COMMA unified_string_literal RPAREN + """ + p[0] = [c_ast.StaticAssert(p[3], p[5], self._token_coord(p, 1))] + def p_pp_directive(self, p): """ pp_directive : PPHASH """ @@ -599,6 +609,10 @@ class CParser(PLYParser): param_decls=p[3], body=p[4]) + # Note, according to C18 A.2.2 6.7.10 static_assert-declaration _Static_assert + # is a declaration, not a statement. We additionally recognise it as a statement + # to fix parsing of _Static_assert inside the functions. 
+ # def p_statement(self, p): """ statement : labeled_statement | expression_statement @@ -607,6 +621,7 @@ class CParser(PLYParser): | iteration_statement | jump_statement | pppragma_directive + | static_assert """ p[0] = p[1] @@ -806,11 +821,13 @@ class CParser(PLYParser): | STATIC | EXTERN | TYPEDEF + | _THREAD_LOCAL """ p[0] = p[1] def p_function_specifier(self, p): """ function_specifier : INLINE + | _NORETURN """ p[0] = p[1] diff --git a/tests/c_files/c11.c b/tests/c_files/c11.c new file mode 100644 index 0000000..854feba --- /dev/null +++ b/tests/c_files/c11.c @@ -0,0 +1,31 @@ +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdnoreturn.h> +#include <threads.h> + +/* C11 thread locals */ +_Thread_local int flag; +thread_local int flag2; + +static_assert(sizeof(flag) == sizeof(flag2), "Really unexpected size difference"); + +noreturn void func2(void) +{ + abort(); +} + +_Noreturn void func(void) +{ + func2(); +} + +int main() +{ + _Static_assert(sizeof(flag) == sizeof(flag2), "Unexpected size difference"); + static_assert(sizeof(flag) == sizeof(flag2), "Unexpected size difference"); + + printf("Flag: %d\n", flag); + printf("Flag2: %d\n", flag2); + func(); +} diff --git a/tests/test_c_generator.py b/tests/test_c_generator.py index 3c724a0..0cffa9e 100644 --- a/tests/test_c_generator.py +++ b/tests/test_c_generator.py @@ -7,6 +7,7 @@ import unittest sys.path.insert(0, '.') from pycparser import c_parser, c_generator, c_ast, parse_file +from tests.test_util import cpp_supported, cpp_path, cpp_args _c_parser = c_parser.CParser( lex_optimize=False, @@ -82,6 +83,9 @@ class TestCtoC(unittest.TestCase): self._assert_ctoc_correct('int a;') self._assert_ctoc_correct('int b, a;') self._assert_ctoc_correct('int c, b, a;') + self._assert_ctoc_correct('auto int a;') + self._assert_ctoc_correct('register int a;') + self._assert_ctoc_correct('_Thread_local int a;') def test_complex_decls(self): self._assert_ctoc_correct('int** (*a)(void);') @@ -246,6 +250,13 @@ class TestCtoC(unittest.TestCase): int array[3] = 
{[0] = 0, [1] = 1, [1+1] = 2}; ''') + def test_noreturn(self): + self._assert_ctoc_correct(r''' + _Noreturn int x(void) { + abort(); + } + ''') + def test_exprlist_with_semi(self): self._assert_ctoc_correct(r''' void x() { @@ -361,6 +372,10 @@ class TestCtoC(unittest.TestCase): src = 'int x = ' + src + ';' self._assert_ctoc_correct(src) + def test_static_assert(self): + self._assert_ctoc_correct('_Static_assert(sizeof(int) == sizeof(int), "123");') + self._assert_ctoc_correct('int main() { _Static_assert(sizeof(int) == sizeof(int), "123"); } ') + def test_reduce_parentheses_binaryops(self): c1 = 'int x = a + b + c + d;'; self.assertEqual(self._run_c_to_c(c1), 'int x = ((a + b) + c) + d;\n') @@ -408,14 +423,14 @@ class TestCasttoC(unittest.TestCase): self.assertEqual(generator.visit(c_ast.Cast(int_type, test_fun)), '(int) test_fun()') - @unittest.skipUnless(platform.system() == 'Linux', - 'cpp only works on Linux') + @unittest.skipUnless(cpp_supported(), 'cpp only works on Unix') def test_to_type_with_cpp(self): generator = c_generator.CGenerator() test_fun = c_ast.FuncCall(c_ast.ID('test_fun'), c_ast.ExprList([])) memmgr_path = self._find_file('memmgr.h') - ast2 = parse_file(memmgr_path, use_cpp=True) + ast2 = parse_file(memmgr_path, use_cpp=True, + cpp_path = cpp_path(), cpp_args = cpp_args()) void_ptr_type = ast2.ext[-3].type.type void_type = void_ptr_type.type self.assertEqual(generator.visit(c_ast.Cast(void_ptr_type, test_fun)), diff --git a/tests/test_c_parser.py b/tests/test_c_parser.py index 270f2fb..f4a3744 100755 --- a/tests/test_c_parser.py +++ b/tests/test_c_parser.py @@ -40,6 +40,8 @@ def expand_decl(decl): assert isinstance(decl.values, EnumeratorList) values = [enum.name for enum in decl.values.enumerators] return ['Enum', decl.name, values] + elif typ == StaticAssert: + return ['StaticAssert', decl.cond.value, decl.message.value] else: nested = expand_decl(decl.type) @@ -520,6 +522,7 @@ class TestCParser_fundamentals(TestCParser_base): 
self.assertEqual(d.storage, storage) assert_qs("extern int p;", 0, [], ['extern']) + assert_qs("_Thread_local int p;", 0, [], ['_Thread_local']) assert_qs("const long p = 6;", 0, ['const'], []) d1 = "static const int p, q, r;" @@ -1556,6 +1559,18 @@ class TestCParser_fundamentals(TestCParser_base): [['ID', 'p']], ['TypeDecl', ['IdentifierType', ['int']]]]]) + def test_static_assert(self): + f1 = self.parse(''' + _Static_assert(1, "123"); + int factorial(int p) + { + _Static_assert(2, "456"); + } + ''') + + self.assertEqual(expand_decl(f1.ext[0]), ['StaticAssert', '1', '"123"']) + self.assertEqual(expand_decl(f1.ext[1].body.block_items[0]), ['StaticAssert', '2', '"456"']) + def test_unified_string_literals(self): # simple string, for reference d1 = self.get_decl_init('char* s = "hello";') @@ -1607,6 +1622,10 @@ class TestCParser_fundamentals(TestCParser_base): ps2 = self.parse('static inline void inlinefoo(void);') self.assertEqual(ps2.ext[0].funcspec, ['inline']) + def test_noreturn_specifier(self): + ps2 = self.parse('static _Noreturn void noreturnfoo(void);') + self.assertEqual(ps2.ext[0].funcspec, ['_Noreturn']) + # variable length array def test_vla(self): ps2 = self.parse(r''' diff --git a/tests/test_general.py b/tests/test_general.py index 18e388c..3d43cbf 100644 --- a/tests/test_general.py +++ b/tests/test_general.py @@ -5,9 +5,7 @@ import unittest sys.path.insert(0, '..') from pycparser import parse_file, c_ast - -CPPPATH = 'cpp' - +from tests.test_util import cpp_supported, cpp_path, cpp_args # Test successful parsing # @@ -25,26 +23,22 @@ class TestParsing(unittest.TestCase): ast = parse_file(self._find_file('example_c_file.c')) self.assertIsInstance(ast, c_ast.FileAST) - @unittest.skipUnless(platform.system() == 'Linux', - 'cpp only works on Linux') + @unittest.skipUnless(cpp_supported(), 'cpp only works on Unix') def test_with_cpp(self): memmgr_path = self._find_file('memmgr.c') c_files_path = os.path.dirname(memmgr_path) ast = parse_file(memmgr_path, 
use_cpp=True, - cpp_path=CPPPATH, - cpp_args='-I%s' % c_files_path) + cpp_path=cpp_path(), cpp_args=cpp_args('-I%s' % c_files_path)) self.assertIsInstance(ast, c_ast.FileAST) fake_libc = os.path.join(c_files_path, '..', '..', 'utils', 'fake_libc_include') ast2 = parse_file(self._find_file('year.c'), use_cpp=True, - cpp_path=CPPPATH, - cpp_args=[r'-I%s' % fake_libc]) + cpp_path=cpp_path(), cpp_args=cpp_args('-I%s' % fake_libc)) self.assertIsInstance(ast2, c_ast.FileAST) - @unittest.skipUnless(platform.system() == 'Linux', - 'cpp only works on Linux') + @unittest.skipUnless(cpp_supported(), 'cpp only works on Unix') def test_cpp_funkydir(self): # This test contains Windows specific path escapes if sys.platform != 'win32': @@ -52,16 +46,23 @@ class TestParsing(unittest.TestCase): c_files_path = os.path.join('tests', 'c_files') ast = parse_file(self._find_file('simplemain.c'), use_cpp=True, - cpp_path=CPPPATH, cpp_args='-I%s' % c_files_path) + cpp_path=cpp_path(), cpp_args=cpp_args('-I%s' % c_files_path)) self.assertIsInstance(ast, c_ast.FileAST) - @unittest.skipUnless(platform.system() == 'Linux', - 'cpp only works on Linux') + @unittest.skipUnless(cpp_supported(), 'cpp only works on Unix') def test_no_real_content_after_cpp(self): ast = parse_file(self._find_file('empty.h'), use_cpp=True, - cpp_path=CPPPATH) + cpp_path=cpp_path(), cpp_args=cpp_args()) self.assertIsInstance(ast, c_ast.FileAST) + @unittest.skipUnless(cpp_supported(), 'cpp only works on Unix') + def test_c11_with_cpp(self): + c_files_path = os.path.join('tests', 'c_files') + fake_libc = os.path.join(c_files_path, '..', '..', + 'utils', 'fake_libc_include') + ast = parse_file(self._find_file('c11.c'), use_cpp=True, + cpp_path=cpp_path(), cpp_args=cpp_args('-I%s' % fake_libc)) + self.assertIsInstance(ast, c_ast.FileAST) if __name__ == '__main__': unittest.main() diff --git a/tests/test_util.py b/tests/test_util.py new file mode 100644 index 0000000..0b3365c --- /dev/null +++ b/tests/test_util.py @@ -0,0 
+1,16 @@ +import platform + +def cpp_supported(): + return platform.system() == 'Linux' or platform.system() == 'Darwin' + +def cpp_path(): + if platform.system() == 'Darwin': + return 'gcc' + return 'cpp' + +def cpp_args(args=[]): + if isinstance(args, str): + args = [args] + if platform.system() == 'Darwin': + return ['-E'] + args + return args diff --git a/utils/fake_libc_include/_fake_defines.h b/utils/fake_libc_include/_fake_defines.h index 24cc0ab..f2e8dd4 100644 --- a/utils/fake_libc_include/_fake_defines.h +++ b/utils/fake_libc_include/_fake_defines.h @@ -206,8 +206,6 @@ #define va_arg(_ap, _type) __builtin_va_arg((_ap)) #define va_end(_list) -#endif - /* Vectors */ #define __m128 int #define __m128_u int @@ -227,3 +225,15 @@ #define __m512d_u int #define __m512i int #define __m512i_u int + +/* C11 stdnoreturn.h defines */ +#define __noreturn_is_defined 1 +#define noreturn _Noreturn + +/* C11 threads.h defines */ +#define thread_local _Thread_local + +/* C11 assert.h defines */ +#define static_assert _Static_assert + +#endif diff --git a/utils/fake_libc_include/stdnoreturn.h b/utils/fake_libc_include/stdnoreturn.h new file mode 100644 index 0000000..f952c1d --- /dev/null +++ b/utils/fake_libc_include/stdnoreturn.h @@ -0,0 +1,2 @@ +#include "_fake_defines.h" +#include "_fake_typedefs.h" diff --git a/utils/fake_libc_include/threads.h b/utils/fake_libc_include/threads.h new file mode 100644 index 0000000..f952c1d --- /dev/null +++ b/utils/fake_libc_include/threads.h @@ -0,0 +1,2 @@ +#include "_fake_defines.h" +#include "_fake_typedefs.h" -- cgit v1.2.1