summaryrefslogtreecommitdiff
path: root/Tools/c-analyzer/c_analyzer/parser
diff options
context:
space:
mode:
Diffstat (limited to 'Tools/c-analyzer/c_analyzer/parser')
-rw-r--r--Tools/c-analyzer/c_analyzer/parser/__init__.py0
-rw-r--r--Tools/c-analyzer/c_analyzer/parser/declarations.py339
-rw-r--r--Tools/c-analyzer/c_analyzer/parser/find.py107
-rw-r--r--Tools/c-analyzer/c_analyzer/parser/naive.py179
-rw-r--r--Tools/c-analyzer/c_analyzer/parser/preprocessor.py511
-rw-r--r--Tools/c-analyzer/c_analyzer/parser/source.py34
6 files changed, 0 insertions, 1170 deletions
diff --git a/Tools/c-analyzer/c_analyzer/parser/__init__.py b/Tools/c-analyzer/c_analyzer/parser/__init__.py
deleted file mode 100644
index e69de29bb2..0000000000
--- a/Tools/c-analyzer/c_analyzer/parser/__init__.py
+++ /dev/null
diff --git a/Tools/c-analyzer/c_analyzer/parser/declarations.py b/Tools/c-analyzer/c_analyzer/parser/declarations.py
deleted file mode 100644
index f37072ccca..0000000000
--- a/Tools/c-analyzer/c_analyzer/parser/declarations.py
+++ /dev/null
@@ -1,339 +0,0 @@
-import re
-import shlex
-import subprocess
-
-from ..common.info import UNKNOWN
-
-from . import source
-
-
# Start of a C identifier (these patterns are only used for prefix matching):
# either a single ASCII letter, or one or more underscores followed by an
# alphanumeric character and any further word characters.
# The upper bound of the first range is "Z"; "A-z" would also admit the
# punctuation characters that sit between "Z" and "a" in ASCII.
IDENTIFIER = r'(?:[a-zA-Z]|_+[a-zA-Z0-9]\w*)'

# C type qualifiers.
TYPE_QUAL = r'(?:const|volatile)'

# A type specifier, written for re.VERBOSE.  This must be an f-string so that
# {IDENTIFIER} is interpolated rather than left as literal text.
# NOTE(review): the integer alternative ends with an empty branch ("| )"), so
# this pattern can also match the empty string -- confirm that is intended.
VAR_TYPE_SPEC = rf'''(?:
        void |
        (?:
         (?:(?:un)?signed\s+)?
         (?:
          char |
          short |
          int |
          long |
          long\s+int |
          long\s+long
          ) |
         ) |
        float |
        double |
        {IDENTIFIER} |
        (?:struct|union)\s+{IDENTIFIER}
        )'''

# An optional "const" followed by a single "*" (written for re.VERBOSE).
POINTER = rf'''(?:
        (?:\s+const)?\s*[*]
        )'''
-
-#STRUCT = r'''(?:
-# (?:struct|(struct\s+%s))\s*[{]
-# [^}]*
-# [}]
-# )''' % (IDENTIFIER)
-#UNION = r'''(?:
-# (?:union|(union\s+%s))\s*[{]
-# [^}]*
-# [}]
-# )''' % (IDENTIFIER)
-#DECL_SPEC = rf'''(?:
-# ({VAR_TYPE_SPEC}) |
-# ({STRUCT}) |
-# ({UNION})
-# )'''
-
# Start of a function declaration/definition: an optional storage class
# ("extern", "static", or "static inline") followed by a type specifier.
# The pattern is written for re.VERBOSE, so the "#(?:const..." line inside
# the string is a regex comment and takes no part in matching.
FUNC_START = rf'''(?:
        (?:
         (?:
          extern |
          static |
          static\s+inline
          )\s+
         )?
        #(?:const\s+)?
        {VAR_TYPE_SPEC}
        )'''
-#GLOBAL_VAR_START = rf'''(?:
-# (?:
-# (?:
-# extern |
-# static
-# )\s+
-# )?
-# (?:
-# {TYPE_QUAL}
-# (?:\s+{TYPE_QUAL})?
-# )?\s+
-# {VAR_TYPE_SPEC}
-# )'''
# Compiled pattern anchored at the start of a line; group 1 captures the
# text matched by FUNC_START.
GLOBAL_DECL_START_RE = re.compile(rf'''
        ^
        (?:
         ({FUNC_START})
         )
        ''', re.VERBOSE)

# Start of a local variable declaration: an optional "register"/"static",
# up to two type qualifiers, a type specifier, and an optional pointer.
LOCAL_VAR_START = rf'''(?:
        (?:
         (?:
          register |
          static
          )\s+
         )?
        (?:
         (?:
          {TYPE_QUAL}
          (?:\s+{TYPE_QUAL})?
          )\s+
         )?
        {VAR_TYPE_SPEC}
        {POINTER}?
        )'''
# Compiled pattern anchored at the start of a line; group 1 captures the
# text matched by LOCAL_VAR_START.
LOCAL_STMT_START_RE = re.compile(rf'''
        ^
        (?:
         ({LOCAL_VAR_START})
         )
        ''', re.VERBOSE)
-
-
def iter_global_declarations(lines):
    """Yield (decl, body) for each function definition in the given lines.

    The lines are cleaned first (comments and blank lines removed).
    "decl" is the whole definition: the header joined onto one line,
    then the body, then the closing brace, separated by newlines.
    "body" is the text between the braces, as-is.

    Lines ending in ";" and brace-opening lines without "(" are skipped,
    so only function definitions are produced.  Iteration stops early
    if the input ends in the middle of a definition.
    """
    # XXX Bail out upon bogus syntax.
    cleaned = source.iter_clean_lines(lines)
    for first in cleaned:
        if GLOBAL_DECL_START_RE.match(first) is None:
            continue
        # We only need functions here, since we only need locals for now.
        is_simple = first.endswith(';')
        is_nonfunc_block = first.endswith('{') and '(' not in first
        if is_simple or is_nonfunc_block:
            continue

        # Gather header lines up to (and including) the one with the
        # opening brace.  (assume no func is a one-liner)
        header = [first]
        current = first
        while '{' not in current:  # assume no inline structs, etc.
            # Cleaned lines are never None, so it is a safe sentinel.
            current = next(cleaned, None)
            if current is None:
                return
            header.append(current)
        decl = ' '.join(header)

        body, end = _extract_block(cleaned)
        if end is None:
            return
        assert end == '}'
        yield (f'{decl}\n{body}\n{end}', body)
-
-
def iter_local_statements(lines):
    """Yield (stmt, blocks) for each recognized statement in the lines.

    Only single-line statements that look like the start of a local
    variable declaration and end with ";" are produced; for those,
    "blocks" is always None.  Everything else is skipped for now.
    """
    # XXX Bail out upon bogus syntax.
    for stmt in source.iter_clean_lines(lines):
        if not LOCAL_STMT_START_RE.match(stmt):
            continue
        # XXX Support compound & multiline simple statements.
        if stmt.endswith(';'):
            yield (stmt, None)
-
-
-def _extract_block(lines):
- end = None
- depth = 1
- body = []
- for line in lines:
- depth += line.count('{') - line.count('}')
- if depth == 0:
- end = line
- break
- body.append(line)
- return '\n'.join(body), end
-
-
def parse_func(stmt, body):
    """Return (name, signature) for the given function definition.

    "stmt" is the full definition text and "body" the text between its
    braces.  The signature is everything before the opening brace,
    collapsed onto one line; the name is the last word before the
    first "(".
    """
    before_body, _, after_body = stmt.partition(body)
    assert after_body.strip() == '}'
    assert before_body.strip().endswith('{')
    header = before_body.rpartition('{')[0]

    signature = ' '.join(header.strip().splitlines())

    prefix = signature.split('(')[0].strip()
    name = prefix.rpartition(' ')[2]
    assert name

    return name, signature
-
-
-#TYPE_SPEC = rf'''(?:
-# )'''
-#VAR_DECLARATOR = rf'''(?:
-# )'''
-#VAR_DECL = rf'''(?:
-# {TYPE_SPEC}+
-# {VAR_DECLARATOR}
-# \s*
-# )'''
-#VAR_DECLARATION = rf'''(?:
-# {VAR_DECL}
-# (?: = [^=] [^;]* )?
-# ;
-# )'''
-#
-#
-#def parse_variable(decl, *, inFunc=False):
-# """Return [(name, storage, vartype)] for the given variable declaration."""
-# ...
-
-
def _parse_var(stmt):
    """Return (name, vartype) for the given variable declaration.

    "stmt" is a single-line declaration; a trailing ";" and any
    initializer (everything from the first "=") are ignored.  The
    statement must match LOCAL_STMT_START_RE (checked with an assert).
    Runs of spaces in the resulting type are collapsed to one space.
    """
    stmt = stmt.rstrip(';')
    m = LOCAL_STMT_START_RE.match(stmt)
    assert m
    vartype = m.group(0)
    name = stmt[len(vartype):].partition('=')[0].strip()

    if name.startswith('('):
        # A parenthesized declarator, e.g. "(*fp)(void)".
        name, _, after = name[1:].partition(')')
        assert after
        name = name.replace('*', '* ')
        inside, _, name = name.strip().rpartition(' ')
        vartype = f'{vartype} ({inside.strip()}){after}'
    else:
        # Anything before the final word (e.g. "*") belongs to the type.
        name = name.replace('*', '* ')
        before, _, name = name.rpartition(' ')
        vartype = f'{vartype} {before}'

    # Collapse runs of spaces.  The test must look for a *double* space;
    # testing for a single space would never terminate.
    vartype = vartype.strip()
    while '  ' in vartype:
        vartype = vartype.replace('  ', ' ')

    return name, vartype
-
-
def extract_storage(decl, *, infunc=None):
    """Return the storage class for the given declaration.

    Returns "static" or "extern" when the declaration starts with that
    keyword, otherwise "local" if infunc is true, else "implicit".
    If decl is UNKNOWN then it is returned unchanged.

    Raises NotImplementedError if "static"/"extern" appears as a whole
    word anywhere other than at the start of the declaration.
    """
    if decl == UNKNOWN:
        return decl
    if decl.startswith('static '):
        return 'static'
        #return 'static', decl.partition(' ')[2].strip()
    elif decl.startswith('extern '):
        return 'extern'
        #return 'extern', decl.partition(' ')[2].strip()
    # The pattern must be a raw string: in a normal string literal "\b"
    # is a backspace character rather than a regex word boundary.
    elif re.match(r'.*\b(static|extern)\b', decl):
        raise NotImplementedError
    elif infunc:
        return 'local'
    else:
        return 'implicit'
-
-
def parse_compound(stmt, blocks):
    """Return (headers, bodies) for the given compound statement.

    Not implemented yet; always raises NotImplementedError.
    """
    # XXX Identify declarations inside compound statements
    # (if/switch/for/while).
    raise NotImplementedError
-
-
def iter_variables(filename, *,
                   preprocessed=False,
                   _iter_source_lines=source.iter_lines,
                   _iter_global=iter_global_declarations,
                   _iter_local=iter_local_statements,
                   _parse_func=parse_func,
                   _parse_var=_parse_var,
                   _parse_compound=parse_compound,
                   ):
    """Yield (funcname, name, vartype) for every variable in the given file.

    "funcname" is None for declarations that have no body.  Passing a
    true "preprocessed" raises NotImplementedError.  The underscore
    parameters are injection points for the helpers used.
    """
    if preprocessed:
        raise NotImplementedError
    srclines = _iter_source_lines(filename)
    for stmt, body in _iter_global(srclines):
        # At the file top-level we only have to worry about vars & funcs.
        if body:
            funcname, _ = _parse_func(stmt, body)
            found = _iter_locals(
                body,
                _iter_statements=_iter_local,
                _parse_var=_parse_var,
                _parse_compound=_parse_compound,
            )
            for name, vartype in found:
                yield (funcname, name, vartype)
            continue

        name, vartype = _parse_var(stmt)
        if name:
            yield (None, name, vartype)
-
-
def _iter_locals(lines, *,
                 _iter_statements=iter_local_statements,
                 _parse_var=_parse_var,
                 _parse_compound=parse_compound,
                 ):
    """Yield (name, vartype) for each variable declared in a function body.

    "lines" is the body text.  Bodies of compound statements are queued
    and processed in the order they were found.
    """
    pending = [lines]
    index = 0
    while index < len(pending):
        text = pending[index]
        index += 1
        for stmt, blocks in _iter_statements(text.splitlines()):
            if blocks:
                headers, bodies = _parse_compound(stmt, blocks)
                for header in headers:
                    for line in header:
                        name, vartype = _parse_var(line)
                        if name:
                            yield (name, vartype)
                pending.extend(bodies)
                continue

            name, vartype = _parse_var(stmt)
            if name:
                yield (name, vartype)
-
-
def iter_all(filename, *,
             preprocessed=False,
             ):
    """Yield (kind, funcname, name, decl) for each declaration found.

    Currently only variables are reported, so "kind" is always
    "variable".
    """
    # XXX For the moment we cheat.
    found = iter_variables(filename, preprocessed=preprocessed)
    for funcname, name, decl in found:
        yield 'variable', funcname, name, decl
diff --git a/Tools/c-analyzer/c_analyzer/parser/find.py b/Tools/c-analyzer/c_analyzer/parser/find.py
deleted file mode 100644
index 3860d3d459..0000000000
--- a/Tools/c-analyzer/c_analyzer/parser/find.py
+++ /dev/null
@@ -1,107 +0,0 @@
-from ..common.info import UNKNOWN, ID
-
-from . import declarations
-
-# XXX need tests:
-# * variables
-# * variable
-# * variable_from_id
-
-
def _iter_vars(filenames, preprocessed, *,
               handle_id=None,
               _iter_decls=declarations.iter_all,
               ):
    """Yield (varid, decl) for each variable declared in the given files.

    "handle_id" is called with (filename, funcname, name) to build each
    ID; it defaults to ID.  A false "filenames" yields nothing.
    """
    make_id = ID if handle_id is None else handle_id

    for filename in filenames or ():
        decls = _iter_decls(filename, preprocessed=preprocessed)
        for kind, funcname, name, decl in decls:
            if kind == 'variable':
                yield make_id(filename, funcname, name), decl
-
-
-# XXX Add a "handle_var" arg like we did for get_resolver()?
-
def variables(*filenames,
              perfilecache=None,
              preprocessed=False,
              known=None,  # for types
              handle_id=None,
              _iter_vars=_iter_vars,
              ):
    """Yield (varid, decl) for each variable found in the given files.

    The filenames may be given either as separate positional arguments
    or as a single (non-string) iterable of filenames.

    "preprocessed" is passed through to the underlying variable
    iterator.  "handle_id", if given, is forwarded too so that callers
    can customize how IDs are built.

    Raises NotImplementedError if "perfilecache" is provided.
    """
    # Accept a lone iterable of filenames.  Use isinstance(): a bare
    # "(value, str)" tuple is always true, so the unpacking never ran.
    if len(filenames) == 1 and not isinstance(filenames[0], str):
        filenames, = filenames

    if perfilecache is None:
        # Only forward handle_id when it was actually supplied, so that
        # injected iterators with the older call shape keep working.
        extra = {} if handle_id is None else {'handle_id': handle_id}
        yield from _iter_vars(filenames, preprocessed, **extra)
    else:
        # XXX Cache per-file variables (e.g. `{filename: [(varid, decl)]}`).
        raise NotImplementedError
-
-
def variable(name, filenames, *,
             local=False,
             perfilecache=None,
             preprocessed=False,
             handle_id=None,
             _iter_vars=variables,
             ):
    """Return (varid, decl) for the first found variable that matches.

    With "local" false, only variables without a funcname match.  With
    "local" true, only variables with a funcname match, and the first
    one with the right name is returned regardless of which function
    it belongs to.  To avoid that, pass perfilecache and pop each
    variable from the cache after using it.

    Returns (None, None) if no matching variable was found.
    """
    candidates = _iter_vars(filenames,
                            perfilecache=perfilecache,
                            preprocessed=preprocessed,
                            )
    for varid, decl in candidates:
        if varid.name != name:
            continue
        if not local:
            if not varid.funcname:
                return varid, decl
            continue
        if varid.funcname:
            if varid.funcname == UNKNOWN:
                raise NotImplementedError
            return varid, decl
    return None, None  # No matching variable was found.
-
-
def variable_from_id(id, filenames, *,
                     perfilecache=None,
                     preprocessed=False,
                     handle_id=None,
                     _get_var=variable,
                     ):
    """Return (varid, decl) for the first found variable that matches.

    "id" is either a plain variable name or an object with "filename",
    "funcname" and "name" attributes.  A known filename on the ID
    replaces "filenames".  An UNKNOWN funcname requests a local
    variable; any other non-empty funcname raises NotImplementedError.
    """
    if isinstance(id, str):
        name, local = id, False
    else:
        local = (id.funcname == UNKNOWN)
        if not local and id.funcname:
            raise NotImplementedError

        name = id.name
        if id.filename and id.filename != UNKNOWN:
            filenames = [id.filename]

    return _get_var(
        name,
        filenames,
        local=local,
        perfilecache=perfilecache,
        preprocessed=preprocessed,
        handle_id=handle_id,
    )
diff --git a/Tools/c-analyzer/c_analyzer/parser/naive.py b/Tools/c-analyzer/c_analyzer/parser/naive.py
deleted file mode 100644
index 4a4822d84f..0000000000
--- a/Tools/c-analyzer/c_analyzer/parser/naive.py
+++ /dev/null
@@ -1,179 +0,0 @@
-import re
-
-from ..common.info import UNKNOWN, ID
-
-from .preprocessor import _iter_clean_lines
-
-
-_NOT_SET = object()
-
-
def get_srclines(filename, *,
                 cache=None,
                 _open=open,
                 _iter_lines=_iter_clean_lines,
                 ):
    """Return the file's cleaned lines as a list.

    Lines that start with "#" are dropped and trailing whitespace
    (including the newline) is removed from the rest.

    If a cache is given then it is consulted first and updated after
    the file has been read.
    """
    if cache is not None:
        try:
            return cache[filename]
        except KeyError:
            pass

    with _open(filename) as srcfile:
        kept = [text
                for _, text in _iter_lines(srcfile)
                if not text.startswith('#')]
    srclines = [text.rstrip() for text in kept]

    if cache is not None:
        cache[filename] = srclines
    return srclines
-
-
def parse_variable_declaration(srcline):
    """Return (name, decl) for the given declaration line.

    "decl" is the text before the first "=", or the whole line without
    surrounding ";" when there is no initializer.  "name" is the last
    word of that text, ignoring a trailing "[...]".

    Returns (None, None) if the line has neither "=" nor a trailing
    ";", or if no name can be found.
    """
    # XXX possible false negatives...
    if '=' in srcline:
        decl = srcline.split('=', 1)[0]
    elif srcline.endswith(';'):
        decl = srcline.strip(';')
    else:
        return None, None

    decl = decl.strip()
    matched = re.match(r'.*\b(\w+)\s*(?:\[[^\]]*\])?$', decl)
    if matched is None:
        return None, None
    return matched.group(1), decl
-
-
def parse_variable(srcline, funcname=None):
    """Return (name, decl) for the variable declared on the line.

    Only lines starting with "static " are considered.  A line that
    contains "(" but no "[" is taken to be a function.  In every
    unsupported case (None, None) is returned.  "funcname" is accepted
    but not used.
    """
    stripped = srcline.strip()

    # XXX Handle more than just static variables.
    if not stripped.startswith('static '):
        return None, None

    looks_like_function = '(' in stripped and '[' not in stripped
    if looks_like_function:
        return None, None
    return parse_variable_declaration(stripped)
-
-
def iter_variables(filename, *,
                   srccache=None,
                   parse_variable=None,
                   _get_srclines=get_srclines,
                   _default_parse_variable=parse_variable,
                   ):
    """Yield (varid, decl) for each variable in the given source file.

    The enclosing function is tracked naively: a line containing "("
    followed by a "{" line at the same indent opens a function (unless
    the first line starts with "__attribute__"), and a "}" line at that
    indent closes it.

    "parse_variable" is called with (line, funcname) and may return
    either a (name, decl) pair or a list of (name, funcname, decl).
    """
    parse = _default_parse_variable if parse_variable is None else parse_variable

    indent = ''
    prev = ''
    funcname = None
    for line in _get_srclines(filename, cache=srccache):
        # remember current funcname
        if funcname:
            if line == indent + '}':
                funcname = None
                continue
        elif '(' in prev and line == indent + '{':
            if not prev.startswith('__attribute__'):
                funcname = prev.split('(')[0].split()[-1]
            prev = ''
            continue
        else:
            indent = line[:-len(line.lstrip())]
            prev = line

        info = parse(line, funcname)
        if isinstance(info, list):
            for name, _funcname, decl in info:
                yield ID(filename, _funcname, name), decl
            continue

        name, decl = info
        if name is not None:
            yield ID(filename, funcname, name), decl
-
-
-def _match_varid(variable, name, funcname, ignored=None):
- if ignored and variable in ignored:
- return False
-
- if variable.name != name:
- return False
-
- if funcname == UNKNOWN:
- if not variable.funcname:
- return False
- elif variable.funcname != funcname:
- return False
-
- return True
-
-
def find_variable(filename, funcname, name, *,
                  ignored=None,
                  srccache=None,  # {filename: lines}
                  parse_variable=None,
                  _iter_variables=iter_variables,
                  ):
    """Return (varid, decl) for the first matching variable in the file.

    Return None if the variable is not found.
    """
    candidates = _iter_variables(filename,
                                 srccache=srccache,
                                 parse_variable=parse_variable,
                                 )
    matches = ((varid, decl)
               for varid, decl in candidates
               if _match_varid(varid, name, funcname, ignored))
    return next(matches, None)
-
-
def find_variables(varids, filenames=None, *,
                   srccache=_NOT_SET,
                   parse_variable=None,
                   _find_symbol=find_variable,
                   ):
    """Yield (varid, decl) for each ID.

    Each ID is looked up in its own file when that is known, otherwise
    in each of "filenames" in turn.  A variable that was already
    yielded is never matched again.

    If the variable is not found then its decl will be UNKNOWN.  That
    way there will be one resulting variable per given ID.
    """
    if srccache is _NOT_SET:
        srccache = {}

    used = set()
    for varid in varids:
        if varid.filename and varid.filename != UNKNOWN:
            srcfiles = [varid.filename]
        else:
            if not filenames:
                yield varid, UNKNOWN
                continue
            srcfiles = filenames
        for filename in srcfiles:
            # Use the injected finder (not an undefined global) and keep
            # the result in its own names so that the requested ID is
            # still available for the "not found" case below.
            result = _find_symbol(filename, varid.funcname, varid.name,
                                  ignored=used,
                                  srccache=srccache,
                                  parse_variable=parse_variable,
                                  )
            if not result:
                # The finder returns None when nothing matched.
                continue
            found, decl = result
            if found:
                yield found, decl
                used.add(found)
                break
        else:
            yield varid, UNKNOWN
diff --git a/Tools/c-analyzer/c_analyzer/parser/preprocessor.py b/Tools/c-analyzer/c_analyzer/parser/preprocessor.py
deleted file mode 100644
index 41f306e5f8..0000000000
--- a/Tools/c-analyzer/c_analyzer/parser/preprocessor.py
+++ /dev/null
@@ -1,511 +0,0 @@
-from collections import namedtuple
-import shlex
-import os
-import re
-
-from ..common import util, info
-
-
# A backslash immediately followed by the end of the line (a C line
# continuation).  Lines read from a file opened in text mode always end in
# "\n", whatever the platform, so os.linesep must not be used here.
CONTINUATION = '\\\n'

# NOTE(review): this also accepts names that start with a digit (e.g. "1a");
# confirm whether that is intended.
IDENTIFIER = r'(?:\w*[a-zA-Z]\w*)'
IDENTIFIER_RE = re.compile('^' + IDENTIFIER + '$')
-
-
-def _coerce_str(value):
- if not value:
- return ''
- return str(value).strip()
-
-
-#############################
-# directives
-
# All of the patterns below are written for re.VERBOSE, so whitespace inside
# them is insignificant.

# Optional leading whitespace, the "#", and any whitespace after it.
DIRECTIVE_START = r'''
    (?:
      ^ \s*
      [#] \s*
      )'''
# The rest of the line; the single group captures the text with surrounding
# whitespace removed (None if there is no text).
DIRECTIVE_TEXT = r'''
    (?:
      (?: \s+ ( .*\S ) )?
      \s* $
      )'''
# A whole directive line.  Group 1 is the directive kind, group 2 its text.
# NOTE(review): "elseif" is accepted here but "elif" is not -- confirm
# against the directives this is expected to handle.
DIRECTIVE = rf'''
    (?:
      {DIRECTIVE_START}
      (
        include |
        error | warning |
        pragma |
        define | undef |
        if | ifdef | ifndef | elseif | else | endif |
        __FILE__ | __LINE__ | __DATE__ | __TIME__ | __TIMESTAMP__
        )
      {DIRECTIVE_TEXT}
      )'''
#       (?:
#        [^\\\n] |
#        \\ [^\n] |
#        \\ \n
#        )+
#       ) \n
#     )'''
DIRECTIVE_RE = re.compile(DIRECTIVE, re.VERBOSE)

# A "#define" line.  Group 1 is the name, group 2 the text between the
# parentheses (None when there are no parentheses), and group 3 the
# remaining text.
DEFINE = rf'''
    (?:
      {DIRECTIVE_START} define \s+
      (?:
        ( \w*[a-zA-Z]\w* )
        (?: \s* [(] ([^)]*) [)] )?
        )
      {DIRECTIVE_TEXT}
      )'''
DEFINE_RE = re.compile(DEFINE, re.VERBOSE)
-
-
def parse_directive(line):
    """Return the appropriate directive for the given line.

    The line is normalized first: surrounding whitespace is removed,
    whitespace between "#" and the directive name is dropped, and runs
    of spaces are collapsed to a single space.  Raises ValueError (from
    the underlying parser) for unsupported or malformed directives.
    """
    line = line.strip()
    if line.startswith('#'):
        line = line[1:].lstrip()
        line = '#' + line
    directive = line
    #directive = '#' + line
    # Look for a *double* space; testing for a single space would never
    # terminate once the directive contains any space at all.
    while '  ' in directive:
        directive = directive.replace('  ', ' ')
    return _parse_directive(directive)
-
-
def _parse_directive(line):
    """Return the directive object for an already-normalized line.

    "#define" lines become a Macro (when an argument list is present)
    or a Constant.  Other lines become an Include, IfDirective or
    OtherDirective, which is validated before being returned.

    Raises ValueError if the directive is unsupported, lacks required
    text, or has text where none is allowed.
    """
    define = DEFINE_RE.match(line)
    if define:
        name, args, text = define.groups()
        if not args:
            return Constant(name, text)
        return Macro(name, [a.strip() for a in args.split(',')], text)

    matched = DIRECTIVE_RE.match(line)
    if not matched:
        raise ValueError(f'unsupported directive {line!r}')
    kind, text = matched.groups()

    textless = kind in ('else', 'endif')
    if not text:
        if not textless:
            raise ValueError(f'missing text in directive {line!r}')
    elif textless or kind == 'define':
        raise ValueError(f'unexpected text in directive {line!r}')

    if kind == 'include':
        directive = Include(text)
    elif kind in IfDirective.KINDS:
        directive = IfDirective(kind, text)
    else:
        directive = OtherDirective(kind, text)
    directive.validate()
    return directive
-
-
class PreprocessorDirective(util._NTBase):
    """The base class for directives."""

    __slots__ = ()

    # Every directive kind known to this module.
    KINDS = frozenset({
        'include',
        'pragma',
        'error', 'warning',
        'define', 'undef',
        'if', 'ifdef', 'ifndef', 'elseif', 'else', 'endif',
        '__FILE__', '__DATE__', '__LINE__', '__TIME__', '__TIMESTAMP__',
    })

    @property
    def text(self):
        """The non-blank fields after "kind", space-joined (or None)."""
        parts = [v for v in self[1:] if v and v.strip()]
        return ' '.join(parts) or None

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        super().validate()

        if not self.kind:
            raise TypeError('missing kind')
        if self.kind not in self.KINDS:
            raise ValueError

        # text can be anything, including None.
-
-
class Constant(PreprocessorDirective,
               namedtuple('Constant', 'kind name value')):
    """A single "constant" directive ("define")."""

    __slots__ = ()

    def __new__(cls, name, value=None):
        # Blank/false inputs are normalized to None.
        return super().__new__(
            cls,
            'define',
            name=_coerce_str(name) or None,
            value=_coerce_str(value) or None,
        )

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        super().validate()

        if not self.name:
            raise TypeError('missing name')
        if not IDENTIFIER_RE.match(self.name):
            raise ValueError(f'name must be identifier, got {self.name!r}')

        # value can be anything, including None
-
-
class Macro(PreprocessorDirective,
            namedtuple('Macro', 'kind name args body')):
    """A single "macro" directive ("define")."""

    __slots__ = ()

    def __new__(cls, name, args, body=None):
        # "args" must be a string or an iterable of strings (or "empty").
        if isinstance(args, str):
            args = [part.strip() for part in args.split(',')]
        if args:
            args = tuple(_coerce_str(a) or None for a in args)
        return super().__new__(
            cls,
            kind='define',
            name=_coerce_str(name) or None,
            args=args if args else (),
            body=_coerce_str(body) or None,
        )

    @property
    def text(self):
        """The macro as "name(args)", followed by the body if any."""
        signature = f'{self.name}({", ".join(self.args)})'
        if self.body:
            return f'{signature} {self.body}'
        return signature

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        super().validate()

        if not self.name:
            raise TypeError('missing name')
        if not IDENTIFIER_RE.match(self.name):
            raise ValueError(f'name must be identifier, got {self.name!r}')

        for arg in self.args:
            if not arg:
                raise ValueError(f'missing arg in {self.args}')
            if not IDENTIFIER_RE.match(arg):
                raise ValueError(f'arg must be identifier, got {arg!r}')

        # body can be anything, including None
-
-
class IfDirective(PreprocessorDirective,
                  namedtuple('IfDirective', 'kind condition')):
    """A single conditional directive (e.g. "if", "ifdef").

    This only includes directives that actually provide conditions.  The
    related directives "else" and "endif" are covered by OtherDirective
    instead.
    """

    __slots__ = ()

    KINDS = frozenset({
        'if',
        'ifdef',
        'ifndef',
        'elseif',
    })

    @classmethod
    def _condition_from_raw(cls, raw, kind):
        """Return the normalized condition text (None if it is blank)."""
        #return Condition.from_raw(raw, _kind=kind)
        text = _coerce_str(raw)
        if not text:
            return None
        if kind == 'ifdef':
            return f'defined({text})'
        if kind == 'ifndef':
            return f'! defined({text})'
        return text

    def __new__(cls, kind, condition):
        kind = _coerce_str(kind)
        return super().__new__(
            cls,
            kind=kind or None,
            condition=cls._condition_from_raw(condition, kind),
        )

    @property
    def text(self):
        """The condition as it would appear in the source directive."""
        if self.kind == 'ifdef':
            return self.condition[len('defined('):-1]
        if self.kind == 'ifndef':
            return self.condition[len('! defined('):-1]
        return self.condition
        #return str(self.condition)

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        super().validate()

        if not self.condition:
            raise TypeError('missing condition')
        #else:
        #    for cond in self.condition:
        #        if not cond:
        #            raise ValueError(f'missing condition in {self.condition}')
        #        cond.validate()
        #    if self.kind in ('ifdef', 'ifndef'):
        #        if len(self.condition) != 1:
        #            raise ValueError('too many condition')
        #        if self.kind == 'ifdef':
        #            if not self.condition[0].startswith('defined '):
        #                raise ValueError('bad condition')
        #        else:
        #            if not self.condition[0].startswith('! defined '):
        #                raise ValueError('bad condition')
-
-
class Include(PreprocessorDirective,
              namedtuple('Include', 'kind file')):
    """A single "include" directive.

    Supported "file" values follow either the bracket style (<stdio>)
    or the double-quote style ("spam.h").
    """

    __slots__ = ()

    def __new__(cls, file):
        # A blank/false file is normalized to None.
        return super().__new__(
            cls,
            kind='include',
            file=_coerce_str(file) or None,
        )

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        super().validate()

        if not self.file:
            raise TypeError('missing file')
-
-
class OtherDirective(PreprocessorDirective,
                     namedtuple('OtherDirective', 'kind text')):
    """A single directive not covered by another class.

    This includes the "else", "endif", and "undef" directives, which are
    otherwise inherently related to the directives covered by the
    Constant, Macro, and IfDirective classes.

    Note that all directives must have a text value, except for "else"
    and "endif" (which must have no text).
    """

    __slots__ = ()

    # Every kind that does not have a dedicated class.
    KINDS = PreprocessorDirective.KINDS - {'include', 'define'} - IfDirective.KINDS

    def __new__(cls, kind, text):
        return super().__new__(
            cls,
            kind=_coerce_str(kind) or None,
            text=_coerce_str(text) or None,
        )

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        super().validate()

        textless = self.kind in ('else', 'endif')
        if self.text:
            if textless:
                raise ValueError('unexpected text in directive')
        elif not textless:
            raise TypeError('missing text')
-
-
-#############################
-# iterating lines
-
-def _recompute_conditions(directive, ifstack):
- if directive.kind in ('if', 'ifdef', 'ifndef'):
- ifstack.append(
- ([], directive.condition))
- elif directive.kind == 'elseif':
- if ifstack:
- negated, active = ifstack.pop()
- if active:
- negated.append(active)
- else:
- negated = []
- ifstack.append(
- (negated, directive.condition))
- elif directive.kind == 'else':
- if ifstack:
- negated, active = ifstack.pop()
- if active:
- negated.append(active)
- ifstack.append(
- (negated, None))
- elif directive.kind == 'endif':
- if ifstack:
- ifstack.pop()
-
- conditions = []
- for negated, active in ifstack:
- for condition in negated:
- conditions.append(f'! ({condition})')
- if active:
- conditions.append(active)
- return tuple(conditions)
-
-
def _iter_clean_lines(lines):
    """Yield (lno, line) for each logical line, with comments removed.

    Continued lines are joined (with a single space) and each comment is
    replaced by a single space.  "lno" is the 1-based number of the last
    physical line that contributed to the logical line.

    Raises Exception if the input ends inside a block comment.
    """
    # Number the physical lines; the same iterator is advanced below
    # whenever a logical line spans more than one physical line.
    lines = iter(enumerate(lines, 1))
    for lno, line in lines:
        # Handle line continuations.
        while line.endswith(CONTINUATION):
            try:
                lno, _line = next(lines)
            except StopIteration:
                # A continuation on the very last line; keep it as-is.
                break
            line = line[:-len(CONTINUATION)] + ' ' + _line

        # Deal with comments.
        # "after" holds the text still to be scanned; "line" is rebuilt
        # from the non-comment pieces.
        after = line
        line = ''
        while True:
            # Look for a comment.
            before, begin, remainder = after.partition('/*')
            if '//' in before:
                # A line comment that starts before any block comment:
                # the rest of the line is dropped.
                before, _, _ = before.partition('//')
                line += before + ' '  # per the C99 spec
                break
            line += before
            if not begin:
                # No block comment left on this line.
                break
            line += ' '  # per the C99 spec

            # Go until we find the end of the comment.
            _, end, after = remainder.partition('*/')
            while not end:
                try:
                    lno, remainder = next(lines)
                except StopIteration:
                    raise Exception('unterminated comment')
                _, end, after = remainder.partition('*/')

        yield lno, line
-
-
def iter_lines(lines, *,
               _iter_clean_lines=_iter_clean_lines,
               _parse_directive=_parse_directive,
               _recompute_conditions=_recompute_conditions,
               ):
    """Yield (lno, line, directive, active conditions) for each given line.

    This is effectively a subset of the operations taking place in
    translation phases 2-4 from the C99 spec (ISO/IEC 9899:TC2); see
    section 5.1.1.2.  Line continuations are removed and comments
    replaced with a single space.  (In both cases "lno" will be the last
    line involved.)  Otherwise each line is returned as-is.

    "lno" is the (1-indexed) line number for the line.

    "directive" will be a PreprocessorDirective or None, depending on
    whether or not there is a directive on the line.

    "active conditions" is the set of preprocessor conditions (e.g.
    "defined()") under which the current line of code will be included
    in compilation.  That set is derived from every conditional
    directive block (e.g. "if defined()", "ifdef", "else") containing
    that line.  That includes nested directives.  Note that the
    current line does not affect the active conditions for itself.
    It only impacts subsequent lines.  That applies to directives
    that close blocks (e.g. "endif") just as much as conditional
    directives.  Also note that "else" and "elseif" directives
    update the active conditions (for later lines), rather than
    adding to them.
    """
    ifstack = []
    conditions = ()
    for lno, line in _iter_clean_lines(lines):
        stripped = line.strip()
        if not stripped.startswith('#'):
            yield lno, line, None, conditions
            continue

        # Normalize the directive text before parsing it.
        directive = '#' + stripped[1:].lstrip()
        # Look for a *double* space; testing for a single space would
        # never terminate once the directive contains any space at all.
        while '  ' in directive:
            directive = directive.replace('  ', ' ')
        directive = _parse_directive(directive)
        yield lno, line, directive, conditions

        # Only conditional directives change the conditions, and only
        # for the lines that follow.
        if directive.kind in ('else', 'endif'):
            conditions = _recompute_conditions(directive, ifstack)
        elif isinstance(directive, IfDirective):
            conditions = _recompute_conditions(directive, ifstack)
-
-
-#############################
-# running (platform-specific?)
-
def _gcc(filename, *,
         _get_argv=(lambda: _get_gcc_argv()),
         _run=util.run_cmd,
         ):
    """Run the compiler command with "-E" on the file; return its output."""
    cmd = _get_argv()
    cmd.extend(('-E', filename))
    return _run(cmd)
-
-
def _get_gcc_argv(*,
                  _open=open,
                  _run=util.run_cmd,
                  ):
    """Return the compiler argv (command plus flags) reported by make.

    A small helper makefile is written to /tmp/print.mk and used along
    with "Makefile" to print the CC and PY_CORE_CFLAGS variables.
    """
    # NOTE(review): the helper makefile lives at a fixed path under /tmp.
    with _open('/tmp/print.mk', 'w') as tmpfile:
        tmpfile.write('print-%:\n')
        #tmpfile.write('\t@echo $* = $($*)\n')
        tmpfile.write('\t@echo $($*)\n')
    make_argv = [
        '/usr/bin/make',
        '-f', 'Makefile',
        '-f', '/tmp/print.mk',
        'print-CC',
        'print-PY_CORE_CFLAGS',
    ]
    output = _run(make_argv)
    # Exactly two output lines are expected: the compiler, then the flags.
    gcc, cflags = output.strip().splitlines()
    return shlex.split(gcc.strip()) + shlex.split(cflags.strip())
-
-
def run(filename, *,
        _gcc=_gcc,
        ):
    """Return the text of the given file after running the preprocessor."""
    preprocessed = _gcc(filename)
    return preprocessed
diff --git a/Tools/c-analyzer/c_analyzer/parser/source.py b/Tools/c-analyzer/c_analyzer/parser/source.py
deleted file mode 100644
index f8998c8a33..0000000000
--- a/Tools/c-analyzer/c_analyzer/parser/source.py
+++ /dev/null
@@ -1,34 +0,0 @@
-from . import preprocessor
-
-
def iter_clean_lines(lines):
    """Yield each non-blank line with comments and outer whitespace removed.

    Both "//" line comments and "/* ... */" block comments are handled,
    including block comments that span several lines, several block
    comments on one line, and code that follows a closing "*/".  Each
    block comment is replaced by a single space.

    Comment markers inside string or character literals are not
    recognized as such; they are treated as comments.
    """
    incomment = False
    for line in lines:
        kept = []
        rest = line
        while rest:
            if incomment:
                # Skip ahead to the end of the block comment, if it is
                # on this line; whatever follows it is code again.
                _, closed, rest = rest.partition('*/')
                if not closed:
                    break
                incomment = False
                kept.append(' ')
                continue

            block = rest.find('/*')
            eol = rest.find('//')
            if eol >= 0 and (block < 0 or eol < block):
                # A line comment comes first: drop the rest of the line.
                kept.append(rest[:eol])
                break
            if block < 0:
                kept.append(rest)
                break
            kept.append(rest[:block])
            rest = rest[block + 2:]
            incomment = True

        # Ignore blank lines and leading/trailing whitespace.
        cleaned = ''.join(kept).strip()
        if cleaned:
            yield cleaned
-
-
def iter_lines(filename, *,
               preprocess=preprocessor.run,
               ):
    """Return an iterator over the lines of the preprocessed file."""
    return iter(preprocess(filename).splitlines())