diff options
author | Anthony Sottile <asottile@umich.edu> | 2019-07-30 05:20:30 -0700 |
---|---|---|
committer | Steven Myint <git@stevenmyint.com> | 2019-07-30 05:20:30 -0700 |
commit | eeb626328dafd600a30a79c3a37e8b8f4387a187 (patch) | |
tree | c813fd39b073dc9abc7e0180482fae4cf160a149 | |
parent | c0193b288daf817a113fe4b16fa12d8e6be6fec8 (diff) | |
download | pyflakes-eeb626328dafd600a30a79c3a37e8b8f4387a187.tar.gz |
String formatting linting (#443)
* Add lint rule for f-strings without placeholders
* Add linting for string.format(...)
* Add linting for % formatting
-rw-r--r-- | pyflakes/checker.py | 347 | ||||
-rw-r--r-- | pyflakes/messages.py | 96 | ||||
-rw-r--r-- | pyflakes/test/test_other.py | 153 |
3 files changed, 593 insertions, 3 deletions
diff --git a/pyflakes/checker.py b/pyflakes/checker.py index acc2ae7..f9ae0aa 100644 --- a/pyflakes/checker.py +++ b/pyflakes/checker.py @@ -12,6 +12,7 @@ import doctest import functools import os import re +import string import sys import tokenize @@ -29,6 +30,8 @@ except AttributeError: builtin_vars = dir(__import__('__builtin__' if PY2 else 'builtins')) +parse_format_string = string.Formatter().parse + if PY2: tokenize_tokenize = tokenize.generate_tokens else: @@ -81,6 +84,87 @@ TYPE_IGNORE_RE = re.compile(TYPE_COMMENT_RE.pattern + r'ignore\s*(#|$)') TYPE_FUNC_RE = re.compile(r'^(\(.*?\))\s*->\s*(.*)$') +MAPPING_KEY_RE = re.compile(r'\(([^()]*)\)') +CONVERSION_FLAG_RE = re.compile('[#0+ -]*') +WIDTH_RE = re.compile(r'(?:\*|\d*)') +PRECISION_RE = re.compile(r'(?:\.(?:\*|\d*))?') +LENGTH_RE = re.compile('[hlL]?') +# https://docs.python.org/3/library/stdtypes.html#old-string-formatting +VALID_CONVERSIONS = frozenset('diouxXeEfFgGcrsa%') + + +def _must_match(regex, string, pos): + # type: (Pattern[str], str, int) -> Match[str] + match = regex.match(string, pos) + assert match is not None + return match + + +def parse_percent_format(s): # type: (str) -> Tuple[PercentFormat, ...] + """Parses the string component of a `'...' % ...` format call + + Copied from https://github.com/asottile/pyupgrade at v1.20.1 + """ + + def _parse_inner(): + # type: () -> Generator[PercentFormat, None, None] + string_start = 0 + string_end = 0 + in_fmt = False + + i = 0 + while i < len(s): + if not in_fmt: + try: + i = s.index('%', i) + except ValueError: # no more % fields! + yield s[string_start:], None + return + else: + string_end = i + i += 1 + in_fmt = True + else: + key_match = MAPPING_KEY_RE.match(s, i) + if key_match: + key = key_match.group(1) # type: Optional[str] + i = key_match.end() + else: + key = None + + conversion_flag_match = _must_match(CONVERSION_FLAG_RE, s, i) + conversion_flag = conversion_flag_match.group() or None + i = conversion_flag_match.end() + + width_match = _must_match(WIDTH_RE, s, i) + width = width_match.group() or None + i = width_match.end() + + precision_match = _must_match(PRECISION_RE, s, i) + precision = precision_match.group() or None + i = precision_match.end() + + # length modifier is ignored + i = _must_match(LENGTH_RE, s, i).end() + + try: + conversion = s[i] + except IndexError: + raise ValueError('end-of-string while parsing format') + i += 1 + + fmt = (key, conversion_flag, width, precision, conversion) + yield s[string_start:string_end], fmt + + in_fmt = False + string_start = i + + if in_fmt: + raise ValueError('end-of-string while parsing format') + + return tuple(_parse_inner()) + + class _FieldsOrder(dict): """Fix order of AST node fields.""" @@ -1241,10 +1325,250 @@ class Checker(object): PASS = ignore # "expr" type nodes - BOOLOP = BINOP = UNARYOP = IFEXP = SET = \ - CALL = REPR = ATTRIBUTE = SUBSCRIPT = \ + BOOLOP = UNARYOP = IFEXP = SET = \ + REPR = ATTRIBUTE = SUBSCRIPT = \ STARRED = NAMECONSTANT = handleChildren + def _handle_string_dot_format(self, node): + try: + placeholders = tuple(parse_format_string(node.func.value.s)) + except ValueError as e: + self.report(messages.StringDotFormatInvalidFormat, node, e) + return + + class state: # py2-compatible `nonlocal` + auto = None + next_auto = 0 + + placeholder_positional = set() + placeholder_named = set() + + def _add_key(fmtkey): + """Returns True if there is an error which should early-exit""" + if fmtkey is None: # end of string or `{` / `}` escapes + return False + + # attributes / indices are allowed in `.format(...)` + fmtkey, _, _ = fmtkey.partition('.') + fmtkey, _, _ = fmtkey.partition('[') + + try: + fmtkey = int(fmtkey) + except ValueError: + pass + else: # fmtkey was an integer + if state.auto is True: + self.report(messages.StringDotFormatMixingAutomatic, node) + return True + else: + state.auto = False + + if fmtkey == '': + if state.auto is False: + self.report(messages.StringDotFormatMixingAutomatic, node) + return True + else: + state.auto = True + + fmtkey = state.next_auto + state.next_auto += 1 + + if isinstance(fmtkey, int): + placeholder_positional.add(fmtkey) + else: + placeholder_named.add(fmtkey) + + return False + + for _, fmtkey, spec, _ in placeholders: + if _add_key(fmtkey): + return + + # spec can also contain format specifiers + if spec is not None: + try: + spec_placeholders = tuple(parse_format_string(spec)) + except ValueError as e: + self.report(messages.StringDotFormatInvalidFormat, node, e) + return + + for _, spec_fmtkey, spec_spec, _ in spec_placeholders: + # can't recurse again + if spec_spec is not None and '{' in spec_spec: + self.report( + messages.StringDotFormatInvalidFormat, + node, + 'Max string recursion exceeded', + ) + return + if _add_key(spec_fmtkey): + return + + # bail early if there is *args or **kwargs + if ( + # python 2.x *args / **kwargs + getattr(node, 'starargs', None) or + getattr(node, 'kwargs', None) or + # python 3.x *args + any( + isinstance(arg, getattr(ast, 'Starred', ())) + for arg in node.args + ) or + # python 3.x **kwargs + any(kwd.arg is None for kwd in node.keywords) + ): + return + + substitution_positional = set(range(len(node.args))) + substitution_named = {kwd.arg for kwd in node.keywords} + + extra_positional = substitution_positional - placeholder_positional + extra_named = substitution_named - placeholder_named + + missing_arguments = ( + (placeholder_positional | placeholder_named) - + (substitution_positional | substitution_named) + ) + + if extra_positional: + self.report( + messages.StringDotFormatExtraPositionalArguments, + node, + ', '.join(sorted(str(x) for x in extra_positional)), + ) + if extra_named: + self.report( + messages.StringDotFormatExtraNamedArguments, + node, + ', '.join(sorted(extra_named)), + ) + if missing_arguments: + self.report( + messages.StringDotFormatMissingArgument, + node, + ', '.join(sorted(str(x) for x in missing_arguments)), + ) + + def CALL(self, node): + if ( + isinstance(node.func, ast.Attribute) and + isinstance(node.func.value, ast.Str) and + node.func.attr == 'format' + ): + self._handle_string_dot_format(node) + self.handleChildren(node) + + def _handle_percent_format(self, node): + try: + placeholders = parse_percent_format(node.left.s) + except ValueError: + self.report( + messages.PercentFormatInvalidFormat, + node, + 'incomplete format', + ) + return + + named = set() + positional_count = 0 + positional = None + for _, placeholder in placeholders: + if placeholder is None: + continue + name, _, width, precision, conversion = placeholder + + if conversion == '%': + continue + + if conversion not in VALID_CONVERSIONS: + self.report( + messages.PercentFormatUnsupportedFormatCharacter, + node, + conversion, + ) + + if positional is None and conversion: + positional = name is None + + for part in (width, precision): + if part is not None and '*' in part: + if not positional: + self.report( + messages.PercentFormatStarRequiresSequence, + node, + ) + else: + positional_count += 1 + + if positional and name is not None: + self.report( + messages.PercentFormatMixedPositionalAndNamed, + node, + ) + return + elif not positional and name is None: + self.report( + messages.PercentFormatMixedPositionalAndNamed, + node, + ) + return + + if positional: + positional_count += 1 + else: + named.add(name) + + if ( + isinstance(node.right, (ast.List, ast.Tuple)) and + # does not have any *splats (py35+ feature) + not any( + isinstance(elt, getattr(ast, 'Starred', ())) + for elt in node.right.elts + ) + ): + substitution_count = len(node.right.elts) + if positional and positional_count != substitution_count: + self.report( + messages.PercentFormatPositionalCountMismatch, + node, + positional_count, + substitution_count, + ) + elif not positional: + self.report(messages.PercentFormatExpectedMapping, node) + + if ( + isinstance(node.right, ast.Dict) and + all(isinstance(k, ast.Str) for k in node.right.keys) + ): + if positional and positional_count > 1: + self.report(messages.PercentFormatExpectedSequence, node) + return + + substitution_keys = {k.s for k in node.right.keys} + extra_keys = substitution_keys - named + missing_keys = named - substitution_keys + if not positional and extra_keys: + self.report( + messages.PercentFormatExtraNamedArguments, + node, + ', '.join(sorted(extra_keys)), + ) + if not positional and missing_keys: + self.report( + messages.PercentFormatMissingArgument, + node, + ', '.join(sorted(missing_keys)), + ) + + def BINOP(self, node): + if ( + isinstance(node.op, ast.Mod) and + isinstance(node.left, ast.Str) + ): + self._handle_percent_format(node) + self.handleChildren(node) + NUM = STR = BYTES = ELLIPSIS = CONSTANT = ignore # "slice" type nodes @@ -1273,7 +1597,24 @@ class Checker(object): self.report(messages.RaiseNotImplemented, node) # additional node types - COMPREHENSION = KEYWORD = FORMATTEDVALUE = JOINEDSTR = handleChildren + COMPREHENSION = KEYWORD = FORMATTEDVALUE = handleChildren + + _in_fstring = False + + def JOINEDSTR(self, node): + if ( + # the conversion / etc. flags are parsed as f-strings without + # placeholders + not self._in_fstring and + not any(isinstance(x, ast.FormattedValue) for x in node.values) + ): + self.report(messages.FStringMissingPlaceholders, node) + + self._in_fstring, orig = True, self._in_fstring + try: + self.handleChildren(node) + finally: + self._in_fstring = orig def DICT(self, node): # Complain if there are duplicate keys with different values diff --git a/pyflakes/messages.py b/pyflakes/messages.py index 8ae545f..cf67cf8 100644 --- a/pyflakes/messages.py +++ b/pyflakes/messages.py @@ -266,3 +266,99 @@ class InvalidPrintSyntax(Message): class IsLiteral(Message): message = 'use ==/!= to compare str, bytes, and int literals' + + +class FStringMissingPlaceholders(Message): + message = 'f-string is missing placeholders' + + +class StringDotFormatExtraPositionalArguments(Message): + message = "'...'.format(...) has unused arguments at position(s): %s" + + def __init__(self, filename, loc, extra_positions): + Message.__init__(self, filename, loc) + self.message_args = (extra_positions,) + + +class StringDotFormatExtraNamedArguments(Message): + message = "'...'.format(...) has unused named argument(s): %s" + + def __init__(self, filename, loc, extra_keywords): + Message.__init__(self, filename, loc) + self.message_args = (extra_keywords,) + + +class StringDotFormatMissingArgument(Message): + message = "'...'.format(...) is missing argument(s) for placeholder(s): %s" + + def __init__(self, filename, loc, missing_arguments): + Message.__init__(self, filename, loc) + self.message_args = (missing_arguments,) + + +class StringDotFormatMixingAutomatic(Message): + message = "'...'.format(...) mixes automatic and manual numbering" + + +class StringDotFormatInvalidFormat(Message): + message = "'...'.format(...) has invalid format string: %s" + + def __init__(self, filename, loc, error): + Message.__init__(self, filename, loc) + self.message_args = (error,) + + +class PercentFormatInvalidFormat(Message): + message = "'...' %% ... has invalid format string: %s" + + def __init__(self, filename, loc, error): + Message.__init__(self, filename, loc) + self.message_args = (error,) + + +class PercentFormatMixedPositionalAndNamed(Message): + message = "'...' %% ... has mixed positional and named placeholders" + + +class PercentFormatUnsupportedFormatCharacter(Message): + message = "'...' %% ... has unsupported format character %r" + + def __init__(self, filename, loc, c): + Message.__init__(self, filename, loc) + self.message_args = (c,) + + +class PercentFormatPositionalCountMismatch(Message): + message = "'...' %% ... has %d placeholder(s) but %d substitution(s)" + + def __init__(self, filename, loc, n_placeholders, n_substitutions): + Message.__init__(self, filename, loc) + self.message_args = (n_placeholders, n_substitutions) + + +class PercentFormatExtraNamedArguments(Message): + message = "'...' %% ... has unused named argument(s): %s" + + def __init__(self, filename, loc, extra_keywords): + Message.__init__(self, filename, loc) + self.message_args = (extra_keywords,) + + +class PercentFormatMissingArgument(Message): + message = "'...' %% ... is missing argument(s) for placeholder(s): %s" + + def __init__(self, filename, loc, missing_arguments): + Message.__init__(self, filename, loc) + self.message_args = (missing_arguments,) + + +class PercentFormatExpectedMapping(Message): + message = "'...' %% ... expected mapping but got sequence" + + +class PercentFormatExpectedSequence(Message): + message = "'...' %% ... expected sequence but got mapping" + + +class PercentFormatStarRequiresSequence(Message): + message = "'...' %% ... `*` specifier requires sequence" diff --git a/pyflakes/test/test_other.py b/pyflakes/test/test_other.py index 1b10e02..775fdde 100644 --- a/pyflakes/test/test_other.py +++ b/pyflakes/test/test_other.py @@ -1755,6 +1755,159 @@ class TestUnusedAssignment(TestCase): ''') +class TestStringFormatting(TestCase): + + @skipIf(version_info < (3, 6), 'new in Python 3.6') + def test_f_string_without_placeholders(self): + self.flakes("f'foo'", m.FStringMissingPlaceholders) + self.flakes(''' + f"""foo + bar + """ + ''', m.FStringMissingPlaceholders) + self.flakes(''' + print( + f'foo' + f'bar' + ) + ''', m.FStringMissingPlaceholders) + # this is an "escaped placeholder" but not a placeholder + self.flakes("f'{{}}'", m.FStringMissingPlaceholders) + # ok: f-string with placeholders + self.flakes(''' + x = 5 + print(f'{x}') + ''') + # ok: f-string with format specifiers + self.flakes(''' + x = 'a' * 90 + print(f'{x:.8}') + ''') + # ok: f-string with multiple format specifiers + self.flakes(''' + x = y = 5 + print(f'{x:>2} {y:>2}') + ''') + + def test_invalid_dot_format_calls(self): + self.flakes(''' + '{'.format(1) + ''', m.StringDotFormatInvalidFormat) + self.flakes(''' + '{} {1}'.format(1, 2) + ''', m.StringDotFormatMixingAutomatic) + self.flakes(''' + '{0} {}'.format(1, 2) + ''', m.StringDotFormatMixingAutomatic) + self.flakes(''' + '{}'.format(1, 2) + ''', m.StringDotFormatExtraPositionalArguments) + self.flakes(''' + '{}'.format(1, bar=2) + ''', m.StringDotFormatExtraNamedArguments) + self.flakes(''' + '{} {}'.format(1) + ''', m.StringDotFormatMissingArgument) + self.flakes(''' + '{2}'.format() + ''', m.StringDotFormatMissingArgument) + self.flakes(''' + '{bar}'.format() + ''', m.StringDotFormatMissingArgument) + # too much string recursion (placeholder-in-placeholder) + self.flakes(''' + '{:{:{}}}'.format(1, 2, 3) + ''', m.StringDotFormatInvalidFormat) + # ok: dotted / bracketed names need to handle the param differently + self.flakes("'{.__class__}'.format('')") + self.flakes("'{foo[bar]}'.format(foo={'bar': 'barv'})") + # ok: placeholder-placeholders + self.flakes(''' + print('{:{}} {}'.format(1, 15, 2)) + ''') + # ok: not a placeholder-placeholder + self.flakes(''' + print('{:2}'.format(1)) + ''') + # ok: not mixed automatic + self.flakes(''' + '{foo}-{}'.format(1, foo=2) + ''') + # ok: we can't determine statically the format args + self.flakes(''' + a = () + "{}".format(*a) + ''') + self.flakes(''' + k = {} + "{foo}".format(**k) + ''') + + def test_invalid_percent_format_calls(self): + self.flakes(''' + '%(foo)' % {'foo': 'bar'} + ''', m.PercentFormatInvalidFormat) + self.flakes(''' + '%s %(foo)s' % {'foo': 'bar'} + ''', m.PercentFormatMixedPositionalAndNamed) + self.flakes(''' + '%(foo)s %s' % {'foo': 'bar'} + ''', m.PercentFormatMixedPositionalAndNamed) + self.flakes(''' + '%j' % (1,) + ''', m.PercentFormatUnsupportedFormatCharacter) + self.flakes(''' + '%s %s' % (1,) + ''', m.PercentFormatPositionalCountMismatch) + self.flakes(''' + '%s %s' % (1, 2, 3) + ''', m.PercentFormatPositionalCountMismatch) + self.flakes(''' + '%(bar)s' % {} + ''', m.PercentFormatMissingArgument,) + self.flakes(''' + '%(bar)s' % {'bar': 1, 'baz': 2} + ''', m.PercentFormatExtraNamedArguments) + self.flakes(''' + '%(bar)s' % (1, 2, 3) + ''', m.PercentFormatExpectedMapping) + self.flakes(''' + '%s %s' % {'k': 'v'} + ''', m.PercentFormatExpectedSequence) + self.flakes(''' + '%(bar)*s' % {'bar': 'baz'} + ''', m.PercentFormatStarRequiresSequence) + # ok: single %s with mapping + self.flakes(''' + '%s' % {'foo': 'bar', 'baz': 'womp'} + ''') + # ok: does not cause a MemoryError (the strings aren't evaluated) + self.flakes(''' + "%1000000000000f" % 1 + ''') + # ok: %% should not count towards placeholder count + self.flakes(''' + '%% %s %% %s' % (1, 2) + ''') + # ok: * consumes one positional argument + self.flakes(''' + '%.*f' % (2, 1.1234) + '%*.*f' % (5, 2, 3.1234) + ''') + + @skipIf(version_info < (3, 5), 'new in Python 3.5') + def test_ok_percent_format_cannot_determine_element_count(self): + self.flakes(''' + a = [] + '%s %s' % [*a] + '%s %s' % (*a,) + ''') + self.flakes(''' + k = {} + '%(k)s' % {**k} + ''') + + class TestAsyncStatements(TestCase): @skipIf(version_info < (3, 5), 'new in Python 3.5') |