summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnthony Sottile <asottile@umich.edu>2019-07-30 05:20:30 -0700
committerSteven Myint <git@stevenmyint.com>2019-07-30 05:20:30 -0700
commiteeb626328dafd600a30a79c3a37e8b8f4387a187 (patch)
treec813fd39b073dc9abc7e0180482fae4cf160a149
parentc0193b288daf817a113fe4b16fa12d8e6be6fec8 (diff)
downloadpyflakes-eeb626328dafd600a30a79c3a37e8b8f4387a187.tar.gz
String formatting linting (#443)
* Add lint rule for f-strings without placeholders * Add linting for string.format(...) * Add linting for % formatting
-rw-r--r--pyflakes/checker.py347
-rw-r--r--pyflakes/messages.py96
-rw-r--r--pyflakes/test/test_other.py153
3 files changed, 593 insertions, 3 deletions
diff --git a/pyflakes/checker.py b/pyflakes/checker.py
index acc2ae7..f9ae0aa 100644
--- a/pyflakes/checker.py
+++ b/pyflakes/checker.py
@@ -12,6 +12,7 @@ import doctest
import functools
import os
import re
+import string
import sys
import tokenize
@@ -29,6 +30,8 @@ except AttributeError:
builtin_vars = dir(__import__('__builtin__' if PY2 else 'builtins'))
+parse_format_string = string.Formatter().parse
+
if PY2:
tokenize_tokenize = tokenize.generate_tokens
else:
@@ -81,6 +84,87 @@ TYPE_IGNORE_RE = re.compile(TYPE_COMMENT_RE.pattern + r'ignore\s*(#|$)')
TYPE_FUNC_RE = re.compile(r'^(\(.*?\))\s*->\s*(.*)$')
+MAPPING_KEY_RE = re.compile(r'\(([^()]*)\)')
+CONVERSION_FLAG_RE = re.compile('[#0+ -]*')
+WIDTH_RE = re.compile(r'(?:\*|\d*)')
+PRECISION_RE = re.compile(r'(?:\.(?:\*|\d*))?')
+LENGTH_RE = re.compile('[hlL]?')
+# https://docs.python.org/3/library/stdtypes.html#old-string-formatting
+VALID_CONVERSIONS = frozenset('diouxXeEfFgGcrsa%')
+
+
+def _must_match(regex, string, pos):
+ # type: (Pattern[str], str, int) -> Match[str]
+ match = regex.match(string, pos)
+ assert match is not None
+ return match
+
+
+def parse_percent_format(s): # type: (str) -> Tuple[PercentFormat, ...]
+ """Parses the string component of a `'...' % ...` format call
+
+ Copied from https://github.com/asottile/pyupgrade at v1.20.1
+ """
+
+ def _parse_inner():
+ # type: () -> Generator[PercentFormat, None, None]
+ string_start = 0
+ string_end = 0
+ in_fmt = False
+
+ i = 0
+ while i < len(s):
+ if not in_fmt:
+ try:
+ i = s.index('%', i)
+ except ValueError: # no more % fields!
+ yield s[string_start:], None
+ return
+ else:
+ string_end = i
+ i += 1
+ in_fmt = True
+ else:
+ key_match = MAPPING_KEY_RE.match(s, i)
+ if key_match:
+ key = key_match.group(1) # type: Optional[str]
+ i = key_match.end()
+ else:
+ key = None
+
+ conversion_flag_match = _must_match(CONVERSION_FLAG_RE, s, i)
+ conversion_flag = conversion_flag_match.group() or None
+ i = conversion_flag_match.end()
+
+ width_match = _must_match(WIDTH_RE, s, i)
+ width = width_match.group() or None
+ i = width_match.end()
+
+ precision_match = _must_match(PRECISION_RE, s, i)
+ precision = precision_match.group() or None
+ i = precision_match.end()
+
+ # length modifier is ignored
+ i = _must_match(LENGTH_RE, s, i).end()
+
+ try:
+ conversion = s[i]
+ except IndexError:
+ raise ValueError('end-of-string while parsing format')
+ i += 1
+
+ fmt = (key, conversion_flag, width, precision, conversion)
+ yield s[string_start:string_end], fmt
+
+ in_fmt = False
+ string_start = i
+
+ if in_fmt:
+ raise ValueError('end-of-string while parsing format')
+
+ return tuple(_parse_inner())
+
+
class _FieldsOrder(dict):
"""Fix order of AST node fields."""
@@ -1241,10 +1325,250 @@ class Checker(object):
PASS = ignore
# "expr" type nodes
- BOOLOP = BINOP = UNARYOP = IFEXP = SET = \
- CALL = REPR = ATTRIBUTE = SUBSCRIPT = \
+ BOOLOP = UNARYOP = IFEXP = SET = \
+ REPR = ATTRIBUTE = SUBSCRIPT = \
STARRED = NAMECONSTANT = handleChildren
+ def _handle_string_dot_format(self, node):
+ try:
+ placeholders = tuple(parse_format_string(node.func.value.s))
+ except ValueError as e:
+ self.report(messages.StringDotFormatInvalidFormat, node, e)
+ return
+
+ class state: # py2-compatible `nonlocal`
+ auto = None
+ next_auto = 0
+
+ placeholder_positional = set()
+ placeholder_named = set()
+
+ def _add_key(fmtkey):
+ """Returns True if there is an error which should early-exit"""
+ if fmtkey is None: # end of string or `{` / `}` escapes
+ return False
+
+ # attributes / indices are allowed in `.format(...)`
+ fmtkey, _, _ = fmtkey.partition('.')
+ fmtkey, _, _ = fmtkey.partition('[')
+
+ try:
+ fmtkey = int(fmtkey)
+ except ValueError:
+ pass
+ else: # fmtkey was an integer
+ if state.auto is True:
+ self.report(messages.StringDotFormatMixingAutomatic, node)
+ return True
+ else:
+ state.auto = False
+
+ if fmtkey == '':
+ if state.auto is False:
+ self.report(messages.StringDotFormatMixingAutomatic, node)
+ return True
+ else:
+ state.auto = True
+
+ fmtkey = state.next_auto
+ state.next_auto += 1
+
+ if isinstance(fmtkey, int):
+ placeholder_positional.add(fmtkey)
+ else:
+ placeholder_named.add(fmtkey)
+
+ return False
+
+ for _, fmtkey, spec, _ in placeholders:
+ if _add_key(fmtkey):
+ return
+
+ # spec can also contain format specifiers
+ if spec is not None:
+ try:
+ spec_placeholders = tuple(parse_format_string(spec))
+ except ValueError as e:
+ self.report(messages.StringDotFormatInvalidFormat, node, e)
+ return
+
+ for _, spec_fmtkey, spec_spec, _ in spec_placeholders:
+ # can't recurse again
+ if spec_spec is not None and '{' in spec_spec:
+ self.report(
+ messages.StringDotFormatInvalidFormat,
+ node,
+ 'Max string recursion exceeded',
+ )
+ return
+ if _add_key(spec_fmtkey):
+ return
+
+ # bail early if there is *args or **kwargs
+ if (
+ # python 2.x *args / **kwargs
+ getattr(node, 'starargs', None) or
+ getattr(node, 'kwargs', None) or
+ # python 3.x *args
+ any(
+ isinstance(arg, getattr(ast, 'Starred', ()))
+ for arg in node.args
+ ) or
+ # python 3.x **kwargs
+ any(kwd.arg is None for kwd in node.keywords)
+ ):
+ return
+
+ substitution_positional = set(range(len(node.args)))
+ substitution_named = {kwd.arg for kwd in node.keywords}
+
+ extra_positional = substitution_positional - placeholder_positional
+ extra_named = substitution_named - placeholder_named
+
+ missing_arguments = (
+ (placeholder_positional | placeholder_named) -
+ (substitution_positional | substitution_named)
+ )
+
+ if extra_positional:
+ self.report(
+ messages.StringDotFormatExtraPositionalArguments,
+ node,
+ ', '.join(sorted(str(x) for x in extra_positional)),
+ )
+ if extra_named:
+ self.report(
+ messages.StringDotFormatExtraNamedArguments,
+ node,
+ ', '.join(sorted(extra_named)),
+ )
+ if missing_arguments:
+ self.report(
+ messages.StringDotFormatMissingArgument,
+ node,
+ ', '.join(sorted(str(x) for x in missing_arguments)),
+ )
+
+ def CALL(self, node):
+ if (
+ isinstance(node.func, ast.Attribute) and
+ isinstance(node.func.value, ast.Str) and
+ node.func.attr == 'format'
+ ):
+ self._handle_string_dot_format(node)
+ self.handleChildren(node)
+
+ def _handle_percent_format(self, node):
+ try:
+ placeholders = parse_percent_format(node.left.s)
+ except ValueError:
+ self.report(
+ messages.PercentFormatInvalidFormat,
+ node,
+ 'incomplete format',
+ )
+ return
+
+ named = set()
+ positional_count = 0
+ positional = None
+ for _, placeholder in placeholders:
+ if placeholder is None:
+ continue
+ name, _, width, precision, conversion = placeholder
+
+ if conversion == '%':
+ continue
+
+ if conversion not in VALID_CONVERSIONS:
+ self.report(
+ messages.PercentFormatUnsupportedFormatCharacter,
+ node,
+ conversion,
+ )
+
+ if positional is None and conversion:
+ positional = name is None
+
+ for part in (width, precision):
+ if part is not None and '*' in part:
+ if not positional:
+ self.report(
+ messages.PercentFormatStarRequiresSequence,
+ node,
+ )
+ else:
+ positional_count += 1
+
+ if positional and name is not None:
+ self.report(
+ messages.PercentFormatMixedPositionalAndNamed,
+ node,
+ )
+ return
+ elif not positional and name is None:
+ self.report(
+ messages.PercentFormatMixedPositionalAndNamed,
+ node,
+ )
+ return
+
+ if positional:
+ positional_count += 1
+ else:
+ named.add(name)
+
+ if (
+ isinstance(node.right, (ast.List, ast.Tuple)) and
+ # does not have any *splats (py35+ feature)
+ not any(
+ isinstance(elt, getattr(ast, 'Starred', ()))
+ for elt in node.right.elts
+ )
+ ):
+ substitution_count = len(node.right.elts)
+ if positional and positional_count != substitution_count:
+ self.report(
+ messages.PercentFormatPositionalCountMismatch,
+ node,
+ positional_count,
+ substitution_count,
+ )
+ elif not positional:
+ self.report(messages.PercentFormatExpectedMapping, node)
+
+ if (
+ isinstance(node.right, ast.Dict) and
+ all(isinstance(k, ast.Str) for k in node.right.keys)
+ ):
+ if positional and positional_count > 1:
+ self.report(messages.PercentFormatExpectedSequence, node)
+ return
+
+ substitution_keys = {k.s for k in node.right.keys}
+ extra_keys = substitution_keys - named
+ missing_keys = named - substitution_keys
+ if not positional and extra_keys:
+ self.report(
+ messages.PercentFormatExtraNamedArguments,
+ node,
+ ', '.join(sorted(extra_keys)),
+ )
+ if not positional and missing_keys:
+ self.report(
+ messages.PercentFormatMissingArgument,
+ node,
+ ', '.join(sorted(missing_keys)),
+ )
+
+ def BINOP(self, node):
+ if (
+ isinstance(node.op, ast.Mod) and
+ isinstance(node.left, ast.Str)
+ ):
+ self._handle_percent_format(node)
+ self.handleChildren(node)
+
NUM = STR = BYTES = ELLIPSIS = CONSTANT = ignore
# "slice" type nodes
@@ -1273,7 +1597,24 @@ class Checker(object):
self.report(messages.RaiseNotImplemented, node)
# additional node types
- COMPREHENSION = KEYWORD = FORMATTEDVALUE = JOINEDSTR = handleChildren
+ COMPREHENSION = KEYWORD = FORMATTEDVALUE = handleChildren
+
+ _in_fstring = False
+
+ def JOINEDSTR(self, node):
+ if (
+ # the conversion / etc. flags are parsed as f-strings without
+ # placeholders
+ not self._in_fstring and
+ not any(isinstance(x, ast.FormattedValue) for x in node.values)
+ ):
+ self.report(messages.FStringMissingPlaceholders, node)
+
+ self._in_fstring, orig = True, self._in_fstring
+ try:
+ self.handleChildren(node)
+ finally:
+ self._in_fstring = orig
def DICT(self, node):
# Complain if there are duplicate keys with different values
diff --git a/pyflakes/messages.py b/pyflakes/messages.py
index 8ae545f..cf67cf8 100644
--- a/pyflakes/messages.py
+++ b/pyflakes/messages.py
@@ -266,3 +266,99 @@ class InvalidPrintSyntax(Message):
class IsLiteral(Message):
message = 'use ==/!= to compare str, bytes, and int literals'
+
+
+class FStringMissingPlaceholders(Message):
+ message = 'f-string is missing placeholders'
+
+
+class StringDotFormatExtraPositionalArguments(Message):
+ message = "'...'.format(...) has unused arguments at position(s): %s"
+
+ def __init__(self, filename, loc, extra_positions):
+ Message.__init__(self, filename, loc)
+ self.message_args = (extra_positions,)
+
+
+class StringDotFormatExtraNamedArguments(Message):
+ message = "'...'.format(...) has unused named argument(s): %s"
+
+ def __init__(self, filename, loc, extra_keywords):
+ Message.__init__(self, filename, loc)
+ self.message_args = (extra_keywords,)
+
+
+class StringDotFormatMissingArgument(Message):
+ message = "'...'.format(...) is missing argument(s) for placeholder(s): %s"
+
+ def __init__(self, filename, loc, missing_arguments):
+ Message.__init__(self, filename, loc)
+ self.message_args = (missing_arguments,)
+
+
+class StringDotFormatMixingAutomatic(Message):
+ message = "'...'.format(...) mixes automatic and manual numbering"
+
+
+class StringDotFormatInvalidFormat(Message):
+ message = "'...'.format(...) has invalid format string: %s"
+
+ def __init__(self, filename, loc, error):
+ Message.__init__(self, filename, loc)
+ self.message_args = (error,)
+
+
+class PercentFormatInvalidFormat(Message):
+ message = "'...' %% ... has invalid format string: %s"
+
+ def __init__(self, filename, loc, error):
+ Message.__init__(self, filename, loc)
+ self.message_args = (error,)
+
+
+class PercentFormatMixedPositionalAndNamed(Message):
+ message = "'...' %% ... has mixed positional and named placeholders"
+
+
+class PercentFormatUnsupportedFormatCharacter(Message):
+ message = "'...' %% ... has unsupported format character %r"
+
+ def __init__(self, filename, loc, c):
+ Message.__init__(self, filename, loc)
+ self.message_args = (c,)
+
+
+class PercentFormatPositionalCountMismatch(Message):
+ message = "'...' %% ... has %d placeholder(s) but %d substitution(s)"
+
+ def __init__(self, filename, loc, n_placeholders, n_substitutions):
+ Message.__init__(self, filename, loc)
+ self.message_args = (n_placeholders, n_substitutions)
+
+
+class PercentFormatExtraNamedArguments(Message):
+ message = "'...' %% ... has unused named argument(s): %s"
+
+ def __init__(self, filename, loc, extra_keywords):
+ Message.__init__(self, filename, loc)
+ self.message_args = (extra_keywords,)
+
+
+class PercentFormatMissingArgument(Message):
+ message = "'...' %% ... is missing argument(s) for placeholder(s): %s"
+
+ def __init__(self, filename, loc, missing_arguments):
+ Message.__init__(self, filename, loc)
+ self.message_args = (missing_arguments,)
+
+
+class PercentFormatExpectedMapping(Message):
+ message = "'...' %% ... expected mapping but got sequence"
+
+
+class PercentFormatExpectedSequence(Message):
+ message = "'...' %% ... expected sequence but got mapping"
+
+
+class PercentFormatStarRequiresSequence(Message):
+ message = "'...' %% ... `*` specifier requires sequence"
diff --git a/pyflakes/test/test_other.py b/pyflakes/test/test_other.py
index 1b10e02..775fdde 100644
--- a/pyflakes/test/test_other.py
+++ b/pyflakes/test/test_other.py
@@ -1755,6 +1755,159 @@ class TestUnusedAssignment(TestCase):
''')
+class TestStringFormatting(TestCase):
+
+ @skipIf(version_info < (3, 6), 'new in Python 3.6')
+ def test_f_string_without_placeholders(self):
+ self.flakes("f'foo'", m.FStringMissingPlaceholders)
+ self.flakes('''
+ f"""foo
+ bar
+ """
+ ''', m.FStringMissingPlaceholders)
+ self.flakes('''
+ print(
+ f'foo'
+ f'bar'
+ )
+ ''', m.FStringMissingPlaceholders)
+ # this is an "escaped placeholder" but not a placeholder
+ self.flakes("f'{{}}'", m.FStringMissingPlaceholders)
+ # ok: f-string with placeholders
+ self.flakes('''
+ x = 5
+ print(f'{x}')
+ ''')
+ # ok: f-string with format specifiers
+ self.flakes('''
+ x = 'a' * 90
+ print(f'{x:.8}')
+ ''')
+ # ok: f-string with multiple format specifiers
+ self.flakes('''
+ x = y = 5
+ print(f'{x:>2} {y:>2}')
+ ''')
+
+ def test_invalid_dot_format_calls(self):
+ self.flakes('''
+ '{'.format(1)
+ ''', m.StringDotFormatInvalidFormat)
+ self.flakes('''
+ '{} {1}'.format(1, 2)
+ ''', m.StringDotFormatMixingAutomatic)
+ self.flakes('''
+ '{0} {}'.format(1, 2)
+ ''', m.StringDotFormatMixingAutomatic)
+ self.flakes('''
+ '{}'.format(1, 2)
+ ''', m.StringDotFormatExtraPositionalArguments)
+ self.flakes('''
+ '{}'.format(1, bar=2)
+ ''', m.StringDotFormatExtraNamedArguments)
+ self.flakes('''
+ '{} {}'.format(1)
+ ''', m.StringDotFormatMissingArgument)
+ self.flakes('''
+ '{2}'.format()
+ ''', m.StringDotFormatMissingArgument)
+ self.flakes('''
+ '{bar}'.format()
+ ''', m.StringDotFormatMissingArgument)
+ # too much string recursion (placeholder-in-placeholder)
+ self.flakes('''
+ '{:{:{}}}'.format(1, 2, 3)
+ ''', m.StringDotFormatInvalidFormat)
+ # ok: dotted / bracketed names need to handle the param differently
+ self.flakes("'{.__class__}'.format('')")
+ self.flakes("'{foo[bar]}'.format(foo={'bar': 'barv'})")
+ # ok: placeholder-placeholders
+ self.flakes('''
+ print('{:{}} {}'.format(1, 15, 2))
+ ''')
+ # ok: not a placeholder-placeholder
+ self.flakes('''
+ print('{:2}'.format(1))
+ ''')
+ # ok: not mixed automatic
+ self.flakes('''
+ '{foo}-{}'.format(1, foo=2)
+ ''')
+ # ok: we can't determine statically the format args
+ self.flakes('''
+ a = ()
+ "{}".format(*a)
+ ''')
+ self.flakes('''
+ k = {}
+ "{foo}".format(**k)
+ ''')
+
+ def test_invalid_percent_format_calls(self):
+ self.flakes('''
+ '%(foo)' % {'foo': 'bar'}
+ ''', m.PercentFormatInvalidFormat)
+ self.flakes('''
+ '%s %(foo)s' % {'foo': 'bar'}
+ ''', m.PercentFormatMixedPositionalAndNamed)
+ self.flakes('''
+ '%(foo)s %s' % {'foo': 'bar'}
+ ''', m.PercentFormatMixedPositionalAndNamed)
+ self.flakes('''
+ '%j' % (1,)
+ ''', m.PercentFormatUnsupportedFormatCharacter)
+ self.flakes('''
+ '%s %s' % (1,)
+ ''', m.PercentFormatPositionalCountMismatch)
+ self.flakes('''
+ '%s %s' % (1, 2, 3)
+ ''', m.PercentFormatPositionalCountMismatch)
+ self.flakes('''
+ '%(bar)s' % {}
+ ''', m.PercentFormatMissingArgument,)
+ self.flakes('''
+ '%(bar)s' % {'bar': 1, 'baz': 2}
+ ''', m.PercentFormatExtraNamedArguments)
+ self.flakes('''
+ '%(bar)s' % (1, 2, 3)
+ ''', m.PercentFormatExpectedMapping)
+ self.flakes('''
+ '%s %s' % {'k': 'v'}
+ ''', m.PercentFormatExpectedSequence)
+ self.flakes('''
+ '%(bar)*s' % {'bar': 'baz'}
+ ''', m.PercentFormatStarRequiresSequence)
+ # ok: single %s with mapping
+ self.flakes('''
+ '%s' % {'foo': 'bar', 'baz': 'womp'}
+ ''')
+ # ok: does not cause a MemoryError (the strings aren't evaluated)
+ self.flakes('''
+ "%1000000000000f" % 1
+ ''')
+ # ok: %% should not count towards placeholder count
+ self.flakes('''
+ '%% %s %% %s' % (1, 2)
+ ''')
+ # ok: * consumes one positional argument
+ self.flakes('''
+ '%.*f' % (2, 1.1234)
+ '%*.*f' % (5, 2, 3.1234)
+ ''')
+
+ @skipIf(version_info < (3, 5), 'new in Python 3.5')
+ def test_ok_percent_format_cannot_determine_element_count(self):
+ self.flakes('''
+ a = []
+ '%s %s' % [*a]
+ '%s %s' % (*a,)
+ ''')
+ self.flakes('''
+ k = {}
+ '%(k)s' % {**k}
+ ''')
+
+
class TestAsyncStatements(TestCase):
@skipIf(version_info < (3, 5), 'new in Python 3.5')