String formatting linting (#443)

* Add lint rule for f-strings without placeholders * Add linting for string.format(...) * Add linting for % formatting
author: Anthony Sottile <asottile@umich.edu> 2019-07-30 05:20:30 -0700
committer: Steven Myint <git@stevenmyint.com> 2019-07-30 05:20:30 -0700
commit: eeb626328dafd600a30a79c3a37e8b8f4387a187 (patch)
tree: c813fd39b073dc9abc7e0180482fae4cf160a149
parent: c0193b288daf817a113fe4b16fa12d8e6be6fec8 (diff)
download: pyflakes-eeb626328dafd600a30a79c3a37e8b8f4387a187.tar.gz
3 files changed, 593 insertions, 3 deletions
diff --git a/pyflakes/checker.py b/pyflakes/checker.py
index acc2ae7..f9ae0aa 100644
--- a/pyflakes/checker.py
+++ b/pyflakes/checker.py
@@ -12,6 +12,7 @@ import doctest
 import functools
 import os
 import re
+import string
 import sys
 import tokenize
 
@@ -29,6 +30,8 @@ except AttributeError:
 
 builtin_vars = dir(__import__('__builtin__' if PY2 else 'builtins'))
 
+parse_format_string = string.Formatter().parse
+
 if PY2:
     tokenize_tokenize = tokenize.generate_tokens
 else:
@@ -81,6 +84,87 @@ TYPE_IGNORE_RE = re.compile(TYPE_COMMENT_RE.pattern + r'ignore\s*(#|$)')
 TYPE_FUNC_RE = re.compile(r'^(\(.*?\))\s*->\s*(.*)$')
 
 
+MAPPING_KEY_RE = re.compile(r'\(([^()]*)\)')
+CONVERSION_FLAG_RE = re.compile('[#0+ -]*')
+WIDTH_RE = re.compile(r'(?:\*|\d*)')
+PRECISION_RE = re.compile(r'(?:\.(?:\*|\d*))?')
+LENGTH_RE = re.compile('[hlL]?')
+# https://docs.python.org/3/library/stdtypes.html#old-string-formatting
+VALID_CONVERSIONS = frozenset('diouxXeEfFgGcrsa%')
+
+
+def _must_match(regex, string, pos):
+    # type: (Pattern[str], str, int) -> Match[str]
+    match = regex.match(string, pos)
+    assert match is not None
+    return match
+
+
+def parse_percent_format(s):  # type: (str) -> Tuple[PercentFormat, ...]
+    """Parses the string component of a `'...' % ...` format call
+
+    Copied from https://github.com/asottile/pyupgrade at v1.20.1
+    """
+
+    def _parse_inner():
+        # type: () -> Generator[PercentFormat, None, None]
+        string_start = 0
+        string_end = 0
+        in_fmt = False
+
+        i = 0
+        while i < len(s):
+            if not in_fmt:
+                try:
+                    i = s.index('%', i)
+                except ValueError:  # no more % fields!
+                    yield s[string_start:], None
+                    return
+                else:
+                    string_end = i
+                    i += 1
+                    in_fmt = True
+            else:
+                key_match = MAPPING_KEY_RE.match(s, i)
+                if key_match:
+                    key = key_match.group(1)  # type: Optional[str]
+                    i = key_match.end()
+                else:
+                    key = None
+
+                conversion_flag_match = _must_match(CONVERSION_FLAG_RE, s, i)
+                conversion_flag = conversion_flag_match.group() or None
+                i = conversion_flag_match.end()
+
+                width_match = _must_match(WIDTH_RE, s, i)
+                width = width_match.group() or None
+                i = width_match.end()
+
+                precision_match = _must_match(PRECISION_RE, s, i)
+                precision = precision_match.group() or None
+                i = precision_match.end()
+
+                # length modifier is ignored
+                i = _must_match(LENGTH_RE, s, i).end()
+
+                try:
+                    conversion = s[i]
+                except IndexError:
+                    raise ValueError('end-of-string while parsing format')
+                i += 1
+
+                fmt = (key, conversion_flag, width, precision, conversion)
+                yield s[string_start:string_end], fmt
+
+                in_fmt = False
+                string_start = i
+
+        if in_fmt:
+            raise ValueError('end-of-string while parsing format')
+
+    return tuple(_parse_inner())
+
+
 class _FieldsOrder(dict):
     """Fix order of AST node fields."""
 
@@ -1241,10 +1325,250 @@ class Checker(object):
     PASS = ignore
 
     # "expr" type nodes
-    BOOLOP = BINOP = UNARYOP = IFEXP = SET = \
-        CALL = REPR = ATTRIBUTE = SUBSCRIPT = \
+    BOOLOP = UNARYOP = IFEXP = SET = \
+        REPR = ATTRIBUTE = SUBSCRIPT = \
         STARRED = NAMECONSTANT = handleChildren
 
+    def _handle_string_dot_format(self, node):
+        try:
+            placeholders = tuple(parse_format_string(node.func.value.s))
+        except ValueError as e:
+            self.report(messages.StringDotFormatInvalidFormat, node, e)
+            return
+
+        class state:  # py2-compatible `nonlocal`
+            auto = None
+            next_auto = 0
+
+        placeholder_positional = set()
+        placeholder_named = set()
+
+        def _add_key(fmtkey):
+            """Returns True if there is an error which should early-exit"""
+            if fmtkey is None:  # end of string or `{` / `}` escapes
+                return False
+
+            # attributes / indices are allowed in `.format(...)`
+            fmtkey, _, _ = fmtkey.partition('.')
+            fmtkey, _, _ = fmtkey.partition('[')
+
+            try:
+                fmtkey = int(fmtkey)
+            except ValueError:
+                pass
+            else:  # fmtkey was an integer
+                if state.auto is True:
+                    self.report(messages.StringDotFormatMixingAutomatic, node)
+                    return True
+                else:
+                    state.auto = False
+
+            if fmtkey == '':
+                if state.auto is False:
+                    self.report(messages.StringDotFormatMixingAutomatic, node)
+                    return True
+                else:
+                    state.auto = True
+
+                fmtkey = state.next_auto
+                state.next_auto += 1
+
+            if isinstance(fmtkey, int):
+                placeholder_positional.add(fmtkey)
+            else:
+                placeholder_named.add(fmtkey)
+
+            return False
+
+        for _, fmtkey, spec, _ in placeholders:
+            if _add_key(fmtkey):
+                return
+
+            # spec can also contain format specifiers
+            if spec is not None:
+                try:
+                    spec_placeholders = tuple(parse_format_string(spec))
+                except ValueError as e:
+                    self.report(messages.StringDotFormatInvalidFormat, node, e)
+                    return
+
+                for _, spec_fmtkey, spec_spec, _ in spec_placeholders:
+                    # can't recurse again
+                    if spec_spec is not None and '{' in spec_spec:
+                        self.report(
+                            messages.StringDotFormatInvalidFormat,
+                            node,
+                            'Max string recursion exceeded',
+                        )
+                        return
+                    if _add_key(spec_fmtkey):
+                        return
+
+        # bail early if there is *args or **kwargs
+        if (
+                # python 2.x *args / **kwargs
+                getattr(node, 'starargs', None) or
+                getattr(node, 'kwargs', None) or
+                # python 3.x *args
+                any(
+                    isinstance(arg, getattr(ast, 'Starred', ()))
+                    for arg in node.args
+                ) or
+                # python 3.x **kwargs
+                any(kwd.arg is None for kwd in node.keywords)
+        ):
+            return
+
+        substitution_positional = set(range(len(node.args)))
+        substitution_named = {kwd.arg for kwd in node.keywords}
+
+        extra_positional = substitution_positional - placeholder_positional
+        extra_named = substitution_named - placeholder_named
+
+        missing_arguments = (
+            (placeholder_positional | placeholder_named) -
+            (substitution_positional | substitution_named)
+        )
+
+        if extra_positional:
+            self.report(
+                messages.StringDotFormatExtraPositionalArguments,
+                node,
+                ', '.join(sorted(str(x) for x in extra_positional)),
+            )
+        if extra_named:
+            self.report(
+                messages.StringDotFormatExtraNamedArguments,
+                node,
+                ', '.join(sorted(extra_named)),
+            )
+        if missing_arguments:
+            self.report(
+                messages.StringDotFormatMissingArgument,
+                node,
+                ', '.join(sorted(str(x) for x in missing_arguments)),
+            )
+
+    def CALL(self, node):
+        if (
+                isinstance(node.func, ast.Attribute) and
+                isinstance(node.func.value, ast.Str) and
+                node.func.attr == 'format'
+        ):
+            self._handle_string_dot_format(node)
+        self.handleChildren(node)
+
+    def _handle_percent_format(self, node):
+        try:
+            placeholders = parse_percent_format(node.left.s)
+        except ValueError:
+            self.report(
+                messages.PercentFormatInvalidFormat,
+                node,
+                'incomplete format',
+            )
+            return
+
+        named = set()
+        positional_count = 0
+        positional = None
+        for _, placeholder in placeholders:
+            if placeholder is None:
+                continue
+            name, _, width, precision, conversion = placeholder
+
+            if conversion == '%':
+                continue
+
+            if conversion not in VALID_CONVERSIONS:
+                self.report(
+                    messages.PercentFormatUnsupportedFormatCharacter,
+                    node,
+                    conversion,
+                )
+
+            if positional is None and conversion:
+                positional = name is None
+
+            for part in (width, precision):
+                if part is not None and '*' in part:
+                    if not positional:
+                        self.report(
+                            messages.PercentFormatStarRequiresSequence,
+                            node,
+                        )
+                    else:
+                        positional_count += 1
+
+            if positional and name is not None:
+                self.report(
+                    messages.PercentFormatMixedPositionalAndNamed,
+                    node,
+                )
+                return
+            elif not positional and name is None:
+                self.report(
+                    messages.PercentFormatMixedPositionalAndNamed,
+                    node,
+                )
+                return
+
+            if positional:
+                positional_count += 1
+            else:
+                named.add(name)
+
+        if (
+                isinstance(node.right, (ast.List, ast.Tuple)) and
+                # does not have any *splats (py35+ feature)
+                not any(
+                    isinstance(elt, getattr(ast, 'Starred', ()))
+                    for elt in node.right.elts
+                )
+        ):
+            substitution_count = len(node.right.elts)
+            if positional and positional_count != substitution_count:
+                self.report(
+                    messages.PercentFormatPositionalCountMismatch,
+                    node,
+                    positional_count,
+                    substitution_count,
+                )
+            elif not positional:
+                self.report(messages.PercentFormatExpectedMapping, node)
+
+        if (
+                isinstance(node.right, ast.Dict) and
+                all(isinstance(k, ast.Str) for k in node.right.keys)
+        ):
+            if positional and positional_count > 1:
+                self.report(messages.PercentFormatExpectedSequence, node)
+                return
+
+            substitution_keys = {k.s for k in node.right.keys}
+            extra_keys = substitution_keys - named
+            missing_keys = named - substitution_keys
+            if not positional and extra_keys:
+                self.report(
+                    messages.PercentFormatExtraNamedArguments,
+                    node,
+                    ', '.join(sorted(extra_keys)),
+                )
+            if not positional and missing_keys:
+                self.report(
+                    messages.PercentFormatMissingArgument,
+                    node,
+                    ', '.join(sorted(missing_keys)),
+                )
+
+    def BINOP(self, node):
+        if (
+                isinstance(node.op, ast.Mod) and
+                isinstance(node.left, ast.Str)
+        ):
+            self._handle_percent_format(node)
+        self.handleChildren(node)
+
     NUM = STR = BYTES = ELLIPSIS = CONSTANT = ignore
 
     # "slice" type nodes
@@ -1273,7 +1597,24 @@ class Checker(object):
             self.report(messages.RaiseNotImplemented, node)
 
     # additional node types
-    COMPREHENSION = KEYWORD = FORMATTEDVALUE = JOINEDSTR = handleChildren
+    COMPREHENSION = KEYWORD = FORMATTEDVALUE = handleChildren
+
+    _in_fstring = False
+
+    def JOINEDSTR(self, node):
+        if (
+                # the conversion / etc. flags are parsed as f-strings without
+                # placeholders
+                not self._in_fstring and
+                not any(isinstance(x, ast.FormattedValue) for x in node.values)
+        ):
+            self.report(messages.FStringMissingPlaceholders, node)
+
+        self._in_fstring, orig = True, self._in_fstring
+        try:
+            self.handleChildren(node)
+        finally:
+            self._in_fstring = orig
 
     def DICT(self, node):
         # Complain if there are duplicate keys with different values
diff --git a/pyflakes/messages.py b/pyflakes/messages.py
index 8ae545f..cf67cf8 100644
--- a/pyflakes/messages.py
+++ b/pyflakes/messages.py
@@ -266,3 +266,99 @@ class InvalidPrintSyntax(Message):
 
 class IsLiteral(Message):
     message = 'use ==/!= to compare str, bytes, and int literals'
+
+
+class FStringMissingPlaceholders(Message):
+    message = 'f-string is missing placeholders'
+
+
+class StringDotFormatExtraPositionalArguments(Message):
+    message = "'...'.format(...) has unused arguments at position(s): %s"
+
+    def __init__(self, filename, loc, extra_positions):
+        Message.__init__(self, filename, loc)
+        self.message_args = (extra_positions,)
+
+
+class StringDotFormatExtraNamedArguments(Message):
+    message = "'...'.format(...) has unused named argument(s): %s"
+
+    def __init__(self, filename, loc, extra_keywords):
+        Message.__init__(self, filename, loc)
+        self.message_args = (extra_keywords,)
+
+
+class StringDotFormatMissingArgument(Message):
+    message = "'...'.format(...) is missing argument(s) for placeholder(s): %s"
+
+    def __init__(self, filename, loc, missing_arguments):
+        Message.__init__(self, filename, loc)
+        self.message_args = (missing_arguments,)
+
+
+class StringDotFormatMixingAutomatic(Message):
+    message = "'...'.format(...) mixes automatic and manual numbering"
+
+
+class StringDotFormatInvalidFormat(Message):
+    message = "'...'.format(...) has invalid format string: %s"
+
+    def __init__(self, filename, loc, error):
+        Message.__init__(self, filename, loc)
+        self.message_args = (error,)
+
+
+class PercentFormatInvalidFormat(Message):
+    message = "'...' %% ... has invalid format string: %s"
+
+    def __init__(self, filename, loc, error):
+        Message.__init__(self, filename, loc)
+        self.message_args = (error,)
+
+
+class PercentFormatMixedPositionalAndNamed(Message):
+    message = "'...' %% ... has mixed positional and named placeholders"
+
+
+class PercentFormatUnsupportedFormatCharacter(Message):
+    message = "'...' %% ... has unsupported format character %r"
+
+    def __init__(self, filename, loc, c):
+        Message.__init__(self, filename, loc)
+        self.message_args = (c,)
+
+
+class PercentFormatPositionalCountMismatch(Message):
+    message = "'...' %% ... has %d placeholder(s) but %d substitution(s)"
+
+    def __init__(self, filename, loc, n_placeholders, n_substitutions):
+        Message.__init__(self, filename, loc)
+        self.message_args = (n_placeholders, n_substitutions)
+
+
+class PercentFormatExtraNamedArguments(Message):
+    message = "'...' %% ... has unused named argument(s): %s"
+
+    def __init__(self, filename, loc, extra_keywords):
+        Message.__init__(self, filename, loc)
+        self.message_args = (extra_keywords,)
+
+
+class PercentFormatMissingArgument(Message):
+    message = "'...' %% ... is missing argument(s) for placeholder(s): %s"
+
+    def __init__(self, filename, loc, missing_arguments):
+        Message.__init__(self, filename, loc)
+        self.message_args = (missing_arguments,)
+
+
+class PercentFormatExpectedMapping(Message):
+    message = "'...' %% ... expected mapping but got sequence"
+
+
+class PercentFormatExpectedSequence(Message):
+    message = "'...' %% ... expected sequence but got mapping"
+
+
+class PercentFormatStarRequiresSequence(Message):
+    message = "'...' %% ... `*` specifier requires sequence"
diff --git a/pyflakes/test/test_other.py b/pyflakes/test/test_other.py
index 1b10e02..775fdde 100644
--- a/pyflakes/test/test_other.py
+++ b/pyflakes/test/test_other.py
@@ -1755,6 +1755,159 @@ class TestUnusedAssignment(TestCase):
         ''')
 
 
+class TestStringFormatting(TestCase):
+
+    @skipIf(version_info < (3, 6), 'new in Python 3.6')
+    def test_f_string_without_placeholders(self):
+        self.flakes("f'foo'", m.FStringMissingPlaceholders)
+        self.flakes('''
+            f"""foo
+            bar
+            """
+        ''', m.FStringMissingPlaceholders)
+        self.flakes('''
+            print(
+                f'foo'
+                f'bar'
+            )
+        ''', m.FStringMissingPlaceholders)
+        # this is an "escaped placeholder" but not a placeholder
+        self.flakes("f'{{}}'", m.FStringMissingPlaceholders)
+        # ok: f-string with placeholders
+        self.flakes('''
+            x = 5
+            print(f'{x}')
+        ''')
+        # ok: f-string with format specifiers
+        self.flakes('''
+            x = 'a' * 90
+            print(f'{x:.8}')
+        ''')
+        # ok: f-string with multiple format specifiers
+        self.flakes('''
+            x = y = 5
+            print(f'{x:>2} {y:>2}')
+        ''')
+
+    def test_invalid_dot_format_calls(self):
+        self.flakes('''
+            '{'.format(1)
+        ''', m.StringDotFormatInvalidFormat)
+        self.flakes('''
+            '{} {1}'.format(1, 2)
+        ''', m.StringDotFormatMixingAutomatic)
+        self.flakes('''
+            '{0} {}'.format(1, 2)
+        ''', m.StringDotFormatMixingAutomatic)
+        self.flakes('''
+            '{}'.format(1, 2)
+        ''', m.StringDotFormatExtraPositionalArguments)
+        self.flakes('''
+            '{}'.format(1, bar=2)
+        ''', m.StringDotFormatExtraNamedArguments)
+        self.flakes('''
+            '{} {}'.format(1)
+        ''', m.StringDotFormatMissingArgument)
+        self.flakes('''
+            '{2}'.format()
+        ''', m.StringDotFormatMissingArgument)
+        self.flakes('''
+            '{bar}'.format()
+        ''', m.StringDotFormatMissingArgument)
+        # too much string recursion (placeholder-in-placeholder)
+        self.flakes('''
+            '{:{:{}}}'.format(1, 2, 3)
+        ''', m.StringDotFormatInvalidFormat)
+        # ok: dotted / bracketed names need to handle the param differently
+        self.flakes("'{.__class__}'.format('')")
+        self.flakes("'{foo[bar]}'.format(foo={'bar': 'barv'})")
+        # ok: placeholder-placeholders
+        self.flakes('''
+            print('{:{}} {}'.format(1, 15, 2))
+        ''')
+        # ok: not a placeholder-placeholder
+        self.flakes('''
+            print('{:2}'.format(1))
+        ''')
+        # ok: not mixed automatic
+        self.flakes('''
+            '{foo}-{}'.format(1, foo=2)
+        ''')
+        # ok: we can't determine statically the format args
+        self.flakes('''
+            a = ()
+            "{}".format(*a)
+        ''')
+        self.flakes('''
+            k = {}
+            "{foo}".format(**k)
+        ''')
+
+    def test_invalid_percent_format_calls(self):
+        self.flakes('''
+            '%(foo)' % {'foo': 'bar'}
+        ''', m.PercentFormatInvalidFormat)
+        self.flakes('''
+            '%s %(foo)s' % {'foo': 'bar'}
+        ''', m.PercentFormatMixedPositionalAndNamed)
+        self.flakes('''
+            '%(foo)s %s' % {'foo': 'bar'}
+        ''', m.PercentFormatMixedPositionalAndNamed)
+        self.flakes('''
+            '%j' % (1,)
+        ''', m.PercentFormatUnsupportedFormatCharacter)
+        self.flakes('''
+            '%s %s' % (1,)
+        ''', m.PercentFormatPositionalCountMismatch)
+        self.flakes('''
+            '%s %s' % (1, 2, 3)
+        ''', m.PercentFormatPositionalCountMismatch)
+        self.flakes('''
+            '%(bar)s' % {}
+        ''', m.PercentFormatMissingArgument,)
+        self.flakes('''
+            '%(bar)s' % {'bar': 1, 'baz': 2}
+        ''', m.PercentFormatExtraNamedArguments)
+        self.flakes('''
+            '%(bar)s' % (1, 2, 3)
+        ''', m.PercentFormatExpectedMapping)
+        self.flakes('''
+            '%s %s' % {'k': 'v'}
+        ''', m.PercentFormatExpectedSequence)
+        self.flakes('''
+            '%(bar)*s' % {'bar': 'baz'}
+        ''', m.PercentFormatStarRequiresSequence)
+        # ok: single %s with mapping
+        self.flakes('''
+            '%s' % {'foo': 'bar', 'baz': 'womp'}
+        ''')
+        # ok: does not cause a MemoryError (the strings aren't evaluated)
+        self.flakes('''
+            "%1000000000000f" % 1
+        ''')
+        # ok: %% should not count towards placeholder count
+        self.flakes('''
+            '%% %s %% %s' % (1, 2)
+        ''')
+        # ok: * consumes one positional argument
+        self.flakes('''
+            '%.*f' % (2, 1.1234)
+            '%*.*f' % (5, 2, 3.1234)
+        ''')
+
+    @skipIf(version_info < (3, 5), 'new in Python 3.5')
+    def test_ok_percent_format_cannot_determine_element_count(self):
+        self.flakes('''
+            a = []
+            '%s %s' % [*a]
+            '%s %s' % (*a,)
+        ''')
+        self.flakes('''
+            k = {}
+            '%(k)s' % {**k}
+        ''')
+
+
 class TestAsyncStatements(TestCase):
 
     @skipIf(version_info < (3, 5), 'new in Python 3.5')
author	Anthony Sottile <asottile@umich.edu>	2019-07-30 05:20:30 -0700
committer	Steven Myint <git@stevenmyint.com>	2019-07-30 05:20:30 -0700
commit	eeb626328dafd600a30a79c3a37e8b8f4387a187 (patch)
tree	c813fd39b073dc9abc7e0180482fae4cf160a149
parent	c0193b288daf817a113fe4b16fa12d8e6be6fec8 (diff)
download	pyflakes-eeb626328dafd600a30a79c3a37e8b8f4387a187.tar.gz