summaryrefslogtreecommitdiff
path: root/checkers/string_format.py
blob: c00ca4cdf16ccc9614b931a4622447ae513f9bae (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
# Copyright (c) 2009-2010 Arista Networks, Inc. - James Lingard
# Copyright (c) 2004-2010 LOGILAB S.A. (Paris, FRANCE).
# http://www.logilab.fr/ -- mailto:contact@logilab.fr
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation; either version 2 of the License, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.


"""Checker for string formatting operations.
"""

import string
from logilab import astng
from pylint.interfaces import IASTNGChecker
from pylint.checkers import BaseChecker
from logilab.common.compat import set

# Messages emitted by the checker in this module.  Each message id maps to a
# tuple of (message template, help text describing when it is used).
MSGS = {
    'E9900': ("Unsupported format character %r (%#02x) at index %d",
              "Used when an unsupported format character is used in a format\
              string."),
    'E9901': ("Format string ends in middle of conversion specifier",
              "Used when a format string terminates before the end of a \
              conversion specifier."),
    'E9902': ("Mixing named and unnamed conversion specifiers in format string",
              "Used when a format string contains both named (e.g. '%(foo)d') \
              and unnamed (e.g. '%d') conversion specifiers.  This is also \
              used when a named conversion specifier contains * for the \
              minimum field width and/or precision."),
    'E9903': ("Expected mapping for format string, not %s",
              "Used when a format string that uses named conversion specifiers \
              is used with an argument that is not a mapping."),
    'W9900': ("Format string dictionary key should be a string, not %s",
              "Used when a format string that uses named conversion specifiers \
              is used with a dictionary whose keys are not all strings."),
    'W9901': ("Unused key %r in format string dictionary",
              "Used when a format string that uses named conversion specifiers \
              is used with a dictionary that contains keys not required by the \
              format string."),
    'E9904': ("Missing key %r in format string dictionary",
              "Used when a format string that uses named conversion specifiers \
              is used with a dictionary that doesn't contain all the keys \
              required by the format string."),
    # The help texts of the two argument-count messages used to be swapped.
    'E9905': ("Too many arguments for format string",
              "Used when a format string that uses unnamed conversion \
              specifiers is given too many arguments."),
    'E9906': ("Not enough arguments for format string",
              "Used when a format string that uses unnamed conversion \
              specifiers is given too few arguments."),
    }

class IncompleteFormatStringException(Exception):
    """Signals that a format string stopped partway through a conversion
    specifier."""

class UnsupportedFormatCharacterException(Exception):
    """Signals that a conversion specifier uses a conversion type that is not
    one of the supported format characters.

    The position of the offending character in the format string is kept in
    the ``index`` attribute (and is also the exception's only argument).
    """

    def __init__(self, index):
        # Record the position first; the base initialiser only stores args.
        self.index = index
        Exception.__init__(self, index)

def parse_format_string(format_string):
    """Parse a %-style format string.

    Returns a tuple (keys, num_args), where keys is the set of mapping keys
    named by the conversion specifiers in the format string, and num_args is
    the number of positional arguments it consumes: one for every unnamed
    conversion specifier other than '%%', plus one for every '*' used as a
    minimum field width or precision.

    Raises IncompleteFormatStringException if the string ends in the middle
    of a conversion specifier, and UnsupportedFormatCharacterException
    (carrying the index of the offending character) if a conversion type is
    not one of the supported characters.
    """
    keys = set()
    num_args = 0
    length = len(format_string)

    def next_char(i):
        """Step to the character after index i, returning (index, char)."""
        i += 1
        if i == length:
            raise IncompleteFormatStringException
        return (i, format_string[i])

    i = 0
    while i < length:
        c = format_string[i]
        if c == '%':
            i, c = next_char(i)
            # Parse the mapping key (optional).  Parentheses may nest inside
            # the key, so track the depth to find the matching ')'.
            key = None
            if c == '(':
                depth = 1
                i, c = next_char(i)
                key_start = i
                while depth != 0:
                    if c == '(':
                        depth += 1
                    elif c == ')':
                        depth -= 1
                    i, c = next_char(i)
                # i is now one past the closing ')'.
                key_end = i - 1
                key = format_string[key_start:key_end]

            # Parse the conversion flags (optional).
            while c in '#0- +':
                i, c = next_char(i)
            # Parse the minimum field width (optional).
            if c == '*':
                num_args += 1
                i, c = next_char(i)
            else:
                while c in string.digits:
                    i, c = next_char(i)
            # Parse the precision (optional).
            if c == '.':
                i, c = next_char(i)
                if c == '*':
                    num_args += 1
                    i, c = next_char(i)
                else:
                    while c in string.digits:
                        i, c = next_char(i)
            # Parse the length modifier (optional).
            if c in 'hlL':
                i, c = next_char(i)
            # Parse the conversion type (mandatory).
            if c not in 'diouxXeEfFgGcrs%':
                raise UnsupportedFormatCharacterException(i)
            # Compare against None rather than testing truthiness: '%()s' is
            # a named specifier whose mapping key is the empty string.
            if key is not None:
                keys.add(key)
            elif c != '%':
                num_args += 1
        i += 1
    return keys, num_args

class StringFormatChecker(BaseChecker):
    """Checks string formatting operations to ensure that the format string
    is valid and the arguments match the format string.
    """
    __implements__ = (IASTNGChecker,)
    name = 'string_format'
    msgs = MSGS
    def visit_binop(self, node):
        if node.op != '%':
            return
        f = node.left
        args = node.right
        if isinstance(f, astng.Const) and isinstance(f.value, basestring):
            format_string = f.value
            try:
                required_keys, required_num_args = \
                    parse_format_string(format_string)
            except UnsupportedFormatCharacterException, e:
                c = format_string[e.index]
                self.add_message('E9900', node=node, args=(c, ord(c), e.index))
            except IncompleteFormatStringException:
                self.add_message('E9901', node=node)
            else:
                if required_keys and required_num_args:
                    # The format string uses both named and unnamed format
                    # specifiers.
                    self.add_message('E9902', node=node)
                elif required_keys:
                    # The format string uses only named format specifiers.
                    # Check that the RHS of the % operator is a mapping object
                    # that contains precisely the set of keys required by the
                    # format string.
                    if isinstance(args, astng.Dict):
                        keys = set()
                        unknown_keys = False
                        for k, v in args.items:
                            if isinstance(k, astng.Const):
                                key = k.value
                                if isinstance(key, basestring):
                                    keys.add(key)
                                else:
                                    self.add_message('W9900',
                                                     node=node,
                                                     args=key)
                            else:
                                # One of the keys was something other than a
                                # constant.  Since we can't tell what it is,
                                # supress checks for missing keys in the
                                # dictionary.
                                unknown_keys = True
                        if not unknown_keys:
                            for key in required_keys:
                                if key not in keys:
                                    self.add_message('E9904',
                                                     node=node,
                                                     args=key)
                        for key in keys:
                            if key not in required_keys:
                                self.add_message('W9901', node=node, args=key)
                    elif (isinstance(args, astng.Const) or
                          isinstance(args, astng.Tuple) or
                          isinstance(args, astng.List) or
                          isinstance(args, astng.ListComp) or
                          isinstance(args, astng.SetComp) or
                          isinstance(args, astng.GenExpr) or
                          isinstance(args, astng.Backquote) or
                          isinstance(args, astng.Lambda)):
                        type_name = type(args).__name__
                        self.add_message('E9903', node=node, args=type_name)
                    else:
                        # The RHS of the format specifier is a name or
                        # expression.  It may be a mapping object, so
                        # there's nothing we can check.
                        pass
                else:
                    # The format string uses only unnamed format specifiers.
                    # Check that the number of arguments passed to the RHS of
                    # the % operator matches the number required by the format
                    # string.
                    if isinstance(args, astng.Tuple):
                        num_args = len(args.elts)
                    elif (isinstance(args, astng.Const) or
                          isinstance(args, astng.Dict) or
                          isinstance(args, astng.List) or
                          isinstance(args, astng.ListComp) or
                          isinstance(args, astng.SetComp) or
                          isinstance(args, astng.DictComp) or
                          isinstance(args, astng.GenExpr) or
                          isinstance(args, astng.Backquote) or
                          isinstance(args, astng.Lambda) or
                          isinstance(args, astng.Function)):
                        num_args = 1
                    else:
                        # The RHS of the format specifier is a name or
                        # expression.  It could be a tuple of unknown size, so
                        # there's nothing we can check.
                        num_args = None
                    if num_args is not None:
                        if num_args > required_num_args:
                            self.add_message('E9905', node=node)
                        elif num_args < required_num_args:
                            self.add_message('E9906', node=node)


def register(linter):
    """Create a StringFormatChecker bound to linter and register it with
    that linter; this is the entry point used for auto registration."""
    checker = StringFormatChecker(linter)
    linter.register_checker(checker)