summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Sylvain Thénault <thenault@gmail.com> 2013-05-07 10:01:47 +0200
committer Sylvain Thénault <thenault@gmail.com> 2013-05-07 10:01:47 +0200
commit 5cbe6b872ac9e6b8f4f66313e4f16f1bb4031033 (patch)
tree 4214db3eb3866b9cefd053f31d503affbbcd392f
parent 8ccee612b4da0de68d37eaeea602ccf6302ee10f (diff)
parent 17fb1d38dc5123609c0d5064707654ddffdef349 (diff)
download pylint-5cbe6b872ac9e6b8f4f66313e4f16f1bb4031033.tar.gz
Merged in tmarek/pylint (pull request #15)
Tokenize the input source only once and hand it to all checkers that need the token stream.
-rw-r--r-- ChangeLog 4
-rw-r--r-- checkers/__init__.py 12
-rw-r--r-- checkers/format.py 22
-rw-r--r-- checkers/raw_metrics.py 10
-rw-r--r-- checkers/strings.py 8
-rw-r--r-- interfaces.py 9
-rw-r--r-- lint.py 31
-rw-r--r-- test/unittest_lint.py 4
-rw-r--r-- utils.py 16
9 files changed, 73 insertions, 43 deletions
diff --git a/ChangeLog b/ChangeLog
index d5a7887..99d89ad 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -4,6 +4,10 @@ ChangeLog for Pylint
--
* bitbucket #6: put back documentation in source distribution
+ * Added a new base class and interface for checkers that work on the
+ tokens rather than the syntax, and only tokenize the input file
+ once.
+
2013-04-25 -- 0.28.0
* bitbucket #1: fix "dictionary changed size during iteration" crash
diff --git a/checkers/__init__.py b/checkers/__init__.py
index 700a78e..dd868c6 100644
--- a/checkers/__init__.py
+++ b/checkers/__init__.py
@@ -39,6 +39,7 @@ messages nor reports. XXX not true, emit a 07 report !
"""
import tokenize
+import warnings
from os import listdir
from os.path import dirname, join, isdir, splitext
@@ -121,6 +122,9 @@ class BaseRawChecker(BaseChecker):
stream must implement the readline method
"""
+ warnings.warn("Modules that need access to the tokens should "
+ "use the ITokenChecker interface.",
+ DeprecationWarning)
stream = node.file_stream
stream.seek(0) # XXX may be removed with astng > 0.23
self.process_tokens(tokenize.generate_tokens(stream.readline))
@@ -130,6 +134,14 @@ class BaseRawChecker(BaseChecker):
raise NotImplementedError()
+class BaseTokenChecker(BaseChecker):
+ """Base class for checkers that want to have access to the token stream."""
+
+ def process_tokens(self, tokens):
+ """Should be overridden by subclasses."""
+ raise NotImplementedError()
+
+
PY_EXTS = ('.py', '.pyc', '.pyo', '.pyw', '.so', '.dll')
def initialize(linter):
diff --git a/checkers/format.py b/checkers/format.py
index 4a68d39..ea8cf17 100644
--- a/checkers/format.py
+++ b/checkers/format.py
@@ -29,8 +29,8 @@ if not hasattr(tokenize, 'NL'):
from logilab.common.textutils import pretty_match
from logilab.astng import nodes
-from pylint.interfaces import IRawChecker, IASTNGChecker
-from pylint.checkers import BaseRawChecker
+from pylint.interfaces import ITokenChecker, IASTNGChecker
+from pylint.checkers import BaseTokenChecker
from pylint.checkers.utils import check_messages
from pylint.utils import WarningScope
@@ -163,7 +163,7 @@ def check_line(line):
return msg_id, pretty_match(match, line.rstrip())
-class FormatChecker(BaseRawChecker):
+class FormatChecker(BaseTokenChecker):
"""checks for :
* unauthorized constructions
* strict indentation
@@ -171,7 +171,7 @@ class FormatChecker(BaseRawChecker):
* use of <> instead of !=
"""
- __implements__ = (IRawChecker, IASTNGChecker)
+ __implements__ = (ITokenChecker, IASTNGChecker)
# configuration section name
name = 'format'
@@ -192,22 +192,10 @@ class FormatChecker(BaseRawChecker):
" " (4 spaces) or "\\t" (1 tab).'}),
)
def __init__(self, linter=None):
- BaseRawChecker.__init__(self, linter)
+ BaseTokenChecker.__init__(self, linter)
self._lines = None
self._visited_lines = None
- def process_module(self, node):
- """extracts encoding from the stream and decodes each line, so that
- international text's length is properly calculated.
- """
- stream = node.file_stream
- stream.seek(0) # XXX may be removed with astng > 0.23
- readline = stream.readline
- if sys.version_info < (3, 0):
- if node.file_encoding is not None:
- readline = lambda: stream.readline().decode(node.file_encoding, 'replace')
- self.process_tokens(tokenize.generate_tokens(readline))
-
def new_line(self, tok_type, line, line_num, junk):
"""a new line has been encountered, process it if necessary"""
if not tok_type in junk:
diff --git a/checkers/raw_metrics.py b/checkers/raw_metrics.py
index 872ca7b..8728fb6 100644
--- a/checkers/raw_metrics.py
+++ b/checkers/raw_metrics.py
@@ -24,8 +24,8 @@ import tokenize
from logilab.common.ureports import Table
-from pylint.interfaces import IRawChecker
-from pylint.checkers import BaseRawChecker, EmptyReport
+from pylint.interfaces import ITokenChecker
+from pylint.checkers import BaseTokenChecker, EmptyReport
from pylint.reporters import diff_string
def report_raw_stats(sect, stats, old_stats):
@@ -50,7 +50,7 @@ def report_raw_stats(sect, stats, old_stats):
sect.append(Table(children=lines, cols=5, rheaders=1))
-class RawMetricsChecker(BaseRawChecker):
+class RawMetricsChecker(BaseTokenChecker):
"""does not check anything but gives some raw metrics :
* total number of lines
* total number of code lines
@@ -59,7 +59,7 @@ class RawMetricsChecker(BaseRawChecker):
* total number of empty lines
"""
- __implements__ = (IRawChecker,)
+ __implements__ = (ITokenChecker,)
# configuration section name
name = 'metrics'
@@ -71,7 +71,7 @@ class RawMetricsChecker(BaseRawChecker):
reports = ( ('RP0701', 'Raw metrics', report_raw_stats), )
def __init__(self, linter):
- BaseRawChecker.__init__(self, linter)
+ BaseTokenChecker.__init__(self, linter)
self.stats = None
def open(self):
diff --git a/checkers/strings.py b/checkers/strings.py
index 5c7d1e7..52ff003 100644
--- a/checkers/strings.py
+++ b/checkers/strings.py
@@ -23,8 +23,8 @@ import tokenize
from logilab import astng
-from pylint.interfaces import IRawChecker, IASTNGChecker
-from pylint.checkers import BaseChecker, BaseRawChecker
+from pylint.interfaces import ITokenChecker, IASTNGChecker
+from pylint.checkers import BaseChecker, BaseTokenChecker
from pylint.checkers import utils
_PY3K = sys.version_info >= (3, 0)
@@ -190,9 +190,9 @@ class StringMethodsChecker(BaseChecker):
args=(func.bound.name, func.name))
-class StringConstantChecker(BaseRawChecker):
+class StringConstantChecker(BaseTokenChecker):
"""Check string literals"""
- __implements__ = (IRawChecker, IASTNGChecker)
+ __implements__ = (ITokenChecker,)
name = 'string_constant'
msgs = {
'W1401': ('Anomalous backslash in string: \'%s\'. '
diff --git a/interfaces.py b/interfaces.py
index e29026d..a24e36f 100644
--- a/interfaces.py
+++ b/interfaces.py
@@ -50,6 +50,15 @@ class IRawChecker(IChecker):
"""
+class ITokenChecker(IChecker):
+ """Interface for checkers that need access to the token list."""
+ def process_tokens(self, tokens):
+ """Process a module.
+
+ tokens is a list of all source code tokens in the file.
+ """
+
+
class IASTNGChecker(IChecker):
""" interface for checker which prefers receive events according to
statement type
diff --git a/lint.py b/lint.py
index 43d25ca..98f7c70 100644
--- a/lint.py
+++ b/lint.py
@@ -48,9 +48,9 @@ from logilab.astng.__pkginfo__ import version as astng_version
from pylint.utils import (PyLintASTWalker, UnknownMessage, MessagesHandlerMixIn,
ReportsHandlerMixIn, MSG_TYPES, expand_modules,
- WarningScope)
-from pylint.interfaces import ILinter, IRawChecker, IASTNGChecker
-from pylint.checkers import (BaseRawChecker, EmptyReport,
+ WarningScope, tokenize_module)
+from pylint.interfaces import ILinter, IRawChecker, ITokenChecker, IASTNGChecker
+from pylint.checkers import (BaseTokenChecker, EmptyReport,
table_lines_from_stats)
from pylint.reporters.text import (TextReporter, ParseableTextReporter,
VSTextReporter, ColorizedTextReporter)
@@ -157,7 +157,7 @@ MSGS = {
class PyLinter(OptionsManagerMixIn, MessagesHandlerMixIn, ReportsHandlerMixIn,
- BaseRawChecker):
+ BaseTokenChecker):
"""lint Python modules using external checkers.
This is the main checker controlling the other ones and the reports
@@ -171,7 +171,7 @@ class PyLinter(OptionsManagerMixIn, MessagesHandlerMixIn, ReportsHandlerMixIn,
to ensure the latest code version is actually checked.
"""
- __implements__ = (ILinter, IRawChecker)
+ __implements__ = (ILinter, ITokenChecker)
name = 'master'
priority = 0
@@ -310,7 +310,7 @@ This is used by the global evaluation report (RP0004).'}),
config_file=pylintrc or config.PYLINTRC)
MessagesHandlerMixIn.__init__(self)
ReportsHandlerMixIn.__init__(self)
- BaseRawChecker.__init__(self)
+ BaseTokenChecker.__init__(self)
# provided reports
self.reports = (('RP0001', 'Messages by category',
report_total_messages_stats),
@@ -385,7 +385,7 @@ This is used by the global evaluation report (RP0004).'}),
self.set_reporter(reporter_class())
try:
- BaseRawChecker.set_option(self, optname, value, action, optdict)
+ BaseTokenChecker.set_option(self, optname, value, action, optdict)
except UnsupportedAction:
print >> sys.stderr, 'option %s can\'t be read from config file' % \
optname
@@ -565,8 +565,9 @@ This is used by the global evaluation report (RP0004).'}),
files_or_modules = (files_or_modules,)
walker = PyLintASTWalker(self)
checkers = self.prepare_checkers()
- rawcheckers = [c for c in checkers if implements(c, IRawChecker)
- and c is not self]
+ tokencheckers = [c for c in checkers if implements(c, ITokenChecker)
+ and c is not self]
+ rawcheckers = [c for c in checkers if implements(c, IRawChecker)]
# notify global begin
for checker in checkers:
checker.open()
@@ -589,7 +590,7 @@ This is used by the global evaluation report (RP0004).'}),
# fix the current file (if the source file was not available or
# if it's actually a c extension)
self.current_file = astng.file
- self.check_astng_module(astng, walker, rawcheckers)
+ self.check_astng_module(astng, walker, rawcheckers, tokencheckers)
self._add_suppression_messages()
# notify global end
self.set_current_module('')
@@ -645,16 +646,18 @@ This is used by the global evaluation report (RP0004).'}),
traceback.print_exc()
self.add_message('F0002', args=(ex.__class__, ex))
- def check_astng_module(self, astng, walker, rawcheckers):
+ def check_astng_module(self, astng, walker, rawcheckers, tokencheckers):
"""check a module from its astng representation, real work"""
# call raw checkers if possible
+ tokens = tokenize_module(astng)
+
if not astng.pure_python:
self.add_message('I0001', args=astng.name)
else:
#assert astng.file.endswith('.py')
- # invoke IRawChecker interface on self to fetch module/block
+ # invoke ITokenChecker interface on self to fetch module/block
# level options
- self.process_module(astng)
+ self.process_tokens(tokens)
if self._ignore_file:
return False
# walk ast to collect line numbers
@@ -666,6 +669,8 @@ This is used by the global evaluation report (RP0004).'}),
self.collect_block_lines(astng, orig_state)
for checker in rawcheckers:
checker.process_module(astng)
+ for checker in tokencheckers:
+ checker.process_tokens(tokens)
# generate events to astng checkers
walker.walk(astng)
return True
diff --git a/test/unittest_lint.py b/test/unittest_lint.py
index 2043ca6..94c0b03 100644
--- a/test/unittest_lint.py
+++ b/test/unittest_lint.py
@@ -27,7 +27,7 @@ from pylint import config
from pylint.lint import PyLinter, Run, UnknownMessage, preprocess_options, \
ArgumentPreprocessingError
from pylint.utils import sort_msgs, PyLintASTWalker, MSG_STATE_SCOPE_CONFIG, \
- MSG_STATE_SCOPE_MODULE
+ MSG_STATE_SCOPE_MODULE, tokenize_module
from pylint import checkers
@@ -154,7 +154,7 @@ class PyLinterTC(TestCase):
filepath = join(INPUTDIR, 'func_block_disable_msg.py')
linter.set_current_module('func_block_disable_msg')
astng = linter.get_astng(filepath, 'func_block_disable_msg')
- linter.process_module(astng)
+ linter.process_tokens(tokenize_module(astng))
orig_state = linter._module_msgs_state.copy()
linter._module_msgs_state = {}
linter._suppression_mapping = {}
diff --git a/utils.py b/utils.py
index c75d3bb..a984f58 100644
--- a/utils.py
+++ b/utils.py
@@ -18,6 +18,7 @@ main pylint class
"""
import sys
+import tokenize
from warnings import warn
from os.path import dirname, basename, splitext, exists, isdir, join, normpath
@@ -31,7 +32,7 @@ from logilab.common.ureports import Section
from logilab.astng import nodes, Module
from pylint.checkers import EmptyReport
-from pylint.interfaces import IRawChecker
+from pylint.interfaces import IRawChecker, ITokenChecker
class UnknownMessage(Exception):
@@ -104,6 +105,17 @@ def category_id(id):
return MSG_TYPES_LONG.get(id)
+def tokenize_module(module):
+ stream = module.file_stream
+ stream.seek(0)
+ if sys.version_info < (3, 0) and module.file_encoding is not None:
+ readline = lambda: stream.readline().decode(module.file_encoding,
+ 'replace')
+ else:
+ readline = stream.readline
+ return list(tokenize.generate_tokens(readline))
+
+
class Message:
def __init__(self, checker, msgid, msg, descr, symbol, scope):
assert len(msgid) == 5, 'Invalid message id %s' % msgid
@@ -147,7 +159,7 @@ class MessagesHandlerMixIn:
chkid = None
for msgid, msg_tuple in msgs_dict.iteritems():
- if implements(checker, IRawChecker):
+ if implements(checker, (IRawChecker, ITokenChecker)):
scope = WarningScope.LINE
else:
scope = WarningScope.NODE