summary refs log tree commit diff
path: root/checkers
diff options
context:
space:
mode:
Diffstat (limited to 'checkers')
-rw-r--r--  checkers/__init__.py  |  4
-rw-r--r--  checkers/format.py    | 20
-rw-r--r--  checkers/misc.py      | 53
-rw-r--r--  checkers/similar.py   |  5
4 files changed, 17 insertions, 65 deletions
diff --git a/checkers/__init__.py b/checkers/__init__.py
index 0e0a2f3..e9d7e21 100644
--- a/checkers/__init__.py
+++ b/checkers/__init__.py
@@ -107,13 +107,15 @@ class BaseChecker(OptionsProviderMixIn, ASTWalker):
class BaseRawChecker(BaseChecker):
"""base class for raw checkers"""
- def process_module(self, stream):
+ def process_module(self, node):
"""process a module
the module's content is accessible via the stream object
stream must implement the readline method
"""
+ stream = node.file_stream
+ stream.seek(0)
self.process_tokens(tokenize.generate_tokens(stream.readline))
def process_tokens(self, tokens):
diff --git a/checkers/format.py b/checkers/format.py
index 8c874fa..001ee5b 100644
--- a/checkers/format.py
+++ b/checkers/format.py
@@ -31,7 +31,6 @@ from logilab.astng import nodes
from pylint.interfaces import IRawChecker, IASTNGChecker
from pylint.checkers import BaseRawChecker
-from pylint.checkers.misc import guess_encoding, is_ascii
from pylint.checkers.utils import check_messages
MSGS = {
@@ -180,22 +179,17 @@ class FormatChecker(BaseRawChecker):
self._lines = None
self._visited_lines = None
- def process_module(self, stream):
+ def process_module(self, node):
"""extracts encoding from the stream and decodes each line, so that
international text's length is properly calculated.
"""
- line_reader = stream.readline
- if sys.version_info < (3, 0):
- data = stream.read()
- if not is_ascii(data)[0]:
- encoding = guess_encoding(data)
- if encoding is not None:
- line_reader = lambda: stream.readline().decode(encoding,
- 'replace')
- del data
-
+ stream = node.file_stream
stream.seek(0)
- self.process_tokens(tokenize.generate_tokens(line_reader))
+ readline = stream.readline
+ if sys.version_info < (3, 0):
+ if node.file_encoding is not None:
+ readline = lambda: stream.readline().decode(node.file_encoding, 'replace')
+ self.process_tokens(tokenize.generate_tokens(readline))
def new_line(self, tok_type, line, line_num, junk):
"""a new line has been encountered, process it if necessary"""
diff --git a/checkers/misc.py b/checkers/misc.py
index 83e337c..8f6ad2d 100644
--- a/checkers/misc.py
+++ b/checkers/misc.py
@@ -17,46 +17,13 @@
Check source code is ascii only or has an encoding declaration (PEP 263)
"""
-import re
+import re, sys
from pylint.interfaces import IRawChecker
from pylint.checkers import BaseChecker
-def is_ascii(string):
- """return true if non ascii characters are detected in the given string
- and line number where non-ascii has been encountered.
- """
- for i, line in enumerate(string.splitlines()):
- if line and max([ord(char) for char in line]) >= 128:
- return False, i + 1
- return True, 0
-
-# regexp matching both emacs and vim declaration
-ENCODING_RGX = re.compile("[^#]*#*.*coding[:=]\s*([^\s]+)")
-
-def guess_encoding(string):
- """try to guess encoding from a python file as string
- return None if not found
- """
- assert isinstance(string, str), type(string)
- # check for UTF-8 byte-order mark
- if string.startswith('\xef\xbb\xbf'):
- return 'UTF-8'
- first_lines = string.split('\n', 2)[:2]
- for line in first_lines:
- # check for emacs / vim encoding declaration
- match = ENCODING_RGX.match(line)
- if match is not None:
- return match.group(1)
-
MSGS = {
- 'E0501': ('Non ascii characters found but no encoding specified (PEP 263)',
- 'Used when some non ascii characters are detected but now \
- encoding is specified, as explicited in the PEP 263.'),
- 'E0502': ('Wrong encoding specified (%s)',
- 'Used when a known encoding is specified but the file doesn\'t \
- seem to be actually in this encoding.'),
'W0511': ('%s',
'Used when a warning note as FIXME or XXX is detected.'),
}
@@ -83,25 +50,13 @@ separated by a comma.'
def __init__(self, linter=None):
BaseChecker.__init__(self, linter)
- def process_module(self, stream):
+ def process_module(self, node):
"""inspect the source file to found encoding problem or fixmes like
notes
"""
- # source encoding
- data = stream.read()
- ascii, lineno = is_ascii(data)
- if not ascii:
- encoding = guess_encoding(data)
- if encoding is None:
- self.add_message('E0501', line=lineno)
- else:
- try:
- unicode(data, encoding)
- except UnicodeError:
- self.add_message('E0502', args=encoding, line=1)
- del data
- # warning notes in the code
+ stream = node.file_stream
stream.seek(0)
+ # warning notes in the code
notes = []
for note in self.config.notes:
notes.append(re.compile(note))
diff --git a/checkers/similar.py b/checkers/similar.py
index 6a62b8a..d8651b5 100644
--- a/checkers/similar.py
+++ b/checkers/similar.py
@@ -39,6 +39,7 @@ class Similar:
def append_stream(self, streamid, stream):
"""append a file to search for similarities"""
+ stream.seek(0)
self.linesets.append(LineSet(streamid,
stream.readlines(),
self.ignore_comments,
@@ -264,14 +265,14 @@ class SimilarChecker(BaseChecker, Similar):
self.stats = self.linter.add_stats(nb_duplicated_lines=0,
percent_duplicated_lines=0)
- def process_module(self, stream):
+ def process_module(self, node):
"""process a module
the module's content is accessible via the stream object
stream must implement the readlines method
"""
- self.append_stream(self.linter.current_name, stream)
+ self.append_stream(self.linter.current_name, node.file_stream)
def close(self):
"""compute and display similarities on closing (i.e. end of parsing)"""