diff options
Diffstat (limited to 'checkers')
-rw-r--r-- | checkers/__init__.py |  4
-rw-r--r-- | checkers/format.py   | 20
-rw-r--r-- | checkers/misc.py     | 53
-rw-r--r-- | checkers/similar.py  |  5
4 files changed, 17 insertions, 65 deletions
diff --git a/checkers/__init__.py b/checkers/__init__.py index 0e0a2f3..e9d7e21 100644 --- a/checkers/__init__.py +++ b/checkers/__init__.py @@ -107,13 +107,15 @@ class BaseChecker(OptionsProviderMixIn, ASTWalker): class BaseRawChecker(BaseChecker): """base class for raw checkers""" - def process_module(self, stream): + def process_module(self, node): """process a module the module's content is accessible via the stream object stream must implement the readline method """ + stream = node.file_stream + stream.seek(0) self.process_tokens(tokenize.generate_tokens(stream.readline)) def process_tokens(self, tokens): diff --git a/checkers/format.py b/checkers/format.py index 8c874fa..001ee5b 100644 --- a/checkers/format.py +++ b/checkers/format.py @@ -31,7 +31,6 @@ from logilab.astng import nodes from pylint.interfaces import IRawChecker, IASTNGChecker from pylint.checkers import BaseRawChecker -from pylint.checkers.misc import guess_encoding, is_ascii from pylint.checkers.utils import check_messages MSGS = { @@ -180,22 +179,17 @@ class FormatChecker(BaseRawChecker): self._lines = None self._visited_lines = None - def process_module(self, stream): + def process_module(self, node): """extracts encoding from the stream and decodes each line, so that international text's length is properly calculated. 
""" - line_reader = stream.readline - if sys.version_info < (3, 0): - data = stream.read() - if not is_ascii(data)[0]: - encoding = guess_encoding(data) - if encoding is not None: - line_reader = lambda: stream.readline().decode(encoding, - 'replace') - del data - + stream = node.file_stream stream.seek(0) - self.process_tokens(tokenize.generate_tokens(line_reader)) + readline = stream.readline + if sys.version_info < (3, 0): + if node.file_encoding is not None: + readline = lambda: stream.readline().decode(node.file_encoding, 'replace') + self.process_tokens(tokenize.generate_tokens(readline)) def new_line(self, tok_type, line, line_num, junk): """a new line has been encountered, process it if necessary""" diff --git a/checkers/misc.py b/checkers/misc.py index 83e337c..8f6ad2d 100644 --- a/checkers/misc.py +++ b/checkers/misc.py @@ -17,46 +17,13 @@ Check source code is ascii only or has an encoding declaration (PEP 263) """ -import re +import re, sys from pylint.interfaces import IRawChecker from pylint.checkers import BaseChecker -def is_ascii(string): - """return true if non ascii characters are detected in the given string - and line number where non-ascii has been encountered. 
- """ - for i, line in enumerate(string.splitlines()): - if line and max([ord(char) for char in line]) >= 128: - return False, i + 1 - return True, 0 - -# regexp matching both emacs and vim declaration -ENCODING_RGX = re.compile("[^#]*#*.*coding[:=]\s*([^\s]+)") - -def guess_encoding(string): - """try to guess encoding from a python file as string - return None if not found - """ - assert isinstance(string, str), type(string) - # check for UTF-8 byte-order mark - if string.startswith('\xef\xbb\xbf'): - return 'UTF-8' - first_lines = string.split('\n', 2)[:2] - for line in first_lines: - # check for emacs / vim encoding declaration - match = ENCODING_RGX.match(line) - if match is not None: - return match.group(1) - MSGS = { - 'E0501': ('Non ascii characters found but no encoding specified (PEP 263)', - 'Used when some non ascii characters are detected but now \ - encoding is specified, as explicited in the PEP 263.'), - 'E0502': ('Wrong encoding specified (%s)', - 'Used when a known encoding is specified but the file doesn\'t \ - seem to be actually in this encoding.'), 'W0511': ('%s', 'Used when a warning note as FIXME or XXX is detected.'), } @@ -83,25 +50,13 @@ separated by a comma.' 
def __init__(self, linter=None): BaseChecker.__init__(self, linter) - def process_module(self, stream): + def process_module(self, node): """inspect the source file to found encoding problem or fixmes like notes """ - # source encoding - data = stream.read() - ascii, lineno = is_ascii(data) - if not ascii: - encoding = guess_encoding(data) - if encoding is None: - self.add_message('E0501', line=lineno) - else: - try: - unicode(data, encoding) - except UnicodeError: - self.add_message('E0502', args=encoding, line=1) - del data - # warning notes in the code + stream = node.file_stream stream.seek(0) + # warning notes in the code notes = [] for note in self.config.notes: notes.append(re.compile(note)) diff --git a/checkers/similar.py b/checkers/similar.py index 6a62b8a..d8651b5 100644 --- a/checkers/similar.py +++ b/checkers/similar.py @@ -39,6 +39,7 @@ class Similar: def append_stream(self, streamid, stream): """append a file to search for similarities""" + stream.seek(0) self.linesets.append(LineSet(streamid, stream.readlines(), self.ignore_comments, @@ -264,14 +265,14 @@ class SimilarChecker(BaseChecker, Similar): self.stats = self.linter.add_stats(nb_duplicated_lines=0, percent_duplicated_lines=0) - def process_module(self, stream): + def process_module(self, node): """process a module the module's content is accessible via the stream object stream must implement the readlines method """ - self.append_stream(self.linter.current_name, stream) + self.append_stream(self.linter.current_name, node.file_stream) def close(self): """compute and display similarities on closing (i.e. end of parsing)""" |