diff options
author | Emile Anclin <emile.anclin@logilab.fr> | 2010-11-22 15:36:44 +0100 |
---|---|---|
committer | Emile Anclin <emile.anclin@logilab.fr> | 2010-11-22 15:36:44 +0100 |
commit | ec33eaf8fbd27986d9610abef32534aba2608fe5 (patch) | |
tree | 994fc3f7722ab05d9440248c84ca3d9197772c14 | |
parent | a068a892c623a0697240bfe163f2aa5735ae7777 (diff) | |
download | pylint-ec33eaf8fbd27986d9610abef32534aba2608fe5.tar.gz |
py3k: need to handle guess_encoding in astng
Astng will try to find the right encoding and provide the right "stream"
interface for the Pylint checkers.
Reading a stream with the wrong encoding in py3k will generate a UnicodeError.
The newly introduced 'F0010' failure should maybe be replaced by E0501, E0502
and F0002. However, can we really call the ASTNGBuildingExceptions that we
raise in logilab.astng.builder 'unexpected errors'?
-rw-r--r-- | ChangeLog | 5 | ||||
-rw-r--r-- | checkers/__init__.py | 4 | ||||
-rw-r--r-- | checkers/format.py | 20 | ||||
-rw-r--r-- | checkers/misc.py | 53 | ||||
-rw-r--r-- | checkers/similar.py | 5 | ||||
-rw-r--r-- | examples/custom_raw.py | 14 | ||||
-rw-r--r-- | interfaces.py | 28 | ||||
-rw-r--r-- | lint.py | 14 | ||||
-rw-r--r-- | test/input/func_nonascii_noencoding.py | 5 | ||||
-rw-r--r-- | test/input/func_wrong_encoding.py | 6 | ||||
-rw-r--r-- | test/messages/func_nonascii_noencoding.txt | 1 | ||||
-rw-r--r-- | test/messages/func_wrong_encoding.txt | 1 | ||||
-rw-r--r-- | test/test_encoding.py | 62 | ||||
-rw-r--r-- | test/unittest_lint.py | 5 |
14 files changed, 56 insertions, 167 deletions
@@ -1,6 +1,11 @@ ChangeLog for PyLint ==================== + * drop E0501 and E0502 messages about wrong source encoding: not anymore + interesting since it's a syntax error for python >= 2.5 and we now only + support this python version and above. + + 2010-11-15 -- 0.22.0 * python versions: minimal python3.x support; drop python < 2.5 support diff --git a/checkers/__init__.py b/checkers/__init__.py index 0e0a2f3..e9d7e21 100644 --- a/checkers/__init__.py +++ b/checkers/__init__.py @@ -107,13 +107,15 @@ class BaseChecker(OptionsProviderMixIn, ASTWalker): class BaseRawChecker(BaseChecker): """base class for raw checkers""" - def process_module(self, stream): + def process_module(self, node): """process a module the module's content is accessible via the stream object stream must implement the readline method """ + stream = node.file_stream + stream.seek(0) self.process_tokens(tokenize.generate_tokens(stream.readline)) def process_tokens(self, tokens): diff --git a/checkers/format.py b/checkers/format.py index 8c874fa..001ee5b 100644 --- a/checkers/format.py +++ b/checkers/format.py @@ -31,7 +31,6 @@ from logilab.astng import nodes from pylint.interfaces import IRawChecker, IASTNGChecker from pylint.checkers import BaseRawChecker -from pylint.checkers.misc import guess_encoding, is_ascii from pylint.checkers.utils import check_messages MSGS = { @@ -180,22 +179,17 @@ class FormatChecker(BaseRawChecker): self._lines = None self._visited_lines = None - def process_module(self, stream): + def process_module(self, node): """extracts encoding from the stream and decodes each line, so that international text's length is properly calculated. 
""" - line_reader = stream.readline - if sys.version_info < (3, 0): - data = stream.read() - if not is_ascii(data)[0]: - encoding = guess_encoding(data) - if encoding is not None: - line_reader = lambda: stream.readline().decode(encoding, - 'replace') - del data - + stream = node.file_stream stream.seek(0) - self.process_tokens(tokenize.generate_tokens(line_reader)) + readline = stream.readline + if sys.version_info < (3, 0): + if node.file_encoding is not None: + readline = lambda: stream.readline().decode(node.file_encoding, 'replace') + self.process_tokens(tokenize.generate_tokens(readline)) def new_line(self, tok_type, line, line_num, junk): """a new line has been encountered, process it if necessary""" diff --git a/checkers/misc.py b/checkers/misc.py index 83e337c..8f6ad2d 100644 --- a/checkers/misc.py +++ b/checkers/misc.py @@ -17,46 +17,13 @@ Check source code is ascii only or has an encoding declaration (PEP 263) """ -import re +import re, sys from pylint.interfaces import IRawChecker from pylint.checkers import BaseChecker -def is_ascii(string): - """return true if non ascii characters are detected in the given string - and line number where non-ascii has been encountered. 
- """ - for i, line in enumerate(string.splitlines()): - if line and max([ord(char) for char in line]) >= 128: - return False, i + 1 - return True, 0 - -# regexp matching both emacs and vim declaration -ENCODING_RGX = re.compile("[^#]*#*.*coding[:=]\s*([^\s]+)") - -def guess_encoding(string): - """try to guess encoding from a python file as string - return None if not found - """ - assert isinstance(string, str), type(string) - # check for UTF-8 byte-order mark - if string.startswith('\xef\xbb\xbf'): - return 'UTF-8' - first_lines = string.split('\n', 2)[:2] - for line in first_lines: - # check for emacs / vim encoding declaration - match = ENCODING_RGX.match(line) - if match is not None: - return match.group(1) - MSGS = { - 'E0501': ('Non ascii characters found but no encoding specified (PEP 263)', - 'Used when some non ascii characters are detected but now \ - encoding is specified, as explicited in the PEP 263.'), - 'E0502': ('Wrong encoding specified (%s)', - 'Used when a known encoding is specified but the file doesn\'t \ - seem to be actually in this encoding.'), 'W0511': ('%s', 'Used when a warning note as FIXME or XXX is detected.'), } @@ -83,25 +50,13 @@ separated by a comma.' 
def __init__(self, linter=None): BaseChecker.__init__(self, linter) - def process_module(self, stream): + def process_module(self, node): """inspect the source file to found encoding problem or fixmes like notes """ - # source encoding - data = stream.read() - ascii, lineno = is_ascii(data) - if not ascii: - encoding = guess_encoding(data) - if encoding is None: - self.add_message('E0501', line=lineno) - else: - try: - unicode(data, encoding) - except UnicodeError: - self.add_message('E0502', args=encoding, line=1) - del data - # warning notes in the code + stream = node.file_stream stream.seek(0) + # warning notes in the code notes = [] for note in self.config.notes: notes.append(re.compile(note)) diff --git a/checkers/similar.py b/checkers/similar.py index 6a62b8a..d8651b5 100644 --- a/checkers/similar.py +++ b/checkers/similar.py @@ -39,6 +39,7 @@ class Similar: def append_stream(self, streamid, stream): """append a file to search for similarities""" + stream.seek(0) self.linesets.append(LineSet(streamid, stream.readlines(), self.ignore_comments, @@ -264,14 +265,14 @@ class SimilarChecker(BaseChecker, Similar): self.stats = self.linter.add_stats(nb_duplicated_lines=0, percent_duplicated_lines=0) - def process_module(self, stream): + def process_module(self, node): """process a module the module's content is accessible via the stream object stream must implement the readlines method """ - self.append_stream(self.linter.current_name, stream) + self.append_stream(self.linter.current_name, node.file_stream) def close(self): """compute and display similarities on closing (i.e. 
end of parsing)""" diff --git a/examples/custom_raw.py b/examples/custom_raw.py index 701f6e9..811c785 100644 --- a/examples/custom_raw.py +++ b/examples/custom_raw.py @@ -5,7 +5,7 @@ class MyRawChecker(BaseChecker): """check for line continuations with '\' instead of using triple quoted string or parenthesis """ - + __implements__ = IRawChecker name = 'custom_raw' @@ -15,17 +15,19 @@ class MyRawChecker(BaseChecker): } options = () - def process_module(self, stream): + def process_module(self, node): """process a module - - the module's content is accessible via the stream object + + the module's content is accessible via node.file_stream object """ + stream = node.file_stream + stream.seek(0) for (lineno, line) in enumerate(stream): if line.rstrip().endswith('\\'): self.add_message('W9901', line=lineno) - + def register(linter): """required method to auto register this checker""" linter.register_checker(MyRawChecker(linter)) - + diff --git a/interfaces.py b/interfaces.py index 7bbf34e..3d7bdad 100644 --- a/interfaces.py +++ b/interfaces.py @@ -28,25 +28,25 @@ class IChecker(Interface): def open(self): """called before visiting project (i.e set of modules)""" - + def close(self): """called after visiting project (i.e set of modules)""" ## def open_module(self): ## """called before visiting a module""" - + ## def close_module(self): ## """called after visiting a module""" - - + + class IRawChecker(IChecker): """interface for checker which need to parse the raw file """ - - def process_module(self, stream): + + def process_module(self, astng): """ process a module - - the module's content is accessible via the stream object + + the module's content is accessible via astng.file_stream """ @@ -62,7 +62,7 @@ class ILinter(Interface): the linter class will generate events to its registered checkers. 
Each checker may interact with the linter instance using this API """ - + def register_checker(self, checker): """register a new checker class @@ -73,11 +73,11 @@ class ILinter(Interface): """add the message corresponding to the given id. If provided, msg is expanded using args - + astng checkers should provide the node argument, raw checkers should provide the line argument. """ - + class IReporter(Interface): """ reporter collect messages and display results encapsulated in a layout @@ -89,10 +89,10 @@ class IReporter(Interface): location is a 3-uple (module, object, line) msg is the actual message """ - + def display_results(self, layout): """display results encapsulated in the layout tree """ - - + + __all__ = ('IRawChecker', 'IStatable', 'ILinter', 'IReporter') @@ -44,7 +44,7 @@ from logilab.common.textutils import splitstrip from logilab.common.ureports import Table, Text, Section from logilab.common.__pkginfo__ import version as common_version -from logilab.astng import MANAGER, nodes +from logilab.astng import MANAGER, nodes, ASTNGBuildingException from logilab.astng.__pkginfo__ import version as astng_version from pylint.utils import PyLintASTWalker, UnknownMessage, MessagesHandlerMixIn,\ @@ -84,6 +84,10 @@ MSGS = { 'F0004': ('unexpected inferred value %s', 'Used to indicate that some value of an unexpected type has been \ inferred.'), + 'F0010': ('error while code parsing: %s', + 'Used when an exception occured while building the ASTNG \ + representation which could be handled by astng.'), + 'I0001': ('Unable to run raw checkers on built-in module %s', 'Used to inform that a built-in module has not been checked \ @@ -532,6 +536,8 @@ This is used by the global evaluation report (RP0004).'}), return MANAGER.astng_from_file(filepath, modname, source=True) except SyntaxError, ex: self.add_message('E0001', line=ex.lineno, args=ex.msg) + except ASTNGBuildingException, ex: + self.add_message('F0010', args=ex) except Exception, ex: # import traceback # 
traceback.print_exc() @@ -544,10 +550,9 @@ This is used by the global evaluation report (RP0004).'}), self.add_message('I0001', args=astng.name) else: #assert astng.file.endswith('.py') - stream = open(astng.file, 'U') # invoke IRawChecker interface on self to fetch module/block # level options - self.process_module(stream) + self.process_module(astng) if self._ignore_file: return False # walk ast to collect line numbers @@ -555,8 +560,7 @@ This is used by the global evaluation report (RP0004).'}), self._module_msgs_state = {} self.collect_block_lines(astng, orig_state) for checker in rawcheckers: - stream.seek(0) - checker.process_module(stream) + checker.process_module(astng) # generate events to astng checkers walker.walk(astng) return True diff --git a/test/input/func_nonascii_noencoding.py b/test/input/func_nonascii_noencoding.py deleted file mode 100644 index 1ba3578..0000000 --- a/test/input/func_nonascii_noencoding.py +++ /dev/null @@ -1,5 +0,0 @@ -"""test file with non ascii characters and no encoding declaration""" - -__revision__ = '' - -YOP = 'héhéhé' diff --git a/test/input/func_wrong_encoding.py b/test/input/func_wrong_encoding.py deleted file mode 100644 index 267fa2c..0000000 --- a/test/input/func_wrong_encoding.py +++ /dev/null @@ -1,6 +0,0 @@ -# -*- coding: UTF-8 -*- -""" check correct wrong encoding declaration -""" - -__revision__ = 'éééé' - diff --git a/test/messages/func_nonascii_noencoding.txt b/test/messages/func_nonascii_noencoding.txt deleted file mode 100644 index a802512..0000000 --- a/test/messages/func_nonascii_noencoding.txt +++ /dev/null @@ -1 +0,0 @@ -E: 5: Non ascii characters found but no encoding specified (PEP 263) diff --git a/test/messages/func_wrong_encoding.txt b/test/messages/func_wrong_encoding.txt deleted file mode 100644 index 10123a1..0000000 --- a/test/messages/func_wrong_encoding.txt +++ /dev/null @@ -1 +0,0 @@ -E: 1: Wrong encoding specified (UTF-8) diff --git a/test/test_encoding.py b/test/test_encoding.py deleted 
file mode 100644 index 34003df..0000000 --- a/test/test_encoding.py +++ /dev/null @@ -1,62 +0,0 @@ -# This program is free software; you can redistribute it and/or modify it under -# the terms of the GNU General Public License as published by the Free Software -# Foundation; either version 2 of the License, or (at your option) any later -# version. -# -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along with -# this program; if not, write to the Free Software Foundation, Inc., -# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -""" Copyright (c) 2003-2005 LOGILAB S.A. (Paris, FRANCE). - http://www.logilab.fr/ -- mailto:contact@logilab.fr - -Check source code is ascii only or has an encoding declaration (PEP 263) -""" - -__revision__ = '$Id: test_encoding.py,v 1.6 2005-11-02 09:22:04 syt Exp $' - -from logilab.common.testlib import TestCase, unittest_main -import sys -from pylint.checkers.misc import guess_encoding - -class TestGuessEncoding(TestCase): - - def testEmacs(self): - e = guess_encoding('# -*- coding: UTF-8 -*-') - self.failUnlessEqual(e, 'UTF-8') - e = guess_encoding('# -*- coding:UTF-8 -*-') - self.failUnlessEqual(e, 'UTF-8') - e = guess_encoding(''' - ### -*- coding: ISO-8859-1 -*- - ''') - self.failUnlessEqual(e, 'ISO-8859-1') - e = guess_encoding(''' - - ### -*- coding: ISO-8859-1 -*- - ''') - self.failUnlessEqual(e, None) - - def testVim(self): - e = guess_encoding('# vim:fileencoding=UTF-8') - self.failUnlessEqual(e, 'UTF-8') - e = guess_encoding(''' - ### vim:fileencoding=ISO-8859-1 - ''') - self.failUnlessEqual(e, 'ISO-8859-1') - e = guess_encoding(''' - - ### vim:fileencoding= ISO-8859-1 - ''') - self.failUnlessEqual(e, None) - - def testUTF8(self): - e = 
guess_encoding('\xef\xbb\xbf any UTF-8 data') - self.failUnlessEqual(e, 'UTF-8') - e = guess_encoding(' any UTF-8 data \xef\xbb\xbf') - self.failUnlessEqual(e, None) - -if __name__ == '__main__': - unittest_main() diff --git a/test/unittest_lint.py b/test/unittest_lint.py index 66641f1..b59b640 100644 --- a/test/unittest_lint.py +++ b/test/unittest_lint.py @@ -134,10 +134,11 @@ class PyLinterTC(TestCase): linter.open() filepath = join(INPUTDIR, 'func_block_disable_msg.py') linter.set_current_module('func_block_disable_msg') - linter.process_module(open(filepath)) + astng = linter.get_astng(filepath, 'func_block_disable_msg') + linter.process_module(astng) orig_state = linter._module_msgs_state.copy() linter._module_msgs_state = {} - linter.collect_block_lines(linter.get_astng(filepath, 'func_block_disable_msg'), orig_state) + linter.collect_block_lines(astng, orig_state) # global (module level) self.assert_(linter.is_message_enabled('W0613')) self.assert_(linter.is_message_enabled('E1101')) |