summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Emile Anclin <emile.anclin@logilab.fr> 2010-11-22 15:36:44 +0100
committer: Emile Anclin <emile.anclin@logilab.fr> 2010-11-22 15:36:44 +0100
commit: ec33eaf8fbd27986d9610abef32534aba2608fe5 (patch)
tree: 994fc3f7722ab05d9440248c84ca3d9197772c14
parent: a068a892c623a0697240bfe163f2aa5735ae7777 (diff)
download: pylint-ec33eaf8fbd27986d9610abef32534aba2608fe5.tar.gz
py3k: need to handle guess_encoding in astng
Astng will try to find the right encoding and provide the right "stream" interface for the Pylint checkers. Reading a stream with the wrong encoding in py3k will raise a UnicodeError. Should the newly introduced 'F0010' failure perhaps be replaced by E0501, E0502 and F0002? However, can we really call the ASTNGBuildingExceptions that we raise in logilab.astng.builder 'unexpected errors'?
-rw-r--r-- ChangeLog 5
-rw-r--r-- checkers/__init__.py 4
-rw-r--r-- checkers/format.py 20
-rw-r--r-- checkers/misc.py 53
-rw-r--r-- checkers/similar.py 5
-rw-r--r-- examples/custom_raw.py 14
-rw-r--r-- interfaces.py 28
-rw-r--r-- lint.py 14
-rw-r--r-- test/input/func_nonascii_noencoding.py 5
-rw-r--r-- test/input/func_wrong_encoding.py 6
-rw-r--r-- test/messages/func_nonascii_noencoding.txt 1
-rw-r--r-- test/messages/func_wrong_encoding.txt 1
-rw-r--r-- test/test_encoding.py 62
-rw-r--r-- test/unittest_lint.py 5
14 files changed, 56 insertions, 167 deletions
diff --git a/ChangeLog b/ChangeLog
index c7bfced..033d0ae 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,11 @@
ChangeLog for PyLint
====================
+ * drop E0501 and E0502 messages about wrong source encoding: they are no
+ longer useful since this is a syntax error for python >= 2.5 and we now
+ only support this python version and above.
+
+
2010-11-15 -- 0.22.0
* python versions: minimal python3.x support; drop python < 2.5 support
diff --git a/checkers/__init__.py b/checkers/__init__.py
index 0e0a2f3..e9d7e21 100644
--- a/checkers/__init__.py
+++ b/checkers/__init__.py
@@ -107,13 +107,15 @@ class BaseChecker(OptionsProviderMixIn, ASTWalker):
class BaseRawChecker(BaseChecker):
"""base class for raw checkers"""
- def process_module(self, stream):
+ def process_module(self, node):
"""process a module
the module's content is accessible via the stream object
stream must implement the readline method
"""
+ stream = node.file_stream
+ stream.seek(0)
self.process_tokens(tokenize.generate_tokens(stream.readline))
def process_tokens(self, tokens):
diff --git a/checkers/format.py b/checkers/format.py
index 8c874fa..001ee5b 100644
--- a/checkers/format.py
+++ b/checkers/format.py
@@ -31,7 +31,6 @@ from logilab.astng import nodes
from pylint.interfaces import IRawChecker, IASTNGChecker
from pylint.checkers import BaseRawChecker
-from pylint.checkers.misc import guess_encoding, is_ascii
from pylint.checkers.utils import check_messages
MSGS = {
@@ -180,22 +179,17 @@ class FormatChecker(BaseRawChecker):
self._lines = None
self._visited_lines = None
- def process_module(self, stream):
+ def process_module(self, node):
"""extracts encoding from the stream and decodes each line, so that
international text's length is properly calculated.
"""
- line_reader = stream.readline
- if sys.version_info < (3, 0):
- data = stream.read()
- if not is_ascii(data)[0]:
- encoding = guess_encoding(data)
- if encoding is not None:
- line_reader = lambda: stream.readline().decode(encoding,
- 'replace')
- del data
-
+ stream = node.file_stream
stream.seek(0)
- self.process_tokens(tokenize.generate_tokens(line_reader))
+ readline = stream.readline
+ if sys.version_info < (3, 0):
+ if node.file_encoding is not None:
+ readline = lambda: stream.readline().decode(node.file_encoding, 'replace')
+ self.process_tokens(tokenize.generate_tokens(readline))
def new_line(self, tok_type, line, line_num, junk):
"""a new line has been encountered, process it if necessary"""
diff --git a/checkers/misc.py b/checkers/misc.py
index 83e337c..8f6ad2d 100644
--- a/checkers/misc.py
+++ b/checkers/misc.py
@@ -17,46 +17,13 @@
Check source code is ascii only or has an encoding declaration (PEP 263)
"""
-import re
+import re, sys
from pylint.interfaces import IRawChecker
from pylint.checkers import BaseChecker
-def is_ascii(string):
- """return true if non ascii characters are detected in the given string
- and line number where non-ascii has been encountered.
- """
- for i, line in enumerate(string.splitlines()):
- if line and max([ord(char) for char in line]) >= 128:
- return False, i + 1
- return True, 0
-
-# regexp matching both emacs and vim declaration
-ENCODING_RGX = re.compile("[^#]*#*.*coding[:=]\s*([^\s]+)")
-
-def guess_encoding(string):
- """try to guess encoding from a python file as string
- return None if not found
- """
- assert isinstance(string, str), type(string)
- # check for UTF-8 byte-order mark
- if string.startswith('\xef\xbb\xbf'):
- return 'UTF-8'
- first_lines = string.split('\n', 2)[:2]
- for line in first_lines:
- # check for emacs / vim encoding declaration
- match = ENCODING_RGX.match(line)
- if match is not None:
- return match.group(1)
-
MSGS = {
- 'E0501': ('Non ascii characters found but no encoding specified (PEP 263)',
- 'Used when some non ascii characters are detected but now \
- encoding is specified, as explicited in the PEP 263.'),
- 'E0502': ('Wrong encoding specified (%s)',
- 'Used when a known encoding is specified but the file doesn\'t \
- seem to be actually in this encoding.'),
'W0511': ('%s',
'Used when a warning note as FIXME or XXX is detected.'),
}
@@ -83,25 +50,13 @@ separated by a comma.'
def __init__(self, linter=None):
BaseChecker.__init__(self, linter)
- def process_module(self, stream):
+ def process_module(self, node):
"""inspect the source file to found encoding problem or fixmes like
notes
"""
- # source encoding
- data = stream.read()
- ascii, lineno = is_ascii(data)
- if not ascii:
- encoding = guess_encoding(data)
- if encoding is None:
- self.add_message('E0501', line=lineno)
- else:
- try:
- unicode(data, encoding)
- except UnicodeError:
- self.add_message('E0502', args=encoding, line=1)
- del data
- # warning notes in the code
+ stream = node.file_stream
stream.seek(0)
+ # warning notes in the code
notes = []
for note in self.config.notes:
notes.append(re.compile(note))
diff --git a/checkers/similar.py b/checkers/similar.py
index 6a62b8a..d8651b5 100644
--- a/checkers/similar.py
+++ b/checkers/similar.py
@@ -39,6 +39,7 @@ class Similar:
def append_stream(self, streamid, stream):
"""append a file to search for similarities"""
+ stream.seek(0)
self.linesets.append(LineSet(streamid,
stream.readlines(),
self.ignore_comments,
@@ -264,14 +265,14 @@ class SimilarChecker(BaseChecker, Similar):
self.stats = self.linter.add_stats(nb_duplicated_lines=0,
percent_duplicated_lines=0)
- def process_module(self, stream):
+ def process_module(self, node):
"""process a module
the module's content is accessible via the stream object
stream must implement the readlines method
"""
- self.append_stream(self.linter.current_name, stream)
+ self.append_stream(self.linter.current_name, node.file_stream)
def close(self):
"""compute and display similarities on closing (i.e. end of parsing)"""
diff --git a/examples/custom_raw.py b/examples/custom_raw.py
index 701f6e9..811c785 100644
--- a/examples/custom_raw.py
+++ b/examples/custom_raw.py
@@ -5,7 +5,7 @@ class MyRawChecker(BaseChecker):
"""check for line continuations with '\' instead of using triple
quoted string or parenthesis
"""
-
+
__implements__ = IRawChecker
name = 'custom_raw'
@@ -15,17 +15,19 @@ class MyRawChecker(BaseChecker):
}
options = ()
- def process_module(self, stream):
+ def process_module(self, node):
"""process a module
-
- the module's content is accessible via the stream object
+
+ the module's content is accessible via node.file_stream object
"""
+ stream = node.file_stream
+ stream.seek(0)
for (lineno, line) in enumerate(stream):
if line.rstrip().endswith('\\'):
self.add_message('W9901', line=lineno)
-
+
def register(linter):
"""required method to auto register this checker"""
linter.register_checker(MyRawChecker(linter))
-
+
diff --git a/interfaces.py b/interfaces.py
index 7bbf34e..3d7bdad 100644
--- a/interfaces.py
+++ b/interfaces.py
@@ -28,25 +28,25 @@ class IChecker(Interface):
def open(self):
"""called before visiting project (i.e set of modules)"""
-
+
def close(self):
"""called after visiting project (i.e set of modules)"""
## def open_module(self):
## """called before visiting a module"""
-
+
## def close_module(self):
## """called after visiting a module"""
-
-
+
+
class IRawChecker(IChecker):
"""interface for checker which need to parse the raw file
"""
-
- def process_module(self, stream):
+
+ def process_module(self, astng):
""" process a module
-
- the module's content is accessible via the stream object
+
+ the module's content is accessible via astng.file_stream
"""
@@ -62,7 +62,7 @@ class ILinter(Interface):
the linter class will generate events to its registered checkers.
Each checker may interact with the linter instance using this API
"""
-
+
def register_checker(self, checker):
"""register a new checker class
@@ -73,11 +73,11 @@ class ILinter(Interface):
"""add the message corresponding to the given id.
If provided, msg is expanded using args
-
+
astng checkers should provide the node argument,
raw checkers should provide the line argument.
"""
-
+
class IReporter(Interface):
""" reporter collect messages and display results encapsulated in a layout
@@ -89,10 +89,10 @@ class IReporter(Interface):
location is a 3-uple (module, object, line)
msg is the actual message
"""
-
+
def display_results(self, layout):
"""display results encapsulated in the layout tree
"""
-
-
+
+
__all__ = ('IRawChecker', 'IStatable', 'ILinter', 'IReporter')
diff --git a/lint.py b/lint.py
index ac89ae5..7e21e4a 100644
--- a/lint.py
+++ b/lint.py
@@ -44,7 +44,7 @@ from logilab.common.textutils import splitstrip
from logilab.common.ureports import Table, Text, Section
from logilab.common.__pkginfo__ import version as common_version
-from logilab.astng import MANAGER, nodes
+from logilab.astng import MANAGER, nodes, ASTNGBuildingException
from logilab.astng.__pkginfo__ import version as astng_version
from pylint.utils import PyLintASTWalker, UnknownMessage, MessagesHandlerMixIn,\
@@ -84,6 +84,10 @@ MSGS = {
'F0004': ('unexpected inferred value %s',
'Used to indicate that some value of an unexpected type has been \
inferred.'),
+ 'F0010': ('error while code parsing: %s',
+ 'Used when an exception occured while building the ASTNG \
+ representation which could be handled by astng.'),
+
'I0001': ('Unable to run raw checkers on built-in module %s',
'Used to inform that a built-in module has not been checked \
@@ -532,6 +536,8 @@ This is used by the global evaluation report (RP0004).'}),
return MANAGER.astng_from_file(filepath, modname, source=True)
except SyntaxError, ex:
self.add_message('E0001', line=ex.lineno, args=ex.msg)
+ except ASTNGBuildingException, ex:
+ self.add_message('F0010', args=ex)
except Exception, ex:
# import traceback
# traceback.print_exc()
@@ -544,10 +550,9 @@ This is used by the global evaluation report (RP0004).'}),
self.add_message('I0001', args=astng.name)
else:
#assert astng.file.endswith('.py')
- stream = open(astng.file, 'U')
# invoke IRawChecker interface on self to fetch module/block
# level options
- self.process_module(stream)
+ self.process_module(astng)
if self._ignore_file:
return False
# walk ast to collect line numbers
@@ -555,8 +560,7 @@ This is used by the global evaluation report (RP0004).'}),
self._module_msgs_state = {}
self.collect_block_lines(astng, orig_state)
for checker in rawcheckers:
- stream.seek(0)
- checker.process_module(stream)
+ checker.process_module(astng)
# generate events to astng checkers
walker.walk(astng)
return True
diff --git a/test/input/func_nonascii_noencoding.py b/test/input/func_nonascii_noencoding.py
deleted file mode 100644
index 1ba3578..0000000
--- a/test/input/func_nonascii_noencoding.py
+++ /dev/null
@@ -1,5 +0,0 @@
-"""test file with non ascii characters and no encoding declaration"""
-
-__revision__ = ''
-
-YOP = 'héhéhé'
diff --git a/test/input/func_wrong_encoding.py b/test/input/func_wrong_encoding.py
deleted file mode 100644
index 267fa2c..0000000
--- a/test/input/func_wrong_encoding.py
+++ /dev/null
@@ -1,6 +0,0 @@
-# -*- coding: UTF-8 -*-
-""" check correct wrong encoding declaration
-"""
-
-__revision__ = 'éééé'
-
diff --git a/test/messages/func_nonascii_noencoding.txt b/test/messages/func_nonascii_noencoding.txt
deleted file mode 100644
index a802512..0000000
--- a/test/messages/func_nonascii_noencoding.txt
+++ /dev/null
@@ -1 +0,0 @@
-E: 5: Non ascii characters found but no encoding specified (PEP 263)
diff --git a/test/messages/func_wrong_encoding.txt b/test/messages/func_wrong_encoding.txt
deleted file mode 100644
index 10123a1..0000000
--- a/test/messages/func_wrong_encoding.txt
+++ /dev/null
@@ -1 +0,0 @@
-E: 1: Wrong encoding specified (UTF-8)
diff --git a/test/test_encoding.py b/test/test_encoding.py
deleted file mode 100644
index 34003df..0000000
--- a/test/test_encoding.py
+++ /dev/null
@@ -1,62 +0,0 @@
-# This program is free software; you can redistribute it and/or modify it under
-# the terms of the GNU General Public License as published by the Free Software
-# Foundation; either version 2 of the License, or (at your option) any later
-# version.
-#
-# This program is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along with
-# this program; if not, write to the Free Software Foundation, Inc.,
-# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-""" Copyright (c) 2003-2005 LOGILAB S.A. (Paris, FRANCE).
- http://www.logilab.fr/ -- mailto:contact@logilab.fr
-
-Check source code is ascii only or has an encoding declaration (PEP 263)
-"""
-
-__revision__ = '$Id: test_encoding.py,v 1.6 2005-11-02 09:22:04 syt Exp $'
-
-from logilab.common.testlib import TestCase, unittest_main
-import sys
-from pylint.checkers.misc import guess_encoding
-
-class TestGuessEncoding(TestCase):
-
- def testEmacs(self):
- e = guess_encoding('# -*- coding: UTF-8 -*-')
- self.failUnlessEqual(e, 'UTF-8')
- e = guess_encoding('# -*- coding:UTF-8 -*-')
- self.failUnlessEqual(e, 'UTF-8')
- e = guess_encoding('''
- ### -*- coding: ISO-8859-1 -*-
- ''')
- self.failUnlessEqual(e, 'ISO-8859-1')
- e = guess_encoding('''
-
- ### -*- coding: ISO-8859-1 -*-
- ''')
- self.failUnlessEqual(e, None)
-
- def testVim(self):
- e = guess_encoding('# vim:fileencoding=UTF-8')
- self.failUnlessEqual(e, 'UTF-8')
- e = guess_encoding('''
- ### vim:fileencoding=ISO-8859-1
- ''')
- self.failUnlessEqual(e, 'ISO-8859-1')
- e = guess_encoding('''
-
- ### vim:fileencoding= ISO-8859-1
- ''')
- self.failUnlessEqual(e, None)
-
- def testUTF8(self):
- e = guess_encoding('\xef\xbb\xbf any UTF-8 data')
- self.failUnlessEqual(e, 'UTF-8')
- e = guess_encoding(' any UTF-8 data \xef\xbb\xbf')
- self.failUnlessEqual(e, None)
-
-if __name__ == '__main__':
- unittest_main()
diff --git a/test/unittest_lint.py b/test/unittest_lint.py
index 66641f1..b59b640 100644
--- a/test/unittest_lint.py
+++ b/test/unittest_lint.py
@@ -134,10 +134,11 @@ class PyLinterTC(TestCase):
linter.open()
filepath = join(INPUTDIR, 'func_block_disable_msg.py')
linter.set_current_module('func_block_disable_msg')
- linter.process_module(open(filepath))
+ astng = linter.get_astng(filepath, 'func_block_disable_msg')
+ linter.process_module(astng)
orig_state = linter._module_msgs_state.copy()
linter._module_msgs_state = {}
- linter.collect_block_lines(linter.get_astng(filepath, 'func_block_disable_msg'), orig_state)
+ linter.collect_block_lines(astng, orig_state)
# global (module level)
self.assert_(linter.is_message_enabled('W0613'))
self.assert_(linter.is_message_enabled('E1101'))