summary refs log tree commit diff
path: root/checkers
diff options
context:
space:
mode:
Diffstat (limited to 'checkers')
-rw-r--r--  checkers/__init__.py  |  4
-rw-r--r--  checkers/format.py    | 20
-rw-r--r--  checkers/misc.py      | 53
-rw-r--r--  checkers/similar.py   |  5
4 files changed, 17 insertions, 65 deletions
diff --git a/checkers/__init__.py b/checkers/__init__.py
index 0e0a2f3..e9d7e21 100644
--- a/checkers/__init__.py
+++ b/checkers/__init__.py
@@ -107,13 +107,15 @@ class BaseChecker(OptionsProviderMixIn, ASTWalker):
class BaseRawChecker(BaseChecker):
"""base class for raw checkers"""
- def process_module(self, stream):
+ def process_module(self, node):
"""process a module
the module's content is accessible via the stream object
stream must implement the readline method
"""
+ stream = node.file_stream
+ stream.seek(0)
self.process_tokens(tokenize.generate_tokens(stream.readline))
def process_tokens(self, tokens):
diff --git a/checkers/format.py b/checkers/format.py
index 8c874fa..001ee5b 100644
--- a/checkers/format.py
+++ b/checkers/format.py
@@ -31,7 +31,6 @@ from logilab.astng import nodes
from pylint.interfaces import IRawChecker, IASTNGChecker
from pylint.checkers import BaseRawChecker
-from pylint.checkers.misc import guess_encoding, is_ascii
from pylint.checkers.utils import check_messages
MSGS = {
@@ -180,22 +179,17 @@ class FormatChecker(BaseRawChecker):
self._lines = None
self._visited_lines = None
- def process_module(self, stream):
+ def process_module(self, node):
"""extracts encoding from the stream and decodes each line, so that
international text's length is properly calculated.
"""
- line_reader = stream.readline
- if sys.version_info < (3, 0):
- data = stream.read()
- if not is_ascii(data)[0]:
- encoding = guess_encoding(data)
- if encoding is not None:
- line_reader = lambda: stream.readline().decode(encoding,
- 'replace')
- del data
-
+ stream = node.file_stream
stream.seek(0)
- self.process_tokens(tokenize.generate_tokens(line_reader))
+ readline = stream.readline
+ if sys.version_info < (3, 0):
+ if node.file_encoding is not None:
+ readline = lambda: stream.readline().decode(node.file_encoding, 'replace')
+ self.process_tokens(tokenize.generate_tokens(readline))
def new_line(self, tok_type, line, line_num, junk):
"""a new line has been encountered, process it if necessary"""
diff --git a/checkers/misc.py b/checkers/misc.py
index 83e337c..8f6ad2d 100644
--- a/checkers/misc.py
+++ b/checkers/misc.py
@@ -17,46 +17,13 @@
Check source code is ascii only or has an encoding declaration (PEP 263)
"""
-import re
+import re, sys
from pylint.interfaces import IRawChecker
from pylint.checkers import BaseChecker
-def is_ascii(string):
- """return true if non ascii characters are detected in the given string
- and line number where non-ascii has been encountered.
- """
- for i, line in enumerate(string.splitlines()):
- if line and max([ord(char) for char in line]) >= 128:
- return False, i + 1
- return True, 0
-
-# regexp matching both emacs and vim declaration
-ENCODING_RGX = re.compile("[^#]*#*.*coding[:=]\s*([^\s]+)")
-
-def guess_encoding(string):
- """try to guess encoding from a python file as string
- return None if not found
- """
- assert isinstance(string, str), type(string)
- # check for UTF-8 byte-order mark
- if string.startswith('\xef\xbb\xbf'):
- return 'UTF-8'
- first_lines = string.split('\n', 2)[:2]
- for line in first_lines:
- # check for emacs / vim encoding declaration
- match = ENCODING_RGX.match(line)
- if match is not None:
- return match.group(1)
-
MSGS = {
- 'E0501': ('Non ascii characters found but no encoding specified (PEP 263)',
- 'Used when some non ascii characters are detected but now \
- encoding is specified, as explicited in the PEP 263.'),
- 'E0502': ('Wrong encoding specified (%s)',
- 'Used when a known encoding is specified but the file doesn\'t \
- seem to be actually in this encoding.'),
'W0511': ('%s',
'Used when a warning note as FIXME or XXX is detected.'),
}
@@ -83,25 +50,13 @@ separated by a comma.'
def __init__(self, linter=None):
BaseChecker.__init__(self, linter)
- def process_module(self, stream):
+ def process_module(self, node):
"""inspect the source file to found encoding problem or fixmes like
notes
"""
- # source encoding
- data = stream.read()
- ascii, lineno = is_ascii(data)
- if not ascii:
- encoding = guess_encoding(data)
- if encoding is None:
- self.add_message('E0501', line=lineno)
- else:
- try:
- unicode(data, encoding)
- except UnicodeError:
- self.add_message('E0502', args=encoding, line=1)
- del data
- # warning notes in the code
+ stream = node.file_stream
stream.seek(0)
+ # warning notes in the code
notes = []
for note in self.config.notes:
notes.append(re.compile(note))
diff --git a/checkers/similar.py b/checkers/similar.py
index 6a62b8a..d8651b5 100644
--- a/checkers/similar.py
+++ b/checkers/similar.py
@@ -39,6 +39,7 @@ class Similar:
def append_stream(self, streamid, stream):
"""append a file to search for similarities"""
+ stream.seek(0)
self.linesets.append(LineSet(streamid,
stream.readlines(),
self.ignore_comments,
@@ -264,14 +265,14 @@ class SimilarChecker(BaseChecker, Similar):
self.stats = self.linter.add_stats(nb_duplicated_lines=0,
percent_duplicated_lines=0)
- def process_module(self, stream):
+ def process_module(self, node):
"""process a module
the module's content is accessible via the stream object
stream must implement the readlines method
"""
- self.append_stream(self.linter.current_name, stream)
+ self.append_stream(self.linter.current_name, node.file_stream)
def close(self):
"""compute and display similarities on closing (i.e. end of parsing)"""