From 63674f4b52aa7c2832fec09a026e24cd521e491b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20v=2E=20L=C3=B6wis?= <martin@v.loewis.de>
Date: Fri, 20 Apr 2012 14:36:47 +0200
Subject: Issue #14629: Raise SyntaxError in tokenize.detect_encoding if the
 first two lines have non-UTF-8 characters without an encoding declaration.

---
 Lib/tokenize.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'Lib/tokenize.py')

diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index f575e9bc23..f283c6dd7f 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -292,9 +292,12 @@ def detect_encoding(readline):
 
     def find_cookie(line):
         try:
-            line_string = line.decode('ascii')
+            # Decode as UTF-8. Either the line is an encoding declaration,
+            # in which case it should be pure ASCII, or it must be UTF-8
+            # per default encoding.
+            line_string = line.decode('utf-8')
         except UnicodeDecodeError:
-            return None
+            raise SyntaxError("invalid or missing encoding declaration")
 
         matches = cookie_re.findall(line_string)
         if not matches:
-- 
cgit v1.2.1