summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Jon Dufresne <jon.dufresne@gmail.com>, 2017-08-19 10:19:59 -0700
committer: Jon Dufresne <jon.dufresne@gmail.com>, 2017-09-10 09:13:45 -0700
commit: 13d2bd200911850dacde31665235122d85290265 (patch)
tree: 1765e8f9b4f0efa4601ab8457e187c5e83c3e2c0
parent: 769ea413d1b998628c60613c2e0c3dfa97b33c30 (diff)
download: pep8-13d2bd200911850dacde31665235122d85290265.tar.gz
Add W605 warning for invalid escape sequences in string literals
Starting with Python 3.6, invalid escape sequences in string literals are now deprecated. In a future version of Python, invalid escape sequences will be a syntax error.

While this deprecation produces a runtime warning, it only appears if warnings are enabled and the first time the Python source is compiled to byte code. By adding a check to pycodestyle, projects can take advantage of static analysis to catch and fix these future syntax errors.

For more information on the deprecation, see the Python release notes:
https://docs.python.org/3/whatsnew/3.6.html#deprecated-python-behavior

> A backslash-character pair that is not a valid escape sequence now
> generates a DeprecationWarning. Although this will eventually become a
> SyntaxError, that will not be for several Python releases.

Fixes #633
-rw-r--r--  CHANGES.txt          7
-rw-r--r--  docs/intro.rst       2
-rwxr-xr-x  pycodestyle.py      51
-rw-r--r--  testsuite/E12not.py  8
-rw-r--r--  testsuite/W60.py    16
5 files changed, 80 insertions, 4 deletions
diff --git a/CHANGES.txt b/CHANGES.txt
index 0977432..0957be8 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,6 +1,13 @@
Changelog
=========
+UNRELEASED
+----------
+
+New checks:
+
+* Add W605 warning for invalid escape sequences in string literals
+
2.3.1 (2017-01-31)
------------------
diff --git a/docs/intro.rst b/docs/intro.rst
index fcdcf72..3035a2f 100644
--- a/docs/intro.rst
+++ b/docs/intro.rst
@@ -413,6 +413,8 @@ This is the current list of error and warning codes:
+------------+----------------------------------------------------------------------+
| W604 | backticks are deprecated, use 'repr()' |
+------------+----------------------------------------------------------------------+
+| W605 | invalid escape sequence '\x' |
++------------+----------------------------------------------------------------------+
**(*)** In the default configuration, the checks **E121**, **E123**, **E126**,
diff --git a/pycodestyle.py b/pycodestyle.py
index 5b7a39c..d31ac9e 100755
--- a/pycodestyle.py
+++ b/pycodestyle.py
@@ -1388,6 +1388,57 @@ def python_3000_backticks(logical_line):
yield pos, "W604 backticks are deprecated, use 'repr()'"
+@register_check
+def python_3000_invalid_escape_sequence(logical_line, tokens):
+ r"""Invalid escape sequences are deprecated in Python 3.6.
+
+ Okay: regex = r'\.png$'
+ W605: regex = '\.png$'
+ """
+ # https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
+ valid = [
+ '\n',
+ '\\',
+ '\'',
+ '"',
+ 'a',
+ 'b',
+ 'f',
+ 'n',
+ 'r',
+ 't',
+ 'v',
+ '0', '1', '2', '3', '4', '5', '6', '7',
+ 'x',
+
+ # Escape sequences only recognized in string literals
+ 'N',
+ 'u',
+ 'U',
+ ]
+
+ for token_type, text, start, end, line in tokens:
+ if token_type == tokenize.STRING:
+ quote = text[-3:] if text[-3:] in ('"""', "'''") else text[-1]
+ # Extract string modifiers (e.g. u or r)
+ quote_pos = text.index(quote)
+ prefix = text[:quote_pos].lower()
+ start = quote_pos + len(quote)
+ string = text[start:-len(quote)]
+
+ if 'r' not in prefix:
+ pos = string.find('\\')
+ while pos >= 0:
+ pos += 1
+ if string[pos] not in valid:
+ yield (
+ pos,
+ "W605 invalid escape sequence '\\%s'" %
+ string[pos],
+ )
+ pos = string.find('\\', pos + 1)
+
+
##############################################################################
# Helper functions
##############################################################################
diff --git a/testsuite/E12not.py b/testsuite/E12not.py
index 18c6a64..6528107 100644
--- a/testsuite/E12not.py
+++ b/testsuite/E12not.py
@@ -358,10 +358,10 @@ def qualify_by_address(self, cr, uid, ids, context=None,
""" This gets called by the web server """
-_ipv4_re = re.compile('^(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.'
- '(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.'
- '(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.'
- '(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$')
+_ipv4_re = re.compile(r'^(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.'
+ r'(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.'
+ r'(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.'
+ r'(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$')
fct("""
diff --git a/testsuite/W60.py b/testsuite/W60.py
index 973d22f..cbe267d 100644
--- a/testsuite/W60.py
+++ b/testsuite/W60.py
@@ -13,3 +13,19 @@ if x <> 0:
x = 0
#: W604
val = `1 + 2`
+#: W605
+regex = '\.png$'
+#: W605
+regex = '''
+\.png$
+'''
+#: Okay
+regex = r'\.png$'
+regex = '\\.png$'
+regex = r'''
+\.png$
+'''
+regex = r'''
+\\.png$
+'''
+s = '\\'