Merge. Allow long lines in docstrings and comments.

Allow long lines in multiline strings and comments if they cannot be wrapped; issue #224. Optionally disable physical line checks inside multiline strings, using '# noqa'; issue #242.
author: Florent Xicluna <florent.xicluna@gmail.com> 2014-03-25 19:35:37 +0100
committer: Florent Xicluna <florent.xicluna@gmail.com> 2014-03-25 19:35:37 +0100
commit: fed43c5b560c90995e2ea3f742f2bf6b6e497657 (patch)
tree: 9892f524f6a57381ee08004e4496babdc75ec26e
parent: 5685c9799aa72a461d0b41a5ade2ceeb0341efeb (diff)
parent: fd5cc44df54b0aeff605ccc62ed1e69fdb52c28b (diff)
download: pep8-fed43c5b560c90995e2ea3f742f2bf6b6e497657.tar.gz
8 files changed, 148 insertions, 22 deletions
diff --git a/CHANGES.txt b/CHANGES.txt
index fc79c00..82124be 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -15,6 +15,12 @@ Changes:
 * Report E713 and E714 when operators ``not in`` and ``is not`` are
   recommended. (Issue #236)
 
+* Allow long lines in multiline strings and comments if they cannot
+  be wrapped. (Issue #224).
+
+* Optionally disable physical line checks inside multiline strings,
+  using ``# noqa``. (Issue #242)
+
 * Change text for E121 to report "continuation line under-indented
   for hanging indent" instead of indentation not being a
   multiple of 4.
diff --git a/pep8.py b/pep8.py
index fa91141..91e3cca 100755
--- a/pep8.py
+++ b/pep8.py
@@ -201,7 +201,7 @@ def missing_newline(physical_line):
         return len(physical_line), "W292 no newline at end of file"
 
 
-def maximum_line_length(physical_line, max_line_length):
+def maximum_line_length(physical_line, max_line_length, multiline):
     """
     Limit all lines to a maximum of 79 characters.
 
@@ -217,6 +217,13 @@ def maximum_line_length(physical_line, max_line_length):
     line = physical_line.rstrip()
     length = len(line)
     if length > max_line_length and not noqa(line):
+        # Special case for long URLs in multi-line docstrings or comments,
+        # but still report the error when the 72 first chars are whitespaces.
+        chunks = line.split()
+        if ((len(chunks) == 1 and multiline) or
+            (len(chunks) == 2 and chunks[0] == '#')) and \
+                len(line) - len(chunks[-1]) < max_line_length - 7:
+            return
         if hasattr(line, 'decode'):   # Python 2
             # The line could contain multi-byte characters
             try:
@@ -1251,6 +1258,7 @@ class Checker(object):
         self._logical_checks = options.logical_checks
         self._ast_checks = options.ast_checks
         self.max_line_length = options.max_line_length
+        self.multiline = False  # in a multiline string?
         self.hang_closing = options.hang_closing
         self.verbose = options.verbose
         self.filename = filename
@@ -1299,16 +1307,9 @@ class Checker(object):
         self.line_number += 1
         if self.line_number > len(self.lines):
             return ''
-        return self.lines[self.line_number - 1]
-
-    def readline_check_physical(self):
-        """
-        Check and return the next physical line. This method can be
-        used to feed tokenize.generate_tokens.
-        """
-        line = self.readline()
-        if line:
-            self.check_physical(line)
+        line = self.lines[self.line_number - 1]
+        if self.indent_char is None and line[:1] in WHITESPACE:
+            self.indent_char = line[0]
         return line
 
     def run_check(self, check, argument_names):
@@ -1325,8 +1326,6 @@ class Checker(object):
         Run all physical checks on a raw input line.
         """
         self.physical_line = line
-        if self.indent_char is None and line[:1] in WHITESPACE:
-            self.indent_char = line[0]
         for name, check, argument_names in self._physical_checks:
             result = self.run_check(check, argument_names)
             if result is not None:
@@ -1421,13 +1420,46 @@ class Checker(object):
     def generate_tokens(self):
         if self._io_error:
             self.report_error(1, 0, 'E902 %s' % self._io_error, readlines)
-        tokengen = tokenize.generate_tokens(self.readline_check_physical)
+        tokengen = tokenize.generate_tokens(self.readline)
         try:
             for token in tokengen:
+                self.maybe_check_physical(token)
                 yield token
         except (SyntaxError, tokenize.TokenError):
             self.report_invalid_syntax()
 
+    def maybe_check_physical(self, token):
+        """
+        If appropriate (based on token), check current physical line(s).
+        """
+        # Called after every token, but act only on end of line.
+        if token[0] in (tokenize.NEWLINE, tokenize.NL):
+            # Obviously, a newline token ends a single physical line.
+            self.check_physical(token[4])
+        elif token[0] == tokenize.STRING and '\n' in token[1]:
+            # Less obviously, a string that contains newlines is a
+            # multiline string, either triple-quoted or with internal
+            # newlines backslash-escaped. Check every physical line in the
+            # string *except* for the last one: its newline is outside of
+            # the multiline string, so we consider it a regular physical
+            # line, and will check it like any other physical line.
+            #
+            # Subtleties:
+            # - we don't *completely* ignore the last line; if it contains
+            #   the magical "# noqa" comment, we disable all physical
+            #   checks for the entire multiline string
+            # - have to wind self.line_number back because initially it
+            #   points to the last line of the string, and we want
+            #   check_physical() to give accurate feedback
+            if noqa(token[4]):
+                return
+            self.multiline = True
+            self.line_number = token[2][0]
+            for line in token[1].split('\n')[:-1]:
+                self.check_physical(line + '\n')
+                self.line_number += 1
+            self.multiline = False
+
     def check_all(self, expected=None, line_offset=0):
         """
         Run all checks on the input file.
diff --git a/testsuite/E50.py b/testsuite/E50.py
index 4cc4383..31ad6b9 100644
--- a/testsuite/E50.py
+++ b/testsuite/E50.py
@@ -45,3 +45,60 @@ ddd = \
 ('''
     ''' + ' \
 ')
+#: E501 E225 E226
+very_long_identifiers=and_terrible_whitespace_habits(are_no_excuse+for_long_lines)
+#
+#: E501
+'''multiline string
+with a long long long long long long long long long long long long long long long long line
+'''
+#: E501
+'''same thing, but this time without a terminal newline in the string
+long long long long long long long long long long long long long long long long line'''
+#
+# issue 224 (unavoidable long lines in docstrings)
+#: Okay
+"""
+I'm some great documentation.  Because I'm some great documentation, I'm
+going to give you a reference to some valuable information about some API
+that I'm calling:
+
+    http://msdn.microsoft.com/en-us/library/windows/desktop/aa363858(v=vs.85).aspx
+"""
+#: E501
+"""
+longnospaceslongnospaceslongnospaceslongnospaceslongnospaceslongnospaceslongnospaceslongnospaces"""
+#: Okay
+"""
+This
+                                                                       almost_empty_line
+"""
+#: E501
+"""
+This
+                                                                        almost_empty_line
+"""
+#: E501
+# A basic comment
+# with a long long long long long long long long long long long long long long long long line
+
+#
+#: Okay
+# I'm some great comment.  Because I'm so great, I'm going to give you a
+# reference to some valuable information about some API that I'm calling:
+#
+#     http://msdn.microsoft.com/en-us/library/windows/desktop/aa363858(v=vs.85).aspx
+
+import this
+
+# longnospaceslongnospaceslongnospaceslongnospaceslongnospaceslongnospaceslongnospaceslongnospaces
+
+#
+#: Okay
+# This
+#                                                                      almost_empty_line
+
+#
+#: E501
+# This
+#                                                                       almost_empty_line
diff --git a/testsuite/E90.py b/testsuite/E90.py
index 2d0b2dc..1db3d0e 100644
--- a/testsuite/E90.py
+++ b/testsuite/E90.py
@@ -2,7 +2,7 @@
 }
 #: E901
 = [x
-#: E901 E101 W191 W191
+#: E901 E101 W191
 while True:
     try:
 	    pass
diff --git a/testsuite/W19.py b/testsuite/W19.py
index 3e303d9..edbb1f0 100644
--- a/testsuite/W19.py
+++ b/testsuite/W19.py
@@ -86,7 +86,7 @@ if (a == 2 or
     b == """abc def ghi
 jkl mno"""):
 	return True
-#: E101 W191 W191
+#: E101 W191
 if length > options.max_line_length:
 	return options.max_line_length, \
 	    "E501 line too long (%d characters)" % length
@@ -97,6 +97,28 @@ if length > options.max_line_length:
 if os.path.exists(os.path.join(path, PEP8_BIN)):
 	cmd = ([os.path.join(path, PEP8_BIN)] +
 	       self._pep8_options(targetfile))
+#: W191
+'''
+	multiline string with tab in it'''
+#: E101 W191
+'''multiline string
+	with tabs
+   and spaces
+'''
+#: Okay
+'''sometimes, you just need to go nuts in a multiline string
+	and allow all sorts of crap
+  like mixed tabs and spaces
+      
+or trailing whitespace  
+or long long long long long long long long long long long long long long long long long lines
+'''  # nopep8
+#: Okay
+'''this one
+	will get no warning
+even though the noqa comment is not immediately after the string
+''' + foo  # noqa
+#
 #: E101 W191
 if foo is None and bar is "frop" and \
         blah == 'yeah':
diff --git a/testsuite/W29.py b/testsuite/W29.py
index 2578f4f..42802ca 100644
--- a/testsuite/W29.py
+++ b/testsuite/W29.py
@@ -6,5 +6,8 @@ print
 class Foo(object):
     
     bang = 12
+#: W291
+'''multiline
+string with trailing whitespace'''   
 #: W292
 # This line doesn't have a linefeed
 \ No newline at end of file
diff --git a/testsuite/W39.py b/testsuite/W39.py
index 4fe4fc4..554814c 100644
--- a/testsuite/W39.py
+++ b/testsuite/W39.py
@@ -1,3 +1,9 @@
 #: W391
 # The next line is blank
 
+#: Okay
+'''there is nothing wrong
+with a multiline string at EOF
+
+that happens to have a blank line in it
+'''
diff --git a/testsuite/utf-8.py b/testsuite/utf-8.py
index 7a3d249..2cee579 100644
--- a/testsuite/utf-8.py
+++ b/testsuite/utf-8.py
@@ -31,11 +31,11 @@ class Rectangle(Blob):
 # 78 narrow chars (Na) + 1 wide char (W)
 # 01 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8情
 
-# 2 narrow chars (Na) + 40 wide chars (W)
-# 情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情
+# 3 narrow chars (Na) + 40 wide chars (W)
+# 情 情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情
 
-# 2 narrow chars (Na) + 77 wide chars (W)
-# 情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情
+# 3 narrow chars (Na) + 76 wide chars (W)
+# 情 情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情
 
 #
 #: E501
@@ -47,6 +47,6 @@ class Rectangle(Blob):
 # 01 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8情情
 #
 #: E501
-# 2 narrow chars (Na) + 78 wide chars (W)
-# 情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情
+# 3 narrow chars (Na) + 77 wide chars (W)
+# 情 情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情
 #
author	Florent Xicluna <florent.xicluna@gmail.com>	2014-03-25 19:35:37 +0100
committer	Florent Xicluna <florent.xicluna@gmail.com>	2014-03-25 19:35:37 +0100
commit	fed43c5b560c90995e2ea3f742f2bf6b6e497657 (patch)
tree	9892f524f6a57381ee08004e4496babdc75ec26e
parent	5685c9799aa72a461d0b41a5ade2ceeb0341efeb (diff)
parent	fd5cc44df54b0aeff605ccc62ed1e69fdb52c28b (diff)
download	pep8-fed43c5b560c90995e2ea3f742f2bf6b6e497657.tar.gz