summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CHANGES3
-rw-r--r--sphinx/writers/text.py93
-rw-r--r--tests/test_build_text.py21
3 files changed, 116 insertions, 1 deletions
diff --git a/CHANGES b/CHANGES
index 8538e051..33a57e40 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,7 +1,8 @@
Release 1.2 (in development)
============================
-* Fix text builder did not respect wide/fullwidth charactors.
+* Fix: the text builder did not respect wide/fullwidth characters when
+  computing title underline width, table layout width and text wrap width.
* #1062: sphinx.ext.autodoc use __init__ method signature for class signature.
diff --git a/sphinx/writers/text.py b/sphinx/writers/text.py
index f42d637a..1f90497e 100644
--- a/sphinx/writers/text.py
+++ b/sphinx/writers/text.py
@@ -11,6 +11,7 @@
import os
import re
import textwrap
+from itertools import groupby
from docutils import nodes, writers
from docutils.utils import column_width
@@ -28,6 +29,98 @@ class TextWrapper(textwrap.TextWrapper):
r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|' # hyphenated words
r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))') # em-dash
+    def _wrap_chunks(self, chunks):
+        """_wrap_chunks(chunks : [string]) -> [string]
+
+        Assemble ``chunks`` into a list of output lines.
+
+        The original _wrap_chunks measures text with len(); this override
+        measures every chunk (and the indent) with column_width() so that
+        wide/fullwidth characters are accounted for.
+
+        ``chunks`` is consumed in place (it is reversed and popped from).
+        Raises ValueError when ``self.width`` is not positive.
+        """
+        # TextWrapper has no ``drop_whitespace`` attribute on Python 2.5.
+        drop_whitespace = getattr(self, 'drop_whitespace', True)
+        if self.width <= 0:
+            raise ValueError("invalid width %r (must be > 0)" % self.width)
+
+        wrapped = []
+
+        # Treat the list as a stack: the next chunk is always chunks[-1].
+        chunks.reverse()
+
+        while chunks:
+            # Only the very first output line uses ``initial_indent``.
+            if wrapped:
+                indent = self.subsequent_indent
+            else:
+                indent = self.initial_indent
+
+            # Columns available for text once the indent is accounted for.
+            width = self.width - column_width(indent)
+
+            # A whitespace chunk at the start of a line is dropped, except
+            # on the first line.
+            if drop_whitespace and wrapped and chunks[-1].strip() == '':
+                del chunks[-1]
+
+            pieces = []
+            used = 0
+
+            # Take chunks for as long as they fit on the current line.
+            while chunks:
+                chunk_width = column_width(chunks[-1])
+                if used + chunk_width > width:
+                    break
+                pieces.append(chunks.pop())
+                used += chunk_width
+
+            # The pending chunk is wider than a whole line: let
+            # _handle_long_word() decide how much of it goes on this line.
+            if chunks and column_width(chunks[-1]) > width:
+                self._handle_long_word(chunks, pieces, used, width)
+
+            # A whitespace chunk at the end of a line is dropped as well.
+            if drop_whitespace and pieces and pieces[-1].strip() == '':
+                del pieces[-1]
+
+            if pieces:
+                wrapped.append(indent + ''.join(pieces))
+
+        return wrapped
+
+    def _break_word(self, word, space_left):
+        """_break_word(word : string, space_left : int) -> (string, string)
+
+        Split ``word`` into a head that fits within ``space_left`` display
+        columns and the remaining tail.  Width is accumulated character by
+        character with column_width() instead of using len(word).
+
+        The head is empty when even the first character is wider than
+        ``space_left``; the tail is empty when the whole word fits.
+        """
+        total = 0
+        for i, c in enumerate(word):
+            total += column_width(c)
+            if total > space_left:
+                # Character ``i`` is the first one that overflows, so
+                # everything before it (word[:i]) is what still fits.
+                # Slicing at ``i - 1`` would waste one character and, for
+                # i == 0, turn into the negative slice word[:-1].
+                return word[:i], word[i:]
+        return word, ''
+
+    def _split(self, text):
+        """_split(text : string) -> [string]
+
+        Split ``text`` into wrappable chunks.
+
+        The original method only splits by 'wordsep_re'.  This override
+        additionally groups every chunk it produces into runs of characters
+        that share the same column_width(): runs of width-1 characters are
+        split again by the original method, while the characters of any
+        other run each become a chunk of their own.
+        """
+        base_split = textwrap.TextWrapper._split
+        result = []
+        for piece in base_split(self, text):
+            for char_width, run in groupby(piece, column_width):
+                if char_width != 1:
+                    # One chunk per character, so a line may break
+                    # between any two of them.
+                    result.extend(run)
+                else:
+                    result.extend(base_split(self, ''.join(run)))
+        return result
+
+    def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
+        """_handle_long_word(chunks : [string],
+                             cur_line : [string],
+                             cur_len : int, width : int)
+
+        Handle a chunk that is wider than a whole line.
+
+        Overrides the original method so the chunk is divided by display
+        width via self._break_word() instead of by a plain slice.
+
+        ``reversed_chunks`` (a stack whose last item is the long chunk) and
+        ``cur_line`` are modified in place; nothing is returned.
+        """
+        # Always offer at least one column, even when the line is full.
+        space_left = max(width - cur_len, 1)
+        if self.break_long_words:
+            chunk = reversed_chunks[-1]
+            head, tail = self._break_word(chunk, space_left)
+            if not head and not cur_line:
+                # Nothing fitted and the line is still empty: the caller
+                # would come back with the very same chunk and never
+                # terminate.  Force one character onto the line so that
+                # wrapping always makes progress.
+                head, tail = chunk[:1], chunk[1:]
+            cur_line.append(head)
+            reversed_chunks[-1] = tail
+
+        elif not cur_line:
+            # Long words must stay intact: give the chunk a line of its
+            # own, but only if the current line is still empty.
+            cur_line.append(reversed_chunks.pop())
+
MAXWIDTH = 70
STDINDENT = 3
diff --git a/tests/test_build_text.py b/tests/test_build_text.py
index 63df8ee0..79edc623 100644
--- a/tests/test_build_text.py
+++ b/tests/test_build_text.py
@@ -12,6 +12,7 @@
from textwrap import dedent
from docutils.utils import column_width
+from sphinx.writers.text import MAXWIDTH
from util import *
@@ -63,3 +64,23 @@ def test_multibyte_table(app):
lines = [line.strip() for line in result.splitlines() if line.strip()]
line_widths = [column_width(line) for line in lines]
assert len(set(line_widths)) == 1 # same widths
+
+
+@with_text_app()
+def test_multibyte_maxwidth(app):
+    # Build three paragraphs (single-byte only, multibyte only, mixed) that
+    # need wrapping and check the display width of every output line.
+    narrow = u'abc'  # len() == 3
+    wide = u'\u65e5\u672c\u8a9e'  # len() == 3
+
+    repeat = int(MAXWIDTH / 3)
+    paragraphs = (
+        ' '.join([narrow] * repeat),
+        ' '.join([wide] * repeat),
+        ' '.join([narrow, wide] * int(MAXWIDTH / 6)),
+    )
+    contents = u'\n\n'.join(paragraphs)
+
+    (app.srcdir / 'contents.rst').write_text(contents, encoding='utf-8')
+    app.builder.build_all()
+    result = (app.outdir / 'contents.txt').text(encoding='utf-8')
+
+    # Ignore blank lines; measure the rest by display width, not len().
+    stripped = (raw.strip() for raw in result.splitlines())
+    widths = [column_width(text) for text in stripped if text]
+    assert max(widths) < MAXWIDTH