diff options
| author | shimizukawa <shimizukawa@gmail.com> | 2013-02-07 03:34:51 +0000 |
|---|---|---|
| committer | shimizukawa <shimizukawa@gmail.com> | 2013-02-07 03:34:51 +0000 |
| commit | cc7e4963ee8022df2f69898c1c8db276387720cf (patch) | |
| tree | f94e403096a46035c87fd2827a0d2c16a6e564a0 /sphinx | |
| parent | 9bd8c2371bf682cb420d8fa0435e77e641b3530b (diff) | |
| download | sphinx-cc7e4963ee8022df2f69898c1c8db276387720cf.tar.gz | |
Fix: the text builder did not respect wide/fullwidth characters when wrapping text with textwrap.
Diffstat (limited to 'sphinx')
| -rw-r--r-- | sphinx/writers/text.py | 93 |
1 files changed, 93 insertions, 0 deletions
# Excerpt of sphinx/writers/text.py: line wrapping that measures text in
# display columns.  Parts of the module outside this excerpt are marked "...".

import os
import re
import textwrap
from itertools import groupby

from docutils import nodes, writers
from docutils.utils import column_width

# ...


class TextWrapper(textwrap.TextWrapper):
    """``textwrap.TextWrapper`` that wraps by display width.

    The stock wrapper measures chunks with ``len()``.  The overrides below
    measure them with ``column_width()`` instead, so that wide/fullwidth
    characters are counted by the columns they occupy.
    """

    # ... a class-level word-separator regex (presumably ``wordsep_re``) is
    # defined at this point; its opening lies outside this excerpt.  Its last
    # two alternatives are:
    #     r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|' # hyphenated words
    #     r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))') # em-dash

    def _wrap_chunks(self, chunks):
        """_wrap_chunks(chunks : [string]) -> [string]

        Replacement for ``textwrap.TextWrapper._wrap_chunks`` that measures
        every chunk with ``column_width()`` rather than ``len()``.

        :param chunks: list of string chunks; reversed and consumed in place.
        :returns: the wrapped lines.  The first line is prefixed with
            ``self.initial_indent``, all later ones with
            ``self.subsequent_indent``.
        :raises ValueError: if ``self.width`` is not positive.
        """
        # Looked up defensively: the attribute may be missing on Python 2.5.
        drop_whitespace = getattr(self, 'drop_whitespace', True)
        if self.width <= 0:
            raise ValueError("invalid width %r (must be > 0)" % self.width)

        lines = []
        # Work from the end of the list so chunks can be popped cheaply.
        chunks.reverse()

        while chunks:
            cur_line = []
            cur_len = 0

            if lines:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent

            # Columns left for text once the indent has been accounted for.
            width = self.width - column_width(indent)

            # Whitespace at the start of a continuation line is dropped.
            if drop_whitespace and chunks[-1].strip() == '' and lines:
                del chunks[-1]

            # Take as many whole chunks as fit on the current line.
            while chunks:
                chunk_width = column_width(chunks[-1])
                if cur_len + chunk_width > width:
                    break
                cur_line.append(chunks.pop())
                cur_len += chunk_width

            # The next chunk is wider than a whole line: it has to be split
            # (or placed on a line of its own).
            if chunks and column_width(chunks[-1]) > width:
                self._handle_long_word(chunks, cur_line, cur_len, width)

            # Whitespace at the end of a line is dropped as well.
            if drop_whitespace and cur_line and cur_line[-1].strip() == '':
                del cur_line[-1]

            if cur_line:
                lines.append(indent + ''.join(cur_line))

        return lines

    def _break_word(self, word, space_left):
        """_break_word(word : string, space_left : int) -> (string, string)

        Split *word* by display width instead of ``len(word)``.

        :returns: ``(head, tail)`` with ``head + tail == word``.  ``head`` is
            the longest prefix whose accumulated ``column_width()`` does not
            exceed *space_left*; it is empty when even the first character is
            too wide.  If the whole word fits, ``tail`` is ``''``.
        """
        total = 0
        for i, c in enumerate(word):
            total += column_width(c)
            if total > space_left:
                # word[i] is the first character that overflows, so
                # everything before it still fits.
                return word[:i], word[i:]
        return word, ''

    def _split(self, text):
        """_split(text : string) -> [string]

        Split *text* with the inherited ``_split()`` and then cut every
        character whose ``column_width()`` is not 1 into a chunk of its own.
        Runs of width-1 characters are passed through the inherited
        ``_split()`` again.
        """
        base_split = textwrap.TextWrapper._split
        chunks = []
        for chunk in base_split(self, text):
            for width, group in groupby(chunk, column_width):
                if width == 1:
                    chunks.extend(base_split(self, ''.join(group)))
                else:
                    # One chunk per character.
                    chunks.extend(group)
        return chunks

    def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
        """_handle_long_word(chunks : [string],
                             cur_line : [string],
                             cur_len : int, width : int)

        Deal with a chunk that is too wide to fit on any line, using
        ``self._break_word()`` instead of a plain slice.

        With ``self.break_long_words`` set, the part of the chunk that fits
        is moved to *cur_line* and the rest stays at the end of
        *reversed_chunks*.  Otherwise the whole chunk is moved to *cur_line*,
        but only when that line is still empty.
        """
        space_left = max(width - cur_len, 1)
        if self.break_long_words:
            head, tail = self._break_word(reversed_chunks[-1], space_left)
            if not head and not cur_line:
                # Nothing fits and the line is empty (e.g. a fullwidth
                # character on a one-column line).  Emit one character
                # anyway; otherwise the caller would loop forever.
                head, tail = tail[:1], tail[1:]
            if head:
                cur_line.append(head)
            if tail:
                reversed_chunks[-1] = tail
            else:
                reversed_chunks.pop()

        elif not cur_line:
            cur_line.append(reversed_chunks.pop())


MAXWIDTH = 70
STDINDENT = 3
