Fix: the text builder did not respect wide/fullwidth characters when wrapping text with textwrap.

author: shimizukawa <shimizukawa@gmail.com> 2013-02-07 03:34:51 +0000
committer: shimizukawa <shimizukawa@gmail.com> 2013-02-07 03:34:51 +0000
commit: cc7e4963ee8022df2f69898c1c8db276387720cf (patch)
tree: f94e403096a46035c87fd2827a0d2c16a6e564a0 /sphinx
parent: 9bd8c2371bf682cb420d8fa0435e77e641b3530b (diff)
download: sphinx-cc7e4963ee8022df2f69898c1c8db276387720cf.tar.gz
1 files changed, 93 insertions, 0 deletions
diff --git a/sphinx/writers/text.py b/sphinx/writers/text.py
index f42d637a..1f90497e 100644
--- a/sphinx/writers/text.py
+++ b/sphinx/writers/text.py
@@ -11,6 +11,7 @@
 import os
 import re
 import textwrap
+from itertools import groupby
 
 from docutils import nodes, writers
 from docutils.utils import column_width
@@ -28,6 +29,98 @@ class TextWrapper(textwrap.TextWrapper):
         r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|'   # hyphenated words
         r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))')   # em-dash
 
+    def _wrap_chunks(self, chunks):
+        """_wrap_chunks(chunks : [string]) -> [string]
+
+        Assemble output lines from chunks, measuring each chunk with
+        column_width() instead of len() so that wide/fullwidth characters
+        take their real number of columns.  chunks is consumed in place;
+        ValueError is raised when self.width is not positive.
+        """
+        if self.width <= 0:
+            raise ValueError("invalid width %r (must be > 0)" % self.width)
+        # py25 compat: TextWrapper may lack the 'drop_whitespace' attribute
+        strip_ws = getattr(self, 'drop_whitespace', True)
+        wrapped = []
+        # use chunks as a stack: the next chunk to place is chunks[-1]
+        chunks.reverse()
+
+        while chunks:
+            pieces = []
+            used = 0
+            # only the very first output line gets initial_indent
+            indent = self.subsequent_indent if wrapped else self.initial_indent
+            width = self.width - column_width(indent)
+
+            # skip whitespace that would open a continuation line
+            if strip_ws and chunks[-1].strip() == '' and wrapped:
+                chunks.pop()
+
+            # take chunks for as long as they fit in the remaining columns
+            while chunks:
+                chunk_width = column_width(chunks[-1])
+                if used + chunk_width > width:
+                    break
+                pieces.append(chunks.pop())
+                used += chunk_width
+
+            # a chunk wider than a whole line has to be handled specially
+            if chunks and column_width(chunks[-1]) > width:
+                self._handle_long_word(chunks, pieces, used, width)
+
+            # skip whitespace that would close the line
+            if strip_ws and pieces and pieces[-1].strip() == '':
+                pieces.pop()
+
+            if pieces:
+                wrapped.append(indent + ''.join(pieces))
+
+        return wrapped
+
+    def _break_word(self, word, space_left):
+        """_break_word(word : string, space_left : int) -> (string, string)
+
+        Cut word by display width; the head keeps at least one character.
+        """
+        total = 0
+        for i, c in enumerate(word):
+            total += column_width(c)
+            if total > space_left:      # word[:i] is all that fits
+                return word[:max(i, 1)], word[max(i, 1):]
+        return word, ''
+
+    def _split(self, text):
+        """_split(text : string) -> [string]
+
+        Split *text* with the inherited 'wordsep_re' based _split(), then
+        make every character that is not one column wide a chunk of its own.
+        """
+        base_split = textwrap.TextWrapper._split
+        result = []
+        for piece in base_split(self, text):
+            for columns, run in groupby(piece, column_width):
+                if columns != 1:
+                    result.extend(run)              # one chunk per character
+                else:
+                    result.extend(base_split(self, ''.join(run)))
+        return result
+
+    def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
+        """_handle_long_word(chunks : [string],
+                             cur_line : [string],
+                             cur_len : int, width : int)
+
+        Cut an over-long chunk with self._break_word() instead of a slice.
+        """
+        if not self.break_long_words:
+            if not cur_line:
+                cur_line.append(reversed_chunks.pop())
+            return
+        room = max(width - cur_len, 1)      # always allow at least one column
+        head, tail = self._break_word(reversed_chunks[-1], room)
+        cur_line.append(head)
+        reversed_chunks[-1] = tail
+
 
 MAXWIDTH = 70
 STDINDENT = 3
author	shimizukawa <shimizukawa@gmail.com>	2013-02-07 03:34:51 +0000
committer	shimizukawa <shimizukawa@gmail.com>	2013-02-07 03:34:51 +0000
commit	cc7e4963ee8022df2f69898c1c8db276387720cf (patch)
tree	f94e403096a46035c87fd2827a0d2c16a6e564a0 /sphinx
parent	9bd8c2371bf682cb420d8fa0435e77e641b3530b (diff)
download	sphinx-cc7e4963ee8022df2f69898c1c8db276387720cf.tar.gz