diff options
author | Bram Moolenaar <Bram@vim.org> | 2020-06-04 18:22:13 +0200 |
---|---|---|
committer | Bram Moolenaar <Bram@vim.org> | 2020-06-04 18:22:13 +0200 |
commit | e52702f00322c8a8861efd0bd6a3775e685e5685 (patch) | |
tree | ffd498e52ee2b139f3a145147aca02afc5013d63 /src/textformat.c | |
parent | 9155825b2428ef6bf654204a534b033a6879c90d (diff) | |
download | vim-git-e52702f00322c8a8861efd0bd6a3775e685e5685.tar.gz |
patch 8.2.0901: formatting CJK text isn't optimalv8.2.0901
Problem: Formatting CJK text isn't optimal.
Solution: Properly break CJK lines. (closes #3875)
Diffstat (limited to 'src/textformat.c')
-rw-r--r-- | src/textformat.c | 76 |
1 files changed, 69 insertions, 7 deletions
diff --git a/src/textformat.c b/src/textformat.c index c50d70f7c..373ab5969 100644 --- a/src/textformat.c +++ b/src/textformat.c @@ -45,10 +45,12 @@ internal_format( int c) // character to be inserted (can be NUL) { int cc; + int skip_pos; int save_char = NUL; int haveto_redraw = FALSE; int fo_ins_blank = has_format_option(FO_INS_BLANK); int fo_multibyte = has_format_option(FO_MBYTE_BREAK); + int fo_rigor_tw = has_format_option(FO_RIGOROUS_TW); int fo_white_par = has_format_option(FO_WHITE_PAR); int first_line = TRUE; colnr_T leader_len; @@ -125,6 +127,7 @@ internal_format( curwin->w_cursor.col = startcol; foundcol = 0; + skip_pos = 0; // Find position to break at. // Stop at first entered white when 'formatoptions' has 'v' @@ -189,8 +192,11 @@ internal_format( if (curwin->w_cursor.col <= (colnr_T)wantcol) break; } - else if (cc >= 0x100 && fo_multibyte) + else if ((cc >= 0x100 || !utf_allow_break_before(cc)) && fo_multibyte) { + int ncc; + int allow_break; + // Break after or before a multi-byte character. if (curwin->w_cursor.col != startcol) { @@ -199,8 +205,14 @@ internal_format( break; col = curwin->w_cursor.col; inc_cursor(); - // Don't change end_foundcol if already set. - if (foundcol != curwin->w_cursor.col) + ncc = gchar_cursor(); + + allow_break = + (enc_utf8 && utf_allow_break(cc, ncc)) + || enc_dbcs; + + // If we have already checked this position, skip! + if (curwin->w_cursor.col != skip_pos && allow_break) { foundcol = curwin->w_cursor.col; end_foundcol = foundcol; @@ -213,6 +225,7 @@ internal_format( if (curwin->w_cursor.col == 0) break; + ncc = cc; col = curwin->w_cursor.col; dec_cursor(); @@ -220,16 +233,65 @@ internal_format( if (WHITECHAR(cc)) continue; // break with space - // Don't break until after the comment leader + // Don't break until after the comment leader. if (curwin->w_cursor.col < leader_len) break; curwin->w_cursor.col = col; + skip_pos = curwin->w_cursor.col; - foundcol = curwin->w_cursor.col; - end_foundcol = foundcol; + allow_break = + (enc_utf8 && utf_allow_break(cc, ncc)) + || enc_dbcs; + + // Must handle this to respect line break prohibition. + if (allow_break) + { + foundcol = curwin->w_cursor.col; + end_foundcol = foundcol; + } if (curwin->w_cursor.col <= (colnr_T)wantcol) - break; + { + int ncc_allow_break = + (enc_utf8 && utf_allow_break_before(ncc)) || enc_dbcs; + + if (allow_break) + break; + if (!ncc_allow_break && !fo_rigor_tw) + { + // Enable at most 1 punct hang outside of textwidth. + if (curwin->w_cursor.col == startcol) + { + // We are inserting a non-breakable char, postpone + // line break check to next insert. + end_foundcol = foundcol = 0; + break; + } + + // Neither cc nor ncc is NUL if we are here, so + // it's safe to inc_cursor. + col = curwin->w_cursor.col; + + inc_cursor(); + cc = ncc; + ncc = gchar_cursor(); + // handle insert + ncc = (ncc != NUL) ? ncc : c; + + allow_break = + (enc_utf8 && utf_allow_break(cc, ncc)) + || enc_dbcs; + + if (allow_break) + { + // Break only when we are not at end of line. + end_foundcol = foundcol = + ncc == NUL? 0 : curwin->w_cursor.col; + break; + } + curwin->w_cursor.col = col; + } + } } if (curwin->w_cursor.col == 0) break; |