From 57da69816872d53038e8a7e8dd4dc39a31192f0d Mon Sep 17 00:00:00 2001 From: Bram Moolenaar Date: Fri, 13 Sep 2019 22:30:11 +0200 Subject: patch 8.1.2027: MS-Windows: problem with ambiwidth characters Problem: MS-Windows: problem with ambiwidth characters. Solution: handle ambiguous width characters in ConPTY on Windows 10 (1903). (Nobuhiro Takasaki, closes #4411) --- src/libvterm/src/parser.c | 10 ++++++++++ src/libvterm/src/state.c | 37 +++++++++++++++++++++++++++++++++++++ src/libvterm/src/termscreen.c | 27 ++++++++++++++++++++++----- src/libvterm/src/unicode.c | 30 +++++++++++++++++++++++++++--- src/libvterm/src/vterm_internal.h | 4 ++++ 5 files changed, 100 insertions(+), 8 deletions(-) (limited to 'src/libvterm') diff --git a/src/libvterm/src/parser.c b/src/libvterm/src/parser.c index 77a2c17e5..7d6d2175d 100644 --- a/src/libvterm/src/parser.c +++ b/src/libvterm/src/parser.c @@ -127,6 +127,9 @@ size_t vterm_input_write(VTerm *vt, const char *bytes, size_t len) size_t pos = 0; const char *string_start = NULL; // init to avoid gcc warning + vt->in_backspace = 0; // Count down with BS key and activate when + // it reaches 1 + switch(vt->parser.state) { case NORMAL: case CSI_LEADER: @@ -172,6 +175,13 @@ size_t vterm_input_write(VTerm *vt, const char *bytes, size_t len) // fallthrough } else if(c < 0x20) { // other C0 + if(vterm_get_special_pty_type() == 2) { + if(c == 0x08) // BS + // Set the trick for BS output after a sequence, to delay backspace + // activation + if(pos + 2 < len && bytes[pos + 1] == 0x20 && bytes[pos + 2] == 0x08) + vt->in_backspace = 2; // Trigger when count down to 1 + } if(vt->parser.state >= STRING) more_string(vt, string_start, bytes + pos - string_start); do_control(vt, c); diff --git a/src/libvterm/src/state.c b/src/libvterm/src/state.c index 22168d6df..1f7ac02de 100644 --- a/src/libvterm/src/state.c +++ b/src/libvterm/src/state.c @@ -336,6 +336,11 @@ static int on_text(const char bytes[], size_t len, void *user) for( ; i < glyph_ends; i++) { int this_width; + if(vterm_get_special_pty_type() == 2) { + state->vt->in_backspace -= (state->vt->in_backspace > 0) ? 1 : 0; + if(state->vt->in_backspace == 1) + codepoints[i] = 0; // codepoints under this condition must be 0 + } chars[i - glyph_starts] = codepoints[i]; this_width = vterm_unicode_width(codepoints[i]); #ifdef DEBUG @@ -425,6 +430,12 @@ static int on_control(unsigned char control, void *user) VTermPos oldpos = state->pos; + VTermScreenCell cell; + + // Preparing to see the leading byte + VTermPos leadpos = state->pos; + leadpos.col -= (leadpos.col >= 2 ? 2 : 0); + switch(control) { case 0x07: // BEL - ECMA-48 8.3.3 if(state->callbacks && state->callbacks->bell) @@ -434,6 +445,12 @@ static int on_control(unsigned char control, void *user) case 0x08: // BS - ECMA-48 8.3.5 if(state->pos.col > 0) state->pos.col--; + if(vterm_get_special_pty_type() == 2) { + // In 2 cell letters, go back 2 cells + vterm_screen_get_cell(state->vt->screen, leadpos, &cell); + if(vterm_unicode_width(cell.chars[0]) == 2) + state->pos.col--; + } break; case 0x09: // HT - ECMA-48 8.3.60 @@ -1019,6 +1036,26 @@ static int on_csi(const char *leader, const long args[], int argcount, const cha row = CSI_ARG_OR(args[0], 1); col = argcount < 2 || CSI_ARG_IS_MISSING(args[1]) ? 1 : CSI_ARG(args[1]); // zero-based + if(vterm_get_special_pty_type() == 2) { + // Fix a sequence that is not correct right now + if(state->pos.row == row - 1) { + int cnt, ptr = 0; + for(cnt = 0; cnt < col - 1; ++cnt) { + VTermPos p; + VTermScreenCell c0, c1; + p.row = row - 1; + p.col = ptr; + vterm_screen_get_cell(state->vt->screen, p, &c0); + p.col++; + vterm_screen_get_cell(state->vt->screen, p, &c1); + ptr += (c1.chars[0] == (uint32_t)-1) // double cell? + ? (vterm_unicode_is_ambiguous(c0.chars[0])) // is ambiguous? + ? vterm_unicode_width(0x00a1) : 1 // &ambiwidth + : 1; // not ambiguous + } + col = ptr + 1; + } + } state->pos.row = row-1; state->pos.col = col-1; if(state->mode.origin) { diff --git a/src/libvterm/src/termscreen.c b/src/libvterm/src/termscreen.c index 37215cef0..c33fb59b4 100644 --- a/src/libvterm/src/termscreen.c +++ b/src/libvterm/src/termscreen.c @@ -770,11 +770,28 @@ int vterm_screen_get_cell(const VTermScreen *screen, VTermPos pos, VTermScreenCe cell->fg = intcell->pen.fg; cell->bg = intcell->pen.bg; - if(pos.col < (screen->cols - 1) && - getcell(screen, pos.row, pos.col + 1)->chars[0] == (uint32_t)-1) - cell->width = 2; - else - cell->width = 1; + if(vterm_get_special_pty_type() == 2) { + /* Get correct cell width from cell information contained in line buffer */ + if(pos.col < (screen->cols - 1) && + getcell(screen, pos.row, pos.col + 1)->chars[0] == (uint32_t)-1) { + if(getcell(screen, pos.row, pos.col)->chars[0] == 0x20) { + getcell(screen, pos.row, pos.col)->chars[0] = 0; + cell->width = 2; + } else if(getcell(screen, pos.row, pos.col)->chars[0] == 0) { + getcell(screen, pos.row, pos.col + 1)->chars[0] = 0; + cell->width = 1; + } else { + cell->width = 2; + } + } else + cell->width = 1; + } else { + if(pos.col < (screen->cols - 1) && + getcell(screen, pos.row, pos.col + 1)->chars[0] == (uint32_t)-1) + cell->width = 2; + else + cell->width = 1; + } return 1; } diff --git a/src/libvterm/src/unicode.c b/src/libvterm/src/unicode.c index 33e5f41b8..7f93763aa 100644 --- a/src/libvterm/src/unicode.c +++ b/src/libvterm/src/unicode.c @@ -68,12 +68,13 @@ * Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c */ -#if !defined(IS_COMBINING_FUNCTION) || !defined(WCWIDTH_FUNCTION) struct interval { int first; int last; }; +#if !defined(WCWIDTH_FUNCTION) || !defined(IS_COMBINING_FUNCTION) + // sorted list of non-overlapping intervals of non-spacing characters // generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" // Replaced by the combining table from Vim. @@ -359,6 +360,7 @@ static const struct interval combining[] = { {0X1E944, 0X1E94A}, {0XE0100, 0XE01EF} }; +#endif // auxiliary function for binary search in interval table static int bisearch(uint32_t ucs, const struct interval *table, int max) { @@ -379,8 +381,6 @@ static int bisearch(uint32_t ucs, const struct interval *table, int max) { return 0; } -#endif - /* The following two functions define the column width of an ISO 10646 * character as follows: @@ -478,6 +478,7 @@ static int mk_wcswidth(const uint32_t *pwcs, size_t n) */ static int mk_wcwidth_cjk(uint32_t ucs) { +#endif /* sorted list of non-overlapping intervals of East Asian Ambiguous * characters, generated by "uniset +WIDTH-A -cat=Me -cat=Mn -cat=Cf c" */ static const struct interval ambiguous[] = { @@ -534,6 +535,7 @@ static int mk_wcwidth_cjk(uint32_t ucs) { 0x273D, 0x273D }, { 0x2776, 0x277F }, { 0xE000, 0xF8FF }, { 0xFFFD, 0xFFFD }, { 0xF0000, 0xFFFFD }, { 0x100000, 0x10FFFD } }; +#if 0 // binary search in table of non-spacing characters if (bisearch(ucs, ambiguous, @@ -557,6 +559,12 @@ static int mk_wcswidth_cjk(const uint32_t *pwcs, size_t n) } #endif +INTERNAL int vterm_unicode_is_ambiguous(uint32_t codepoint) +{ + return (bisearch(codepoint, ambiguous, + sizeof(ambiguous) / sizeof(struct interval) - 1)) ? 1 : 0; +} + #ifdef IS_COMBINING_FUNCTION // Use a provided is_combining() function. int IS_COMBINING_FUNCTION(uint32_t codepoint); @@ -569,6 +577,17 @@ vterm_is_combining(uint32_t codepoint) } #endif +#ifdef GET_SPECIAL_PTY_TYPE_FUNCTION +int GET_SPECIAL_PTY_TYPE_FUNCTION(void); +#else +# define GET_SPECIAL_PTY_TYPE_FUNCTION vterm_get_special_pty_type_placeholder + static int +vterm_get_special_pty_type_placeholder(void) +{ + return 0; +} +#endif + // ################################ // ### The rest added by Paul Evans @@ -581,3 +600,8 @@ INTERNAL int vterm_unicode_is_combining(uint32_t codepoint) { return IS_COMBINING_FUNCTION(codepoint); } + +INTERNAL int vterm_get_special_pty_type(void) +{ + return GET_SPECIAL_PTY_TYPE_FUNCTION(); +} diff --git a/src/libvterm/src/vterm_internal.h b/src/libvterm/src/vterm_internal.h index 3c73b7a50..5b6198bdc 100644 --- a/src/libvterm/src/vterm_internal.h +++ b/src/libvterm/src/vterm_internal.h @@ -212,6 +212,8 @@ struct VTerm VTermState *state; VTermScreen *screen; + + int in_backspace; }; struct VTermEncoding { @@ -259,5 +261,7 @@ VTermEncoding *vterm_lookup_encoding(VTermEncodingType type, char designation); int vterm_unicode_width(uint32_t codepoint); int vterm_unicode_is_combining(uint32_t codepoint); +int vterm_unicode_is_ambiguous(uint32_t codepoint); +int vterm_get_special_pty_type(void); #endif -- cgit v1.2.1