summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Bram Moolenaar <Bram@vim.org> 2020-08-28 22:24:57 +0200
committer Bram Moolenaar <Bram@vim.org> 2020-08-28 22:24:57 +0200
commit 4e4473c927167fd24e5c8df90e0e8035080cf2da (patch)
tree 07e43fbf7948fd3dbd6878b6c5c29069124a4c43
parent 08aac3c6192f0103cb87e280270a32b50e653be1 (diff)
download vim-git-4e4473c927167fd24e5c8df90e0e8035080cf2da.tar.gz
patch 8.2.1536: cannot get the class of a character; emoji widths are wrong (tag: v8.2.1536)
Problem: Cannot get the class of a character; emoji widths are wrong in some environments. Solution: Add charclass(). Update some emoji widths. Add script to check emoji widths.
-rw-r--r-- Filelist 1
-rw-r--r-- runtime/doc/eval.txt 13
-rw-r--r-- runtime/doc/usr_41.txt 1
-rw-r--r-- src/evalfunc.c 1
-rw-r--r-- src/mbyte.c 51
-rw-r--r-- src/proto/mbyte.pro 1
-rw-r--r-- src/testdir/emoji_list.vim 22
-rw-r--r-- src/testdir/test_functions.vim 7
-rw-r--r-- src/version.c 2
9 files changed, 91 insertions, 8 deletions
diff --git a/Filelist b/Filelist
index 42eaa1d3e..e44c1fd3a 100644
--- a/Filelist
+++ b/Filelist
@@ -197,6 +197,7 @@ SRC_ALL = \
src/testdir/samples/*.txt \
src/testdir/samples/test000 \
src/testdir/color_ramp.vim \
+ src/testdir/emoji_list.vim \
src/testdir/silent.wav \
src/testdir/popupbounce.vim \
src/proto.h \
diff --git a/runtime/doc/eval.txt b/runtime/doc/eval.txt
index 129c1e2e2..e0c847303 100644
--- a/runtime/doc/eval.txt
+++ b/runtime/doc/eval.txt
@@ -2425,6 +2425,7 @@ ch_status({handle} [, {options}])
String status of channel {handle}
changenr() Number current change number
char2nr({expr} [, {utf8}]) Number ASCII/UTF8 value of first char in {expr}
+charclass({string}) Number character class of {string}
chdir({dir}) String change current working directory
cindent({lnum}) Number C indent for line {lnum}
clearmatches([{win}]) none clear all matches
@@ -3520,6 +3521,18 @@ char2nr({expr} [, {utf8}]) *char2nr()*
Can also be used as a |method|: >
GetChar()->char2nr()
+
+charclass({string}) *charclass()*
+ Return the character class of the first character in {string}.
+ The character class is one of:
+ 0 blank
+ 1 punctuation
+ 2 word character
+ 3 emoji
+ other specific Unicode class
+ The class is used in patterns and word motions.
+
+
chdir({dir}) *chdir()*
Change the current working directory to {dir}. The scope of
the directory change depends on the directory of the current
diff --git a/runtime/doc/usr_41.txt b/runtime/doc/usr_41.txt
index bcd58c9fe..10e53baf7 100644
--- a/runtime/doc/usr_41.txt
+++ b/runtime/doc/usr_41.txt
@@ -600,6 +600,7 @@ String manipulation: *string-functions*
strtrans() translate a string to make it printable
tolower() turn a string to lowercase
toupper() turn a string to uppercase
+ charclass() class of a character
match() position where a pattern matches in a string
matchend() position where a pattern match ends in a string
matchstr() match of a pattern in a string
diff --git a/src/evalfunc.c b/src/evalfunc.c
index 567bbdfaa..62bbb8888 100644
--- a/src/evalfunc.c
+++ b/src/evalfunc.c
@@ -564,6 +564,7 @@ static funcentry_T global_functions[] =
{"ch_status", 1, 2, FEARG_1, ret_string, JOB_FUNC(f_ch_status)},
{"changenr", 0, 0, 0, ret_number, f_changenr},
{"char2nr", 1, 2, FEARG_1, ret_number, f_char2nr},
+ {"charclass", 1, 1, FEARG_1, ret_number, f_charclass},
{"chdir", 1, 1, FEARG_1, ret_string, f_chdir},
{"cindent", 1, 1, FEARG_1, ret_number, f_cindent},
{"clearmatches", 0, 1, FEARG_1, ret_void, f_clearmatches},
diff --git a/src/mbyte.c b/src/mbyte.c
index d72fb9191..3faefa6af 100644
--- a/src/mbyte.c
+++ b/src/mbyte.c
@@ -132,7 +132,9 @@ static int dbcs_char2cells(int c);
static int dbcs_ptr2cells_len(char_u *p, int size);
static int dbcs_ptr2char(char_u *p);
static int dbcs_head_off(char_u *base, char_u *p);
+#ifdef FEAT_EVAL
static int cw_value(int c);
+#endif
/*
* Lookup table to quickly get the length in bytes of a UTF-8 character from
@@ -1388,8 +1390,7 @@ utf_char2cells(int c)
{0x26ce, 0x26ce},
{0x26d4, 0x26d4},
{0x26ea, 0x26ea},
- {0x26f2, 0x26f3},
- {0x26f5, 0x26f5},
+ {0x26f2, 0x26f5},
{0x26fa, 0x26fa},
{0x26fd, 0x26fd},
{0x2705, 0x2705},
@@ -1490,6 +1491,21 @@ utf_char2cells(int c)
// based on http://unicode.org/emoji/charts/emoji-list.html
static struct interval emoji_wide[] =
{
+ {0x23ed, 0x23ef},
+ {0x23f1, 0x23f2},
+ {0x23f8, 0x23fa},
+ {0x24c2, 0x24c2},
+ {0x261d, 0x261d},
+ {0x26c8, 0x26c8},
+ {0x26cf, 0x26cf},
+ {0x26d1, 0x26d1},
+ {0x26d3, 0x26d3},
+ {0x26e9, 0x26e9},
+ {0x26f0, 0x26f1},
+ {0x26f7, 0x26f9},
+ {0x270c, 0x270d},
+ {0x2934, 0x2935},
+ {0x1f170, 0x1f189},
{0x1f1e6, 0x1f1ff},
{0x1f321, 0x1f321},
{0x1f324, 0x1f32c},
@@ -1533,11 +1549,15 @@ utf_char2cells(int c)
if (c >= 0x100)
{
+#if defined(FEAT_EVAL) || defined(USE_WCHAR_FUNCTIONS)
int n;
+#endif
+#ifdef FEAT_EVAL
n = cw_value(c);
if (n != 0)
return n;
+#endif
#ifdef USE_WCHAR_FUNCTIONS
/*
@@ -2667,8 +2687,7 @@ static struct interval emoji_all[] =
{0x3299, 0x3299},
{0x1f004, 0x1f004},
{0x1f0cf, 0x1f0cf},
- {0x1f170, 0x1f171},
- {0x1f17e, 0x1f17f},
+ {0x1f170, 0x1f189},
{0x1f18e, 0x1f18e},
{0x1f191, 0x1f19a},
{0x1f1e6, 0x1f1ff},
@@ -2835,6 +2854,10 @@ utf_class_buf(int c, buf_T *buf)
return 1; // punctuation
}
+ // emoji
+ if (intable(emoji_all, sizeof(emoji_all), c))
+ return 3;
+
// binary search in table
while (top >= bot)
{
@@ -2847,10 +2870,6 @@ utf_class_buf(int c, buf_T *buf)
return (int)classes[mid].class;
}
- // emoji
- if (intable(emoji_all, sizeof(emoji_all), c))
- return 3;
-
// most other characters are "word" characters
return 2;
}
@@ -5352,6 +5371,8 @@ string_convert_ext(
return retval;
}
+#if defined(FEAT_EVAL) || defined(PROTO)
+
/*
* Table set by setcellwidths().
*/
@@ -5525,3 +5546,17 @@ f_setcellwidths(typval_T *argvars, typval_T *rettv UNUSED)
cw_table = table;
cw_table_size = l->lv_len;
}
+
+/*
+ * "charclass()" function
+ *
+ * Stores in "rettv" the class that mb_get_class() reports for the first
+ * character of the string in argvars[0].
+ * When the argument is not a string, or is a NULL or empty string, the
+ * e_stringreq error is given and "rettv" is left as the caller set it up.
+ */
+    void
+f_charclass(typval_T *argvars, typval_T *rettv)
+{
+    // Only a non-empty string has a first character to classify.
+    if (argvars[0].v_type != VAR_STRING
+            || argvars[0].vval.v_string == NULL
+            || *argvars[0].vval.v_string == NUL)
+    {
+        emsg(_(e_stringreq));
+        return;
+    }
+    rettv->vval.v_number = mb_get_class(argvars[0].vval.v_string);
+}
+#endif
diff --git a/src/proto/mbyte.pro b/src/proto/mbyte.pro
index 9385d856d..7a6009ed6 100644
--- a/src/proto/mbyte.pro
+++ b/src/proto/mbyte.pro
@@ -85,4 +85,5 @@ int convert_input_safe(char_u *ptr, int len, int maxlen, char_u **restp, int *re
char_u *string_convert(vimconv_T *vcp, char_u *ptr, int *lenp);
char_u *string_convert_ext(vimconv_T *vcp, char_u *ptr, int *lenp, int *unconvlenp);
void f_setcellwidths(typval_T *argvars, typval_T *rettv);
+void f_charclass(typval_T *argvars, typval_T *rettv);
/* vim: set ft=c : */
diff --git a/src/testdir/emoji_list.vim b/src/testdir/emoji_list.vim
new file mode 100644
index 000000000..e6a73c354
--- /dev/null
+++ b/src/testdir/emoji_list.vim
@@ -0,0 +1,22 @@
+" Script to fill the window with emoji characters, one per line.
+" Each line holds the character between '|' markers followed by the value
+" strwidth() reports for it.
+
+" When the current buffer has unsaved changes open a new window for the
+" list, otherwise start a new empty buffer in the current window.
+if &modified
+ new
+else
+ enew
+endif
+
+" Use a compiled Vim9 function for speed.
+" DoIt() walks the code points 0x100 through 0x1ffff and adds a line for
+" every character for which charclass() returns 3 (emoji).
+def DoIt()
+ let lnum = 1
+ for c in range(0x100, 0x1ffff)
+ let cs = nr2char(c)
+ if charclass(cs) == 3
+ setline(lnum, '|' .. cs .. '| ' .. strwidth(cs))
+ lnum += 1
+ endif
+ endfor
+enddef
+
+call DoIt()
+" Reset 'modified' so the generated list does not count as a changed buffer.
+set nomodified
diff --git a/src/testdir/test_functions.vim b/src/testdir/test_functions.vim
index e15199b78..fd9057ddc 100644
--- a/src/testdir/test_functions.vim
+++ b/src/testdir/test_functions.vim
@@ -2077,6 +2077,13 @@ func Test_char2nr()
set encoding=utf-8
endfunc
+" Check charclass() with one character for each of the documented classes:
+" 0 blank, 1 punctuation, 2 word character, 3 emoji.
+func Test_charclass()
+ call assert_equal(0, charclass(' '))
+ call assert_equal(1, charclass('.'))
+ call assert_equal(2, charclass('x'))
+ " U+203C is expected to be reported as an emoji.
+ call assert_equal(3, charclass("\u203c"))
+endfunc
+
func Test_eventhandler()
call assert_equal(0, eventhandler())
endfunc
diff --git a/src/version.c b/src/version.c
index 88332a00f..92e90fe21 100644
--- a/src/version.c
+++ b/src/version.c
@@ -755,6 +755,8 @@ static char *(features[]) =
static int included_patches[] =
{ /* Add new patch number below this line */
/**/
+ 1536,
+/**/
1535,
/**/
1534,