diff options
author | Christian Brabandt <cb@256bit.org> | 2023-04-24 21:09:54 +0100 |
---|---|---|
committer | Bram Moolenaar <Bram@vim.org> | 2023-04-24 21:09:54 +0100 |
commit | 67672ef097dd708244ff042a8364994da2b91e75 (patch) | |
tree | 728d6713562555f7917bf5297c6ff27b2bb0c792 /src | |
parent | e1b4822137b74d45fde1d47d2e32b3ae89966092 (diff) | |
download | vim-git-67672ef097dd708244ff042a8364994da2b91e75.tar.gz |
patch 9.0.1485: no functions for converting from/to UTF-16 index v9.0.1485
Problem: no functions for converting from/to UTF-16 index.
Solution: Add UTF-16 flag to existing functions and add strutf16len() and
utf16idx(). (Yegappan Lakshmanan, closes #12216)
Diffstat (limited to 'src')
-rw-r--r-- | src/evalfunc.c | 10 | ||||
-rw-r--r-- | src/proto/strings.pro | 2 | ||||
-rw-r--r-- | src/strings.c | 166 | ||||
-rw-r--r-- | src/testdir/test_functions.vim | 424 | ||||
-rw-r--r-- | src/version.c | 2 |
5 files changed, 559 insertions, 45 deletions
diff --git a/src/evalfunc.c b/src/evalfunc.c index 5c10f1ec9..10d00d5a1 100644 --- a/src/evalfunc.c +++ b/src/evalfunc.c @@ -1751,9 +1751,9 @@ static funcentry_T global_functions[] = ret_number, f_bufwinnr}, {"byte2line", 1, 1, FEARG_1, arg1_number, ret_number, f_byte2line}, - {"byteidx", 2, 2, FEARG_1, arg2_string_number, + {"byteidx", 2, 3, FEARG_1, arg3_string_number_bool, ret_number, f_byteidx}, - {"byteidxcomp", 2, 2, FEARG_1, arg2_string_number, + {"byteidxcomp", 2, 3, FEARG_1, arg3_string_number_bool, ret_number, f_byteidxcomp}, {"call", 2, 3, FEARG_1, arg3_any_list_dict, ret_any, f_call}, @@ -1803,7 +1803,7 @@ static funcentry_T global_functions[] = ret_number, f_charclass}, {"charcol", 1, 2, FEARG_1, arg2_string_or_list_number, ret_number, f_charcol}, - {"charidx", 2, 3, FEARG_1, arg3_string_number_bool, + {"charidx", 2, 4, FEARG_1, arg3_string_number_bool, ret_number, f_charidx}, {"chdir", 1, 1, FEARG_1, arg1_string, ret_string, f_chdir}, @@ -2601,6 +2601,8 @@ static funcentry_T global_functions[] = ret_number, f_strridx}, {"strtrans", 1, 1, FEARG_1, arg1_string, ret_string, f_strtrans}, + {"strutf16len", 1, 2, FEARG_1, arg2_string_bool, + ret_number, f_strutf16len}, {"strwidth", 1, 1, FEARG_1, arg1_string, ret_number, f_strwidth}, {"submatch", 1, 2, FEARG_1, arg2_number_bool, @@ -2785,6 +2787,8 @@ static funcentry_T global_functions[] = ret_dict_any, f_undotree}, {"uniq", 1, 3, FEARG_1, arg13_sortuniq, ret_first_arg, f_uniq}, + {"utf16idx", 2, 4, FEARG_1, arg3_string_number_bool, + ret_number, f_utf16idx}, {"values", 1, 1, FEARG_1, arg1_dict_any, ret_list_member, f_values}, {"virtcol", 1, 2, FEARG_1, arg2_string_or_list_bool, diff --git a/src/proto/strings.pro b/src/proto/strings.pro index 602208831..a72e1ff5e 100644 --- a/src/proto/strings.pro +++ b/src/proto/strings.pro @@ -36,12 +36,14 @@ void f_string(typval_T *argvars, typval_T *rettv); void f_strlen(typval_T *argvars, typval_T *rettv); void f_strcharlen(typval_T *argvars, typval_T *rettv); void 
f_strchars(typval_T *argvars, typval_T *rettv); +void f_strutf16len(typval_T *argvars, typval_T *rettv); void f_strdisplaywidth(typval_T *argvars, typval_T *rettv); void f_strwidth(typval_T *argvars, typval_T *rettv); void f_strcharpart(typval_T *argvars, typval_T *rettv); void f_strpart(typval_T *argvars, typval_T *rettv); void f_strridx(typval_T *argvars, typval_T *rettv); void f_strtrans(typval_T *argvars, typval_T *rettv); +void f_utf16idx(typval_T *argvars, typval_T *rettv); void f_tolower(typval_T *argvars, typval_T *rettv); void f_toupper(typval_T *argvars, typval_T *rettv); void f_tr(typval_T *argvars, typval_T *rettv); diff --git a/src/strings.c b/src/strings.c index 7c868bf1f..7d4281dcd 100644 --- a/src/strings.c +++ b/src/strings.c @@ -1006,10 +1006,6 @@ string_reduce( static void byteidx(typval_T *argvars, typval_T *rettv, int comp UNUSED) { - char_u *t; - char_u *str; - varnumber_T idx; - rettv->vval.v_number = -1; if (in_vim9script() @@ -1017,20 +1013,42 @@ byteidx(typval_T *argvars, typval_T *rettv, int comp UNUSED) || check_for_number_arg(argvars, 1) == FAIL)) return; - str = tv_get_string_chk(&argvars[0]); - idx = tv_get_number_chk(&argvars[1], NULL); + char_u *str = tv_get_string_chk(&argvars[0]); + varnumber_T idx = tv_get_number_chk(&argvars[1], NULL); if (str == NULL || idx < 0) return; - t = str; + varnumber_T utf16idx = FALSE; + if (argvars[2].v_type != VAR_UNKNOWN) + { + utf16idx = tv_get_bool(&argvars[2]); + if (utf16idx < 0 || utf16idx > 1) + { + semsg(_(e_using_number_as_bool_nr), utf16idx); + return; + } + } + + int (*ptr2len)(char_u *); + if (enc_utf8 && comp) + ptr2len = utf_ptr2len; + else + ptr2len = mb_ptr2len; + + char_u *t = str; for ( ; idx > 0; idx--) { if (*t == NUL) // EOL reached return; - if (enc_utf8 && comp) - t += utf_ptr2len(t); - else - t += (*mb_ptr2len)(t); + if (utf16idx) + { + int clen = ptr2len(t); + int c = (clen > 1) ? 
utf_ptr2char(t) : *t; + if (c > 0xFFFF) + idx--; + } + if (idx > 0) + t += ptr2len(t); } rettv->vval.v_number = (varnumber_T)(t - str); } @@ -1059,42 +1077,49 @@ f_byteidxcomp(typval_T *argvars, typval_T *rettv) void f_charidx(typval_T *argvars, typval_T *rettv) { - char_u *str; - varnumber_T idx; - varnumber_T countcc = FALSE; - char_u *p; - int len; - int (*ptr2len)(char_u *); - rettv->vval.v_number = -1; - if ((check_for_string_arg(argvars, 0) == FAIL + if (check_for_string_arg(argvars, 0) == FAIL || check_for_number_arg(argvars, 1) == FAIL - || check_for_opt_bool_arg(argvars, 2) == FAIL)) + || check_for_opt_bool_arg(argvars, 2) == FAIL + || (argvars[2].v_type != VAR_UNKNOWN + && check_for_opt_bool_arg(argvars, 3) == FAIL)) return; - str = tv_get_string_chk(&argvars[0]); - idx = tv_get_number_chk(&argvars[1], NULL); + char_u *str = tv_get_string_chk(&argvars[0]); + varnumber_T idx = tv_get_number_chk(&argvars[1], NULL); if (str == NULL || idx < 0) return; + varnumber_T countcc = FALSE; + varnumber_T utf16idx = FALSE; if (argvars[2].v_type != VAR_UNKNOWN) - countcc = tv_get_bool(&argvars[2]); - if (countcc < 0 || countcc > 1) { - semsg(_(e_using_number_as_bool_nr), countcc); - return; + countcc = tv_get_bool(&argvars[2]); + if (argvars[3].v_type != VAR_UNKNOWN) + utf16idx = tv_get_bool(&argvars[3]); } + int (*ptr2len)(char_u *); if (enc_utf8 && countcc) ptr2len = utf_ptr2len; else ptr2len = mb_ptr2len; - for (p = str, len = 0; p <= str + idx; len++) + char_u *p; + int len; + for (p = str, len = 0; utf16idx ? idx >= 0 : p <= str + idx; len++) { if (*p == NUL) return; + if (utf16idx) + { + idx--; + int clen = ptr2len(p); + int c = (clen > 1) ? 
utf_ptr2char(p) : *p; + if (c > 0xFFFF) + idx--; + } p += ptr2len(p); } @@ -1359,6 +1384,38 @@ f_strchars(typval_T *argvars, typval_T *rettv) } /* + * "strutf16len()" function + */ + void +f_strutf16len(typval_T *argvars, typval_T *rettv) +{ + rettv->vval.v_number = -1; + + if (check_for_string_arg(argvars, 0) == FAIL + || check_for_opt_bool_arg(argvars, 1) == FAIL) + return; + + varnumber_T countcc = FALSE; + if (argvars[1].v_type != VAR_UNKNOWN) + countcc = tv_get_bool(&argvars[1]); + + char_u *s = tv_get_string(&argvars[0]); + varnumber_T len = 0; + int (*func_mb_ptr2char_adv)(char_u **pp); + int ch; + + func_mb_ptr2char_adv = countcc ? mb_cptr2char_adv : mb_ptr2char_adv; + while (*s != NUL) + { + ch = func_mb_ptr2char_adv(&s); + if (ch > 0xFFFF) + ++len; + ++len; + } + rettv->vval.v_number = len; +} + +/* * "strdisplaywidth()" function */ void @@ -1619,6 +1676,61 @@ f_strtrans(typval_T *argvars, typval_T *rettv) rettv->vval.v_string = transstr(tv_get_string(&argvars[0])); } + +/* + * + * "utf16idx()" function + */ + void +f_utf16idx(typval_T *argvars, typval_T *rettv) +{ + rettv->vval.v_number = -1; + + if (check_for_string_arg(argvars, 0) == FAIL + || check_for_opt_number_arg(argvars, 1) == FAIL + || check_for_opt_bool_arg(argvars, 2) == FAIL + || (argvars[2].v_type != VAR_UNKNOWN + && check_for_opt_bool_arg(argvars, 3) == FAIL)) + return; + + char_u *str = tv_get_string_chk(&argvars[0]); + varnumber_T idx = tv_get_number_chk(&argvars[1], NULL); + if (str == NULL || idx < 0) + return; + + varnumber_T countcc = FALSE; + varnumber_T charidx = FALSE; + if (argvars[2].v_type != VAR_UNKNOWN) + { + countcc = tv_get_bool(&argvars[2]); + if (argvars[3].v_type != VAR_UNKNOWN) + charidx = tv_get_bool(&argvars[3]); + } + + int (*ptr2len)(char_u *); + if (enc_utf8 && countcc) + ptr2len = utf_ptr2len; + else + ptr2len = mb_ptr2len; + + char_u *p; + int len; + for (p = str, len = 0; charidx ? 
idx >= 0 : p <= str + idx; len++) + { + if (*p == NUL) + return; + int clen = ptr2len(p); + int c = (clen > 1) ? utf_ptr2char(p) : *p; + if (c > 0xFFFF) + len++; + p += ptr2len(p); + if (charidx) + idx--; + } + + rettv->vval.v_number = len > 0 ? len - 1 : 0; +} + /* * "tolower(string)" function */ diff --git a/src/testdir/test_functions.vim b/src/testdir/test_functions.vim index 3bea88df1..e32c4f5ff 100644 --- a/src/testdir/test_functions.vim +++ b/src/testdir/test_functions.vim @@ -1192,19 +1192,14 @@ func Test_byte2line_line2byte() bw! endfunc -" Test for byteidx() and byteidxcomp() functions +" Test for byteidx() using a character index func Test_byteidx() let a = '.é.' " one char of two bytes call assert_equal(0, byteidx(a, 0)) - call assert_equal(0, byteidxcomp(a, 0)) call assert_equal(1, byteidx(a, 1)) - call assert_equal(1, byteidxcomp(a, 1)) call assert_equal(3, byteidx(a, 2)) - call assert_equal(3, byteidxcomp(a, 2)) call assert_equal(4, byteidx(a, 3)) - call assert_equal(4, byteidxcomp(a, 3)) call assert_equal(-1, byteidx(a, 4)) - call assert_equal(-1, byteidxcomp(a, 4)) let b = '.é.' " normal e with composing char call assert_equal(0, b->byteidx(0)) @@ -1212,18 +1207,184 @@ func Test_byteidx() call assert_equal(4, b->byteidx(2)) call assert_equal(5, b->byteidx(3)) call assert_equal(-1, b->byteidx(4)) + + " string with multiple composing characters + let str = '-ą́-ą́' + call assert_equal(0, byteidx(str, 0)) + call assert_equal(1, byteidx(str, 1)) + call assert_equal(6, byteidx(str, 2)) + call assert_equal(7, byteidx(str, 3)) + call assert_equal(12, byteidx(str, 4)) + call assert_equal(-1, byteidx(str, 5)) + + " empty string + call assert_equal(0, byteidx('', 0)) + call assert_equal(-1, byteidx('', 1)) + + " error cases call assert_fails("call byteidx([], 0)", 'E730:') + call assert_fails("call byteidx('abc', [])", 'E745:') +endfunc + +" Test for byteidxcomp() using a character index +func Test_byteidxcomp() + let a = '.é.' 
" one char of two bytes + call assert_equal(0, byteidxcomp(a, 0)) + call assert_equal(1, byteidxcomp(a, 1)) + call assert_equal(3, byteidxcomp(a, 2)) + call assert_equal(4, byteidxcomp(a, 3)) + call assert_equal(-1, byteidxcomp(a, 4)) + let b = '.é.' " normal e with composing char call assert_equal(0, b->byteidxcomp(0)) call assert_equal(1, b->byteidxcomp(1)) call assert_equal(2, b->byteidxcomp(2)) call assert_equal(4, b->byteidxcomp(3)) call assert_equal(5, b->byteidxcomp(4)) call assert_equal(-1, b->byteidxcomp(5)) + + " string with multiple composing characters + let str = '-ą́-ą́' + call assert_equal(0, byteidxcomp(str, 0)) + call assert_equal(1, byteidxcomp(str, 1)) + call assert_equal(2, byteidxcomp(str, 2)) + call assert_equal(4, byteidxcomp(str, 3)) + call assert_equal(6, byteidxcomp(str, 4)) + call assert_equal(7, byteidxcomp(str, 5)) + call assert_equal(8, byteidxcomp(str, 6)) + call assert_equal(10, byteidxcomp(str, 7)) + call assert_equal(12, byteidxcomp(str, 8)) + call assert_equal(-1, byteidxcomp(str, 9)) + + " empty string + call assert_equal(0, byteidxcomp('', 0)) + call assert_equal(-1, byteidxcomp('', 1)) + + " error cases call assert_fails("call byteidxcomp([], 0)", 'E730:') + call assert_fails("call byteidxcomp('abc', [])", 'E745:') endfunc -" Test for charidx() +" Test for byteidx() using a UTF-16 index +func Test_byteidx_from_utf16_index() + " string with single byte characters + let str = "abc" + for i in range(3) + call assert_equal(i, byteidx(str, i, v:true)) + endfor + call assert_equal(3, byteidx(str, 3, v:true)) + call assert_equal(-1, byteidx(str, 4, v:true)) + + " string with two byte characters + let str = "a©©b" + call assert_equal(0, byteidx(str, 0, v:true)) + call assert_equal(1, byteidx(str, 1, v:true)) + call assert_equal(3, byteidx(str, 2, v:true)) + call assert_equal(5, byteidx(str, 3, v:true)) + call assert_equal(6, byteidx(str, 4, v:true)) + call assert_equal(-1, byteidx(str, 5, v:true)) + + " string with two byte characters 
+ let str = "a😊😊b" + call assert_equal(0, byteidx(str, 0, v:true)) + call assert_equal(1, byteidx(str, 1, v:true)) + call assert_equal(1, byteidx(str, 2, v:true)) + call assert_equal(5, byteidx(str, 3, v:true)) + call assert_equal(5, byteidx(str, 4, v:true)) + call assert_equal(9, byteidx(str, 5, v:true)) + call assert_equal(10, byteidx(str, 6, v:true)) + call assert_equal(-1, byteidx(str, 7, v:true)) + + " string with composing characters + let str = '-á-b́' + call assert_equal(0, byteidx(str, 0, v:true)) + call assert_equal(1, byteidx(str, 1, v:true)) + call assert_equal(4, byteidx(str, 2, v:true)) + call assert_equal(5, byteidx(str, 3, v:true)) + call assert_equal(8, byteidx(str, 4, v:true)) + call assert_equal(-1, byteidx(str, 5, v:true)) + + " string with multiple composing characters + let str = '-ą́-ą́' + call assert_equal(0, byteidx(str, 0, v:true)) + call assert_equal(1, byteidx(str, 1, v:true)) + call assert_equal(6, byteidx(str, 2, v:true)) + call assert_equal(7, byteidx(str, 3, v:true)) + call assert_equal(12, byteidx(str, 4, v:true)) + call assert_equal(-1, byteidx(str, 5, v:true)) + + " empty string + call assert_equal(0, byteidx('', 0, v:true)) + call assert_equal(-1, byteidx('', 1, v:true)) + + " error cases + call assert_fails('call byteidx(str, 0, [])', 'E745:') +endfunc + +" Test for byteidxcomp() using a UTF-16 index +func Test_byteidxcomp_from_utf16_index() + " string with single byte characters + let str = "abc" + for i in range(3) + call assert_equal(i, byteidxcomp(str, i, v:true)) + endfor + call assert_equal(3, byteidxcomp(str, 3, v:true)) + call assert_equal(-1, byteidxcomp(str, 4, v:true)) + + " string with two byte characters + let str = "a©©b" + call assert_equal(0, byteidxcomp(str, 0, v:true)) + call assert_equal(1, byteidxcomp(str, 1, v:true)) + call assert_equal(3, byteidxcomp(str, 2, v:true)) + call assert_equal(5, byteidxcomp(str, 3, v:true)) + call assert_equal(6, byteidxcomp(str, 4, v:true)) + call assert_equal(-1, 
byteidxcomp(str, 5, v:true)) + + " string with two byte characters + let str = "a😊😊b" + call assert_equal(0, byteidxcomp(str, 0, v:true)) + call assert_equal(1, byteidxcomp(str, 1, v:true)) + call assert_equal(1, byteidxcomp(str, 2, v:true)) + call assert_equal(5, byteidxcomp(str, 3, v:true)) + call assert_equal(5, byteidxcomp(str, 4, v:true)) + call assert_equal(9, byteidxcomp(str, 5, v:true)) + call assert_equal(10, byteidxcomp(str, 6, v:true)) + call assert_equal(-1, byteidxcomp(str, 7, v:true)) + + " string with composing characters + let str = '-á-b́' + call assert_equal(0, byteidxcomp(str, 0, v:true)) + call assert_equal(1, byteidxcomp(str, 1, v:true)) + call assert_equal(2, byteidxcomp(str, 2, v:true)) + call assert_equal(4, byteidxcomp(str, 3, v:true)) + call assert_equal(5, byteidxcomp(str, 4, v:true)) + call assert_equal(6, byteidxcomp(str, 5, v:true)) + call assert_equal(8, byteidxcomp(str, 6, v:true)) + call assert_equal(-1, byteidxcomp(str, 7, v:true)) + call assert_fails('call byteidxcomp(str, 0, [])', 'E745:') + + " string with multiple composing characters + let str = '-ą́-ą́' + call assert_equal(0, byteidxcomp(str, 0, v:true)) + call assert_equal(1, byteidxcomp(str, 1, v:true)) + call assert_equal(2, byteidxcomp(str, 2, v:true)) + call assert_equal(4, byteidxcomp(str, 3, v:true)) + call assert_equal(6, byteidxcomp(str, 4, v:true)) + call assert_equal(7, byteidxcomp(str, 5, v:true)) + call assert_equal(8, byteidxcomp(str, 6, v:true)) + call assert_equal(10, byteidxcomp(str, 7, v:true)) + call assert_equal(12, byteidxcomp(str, 8, v:true)) + call assert_equal(-1, byteidxcomp(str, 9, v:true)) + + " empty string + call assert_equal(0, byteidxcomp('', 0, v:true)) + call assert_equal(-1, byteidxcomp('', 1, v:true)) + + " error cases + call assert_fails('call byteidxcomp(str, 0, [])', 'E745:') +endfunc + +" Test for charidx() using a byte index func Test_charidx() let a = 'xáb́y' call assert_equal(0, charidx(a, 0)) @@ -1232,17 +1393,20 @@ func 
Test_charidx() call assert_equal(3, charidx(a, 7)) call assert_equal(-1, charidx(a, 8)) call assert_equal(-1, charidx(a, -1)) - call assert_equal(-1, charidx('', 0)) - call assert_equal(-1, charidx(test_null_string(), 0)) " count composing characters - call assert_equal(0, charidx(a, 0, 1)) - call assert_equal(2, charidx(a, 2, 1)) - call assert_equal(3, charidx(a, 4, 1)) - call assert_equal(5, charidx(a, 7, 1)) - call assert_equal(-1, charidx(a, 8, 1)) + call assert_equal(0, a->charidx(0, 1)) + call assert_equal(2, a->charidx(2, 1)) + call assert_equal(3, a->charidx(4, 1)) + call assert_equal(5, a->charidx(7, 1)) + call assert_equal(-1, a->charidx(8, 1)) + + " empty string + call assert_equal(-1, charidx('', 0)) call assert_equal(-1, charidx('', 0, 1)) + " error cases + call assert_equal(-1, charidx(test_null_string(), 0)) call assert_fails('let x = charidx([], 1)', 'E1174:') call assert_fails('let x = charidx("abc", [])', 'E1210:') call assert_fails('let x = charidx("abc", 1, [])', 'E1212:') @@ -1250,6 +1414,237 @@ func Test_charidx() call assert_fails('let x = charidx("abc", 1, 2)', 'E1212:') endfunc +" Test for charidx() using a UTF-16 index +func Test_charidx_from_utf16_index() + " string with single byte characters + let str = "abc" + for i in range(3) + call assert_equal(i, charidx(str, i, v:false, v:true)) + endfor + call assert_equal(-1, charidx(str, 3, v:false, v:true)) + + " string with two byte characters + let str = "a©©b" + call assert_equal(0, charidx(str, 0, v:false, v:true)) + call assert_equal(1, charidx(str, 1, v:false, v:true)) + call assert_equal(2, charidx(str, 2, v:false, v:true)) + call assert_equal(3, charidx(str, 3, v:false, v:true)) + call assert_equal(-1, charidx(str, 4, v:false, v:true)) + + " string with four byte characters + let str = "a😊😊b" + call assert_equal(0, charidx(str, 0, v:false, v:true)) + call assert_equal(1, charidx(str, 1, v:false, v:true)) + call assert_equal(1, charidx(str, 2, v:false, v:true)) + call assert_equal(2, 
charidx(str, 3, v:false, v:true)) + call assert_equal(2, charidx(str, 4, v:false, v:true)) + call assert_equal(3, charidx(str, 5, v:false, v:true)) + call assert_equal(-1, charidx(str, 6, v:false, v:true)) + + " string with composing characters + let str = '-á-b́' + for i in str->strcharlen()->range() + call assert_equal(i, charidx(str, i, v:false, v:true)) + endfor + call assert_equal(-1, charidx(str, 4, v:false, v:true)) + for i in str->strchars()->range() + call assert_equal(i, charidx(str, i, v:true, v:true)) + endfor + call assert_equal(-1, charidx(str, 6, v:true, v:true)) + + " string with multiple composing characters + let str = '-ą́-ą́' + for i in str->strcharlen()->range() + call assert_equal(i, charidx(str, i, v:false, v:true)) + endfor + call assert_equal(-1, charidx(str, 4, v:false, v:true)) + for i in str->strchars()->range() + call assert_equal(i, charidx(str, i, v:true, v:true)) + endfor + call assert_equal(-1, charidx(str, 8, v:true, v:true)) + + " empty string + call assert_equal(-1, charidx('', 0, v:false, v:true)) + call assert_equal(-1, charidx('', 0, v:true, v:true)) + + " error cases + call assert_equal(-1, charidx('', 0, v:false, v:true)) + call assert_equal(-1, charidx('', 0, v:true, v:true)) + call assert_equal(-1, charidx(test_null_string(), 0, v:false, v:true)) + call assert_fails('let x = charidx("abc", 1, v:false, [])', 'E1212:') + call assert_fails('let x = charidx("abc", 1, v:true, [])', 'E1212:') +endfunc + +" Test for utf16idx() using a byte index +func Test_utf16idx_from_byteidx() + " UTF-16 index of a string with single byte characters + let str = "abc" + for i in range(3) + call assert_equal(i, utf16idx(str, i)) + endfor + call assert_equal(-1, utf16idx(str, 3)) + + " UTF-16 index of a string with two byte characters + let str = 'a©©b' + call assert_equal(0, str->utf16idx(0)) + call assert_equal(1, str->utf16idx(1)) + call assert_equal(1, str->utf16idx(2)) + call assert_equal(2, str->utf16idx(3)) + call assert_equal(2, 
str->utf16idx(4)) + call assert_equal(3, str->utf16idx(5)) + call assert_equal(-1, str->utf16idx(6)) + + " UTF-16 index of a string with four byte characters + let str = 'a😊😊b' + call assert_equal(0, utf16idx(str, 0)) + call assert_equal(2, utf16idx(str, 1)) + call assert_equal(2, utf16idx(str, 2)) + call assert_equal(2, utf16idx(str, 3)) + call assert_equal(2, utf16idx(str, 4)) + call assert_equal(4, utf16idx(str, 5)) + call assert_equal(4, utf16idx(str, 6)) + call assert_equal(4, utf16idx(str, 7)) + call assert_equal(4, utf16idx(str, 8)) + call assert_equal(5, utf16idx(str, 9)) + call assert_equal(-1, utf16idx(str, 10)) + + " UTF-16 index of a string with composing characters + let str = '-á-b́' + call assert_equal(0, utf16idx(str, 0)) + call assert_equal(1, utf16idx(str, 1)) + call assert_equal(1, utf16idx(str, 2)) + call assert_equal(1, utf16idx(str, 3)) + call assert_equal(2, utf16idx(str, 4)) + call assert_equal(3, utf16idx(str, 5)) + call assert_equal(3, utf16idx(str, 6)) + call assert_equal(3, utf16idx(str, 7)) + call assert_equal(-1, utf16idx(str, 8)) + call assert_equal(0, utf16idx(str, 0, v:true)) + call assert_equal(1, utf16idx(str, 1, v:true)) + call assert_equal(2, utf16idx(str, 2, v:true)) + call assert_equal(2, utf16idx(str, 3, v:true)) + call assert_equal(3, utf16idx(str, 4, v:true)) + call assert_equal(4, utf16idx(str, 5, v:true)) + call assert_equal(5, utf16idx(str, 6, v:true)) + call assert_equal(5, utf16idx(str, 7, v:true)) + call assert_equal(-1, utf16idx(str, 8, v:true)) + + " string with multiple composing characters + let str = '-ą́-ą́' + call assert_equal(0, utf16idx(str, 0)) + call assert_equal(1, utf16idx(str, 1)) + call assert_equal(1, utf16idx(str, 2)) + call assert_equal(1, utf16idx(str, 3)) + call assert_equal(1, utf16idx(str, 4)) + call assert_equal(1, utf16idx(str, 5)) + call assert_equal(2, utf16idx(str, 6)) + call assert_equal(3, utf16idx(str, 7)) + call assert_equal(3, utf16idx(str, 8)) + call assert_equal(3, utf16idx(str, 9)) + 
call assert_equal(3, utf16idx(str, 10)) + call assert_equal(3, utf16idx(str, 11)) + call assert_equal(-1, utf16idx(str, 12)) + call assert_equal(0, utf16idx(str, 0, v:true)) + call assert_equal(1, utf16idx(str, 1, v:true)) + call assert_equal(2, utf16idx(str, 2, v:true)) + call assert_equal(2, utf16idx(str, 3, v:true)) + call assert_equal(3, utf16idx(str, 4, v:true)) + call assert_equal(3, utf16idx(str, 5, v:true)) + call assert_equal(4, utf16idx(str, 6, v:true)) + call assert_equal(5, utf16idx(str, 7, v:true)) + call assert_equal(6, utf16idx(str, 8, v:true)) + call assert_equal(6, utf16idx(str, 9, v:true)) + call assert_equal(7, utf16idx(str, 10, v:true)) + call assert_equal(7, utf16idx(str, 11, v:true)) + call assert_equal(-1, utf16idx(str, 12, v:true)) + + " empty string + call assert_equal(-1, utf16idx('', 0)) + call assert_equal(-1, utf16idx('', 0, v:true)) + + " error cases + call assert_equal(-1, utf16idx("", 0)) + call assert_equal(-1, utf16idx("abc", -1)) + call assert_equal(-1, utf16idx(test_null_string(), 0)) + call assert_fails('let l = utf16idx([], 0)', 'E1174:') + call assert_fails('let l = utf16idx("ab", [])', 'E1210:') + call assert_fails('let l = utf16idx("ab", 0, [])', 'E1212:') +endfunc + +" Test for utf16idx() using a character index +func Test_utf16idx_from_charidx() + let str = "abc" + for i in str->strcharlen()->range() + call assert_equal(i, utf16idx(str, i, v:false, v:true)) + endfor + call assert_equal(-1, utf16idx(str, 3, v:false, v:true)) + + " UTF-16 index of a string with two byte characters + let str = "a©©b" + for i in str->strcharlen()->range() + call assert_equal(i, utf16idx(str, i, v:false, v:true)) + endfor + call assert_equal(-1, utf16idx(str, 4, v:false, v:true)) + + " UTF-16 index of a string with four byte characters + let str = "a😊😊b" + call assert_equal(0, utf16idx(str, 0, v:false, v:true)) + call assert_equal(2, utf16idx(str, 1, v:false, v:true)) + call assert_equal(4, utf16idx(str, 2, v:false, v:true)) + call 
assert_equal(5, utf16idx(str, 3, v:false, v:true)) + call assert_equal(-1, utf16idx(str, 4, v:false, v:true)) + + " UTF-16 index of a string with composing characters + let str = '-á-b́' + for i in str->strcharlen()->range() + call assert_equal(i, utf16idx(str, i, v:false, v:true)) + endfor + call assert_equal(-1, utf16idx(str, 4, v:false, v:true)) + for i in str->strchars()->range() + call assert_equal(i, utf16idx(str, i, v:true, v:true)) + endfor + call assert_equal(-1, utf16idx(str, 6, v:true, v:true)) + + " string with multiple composing characters + let str = '-ą́-ą́' + for i in str->strcharlen()->range() + call assert_equal(i, utf16idx(str, i, v:false, v:true)) + endfor + call assert_equal(-1, utf16idx(str, 4, v:false, v:true)) + for i in str->strchars()->range() + call assert_equal(i, utf16idx(str, i, v:true, v:true)) + endfor + call assert_equal(-1, utf16idx(str, 8, v:true, v:true)) + + " empty string + call assert_equal(-1, utf16idx('', 0, v:false, v:true)) + call assert_equal(-1, utf16idx('', 0, v:true, v:true)) + + " error cases + call assert_equal(-1, utf16idx(test_null_string(), 0, v:true, v:true)) + call assert_fails('let l = utf16idx("ab", 0, v:false, [])', 'E1212:') +endfunc + +" Test for strutf16len() +func Test_strutf16len() + call assert_equal(3, strutf16len('abc')) + call assert_equal(3, 'abc'->strutf16len(v:true)) + call assert_equal(4, strutf16len('a©©b')) + call assert_equal(4, strutf16len('a©©b', v:true)) + call assert_equal(6, strutf16len('a😊😊b')) + call assert_equal(6, strutf16len('a😊😊b', v:true)) + call assert_equal(4, strutf16len('-á-b́')) + call assert_equal(6, strutf16len('-á-b́', v:true)) + call assert_equal(4, strutf16len('-ą́-ą́')) + call assert_equal(8, strutf16len('-ą́-ą́', v:true)) + call assert_equal(0, strutf16len('')) + + " error cases + call assert_fails('let l = strutf16len([])', 'E1174:') + call assert_fails('let l = strutf16len("a", [])', 'E1212:') + call assert_equal(0, strutf16len(test_null_string())) +endfunc + func 
Test_count() let l = ['a', 'a', 'A', 'b'] call assert_equal(2, count(l, 'a')) @@ -3074,5 +3469,4 @@ func Test_delfunc_while_listing() call StopVimInTerminal(buf) endfunc - " vim: shiftwidth=2 sts=2 expandtab diff --git a/src/version.c b/src/version.c index e5099dd47..0ce90ebc5 100644 --- a/src/version.c +++ b/src/version.c @@ -696,6 +696,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ /**/ + 1485, +/**/ 1484, /**/ 1483, |