summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristian Brabandt <cb@256bit.org>2023-04-24 21:09:54 +0100
committerBram Moolenaar <Bram@vim.org>2023-04-24 21:09:54 +0100
commit67672ef097dd708244ff042a8364994da2b91e75 (patch)
tree728d6713562555f7917bf5297c6ff27b2bb0c792
parente1b4822137b74d45fde1d47d2e32b3ae89966092 (diff)
downloadvim-git-67672ef097dd708244ff042a8364994da2b91e75.tar.gz
patch 9.0.1485: no functions for converting from/to UTF-16 indexv9.0.1485
Problem: no functions for converting from/to UTF-16 index. Solution: Add UTF-16 flag to existing functions and add strutf16len() and utf16idx(). (Yegappan Lakshmanan, closes #12216)
-rw-r--r--runtime/doc/builtin.txt98
-rw-r--r--runtime/doc/eval.txt27
-rw-r--r--runtime/doc/usr_41.txt2
-rw-r--r--src/evalfunc.c10
-rw-r--r--src/proto/strings.pro2
-rw-r--r--src/strings.c166
-rw-r--r--src/testdir/test_functions.vim424
-rw-r--r--src/version.c2
8 files changed, 676 insertions, 55 deletions
diff --git a/runtime/doc/builtin.txt b/runtime/doc/builtin.txt
index 5c2e7ea41..91d468ee1 100644
--- a/runtime/doc/builtin.txt
+++ b/runtime/doc/builtin.txt
@@ -81,8 +81,10 @@ bufnr([{buf} [, {create}]]) Number Number of the buffer {buf}
bufwinid({buf}) Number window ID of buffer {buf}
bufwinnr({buf}) Number window number of buffer {buf}
byte2line({byte}) Number line number at byte count {byte}
-byteidx({expr}, {nr}) Number byte index of {nr}'th char in {expr}
-byteidxcomp({expr}, {nr}) Number byte index of {nr}'th char in {expr}
+byteidx({expr}, {nr} [, {utf16}])
+ Number byte index of {nr}'th char in {expr}
+byteidxcomp({expr}, {nr} [, {utf16}])
+ Number byte index of {nr}'th char in {expr}
call({func}, {arglist} [, {dict}])
any call {func} with arguments {arglist}
ceil({expr}) Float round {expr} up
@@ -117,7 +119,7 @@ changenr() Number current change number
char2nr({expr} [, {utf8}]) Number ASCII/UTF-8 value of first char in {expr}
charclass({string}) Number character class of {string}
charcol({expr} [, {winid}]) Number column number of cursor or mark
-charidx({string}, {idx} [, {countcc}])
+charidx({string}, {idx} [, {countcc} [, {utf16}]])
Number char index of byte {idx} in {string}
chdir({dir}) String change current working directory
cindent({lnum}) Number C indent for line {lnum}
@@ -604,6 +606,8 @@ strptime({format}, {timestring})
strridx({haystack}, {needle} [, {start}])
Number last index of {needle} in {haystack}
strtrans({expr}) String translate string to make it printable
+strutf16len({string} [, {countcc}])
+ Number number of UTF-16 code units in {string}
strwidth({expr}) Number display cell length of the String {expr}
submatch({nr} [, {list}]) String or List
specific match in ":s" or substitute()
@@ -704,6 +708,8 @@ undofile({name}) String undo file name for {name}
undotree() List undo file tree
uniq({list} [, {func} [, {dict}]])
List remove adjacent duplicates from a list
+utf16idx({string}, {idx} [, {countcc} [, {charidx}]])
+ Number UTF-16 index of byte {idx} in {string}
values({dict}) List values in {dict}
virtcol({expr} [, {list}]) Number or List
screen column of cursor or mark
@@ -1363,7 +1369,7 @@ byte2line({byte}) *byte2line()*
< {not available when compiled without the |+byte_offset|
feature}
-byteidx({expr}, {nr}) *byteidx()*
+byteidx({expr}, {nr} [, {utf16}]) *byteidx()*
Return byte index of the {nr}'th character in the String
{expr}. Use zero for the first character, it then returns
zero.
@@ -1373,6 +1379,13 @@ byteidx({expr}, {nr}) *byteidx()*
length is added to the preceding base character. See
|byteidxcomp()| below for counting composing characters
separately.
+ When {utf16} is present and TRUE, {nr} is used as the UTF-16
+ index in the String {expr} instead of as the character index.
+ The UTF-16 index is the index in the string when it is encoded
+ with 16-bit words. If the specified UTF-16 index is in the
+ middle of a character (e.g. in a 4-byte character), then the
+ byte index of the first byte in the character is returned.
+ Refer to |string-offset-encoding| for more information.
Example : >
echo matchstr(str, ".", byteidx(str, 3))
< will display the fourth character. Another way to do the
@@ -1384,11 +1397,17 @@ byteidx({expr}, {nr}) *byteidx()*
If there are less than {nr} characters -1 is returned.
If there are exactly {nr} characters the length of the string
in bytes is returned.
-
+ See |charidx()| and |utf16idx()| for getting the character and
+ UTF-16 index respectively from the byte index.
+ Examples: >
+ echo byteidx('a😊😊', 2) returns 5
+ echo byteidx('a😊😊', 2, 1) returns 1
+ echo byteidx('a😊😊', 3, 1) returns 5
+<
Can also be used as a |method|: >
GetName()->byteidx(idx)
-byteidxcomp({expr}, {nr}) *byteidxcomp()*
+byteidxcomp({expr}, {nr} [, {utf16}]) *byteidxcomp()*
Like byteidx(), except that a composing character is counted
as a separate character. Example: >
let s = 'e' .. nr2char(0x301)
@@ -1493,27 +1512,36 @@ charcol({expr} [, {winid}]) *charcol()*
GetPos()->col()
<
*charidx()*
-charidx({string}, {idx} [, {countcc}])
+charidx({string}, {idx} [, {countcc} [, {utf16}]])
Return the character index of the byte at {idx} in {string}.
The index of the first character is zero.
If there are no multibyte characters the returned value is
equal to {idx}.
+
When {countcc} is omitted or |FALSE|, then composing characters
- are not counted separately, their byte length is
- added to the preceding base character.
+ are not counted separately, their byte length is added to the
+ preceding base character.
When {countcc} is |TRUE|, then composing characters are
counted as separate characters.
+
+ When {utf16} is present and TRUE, {idx} is used as the UTF-16
+ index in the String {string} instead of as the byte index.
+
Returns -1 if the arguments are invalid or if {idx} is greater
than the index of the last byte in {string}. An error is
given if the first argument is not a string, the second
argument is not a number or when the third argument is present
and is not zero or one.
+
See |byteidx()| and |byteidxcomp()| for getting the byte index
- from the character index.
+ from the character index and |utf16idx()| for getting the
+ UTF-16 index from the character index.
+ Refer to |string-offset-encoding| for more information.
Examples: >
echo charidx('áb́ć', 3) returns 1
echo charidx('áb́ć', 6, 1) returns 4
echo charidx('áb́ć', 16) returns -1
+ echo charidx('a😊😊', 4, 0, 1) returns 2
<
Can also be used as a |method|: >
GetName()->charidx(idx)
@@ -9244,6 +9272,28 @@ strtrans({string}) *strtrans()*
Can also be used as a |method|: >
GetString()->strtrans()
+strutf16len({string} [, {countcc}]) *strutf16len()*
+ The result is a Number, which is the number of UTF-16 code
+ units in String {string} (after converting it to UTF-16).
+
+ When {countcc} is TRUE, composing characters are counted
+ separately.
+ When {countcc} is omitted or FALSE, composing characters are
+ ignored.
+
+ Returns zero on error.
+
+ Also see |strlen()| and |strcharlen()|.
+ Examples: >
+ echo strutf16len('a') returns 1
+ echo strutf16len('©') returns 1
+ echo strutf16len('😊') returns 2
+ echo strutf16len('ą́') returns 1
+ echo strutf16len('ą́', v:true) returns 3
+
+ Can also be used as a |method|: >
+ GetText()->strutf16len()
+<
strwidth({string}) *strwidth()*
The result is a Number, which is the number of display cells
String {string} occupies. A Tab character is counted as one
@@ -10059,6 +10109,34 @@ uniq({list} [, {func} [, {dict}]]) *uniq()* *E882*
Can also be used as a |method|: >
mylist->uniq()
+<
+ *utf16idx()*
+utf16idx({string}, {idx} [, {countcc} [, {charidx}]])
+ Same as |charidx()| but returns the UTF-16 index of the byte
+ at {idx} in {string} (after converting it to UTF-16).
+
+ When {charidx} is present and TRUE, {idx} is used as the
+ character index in the String {string} instead of as the byte
+ index.
+ An {idx} in the middle of a UTF-8 sequence is rounded upwards
+ to the end of that sequence.
+
+ See |byteidx()| and |byteidxcomp()| for getting the byte index
+ from the UTF-16 index and |charidx()| for getting the
+ character index from the UTF-16 index.
+ Refer to |string-offset-encoding| for more information.
+ Examples: >
+ echo utf16idx('a😊😊', 3) returns 2
+ echo utf16idx('a😊😊', 7) returns 4
+ echo utf16idx('a😊😊', 1, 0, 1) returns 2
+ echo utf16idx('a😊😊', 2, 0, 1) returns 4
+ echo utf16idx('aą́c', 6) returns 2
+ echo utf16idx('aą́c', 6, 1) returns 4
+ echo utf16idx('a😊😊', 9) returns -1
+<
+ Can also be used as a |method|: >
+ GetName()->utf16idx(idx)
+
values({dict}) *values()*
Return a |List| with all the values of {dict}. The |List| is
diff --git a/runtime/doc/eval.txt b/runtime/doc/eval.txt
index 5c77c796f..b863f42e6 100644
--- a/runtime/doc/eval.txt
+++ b/runtime/doc/eval.txt
@@ -1580,6 +1580,33 @@ Examples: >
echo $"The square root of {{9}} is {sqrt(9)}"
< The square root of {9} is 3.0 ~
+ *string-offset-encoding*
+A string consists of multiple characters. How the characters are stored
+depends on 'encoding'. Most common is UTF-8, which uses one byte for ASCII
+characters, two bytes for other latin characters and more bytes for other
+characters.
+
+A string offset can count characters or bytes. Other programs may use
+UTF-16 encoding (16-bit words) and an offset of UTF-16 words. Some functions
+use byte offsets, usually for UTF-8 encoding. Other functions use character
+offsets, in which case the encoding doesn't matter.
+
+The different offsets for the string "a©😊" are below:
+
+ UTF-8 offsets:
+ [0]: 61, [1]: C2, [2]: A9, [3]: F0, [4]: 9F, [5]: 98, [6]: 8A
+ UTF-16 offsets:
+ [0]: 0061, [1]: 00A9, [2]: D83D, [3]: DE0A
+ UTF-32 (character) offsets:
+ [0]: 00000061, [1]: 000000A9, [2]: 0001F60A
+
+You can use the "g8" and "ga" commands on a character to see the
+decimal/hex/octal values.
+
+The functions |byteidx()|, |utf16idx()| and |charidx()| can be used to convert
+between these indices. The functions |strlen()|, |strutf16len()| and
+|strcharlen()| return the number of bytes, UTF-16 code units and characters in
+a string respectively.
option *expr-option* *E112* *E113*
------
diff --git a/runtime/doc/usr_41.txt b/runtime/doc/usr_41.txt
index 4e0cc480c..bc6d8b412 100644
--- a/runtime/doc/usr_41.txt
+++ b/runtime/doc/usr_41.txt
@@ -754,6 +754,7 @@ String manipulation: *string-functions*
strlen() length of a string in bytes
strcharlen() length of a string in characters
strchars() number of characters in a string
+ strutf16len() number of UTF-16 code units in a string
strwidth() size of string when displayed
strdisplaywidth() size of string when displayed, deals with tabs
setcellwidths() set character cell width overrides
@@ -771,6 +772,7 @@ String manipulation: *string-functions*
byteidx() byte index of a character in a string
byteidxcomp() like byteidx() but count composing characters
charidx() character index of a byte in a string
+ utf16idx() UTF-16 index of a byte in a string
repeat() repeat a string multiple times
eval() evaluate a string expression
execute() execute an Ex command and get the output
diff --git a/src/evalfunc.c b/src/evalfunc.c
index 5c10f1ec9..10d00d5a1 100644
--- a/src/evalfunc.c
+++ b/src/evalfunc.c
@@ -1751,9 +1751,9 @@ static funcentry_T global_functions[] =
ret_number, f_bufwinnr},
{"byte2line", 1, 1, FEARG_1, arg1_number,
ret_number, f_byte2line},
- {"byteidx", 2, 2, FEARG_1, arg2_string_number,
+ {"byteidx", 2, 3, FEARG_1, arg3_string_number_bool,
ret_number, f_byteidx},
- {"byteidxcomp", 2, 2, FEARG_1, arg2_string_number,
+ {"byteidxcomp", 2, 3, FEARG_1, arg3_string_number_bool,
ret_number, f_byteidxcomp},
{"call", 2, 3, FEARG_1, arg3_any_list_dict,
ret_any, f_call},
@@ -1803,7 +1803,7 @@ static funcentry_T global_functions[] =
ret_number, f_charclass},
{"charcol", 1, 2, FEARG_1, arg2_string_or_list_number,
ret_number, f_charcol},
- {"charidx", 2, 3, FEARG_1, arg3_string_number_bool,
+ {"charidx", 2, 4, FEARG_1, arg3_string_number_bool,
ret_number, f_charidx},
{"chdir", 1, 1, FEARG_1, arg1_string,
ret_string, f_chdir},
@@ -2601,6 +2601,8 @@ static funcentry_T global_functions[] =
ret_number, f_strridx},
{"strtrans", 1, 1, FEARG_1, arg1_string,
ret_string, f_strtrans},
+ {"strutf16len", 1, 2, FEARG_1, arg2_string_bool,
+ ret_number, f_strutf16len},
{"strwidth", 1, 1, FEARG_1, arg1_string,
ret_number, f_strwidth},
{"submatch", 1, 2, FEARG_1, arg2_number_bool,
@@ -2785,6 +2787,8 @@ static funcentry_T global_functions[] =
ret_dict_any, f_undotree},
{"uniq", 1, 3, FEARG_1, arg13_sortuniq,
ret_first_arg, f_uniq},
+ {"utf16idx", 2, 4, FEARG_1, arg3_string_number_bool,
+ ret_number, f_utf16idx},
{"values", 1, 1, FEARG_1, arg1_dict_any,
ret_list_member, f_values},
{"virtcol", 1, 2, FEARG_1, arg2_string_or_list_bool,
diff --git a/src/proto/strings.pro b/src/proto/strings.pro
index 602208831..a72e1ff5e 100644
--- a/src/proto/strings.pro
+++ b/src/proto/strings.pro
@@ -36,12 +36,14 @@ void f_string(typval_T *argvars, typval_T *rettv);
void f_strlen(typval_T *argvars, typval_T *rettv);
void f_strcharlen(typval_T *argvars, typval_T *rettv);
void f_strchars(typval_T *argvars, typval_T *rettv);
+void f_strutf16len(typval_T *argvars, typval_T *rettv);
void f_strdisplaywidth(typval_T *argvars, typval_T *rettv);
void f_strwidth(typval_T *argvars, typval_T *rettv);
void f_strcharpart(typval_T *argvars, typval_T *rettv);
void f_strpart(typval_T *argvars, typval_T *rettv);
void f_strridx(typval_T *argvars, typval_T *rettv);
void f_strtrans(typval_T *argvars, typval_T *rettv);
+void f_utf16idx(typval_T *argvars, typval_T *rettv);
void f_tolower(typval_T *argvars, typval_T *rettv);
void f_toupper(typval_T *argvars, typval_T *rettv);
void f_tr(typval_T *argvars, typval_T *rettv);
diff --git a/src/strings.c b/src/strings.c
index 7c868bf1f..7d4281dcd 100644
--- a/src/strings.c
+++ b/src/strings.c
@@ -1006,10 +1006,6 @@ string_reduce(
static void
byteidx(typval_T *argvars, typval_T *rettv, int comp UNUSED)
{
- char_u *t;
- char_u *str;
- varnumber_T idx;
-
rettv->vval.v_number = -1;
if (in_vim9script()
@@ -1017,20 +1013,42 @@ byteidx(typval_T *argvars, typval_T *rettv, int comp UNUSED)
|| check_for_number_arg(argvars, 1) == FAIL))
return;
- str = tv_get_string_chk(&argvars[0]);
- idx = tv_get_number_chk(&argvars[1], NULL);
+ char_u *str = tv_get_string_chk(&argvars[0]);
+ varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
if (str == NULL || idx < 0)
return;
- t = str;
+ varnumber_T utf16idx = FALSE;
+ if (argvars[2].v_type != VAR_UNKNOWN)
+ {
+ utf16idx = tv_get_bool(&argvars[2]);
+ if (utf16idx < 0 || utf16idx > 1)
+ {
+ semsg(_(e_using_number_as_bool_nr), utf16idx);
+ return;
+ }
+ }
+
+ int (*ptr2len)(char_u *);
+ if (enc_utf8 && comp)
+ ptr2len = utf_ptr2len;
+ else
+ ptr2len = mb_ptr2len;
+
+ char_u *t = str;
for ( ; idx > 0; idx--)
{
if (*t == NUL) // EOL reached
return;
- if (enc_utf8 && comp)
- t += utf_ptr2len(t);
- else
- t += (*mb_ptr2len)(t);
+ if (utf16idx)
+ {
+ int clen = ptr2len(t);
+ int c = (clen > 1) ? utf_ptr2char(t) : *t;
+ if (c > 0xFFFF)
+ idx--;
+ }
+ if (idx > 0)
+ t += ptr2len(t);
}
rettv->vval.v_number = (varnumber_T)(t - str);
}
@@ -1059,42 +1077,49 @@ f_byteidxcomp(typval_T *argvars, typval_T *rettv)
void
f_charidx(typval_T *argvars, typval_T *rettv)
{
- char_u *str;
- varnumber_T idx;
- varnumber_T countcc = FALSE;
- char_u *p;
- int len;
- int (*ptr2len)(char_u *);
-
rettv->vval.v_number = -1;
- if ((check_for_string_arg(argvars, 0) == FAIL
+ if (check_for_string_arg(argvars, 0) == FAIL
|| check_for_number_arg(argvars, 1) == FAIL
- || check_for_opt_bool_arg(argvars, 2) == FAIL))
+ || check_for_opt_bool_arg(argvars, 2) == FAIL
+ || (argvars[2].v_type != VAR_UNKNOWN
+ && check_for_opt_bool_arg(argvars, 3) == FAIL))
return;
- str = tv_get_string_chk(&argvars[0]);
- idx = tv_get_number_chk(&argvars[1], NULL);
+ char_u *str = tv_get_string_chk(&argvars[0]);
+ varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
if (str == NULL || idx < 0)
return;
+ varnumber_T countcc = FALSE;
+ varnumber_T utf16idx = FALSE;
if (argvars[2].v_type != VAR_UNKNOWN)
- countcc = tv_get_bool(&argvars[2]);
- if (countcc < 0 || countcc > 1)
{
- semsg(_(e_using_number_as_bool_nr), countcc);
- return;
+ countcc = tv_get_bool(&argvars[2]);
+ if (argvars[3].v_type != VAR_UNKNOWN)
+ utf16idx = tv_get_bool(&argvars[3]);
}
+ int (*ptr2len)(char_u *);
if (enc_utf8 && countcc)
ptr2len = utf_ptr2len;
else
ptr2len = mb_ptr2len;
- for (p = str, len = 0; p <= str + idx; len++)
+ char_u *p;
+ int len;
+ for (p = str, len = 0; utf16idx ? idx >= 0 : p <= str + idx; len++)
{
if (*p == NUL)
return;
+ if (utf16idx)
+ {
+ idx--;
+ int clen = ptr2len(p);
+ int c = (clen > 1) ? utf_ptr2char(p) : *p;
+ if (c > 0xFFFF)
+ idx--;
+ }
p += ptr2len(p);
}
@@ -1359,6 +1384,38 @@ f_strchars(typval_T *argvars, typval_T *rettv)
}
/*
+ * "strutf16len()" function
+ *
+ * Stores in "rettv" the number of UTF-16 code units needed for the string in
+ * argvars[0].  A character whose value is above 0xFFFF is counted as two
+ * code units, every other character as one.
+ *
+ * argvars[1] is the optional {countcc} flag; it only selects which
+ * "advance to next character" function is used to walk the string.
+ * According to the help text in builtin.txt, composing characters are counted
+ * separately when it is TRUE and ignored otherwise.
+ *
+ * When an argument check fails the result is left at -1.
+ * NOTE(review): the help text for strutf16len() says "Returns zero on
+ * error" - confirm which of the two is intended.
+ */
+ void
+f_strutf16len(typval_T *argvars, typval_T *rettv)
+{
+ rettv->vval.v_number = -1; // result when returning early below
+
+ if (check_for_string_arg(argvars, 0) == FAIL
+ || check_for_opt_bool_arg(argvars, 1) == FAIL)
+ return;
+
+ varnumber_T countcc = FALSE;
+ if (argvars[1].v_type != VAR_UNKNOWN) // {countcc} was given
+ countcc = tv_get_bool(&argvars[1]);
+
+ char_u *s = tv_get_string(&argvars[0]);
+ varnumber_T len = 0; // running count of UTF-16 code units
+ int (*func_mb_ptr2char_adv)(char_u **pp);
+ int ch;
+
+ func_mb_ptr2char_adv = countcc ? mb_cptr2char_adv : mb_ptr2char_adv;
+ while (*s != NUL)
+ {
+ ch = func_mb_ptr2char_adv(&s); // get the character and advance "s"
+ if (ch > 0xFFFF) // outside the 16-bit range: one extra code unit
+ ++len;
+ ++len;
+ }
+ rettv->vval.v_number = len;
+}
+
+/*
* "strdisplaywidth()" function
*/
void
@@ -1619,6 +1676,61 @@ f_strtrans(typval_T *argvars, typval_T *rettv)
rettv->vval.v_string = transstr(tv_get_string(&argvars[0]));
}
+
+/*
+ *
+ * "utf16idx()" function
+ *
+ * Stores in "rettv" a UTF-16 index into the string argvars[0] for the
+ * position given by argvars[1] ({idx}).
+ *
+ * argvars[2] is the optional {countcc} flag: when TRUE and 'encoding' is
+ * UTF-8, utf_ptr2len() is used to step through the string instead of
+ * mb_ptr2len().
+ * argvars[3] is the optional {charidx} flag: when TRUE {idx} is consumed one
+ * per character stepped over, otherwise it is compared against the byte
+ * offset in the string.  It is only looked at when argvars[2] is present.
+ *
+ * The result is -1 when an argument check fails, the string is NULL, {idx}
+ * is negative or the end of the string is reached before {idx}.
+ *
+ * Note: a character above 0xFFFF adds two to the count before one is
+ * subtracted at the end, so for such a character the result is the index of
+ * its second code unit (e.g. the tests expect utf16idx() to give 2 for byte
+ * index 1 of a string that starts with "a" followed by a 4-byte character).
+ */
+ void
+f_utf16idx(typval_T *argvars, typval_T *rettv)
+{
+ rettv->vval.v_number = -1; // result for every early return below
+
+ // NOTE(review): {idx} is mandatory according to the function table entry
+ // (minimum of two arguments), yet the "optional number" check is used
+ // here - confirm whether check_for_number_arg() was intended.
+ if (check_for_string_arg(argvars, 0) == FAIL
+ || check_for_opt_number_arg(argvars, 1) == FAIL
+ || check_for_opt_bool_arg(argvars, 2) == FAIL
+ || (argvars[2].v_type != VAR_UNKNOWN
+ && check_for_opt_bool_arg(argvars, 3) == FAIL))
+ return;
+
+ char_u *str = tv_get_string_chk(&argvars[0]);
+ varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
+ if (str == NULL || idx < 0)
+ return;
+
+ varnumber_T countcc = FALSE;
+ varnumber_T charidx = FALSE;
+ if (argvars[2].v_type != VAR_UNKNOWN)
+ {
+ countcc = tv_get_bool(&argvars[2]);
+ if (argvars[3].v_type != VAR_UNKNOWN) // {charidx} was given
+ charidx = tv_get_bool(&argvars[3]);
+ }
+
+ int (*ptr2len)(char_u *); // returns the byte length of one step
+ if (enc_utf8 && countcc)
+ ptr2len = utf_ptr2len;
+ else
+ ptr2len = mb_ptr2len;
+
+ char_u *p;
+ int len; // UTF-16 code units counted so far
+ // Character index: run idx + 1 times.  Byte index: run until "p" is past
+ // the byte at "idx".
+ for (p = str, len = 0; charidx ? idx >= 0 : p <= str + idx; len++)
+ {
+ if (*p == NUL) // {idx} is beyond the end of the string
+ return;
+ int clen = ptr2len(p);
+ int c = (clen > 1) ? utf_ptr2char(p) : *p;
+ if (c > 0xFFFF) // outside the 16-bit range: one extra code unit
+ len++;
+ p += ptr2len(p); // called again with the same "p" as for "clen"
+ if (charidx)
+ idx--;
+ }
+
+ // "len" is a count, subtract one to get a zero-based index.
+ // NOTE(review): the loop body runs at least once when this line is
+ // reached, so the "len == 0" branch appears to be unreachable - confirm.
+ rettv->vval.v_number = len > 0 ? len - 1 : 0;
+}
+
/*
* "tolower(string)" function
*/
diff --git a/src/testdir/test_functions.vim b/src/testdir/test_functions.vim
index 3bea88df1..e32c4f5ff 100644
--- a/src/testdir/test_functions.vim
+++ b/src/testdir/test_functions.vim
@@ -1192,19 +1192,14 @@ func Test_byte2line_line2byte()
bw!
endfunc
-" Test for byteidx() and byteidxcomp() functions
+" Test for byteidx() using a character index
func Test_byteidx()
let a = '.茅.' " one char of two bytes
call assert_equal(0, byteidx(a, 0))
- call assert_equal(0, byteidxcomp(a, 0))
call assert_equal(1, byteidx(a, 1))
- call assert_equal(1, byteidxcomp(a, 1))
call assert_equal(3, byteidx(a, 2))
- call assert_equal(3, byteidxcomp(a, 2))
call assert_equal(4, byteidx(a, 3))
- call assert_equal(4, byteidxcomp(a, 3))
call assert_equal(-1, byteidx(a, 4))
- call assert_equal(-1, byteidxcomp(a, 4))
let b = '.e虂.' " normal e with composing char
call assert_equal(0, b->byteidx(0))
@@ -1212,18 +1207,184 @@ func Test_byteidx()
call assert_equal(4, b->byteidx(2))
call assert_equal(5, b->byteidx(3))
call assert_equal(-1, b->byteidx(4))
+
+ " string with multiple composing characters
+ let str = '-a台虂-a台虂'
+ call assert_equal(0, byteidx(str, 0))
+ call assert_equal(1, byteidx(str, 1))
+ call assert_equal(6, byteidx(str, 2))
+ call assert_equal(7, byteidx(str, 3))
+ call assert_equal(12, byteidx(str, 4))
+ call assert_equal(-1, byteidx(str, 5))
+
+ " empty string
+ call assert_equal(0, byteidx('', 0))
+ call assert_equal(-1, byteidx('', 1))
+
+ " error cases
call assert_fails("call byteidx([], 0)", 'E730:')
+ call assert_fails("call byteidx('abc', [])", 'E745:')
+endfunc
+
+" Test for byteidxcomp() using a character index
+func Test_byteidxcomp()
+ let a = '.茅.' " one char of two bytes
+ call assert_equal(0, byteidxcomp(a, 0))
+ call assert_equal(1, byteidxcomp(a, 1))
+ call assert_equal(3, byteidxcomp(a, 2))
+ call assert_equal(4, byteidxcomp(a, 3))
+ call assert_equal(-1, byteidxcomp(a, 4))
+ let b = '.e虂.' " normal e with composing char
call assert_equal(0, b->byteidxcomp(0))
call assert_equal(1, b->byteidxcomp(1))
call assert_equal(2, b->byteidxcomp(2))
call assert_equal(4, b->byteidxcomp(3))
call assert_equal(5, b->byteidxcomp(4))
call assert_equal(-1, b->byteidxcomp(5))
+
+ " string with multiple composing characters
+ let str = '-a台虂-a台虂'
+ call assert_equal(0, byteidxcomp(str, 0))
+ call assert_equal(1, byteidxcomp(str, 1))
+ call assert_equal(2, byteidxcomp(str, 2))
+ call assert_equal(4, byteidxcomp(str, 3))
+ call assert_equal(6, byteidxcomp(str, 4))
+ call assert_equal(7, byteidxcomp(str, 5))
+ call assert_equal(8, byteidxcomp(str, 6))
+ call assert_equal(10, byteidxcomp(str, 7))
+ call assert_equal(12, byteidxcomp(str, 8))
+ call assert_equal(-1, byteidxcomp(str, 9))
+
+ " empty string
+ call assert_equal(0, byteidxcomp('', 0))
+ call assert_equal(-1, byteidxcomp('', 1))
+
+ " error cases
call assert_fails("call byteidxcomp([], 0)", 'E730:')
+ call assert_fails("call byteidxcomp('abc', [])", 'E745:')
endfunc
-" Test for charidx()
+" Test for byteidx() using a UTF-16 index
+func Test_byteidx_from_utf16_index()
+ " string with single byte characters
+ let str = "abc"
+ for i in range(3)
+ call assert_equal(i, byteidx(str, i, v:true))
+ endfor
+ call assert_equal(3, byteidx(str, 3, v:true))
+ call assert_equal(-1, byteidx(str, 4, v:true))
+
+ " string with two byte characters
+ let str = "a漏漏b"
+ call assert_equal(0, byteidx(str, 0, v:true))
+ call assert_equal(1, byteidx(str, 1, v:true))
+ call assert_equal(3, byteidx(str, 2, v:true))
+ call assert_equal(5, byteidx(str, 3, v:true))
+ call assert_equal(6, byteidx(str, 4, v:true))
+ call assert_equal(-1, byteidx(str, 5, v:true))
+
+ " string with four byte characters
+ let str = "a馃槉馃槉b"
+ call assert_equal(0, byteidx(str, 0, v:true))
+ call assert_equal(1, byteidx(str, 1, v:true))
+ call assert_equal(1, byteidx(str, 2, v:true))
+ call assert_equal(5, byteidx(str, 3, v:true))
+ call assert_equal(5, byteidx(str, 4, v:true))
+ call assert_equal(9, byteidx(str, 5, v:true))
+ call assert_equal(10, byteidx(str, 6, v:true))
+ call assert_equal(-1, byteidx(str, 7, v:true))
+
+ " string with composing characters
+ let str = '-a虂-b虂'
+ call assert_equal(0, byteidx(str, 0, v:true))
+ call assert_equal(1, byteidx(str, 1, v:true))
+ call assert_equal(4, byteidx(str, 2, v:true))
+ call assert_equal(5, byteidx(str, 3, v:true))
+ call assert_equal(8, byteidx(str, 4, v:true))
+ call assert_equal(-1, byteidx(str, 5, v:true))
+
+ " string with multiple composing characters
+ let str = '-a台虂-a台虂'
+ call assert_equal(0, byteidx(str, 0, v:true))
+ call assert_equal(1, byteidx(str, 1, v:true))
+ call assert_equal(6, byteidx(str, 2, v:true))
+ call assert_equal(7, byteidx(str, 3, v:true))
+ call assert_equal(12, byteidx(str, 4, v:true))
+ call assert_equal(-1, byteidx(str, 5, v:true))
+
+ " empty string
+ call assert_equal(0, byteidx('', 0, v:true))
+ call assert_equal(-1, byteidx('', 1, v:true))
+
+ " error cases
+ call assert_fails('call byteidx(str, 0, [])', 'E745:')
+endfunc
+
+" Test for byteidxcomp() using a UTF-16 index
+func Test_byteidxcomp_from_utf16_index()
+ " string with single byte characters
+ let str = "abc"
+ for i in range(3)
+ call assert_equal(i, byteidxcomp(str, i, v:true))
+ endfor
+ call assert_equal(3, byteidxcomp(str, 3, v:true))
+ call assert_equal(-1, byteidxcomp(str, 4, v:true))
+
+ " string with two byte characters
+ let str = "a漏漏b"
+ call assert_equal(0, byteidxcomp(str, 0, v:true))
+ call assert_equal(1, byteidxcomp(str, 1, v:true))
+ call assert_equal(3, byteidxcomp(str, 2, v:true))
+ call assert_equal(5, byteidxcomp(str, 3, v:true))
+ call assert_equal(6, byteidxcomp(str, 4, v:true))
+ call assert_equal(-1, byteidxcomp(str, 5, v:true))
+
+ " string with four byte characters
+ let str = "a馃槉馃槉b"
+ call assert_equal(0, byteidxcomp(str, 0, v:true))
+ call assert_equal(1, byteidxcomp(str, 1, v:true))
+ call assert_equal(1, byteidxcomp(str, 2, v:true))
+ call assert_equal(5, byteidxcomp(str, 3, v:true))
+ call assert_equal(5, byteidxcomp(str, 4, v:true))
+ call assert_equal(9, byteidxcomp(str, 5, v:true))
+ call assert_equal(10, byteidxcomp(str, 6, v:true))
+ call assert_equal(-1, byteidxcomp(str, 7, v:true))
+
+ " string with composing characters
+ let str = '-a虂-b虂'
+ call assert_equal(0, byteidxcomp(str, 0, v:true))
+ call assert_equal(1, byteidxcomp(str, 1, v:true))
+ call assert_equal(2, byteidxcomp(str, 2, v:true))
+ call assert_equal(4, byteidxcomp(str, 3, v:true))
+ call assert_equal(5, byteidxcomp(str, 4, v:true))
+ call assert_equal(6, byteidxcomp(str, 5, v:true))
+ call assert_equal(8, byteidxcomp(str, 6, v:true))
+ call assert_equal(-1, byteidxcomp(str, 7, v:true))
+ call assert_fails('call byteidxcomp(str, 0, [])', 'E745:')
+
+ " string with multiple composing characters
+ let str = '-a台虂-a台虂'
+ call assert_equal(0, byteidxcomp(str, 0, v:true))
+ call assert_equal(1, byteidxcomp(str, 1, v:true))
+ call assert_equal(2, byteidxcomp(str, 2, v:true))
+ call assert_equal(4, byteidxcomp(str, 3, v:true))
+ call assert_equal(6, byteidxcomp(str, 4, v:true))
+ call assert_equal(7, byteidxcomp(str, 5, v:true))
+ call assert_equal(8, byteidxcomp(str, 6, v:true))
+ call assert_equal(10, byteidxcomp(str, 7, v:true))
+ call assert_equal(12, byteidxcomp(str, 8, v:true))
+ call assert_equal(-1, byteidxcomp(str, 9, v:true))
+
+ " empty string
+ call assert_equal(0, byteidxcomp('', 0, v:true))
+ call assert_equal(-1, byteidxcomp('', 1, v:true))
+
+ " error cases
+ call assert_fails('call byteidxcomp(str, 0, [])', 'E745:')
+endfunc
+
+" Test for charidx() using a byte index
func Test_charidx()
let a = 'xa虂b虂y'
call assert_equal(0, charidx(a, 0))
@@ -1232,17 +1393,20 @@ func Test_charidx()
call assert_equal(3, charidx(a, 7))
call assert_equal(-1, charidx(a, 8))
call assert_equal(-1, charidx(a, -1))
- call assert_equal(-1, charidx('', 0))
- call assert_equal(-1, charidx(test_null_string(), 0))
" count composing characters
- call assert_equal(0, charidx(a, 0, 1))
- call assert_equal(2, charidx(a, 2, 1))
- call assert_equal(3, charidx(a, 4, 1))
- call assert_equal(5, charidx(a, 7, 1))
- call assert_equal(-1, charidx(a, 8, 1))
+ call assert_equal(0, a->charidx(0, 1))
+ call assert_equal(2, a->charidx(2, 1))
+ call assert_equal(3, a->charidx(4, 1))
+ call assert_equal(5, a->charidx(7, 1))
+ call assert_equal(-1, a->charidx(8, 1))
+
+ " empty string
+ call assert_equal(-1, charidx('', 0))
call assert_equal(-1, charidx('', 0, 1))
+ " error cases
+ call assert_equal(-1, charidx(test_null_string(), 0))
call assert_fails('let x = charidx([], 1)', 'E1174:')
call assert_fails('let x = charidx("abc", [])', 'E1210:')
call assert_fails('let x = charidx("abc", 1, [])', 'E1212:')
@@ -1250,6 +1414,237 @@ func Test_charidx()
call assert_fails('let x = charidx("abc", 1, 2)', 'E1212:')
endfunc
+" Test for charidx() using a UTF-16 index
+func Test_charidx_from_utf16_index()
+ " string with single byte characters
+ let str = "abc"
+ for i in range(3)
+ call assert_equal(i, charidx(str, i, v:false, v:true))
+ endfor
+ call assert_equal(-1, charidx(str, 3, v:false, v:true))
+
+ " string with two byte characters
+ let str = "a漏漏b"
+ call assert_equal(0, charidx(str, 0, v:false, v:true))
+ call assert_equal(1, charidx(str, 1, v:false, v:true))
+ call assert_equal(2, charidx(str, 2, v:false, v:true))
+ call assert_equal(3, charidx(str, 3, v:false, v:true))
+ call assert_equal(-1, charidx(str, 4, v:false, v:true))
+
+ " string with four byte characters
+ let str = "a馃槉馃槉b"
+ call assert_equal(0, charidx(str, 0, v:false, v:true))
+ call assert_equal(1, charidx(str, 1, v:false, v:true))
+ call assert_equal(1, charidx(str, 2, v:false, v:true))
+ call assert_equal(2, charidx(str, 3, v:false, v:true))
+ call assert_equal(2, charidx(str, 4, v:false, v:true))
+ call assert_equal(3, charidx(str, 5, v:false, v:true))
+ call assert_equal(-1, charidx(str, 6, v:false, v:true))
+
+ " string with composing characters
+ let str = '-a虂-b虂'
+ for i in str->strcharlen()->range()
+ call assert_equal(i, charidx(str, i, v:false, v:true))
+ endfor
+ call assert_equal(-1, charidx(str, 4, v:false, v:true))
+ for i in str->strchars()->range()
+ call assert_equal(i, charidx(str, i, v:true, v:true))
+ endfor
+ call assert_equal(-1, charidx(str, 6, v:true, v:true))
+
+ " string with multiple composing characters
+ let str = '-a台虂-a台虂'
+ for i in str->strcharlen()->range()
+ call assert_equal(i, charidx(str, i, v:false, v:true))
+ endfor
+ call assert_equal(-1, charidx(str, 4, v:false, v:true))
+ for i in str->strchars()->range()
+ call assert_equal(i, charidx(str, i, v:true, v:true))
+ endfor
+ call assert_equal(-1, charidx(str, 8, v:true, v:true))
+
+ " empty string
+ call assert_equal(-1, charidx('', 0, v:false, v:true))
+ call assert_equal(-1, charidx('', 0, v:true, v:true))
+
+ " error cases
+ call assert_equal(-1, charidx('', 0, v:false, v:true))
+ call assert_equal(-1, charidx('', 0, v:true, v:true))
+ call assert_equal(-1, charidx(test_null_string(), 0, v:false, v:true))
+ call assert_fails('let x = charidx("abc", 1, v:false, [])', 'E1212:')
+ call assert_fails('let x = charidx("abc", 1, v:true, [])', 'E1212:')
+endfunc
+
+" Test for utf16idx() using a byte index
+func Test_utf16idx_from_byteidx()
+ " UTF-16 index of a string with single byte characters
+ let str = "abc"
+ for i in range(3)
+ call assert_equal(i, utf16idx(str, i))
+ endfor
+ call assert_equal(-1, utf16idx(str, 3))
+
+ " UTF-16 index of a string with two byte characters
+ let str = 'a漏漏b'
+ call assert_equal(0, str->utf16idx(0))
+ call assert_equal(1, str->utf16idx(1))
+ call assert_equal(1, str->utf16idx(2))
+ call assert_equal(2, str->utf16idx(3))
+ call assert_equal(2, str->utf16idx(4))
+ call assert_equal(3, str->utf16idx(5))
+ call assert_equal(-1, str->utf16idx(6))
+
+ " UTF-16 index of a string with four byte characters
+ let str = 'a馃槉馃槉b'
+ call assert_equal(0, utf16idx(str, 0))
+ call assert_equal(2, utf16idx(str, 1))
+ call assert_equal(2, utf16idx(str, 2))
+ call assert_equal(2, utf16idx(str, 3))
+ call assert_equal(2, utf16idx(str, 4))
+ call assert_equal(4, utf16idx(str, 5))
+ call assert_equal(4, utf16idx(str, 6))
+ call assert_equal(4, utf16idx(str, 7))
+ call assert_equal(4, utf16idx(str, 8))
+ call assert_equal(5, utf16idx(str, 9))
+ call assert_equal(-1, utf16idx(str, 10))
+
+ " UTF-16 index of a string with composing characters
+ let str = '-a虂-b虂'
+ call assert_equal(0, utf16idx(str, 0))
+ call assert_equal(1, utf16idx(str, 1))
+ call assert_equal(1, utf16idx(str, 2))
+ call assert_equal(1, utf16idx(str, 3))
+ call assert_equal(2, utf16idx(str, 4))
+ call assert_equal(3, utf16idx(str, 5))
+ call assert_equal(3, utf16idx(str, 6))
+ call assert_equal(3, utf16idx(str, 7))
+ call assert_equal(-1, utf16idx(str, 8))
+ call assert_equal(0, utf16idx(str, 0, v:true))
+ call assert_equal(1, utf16idx(str, 1, v:true))
+ call assert_equal(2, utf16idx(str, 2, v:true))
+ call assert_equal(2, utf16idx(str, 3, v:true))
+ call assert_equal(3, utf16idx(str, 4, v:true))
+ call assert_equal(4, utf16idx(str, 5, v:true))
+ call assert_equal(5, utf16idx(str, 6, v:true))
+ call assert_equal(5, utf16idx(str, 7, v:true))
+ call assert_equal(-1, utf16idx(str, 8, v:true))
+
+ " string with multiple composing characters
+ let str = '-a台虂-a台虂'
+ call assert_equal(0, utf16idx(str, 0))
+ call assert_equal(1, utf16idx(str, 1))
+ call assert_equal(1, utf16idx(str, 2))
+ call assert_equal(1, utf16idx(str, 3))
+ call assert_equal(1, utf16idx(str, 4))
+ call assert_equal(1, utf16idx(str, 5))
+ call assert_equal(2, utf16idx(str, 6))
+ call assert_equal(3, utf16idx(str, 7))
+ call assert_equal(3, utf16idx(str, 8))
+ call assert_equal(3, utf16idx(str, 9))
+ call assert_equal(3, utf16idx(str, 10))
+ call assert_equal(3, utf16idx(str, 11))
+ call assert_equal(-1, utf16idx(str, 12))
+ call assert_equal(0, utf16idx(str, 0, v:true))
+ call assert_equal(1, utf16idx(str, 1, v:true))
+ call assert_equal(2, utf16idx(str, 2, v:true))
+ call assert_equal(2, utf16idx(str, 3, v:true))
+ call assert_equal(3, utf16idx(str, 4, v:true))
+ call assert_equal(3, utf16idx(str, 5, v:true))
+ call assert_equal(4, utf16idx(str, 6, v:true))
+ call assert_equal(5, utf16idx(str, 7, v:true))
+ call assert_equal(6, utf16idx(str, 8, v:true))
+ call assert_equal(6, utf16idx(str, 9, v:true))
+ call assert_equal(7, utf16idx(str, 10, v:true))
+ call assert_equal(7, utf16idx(str, 11, v:true))
+ call assert_equal(-1, utf16idx(str, 12, v:true))
+
+ " empty string
+ call assert_equal(-1, utf16idx('', 0))
+ call assert_equal(-1, utf16idx('', 0, v:true))
+
+ " error cases
+ call assert_equal(-1, utf16idx("", 0))
+ call assert_equal(-1, utf16idx("abc", -1))
+ call assert_equal(-1, utf16idx(test_null_string(), 0))
+ call assert_fails('let l = utf16idx([], 0)', 'E1174:')
+ call assert_fails('let l = utf16idx("ab", [])', 'E1210:')
+ call assert_fails('let l = utf16idx("ab", 0, [])', 'E1212:')
+endfunc
+
+" Test for utf16idx() using a character index
+func Test_utf16idx_from_charidx()
+ let str = "abc"
+ for i in str->strcharlen()->range()
+ call assert_equal(i, utf16idx(str, i, v:false, v:true))
+ endfor
+ call assert_equal(-1, utf16idx(str, 3, v:false, v:true))
+
+ " two-byte UTF-8 characters: each is one UTF-16 unit, so index == char index
+ let str = "a©©b"
+ for i in str->strcharlen()->range()
+ call assert_equal(i, utf16idx(str, i, v:false, v:true))
+ endfor
+ call assert_equal(-1, utf16idx(str, 4, v:false, v:true))
+
+ " four-byte UTF-8 characters: each advances the UTF-16 index by two
+ let str = "a😊😊b"
+ call assert_equal(0, utf16idx(str, 0, v:false, v:true))
+ call assert_equal(2, utf16idx(str, 1, v:false, v:true))
+ call assert_equal(4, utf16idx(str, 2, v:false, v:true))
+ call assert_equal(5, utf16idx(str, 3, v:false, v:true))
+ call assert_equal(-1, utf16idx(str, 4, v:false, v:true))
+
+ " composing characters (decomposed: base letter followed by U+0301)
+ let str = '-á-b́'
+ for i in str->strcharlen()->range()
+ call assert_equal(i, utf16idx(str, i, v:false, v:true))
+ endfor
+ call assert_equal(-1, utf16idx(str, 4, v:false, v:true))
+ for i in str->strchars()->range()
+ call assert_equal(i, utf16idx(str, i, v:true, v:true))
+ endfor
+ call assert_equal(-1, utf16idx(str, 6, v:true, v:true))
+
+ " multiple composing characters (base letter followed by U+0328 and U+0301)
+ let str = '-ą́-ą́'
+ for i in str->strcharlen()->range()
+ call assert_equal(i, utf16idx(str, i, v:false, v:true))
+ endfor
+ call assert_equal(-1, utf16idx(str, 4, v:false, v:true))
+ for i in str->strchars()->range()
+ call assert_equal(i, utf16idx(str, i, v:true, v:true))
+ endfor
+ call assert_equal(-1, utf16idx(str, 8, v:true, v:true))
+
+ " empty string: index 0 is already out of range
+ call assert_equal(-1, utf16idx('', 0, v:false, v:true))
+ call assert_equal(-1, utf16idx('', 0, v:true, v:true))
+
+ " error cases: null string yields -1, non-bool fourth argument must fail
+ call assert_equal(-1, utf16idx(test_null_string(), 0, v:true, v:true))
+ call assert_fails('let l = utf16idx("ab", 0, v:false, [])', 'E1212:')
+endfunc
+
+" Test for strutf16len()
+func Test_strutf16len()
+ call assert_equal(3, strutf16len('abc'))
+ call assert_equal(3, 'abc'->strutf16len(v:true))
+ call assert_equal(4, strutf16len('a©©b'))
+ call assert_equal(4, strutf16len('a©©b', v:true))
+ call assert_equal(6, strutf16len('a😊😊b'))
+ call assert_equal(6, strutf16len('a😊😊b', v:true))
+ call assert_equal(4, strutf16len('-á-b́'))
+ call assert_equal(6, strutf16len('-á-b́', v:true))
+ call assert_equal(4, strutf16len('-ą́-ą́'))
+ call assert_equal(8, strutf16len('-ą́-ą́', v:true))
+ call assert_equal(0, strutf16len(''))
+
+ " error cases: wrong argument types must fail; a null string has length 0
+ call assert_fails('let l = strutf16len([])', 'E1174:')
+ call assert_fails('let l = strutf16len("a", [])', 'E1212:')
+ call assert_equal(0, strutf16len(test_null_string()))
+endfunc
+
func Test_count()
let l = ['a', 'a', 'A', 'b']
call assert_equal(2, count(l, 'a'))
@@ -3074,5 +3469,4 @@ func Test_delfunc_while_listing()
call StopVimInTerminal(buf)
endfunc
-
" vim: shiftwidth=2 sts=2 expandtab
diff --git a/src/version.c b/src/version.c
index e5099dd47..0ce90ebc5 100644
--- a/src/version.c
+++ b/src/version.c
@@ -696,6 +696,8 @@ static char *(features[]) =
static int included_patches[] =
{ /* Add new patch number below this line */
/**/
+ 1485,
+/**/
1484,
/**/
1483,