From 0fa313a71870ccc2ba63da25a7abea850f5b3d02 Mon Sep 17 00:00:00 2001 From: Bram Moolenaar Date: Wed, 10 Aug 2005 21:07:57 +0000 Subject: updated for version 7.0127 --- src/mbyte.c | 92 ++++++++++++++++++++++++++++++++++++------------------------- 1 file changed, 55 insertions(+), 37 deletions(-) (limited to 'src/mbyte.c') diff --git a/src/mbyte.c b/src/mbyte.c index a0063120b..61fd6980a 100644 --- a/src/mbyte.c +++ b/src/mbyte.c @@ -126,7 +126,7 @@ static int enc_canon_search __ARGS((char_u *name)); static int dbcs_char2len __ARGS((int c)); static int dbcs_char2bytes __ARGS((int c, char_u *buf)); -static int dbcs_ptr2len_check __ARGS((char_u *p)); +static int dbcs_ptr2len __ARGS((char_u *p)); static int dbcs_char2cells __ARGS((int c)); static int dbcs_ptr2char __ARGS((char_u *p)); @@ -589,7 +589,7 @@ codepage_invalid: */ if (enc_utf8) { - mb_ptr2len_check = utfc_ptr2len_check; + mb_ptr2len = utfc_ptr2len; mb_char2len = utf_char2len; mb_char2bytes = utf_char2bytes; mb_ptr2cells = utf_ptr2cells; @@ -600,7 +600,7 @@ codepage_invalid: } else if (enc_dbcs != 0) { - mb_ptr2len_check = dbcs_ptr2len_check; + mb_ptr2len = dbcs_ptr2len; mb_char2len = dbcs_char2len; mb_char2bytes = dbcs_char2bytes; mb_ptr2cells = dbcs_ptr2cells; @@ -611,7 +611,7 @@ codepage_invalid: } else { - mb_ptr2len_check = latin_ptr2len_check; + mb_ptr2len = latin_ptr2len; mb_char2len = latin_char2len; mb_char2bytes = latin_char2bytes; mb_ptr2cells = latin_ptr2cells; @@ -1054,21 +1054,21 @@ dbcs_char2bytes(c, buf) } /* - * mb_ptr2len_check() function pointer. + * mb_ptr2len() function pointer. * Get byte length of character at "*p" but stop at a NUL. * For UTF-8 this includes following composing characters. * Returns 0 when *p is NUL. * */ int -latin_ptr2len_check(p) +latin_ptr2len(p) char_u *p; { return MB_BYTE2LEN(*p); } static int -dbcs_ptr2len_check(p) +dbcs_ptr2len(p) char_u *p; { int len; @@ -1255,7 +1255,7 @@ utf_ptr2cells(p) { c = utf_ptr2char(p); /* An illegal byte is displayed as . */ - if (utf_ptr2len_check(p) == 1 || c == NUL) + if (utf_ptr2len(p) == 1 || c == NUL) return 4; /* If the char is ASCII it must be an overlong sequence. */ if (c < 0x80) @@ -1411,7 +1411,25 @@ mb_ptr2char_adv(pp) int c; c = (*mb_ptr2char)(*pp); - *pp += (*mb_ptr2len_check)(*pp); + *pp += (*mb_ptr2len)(*pp); + return c; +} + +/* + * Get character at **pp and advance *pp to the next character. + * Note: composing characters are returned as separate characters. + */ + int +mb_cptr2char_adv(pp) + char_u **pp; +{ + int c; + + c = (*mb_ptr2char)(*pp); + if (enc_utf8) + *pp += utf_ptr2len(*pp); + else + *pp += (*mb_ptr2len)(*pp); return c; } @@ -1482,14 +1500,14 @@ utfc_ptr2char(p, p1, p2) int cc; c = utf_ptr2char(p); - len = utf_ptr2len_check(p); + len = utf_ptr2len(p); /* Only accept a composing char when the first char isn't illegal. */ if ((len > 1 || *p < 0x80) && p[len] >= 0x80 && UTF_COMPOSINGLIKE(p, p + len)) { *p1 = utf_ptr2char(p + len); - len += utf_ptr2len_check(p + len); + len += utf_ptr2len(p + len); if (p[len] >= 0x80 && utf_iscomposing(cc = utf_ptr2char(p + len))) *p2 = cc; else @@ -1519,7 +1537,7 @@ utfc_ptr2char_len(p, p1, p2, maxlen) int cc; c = utf_ptr2char(p); - len = utf_ptr2len_check_len(p, maxlen); + len = utf_ptr2len_len(p, maxlen); /* Only accept a composing char when the first char isn't illegal. */ if ((len > 1 || *p < 0x80) && len < maxlen @@ -1527,7 +1545,7 @@ utfc_ptr2char_len(p, p1, p2, maxlen) && UTF_COMPOSINGLIKE(p, p + len)) { *p1 = utf_ptr2char(p + len); - len += utf_ptr2len_check_len(p + len, maxlen - len); + len += utf_ptr2len_len(p + len, maxlen - len); if (len < maxlen && p[len] >= 0x80 && utf_iscomposing(cc = utf_ptr2char(p + len))) @@ -1573,7 +1591,7 @@ utfc_char2bytes(off, buf) * Returns 1 for an illegal byte sequence. */ int -utf_ptr2len_check(p) +utf_ptr2len(p) char_u *p; { int len; @@ -1607,7 +1625,7 @@ utf_byte2len(b) * Returns number > "size" for an incomplete byte sequence. */ int -utf_ptr2len_check_len(p, size) +utf_ptr2len_len(p, size) char_u *p; int size; { @@ -1630,7 +1648,7 @@ utf_ptr2len_check_len(p, size) * This includes following composing characters. */ int -utfc_ptr2len_check(p) +utfc_ptr2len(p) char_u *p; { int len; @@ -1645,7 +1663,7 @@ utfc_ptr2len_check(p) return 1; /* Skip over first UTF-8 char, stopping at a NUL byte. */ - len = utf_ptr2len_check(p); + len = utf_ptr2len(p); /* Check for illegal byte. */ if (len == 1 && b0 >= 0x80) @@ -1667,7 +1685,7 @@ utfc_ptr2len_check(p) #ifdef FEAT_ARABIC prevlen = len; #endif - len += utf_ptr2len_check(p + len); + len += utf_ptr2len(p + len); } } @@ -1677,7 +1695,7 @@ utfc_ptr2len_check(p) * Returns 1 for an illegal char or an incomplete byte sequence. */ int -utfc_ptr2len_check_len(p, size) +utfc_ptr2len_len(p, size) char_u *p; int size; { @@ -1692,7 +1710,7 @@ utfc_ptr2len_check_len(p, size) return 1; /* Skip over first UTF-8 char, stopping at a NUL byte. */ - len = utf_ptr2len_check_len(p, size); + len = utf_ptr2len_len(p, size); /* Check for illegal byte and incomplete byte sequence. */ if ((len == 1 && p[0] >= 0x80) || len > size) @@ -1714,7 +1732,7 @@ utfc_ptr2len_check_len(p, size) #ifdef FEAT_ARABIC prevlen = len; #endif - len += utf_ptr2len_check_len(p + len, size - len); + len += utf_ptr2len_len(p + len, size - len); } return len; } @@ -2276,7 +2294,7 @@ mb_strnicmp(s1, s2, nn) } else { - l = (*mb_ptr2len_check)(s1 + i); + l = (*mb_ptr2len)(s1 + i); if (l <= 1) { /* Single byte: first check normally, then with ignore case. */ @@ -2317,7 +2335,7 @@ show_utf8() /* Get the byte length of the char under the cursor, including composing * characters. */ line = ml_get_cursor(); - len = utfc_ptr2len_check(line); + len = utfc_ptr2len(line); if (len == 0) { MSG("NUL"); @@ -2335,7 +2353,7 @@ show_utf8() STRCPY(IObuff + rlen, "+ "); rlen += 2; } - clen = utf_ptr2len_check(line + i); + clen = utf_ptr2len(line + i); } sprintf((char *)IObuff + rlen, "%02x ", line[i]); --clen; @@ -2377,7 +2395,7 @@ dbcs_head_off(base, p) * byte we are looking for. Return 1 when we went past it, 0 otherwise. */ q = base; while (q < p) - q += dbcs_ptr2len_check(q); + q += dbcs_ptr2len(q); return (q == p) ? 0 : 1; } @@ -2413,7 +2431,7 @@ dbcs_screen_head_off(base, p) if (enc_dbcs == DBCS_JPNU && *q == 0x8e) ++q; else - q += dbcs_ptr2len_check(q); + q += dbcs_ptr2len(q); } return (q == p) ? 0 : 1; } @@ -2485,7 +2503,7 @@ mb_copy_char(fp, tp) char_u **fp; char_u **tp; { - int l = (*mb_ptr2len_check)(*fp); + int l = (*mb_ptr2len)(*fp); mch_memmove(*tp, *fp, (size_t)l); *tp += l; @@ -2677,8 +2695,8 @@ mb_prevptr(line, p) } /* - * Return the character length of "str". Each multi-byte character counts as - * one. + * Return the character length of "str". Each multi-byte character (with + * following composing characters) counts as one. */ int mb_charlen(str) @@ -2690,7 +2708,7 @@ mb_charlen(str) return 0; for (count = 0; *str != NUL; count++) - str += (*mb_ptr2len_check)(str); + str += (*mb_ptr2len)(str); return count; } @@ -2742,7 +2760,7 @@ mb_unescape(pp) /* Return a multi-byte character if it's found. An illegal sequence * will result in a 1 here. */ - if ((*mb_ptr2len_check)(buf) > 1) + if ((*mb_ptr2len)(buf) > 1) { *pp = str + n + 1; return buf; @@ -3158,10 +3176,10 @@ iconv_string(vcp, str, slen, unconvlenp) if ((*mb_ptr2cells)((char_u *)from) > 1) *to++ = '?'; if (enc_utf8) - l = utfc_ptr2len_check_len((char_u *)from, fromlen); + l = utfc_ptr2len_len((char_u *)from, fromlen); else { - l = (*mb_ptr2len_check)((char_u *)from); + l = (*mb_ptr2len)((char_u *)from); if (l > (int)fromlen) l = fromlen; } @@ -3465,7 +3483,7 @@ im_commit_cb(GtkIMContext *context, const gchar *str, gpointer data) else im_str = (char_u *)str; clen = 0; - for (p = im_str; p < im_str + len; p += (*mb_ptr2len_check)(p)) + for (p = im_str; p < im_str + len; p += (*mb_ptr2len)(p)) clen += (*mb_ptr2cells)(p); if (input_conv.vc_type != CONV_NONE) vim_free(im_str); @@ -3731,7 +3749,7 @@ im_get_feedback_attr(int col) /* Get the byte index as used by PangoAttrIterator */ for (index = 0; col > 0 && preedit_string[index] != '\0'; --col) - index += utfc_ptr2len_check((char_u *)preedit_string + index); + index += utfc_ptr2len((char_u *)preedit_string + index); if (preedit_string[index] != '\0') { @@ -5233,7 +5251,7 @@ preedit_draw_cbproc(XIC xic, XPointer client_data, XPointer call_data) } } if (has_mbyte) - ptr += mb_ptr2len_check(ptr); + ptr += (*mb_ptr2len)(ptr); else #endif ptr++; @@ -5846,7 +5864,7 @@ string_convert_ext(vcp, ptr, lenp, unconvlenp) d = retval; for (i = 0; i < len; ++i) { - l = utf_ptr2len_check(ptr + i); + l = utf_ptr2len(ptr + i); if (l == 0) *d++ = NUL; else if (l == 1) -- cgit v1.2.1