diff options
author | Alexander Barkov <bar@mariadb.org> | 2015-07-03 17:24:16 +0400 |
---|---|---|
committer | Alexander Barkov <bar@mariadb.org> | 2015-07-03 17:24:16 +0400 |
commit | 9ad8ff666c4876de270f80f180b42eceac76b6f0 (patch) | |
tree | 76dc817c9030ba0c61e6f8aaa828d668dd9defd3 /strings/strcoll.ic | |
parent | 95d07ee408abd98769093759a076f4665a176d77 (diff) | |
download | mariadb-git-9ad8ff666c4876de270f80f180b42eceac76b6f0.tar.gz |
MDEV-8415 utf8: compare broken bytes as "greater than any non-broken character"
Diffstat (limited to 'strings/strcoll.ic')
-rw-r--r-- | strings/strcoll.ic | 12 |
1 files changed, 12 insertions, 0 deletions
diff --git a/strings/strcoll.ic b/strings/strcoll.ic
index 693252b3052..31f610c4397 100644
--- a/strings/strcoll.ic
+++ b/strings/strcoll.ic
@@ -65,7 +65,9 @@
 
   The including source file must define the following macros:
   IS_MB1_CHAR(x)
+  IS_MB1_MBHEAD_UNUSED_GAP(x) - optional, for better performance
   IS_MB2_CHAR(x,y)
+  IS_MB3_CHAR(x,y,z) - for character sets with mbmaxlen>2
   WEIGHT_PAD_SPACE
   WEIGHT_MB1(x)
   WEIGHT_MB2(x,y)
@@ -86,6 +88,16 @@ MY_FUNCTION_NAME(scan_weight)(int *weight, const uchar *str, const uchar *end)
     return 1;
   }
 
+#ifdef IS_MB1_MBHEAD_UNUSED_GAP
+  /*
+    Quickly filter out unused bytes that are neither MB1 nor MBHEAD.
+    E.g. [0x80..0xC1] in utf8. This allows using simplified conditions
+    in IS_MB2_CHAR(), IS_MB3_CHAR(), etc.
+  */
+  if (IS_MB1_MBHEAD_UNUSED_GAP(*str))
+    goto bad;
+#endif
+
   if (str + 2 > end) /* The string ended unexpectedly */
     goto bad; /* Treat as a bad byte */
 