summaryrefslogtreecommitdiff
path: root/strings/strcoll.ic
diff options
context:
space:
mode:
authorAlexander Barkov <bar@mariadb.org>2015-07-03 17:24:16 +0400
committerAlexander Barkov <bar@mariadb.org>2015-07-03 17:24:16 +0400
commit9ad8ff666c4876de270f80f180b42eceac76b6f0 (patch)
tree76dc817c9030ba0c61e6f8aaa828d668dd9defd3 /strings/strcoll.ic
parent95d07ee408abd98769093759a076f4665a176d77 (diff)
downloadmariadb-git-9ad8ff666c4876de270f80f180b42eceac76b6f0.tar.gz
MDEV-8415 utf8: compare broken bytes as "greater than any non-broken character"
Diffstat (limited to 'strings/strcoll.ic')
-rw-r--r--strings/strcoll.ic12
1 files changed, 12 insertions, 0 deletions
diff --git a/strings/strcoll.ic b/strings/strcoll.ic
index 693252b3052..31f610c4397 100644
--- a/strings/strcoll.ic
+++ b/strings/strcoll.ic
@@ -65,7 +65,9 @@
The including source file must define the following macros:
IS_MB1_CHAR(x)
+ IS_MB1_MBHEAD_UNUSED_GAP(x) - optional, for better performance
IS_MB2_CHAR(x,y)
+ IS_MB3_CHAR(x,y,z) - for character sets with mbmaxlen>2
WEIGHT_PAD_SPACE
WEIGHT_MB1(x)
WEIGHT_MB2(x,y)
@@ -86,6 +88,16 @@ MY_FUNCTION_NAME(scan_weight)(int *weight, const uchar *str, const uchar *end)
return 1;
}
+#ifdef IS_MB1_MBHEAD_UNUSED_GAP
+ /*
+ Quickly filter out unused bytes that are neither MB1 nor MBHEAD.
+ E.g. [0x80..0xC1] in utf8. This allows using simplified conditions
+ in IS_MB2_CHAR(), IS_MB3_CHAR(), etc.
+ */
+ if (IS_MB1_MBHEAD_UNUSED_GAP(*str))
+ goto bad;
+#endif
+
if (str + 2 > end) /* The string ended unexpectedly */
goto bad; /* Treat as a bad byte */