summaryrefslogtreecommitdiff
path: root/strings/ctype-uca.c
diff options
context:
space:
mode:
author Georgi Kodinov <Georgi.Kodinov@Oracle.com> 2011-07-22 15:54:47 +0300
committer Georgi Kodinov <Georgi.Kodinov@Oracle.com> 2011-07-22 15:54:47 +0300
commit 50af230e655b773bd1024c8f575ac923ceb7b89b (patch)
tree cbb0e1350aed9497cec1f75d21e26d289902dcc6 /strings/ctype-uca.c
parent 4ba303414924c5adaa0ea11cdc155d141ad72edd (diff)
download mariadb-git-50af230e655b773bd1024c8f575ac923ceb7b89b.tar.gz
Bug #12319710: INVALID MEMORY READ AND/OR CRASH IN MY_UCA_CHARCMP WITH UTF32

The 5.5 version of the UTF32 collation was not enforcing the BMP range that it currently supports when comparing with LIKE. Fixed by backporting the checks for the BMP from trunk. Added a named constant for the maximum character that can have a weight in the weight table.
Diffstat (limited to 'strings/ctype-uca.c')
-rw-r--r-- strings/ctype-uca.c 54
1 files changed, 45 insertions, 9 deletions
diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c
index d6be395535c..70d2df3bab9 100644
--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
@@ -42,6 +42,7 @@
#define MY_UCA_NCHARS 256
#define MY_UCA_CMASK 255
#define MY_UCA_PSHIFT 8
+#define MAX_UCA_CHAR_WITH_EXPLICIT_WEIGHT 0xFFFF
uint16 page000data[]= { /* 0000 (4 weights per char) */
0x0000,0x0000,0x0000,0x0000, 0x0000,0x0000,0x0000,0x0000,
@@ -6984,7 +6985,7 @@ static int my_uca_scanner_next_any(my_uca_scanner *scanner)
return -1;
scanner->sbeg+= mb_len;
- if (wc > 0xFFFF)
+ if (wc > MAX_UCA_CHAR_WITH_EXPLICIT_WEIGHT)
{
/* Return 0xFFFD as weight for all characters outside BMP */
scanner->wbeg= nochar;
@@ -7322,6 +7323,33 @@ static size_t my_strnxfrm_uca(CHARSET_INFO *cs,
+/**
+ Helper function:
+ Find address of weights of the given character.
+
+ @param cs Character set info (holds the UCA weight and length arrays)
+ @param wc Character Unicode code point
+
+ @return Weight array
+ @retval pointer to weight array for the given character,
+ or NULL if the character is above MAX_UCA_CHAR_WITH_EXPLICIT_WEIGHT
+ or its page has no entry in the weight array.
+*/
+
+static inline uint16 *
+my_char_weight_addr(CHARSET_INFO *cs, uint wc)
+{
+ uint page, ofst;
+ uchar *ucal= cs->sort_order;
+ uint16 **ucaw= cs->sort_order_big;
+
+ return wc > MAX_UCA_CHAR_WITH_EXPLICIT_WEIGHT ? NULL :
+ (ucaw[page= (wc >> 8)] ?
+ ucaw[page] + (ofst= (wc & 0xFF)) * ucal[page] :
+ NULL);
+}
+
+
/*
This function compares if two characters are the same.
The sign +1 or -1 does not matter. The only
@@ -7332,17 +7360,20 @@ static size_t my_strnxfrm_uca(CHARSET_INFO *cs,
static int my_uca_charcmp(CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2)
{
- size_t page1= wc1 >> MY_UCA_PSHIFT;
- size_t page2= wc2 >> MY_UCA_PSHIFT;
- uchar *ucal= cs->sort_order;
- uint16 **ucaw= cs->sort_order_big;
- size_t length1= ucal[page1];
- size_t length2= ucal[page2];
- uint16 *weight1= ucaw[page1] + (wc1 & MY_UCA_CMASK) * ucal[page1];
- uint16 *weight2= ucaw[page2] + (wc2 & MY_UCA_CMASK) * ucal[page2];
+ size_t length1, length2;
+ uint16 *weight1= my_char_weight_addr(cs, wc1);
+ uint16 *weight2= my_char_weight_addr(cs, wc2);
if (!weight1 || !weight2)
return wc1 != wc2;
+
+ /* Quickly compare first weights */
+ if (weight1[0] != weight2[0])
+ return 1;
+
+ /* Thoroughly compare all weights */
+ length1= cs->sort_order[wc1 >> MY_UCA_PSHIFT];
+ length2= cs->sort_order[wc2 >> MY_UCA_PSHIFT];
if (length1 > length2)
return memcmp((const void*)weight1, (const void*)weight2, length2*2) ?
@@ -7924,6 +7955,11 @@ static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(size_t))
*/
for (i=0; i < rc; i++)
{
+ /* check if the shift or the reset characters are out of range */
+ if (rule[i].curr[0] > MAX_UCA_CHAR_WITH_EXPLICIT_WEIGHT ||
+ rule[i].base > MAX_UCA_CHAR_WITH_EXPLICIT_WEIGHT)
+ return 1;
+
if (!rule[i].curr[1]) /* If not a contraction */
{
uint pageb= (rule[i].base >> 8) & 0xFF;