summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexander Barkov <bar@mariadb.com>2020-05-21 18:16:27 +0400
committerAlexander Barkov <bar@mariadb.com>2020-05-22 07:47:49 +0400
commitcb9c49a9b20dd8d1eee39641176134e207a4d84f (patch)
tree8cf85c16991abe5834eb61fe5f1a592276f231e9
parent836d70899780f28ea2a6870428ff6daac21ca993 (diff)
downloadmariadb-git-cb9c49a9b20dd8d1eee39641176134e207a4d84f.tar.gz
MDEV-22111 ERROR 1064 & 1033 and SIGSEGV on CREATE TABLE w/ various charsets on 10.4/5 optimized builds | Assertion `(uint) (table_check_constraints - share->check_constraints) == (uint) (share->table_check_constraints - share->field_check_constraints)' failed
The code incorrectly assumed in multiple places that TYPELIB values cannot have 0x00 bytes inside. In fact they can: CREATE TABLE t1 (a ENUM(0x61, 0x0062) CHARACTER SET BINARY); Note, the TYPELIB value encoding used in FRM is ambiguous about 0x00. So this fix is partial. It fixes 0x00 bytes in many (but not all) places: - In the middle or in the end of a value: CREATE TABLE t1 (a ENUM(0x6100) ...); CREATE TABLE t1 (a ENUM(0x610062) ...); - In the beginning of the first value: CREATE TABLE t1 (a ENUM(0x0061)); CREATE TABLE t1 (a ENUM(0x0061), b ENUM('b')); - In the beginning of the second (and following) value of the *last* ENUM/SET in the table: CREATE TABLE t1 (a ENUM('a',0x0061)); CREATE TABLE t1 (a ENUM('a'), b ENUM('b',0x0061)); However, it does not fix 0x00 when: - 0x00 byte is in the beginning of a value of a non-last ENUM/SET causes an error: CREATE TABLE t1 (a ENUM('a',0x0061), b ENUM('b')); ERROR 1033 (HY000): Incorrect information in file: './test/t1.frm' This is an ambuguous case and will be fixed separately. We need a new TYPELIB encoding to fix this. Details: - unireg.cc The function pack_header() incorrectly used strlen() to detect a TYPELIB value length. Adding a new function typelib_values_packed_length() which uses TYPELIB::type_lengths[n] to detect the n-th value length, and reusing the new function in pack_header() and packed_fields_length() - table.cc fix_type_pointers() assumed in multiple places that values cannot have 0x00 inside and used strlen(TYPELIB::type_names[n]) to set the corresponding TYPELIB::type_lengths[n]. Also, fix_type_pointers() did not check the encoded data for consistency. Rewriting fix_type_pointers() code to populate TYPELIB::type_names[n] and TYPELIB::type_lengths[n] at the same time, so no additional loop with strlen() is needed any more. Adding many data consistency tests. Fixing the main loop in fix_type_pointers() to use memchr() instead of strchr() to handle 0x00 properly. Fixing create_key_infos() to return the result in a LEX_STRING rather that in a char*.
-rw-r--r--mysql-test/r/ctype_binary.result93
-rw-r--r--mysql-test/t/ctype_binary.test60
-rw-r--r--sql/table.cc170
-rw-r--r--sql/unireg.cc23
4 files changed, 286 insertions, 60 deletions
diff --git a/mysql-test/r/ctype_binary.result b/mysql-test/r/ctype_binary.result
index 15b44d07ddf..aad9621966f 100644
--- a/mysql-test/r/ctype_binary.result
+++ b/mysql-test/r/ctype_binary.result
@@ -3171,5 +3171,98 @@ Warnings:
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where ((`test`.`t1`.`a` = 'a') and (weight_string(`test`.`t1`.`a`) = 'a'))
DROP TABLE t1;
#
+# MDEV-22111 ERROR 1064 & 1033 and SIGSEGV on CREATE TABLE w/ various charsets on 10.4/5 optimized builds | Assertion `(uint) (table_check_constraints - share->check_constraints) == (uint) (share->table_check_constraints - share->field_check_constraints)' failed
+#
+CREATE TABLE t1(a ENUM(0x6100,0x6200,0x6300) CHARACTER SET 'Binary');
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` enum('a\0','b\0','c\0') CHARACTER SET binary DEFAULT NULL
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+INSERT INTO t1 VALUES (1),(2),(3);
+SELECT HEX(a) FROM t1 ORDER BY a;
+HEX(a)
+6100
+6200
+6300
+DROP TABLE t1;
+0x00 in the middle or in the end of a value
+CREATE TABLE t1 (a ENUM(0x6100));
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` enum('a\0') DEFAULT NULL
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+INSERT INTO t1 VALUES (1);
+SELECT HEX(a) FROM t1;
+HEX(a)
+6100
+DROP TABLE t1;
+CREATE TABLE t1 (a ENUM(0x610062));
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` enum('a\0b') DEFAULT NULL
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+INSERT INTO t1 VALUES (1);
+SELECT HEX(a) FROM t1;
+HEX(a)
+610062
+DROP TABLE t1;
+0x00 in the beginning of the first value:
+CREATE TABLE t1 (a ENUM(0x0061));
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` enum('\0a') DEFAULT NULL
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+INSERT INTO t1 VALUES(1);
+SELECT * FROM t1;
+a
+
+DROP TABLE t1;
+CREATE TABLE t1 (a ENUM(0x0061), b ENUM('b'));
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` enum('\0a') DEFAULT NULL,
+ `b` enum('b') DEFAULT NULL
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+INSERT INTO t1 VALUES (1,1);
+SELECT HEX(a), HEX(b) FROM t1;
+HEX(a) HEX(b)
+0061 62
+DROP TABLE t1;
+# 0x00 in the beginning of the second (and following) value of the *last* ENUM/SET in the table:
+CREATE TABLE t1 (a ENUM('a',0x0061));
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` enum('a','\0a') DEFAULT NULL
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+INSERT INTO t1 VALUES (1),(2);
+SELECT HEX(a) FROM t1 ORDER BY a;
+HEX(a)
+61
+0061
+DROP TABLE t1;
+CREATE TABLE t1 (a ENUM('a'), b ENUM('b',0x0061));
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` enum('a') DEFAULT NULL,
+ `b` enum('b','\0a') DEFAULT NULL
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+INSERT INTO t1 VALUES (1,1);
+INSERT INTO t1 VALUES (1,2);
+SELECT HEX(a), HEX(b) FROM t1 ORDER BY a, b;
+HEX(a) HEX(b)
+61 62
+61 0061
+DROP TABLE t1;
+0x00 in the beginning of a value of a non-last ENUM/SET causes an error:
+CREATE TABLE t1 (a ENUM('a',0x0061), b ENUM('b'));
+ERROR HY000: Incorrect information in file: 'DIR/t1.frm'
+#
# End of 10.1 tests
#
diff --git a/mysql-test/t/ctype_binary.test b/mysql-test/t/ctype_binary.test
index 155d8548f77..19a58180187 100644
--- a/mysql-test/t/ctype_binary.test
+++ b/mysql-test/t/ctype_binary.test
@@ -74,6 +74,66 @@ EXPLAIN EXTENDED SELECT * FROM t1 WHERE COERCIBILITY(a)=2 AND a='a';
EXPLAIN EXTENDED SELECT * FROM t1 WHERE WEIGHT_STRING(a)='a' AND a='a';
DROP TABLE t1;
+
+--echo #
+--echo # MDEV-22111 ERROR 1064 & 1033 and SIGSEGV on CREATE TABLE w/ various charsets on 10.4/5 optimized builds | Assertion `(uint) (table_check_constraints - share->check_constraints) == (uint) (share->table_check_constraints - share->field_check_constraints)' failed
+--echo #
+
+CREATE TABLE t1(a ENUM(0x6100,0x6200,0x6300) CHARACTER SET 'Binary');
+SHOW CREATE TABLE t1;
+INSERT INTO t1 VALUES (1),(2),(3);
+SELECT HEX(a) FROM t1 ORDER BY a;
+DROP TABLE t1;
+
+--echo 0x00 in the middle or in the end of a value
+
+CREATE TABLE t1 (a ENUM(0x6100));
+SHOW CREATE TABLE t1;
+INSERT INTO t1 VALUES (1);
+SELECT HEX(a) FROM t1;
+DROP TABLE t1;
+
+CREATE TABLE t1 (a ENUM(0x610062));
+SHOW CREATE TABLE t1;
+INSERT INTO t1 VALUES (1);
+SELECT HEX(a) FROM t1;
+DROP TABLE t1;
+
+--echo 0x00 in the beginning of the first value:
+
+CREATE TABLE t1 (a ENUM(0x0061));
+SHOW CREATE TABLE t1;
+INSERT INTO t1 VALUES(1);
+SELECT * FROM t1;
+DROP TABLE t1;
+
+CREATE TABLE t1 (a ENUM(0x0061), b ENUM('b'));
+SHOW CREATE TABLE t1;
+INSERT INTO t1 VALUES (1,1);
+SELECT HEX(a), HEX(b) FROM t1;
+DROP TABLE t1;
+
+--echo # 0x00 in the beginning of the second (and following) value of the *last* ENUM/SET in the table:
+
+CREATE TABLE t1 (a ENUM('a',0x0061));
+SHOW CREATE TABLE t1;
+INSERT INTO t1 VALUES (1),(2);
+SELECT HEX(a) FROM t1 ORDER BY a;
+DROP TABLE t1;
+
+CREATE TABLE t1 (a ENUM('a'), b ENUM('b',0x0061));
+SHOW CREATE TABLE t1;
+INSERT INTO t1 VALUES (1,1);
+INSERT INTO t1 VALUES (1,2);
+SELECT HEX(a), HEX(b) FROM t1 ORDER BY a, b;
+DROP TABLE t1;
+
+--echo 0x00 in the beginning of a value of a non-last ENUM/SET causes an error:
+--replace_regex /'.*t1.frm'/'DIR\/t1.frm'/
+--error ER_NOT_FORM_FILE
+CREATE TABLE t1 (a ENUM('a',0x0061), b ENUM('b'));
+
+
--echo #
--echo # End of 10.1 tests
--echo #
diff --git a/sql/table.cc b/sql/table.cc
index f1a4b322a80..192faa0d00e 100644
--- a/sql/table.cc
+++ b/sql/table.cc
@@ -64,8 +64,11 @@ LEX_STRING parse_vcol_keyword= { C_STRING_WITH_LEN("PARSE_VCOL_EXPR ") };
/* Functions defined in this file */
-static void fix_type_pointers(const char ***array, TYPELIB *point_to_type,
- uint types, char **names);
+static bool fix_type_pointers(const char ***typelib_value_names,
+ uint **typelib_value_lengths,
+ TYPELIB *point_to_type, uint types,
+ char *names, size_t names_length);
+
static uint find_field(Field **fields, uchar *record, uint start, uint length);
inline bool is_system_table_name(const char *name, uint length);
@@ -670,7 +673,8 @@ static bool create_key_infos(const uchar *strpos, const uchar *frm_image_end,
uint keys, KEY *keyinfo,
uint new_frm_ver, uint &ext_key_parts,
TABLE_SHARE *share, uint len,
- KEY *first_keyinfo, char* &keynames)
+ KEY *first_keyinfo,
+ LEX_STRING *keynames)
{
uint i, j, n_length;
KEY_PART_INFO *key_part= NULL;
@@ -813,10 +817,13 @@ static bool create_key_infos(const uchar *strpos, const uchar *frm_image_end,
}
share->ext_key_parts+= keyinfo->ext_key_parts;
}
- keynames=(char*) key_part;
- strpos+= strnmov(keynames, (char *) strpos, frm_image_end - strpos) - keynames;
+ keynames->str= (char*) key_part;
+ keynames->length= strnmov(keynames->str, (char *) strpos,
+ frm_image_end - strpos) - keynames->str;
+ strpos+= keynames->length;
if (*strpos++) // key names are \0-terminated
return 1;
+ keynames->length++; // Include '\0', to make fix_type_pointers() happy.
//reading index comments
for (keyinfo= share->key_info, i=0; i < keys; i++, keyinfo++)
@@ -918,12 +925,14 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
TABLE_SHARE *share= this;
uint new_frm_ver, field_pack_length, new_field_pack_flag;
uint interval_count, interval_parts, read_length, int_length;
+ uint total_typelib_value_count;
uint db_create_options, keys, key_parts, n_length;
uint com_length, null_bit_pos;
uint extra_rec_buf_length;
uint i;
bool use_hash;
- char *keynames, *names, *comment_pos;
+ LEX_STRING keynames= {NULL, 0};
+ char *names, *comment_pos;
const uchar *forminfo, *extra2;
const uchar *frm_image_end = frm_image + frm_length;
uchar *record, *null_flags, *null_pos;
@@ -935,6 +944,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
KEY_PART_INFO *key_part= NULL;
Field **field_ptr, *reg_field;
const char **interval_array;
+ uint *typelib_value_lengths= NULL;
enum legacy_db_type legacy_db_type;
my_bitmap_map *bitmaps;
bool null_bits_are_used;
@@ -1237,7 +1247,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
if (create_key_infos(disk_buff + 6, frm_image_end, keys, keyinfo,
new_frm_ver, ext_key_parts,
- share, len, &first_keyinfo, keynames))
+ share, len, &first_keyinfo, &keynames))
goto err;
if (next_chunk + 5 < buff_end)
@@ -1330,7 +1340,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
{
if (create_key_infos(disk_buff + 6, frm_image_end, keys, keyinfo,
new_frm_ver, ext_key_parts,
- share, len, &first_keyinfo, keynames))
+ share, len, &first_keyinfo, &keynames))
goto err;
}
@@ -1372,6 +1382,24 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
DBUG_PRINT("info",("i_count: %d i_parts: %d index: %d n_length: %d int_length: %d com_length: %d vcol_screen_length: %d", interval_count,interval_parts, keys,n_length,int_length, com_length, vcol_screen_length));
+ /*
+ We load the following things into TYPELIBs:
+ - One TYPELIB for field names
+ - interval_count TYPELIBs for ENUM/SET values
+ - One TYPELIB for key names
+ Every TYPELIB requires one extra value with a NULL pointer and zero length,
+ which is the end-of-values marker.
+ TODO-10.5+:
+ Note, we should eventually reuse this total_typelib_value_count
+ to allocate interval_array. The above code reserves less space
+ than total_typelib_value_count pointers. So it seems `interval_array`
+ and `names` overlap in the memory. Too dangerous to fix in 10.1.
+ */
+ total_typelib_value_count=
+ (share->fields + 1/*end-of-values marker*/) +
+ (interval_parts + interval_count/*end-of-values markers*/) +
+ (keys + 1/*end-of-values marker*/);
+
if (!(field_ptr = (Field **)
alloc_root(&share->mem_root,
@@ -1383,6 +1411,12 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
vcol_screen_length)))))
goto err; /* purecov: inspected */
+
+ if (!(typelib_value_lengths= (uint *) alloc_root(&share->mem_root,
+ total_typelib_value_count *
+ sizeof(uint *))))
+ goto err;
+
share->field= field_ptr;
read_length=(uint) (share->fields * field_pack_length +
pos+ (uint) (n_length+int_length+com_length+
@@ -1391,6 +1425,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
share->intervals= (TYPELIB*) (field_ptr+share->fields+1);
interval_array= (const char **) (share->intervals+interval_count);
+ // This looks wrong: shouldn't it be (+2+interval_count) instread of (+3) ?
names= (char*) (interval_array+share->fields+interval_parts+keys+3);
if (!interval_count)
share->intervals= 0; // For better debugging
@@ -1403,34 +1438,21 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
memcpy(vcol_screen_pos, disk_buff+read_length-vcol_screen_length,
vcol_screen_length);
- fix_type_pointers(&interval_array, &share->fieldnames, 1, &names);
- if (share->fieldnames.count != share->fields)
+ if (fix_type_pointers(&interval_array, &typelib_value_lengths,
+ &share->fieldnames, 1, names, n_length) ||
+ share->fieldnames.count != share->fields)
goto err;
- fix_type_pointers(&interval_array, share->intervals, interval_count,
- &names);
- {
- /* Set ENUM and SET lengths */
- TYPELIB *interval;
- for (interval= share->intervals;
- interval < share->intervals + interval_count;
- interval++)
- {
- uint count= (uint) (interval->count + 1) * sizeof(uint);
- if (!(interval->type_lengths= (uint *) alloc_root(&share->mem_root,
- count)))
- goto err;
- for (count= 0; count < interval->count; count++)
- {
- char *val= (char*) interval->type_names[count];
- interval->type_lengths[count]= strlen(val);
- }
- interval->type_lengths[count]= 0;
- }
- }
+ if (fix_type_pointers(&interval_array, &typelib_value_lengths,
+ share->intervals, interval_count,
+ names + n_length, int_length))
+ goto err;
- if (keynames)
- fix_type_pointers(&interval_array, &share->keynames, 1, &keynames);
+ if (keynames.length &&
+ (fix_type_pointers(&interval_array, &typelib_value_lengths,
+ &share->keynames, 1, keynames.str, keynames.length) ||
+ share->keynames.count != keys))
+ goto err;
/* Allocate handler */
if (!(handler_file= get_new_handler(share, thd->mem_root,
@@ -3216,37 +3238,81 @@ void open_table_error(TABLE_SHARE *share, enum open_frm_error error,
** with a '\0'
*/
-static void
-fix_type_pointers(const char ***array, TYPELIB *point_to_type, uint types,
- char **names)
+static bool
+fix_type_pointers(const char ***typelib_value_names,
+ uint **typelib_value_lengths,
+ TYPELIB *point_to_type, uint types,
+ char *ptr, size_t length)
{
- char *type_name, *ptr;
- char chr;
+ const char *end= ptr + length;
- ptr= *names;
while (types--)
{
+ char sep;
point_to_type->name=0;
- point_to_type->type_names= *array;
+ point_to_type->type_names= *typelib_value_names;
+ point_to_type->type_lengths= *typelib_value_lengths;
- if ((chr= *ptr)) /* Test if empty type */
+ /*
+ Typelib can be encoded as:
+ 1) 0x00 - empty typelib
+ 2) 0xFF 0x00 - empty typelib (index names)
+ 3) sep (value sep)... 0x00 - non-empty typelib (where sep is a separator)
+ */
+ if (length == 2 && ptr[0] == (char) 0xFF && ptr[1] == '\0')
+ {
+ /*
+ This is a special case #2.
+ If there are no indexes at all, index names can be encoded
+ as a two byte sequence: 0xFF 0x00
+ TODO: Check if it's a bug in the FRM packing routine.
+ It should probably write just 0x00 instead of 0xFF00.
+ */
+ ptr+= 2;
+ }
+ else if ((sep= *ptr++)) // A non-empty typelib
{
- while ((type_name=strchr(ptr+1,chr)) != NullS)
+ for ( ; ptr < end; )
{
- *((*array)++) = ptr+1;
- *type_name= '\0'; /* End string */
- ptr=type_name;
+ // Now scan the next value+sep pair
+ char *vend= (char*) memchr(ptr, sep, end - ptr);
+ if (!vend)
+ return true; // Bad format
+ *((*typelib_value_names)++)= ptr;
+ *((*typelib_value_lengths)++)= vend - ptr;
+ *vend= '\0'; // Change sep to '\0'
+ ptr= vend + 1; // Shift from sep to the next byte
+ /*
+ Now we can have either:
+ - the end-of-typelib marker (0x00)
+ - more value+sep pairs
+ */
+ if (!*ptr)
+ {
+ /*
+ We have an ambiguity here. 0x00 can be an end-of-typelib marker,
+ but it can also be a part of the next value:
+ CREATE TABLE t1 (a ENUM(0x61, 0x0062) CHARACTER SET BINARY);
+ If this is the last ENUM/SET in the table and there is still more
+ packed data left after 0x00, then we know for sure that 0x00
+ is a part of the next value.
+ TODO-10.5+: we should eventually introduce a new unambiguous
+ typelib encoding for FRM.
+ */
+ if (!types && ptr + 1 < end)
+ continue; // A binary value starting with 0x00
+ ptr++; // Consume the end-of-typelib marker
+ break; // End of the current typelib
+ }
}
- ptr+=2; /* Skip end mark and last 0 */
}
- else
- ptr++;
- point_to_type->count= (uint) (*array - point_to_type->type_names);
+ point_to_type->count= (uint) (*typelib_value_names -
+ point_to_type->type_names);
point_to_type++;
- *((*array)++)= NullS; /* End of type */
+ *((*typelib_value_names)++)= NullS; /* End of type */
+ *((*typelib_value_lengths)++)= 0; /* End of type */
}
- *names=ptr; /* Update end */
- return;
+ return ptr != end;
} /* fix_type_pointers */
diff --git a/sql/unireg.cc b/sql/unireg.cc
index b116218b60e..2ccc440f71b 100644
--- a/sql/unireg.cc
+++ b/sql/unireg.cc
@@ -498,6 +498,18 @@ static uint pack_keys(uchar *keybuff, uint key_count, KEY *keyinfo,
} /* pack_keys */
+static uint typelib_values_packed_length(const TYPELIB *t)
+{
+ uint length= 0;
+ for (uint i= 0; t->type_names[i]; i++)
+ {
+ length+= t->type_lengths[i];
+ length++; /* Separator */
+ }
+ return length;
+}
+
+
/* Make formheader */
static bool pack_header(THD *thd, uchar *forminfo,
@@ -619,9 +631,8 @@ static bool pack_header(THD *thd, uchar *forminfo,
field->interval_id=get_interval_id(&int_count,create_fields,field);
if (old_int_count != int_count)
{
- for (const char **pos=field->interval->type_names ; *pos ; pos++)
- int_length+=(uint) strlen(*pos)+1; // field + suffix prefix
- int_parts+=field->interval->count+1;
+ int_length+= typelib_values_packed_length(field->interval);
+ int_parts+= field->interval->count + 1;
}
}
if (f_maybe_null(field->pack_flag))
@@ -710,11 +721,7 @@ static size_t packed_fields_length(List<Create_field> &create_fields)
{
int_count= field->interval_id;
length++;
- for (int i=0; field->interval->type_names[i]; i++)
- {
- length+= field->interval->type_lengths[i];
- length++;
- }
+ length+= typelib_values_packed_length(field->interval);
length++;
}
if (field->vcol_info)