author     Anel Husakovic <anel@mariadb.org>        2019-01-22 00:53:57 -0800
committer  Vicențiu Ciorbaru <vicentiu@mariadb.org> 2020-08-13 14:24:20 +0300
commit     b9d8da2ea955c74e79cf182b87dd22117e528d53 (patch)
tree       dbbdc2333c0a3e3cb723347ac7df7cce3025283b
parent     863e28ff3ed0a5859561c397cbfb492170989ddd (diff)
download   mariadb-git-b9d8da2ea955c74e79cf182b87dd22117e528d53.tar.gz

MDEV-18323 Convert MySQL JSON type to MariaDB TEXT in mysql_upgrade

24 files changed, 923 insertions, 15 deletions
diff --git a/libmysqld/CMakeLists.txt b/libmysqld/CMakeLists.txt
index 445613eb579..87cbbf410db 100644
--- a/libmysqld/CMakeLists.txt
+++ b/libmysqld/CMakeLists.txt
@@ -47,7 +47,7 @@ SET(SQL_EMBEDDED_SOURCES emb_qcache.cc libmysqld.c lib_sql.cc
   ../sql/field.cc ../sql/field_conv.cc ../sql/field_comp.cc
   ../sql/filesort_utils.cc ../sql/sql_digest.cc ../sql/filesort.cc
   ../sql/gstream.cc ../sql/slave.cc
-  ../sql/signal_handler.cc
+  ../sql/signal_handler.cc ../sql/mysql_json.cc
   ../sql/handler.cc ../sql/hash_filo.cc ../sql/hostname.cc
   ../sql/init.cc ../sql/item_buff.cc ../sql/item_cmpfunc.cc
   ../sql/item.cc ../sql/item_create.cc ../sql/item_func.cc
diff --git a/mysql-test/main/mysql_json_table_recreate.result b/mysql-test/main/mysql_json_table_recreate.result
new file mode 100644
index 00000000000..77e8d01b6f4
--- /dev/null
+++ b/mysql-test/main/mysql_json_table_recreate.result
@@ -0,0 +1,169 @@
+#
+# The following test takes 2 tables containing a JSON column and attempts
+# to repair them.
+#
+# The tables header is (Description, Expected, Actual), where description
+# shows a brief description what the JSON value is testing in the MariaDB
+# implementation. Expected is the longtext string and actual is the JSON
+# column that needs to be converted to MariaDB's representation of
+# LONGTEXT.
+#
+call mtr.add_suppression("Table rebuild required");
+call mtr.add_suppression("is marked as crashed");
+call mtr.add_suppression("Checking");
+SET NAMES utf8;
+#
+# Check that only ALTER TABLE ... FORCE is allowed on a MySQL 5.7 table
+# with a JSON column.
+#
+show create table tempty;
+ERROR HY000: Table rebuild required. Please do "ALTER TABLE `test.tempty` FORCE" or dump/reload to fix it!
+select * from tempty;
+ERROR HY000: Table rebuild required. Please do "ALTER TABLE `test.tempty` FORCE" or dump/reload to fix it!
+alter table tempty force;
+show create table tempty;
+Table	Create Table
+tempty	CREATE TABLE `tempty` (
+  `t` longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+show create table mysql_json_test;
+ERROR HY000: Table rebuild required. Please do "ALTER TABLE `test.mysql_json_test` FORCE" or dump/reload to fix it!
+select * from mysql_json_test;
+ERROR HY000: Table rebuild required. Please do "ALTER TABLE `test.mysql_json_test` FORCE" or dump/reload to fix it!
+alter table mysql_json_test force;
+select description, expected, actual, expected = actual from mysql_json_test;
+description	expected	actual	expected = actual
+Array LITERALS: ["prefix", false, "suffix", 1] ["prefix", false, "suffix", 1] 1
+Array LITERALS: ["prefix", null, "suffix", 1] ["prefix", null, "suffix", 1] 1
+Array LITERALS: ["prefix", true, "suffix", 1] ["prefix", true, "suffix", 1] 1
+DateTime as Raw Value: "2015-01-15 23:24:25.000000" "2015-01-15 23:24:25.000000" 1
+DateTime as Raw Value: "2015-01-15 23:24:25.000000" "2015-01-15 23:24:25.000000" 1
+DateTime as Raw Value: "2015-01-15" "2015-01-15" 1
+DateTime as Raw Value: "23:24:25.000000" "23:24:25.000000" 1
+Empty JSON Object/Array: [] [] 1
+Empty JSON Object/Array: {} {} 1
+GeoJSON {"type": "GeometryCollection", "geometries": []} {"type": "GeometryCollection", "geometries": []} 1
+GeoJSON {"type": "LineString", "coordinates": [[0, 5], [5, 10], [10, 15]]} {"type": "LineString", "coordinates": [[0, 5], [5, 10], [10, 15]]} 1
+GeoJSON {"type": "MultiPoint", "coordinates": [[1, 1], [2, 2], [3, 3]]} {"type": "MultiPoint", "coordinates": [[1, 1], [2, 2], [3, 3]]} 1
+GeoJSON {"type": "Point", "coordinates": [11.1111, 12.22222]} {"type": "Point", "coordinates": [11.1111, 12.22222]} 1
+JSON LITERALS: {"val": false} {"val": false} 1
+JSON LITERALS: {"val": null} {"val": null} 1
+JSON LITERALS: {"val": true} {"val": true} 1
+Opaque Types: opaque_mysql_type_binary "base64:type254:YWJjAAAAAAAAAA==" "base64:type254:YWJjAAAAAAAAAA==" 1
+Opaque Types: opaque_mysql_type_bit "base64:type16:yv4=" "base64:type16:yv4=" 1
+Opaque Types: opaque_mysql_type_blob "base64:type252:yv66vg==" "base64:type252:yv66vg==" 1
+Opaque Types: opaque_mysql_type_date "2015-01-15" "2015-01-15" 1
+Opaque Types: opaque_mysql_type_datetime "2015-01-15 23:24:25.000000" "2015-01-15 23:24:25.000000" 1
+Opaque Types: opaque_mysql_type_enum "b" "b" 1
+Opaque Types: opaque_mysql_type_geom {"type": "Point", "coordinates": [1, 1]} {"type": "Point", "coordinates": [1, 1]} 1
+Opaque Types: opaque_mysql_type_longblob "base64:type251:yv66vg==" "base64:type251:yv66vg==" 1
+Opaque Types: opaque_mysql_type_mediumblob "base64:type250:yv66vg==" "base64:type250:yv66vg==" 1
+Opaque Types: opaque_mysql_type_set "b,c" "b,c" 1
+Opaque Types: opaque_mysql_type_time "23:24:25.000000" "23:24:25.000000" 1
+Opaque Types: opaque_mysql_type_tinyblob "base64:type249:yv66vg==" "base64:type249:yv66vg==" 1
+Opaque Types: opaque_mysql_type_varbinary "base64:type15:YWJj" "base64:type15:YWJj" 1
+Opaque Types: opaque_mysql_type_varchar "base64:type15:Zm9v" "base64:type15:Zm9v" 1
+Opaque Types: opaque_mysql_type_year "base64:type13:MjAxOQ==" "base64:type13:MjAxOQ==" 1
+Raw LITERALS: false false 1
+Raw LITERALS: null null 1
+Raw LITERALS: true true 1
+Raw doubles as JSON -2.2250738585072014e-308 -2.2250738585072014e-308 1
+Raw doubles as JSON -5678.987 -5678.987 1
+Raw doubles as JSON 0.0 0.0 1
+Raw doubles as JSON 2.2250738585072014e-308 2.2250738585072014e-308 1
+Raw doubles as JSON 3.14 3.14 1
+Raw integers as JSON -127 -127 1
+Raw integers as JSON -2147483648 -2147483648 1
+Raw integers as JSON -32768 -32768 1
+Raw integers as JSON -9223372036854775807 -9223372036854775807 1
+Raw integers as JSON 0 0 1
+Raw integers as JSON 128 128 1
+Raw integers as JSON 18446744073709551615 18446744073709551615 1
+Raw integers as JSON 2147483647 2147483647 1
+Raw integers as JSON 32767 32767 1
+Raw integers as JSON 4294967295 4294967295 1
+Raw integers as JSON 65535 65535 1
+Raw integers as JSON 65536 65536 1
+Raw integers as JSON 9223372036854775807 9223372036854775807 1
+Simple Array as Base Key [1, 2, 3, 4, 5, [], "a", "b", "c"] [1, 2, 3, 4, 5, [], "a", "b", "c"] 1
+Simple Array as Value {"a": [1, 2], "b": ["x", "y"]} {"a": [1, 2], "b": ["x", "y"]} 1
+Simple JSON test {"key1": "val1", "key2": "val2"} {"key1": "val1", "key2": "val2"} 1
+Special Characters: "" "" 1
+Special Characters: "'" "'" 1
+Special Characters: "'" "'" 1
+Special Characters: "'" "'" 1
+Special Characters: "''" "''" 1
+Special Characters: "\"" "\"" 1
+Special Characters: "\\" "\\" 1
+Special Characters: "\\b" "\\b" 1
+Special Characters: "\b" "\b" 1
+Special Characters: "\f" "\f" 1
+Special Characters: "\n" "\n" 1
+Special Characters: "\r" "\r" 1
+Special Characters: "\t" "\t" 1
+Special Characters: "f" "f" 1
+Special Characters: "key1 - with \" val " "key1 - with \" val " 1
+Special Characters: "q" "q" 1
+Special Characters: "some_string" "some_string" 1
+Special Characters: ["a ' b", "c ' d"] ["a ' b", "c ' d"] 1
+Special Characters: ["a \" b", "c \" d"] ["a \" b", "c \" d"] 1
+Special Characters: ["a \\ b", "c \\ d"] ["a \\ b", "c \\ d"] 1
+Special Characters: ["a \b b", "c \b d"] ["a \b b", "c \b d"] 1
+Special Characters: ["a \f b", "c \f d"] ["a \f b", "c \f d"] 1
+Special Characters: ["a \r b", "c \r d"] ["a \r b", "c \r d"] 1
+Special Characters: ["a \t b", "c \t d"] ["a \t b", "c \t d"] 1
+Special Characters: {"[": "]"} {"[": "]"} 1
+Special Characters: {"key ' key": "val ' val"} {"key ' key": "val ' val"} 1
+Special Characters: {"key \" key": "val \" val"} {"key \" key": "val \" val"} 1
+Special Characters: {"key \\ key": "val \\ val"} {"key \\ key": "val \\ val"} 1
+Special Characters: {"key \\0 key": "val \n val"} {"key \\0 key": "val \n val"} 1
+Special Characters: {"key \\Z key": "val ' val"} {"key \\Z key": "val ' val"} 1
+Special Characters: {"key \b key": "val \b val"} {"key \b key": "val \b val"} 1
+Special Characters: {"key \f key": "val \f val"} {"key \f key": "val \f val"} 1
+Special Characters: {"key \n key": "val \n val"} {"key \n key": "val \n val"} 1
+Special Characters: {"key \r key": "val \r val"} {"key \r key": "val \r val"} 1
+Special Characters: {"key \t key": "val \t val"} {"key \t key": "val \t val"} 1
+Special Characters: {"key1 and \n\"key2\"": "val1\t val2"} {"key1 and \n\"key2\"": "val1\t val2"} 1
+Special Characters: {"{": "}"} {"{": "}"} 1
+Special Characters: {"{": "}"} {"{": "}"} 1
+Special String Cases: [""] [""] 1
+Special String Cases: {"": ""} {"": ""} 1
+Timestamp as RawValue "2019-12-26 19:56:03.000000" "2019-12-26 19:56:03.000000" 1
+UTF8 Characters: "Anel Husaković - test: đžšćč" "Anel Husaković - test: đžšćč" 1
+UTF8 Characters: {"Name": "Anel Husaković - test: đžšćč"} {"Name": "Anel Husaković - test: đžšćč"} 1
+UTF8 Characters: {"Person": "EMP", "details": {"Name": "Anel Husaković - test: đžšćč"}} {"Person": "EMP", "details": {"Name": "Anel Husaković - test: đžšćč"}} 1
+UTF8 Characters: {"details": {"Name": "Anel Husaković - test: đžšćč"}, "\"Anel Husaković - test: đžšćč\"": "EMP"} {"details": {"Name": "Anel Husaković - test: đžšćč"}, "\"Anel Husaković - test: đžšćč\"": "EMP"} 1
+#
+# A quick check that all rows match from the original MySQL Table.
+#
+select count(*) as 'Total_Number_of_Tests',
+sum(expected = actual) as 'Succesful_Tests'
+from mysql_json_test;
+Total_Number_of_Tests	Succesful_Tests
+100	100
+show create table mysql_json_test;
+Table	Create Table
+mysql_json_test	CREATE TABLE `mysql_json_test` (
+  `description` varchar(100) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
+  `expected` longtext COLLATE utf8mb4_unicode_ci DEFAULT NULL,
+  `actual` longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL
+) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci
+show create table mysql_json_test_big;
+ERROR HY000: Table rebuild required. Please do "ALTER TABLE `test.mysql_json_test_big` FORCE" or dump/reload to fix it!
+select * from mysql_json_test_big;
+ERROR HY000: Table rebuild required. Please do "ALTER TABLE `test.mysql_json_test_big` FORCE" or dump/reload to fix it!
+#
+# This test checks the long format implementation of MySQL's JSON
+# Not printing the actual contents as they are not readable by a human,
+# just compare the strings, make sure they match.
+#
+alter table mysql_json_test_big force;
+select count(*) as 'Total_Number_of_Tests',
+sum(expected = actual) as 'Succesful_Tests',
+sum(JSON_VALID(actual)) as 'String_is_valid_JSON'
+from mysql_json_test_big;
+Total_Number_of_Tests	Succesful_Tests	String_is_valid_JSON
+1	1	1
+drop table tempty;
+drop table mysql_json_test;
+drop table mysql_json_test_big;
diff --git a/mysql-test/main/mysql_json_table_recreate.test b/mysql-test/main/mysql_json_table_recreate.test
new file mode 100644
index 00000000000..dc73a7a0a2f
--- /dev/null
+++ b/mysql-test/main/mysql_json_table_recreate.test
@@ -0,0 +1,86 @@
+--source include/have_utf8.inc
+
+--echo #
+--echo # The following test takes 2 tables containing a JSON column and attempts
+--echo # to repair them.
+--echo #
+--echo # The tables header is (Description, Expected, Actual), where description
+--echo # shows a brief description what the JSON value is testing in the MariaDB
+--echo # implementation. Expected is the longtext string and actual is the JSON
+--echo # column that needs to be converted to MariaDB's representation of
+--echo # LONGTEXT.
+--echo #
+
+
+call mtr.add_suppression("Table rebuild required");
+call mtr.add_suppression("is marked as crashed");
+call mtr.add_suppression("Checking");
+
+let $MYSQLD_DATADIR= `select @@datadir`;
+
+SET NAMES utf8;
+
+--copy_file std_data/mysql_json/tempty.frm $MYSQLD_DATADIR/test/tempty.frm
+--copy_file std_data/mysql_json/tempty.MYI $MYSQLD_DATADIR/test/tempty.MYI
+--copy_file std_data/mysql_json/tempty.MYD $MYSQLD_DATADIR/test/tempty.MYD
+
+--copy_file std_data/mysql_json/mysql_json_test.frm $MYSQLD_DATADIR/test/mysql_json_test.frm
+--copy_file std_data/mysql_json/mysql_json_test.MYI $MYSQLD_DATADIR/test/mysql_json_test.MYI
+--copy_file std_data/mysql_json/mysql_json_test.MYD $MYSQLD_DATADIR/test/mysql_json_test.MYD
+
+--copy_file std_data/mysql_json/mysql_json_test_big.frm $MYSQLD_DATADIR/test/mysql_json_test_big.frm
+--copy_file std_data/mysql_json/mysql_json_test_big.MYI $MYSQLD_DATADIR/test/mysql_json_test_big.MYI
+--copy_file std_data/mysql_json/mysql_json_test_big.MYD $MYSQLD_DATADIR/test/mysql_json_test_big.MYD
+
+--echo #
+--echo # Check that only ALTER TABLE ... FORCE is allowed on a MySQL 5.7 table
+--echo # with a JSON column.
+--echo #
+
+--error ER_TABLE_NEEDS_REBUILD
+show create table tempty;
+--error ER_TABLE_NEEDS_REBUILD
+select * from tempty;
+
+alter table tempty force;
+show create table tempty;
+
+--error ER_TABLE_NEEDS_REBUILD
+show create table mysql_json_test;
+--error ER_TABLE_NEEDS_REBUILD
+select * from mysql_json_test;
+
+alter table mysql_json_test force;
+
+--sorted_result
+select description, expected, actual, expected = actual from mysql_json_test;
+
+--echo #
+--echo # A quick check that all rows match from the original MySQL Table.
+--echo #
+select count(*) as 'Total_Number_of_Tests',
+       sum(expected = actual) as 'Succesful_Tests'
+from mysql_json_test;
+
+show create table mysql_json_test;
+
+--error ER_TABLE_NEEDS_REBUILD
+show create table mysql_json_test_big;
+--error ER_TABLE_NEEDS_REBUILD
+select * from mysql_json_test_big;
+
+--echo #
+--echo # This test checks the long format implementation of MySQL's JSON
+--echo # Not printing the actual contents as they are not readable by a human,
+--echo # just compare the strings, make sure they match.
+--echo #
+alter table mysql_json_test_big force;
+
+select count(*) as 'Total_Number_of_Tests',
+       sum(expected = actual) as 'Succesful_Tests',
+       sum(JSON_VALID(actual)) as 'String_is_valid_JSON'
+from mysql_json_test_big;
+
+drop table tempty;
+drop table mysql_json_test;
+drop table mysql_json_test_big;
diff --git a/mysql-test/std_data/mysql_json/mysql_json_test.MYD b/mysql-test/std_data/mysql_json/mysql_json_test.MYD
new file mode 100644
index 00000000000..0be8c5968b4
--- /dev/null
+++ b/mysql-test/std_data/mysql_json/mysql_json_test.MYD
Binary files differ
diff --git a/mysql-test/std_data/mysql_json/mysql_json_test.MYI b/mysql-test/std_data/mysql_json/mysql_json_test.MYI
new file mode 100644
index 00000000000..ada3ff23836
--- /dev/null
+++ b/mysql-test/std_data/mysql_json/mysql_json_test.MYI
Binary files differ
diff --git a/mysql-test/std_data/mysql_json/mysql_json_test.frm b/mysql-test/std_data/mysql_json/mysql_json_test.frm
new file mode 100644
index 00000000000..94c642f45ac
--- /dev/null
+++ b/mysql-test/std_data/mysql_json/mysql_json_test.frm
Binary files differ
diff --git a/mysql-test/std_data/mysql_json/mysql_json_test_big.MYD b/mysql-test/std_data/mysql_json/mysql_json_test_big.MYD
new file mode 100644
index 00000000000..21b1fffc4ba
--- /dev/null
+++ b/mysql-test/std_data/mysql_json/mysql_json_test_big.MYD
Binary files differ
diff --git a/mysql-test/std_data/mysql_json/mysql_json_test_big.MYI b/mysql-test/std_data/mysql_json/mysql_json_test_big.MYI
new file mode 100644
index 00000000000..79a1d9ebd89
--- /dev/null
+++ b/mysql-test/std_data/mysql_json/mysql_json_test_big.MYI
Binary files differ
diff --git a/mysql-test/std_data/mysql_json/mysql_json_test_big.frm b/mysql-test/std_data/mysql_json/mysql_json_test_big.frm
new file mode 100644
index 00000000000..94c642f45ac
--- /dev/null
+++ b/mysql-test/std_data/mysql_json/mysql_json_test_big.frm
Binary files differ
diff --git a/mysql-test/std_data/mysql_json/tempty.MYD b/mysql-test/std_data/mysql_json/tempty.MYD
new file mode 100644
index 00000000000..b4bf921b99f
--- /dev/null
+++ b/mysql-test/std_data/mysql_json/tempty.MYD
Binary files differ
diff --git a/mysql-test/std_data/mysql_json/tempty.MYI b/mysql-test/std_data/mysql_json/tempty.MYI
new file mode 100644
index 00000000000..003b7c8842f
--- /dev/null
+++ b/mysql-test/std_data/mysql_json/tempty.MYI
Binary files differ
diff --git a/mysql-test/std_data/mysql_json/tempty.frm b/mysql-test/std_data/mysql_json/tempty.frm
new file mode 100644
index 00000000000..15e6955dfb3
--- /dev/null
+++ b/mysql-test/std_data/mysql_json/tempty.frm
Binary files differ
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt
index 8ed75f86067..78b2643bb4b 100644
--- a/sql/CMakeLists.txt
+++ b/sql/CMakeLists.txt
@@ -69,7 +69,7 @@ SET (SQL_SOURCE
   filesort_utils.cc
   filesort.cc gstream.cc
   signal_handler.cc
-  handler.cc
+  handler.cc mysql_json.cc
   hostname.cc init.cc item.cc item_buff.cc item_cmpfunc.cc
   item_create.cc item_func.cc item_geofunc.cc item_row.cc
   item_strfunc.cc item_subselect.cc item_sum.cc item_timefunc.cc
diff --git a/sql/compat56.cc b/sql/compat56.cc
index 58cf5287cee..ea81bb2d605 100644
--- a/sql/compat56.cc
+++ b/sql/compat56.cc
@@ -280,6 +280,18 @@ void TIME_from_longlong_datetime_packed(MYSQL_TIME *ltime, longlong tmp)
 
 
 /**
+  Convert packed numeric date representation to MYSQL_TIME.
+  @param OUT ltime  The date variable to convert to.
+  @param     tmp    The packed numeric date value.
+*/
+void TIME_from_longlong_date_packed(MYSQL_TIME *ltime, longlong tmp)
+{
+  TIME_from_longlong_datetime_packed(ltime, tmp);
+  ltime->time_type= MYSQL_TIMESTAMP_DATE;
+}
+
+
+/**
   Calculate binary size of MySQL56 packed datetime representation.
   @param dec  Precision.
 */
diff --git a/sql/compat56.h b/sql/compat56.h
index 7f72c26c03a..d3a89b4cf8b 100644
--- a/sql/compat56.h
+++ b/sql/compat56.h
@@ -28,6 +28,7 @@
 longlong TIME_to_longlong_datetime_packed(const MYSQL_TIME *);
 longlong TIME_to_longlong_time_packed(const MYSQL_TIME *);
 void TIME_from_longlong_datetime_packed(MYSQL_TIME *ltime, longlong nr);
+void TIME_from_longlong_date_packed(MYSQL_TIME *ltime, longlong nr);
 void TIME_from_longlong_time_packed(MYSQL_TIME *ltime, longlong nr);
 
 void my_datetime_packed_to_binary(longlong nr, uchar *ptr, uint dec);
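
The new TIME_from_longlong_date_packed() deliberately reuses the datetime unpacking and then overrides time_type, since a packed date is a packed datetime with an all-zero time part. For orientation, here is a standalone sketch of the packed layout those helpers decode (a simplified re-implementation following the compat56 conventions; sign handling omitted, field widths illustrative, not server code):

    #include <cstdint>
    #include <cstdio>

    // Sketch: the low 24 bits carry microseconds; above them, second(6),
    // minute(6) and hour bits, then day(5) and year*13+month.
    static void decode_packed_datetime(int64_t packed)
    {
      long long usec  = packed % (1LL << 24);
      long long ymdhms= packed >> 24;
      long long ymd   = ymdhms >> 17;
      long long hms   = ymdhms % (1 << 17);
      long long day   = ymd % (1 << 5);
      long long ym    = ymd >> 5;             // year * 13 + month
      long long second= hms % (1 << 6);
      long long minute= (hms >> 6) % (1 << 6);
      long long hour  = hms >> 12;
      std::printf("%04lld-%02lld-%02lld %02lld:%02lld:%02lld.%06lld\n",
                  ym / 13, ym % 13, day, hour, minute, second, usec);
    }

    int main()
    {
      // Build the packed value for 2015-01-15 23:24:25.000000 (the
      // "DateTime as Raw Value" row in the test above) and decode it.
      long long ymd= (2015 * 13 + 1) * (1 << 5) + 15;
      long long hms= (23 << 12) + (24 << 6) + 25;
      decode_packed_datetime((ymd * (1LL << 17) + hms) * (1LL << 24));
      return 0;
    }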
diff --git a/sql/field.cc b/sql/field.cc
index f100963903a..0c60550a3fa 100644
--- a/sql/field.cc
+++ b/sql/field.cc
@@ -42,7 +42,7 @@
 #include "filesort.h"                           // change_double_for_sort
 #include "log_event.h"                          // class Table_map_log_event
 #include <m_ctype.h>
-
+#include "mysql_json.h"
 
 // Maximum allowed exponent value for converting string to decimal
 #define MAX_EXPONENT 1024
@@ -10859,6 +10859,13 @@ Field *make_field(TABLE_SHARE *share,
                            f_packtype(pack_flag));
   uint pack_length= tmp->calc_pack_length(field_length);
 
+  if (handler == &type_handler_mysql_json)
+  {
+    return new (mem_root) Field_mysql_json(ptr, null_pos, null_bit,
+                                           unireg_check, field_name, share,
+                                           pack_length, field_charset);
+  }
+
 #ifdef HAVE_SPATIAL
   if (f_is_geom(pack_flag))
   {
@@ -11366,6 +11373,40 @@ uint32 Field_blob::max_display_length() const
   }
 }
 
+/*****************************************************************************
+  Mysql table 5.7 with json data handling
+*****************************************************************************/
+bool Field_mysql_json::parse_mysql(String *dest,
+                                   const char *data, size_t length) const
+{
+  if (!data)
+    return false;
+
+  /* Each JSON blob must start with a type specifier. */
+  if (length < 2)
+    return true;
+
+  if (parse_mysql_json_value(dest, static_cast<JSONB_TYPES>(data[0]),
+                             data + 1, length - 1, 0))
+    return true;
+
+  return false;
+}
+
+String *Field_mysql_json::val_str(String *val_buffer, String *val_ptr)
+{
+  ASSERT_COLUMN_MARKED_FOR_READ;
+  String *raw_value= Field_blob::val_str(val_buffer, val_ptr);
+
+  const char* data = raw_value->ptr();
+  size_t length = raw_value->length();
+
+  val_ptr->length(0);
+  if (this->parse_mysql(val_ptr, data, length))
+    val_ptr->length(0);
+  return val_ptr;
+}
+
 /*****************************************************************************
   Warning handling
diff --git a/sql/field.h b/sql/field.h
index be3a648617b..772331383b8 100644
--- a/sql/field.h
+++ b/sql/field.h
@@ -3682,7 +3682,9 @@ public:
       return do_field_int;
     */
     if (!(from->flags & BLOB_FLAG) || from->charset() != charset() ||
-        !from->compression_method() != !compression_method())
+        !from->compression_method() != !compression_method() ||
+        /* Always perform conversion from mysql_json. */
+        from->type_handler() == &type_handler_mysql_json)
       return do_conv_blob;
     if (from->pack_length() != Field_blob::pack_length())
       return do_copy_blob;
@@ -4837,6 +4839,28 @@ public:
   void (*do_copy2)(Copy_field *);	// Used to handle null values
 };
 
+/*
+  Field used to handle MySQL JSON datatype fields from 5.7.
+
+  A MySQL JSON field is always stored as utf8mb4_bin.
+*/
+class Field_mysql_json :public Field_blob
+{
+public:
+  Field_mysql_json(uchar *ptr_arg, uchar *null_ptr_arg,
+                   uchar null_bit_arg, enum utype unireg_check_arg,
+                   const LEX_CSTRING *field_name_arg, TABLE_SHARE *share,
+                   uint blob_pack_length, const DTCollation &collation)
+    : Field_blob(ptr_arg, null_ptr_arg, null_bit_arg, unireg_check_arg,
+                 field_name_arg, share, blob_pack_length,
+                 &my_charset_utf8mb4_bin)
+  {}
+
+  String *val_str(String *val_buffer, String *val_str);
+  enum_field_types type() const { return MYSQL_TYPE_LONG_BLOB; }
+  const Type_handler *type_handler() const { return &type_handler_mysql_json; }
+  bool parse_mysql(String *dest, const char *data, size_t length) const;
+};
 
 uint pack_length_to_packflag(uint type);
 enum_field_types get_blob_type_from_length(ulong length);
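
Field_mysql_json::val_str() is the whole conversion path: the stored blob's first byte names the JSONB type and the rest is the payload. A minimal self-contained sketch of that entry-point shape, using std::string in place of the server's String class and handling only the literal type (hypothetical helper names, illustration only):

    #include <cstddef>
    #include <cstdio>
    #include <string>

    // Literal-only stand-in for parse_mysql_json_value(); 0x04 is
    // JSONB_TYPE_LITERAL in the patch's type enum.
    static bool render_jsonb(std::string *out, unsigned char type,
                             const char *data, size_t len)
    {
      if (type != 0x04 || len < 1)
        return true;                     // only literals in this sketch
      switch (data[0]) {
      case 0x0: out->append("null");  return false;
      case 0x1: out->append("true");  return false;
      case 0x2: out->append("false"); return false;
      default:  return true;             // malformed literal
      }
    }

    // Mirrors Field_mysql_json::parse_mysql(): split the blob into its
    // leading type byte and payload, then hand off to the renderer.
    static bool mysql_json_to_text(std::string *out, const char *data,
                                   size_t length)
    {
      if (!data)
        return false;                    // NULL value: nothing to print
      if (length < 2)
        return true;                     // need a type byte plus payload
      return render_jsonb(out, static_cast<unsigned char>(data[0]),
                          data + 1, length - 1);
    }

    int main()
    {
      const char blob[]= { 0x04, 0x01 };  // JSONB literal "true"
      std::string text;
      if (!mysql_json_to_text(&text, blob, sizeof(blob)))
        std::printf("%s\n", text.c_str()); // prints: true
      return 0;
    }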
diff --git a/sql/mysql_json.cc b/sql/mysql_json.cc
new file mode 100644
index 00000000000..78b56c10813
--- /dev/null
+++ b/sql/mysql_json.cc
@@ -0,0 +1,491 @@
+#include <algorithm>
+
+#include "mysql_json.h"
+
+#include "compat56.h"
+#include "my_decimal.h"
+#include "sql_time.h"
+
+/*
+  Json values in MySQL comprises the standard set of JSON values plus a MySQL
+  specific set. A JSON number type is subdivided into int, uint, double and
+  decimal.
+
+  MySQL also adds four built-in date/time values: date, time, datetime and
+  timestamp. An additional opaque value can store any other MySQL type.
+*/
+
+enum JSONB_LITERAL_TYPES {
+  JSONB_NULL_LITERAL=  0x0,
+  JSONB_TRUE_LITERAL=  0x1,
+  JSONB_FALSE_LITERAL= 0x2,
+};
+
+/*
+  The size of offset or size fields in the small and the large storage
+  format for JSON objects and JSON arrays.
+*/
+static const uchar SMALL_OFFSET_SIZE= 2;
+static const uchar LARGE_OFFSET_SIZE= 4;
+
+/*
+  The size of key entries for objects when using the small storage
+  format or the large storage format. In the small format it is 4
+  bytes (2 bytes for key length and 2 bytes for key offset). In the
+  large format it is 6 (2 bytes for length, 4 bytes for offset).
+*/
+static const uchar KEY_ENTRY_SIZE_SMALL= (2 + SMALL_OFFSET_SIZE);
+static const uchar KEY_ENTRY_SIZE_LARGE= (2 + LARGE_OFFSET_SIZE);
+
+/*
+  The size of value entries for objects or arrays. When using the
+  small storage format, the entry size is 3 (1 byte for type, 2 bytes
+  for offset). When using the large storage format, it is 5 (1 byte
+  for type, 4 bytes for offset).
+*/
+static const uchar VALUE_ENTRY_SIZE_SMALL= (1 + SMALL_OFFSET_SIZE);
+static const uchar VALUE_ENTRY_SIZE_LARGE= (1 + LARGE_OFFSET_SIZE);
+
+/* The maximum number of nesting levels allowed in a JSON document. */
+static const uchar JSON_DOCUMENT_MAX_DEPTH= 100;
+
+
+/**
+  Read an offset or size field from a buffer. The offset could be either
+  a two byte unsigned integer or a four byte unsigned integer.
+
+  @param data   the buffer to read from
+  @param large  tells if the large or small storage format is used; true
+                means read four bytes, false means read two bytes
+*/
+static inline size_t read_offset_or_size(const char *data, bool large)
+{
+  return large ? uint4korr(data) : uint2korr(data);
+}
+
+
+static inline size_t key_size(bool large)
+{
+  return large ? KEY_ENTRY_SIZE_LARGE : KEY_ENTRY_SIZE_SMALL;
+}
+
+
+static inline size_t value_size(bool large)
+{
+  return large ? VALUE_ENTRY_SIZE_LARGE : VALUE_ENTRY_SIZE_SMALL;
+}
+
+
+/**
+  Inlined values are a space optimization. The actual value is stored
+  instead of the offset pointer to the location where a non-inlined
+  value would be located.
+
+  @param[in] type   The type to check.
+  @param[in] large  tells if the large or small storage format is used;
+*/
+static inline bool type_is_stored_inline(JSONB_TYPES type, bool large)
+{
+  return (type == JSONB_TYPE_INT16 ||
+          type == JSONB_TYPE_UINT16 ||
+          type == JSONB_TYPE_LITERAL ||
+          (large && (type == JSONB_TYPE_INT32 ||
+                     type == JSONB_TYPE_UINT32)));
+}
+
+/**
+  Read a variable length integer. A variable length integer uses the 8th bit in
+  each byte to mark if there are more bytes needed to store the integer. The
+  other 7 bits in the byte are used to store the actual integer's bits.
+
+  @param[in]  data         the buffer to read from
+  @param[in]  data_length  the maximum number of bytes to read from data
+  @param[out] length       the length that was read
+  @param[out] num          the number of bytes needed to represent the length
+  @return  false on success, true on error
+*/
+static inline bool read_variable_length(const char *data, size_t data_length,
+                                        size_t *length, size_t *num)
+{
+  /*
+    It takes five bytes to represent UINT_MAX32, which is the largest
+    supported length, so don't look any further.
+
+    Use data_length as max value to prevent segfault when reading a corrupted
+    JSON document.
+  */
+  const size_t max_bytes= std::min(data_length, static_cast<size_t>(5));
+
+  size_t len= 0;
+  for (size_t i= 0; i < max_bytes; i++)
+  {
+    /* Get the next 7 bits of the length. */
+    len|= (data[i] & 0x7f) << (7 * i);
+    if ((data[i] & 0x80) == 0)
+    {
+      /* The length shouldn't exceed 32 bits. */
+      if (len > UINT_MAX32)
+        return true;
+
+      /* This was the last byte. Return successfully. */
+      *num= i + 1;
+      *length= len;
+      return false;
+    }
+  }
+
+  /*
+    No more available bytes. Return true to signal error. This implies a
+    corrupted JSON document.
+  */
+  return true;
+}
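
read_variable_length() is a little-endian base-128 decoder: 7 payload bits per byte, a set high bit means "more bytes follow", and the scan is capped at 5 bytes so a corrupted document cannot run away. The same algorithm as a standalone sketch:

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    // Same scheme as read_variable_length(): 7 data bits per byte, the
    // high bit flags continuation, lengths are capped at 32 bits.
    static bool decode_varlen(const unsigned char *data, size_t avail,
                              uint32_t *length, size_t *consumed)
    {
      uint64_t len= 0;
      size_t max_bytes= avail < 5 ? avail : 5;
      for (size_t i= 0; i < max_bytes; i++)
      {
        len|= static_cast<uint64_t>(data[i] & 0x7f) << (7 * i);
        if ((data[i] & 0x80) == 0)
        {
          if (len > UINT32_MAX)
            return true;               // length must fit in 32 bits
          *length= static_cast<uint32_t>(len);
          *consumed= i + 1;
          return false;
        }
      }
      return true;                     // ran out of bytes: corrupted value
    }

    int main()
    {
      const unsigned char enc[]= { 0x80, 0x02 };  // 0 + (2 << 7) = 256
      uint32_t length;
      size_t consumed;
      if (!decode_varlen(enc, sizeof(enc), &length, &consumed))
        std::printf("length=%u consumed=%zu\n", length, consumed); // 256, 2
      return 0;
    }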
+
+
+/**
+  JSON formatting in MySQL escapes a few special characters to prevent
+  ambiguity.
+*/
+static bool append_string_json(String *buffer, const char *data, size_t len)
+{
+  const char *last= data + len;
+  for (; data < last; data++)
+  {
+    const uchar c= *data;
+    switch (c) {
+    case '\\':
+      buffer->append("\\\\");
+      break;
+    case '\n':
+      buffer->append("\\n");
+      break;
+    case '\r':
+      buffer->append("\\r");
+      break;
+    case '"':
+      buffer->append("\\\"");
+      break;
+    case '\b':
+      buffer->append("\\b");
+      break;
+    case '\f':
+      buffer->append("\\f");
+      break;
+    case '\t':
+      buffer->append("\\t");
+      break;
+    default:
+      buffer->append(c);
+      break;
+    }
+  }
+  return false;
+}
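
append_string_json() escapes only the backslash, the double quote and five control characters; everything else, including multi-byte UTF-8 sequences, is copied through untouched. The same table in standalone form (std::string instead of the server's String):

    #include <cstdio>
    #include <string>

    // Same escape set as append_string_json() above.
    static std::string escape_json(const std::string &in)
    {
      std::string out;
      for (char c : in)
      {
        switch (c) {
        case '\\': out+= "\\\\"; break;
        case '"':  out+= "\\\""; break;
        case '\n': out+= "\\n";  break;
        case '\r': out+= "\\r";  break;
        case '\b': out+= "\\b";  break;
        case '\f': out+= "\\f";  break;
        case '\t': out+= "\\t";  break;
        default:   out+= c;      break;
        }
      }
      return out;
    }

    int main()
    {
      // Matches the "Special Characters" rows in the test result above.
      std::printf("\"%s\"\n", escape_json("key1 - with \" val ").c_str());
      return 0;
    }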
+
+
+static bool print_mysql_datetime_value(String *buffer, enum_field_types type,
+                                       const char *data, size_t len)
+{
+  if (len < 8)
+    return true;
+
+  MYSQL_TIME t;
+  switch (type)
+  {
+  case MYSQL_TYPE_TIME:
+    TIME_from_longlong_time_packed(&t, sint8korr(data));
+    break;
+  case MYSQL_TYPE_DATE:
+    TIME_from_longlong_date_packed(&t, sint8korr(data));
+    break;
+  case MYSQL_TYPE_DATETIME:
+  case MYSQL_TYPE_TIMESTAMP:
+    TIME_from_longlong_datetime_packed(&t, sint8korr(data));
+    break;
+  default:
+    DBUG_ASSERT(0);
+    return true;
+  }
+  /* Wrap all datetime strings within double quotes. */
+  buffer->append('\"');
+  buffer->reserve(MAX_DATE_STRING_REP_LENGTH);
+  buffer->length(buffer->length() +
+                 my_TIME_to_str(&t, const_cast<char *>(buffer->end()), 6));
+  buffer->append('\"');
+  return false;
+}
+
+
+static bool parse_mysql_scalar(String *buffer, size_t value_json_type,
+                               const char *data, size_t len)
+{
+  switch (value_json_type) {
+  case JSONB_TYPE_LITERAL:
+  {
+    if (len < 1)
+      return true;
+    switch (static_cast<JSONB_LITERAL_TYPES>(*data)) {
+    case JSONB_NULL_LITERAL:
+      return buffer->append("null");
+    case JSONB_TRUE_LITERAL:
+      return buffer->append("true");
+    case JSONB_FALSE_LITERAL:
+      return buffer->append("false");
+    default: /* Invalid literal constant, malformed JSON. */
+      return true;
+    }
+  }
+  case JSONB_TYPE_INT16:
+    return len < 2 || buffer->append_longlong(sint2korr(data));
+  case JSONB_TYPE_INT32:
+    return len < 4 || buffer->append_longlong(sint4korr(data));
+  case JSONB_TYPE_INT64:
+    return len < 8 || buffer->append_longlong(sint8korr(data));
+  case JSONB_TYPE_UINT16:
+    return len < 2 || buffer->append_ulonglong(uint2korr(data));
+  case JSONB_TYPE_UINT32:
+    return len < 4 || buffer->append_ulonglong(uint4korr(data));
+  case JSONB_TYPE_UINT64:
+    return len < 8 || buffer->append_ulonglong(uint8korr(data));
+  case JSONB_TYPE_DOUBLE:
+    if (len < 8)
+      return true;
+    buffer->reserve(FLOATING_POINT_BUFFER, 2 * FLOATING_POINT_BUFFER);
+    buffer->qs_append(reinterpret_cast<const double *>(data));
+    return false;
+  case JSONB_TYPE_STRING:
+  {
+    size_t string_length, length_bytes;
+
+    return read_variable_length(data, len, &string_length, &length_bytes) ||
+           len < length_bytes + string_length ||
+           buffer->append('"') ||
+           append_string_json(buffer, data + length_bytes, string_length) ||
+           buffer->append('"');
+  }
+  case JSONB_TYPE_OPAQUE:
+  {
+    /* The field_type maps directly to enum_field_types. */
+    const uchar type_value= static_cast<uchar>(*data);
+    const enum_field_types field_type= static_cast<enum_field_types>(type_value);
+
+    size_t blob_length, length_bytes;
+    const char *blob_start;
+
+    if (read_variable_length(data + 1, len, &blob_length, &length_bytes) ||
+        len < length_bytes + blob_length)
+      return true;
+    blob_start= data + length_bytes + 1;
+
+    switch (field_type) {
+    case MYSQL_TYPE_TIME:
+    case MYSQL_TYPE_DATE:
+    case MYSQL_TYPE_DATETIME:
+    case MYSQL_TYPE_TIMESTAMP:
+      return print_mysql_datetime_value(buffer, field_type,
+                                        blob_start, blob_length);
+    case MYSQL_TYPE_NEWDECIMAL:
+    {
+      /* Expect at least two bytes, which contain precision and scale. */
+      if (blob_length < 2)
+        return true;
+
+      const int precision= blob_start[0];
+      const int scale= blob_start[1];
+
+      my_decimal d;
+
+      /* The decimal value is encoded after the two prec/scale bytes. */
+      const size_t dec_size= my_decimal_get_binary_size(precision, scale);
+      if (dec_size != blob_length - 2 ||
+          binary2my_decimal(E_DEC_ERROR,
+                            reinterpret_cast<const uchar *>(blob_start + 2),
+                            &d, precision, scale) != E_DEC_OK)
+        return true;
+      d.fix_buffer_pointer();
+
+      if (my_decimal2string(E_DEC_ERROR, &d, 0, 0, ' ', buffer) != E_DEC_OK)
+        return true;
+      return false;
+    }
+    default:
+    {
+      /* Any other MySQL type is presented as a base64 encoded string. */
+      if (buffer->append("\"base64:type") ||
+          buffer->append_longlong(field_type) ||
+          buffer->append(':'))
+        return true;
+
+      const size_t needed=
+        my_base64_needed_encoded_length(static_cast<int>(blob_length));
+      if (buffer->reserve(needed) ||
+          my_base64_encode(blob_start, blob_length,
+                           const_cast<char*>(buffer->end())))
+        return true;
+      /* -1 to override the null terminator from my_base64_encode */
+      DBUG_ASSERT(*(buffer->end() + needed) == '\0');
+      buffer->length(buffer->length() + needed - 1);
+      return buffer->append('"');
+    }
+    }
+  }
+  default:
+    return true;
+  }
+}
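
The inlined numeric types are stored little-endian directly in the value entry, which is why parse_mysql_scalar() can hand the bytes straight to sint2korr() and friends. A sketch of the INT16 case:

    #include <cstdint>
    #include <cstdio>

    // Little-endian 16-bit read, standing in for sint2korr().
    static int16_t read_le16(const unsigned char *p)
    {
      return static_cast<int16_t>(p[0] | (p[1] << 8));
    }

    int main()
    {
      // A JSONB_TYPE_INT16 (0x05) scalar with payload 0x7B 0x00 is the
      // JSON number 123; 0x85 0xFF is -123 (two's complement).
      const unsigned char pos[]= { 0x7B, 0x00 };
      const unsigned char neg[]= { 0x85, 0xFF };
      std::printf("%d %d\n", read_le16(pos), read_le16(neg)); // 123 -123
      return 0;
    }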
+
+
+/**
+  Read a value from a JSON Object or Array, given the position of it.
+  This function handles both inlined values as well as values stored at
+  an offset.
+
+  @param[out] buffer             Where to print the results.
+  @param[in]  data               The raw binary data of the Object or Array.
+  @param[in]  len                The length of the binary data.
+  @param[in]  value_type_offset  Where the type of the value is stored.
+  @param[in]  large              true if the large storage format is used;
+  @param[in]  depth              How deep the JSON object is in the hierarchy.
+*/
+static bool parse_mysql_scalar_or_value(String *buffer, const char *data,
+                                        size_t len, size_t value_type_offset,
+                                        bool large, size_t depth)
+{
+  /* Get the type of the value stored at the key. */
+  const JSONB_TYPES value_type=
+    static_cast<JSONB_TYPES>(data[value_type_offset]);
+
+  if (type_is_stored_inline(value_type, large))
+  {
+    const size_t value_start = value_type_offset + 1;
+    if (parse_mysql_scalar(buffer, value_type, data + value_start,
+                           len - value_start))
+      return true;
+  }
+  else
+  {
+    /*
+      The offset to where the value is stored is relative to the start
+      of the Object / Array.
+    */
+    const size_t value_start= read_offset_or_size(
+      data + value_type_offset + 1, large);
+    if (parse_mysql_json_value(buffer, value_type, data + value_start,
+                               len - value_start, depth))
+      return true;
+  }
+  return false;
+
+}
+
+
+static bool parse_array_or_object(String *buffer, const char *data, size_t len,
+                                  bool handle_as_object, bool large,
+                                  size_t depth)
+{
+  if (depth > JSON_DOCUMENT_MAX_DEPTH)
+    return true;
+
+  /*
+    Make sure the document is long enough to contain the two length fields
+    (both number of elements or members, and number of bytes).
+  */
+  const size_t offset_size= large ? LARGE_OFFSET_SIZE : SMALL_OFFSET_SIZE;
+  /* The length has to be at least double offset size (header). */
+  if (len < 2 * offset_size)
+    return true;
+
+  /*
+    Every JSON Object or Array contains two numbers in the header:
+    - The number of elements in the Object / Array (Keys)
+    - The total number of bytes occupied by the JSON Object / Array, including
+      the two numbers in the header.
+    Depending on the Object / Array type (small / large) the numbers are stored
+    in 2 bytes or 4 bytes each.
+  */
+  const size_t element_count= read_offset_or_size(data, large);
+  const size_t bytes= read_offset_or_size(data + offset_size, large);
+
+  /* The value can't have more bytes than what's available in the buffer. */
+  if (bytes > len)
+    return true;
+
+  if (buffer->append(handle_as_object ? '{' : '['))
+    return true;
+
+  for (size_t i= 0; i < element_count; i++)
+  {
+    if (handle_as_object)
+    {
+      /*
+        The JSON Object is stored as a header part and a data part.
+        Header consists of:
+        - two length fields,
+        - an array of pointers to keys,
+        - an array of tuples (type, pointer to values).
+          * For certain types, the pointer to values is replaced by the actual
+            value. (see type_is_stored_inline)
+        Data consists of:
+        - All Key data, in order
+        - All Value data, in order
+      */
+      const size_t key_offset= 2 * offset_size + i * key_size(large);
+      const size_t key_start= read_offset_or_size(data + key_offset, large);
+      /* The length of keys is always stored in 2 bytes (large == false) */
+      const size_t key_len= read_offset_or_size(
+        data + key_offset + offset_size, false);
+
+      const size_t value_type_offset= 2 * offset_size +
+                                      element_count * key_size(large) +
+                                      i * value_size(large);
+
+      /* First print the key. */
+      if (buffer->append('"') ||
+          append_string_json(buffer, data + key_start, key_len) ||
+          buffer->append("\": "))
+      {
+        return true;
+      }
+
+      /* Then print the value. */
+      if (parse_mysql_scalar_or_value(buffer, data, bytes, value_type_offset,
+                                      large, depth))
+        return true;
+    }
+    else
+    {
+      /*
+        Arrays do not have the keys vector and its associated data.
+        We jump straight to reading values.
+      */
+      const size_t value_type_offset= 2 * offset_size + value_size(large) * i;
+
+      if (parse_mysql_scalar_or_value(buffer, data, bytes, value_type_offset,
+                                      large, depth))
+        return true;
+    }
+
+    if (i != element_count - 1 && buffer->append(", "))
+      return true;
+  }
+
+  return buffer->append(handle_as_object ? '}' : ']');
+}
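
Putting the header rules together, the small array [1, 2] occupies ten payload bytes: a 2-byte element count, a 2-byte total size, and one 3-byte value entry per element with the INT16 values inlined. A hand-decoded sketch (bytes laid out per the parser above, assumed to match what MySQL stores after the leading 0x02 SMALL_ARRAY type byte):

    #include <cstddef>
    #include <cstdio>

    static size_t read_le16u(const unsigned char *p)
    {
      return p[0] | (p[1] << 8);
    }

    int main()
    {
      // Payload of the small array [1, 2]:
      //   02 00      element count = 2
      //   0a 00      total size    = 10 bytes, header included
      //   05 01 00   entry 0: type INT16 (0x05), inlined value 1
      //   05 02 00   entry 1: type INT16 (0x05), inlined value 2
      const unsigned char a[]= { 0x02, 0x00, 0x0a, 0x00,
                                 0x05, 0x01, 0x00, 0x05, 0x02, 0x00 };
      const size_t offset_size= 2;          // small storage format
      const size_t count= read_le16u(a);
      const size_t bytes= read_le16u(a + offset_size);
      std::printf("[");
      for (size_t i= 0; i < count; i++)
      {
        // Small-format value entries are 3 bytes: 1 type byte plus
        // 2 bytes holding the inlined INT16 value.
        const unsigned char *entry= a + 2 * offset_size + 3 * i;
        std::printf("%zu%s", read_le16u(entry + 1),
                    i + 1 < count ? ", " : "");
      }
      std::printf("] (%zu bytes)\n", bytes);  // prints: [1, 2] (10 bytes)
      return 0;
    }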
+
+
+bool parse_mysql_json_value(String *buffer, JSONB_TYPES type, const char *data,
+                            size_t len, size_t depth)
+{
+  switch (type) {
+  case JSONB_TYPE_SMALL_OBJECT:
+    return parse_array_or_object(buffer, data, len, true, false, depth + 1);
+  case JSONB_TYPE_LARGE_OBJECT:
+    return parse_array_or_object(buffer, data, len, true, true, depth + 1);
+  case JSONB_TYPE_SMALL_ARRAY:
+    return parse_array_or_object(buffer, data, len, false, false, depth + 1);
+  case JSONB_TYPE_LARGE_ARRAY:
+    return parse_array_or_object(buffer, data, len, false, true, depth + 1);
+  default:
+    return parse_mysql_scalar(buffer, type, data, len);
+  }
+}
diff --git a/sql/mysql_json.h b/sql/mysql_json.h
new file mode 100644
index 00000000000..e57ab298311
--- /dev/null
+++ b/sql/mysql_json.h
@@ -0,0 +1,26 @@
+#ifndef MYSQL_JSON_INCLUDED
+#define MYSQL_JSON_INCLUDED
+
+#include "my_global.h"
+#include "sql_string.h"                         // String
+
+enum JSONB_TYPES {
+  JSONB_TYPE_SMALL_OBJECT= 0x0,
+  JSONB_TYPE_LARGE_OBJECT= 0x1,
+  JSONB_TYPE_SMALL_ARRAY=  0x2,
+  JSONB_TYPE_LARGE_ARRAY=  0x3,
+  JSONB_TYPE_LITERAL=      0x4,
+  JSONB_TYPE_INT16=        0x5,
+  JSONB_TYPE_UINT16=       0x6,
+  JSONB_TYPE_INT32=        0x7,
+  JSONB_TYPE_UINT32=       0x8,
+  JSONB_TYPE_INT64=        0x9,
+  JSONB_TYPE_UINT64=       0xA,
+  JSONB_TYPE_DOUBLE=       0xB,
+  JSONB_TYPE_STRING=       0xC,
+  JSONB_TYPE_OPAQUE=       0xF,
+};
+
+bool parse_mysql_json_value(String *buffer, JSONB_TYPES type, const char *data,
+                            size_t len, size_t depth);
+#endif /* MYSQL_JSON_INCLUDED */
diff --git a/sql/sql_string.cc b/sql/sql_string.cc
index 0cc653c29a9..f398db459a4 100644
--- a/sql/sql_string.cc
+++ b/sql/sql_string.cc
@@ -811,7 +811,7 @@ void String::qs_append(double d)
                NULL);
 }
 
-void String::qs_append(double *d)
+void String::qs_append(const double *d)
 {
   double ld;
   float8get(ld, (char*) d);
diff --git a/sql/sql_string.h b/sql/sql_string.h
index f56540c2975..c1c0b659b97 100644
--- a/sql/sql_string.h
+++ b/sql/sql_string.h
@@ -607,7 +607,7 @@ public:
   void qs_append(const char *str, size_t len);
   void qs_append_hex(const char *str, uint32 len);
   void qs_append(double d);
-  void qs_append(double *d);
+  void qs_append(const double *d);
   inline void qs_append(const char c)
   {
     Ptr[str_length]= c;
diff --git a/sql/sql_type.cc b/sql/sql_type.cc
index 9d3a47adfa5..373e65fdb23 100644
--- a/sql/sql_type.cc
+++ b/sql/sql_type.cc
@@ -68,6 +68,7 @@ static Type_handler_blob_compressed type_handler_blob_compressed;
 Type_handler_geometry type_handler_geometry;
 #endif
 
+Type_handler_mysql_json type_handler_mysql_json;
 
 Schema *Type_handler::schema() const
 {
@@ -499,6 +500,8 @@ const Name
   Type_handler_datetime_common::m_name_datetime(STRING_WITH_LEN("datetime")),
   Type_handler_timestamp_common::m_name_timestamp(STRING_WITH_LEN("timestamp"));
 
+const Name
+  Type_handler_mysql_json::m_name_mysql_json(STRING_WITH_LEN("mysql_json"));
 
 const Type_limits_int
   Type_handler_tiny::m_limits_sint8= Type_limits_sint8(),
@@ -2484,7 +2487,22 @@ Field *Type_handler_set::make_table_field(const LEX_CSTRING *name,
 }
 
 /*************************************************************************/
-
+Field *Type_handler_mysql_json::make_table_field(const LEX_CSTRING *name,
+                                                 const Record_addr &addr,
+                                                 const Type_all_attributes &attr,
+                                                 TABLE *table) const
+{
+  /*
+    DBUG_ASSERT will be removed when we reuse make_table_field()
+    for make_field() in field.cc
+  */
+  DBUG_ASSERT(0);
+  return new (table->in_use->mem_root)
+    Field_mysql_json(addr.ptr, addr.null_ptr, addr.null_bit,
+                     Field::NONE, name, table->s,
+                     4, attr.collation);
+}
+/*************************************************************************/
 
 /*
   If length is not specified for a varchar parameter, set length to the
   maximum length of the actual argument. Goals are:
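
The point of registering a dedicated Type_handler_mysql_json rather than reusing type_handler_long_blob is that make_field() can branch on the handler's identity and construct a Field_mysql_json. The pattern in miniature (generic C++, not the server's Type_handler API):

    #include <cstdio>

    // Miniature version of the dispatch: one handler singleton per type,
    // and a field factory that branches on handler identity, as
    // make_field() does with &type_handler_mysql_json.
    struct TypeHandler { const char *name; };

    static const TypeHandler type_handler_long_blob=  { "longblob" };
    static const TypeHandler type_handler_mysql_json= { "mysql_json" };

    static const char *make_field(const TypeHandler *handler)
    {
      if (handler == &type_handler_mysql_json)
        return "Field_mysql_json";   // converts binary JSON on read
      return "Field_blob";           // plain blob storage
    }

    int main()
    {
      std::printf("%s -> %s\n", type_handler_mysql_json.name,
                  make_field(&type_handler_mysql_json));
      std::printf("%s -> %s\n", type_handler_long_blob.name,
                  make_field(&type_handler_long_blob));
      return 0;
    }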
diff --git a/sql/sql_type.h b/sql/sql_type.h
index f20ccff0752..78f96699def 100644
--- a/sql/sql_type.h
+++ b/sql/sql_type.h
@@ -3666,6 +3666,21 @@ public:
   bool init();
 };
 
+class Type_handler_mysql_json: public Type_handler_long_blob
+{
+private:
+  static const Name m_name_mysql_json;
+public:
+  virtual ~Type_handler_mysql_json() {}
+  const Name name() const { return m_name_mysql_json; }
+  enum_field_types field_type() const { return MYSQL_TYPE_LONG_BLOB; }
+  Field *make_table_field(const LEX_CSTRING *name,
+                          const Record_addr &addr,
+                          const Type_all_attributes &attr,
+                          TABLE *table) const;
+};
+
+extern MYSQL_PLUGIN_IMPORT Type_handler_mysql_json type_handler_mysql_json;
 
 extern Type_handler_data *type_handler_data;
diff --git a/sql/table.cc b/sql/table.cc
index 403b68551a0..7c1431f39fb 100644
--- a/sql/table.cc
+++ b/sql/table.cc
@@ -1845,6 +1845,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
       Field::utype unireg_check;
       const Type_handler *handler;
       uint32 flags= 0;
+      bool handle_field_as_mysql_json= false;
 
       if (new_frm_ver >= 3)
       {
@@ -1899,15 +1900,37 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
 
       if ((uchar)field_type == (uchar)MYSQL_TYPE_VIRTUAL)
       {
-        if (!interval_nr) // Expect non-null expression
-          goto err;
         /*
-          MariaDB version 10.0 version.
-          The interval_id byte in the .frm file stores the length of the
-          expression statement for a virtual column.
+          Special handling to be able to read MySQL JSON types when
+          converting a MySQL table to a MariaDB table. MYSQL_TYPE_VIRTUAL
+          has the same value as MySQL's JSON type number, which we should
+          interpret as a special long blob.
         */
-        vcol_info_length= interval_nr;
-        interval_nr= 0;
+        if (unlikely(share->mysql_version >= 50700 &&
+                     share->mysql_version < 100000))
+        {
+          if(thd->lex->sql_command != SQLCOM_ALTER_TABLE ||
+             thd->lex->alter_info.flags != ALTER_RECREATE)
+          {
+            // Raise an error for every operation except `alter table force`.
+            open_table_error(share, OPEN_FRM_NEEDS_REBUILD, share->open_errno);
+            goto err;
+          }
+          handle_field_as_mysql_json= true;
+          field_type= MYSQL_TYPE_LONG_BLOB;
+        }
+        else
+        {
+          if (!interval_nr) // Expect non-null expression
+            goto err;
+          /*
+            MariaDB version 10.0 version.
+            The interval_id byte in the .frm file stores the length of the
+            expression statement for a virtual column.
+          */
+          vcol_info_length= interval_nr;
+          interval_nr= 0;
+        }
       }
 
       if (!comment_length)
@@ -2089,7 +2112,9 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
       unireg_check= (Field::utype) MTYP_TYPENR(unireg_type);
       name.str= fieldnames.type_names[i];
       name.length= strlen(name.str);
-      if (!(handler= Type_handler::get_handler_by_real_type(field_type)))
+      if (handle_field_as_mysql_json)
+        handler= &type_handler_mysql_json;
+      else if (!(handler= Type_handler::get_handler_by_real_type(field_type)))
        goto err;                             // Not supported field type
       *field_ptr= reg_field=
         make_field(share, &share->mem_root, record+recpos, (uint32) field_length,
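
The table.cc hunk above is the heart of the detection: field type 245 means MYSQL_TYPE_VIRTUAL in a MariaDB .frm but JSON in a MySQL 5.7 one, and only the mysql_version stamped into the .frm disambiguates. Condensed into a sketch (not server code):

    #include <cstdio>

    enum FieldKind { FIELD_VIRTUAL_COLUMN, FIELD_MYSQL_JSON, FIELD_ERROR };

    // Resolve field type 245 through the mysql_version stored in the .frm,
    // as init_from_binary_frm_image() does above.
    static FieldKind classify_type_245(unsigned long frm_mysql_version,
                                       bool is_alter_table_force)
    {
      if (frm_mysql_version >= 50700 && frm_mysql_version < 100000)
      {
        // MySQL 5.7 table: a JSON column. Anything except
        // ALTER TABLE ... FORCE fails with "Table rebuild required".
        return is_alter_table_force ? FIELD_MYSQL_JSON : FIELD_ERROR;
      }
      return FIELD_VIRTUAL_COLUMN;   // MariaDB table: a virtual column
    }

    int main()
    {
      std::printf("%d %d %d\n",
                  classify_type_245(50726, true),    // FIELD_MYSQL_JSON
                  classify_type_245(50726, false),   // FIELD_ERROR
                  classify_type_245(100142, false)); // FIELD_VIRTUAL_COLUMN
      return 0;
    }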