diff options
author | Alexander Barkov <bar@mysql.com> | 2010-02-11 08:17:25 +0400 |
---|---|---|
committer | Alexander Barkov <bar@mysql.com> | 2010-02-11 08:17:25 +0400 |
commit | 702166bcdec5705dd90d8567a88056893433c719 (patch) | |
tree | d6306efc9e4d7e6bc6bf4ae5a58bd6aec40420e9 /sql/item.h | |
parent | 6dd93757263fd219fcbd3d1ed100a681ecbc7c92 (diff) | |
download | mariadb-git-702166bcdec5705dd90d8567a88056893433c719.tar.gz |
WL#2649 Number-to-string conversions
added:
include/ctype_numconv.inc
mysql-test/include/ctype_numconv.inc
mysql-test/r/ctype_binary.result
mysql-test/t/ctype_binary.test
Adding tests
modified:
mysql-test/r/bigint.result
mysql-test/r/case.result
mysql-test/r/create.result
mysql-test/r/ctype_cp1251.result
mysql-test/r/ctype_latin1.result
mysql-test/r/ctype_ucs.result
mysql-test/r/func_gconcat.result
mysql-test/r/func_str.result
mysql-test/r/metadata.result
mysql-test/r/ps_1general.result
mysql-test/r/ps_2myisam.result
mysql-test/r/ps_3innodb.result
mysql-test/r/ps_4heap.result
mysql-test/r/ps_5merge.result
mysql-test/r/show_check.result
mysql-test/r/type_datetime.result
mysql-test/r/type_ranges.result
mysql-test/r/union.result
mysql-test/suite/ndb/r/ps_7ndb.result
mysql-test/t/ctype_cp1251.test
mysql-test/t/ctype_latin1.test
mysql-test/t/ctype_ucs.test
mysql-test/t/func_str.test
Fixing tests
@ sql/field.cc
- Return str result using my_charset_numeric.
- Using real multi-byte aware str_to_XXX functions
to handle tricky charset values propely (e.g. UCS2)
@ sql/field.h
- Changing derivation of non-string field types to DERIVATION_NUMERIC.
- Changing binary() for numeric/datetime fields to always
return TRUE even if charset is not my_charset_bin. We need
this to keep ha_base_keytype() return HA_KEYTYPE_BINARY.
- Adding BINARY_FLAG into some fields, because it's not
being set automatically anymore with
"my_charset_bin to my_charset_numeric" change.
- Changing derivation for numeric/datetime datatypes to a weaker
value, to make "SELECT concat('string', field)" use character
set of the string literal for the result of the function.
@ sql/item.cc
- Implementing generic val_str_ascii().
- Using max_char_length() instead of direct read of max_length
to make "tricky" charsets like UCS2 work.
NOTE: in the future we'll possibly remove all direct reads of max_length
- Fixing Item_num::safe_charset_converter().
Previously it alligned binary string to
character string (for example by adding leading 0x00
when doing binary->UCS2 conversion). Now it just
converts from my_charset_numbner to "tocs".
- Using val_str_ascii() in Item::get_time() to make UCS2 arguments work.
- Other misc changes
@ sql/item.h
- Changing MY_COLL_CMP_CONV and MY_COLL_ALLOW_CONV to
bit operations instead of hard-coded bit masks.
- Addding new method DTCollation.set_numeric().
- Adding new methods to Item.
- Adding helper functions to make code look nicer:
agg_item_charsets_for_string_result()
agg_item_charsets_for_comparison()
- Changing charset for Item_num-derived items
from my_charset_bin to my_charset_numeric
(which is an alias for latin1).
@ sql/item_cmpfunc.cc
- Using new helper functions
- Other misc changes
@ sql/item_cmpfunc.h
- Fixing strcmp() to return max_length=2.
Previously it returned 1, which was wrong,
because it did not fit '-1'.
@ sql/item_func.cc
- Using new helper functions
- Other minor changes
@ sql/item_func.h
- Removing unused functions
- Adding helper functions
agg_arg_charsets_for_string_result()
agg_arg_charsets_for_comparison()
- Adding set_numeric() into constructors of numeric items.
- Using fix_length_and_charset() and fix_char_length()
instead of direct write to max_length.
@ sql/item_geofunc.cc
- Changing class for Item_func_geometry_type and
Item_func_as_wkt from Item_str_func to
Item_str_ascii_func, to make them return UCS2 result
properly (when character_set_connection=ucs2).
@ sql/item_geofunc.h
- Changing class for Item_func_geometry_type and
Item_func_as_wkt from Item_str_func to
Item_str_ascii_func, to make them return UCS2 result
properly (when @@character_set_connection=ucs2).
@ sql/item_strfunc.cc
- Implementing Item_str_func::val_str().
- Renaming val_str to val_str_ascii for some items,
to make them work with UCS2 properly.
- Using new helper functions
- All single-argument functions that expect string
result now call this method:
agg_arg_charsets_for_string_result(collation, args, 1);
This enables character set conversion to @@character_set_connection
in case of pure numeric input.
@ sql/item_strfunc.h
- Introducing Item_str_ascii_func - for functions
which return pure ASCII data, for performance purposes,
as well as for the cases when the old implementation
of val_str() was heavily 8-bit oriented and implementing
a UCS2-aware version is tricky.
@ sql/item_sum.cc
- Using new helper functions.
@ sql/item_timefunc.cc
- Using my_charset_numeric instead of my_charset_bin.
- Using fix_char_length(), fix_length_and_charset()
and fix_length_and_charset_datetime()
instead of direct write to max_length.
- Using tricky-charset aware function str_to_time_with_warn()
@ sql/item_timefunc.h
- Using new helper functions for charset and length initialization.
- Changing base class for Item_func_get_format() to make
it return UCS2 properly (when character_set_connection=ucs2).
@ sql/item_xmlfunc.cc
- Using new helper function
@ sql/my_decimal.cc
- Adding a new DECIMAL to CHAR converter
with real multibyte support (e.g. UCS2)
@ sql/mysql_priv.h
- Introducing a new derivation level for numeric/datetime data types.
- Adding macros for my_charset_numeric and MY_REPERTOIRE_NUMERIC.
- Adding prototypes for str_set_decimal()
- Adding prototypes for character-set aware str_to_xxx() functions.
@ sql/protocol.cc
- Changing charsetnr to "binary" client-side metadata for
numeric/datetime data types.
@ sql/time.cc
- Adding to_ascii() helper function, to convert a string
in any character set to ascii representation. In the
future can be extended to understand digits written
in various non-Latin word scripts.
- Adding real multy-byte character set aware versions for str_to_XXXX,
to make these these type of queries work correct:
INSERT INTO t1 SET datetime_column=ucs2_expression;
@ strings/ctype-ucs2.c
- endptr was not calculated correctly. INSERTing of UCS2
values into numeric columns returned warnings about
truncated wrong data.
Diffstat (limited to 'sql/item.h')
-rw-r--r-- | sql/item.h | 131 |
1 files changed, 127 insertions, 4 deletions
diff --git a/sql/item.h b/sql/item.h index b7e6cc6c204..0bfb2673121 100644 --- a/sql/item.h +++ b/sql/item.h @@ -44,9 +44,10 @@ class Item_field; #define MY_COLL_ALLOW_SUPERSET_CONV 1 #define MY_COLL_ALLOW_COERCIBLE_CONV 2 -#define MY_COLL_ALLOW_CONV 3 #define MY_COLL_DISALLOW_NONE 4 -#define MY_COLL_CMP_CONV 7 + +#define MY_COLL_ALLOW_CONV (MY_COLL_ALLOW_SUPERSET_CONV | MY_COLL_ALLOW_COERCIBLE_CONV) +#define MY_COLL_CMP_CONV (MY_COLL_ALLOW_CONV | MY_COLL_DISALLOW_NONE) class DTCollation { public: @@ -91,6 +92,12 @@ public: derivation= derivation_arg; repertoire= repertoire_arg; } + void set_numeric() + { + collation= &my_charset_numeric; + derivation= DERIVATION_NUMERIC; + repertoire= MY_REPERTOIRE_NUMERIC; + } void set(CHARSET_INFO *collation_arg) { collation= collation_arg; @@ -105,6 +112,7 @@ public: { switch(derivation) { + case DERIVATION_NUMERIC: return "NUMERIC"; case DERIVATION_IGNORABLE: return "IGNORABLE"; case DERIVATION_COERCIBLE: return "COERCIBLE"; case DERIVATION_IMPLICIT: return "IMPLICIT"; @@ -690,6 +698,77 @@ public: If value is not null null_value flag will be reset to FALSE. */ virtual String *val_str(String *str)=0; + + /* + Returns string representation of this item in ASCII format. + + SYNOPSIS + val_str_ascii() + str - similar to val_str(); + + NOTE + This method is introduced for performance optimization purposes. + + 1. val_str() result of some Items in string context + depends on @@character_set_results. + @@character_set_results can be set to a "real multibyte" character + set like UCS2, UTF16, UTF32. (We'll use only UTF32 in the examples + below for convenience.) + + So the default string result of such functions + in these circumstances is real multi-byte character set, like UTF32. + + For example, all numbers in string context + return result in @@character_set_results: + + SELECT CONCAT(20010101); -> UTF32 + + We do sprintf() first (to get ASCII representation) + and then convert to UTF32; + + So these kind "data sources" can use ASCII representation + internally, but return multi-byte data only because + @@character_set_results wants so. + Therefore, conversion from ASCII to UTF32 is applied internally. + + + 2. Some other functions need in fact ASCII input. + + For example, + inet_aton(), GeometryFromText(), Convert_TZ(), GET_FORMAT(). + + Similar, fields of certain type, like DATE, TIME, + when you insert string data into them, expect in fact ASCII input. + If they get non-ASCII input, for example UTF32, they + convert input from UTF32 to ASCII, and then use ASCII + representation to do further processing. + + + 3. Now imagine we pass result of a data source of the first type + to a data destination of the second type. + + What happens: + a. data source converts data from ASCII to UTF32, because + @@character_set_results wants so and passes the result to + data destination. + b. data destination gets UTF32 string. + c. data destination converts UTF32 string to ASCII, + because it needs ASCII representation to be able to handle data + correctly. + + As a result we get two steps of unnecessary conversion: + From ASCII to UTF32, then from UTF32 to ASCII. + + A better way to handle these situations is to pass ASCII + representation directly from the source to the destination. + + This is why val_str_ascii() introduced. + + RETURN + Similar to val_str() + */ + virtual String *val_str_ascii(String *str); + /* Return decimal representation of item with fixed point. @@ -864,6 +943,16 @@ public: static CHARSET_INFO *default_charset(); virtual CHARSET_INFO *compare_collation() { return NULL; } + /* + For backward compatibility, to make numeric + data types return "binary" charset in client-side metadata. + */ + virtual CHARSET_INFO *charset_for_protocol(void) const + { + return result_type() == STRING_RESULT ? collation.collation : + &my_charset_bin; + }; + virtual bool walk(Item_processor processor, bool walk_subquery, uchar *arg) { return (this->*processor)(arg); @@ -1069,6 +1158,20 @@ public: { return Field::GEOM_GEOMETRY; }; String *check_well_formed_result(String *str, bool send_error= 0); bool eq_by_collation(Item *item, bool binary_cmp, CHARSET_INFO *cs); + uint32 max_char_length() const + { return max_length / collation.collation->mbmaxlen; } + void fix_length_and_charset(uint32 max_char_length_arg, CHARSET_INFO *cs) + { + max_length= max_char_length_arg * cs->mbmaxlen; + collation.collation= cs; + } + void fix_char_length(uint32 max_char_length_arg) + { max_length= max_char_length_arg * collation.collation->mbmaxlen; } + void fix_length_and_charset_datetime(uint32 max_char_length_arg) + { + collation.set(&my_charset_numeric, DERIVATION_NUMERIC, MY_REPERTOIRE_ASCII); + fix_char_length(max_char_length_arg); + } }; @@ -1371,12 +1474,30 @@ bool agg_item_set_converter(DTCollation &coll, const char *fname, Item **args, uint nargs, uint flags, int item_sep); bool agg_item_charsets(DTCollation &c, const char *name, Item **items, uint nitems, uint flags, int item_sep); - +inline bool +agg_item_charsets_for_string_result(DTCollation &c, const char *name, + Item **items, uint nitems, + int item_sep= 1) +{ + uint flags= MY_COLL_ALLOW_SUPERSET_CONV | + MY_COLL_ALLOW_COERCIBLE_CONV; + return agg_item_charsets(c, name, items, nitems, flags, item_sep); +} +inline bool +agg_item_charsets_for_comparison(DTCollation &c, const char *name, + Item **items, uint nitems, + int item_sep= 1) +{ + uint flags= MY_COLL_ALLOW_SUPERSET_CONV | + MY_COLL_ALLOW_COERCIBLE_CONV | + MY_COLL_DISALLOW_NONE; + return agg_item_charsets(c, name, items, nitems, flags, item_sep); +} class Item_num: public Item_basic_constant { public: - Item_num() {} /* Remove gcc warning */ + Item_num() { collation.set_numeric(); } /* Remove gcc warning */ virtual Item_num *neg()= 0; Item *safe_charset_converter(CHARSET_INFO *tocs); bool check_partition_func_processor(uchar *int_arg) { return FALSE;} @@ -1561,6 +1682,8 @@ public: DBUG_ASSERT(field_type() == MYSQL_TYPE_GEOMETRY); return field->get_geometry_type(); } + CHARSET_INFO *charset_for_protocol(void) const + { return field->charset_for_protocol(); } friend class Item_default_value; friend class Item_insert_value; friend class st_select_lex_unit; |