summaryrefslogtreecommitdiff
path: root/sql/item.h
diff options
context:
space:
mode:
authorAlexander Barkov <bar@mysql.com>2010-02-11 08:17:25 +0400
committerAlexander Barkov <bar@mysql.com>2010-02-11 08:17:25 +0400
commit702166bcdec5705dd90d8567a88056893433c719 (patch)
treed6306efc9e4d7e6bc6bf4ae5a58bd6aec40420e9 /sql/item.h
parent6dd93757263fd219fcbd3d1ed100a681ecbc7c92 (diff)
downloadmariadb-git-702166bcdec5705dd90d8567a88056893433c719.tar.gz
WL#2649 Number-to-string conversions
added: include/ctype_numconv.inc mysql-test/include/ctype_numconv.inc mysql-test/r/ctype_binary.result mysql-test/t/ctype_binary.test Adding tests modified: mysql-test/r/bigint.result mysql-test/r/case.result mysql-test/r/create.result mysql-test/r/ctype_cp1251.result mysql-test/r/ctype_latin1.result mysql-test/r/ctype_ucs.result mysql-test/r/func_gconcat.result mysql-test/r/func_str.result mysql-test/r/metadata.result mysql-test/r/ps_1general.result mysql-test/r/ps_2myisam.result mysql-test/r/ps_3innodb.result mysql-test/r/ps_4heap.result mysql-test/r/ps_5merge.result mysql-test/r/show_check.result mysql-test/r/type_datetime.result mysql-test/r/type_ranges.result mysql-test/r/union.result mysql-test/suite/ndb/r/ps_7ndb.result mysql-test/t/ctype_cp1251.test mysql-test/t/ctype_latin1.test mysql-test/t/ctype_ucs.test mysql-test/t/func_str.test Fixing tests @ sql/field.cc - Return str result using my_charset_numeric. - Using real multi-byte aware str_to_XXX functions to handle tricky charset values properly (e.g. UCS2) @ sql/field.h - Changing derivation of non-string field types to DERIVATION_NUMERIC. - Changing binary() for numeric/datetime fields to always return TRUE even if charset is not my_charset_bin. We need this to keep ha_base_keytype() return HA_KEYTYPE_BINARY. - Adding BINARY_FLAG into some fields, because it's not being set automatically anymore with "my_charset_bin to my_charset_numeric" change. - Changing derivation for numeric/datetime datatypes to a weaker value, to make "SELECT concat('string', field)" use character set of the string literal for the result of the function. @ sql/item.cc - Implementing generic val_str_ascii(). - Using max_char_length() instead of direct read of max_length to make "tricky" charsets like UCS2 work. NOTE: in the future we'll possibly remove all direct reads of max_length - Fixing Item_num::safe_charset_converter(). 
Previously it aligned binary string to character string (for example by adding leading 0x00 when doing binary->UCS2 conversion). Now it just converts from my_charset_numeric to "tocs". - Using val_str_ascii() in Item::get_time() to make UCS2 arguments work. - Other misc changes @ sql/item.h - Changing MY_COLL_CMP_CONV and MY_COLL_ALLOW_CONV to bit operations instead of hard-coded bit masks. - Adding new method DTCollation.set_numeric(). - Adding new methods to Item. - Adding helper functions to make code look nicer: agg_item_charsets_for_string_result() agg_item_charsets_for_comparison() - Changing charset for Item_num-derived items from my_charset_bin to my_charset_numeric (which is an alias for latin1). @ sql/item_cmpfunc.cc - Using new helper functions - Other misc changes @ sql/item_cmpfunc.h - Fixing strcmp() to return max_length=2. Previously it returned 1, which was wrong, because it did not fit '-1'. @ sql/item_func.cc - Using new helper functions - Other minor changes @ sql/item_func.h - Removing unused functions - Adding helper functions agg_arg_charsets_for_string_result() agg_arg_charsets_for_comparison() - Adding set_numeric() into constructors of numeric items. - Using fix_length_and_charset() and fix_char_length() instead of direct write to max_length. @ sql/item_geofunc.cc - Changing class for Item_func_geometry_type and Item_func_as_wkt from Item_str_func to Item_str_ascii_func, to make them return UCS2 result properly (when character_set_connection=ucs2). @ sql/item_geofunc.h - Changing class for Item_func_geometry_type and Item_func_as_wkt from Item_str_func to Item_str_ascii_func, to make them return UCS2 result properly (when @@character_set_connection=ucs2). @ sql/item_strfunc.cc - Implementing Item_str_func::val_str(). - Renaming val_str to val_str_ascii for some items, to make them work with UCS2 properly. 
- Using new helper functions - All single-argument functions that expect string result now call this method: agg_arg_charsets_for_string_result(collation, args, 1); This enables character set conversion to @@character_set_connection in case of pure numeric input. @ sql/item_strfunc.h - Introducing Item_str_ascii_func - for functions which return pure ASCII data, for performance purposes, as well as for the cases when the old implementation of val_str() was heavily 8-bit oriented and implementing a UCS2-aware version is tricky. @ sql/item_sum.cc - Using new helper functions. @ sql/item_timefunc.cc - Using my_charset_numeric instead of my_charset_bin. - Using fix_char_length(), fix_length_and_charset() and fix_length_and_charset_datetime() instead of direct write to max_length. - Using tricky-charset aware function str_to_time_with_warn() @ sql/item_timefunc.h - Using new helper functions for charset and length initialization. - Changing base class for Item_func_get_format() to make it return UCS2 properly (when character_set_connection=ucs2). @ sql/item_xmlfunc.cc - Using new helper function @ sql/my_decimal.cc - Adding a new DECIMAL to CHAR converter with real multibyte support (e.g. UCS2) @ sql/mysql_priv.h - Introducing a new derivation level for numeric/datetime data types. - Adding macros for my_charset_numeric and MY_REPERTOIRE_NUMERIC. - Adding prototypes for str_set_decimal() - Adding prototypes for character-set aware str_to_xxx() functions. @ sql/protocol.cc - Changing charsetnr to "binary" client-side metadata for numeric/datetime data types. @ sql/time.cc - Adding to_ascii() helper function, to convert a string in any character set to ascii representation. In the future can be extended to understand digits written in various non-Latin word scripts. 
- Adding real multi-byte character set aware versions for str_to_XXXX, to make these types of queries work correctly: INSERT INTO t1 SET datetime_column=ucs2_expression; @ strings/ctype-ucs2.c - endptr was not calculated correctly. INSERTing of UCS2 values into numeric columns returned warnings about truncated wrong data.
Diffstat (limited to 'sql/item.h')
-rw-r--r--sql/item.h131
1 files changed, 127 insertions, 4 deletions
diff --git a/sql/item.h b/sql/item.h
index b7e6cc6c204..0bfb2673121 100644
--- a/sql/item.h
+++ b/sql/item.h
@@ -44,9 +44,10 @@ class Item_field;
#define MY_COLL_ALLOW_SUPERSET_CONV 1
#define MY_COLL_ALLOW_COERCIBLE_CONV 2
-#define MY_COLL_ALLOW_CONV 3
#define MY_COLL_DISALLOW_NONE 4
-#define MY_COLL_CMP_CONV 7
+
+/* Composite flag sets, expressed as ORs of the single-bit MY_COLL_* flags defined above. */
+#define MY_COLL_ALLOW_CONV (MY_COLL_ALLOW_SUPERSET_CONV | MY_COLL_ALLOW_COERCIBLE_CONV)
+#define MY_COLL_CMP_CONV (MY_COLL_ALLOW_CONV | MY_COLL_DISALLOW_NONE)
class DTCollation {
public:
@@ -91,6 +92,12 @@ public:
derivation= derivation_arg;
repertoire= repertoire_arg;
}
+  /*
+    Make this collation describe a numeric value: numeric character set,
+    DERIVATION_NUMERIC derivation and MY_REPERTOIRE_NUMERIC repertoire.
+  */
+  void set_numeric()
+  {
+    repertoire= MY_REPERTOIRE_NUMERIC;
+    derivation= DERIVATION_NUMERIC;
+    collation= &my_charset_numeric;
+  }
void set(CHARSET_INFO *collation_arg)
{
collation= collation_arg;
@@ -105,6 +112,7 @@ public:
{
switch(derivation)
{
+ case DERIVATION_NUMERIC: return "NUMERIC";
case DERIVATION_IGNORABLE: return "IGNORABLE";
case DERIVATION_COERCIBLE: return "COERCIBLE";
case DERIVATION_IMPLICIT: return "IMPLICIT";
@@ -690,6 +698,77 @@ public:
If value is not null null_value flag will be reset to FALSE.
*/
virtual String *val_str(String *str)=0;
+
+ /*
+ Returns string representation of this item in ASCII format.
+
+ SYNOPSIS
+ val_str_ascii()
+ str - similar to val_str();
+
+ NOTE
+ This method is introduced for performance optimization purposes.
+
+ 1. val_str() result of some Items in string context
+ depends on @@character_set_results.
+ @@character_set_results can be set to a "real multibyte" character
+ set like UCS2, UTF16, UTF32. (We'll use only UTF32 in the examples
+ below for convenience.)
+
+ So the default string result of such functions
+ in these circumstances is real multi-byte character set, like UTF32.
+
+ For example, all numbers in string context
+ return result in @@character_set_results:
+
+ SELECT CONCAT(20010101); -> UTF32
+
+ We do sprintf() first (to get ASCII representation)
+ and then convert to UTF32;
+
+ So these kinds of "data sources" can use ASCII representation
+ internally, but return multi-byte data only because
+ @@character_set_results wants so.
+ Therefore, conversion from ASCII to UTF32 is applied internally.
+
+
+ 2. Some other functions need in fact ASCII input.
+
+ For example,
+ inet_aton(), GeometryFromText(), Convert_TZ(), GET_FORMAT().
+
+ Similarly, fields of certain types, like DATE and TIME,
+ when you insert string data into them, expect in fact ASCII input.
+ If they get non-ASCII input, for example UTF32, they
+ convert input from UTF32 to ASCII, and then use ASCII
+ representation to do further processing.
+
+
+ 3. Now imagine we pass result of a data source of the first type
+ to a data destination of the second type.
+
+ What happens:
+ a. data source converts data from ASCII to UTF32, because
+ @@character_set_results wants so and passes the result to
+ data destination.
+ b. data destination gets UTF32 string.
+ c. data destination converts UTF32 string to ASCII,
+ because it needs ASCII representation to be able to handle data
+ correctly.
+
+ As a result we get two steps of unnecessary conversion:
+ From ASCII to UTF32, then from UTF32 to ASCII.
+
+ A better way to handle these situations is to pass ASCII
+ representation directly from the source to the destination.
+
+ This is why val_str_ascii() was introduced.
+
+ RETURN
+ Similar to val_str()
+ */
+ virtual String *val_str_ascii(String *str);
+
/*
Return decimal representation of item with fixed point.
@@ -864,6 +943,16 @@ public:
static CHARSET_INFO *default_charset();
virtual CHARSET_INFO *compare_collation() { return NULL; }
+  /*
+    Character set used for this item in client-side metadata.
+
+    Items with a string result report their own collation. Every other
+    result type reports "binary"; this keeps the metadata of numeric
+    data types backward compatible.
+  */
+  virtual CHARSET_INFO *charset_for_protocol(void) const
+  {
+    if (result_type() != STRING_RESULT)
+      return &my_charset_bin;
+    return collation.collation;
+  }
+
virtual bool walk(Item_processor processor, bool walk_subquery, uchar *arg)
{
return (this->*processor)(arg);
@@ -1069,6 +1158,20 @@ public:
{ return Field::GEOM_GEOMETRY; };
String *check_well_formed_result(String *str, bool send_error= 0);
bool eq_by_collation(Item *item, bool binary_cmp, CHARSET_INFO *cs);
+  /*
+    Maximum length of the item in characters: max_length divided by
+    the mbmaxlen of the item's character set.
+  */
+  uint32 max_char_length() const
+  {
+    const uint bytes_per_char= collation.collation->mbmaxlen;
+    return max_length / bytes_per_char;
+  }
+  /*
+    Set the character set of the item together with its maximum length.
+
+    max_char_length_arg is a length in characters; it is scaled by
+    cs->mbmaxlen before being stored in max_length. The product is
+    saturated at the largest uint32 value instead of silently wrapping
+    around when the character length is large and mbmaxlen > 1.
+  */
+  void fix_length_and_charset(uint32 max_char_length_arg, CHARSET_INFO *cs)
+  {
+    const uint32 length_limit= ~(uint32) 0;
+    const uint mbmaxlen= cs->mbmaxlen;
+    /* With mbmaxlen <= 1 the product cannot overflow (and we never divide by 0). */
+    if (mbmaxlen > 1 && max_char_length_arg > length_limit / mbmaxlen)
+      max_length= length_limit;
+    else
+      max_length= max_char_length_arg * mbmaxlen;
+    collation.collation= cs;
+  }
+  /*
+    Set max_length from a length given in characters, using the mbmaxlen
+    of the item's current character set. The product is saturated at the
+    largest uint32 value instead of silently wrapping around.
+  */
+  void fix_char_length(uint32 max_char_length_arg)
+  {
+    const uint32 length_limit= ~(uint32) 0;
+    const uint mbmaxlen= collation.collation->mbmaxlen;
+    /* With mbmaxlen <= 1 the product cannot overflow (and we never divide by 0). */
+    if (mbmaxlen > 1 && max_char_length_arg > length_limit / mbmaxlen)
+      max_length= length_limit;
+    else
+      max_length= max_char_length_arg * mbmaxlen;
+  }
+  /*
+    Set up collation and length together: the collation becomes
+    my_charset_numeric with DERIVATION_NUMERIC and MY_REPERTOIRE_ASCII,
+    then max_length is derived from the given length in characters.
+  */
+  void fix_length_and_charset_datetime(uint32 max_char_length_arg)
+  {
+    collation.set(&my_charset_numeric,
+                  DERIVATION_NUMERIC,
+                  MY_REPERTOIRE_ASCII);
+    /* Must follow collation.set(): the length depends on the new charset. */
+    fix_char_length(max_char_length_arg);
+  }
};
@@ -1371,12 +1474,30 @@ bool agg_item_set_converter(DTCollation &coll, const char *fname,
Item **args, uint nargs, uint flags, int item_sep);
bool agg_item_charsets(DTCollation &c, const char *name,
Item **items, uint nitems, uint flags, int item_sep);
-
+/*
+  Wrapper around agg_item_charsets() that passes MY_COLL_ALLOW_CONV,
+  i.e. the superset and coercible conversion flags.
+  item_sep defaults to 1.
+*/
+inline bool
+agg_item_charsets_for_string_result(DTCollation &c, const char *name,
+                                    Item **items, uint nitems,
+                                    int item_sep= 1)
+{
+  return agg_item_charsets(c, name, items, nitems,
+                           MY_COLL_ALLOW_CONV, item_sep);
+}
+/*
+  Wrapper around agg_item_charsets() that passes MY_COLL_CMP_CONV,
+  i.e. the superset and coercible conversion flags plus
+  MY_COLL_DISALLOW_NONE. item_sep defaults to 1.
+*/
+inline bool
+agg_item_charsets_for_comparison(DTCollation &c, const char *name,
+                                 Item **items, uint nitems,
+                                 int item_sep= 1)
+{
+  return agg_item_charsets(c, name, items, nitems,
+                           MY_COLL_CMP_CONV, item_sep);
+}
class Item_num: public Item_basic_constant
{
public:
- Item_num() {} /* Remove gcc warning */
+ Item_num() { collation.set_numeric(); } /* Remove gcc warning; also gives the item the numeric collation */
virtual Item_num *neg()= 0;
Item *safe_charset_converter(CHARSET_INFO *tocs);
bool check_partition_func_processor(uchar *int_arg) { return FALSE;}
@@ -1561,6 +1682,8 @@ public:
DBUG_ASSERT(field_type() == MYSQL_TYPE_GEOMETRY);
return field->get_geometry_type();
}
+  /* Delegates to the underlying field's charset_for_protocol(). */
+  CHARSET_INFO *charset_for_protocol(void) const
+  {
+    return field->charset_for_protocol();
+  }
friend class Item_default_value;
friend class Item_insert_value;
friend class st_select_lex_unit;