From 1536bc4664a248faf81c62326fe1be3dbe18b8cd Mon Sep 17 00:00:00 2001 From: Mike Bayer Date: Mon, 13 Jan 2014 14:05:05 -0500 Subject: - The MySQL CAST compilation now takes into account aspects of a string type such as "charset" and "collation". While MySQL wants all character-based CAST calls to use the CHAR type, we now create a real CHAR object at CAST time and copy over all the parameters it has, so that an expression like ``cast(x, mysql.TEXT(charset='utf8'))`` will render ``CAST(t.col AS CHAR CHARACTER SET utf8)``. - Added new "unicode returns" detection to the MySQL dialect and to the default dialect system overall, such that any dialect can add extra "tests" to the on-first-connect "does this DBAPI return unicode directly?" detection. In this case, we are adding a check specifically against the "utf8" encoding with an explicit "utf8_bin" collation type (after checking that this collation is available) to test for some buggy unicode behavior observed with MySQLdb version 1.2.3. While MySQLdb has resolved this issue as of 1.2.4, the check here should guard against regressions. The change also allows the "unicode" checks to log in the engine logs, which was not previously the case. 
[ticket:2906] --- lib/sqlalchemy/connectors/mysqldb.py | 1 + lib/sqlalchemy/dialects/mysql/base.py | 34 +++++++++++++++------ lib/sqlalchemy/dialects/mysql/mysqldb.py | 26 +++++++++++++++- lib/sqlalchemy/engine/default.py | 51 +++++++++++++++++++------------- 4 files changed, 81 insertions(+), 31 deletions(-) (limited to 'lib/sqlalchemy') diff --git a/lib/sqlalchemy/connectors/mysqldb.py b/lib/sqlalchemy/connectors/mysqldb.py index 0f250dfdb..33e59218b 100644 --- a/lib/sqlalchemy/connectors/mysqldb.py +++ b/lib/sqlalchemy/connectors/mysqldb.py @@ -62,6 +62,7 @@ class MySQLDBConnector(Connector): # is overridden when pymysql is used return __import__('MySQLdb') + def do_executemany(self, cursor, statement, parameters, context=None): rowcount = cursor.executemany(statement, parameters) if context is not None: diff --git a/lib/sqlalchemy/dialects/mysql/base.py b/lib/sqlalchemy/dialects/mysql/base.py index a3942e89c..22675e592 100644 --- a/lib/sqlalchemy/dialects/mysql/base.py +++ b/lib/sqlalchemy/dialects/mysql/base.py @@ -976,6 +976,25 @@ class CHAR(_StringType, sqltypes.CHAR): """ super(CHAR, self).__init__(length=length, **kwargs) + @classmethod + def _adapt_string_for_cast(self, type_): + # copy the given string type into a CHAR + # for the purposes of rendering a CAST expression + type_ = sqltypes.to_instance(type_) + if isinstance(type_, sqltypes.CHAR): + return type_ + elif isinstance(type_, _StringType): + return CHAR( + length=type_.length, + charset=type_.charset, + collation=type_.collation, + ascii=type_.ascii, + binary=type_.binary, + unicode=type_.unicode, + national=False # not supported in CAST + ) + else: + return CHAR(length=type_.length) class NVARCHAR(_StringType, sqltypes.NVARCHAR): """MySQL NVARCHAR type. 
@@ -1397,14 +1416,9 @@ class MySQLCompiler(compiler.SQLCompiler): elif isinstance(type_, (sqltypes.DECIMAL, sqltypes.DateTime, sqltypes.Date, sqltypes.Time)): return self.dialect.type_compiler.process(type_) - elif isinstance(type_, sqltypes.Text): - return 'CHAR' - elif (isinstance(type_, sqltypes.String) and not - isinstance(type_, (ENUM, SET))): - if getattr(type_, 'length'): - return 'CHAR(%s)' % type_.length - else: - return 'CHAR' + elif isinstance(type_, sqltypes.String) and not isinstance(type_, (ENUM, SET)): + adapted = CHAR._adapt_string_for_cast(type_) + return self.dialect.type_compiler.process(adapted) elif isinstance(type_, sqltypes._Binary): return 'BINARY' elif isinstance(type_, sqltypes.NUMERIC): @@ -2165,7 +2179,6 @@ class MySQLDialect(default.DefaultDialect): rs.close() def initialize(self, connection): - default.DefaultDialect.initialize(self, connection) self._connection_charset = self._detect_charset(connection) self._detect_ansiquotes(connection) if self._server_ansiquotes: @@ -2174,6 +2187,8 @@ class MySQLDialect(default.DefaultDialect): self.identifier_preparer = self.preparer(self, server_ansiquotes=self._server_ansiquotes) + default.DefaultDialect.initialize(self, connection) + @property def _supports_cast(self): return self.server_version_info is None or \ @@ -2443,6 +2458,7 @@ class MySQLDialect(default.DefaultDialect): # as of MySQL 5.0.1 self._backslash_escapes = 'NO_BACKSLASH_ESCAPES' not in mode + def _show_create_table(self, connection, table, charset=None, full_name=None): """Run SHOW CREATE TABLE for a ``Table``.""" diff --git a/lib/sqlalchemy/dialects/mysql/mysqldb.py b/lib/sqlalchemy/dialects/mysql/mysqldb.py index c6942ae2d..84e8299d5 100644 --- a/lib/sqlalchemy/dialects/mysql/mysqldb.py +++ b/lib/sqlalchemy/dialects/mysql/mysqldb.py @@ -56,7 +56,8 @@ from ...connectors.mysqldb import ( MySQLDBIdentifierPreparer, MySQLDBConnector ) - +from .base import TEXT +from ... 
import sql class MySQLExecutionContext_mysqldb(MySQLDBExecutionContext, MySQLExecutionContext): pass @@ -75,4 +76,27 @@ class MySQLDialect_mysqldb(MySQLDBConnector, MySQLDialect): statement_compiler = MySQLCompiler_mysqldb preparer = MySQLIdentifierPreparer_mysqldb + def _check_unicode_returns(self, connection): + # work around issue fixed in + # https://github.com/farcepest/MySQLdb1/commit/cd44524fef63bd3fcb71947392326e9742d520e8 + # specific issue w/ the utf8_bin collation and unicode returns + + has_utf8_bin = connection.scalar( + "show collation where %s = 'utf8' and %s = 'utf8_bin'" + % ( + self.identifier_preparer.quote("Charset"), + self.identifier_preparer.quote("Collation") + )) + if has_utf8_bin: + additional_tests = [ + sql.collate(sql.cast( + sql.literal_column( + "'test collated returns'"), + TEXT(charset='utf8')), "utf8_bin") + ] + else: + additional_tests = [] + return super(MySQLDBConnector, self)._check_unicode_returns( + connection, additional_tests) + dialect = MySQLDialect_mysqldb diff --git a/lib/sqlalchemy/engine/default.py b/lib/sqlalchemy/engine/default.py index 509d772aa..bcb9960b1 100644 --- a/lib/sqlalchemy/engine/default.py +++ b/lib/sqlalchemy/engine/default.py @@ -228,46 +228,55 @@ class DefaultDialect(interfaces.Dialect): """ return None - def _check_unicode_returns(self, connection): + def _check_unicode_returns(self, connection, additional_tests=None): if util.py2k and not self.supports_unicode_statements: cast_to = util.binary_type else: cast_to = util.text_type - def check_unicode(formatstr, type_): + if self.positional: + parameters = self.execute_sequence_format() + else: + parameters = {} + + def check_unicode(test): cursor = connection.connection.cursor() try: try: - cursor.execute( - cast_to( - expression.select( - [expression.cast( - expression.literal_column( - "'test %s returns'" % formatstr), - type_) - ]).compile(dialect=self) - ) - ) + statement = cast_to(expression.select([test]).compile(dialect=self)) + 
connection._cursor_execute(cursor, statement, parameters) row = cursor.fetchone() return isinstance(row[0], util.text_type) - except self.dbapi.Error as de: + except exc.DBAPIError as de: util.warn("Exception attempting to " "detect unicode returns: %r" % de) return False finally: cursor.close() - # detect plain VARCHAR - unicode_for_varchar = check_unicode("plain", sqltypes.VARCHAR(60)) - - # detect if there's an NVARCHAR type with different behavior available - unicode_for_unicode = check_unicode("unicode", sqltypes.Unicode(60)) - - if unicode_for_unicode and not unicode_for_varchar: + tests = [ + # detect plain VARCHAR + expression.cast( + expression.literal_column("'test plain returns'"), + sqltypes.VARCHAR(60) + ), + # detect if there's an NVARCHAR type with different behavior available + expression.cast( + expression.literal_column("'test unicode returns'"), + sqltypes.Unicode(60) + ), + ] + + if additional_tests: + tests += additional_tests + + results = set([check_unicode(test) for test in tests]) + + if results.issuperset([True, False]): return "conditional" else: - return unicode_for_varchar + return results == set([True]) def _check_unicode_description(self, connection): # all DBAPIs on Py2K return cursor.description as encoded, -- cgit v1.2.1