From 1536bc4664a248faf81c62326fe1be3dbe18b8cd Mon Sep 17 00:00:00 2001
From: Mike Bayer <mike_mp@zzzcomputing.com>
Date: Mon, 13 Jan 2014 14:05:05 -0500
Subject: - The MySQL CAST compilation now takes into account aspects of a
 string type such as "charset" and "collation".  While MySQL wants all
 character-based CAST calls to use the CHAR type, we now create a real CHAR
 object at CAST time and copy over all the parameters it has, so that an
 expression like ``cast(x, mysql.TEXT(charset='utf8'))`` will render
 ``CAST(t.col AS CHAR CHARACTER SET utf8)``.

- Added new "unicode returns" detection to the MySQL dialect and
to the default dialect system overall, such that any dialect
can add extra "tests" to the on-first-connect "does this DBAPI
return unicode directly?" detection. In this case, we are
adding a check specifically against the "utf8" encoding with
an explicit "utf8_bin" collation type (after checking that
this collation is available) to test for some buggy unicode
behavior observed with MySQLdb version 1.2.3.  While MySQLdb
has resolved this issue as of 1.2.4, the check here should
guard against regressions.  The change also allows the "unicode"
checks to log in the engine logs, which was not previously
the case. [ticket:2906]
---
 lib/sqlalchemy/connectors/mysqldb.py     |  1 +
 lib/sqlalchemy/dialects/mysql/base.py    | 34 +++++++++++++++------
 lib/sqlalchemy/dialects/mysql/mysqldb.py | 26 +++++++++++++++-
 lib/sqlalchemy/engine/default.py         | 51 +++++++++++++++++++-------------
 4 files changed, 81 insertions(+), 31 deletions(-)

(limited to 'lib/sqlalchemy')

diff --git a/lib/sqlalchemy/connectors/mysqldb.py b/lib/sqlalchemy/connectors/mysqldb.py
index 0f250dfdb..33e59218b 100644
--- a/lib/sqlalchemy/connectors/mysqldb.py
+++ b/lib/sqlalchemy/connectors/mysqldb.py
@@ -62,6 +62,7 @@ class MySQLDBConnector(Connector):
         # is overridden when pymysql is used
         return __import__('MySQLdb')
 
+
     def do_executemany(self, cursor, statement, parameters, context=None):
         rowcount = cursor.executemany(statement, parameters)
         if context is not None:
diff --git a/lib/sqlalchemy/dialects/mysql/base.py b/lib/sqlalchemy/dialects/mysql/base.py
index a3942e89c..22675e592 100644
--- a/lib/sqlalchemy/dialects/mysql/base.py
+++ b/lib/sqlalchemy/dialects/mysql/base.py
@@ -976,6 +976,25 @@ class CHAR(_StringType, sqltypes.CHAR):
         """
         super(CHAR, self).__init__(length=length, **kwargs)
 
+    @classmethod
+    def _adapt_string_for_cast(self, type_):
+        # copy the given string type into a CHAR
+        # for the purposes of rendering a CAST expression
+        type_ = sqltypes.to_instance(type_)
+        if isinstance(type_, sqltypes.CHAR):
+            return type_
+        elif isinstance(type_, _StringType):
+            return CHAR(
+                length=type_.length,
+                charset=type_.charset,
+                collation=type_.collation,
+                ascii=type_.ascii,
+                binary=type_.binary,
+                unicode=type_.unicode,
+                national=False # not supported in CAST
+            )
+        else:
+            return CHAR(length=type_.length)
 
 class NVARCHAR(_StringType, sqltypes.NVARCHAR):
     """MySQL NVARCHAR type.
@@ -1397,14 +1416,9 @@ class MySQLCompiler(compiler.SQLCompiler):
         elif isinstance(type_, (sqltypes.DECIMAL, sqltypes.DateTime,
                                             sqltypes.Date, sqltypes.Time)):
             return self.dialect.type_compiler.process(type_)
-        elif isinstance(type_, sqltypes.Text):
-            return 'CHAR'
-        elif (isinstance(type_, sqltypes.String) and not
-              isinstance(type_, (ENUM, SET))):
-            if getattr(type_, 'length'):
-                return 'CHAR(%s)' % type_.length
-            else:
-                return 'CHAR'
+        elif isinstance(type_, sqltypes.String) and not isinstance(type_, (ENUM, SET)):
+            adapted = CHAR._adapt_string_for_cast(type_)
+            return self.dialect.type_compiler.process(adapted)
         elif isinstance(type_, sqltypes._Binary):
             return 'BINARY'
         elif isinstance(type_, sqltypes.NUMERIC):
@@ -2165,7 +2179,6 @@ class MySQLDialect(default.DefaultDialect):
                 rs.close()
 
     def initialize(self, connection):
-        default.DefaultDialect.initialize(self, connection)
         self._connection_charset = self._detect_charset(connection)
         self._detect_ansiquotes(connection)
         if self._server_ansiquotes:
@@ -2174,6 +2187,8 @@ class MySQLDialect(default.DefaultDialect):
             self.identifier_preparer = self.preparer(self,
                                 server_ansiquotes=self._server_ansiquotes)
 
+        default.DefaultDialect.initialize(self, connection)
+
     @property
     def _supports_cast(self):
         return self.server_version_info is None or \
@@ -2443,6 +2458,7 @@ class MySQLDialect(default.DefaultDialect):
         # as of MySQL 5.0.1
         self._backslash_escapes = 'NO_BACKSLASH_ESCAPES' not in mode
 
+
     def _show_create_table(self, connection, table, charset=None,
                            full_name=None):
         """Run SHOW CREATE TABLE for a ``Table``."""
diff --git a/lib/sqlalchemy/dialects/mysql/mysqldb.py b/lib/sqlalchemy/dialects/mysql/mysqldb.py
index c6942ae2d..84e8299d5 100644
--- a/lib/sqlalchemy/dialects/mysql/mysqldb.py
+++ b/lib/sqlalchemy/dialects/mysql/mysqldb.py
@@ -56,7 +56,8 @@ from ...connectors.mysqldb import (
                         MySQLDBIdentifierPreparer,
                         MySQLDBConnector
                     )
-
+from .base import TEXT
+from ... import sql
 
 class MySQLExecutionContext_mysqldb(MySQLDBExecutionContext, MySQLExecutionContext):
     pass
@@ -75,4 +76,27 @@ class MySQLDialect_mysqldb(MySQLDBConnector, MySQLDialect):
     statement_compiler = MySQLCompiler_mysqldb
     preparer = MySQLIdentifierPreparer_mysqldb
 
+    def _check_unicode_returns(self, connection):
+        # work around issue fixed in
+        # https://github.com/farcepest/MySQLdb1/commit/cd44524fef63bd3fcb71947392326e9742d520e8
+        # specific issue w/ the utf8_bin collation and unicode returns
+
+        has_utf8_bin = connection.scalar(
+                                "show collation where %s = 'utf8' and %s = 'utf8_bin'"
+                                    % (
+                                    self.identifier_preparer.quote("Charset"),
+                                    self.identifier_preparer.quote("Collation")
+                                ))
+        if has_utf8_bin:
+            additional_tests = [
+                sql.collate(sql.cast(
+                        sql.literal_column(
+                            "'test collated returns'"),
+                            TEXT(charset='utf8')), "utf8_bin")
+            ]
+        else:
+            additional_tests = []
+        return super(MySQLDBConnector, self)._check_unicode_returns(
+                            connection, additional_tests)
+
 dialect = MySQLDialect_mysqldb
diff --git a/lib/sqlalchemy/engine/default.py b/lib/sqlalchemy/engine/default.py
index 509d772aa..bcb9960b1 100644
--- a/lib/sqlalchemy/engine/default.py
+++ b/lib/sqlalchemy/engine/default.py
@@ -228,46 +228,55 @@ class DefaultDialect(interfaces.Dialect):
         """
         return None
 
-    def _check_unicode_returns(self, connection):
+    def _check_unicode_returns(self, connection, additional_tests=None):
         if util.py2k and not self.supports_unicode_statements:
             cast_to = util.binary_type
         else:
             cast_to = util.text_type
 
-        def check_unicode(formatstr, type_):
+        if self.positional:
+            parameters = self.execute_sequence_format()
+        else:
+            parameters = {}
+
+        def check_unicode(test):
             cursor = connection.connection.cursor()
             try:
                 try:
-                    cursor.execute(
-                        cast_to(
-                            expression.select(
-                                [expression.cast(
-                                    expression.literal_column(
-                                        "'test %s returns'" % formatstr),
-                                        type_)
-                            ]).compile(dialect=self)
-                        )
-                    )
+                    statement = cast_to(expression.select([test]).compile(dialect=self))
+                    connection._cursor_execute(cursor, statement, parameters)
                     row = cursor.fetchone()
 
                     return isinstance(row[0], util.text_type)
-                except self.dbapi.Error as de:
+                except exc.DBAPIError as de:
                     util.warn("Exception attempting to "
                             "detect unicode returns: %r" % de)
                     return False
             finally:
                 cursor.close()
 
-        # detect plain VARCHAR
-        unicode_for_varchar = check_unicode("plain", sqltypes.VARCHAR(60))
-
-        # detect if there's an NVARCHAR type with different behavior available
-        unicode_for_unicode = check_unicode("unicode", sqltypes.Unicode(60))
-
-        if unicode_for_unicode and not unicode_for_varchar:
+        tests = [
+            # detect plain VARCHAR
+            expression.cast(
+                expression.literal_column("'test plain returns'"),
+                sqltypes.VARCHAR(60)
+            ),
+            # detect if there's an NVARCHAR type with different behavior available
+            expression.cast(
+                expression.literal_column("'test unicode returns'"),
+                sqltypes.Unicode(60)
+            ),
+        ]
+
+        if additional_tests:
+            tests += additional_tests
+
+        results = set([check_unicode(test) for test in tests])
+
+        if results.issuperset([True, False]):
             return "conditional"
         else:
-            return unicode_for_varchar
+            return results == set([True])
 
     def _check_unicode_description(self, connection):
         # all DBAPIs on Py2K return cursor.description as encoded,
-- 
cgit v1.2.1