summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Mike Bayer <mike_mp@zzzcomputing.com> 2014-01-17 17:36:43 -0500
committer Mike Bayer <mike_mp@zzzcomputing.com> 2014-01-17 17:36:43 -0500
commit 882f615c68cd2d244a8d2cf480f3532a84bdb6fa (patch)
tree 546c82bc04351bca317f570f1a696ebc3ae5674e
parent 4765895d10ff4bc89f30c99fa709438fa9764b6c (diff)
download sqlalchemy-882f615c68cd2d244a8d2cf480f3532a84bdb6fa.tar.gz
- Rework Oracle to no longer do its own unicode conversion; this has been observed
to be very slow. This now has the effect of producing "conditional" unicode conversion for the Oracle backend, as it still returns NVARCHAR etc. as unicode. [ticket:2911]
- Add new "conditional" functionality to unicode processors; the C-level function now uses PyUnicode_Check() as a fast alternative to the isinstance() check in Python.
-rw-r--r-- doc/build/changelog/changelog_09.rst 37
-rw-r--r-- lib/sqlalchemy/cextension/processors.c 41
-rw-r--r-- lib/sqlalchemy/dialects/oracle/cx_oracle.py 3
-rw-r--r-- lib/sqlalchemy/processors.py 23
-rw-r--r-- lib/sqlalchemy/sql/sqltypes.py 17
5 files changed, 104 insertions, 17 deletions
diff --git a/doc/build/changelog/changelog_09.rst b/doc/build/changelog/changelog_09.rst
index cfb4a5b24..369eb6c42 100644
--- a/doc/build/changelog/changelog_09.rst
+++ b/doc/build/changelog/changelog_09.rst
@@ -15,6 +15,43 @@
:version: 0.9.2
.. change::
+ :tags: bug, oracle
+ :tickets: 2911
+
+ It's been observed that the usage of a cx_Oracle "outputtypehandler"
+ in Python 2.xx in order to coerce string values to Unicode is inordinately
+ expensive; even though cx_Oracle is written in C, when you pass the
+ Python ``unicode`` primitive to cursor.var() and associate with an output
+ handler, the library counts every conversion as a Python function call
+ with all the requisite overhead being recorded; this is *despite* the fact
+ that when running in Python 3, all strings are also unconditionally coerced
+ to unicode without incurring this overhead,
+ meaning that cx_Oracle is failing to use performant techniques in Py2K.
+ As SQLAlchemy cannot easily select for this style of type handler on a
+ per-column basis, the handler was assembled unconditionally thereby
+ adding the overhead to all string access.
+
+ So this logic has been replaced with SQLAlchemy's own unicode
+ conversion system, which now
+ only takes effect in Py2K for columns that are requested as unicode.
+ When C extensions are used, SQLAlchemy's system appears to be 2-3x faster than
+ cx_Oracle's. Additionally, SQLAlchemy's unicode conversion has been
+ enhanced such that when the "conditional" converter is required
+ (now needed for the Oracle backend), the check for "already unicode" is now
+ performed in C and no longer introduces significant overhead.
+
+ This change has two impacts on the cx_Oracle backend. One is that
+ string values in Py2K which aren't specifically requested with the
+ Unicode type or convert_unicode=True will now come back as ``str``,
+ not ``unicode`` - this behavior is similar to a backend such as
+ MySQL. Additionally, when unicode values are requested with the cx_Oracle
+ backend, if the C extensions are *not* used, there is now an additional
+ overhead of an isinstance() check per column. This tradeoff has been
+ made as the overhead can be worked around, and the change no longer places
+ a performance burden on the likely majority of Oracle result columns that
+ are non-unicode strings.
+
+ .. change::
:tags: bug, orm
:tickets: 2908
diff --git a/lib/sqlalchemy/cextension/processors.c b/lib/sqlalchemy/cextension/processors.c
index c1e68fe0f..d56817763 100644
--- a/lib/sqlalchemy/cextension/processors.c
+++ b/lib/sqlalchemy/cextension/processors.c
@@ -409,6 +409,45 @@ UnicodeResultProcessor_process(UnicodeResultProcessor *self, PyObject *value)
return PyUnicode_Decode(str, len, encoding, errors);
}
+static PyObject *
+UnicodeResultProcessor_conditional_process(UnicodeResultProcessor *self, PyObject *value)
+{
+ const char *encoding, *errors;
+ char *str;
+ Py_ssize_t len;
+ /* Conditional decode: None and already-unicode values are returned as-is; anything else is decoded with self->encoding / self->errors. */
+ if (value == Py_None)
+ Py_RETURN_NONE;
+ /* The two preprocessor branches differ only in the byte-string API used: PyBytes_* on Python 3, PyString_* on Python 2. */
+#if PY_MAJOR_VERSION >= 3
+ if (PyUnicode_Check(value) == 1) { /* already unicode: skip decoding */
+ Py_INCREF(value); /* the caller receives its own reference to the same object */
+ return value;
+ }
+
+ if (PyBytes_AsStringAndSize(value, &str, &len)) /* nonzero result: give up and propagate the failure */
+ return NULL;
+ /* encoding and errors are read from the processor as bytes objects in this branch. */
+ encoding = PyBytes_AS_STRING(self->encoding);
+ errors = PyBytes_AS_STRING(self->errors);
+#else
+
+ if (PyUnicode_Check(value) == 1) { /* already unicode: skip decoding */
+ Py_INCREF(value); /* the caller receives its own reference to the same object */
+ return value;
+ }
+
+ if (PyString_AsStringAndSize(value, &str, &len)) /* nonzero result: give up and propagate the failure */
+ return NULL;
+
+ /* encoding and errors are read from the processor as Python 2 str objects in this branch. */
+ encoding = PyString_AS_STRING(self->encoding);
+ errors = PyString_AS_STRING(self->errors);
+#endif
+ /* Decode the raw buffer of `len` bytes; the result (or NULL on failure) is returned directly. */
+ return PyUnicode_Decode(str, len, encoding, errors);
+}
+
static void
UnicodeResultProcessor_dealloc(UnicodeResultProcessor *self)
{
@@ -424,6 +463,8 @@ UnicodeResultProcessor_dealloc(UnicodeResultProcessor *self)
static PyMethodDef UnicodeResultProcessor_methods[] = { /* methods exposed on UnicodeResultProcessor instances */
{"process", (PyCFunction)UnicodeResultProcessor_process, METH_O,
"The value processor itself."},
+ {"conditional_process", (PyCFunction)UnicodeResultProcessor_conditional_process, METH_O, /* METH_O: called with a single object argument */
+ "Conditional version of the value processor."},
{NULL} /* Sentinel */
};
diff --git a/lib/sqlalchemy/dialects/oracle/cx_oracle.py b/lib/sqlalchemy/dialects/oracle/cx_oracle.py
index c427e4bca..599eb21a3 100644
--- a/lib/sqlalchemy/dialects/oracle/cx_oracle.py
+++ b/lib/sqlalchemy/dialects/oracle/cx_oracle.py
@@ -748,9 +748,6 @@ class OracleDialect_cx_oracle(OracleDialect):
255,
outconverter=self._detect_decimal,
arraysize=cursor.arraysize)
- # allow all strings to come back natively as Unicode
- elif defaultType in (cx_Oracle.STRING, cx_Oracle.FIXED_CHAR):
- return cursor.var(util.text_type, size, cursor.arraysize)
def on_connect(conn):
conn.outputtypehandler = output_type_handler
diff --git a/lib/sqlalchemy/processors.py b/lib/sqlalchemy/processors.py
index 0abf063b3..d0f52e42b 100644
--- a/lib/sqlalchemy/processors.py
+++ b/lib/sqlalchemy/processors.py
@@ -15,6 +15,7 @@ They all share one common characteristic: None is passed through unchanged.
import codecs
import re
import datetime
+from . import util
def str_to_datetime_processor_factory(regexp, type_):
@@ -66,6 +67,21 @@ def py_fallback():
return decoder(value, errors)[0]
return process
+ def to_conditional_unicode_processor_factory(encoding, errors=None):  # returns a process(value) callable for `encoding`
+ decoder = codecs.getdecoder(encoding)  # looked up once here, reused by every process() call
+ # process() decodes only when needed: None and util.text_type values are returned unchanged.
+ def process(value):
+ if value is None:
+ return None
+ elif isinstance(value, util.text_type):  # already text: nothing to decode
+ return value
+ else:
+ # decoder returns a tuple: (value, len). Simply dropping the
+ # len part is safe: it is done that way in the normal
+ # 'xx'.decode(encoding) code path.
+ return decoder(value, errors)[0]
+ return process
+
def to_decimal_processor_factory(target_class, scale):
fstring = "%%.%df" % scale
@@ -113,12 +129,17 @@ try:
str_to_date
def to_unicode_processor_factory(encoding, errors=None):  # returns the bound `process` method of a new UnicodeResultProcessor
- # this is cumbersome but it would be even more so on the C side
if errors is not None:  # `errors` is forwarded only when the caller supplied one
return UnicodeResultProcessor(encoding, errors).process
else:
return UnicodeResultProcessor(encoding).process
+ def to_conditional_unicode_processor_factory(encoding, errors=None):  # returns the bound `conditional_process` method of a new UnicodeResultProcessor
+ if errors is not None:  # NOTE(review): `errors` is forwarded only when given -- presumably the constructor does not accept None; confirm
+ return UnicodeResultProcessor(encoding, errors).conditional_process
+ else:
+ return UnicodeResultProcessor(encoding).conditional_process
+
def to_decimal_processor_factory(target_class, scale):
# Note that the scale argument is not taken into account for integer
# values in the C implementation while it is in the Python one.
diff --git a/lib/sqlalchemy/sql/sqltypes.py b/lib/sqlalchemy/sql/sqltypes.py
index 702e77360..0cc90f26b 100644
--- a/lib/sqlalchemy/sql/sqltypes.py
+++ b/lib/sqlalchemy/sql/sqltypes.py
@@ -204,20 +204,11 @@ class String(Concatenable, TypeEngine):
dialect.encoding, self.unicode_error)
if needs_isinstance:
- # we wouldn't be here unless convert_unicode='force'
- # was specified, or the driver has erratic unicode-returning
- # habits. since we will be getting back unicode
- # in most cases, we check for it (decode will fail).
- def process(value):
- if isinstance(value, util.text_type):
- return value
- else:
- return to_unicode(value)
- return process
+ return processors.to_conditional_unicode_processor_factory(
+ dialect.encoding, self.unicode_error)
else:
- # here, we assume that the object is not unicode,
- # avoiding expensive isinstance() check.
- return to_unicode
+ return processors.to_unicode_processor_factory(
+ dialect.encoding, self.unicode_error)
else:
return None