diff options
author | Daniele Varrazzo <daniele.varrazzo@gmail.com> | 2010-12-18 05:02:11 +0000 |
---|---|---|
committer | Daniele Varrazzo <daniele.varrazzo@gmail.com> | 2010-12-21 04:02:14 +0000 |
commit | e182201e6ee8075a88b63c9d8338f833bd2471b0 (patch) | |
tree | 6b4adf73afd5f61c6cc549422e2be38fd720172e | |
parent | ae06fb03e75c47f662606490203f365be26afd46 (diff) | |
download | psycopg2-e182201e6ee8075a88b63c9d8338f833bd2471b0.tar.gz |
Added Python codec name to the connection.
This allows dropping repeated dictionary lookups with unicode
query/parameters.
-rw-r--r-- | ChangeLog | 5 | ||||
-rw-r--r-- | NEWS-2.3 | 1 | ||||
-rw-r--r-- | psycopg/adapter_qstring.c | 23 | ||||
-rw-r--r-- | psycopg/adapter_qstring.h | 4 | ||||
-rw-r--r-- | psycopg/connection.h | 1 | ||||
-rw-r--r-- | psycopg/connection_int.c | 126 | ||||
-rw-r--r-- | psycopg/connection_type.c | 40 | ||||
-rw-r--r-- | psycopg/cursor_type.c | 20 | ||||
-rw-r--r-- | psycopg/typecast_basic.c | 15 |
9 files changed, 140 insertions, 95 deletions
@@ -1,3 +1,8 @@ +2010-12-18 Daniele Varrazzo <daniele.varrazzo@gmail.com> + + * connection.h: added codec attribute to avoid repeated codec name + lookups during unicode query/params manipulations. + 2010-12-15 Daniele Varrazzo <daniele.varrazzo@gmail.com> * psycopg/utils.c: Added psycopg_strdup function. @@ -4,6 +4,7 @@ What's new in psycopg 2.3.2 - Improved PostgreSQL-Python encodings mapping. Added a few missing encodings: EUC_CN, EUC_JIS_2004, ISO885910, ISO885916, LATIN10, SHIFT_JIS_2004. + - Dropped repeated dictionary lookups with unicode query/parameters. What's new in psycopg 2.3.1 diff --git a/psycopg/adapter_qstring.c b/psycopg/adapter_qstring.c index 9da6a21..72240c8 100644 --- a/psycopg/adapter_qstring.c +++ b/psycopg/adapter_qstring.c @@ -49,22 +49,9 @@ qstring_quote(qstringObject *self) Dprintf("qstring_quote: encoding to %s", self->encoding); if (PyUnicode_Check(self->wrapped) && self->encoding) { - PyObject *enc = PyDict_GetItemString(psycoEncodings, self->encoding); - /* note that enc is a borrowed reference */ - - if (enc) { - const char *s = PyString_AsString(enc); - Dprintf("qstring_quote: encoding unicode object to %s", s); - str = PyUnicode_AsEncodedString(self->wrapped, s, NULL); - Dprintf("qstring_quote: got encoded object at %p", str); - if (str == NULL) return NULL; - } - else { - /* can't find the right encoder, raise exception */ - PyErr_Format(InterfaceError, - "can't encode unicode string to %s", self->encoding); - return NULL; - } + str = PyUnicode_AsEncodedString(self->wrapped, self->encoding, NULL); + Dprintf("qstring_quote: got encoded object at %p", str); + if (str == NULL) return NULL; } /* if the wrapped object is a simple string, we don't know how to @@ -144,8 +131,8 @@ qstring_prepare(qstringObject *self, PyObject *args) we don't need the encoding if that's not the case */ if (PyUnicode_Check(self->wrapped)) { if (self->encoding) free(self->encoding); - self->encoding = strdup(conn->encoding); - Dprintf("qstring_prepare: set encoding to %s", conn->encoding); + self->encoding = strdup(conn->codec); + Dprintf("qstring_prepare: set encoding to %s", conn->codec); } Py_CLEAR(self->conn); diff --git a/psycopg/adapter_qstring.h b/psycopg/adapter_qstring.h index db986be..d825fe0 100644 --- a/psycopg/adapter_qstring.h +++ b/psycopg/adapter_qstring.h @@ -37,6 +37,10 @@ typedef struct { PyObject *wrapped; PyObject *buffer; + /* NOTE: this used to be a PostgreSQL encoding: changed in 2.3.2 to be a + * Python codec name. I don't expect there has been any user for this + * object other than adapting str/unicode, so I don't expect client code + * broken for this reason. */ char *encoding; PyObject *conn; diff --git a/psycopg/connection.h b/psycopg/connection.h index 477c690..41e0cdc 100644 --- a/psycopg/connection.h +++ b/psycopg/connection.h @@ -83,6 +83,7 @@ typedef struct { char *dsn; /* data source name */ char *critical; /* critical error on this connection */ char *encoding; /* current backend encoding */ + char *codec; /* python codec name for encoding */ long int closed; /* 1 means connection has been closed; 2 that something horrible happened */ diff --git a/psycopg/connection_int.c b/psycopg/connection_int.c index e04d510..aa4eca7 100644 --- a/psycopg/connection_int.c +++ b/psycopg/connection_int.c @@ -212,38 +212,89 @@ conn_get_standard_conforming_strings(PGconn *pgconn) return equote; } -/* Return a string containing the client_encoding setting. +/* Convert a PostgreSQL encoding to a Python codec. * - * Return a new string allocated by malloc(): use free() to free it. - * Return NULL in case of failure. + * Return a new copy of the codec name allocated on the Python heap, + * NULL with exception in case of error. */ static char * -conn_get_encoding(PGconn *pgconn) +conn_encoding_to_codec(const char *enc) { - const char *tmp, *i; - char *encoding, *j; + char *tmp; + Py_ssize_t size; + PyObject *pyenc; + char *rv = NULL; + + if (!(pyenc = PyDict_GetItemString(psycoEncodings, enc))) { + PyErr_Format(OperationalError, + "no Python codec for client encoding '%s'", enc); + goto exit; + } + if (-1 == PyString_AsStringAndSize(pyenc, &tmp, &size)) { + goto exit; + } + + /* have our own copy of the python codec name */ + rv = psycopg_strdup(tmp, size); + +exit: + /* pyenc is borrowed: no decref. */ + return rv; +} + +/* Read the client encoding from the connection. + * + * Store the encoding in the pgconn->encoding field and the name of the + * matching python codec in codec. The buffers are allocated on the Python + * heap. + * + * Return 0 on success, else nonzero. + */ +static int +conn_read_encoding(connectionObject *self, PGconn *pgconn) +{ + char *enc = NULL, *codec = NULL, *j; + const char *tmp; + int rv = -1; tmp = PQparameterStatus(pgconn, "client_encoding"); Dprintf("conn_connect: client encoding: %s", tmp ? tmp : "(none)"); if (!tmp) { PyErr_SetString(OperationalError, "server didn't return client encoding"); - return NULL; + goto exit; } - encoding = malloc(strlen(tmp)+1); - if (encoding == NULL) { + if (!(enc = PyMem_Malloc(strlen(tmp)+1))) { PyErr_NoMemory(); - return NULL; + goto exit; } - /* return in uppercase */ - i = tmp; - j = encoding; - while (*i) { *j++ = toupper(*i++); } + /* turn encoding in uppercase */ + j = enc; + while (*tmp) { *j++ = toupper(*tmp++); } *j = '\0'; - return encoding; + /* Look for this encoding in Python codecs. */ + if (!(codec = conn_encoding_to_codec(enc))) { + goto exit; + } + + /* Good, success: store the encoding/codec in the connection. */ + PyMem_Free(self->encoding); + self->encoding = enc; + enc = NULL; + + PyMem_Free(self->codec); + self->codec = codec; + codec = NULL; + + rv = 0; + +exit: + PyMem_Free(enc); + PyMem_Free(codec); + return rv; } int @@ -319,9 +370,8 @@ conn_setup(connectionObject *self, PGconn *pgconn) PyErr_SetString(InterfaceError, "only protocol 3 supported"); return -1; } - /* conn_get_encoding returns a malloc'd string */ - self->encoding = conn_get_encoding(pgconn); - if (self->encoding == NULL) { + + if (conn_read_encoding(self, pgconn)) { return -1; } @@ -651,9 +701,7 @@ _conn_poll_setup_async(connectionObject *self) PyErr_SetString(InterfaceError, "only protocol 3 supported"); break; } - /* conn_get_encoding returns a malloc'd string */ - self->encoding = conn_get_encoding(self->pgconn); - if (self->encoding == NULL) { + if (conn_read_encoding(self, self->pgconn)) { break; } self->cancel = conn_get_cancel(self->pgconn); @@ -873,11 +921,15 @@ conn_set_client_encoding(connectionObject *self, const char *enc) char *error = NULL; char query[48]; int res = 0; + char *codec; /* If the current encoding is equal to the requested one we don't issue any query to the backend */ if (strcmp(self->encoding, enc) == 0) return 0; + /* We must know what python codec this encoding is. */ + if (!(codec = conn_encoding_to_codec(enc))) { return -1; } + Py_BEGIN_ALLOW_THREADS; pthread_mutex_lock(&self->lock); @@ -886,19 +938,29 @@ conn_set_client_encoding(connectionObject *self, const char *enc) /* abort the current transaction, to set the encoding ouside of transactions */ - res = pq_abort_locked(self, &pgres, &error, &_save); - - if (res == 0) { - res = pq_execute_command_locked(self, query, &pgres, &error, &_save); - if (res == 0) { - /* no error, we can proceeed and store the new encoding */ - if (self->encoding) free(self->encoding); - self->encoding = strdup(enc); - Dprintf("conn_set_client_encoding: set encoding to %s", - self->encoding); - } + if ((res = pq_abort_locked(self, &pgres, &error, &_save))) { + goto endlock; + } + + if ((res = pq_execute_command_locked(self, query, &pgres, &error, &_save))) { + goto endlock; } + /* no error, we can proceeed and store the new encoding */ + PyMem_Free(self->encoding); + if (!(self->encoding = psycopg_strdup(enc, 0))) { + res = 1; /* don't call pq_complete_error below */ + goto endlock; + } + + /* Store the python codec too. */ + PyMem_Free(self->codec); + self->codec = codec; + + Dprintf("conn_set_client_encoding: set encoding to %s (codec: %s)", + self->encoding, self->codec); + +endlock: pthread_mutex_unlock(&self->lock); Py_END_ALLOW_THREADS; diff --git a/psycopg/connection_type.c b/psycopg/connection_type.c index 7bfdd34..9e799be 100644 --- a/psycopg/connection_type.c +++ b/psycopg/connection_type.c @@ -425,35 +425,38 @@ psyco_conn_set_isolation_level(connectionObject *self, PyObject *args) static PyObject * psyco_conn_set_client_encoding(connectionObject *self, PyObject *args) { - const char *enc = NULL; - char *buffer; - size_t i, j; + const char *enc; + char *buffer, *dest; + PyObject *rv = NULL; + Py_ssize_t len; EXC_IF_CONN_CLOSED(self); EXC_IF_CONN_ASYNC(self, set_client_encoding); EXC_IF_TPC_PREPARED(self, set_client_encoding); - if (!PyArg_ParseTuple(args, "s", &enc)) return NULL; + if (!PyArg_ParseTuple(args, "s#", &enc, &len)) return NULL; /* convert to upper case and remove '-' and '_' from string */ - buffer = PyMem_Malloc(strlen(enc)+1); - for (i=j=0 ; i < strlen(enc) ; i++) { - if (enc[i] == '_' || enc[i] == '-') - continue; - else - buffer[j++] = toupper(enc[i]); + if (!(dest = buffer = PyMem_Malloc(len+1))) { + return PyErr_NoMemory(); + } + + while (*enc) { + if (*enc == '_' || *enc == '-') { + ++enc; + } + else { + *dest++ = toupper(*enc++); + } } - buffer[j] = '\0'; + *dest = '\0'; if (conn_set_client_encoding(self, buffer) == 0) { - PyMem_Free(buffer); Py_INCREF(Py_None); - return Py_None; - } - else { - PyMem_Free(buffer); - return NULL; + rv = Py_None; } + PyMem_Free(buffer); + return rv; } /* get_transaction_status method - Get backend transaction status */ @@ -892,7 +895,8 @@ connection_dealloc(PyObject* obj) conn_notice_clean(self); if (self->dsn) free(self->dsn); - if (self->encoding) free(self->encoding); + PyMem_Free(self->encoding); + PyMem_Free(self->codec); if (self->critical) free(self->critical); Py_CLEAR(self->async_cursor); diff --git a/psycopg/cursor_type.c b/psycopg/cursor_type.c index cd62d50..d9e925f 100644 --- a/psycopg/cursor_type.c +++ b/psycopg/cursor_type.c @@ -272,21 +272,11 @@ static PyObject *_psyco_curs_validate_sql_basic( Py_INCREF(sql); } else if (PyUnicode_Check(sql)) { - PyObject *enc = PyDict_GetItemString(psycoEncodings, - self->conn->encoding); - /* enc is a borrowed reference; we won't decref it */ - - if (enc) { - sql = PyUnicode_AsEncodedString(sql, PyString_AsString(enc), NULL); - /* if there was an error during the encoding from unicode to the - target encoding, we just let the exception propagate */ - if (sql == NULL) { goto fail; } - } else { - PyErr_Format(InterfaceError, - "can't encode unicode SQL statement to %s", - self->conn->encoding); - goto fail; - } + char *enc = self->conn->codec; + sql = PyUnicode_AsEncodedString(sql, enc, NULL); + /* if there was an error during the encoding from unicode to the + target encoding, we just let the exception propagate */ + if (sql == NULL) { goto fail; } } else { /* the is not unicode or string, raise an error */ diff --git a/psycopg/typecast_basic.c b/psycopg/typecast_basic.c index e9ad527..634fc45 100644 --- a/psycopg/typecast_basic.c +++ b/psycopg/typecast_basic.c @@ -82,21 +82,12 @@ typecast_STRING_cast(const char *s, Py_ssize_t len, PyObject *curs) static PyObject * typecast_UNICODE_cast(const char *s, Py_ssize_t len, PyObject *curs) { - PyObject *enc; + char *enc; if (s == NULL) {Py_INCREF(Py_None); return Py_None;} - enc = PyDict_GetItemString(psycoEncodings, - ((cursorObject*)curs)->conn->encoding); - if (enc) { - return PyUnicode_Decode(s, len, PyString_AsString(enc), NULL); - } - else { - PyErr_Format(InterfaceError, - "can't decode into unicode string from %s", - ((cursorObject*)curs)->conn->encoding); - return NULL; - } + enc = ((cursorObject*)curs)->conn->codec; + return PyUnicode_Decode(s, len, enc, NULL); } /** BOOLEAN - cast boolean value into right python object **/ |