summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Daniele Varrazzo <daniele.varrazzo@gmail.com> 2010-12-18 05:02:11 +0000
committer: Daniele Varrazzo <daniele.varrazzo@gmail.com> 2010-12-21 04:02:14 +0000
commit: e182201e6ee8075a88b63c9d8338f833bd2471b0 (patch)
tree: 6b4adf73afd5f61c6cc549422e2be38fd720172e
parent: ae06fb03e75c47f662606490203f365be26afd46 (diff)
download: psycopg2-e182201e6ee8075a88b63c9d8338f833bd2471b0.tar.gz
Added Python codec name to the connection.
This allows dropping repeated dictionary lookups with unicode query/parameters.
-rw-r--r-- ChangeLog 5
-rw-r--r-- NEWS-2.3 1
-rw-r--r-- psycopg/adapter_qstring.c 23
-rw-r--r-- psycopg/adapter_qstring.h 4
-rw-r--r-- psycopg/connection.h 1
-rw-r--r-- psycopg/connection_int.c 126
-rw-r--r-- psycopg/connection_type.c 40
-rw-r--r-- psycopg/cursor_type.c 20
-rw-r--r-- psycopg/typecast_basic.c 15
9 files changed, 140 insertions, 95 deletions
diff --git a/ChangeLog b/ChangeLog
index ac6cc66..844062e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2010-12-18 Daniele Varrazzo <daniele.varrazzo@gmail.com>
+
+ * connection.h: added codec attribute to avoid repeated codec name
+ lookups during unicode query/params manipulations.
+
2010-12-15 Daniele Varrazzo <daniele.varrazzo@gmail.com>
* psycopg/utils.c: Added psycopg_strdup function.
diff --git a/NEWS-2.3 b/NEWS-2.3
index 312d80e..0e2229a 100644
--- a/NEWS-2.3
+++ b/NEWS-2.3
@@ -4,6 +4,7 @@ What's new in psycopg 2.3.2
- Improved PostgreSQL-Python encodings mapping. Added a few
missing encodings: EUC_CN, EUC_JIS_2004, ISO885910, ISO885916,
LATIN10, SHIFT_JIS_2004.
+ - Dropped repeated dictionary lookups with unicode query/parameters.
What's new in psycopg 2.3.1
diff --git a/psycopg/adapter_qstring.c b/psycopg/adapter_qstring.c
index 9da6a21..72240c8 100644
--- a/psycopg/adapter_qstring.c
+++ b/psycopg/adapter_qstring.c
@@ -49,22 +49,9 @@ qstring_quote(qstringObject *self)
Dprintf("qstring_quote: encoding to %s", self->encoding);
if (PyUnicode_Check(self->wrapped) && self->encoding) {
- PyObject *enc = PyDict_GetItemString(psycoEncodings, self->encoding);
- /* note that enc is a borrowed reference */
-
- if (enc) {
- const char *s = PyString_AsString(enc);
- Dprintf("qstring_quote: encoding unicode object to %s", s);
- str = PyUnicode_AsEncodedString(self->wrapped, s, NULL);
- Dprintf("qstring_quote: got encoded object at %p", str);
- if (str == NULL) return NULL;
- }
- else {
- /* can't find the right encoder, raise exception */
- PyErr_Format(InterfaceError,
- "can't encode unicode string to %s", self->encoding);
- return NULL;
- }
+ str = PyUnicode_AsEncodedString(self->wrapped, self->encoding, NULL);
+ Dprintf("qstring_quote: got encoded object at %p", str);
+ if (str == NULL) return NULL;
}
/* if the wrapped object is a simple string, we don't know how to
@@ -144,8 +131,8 @@ qstring_prepare(qstringObject *self, PyObject *args)
we don't need the encoding if that's not the case */
if (PyUnicode_Check(self->wrapped)) {
if (self->encoding) free(self->encoding);
- self->encoding = strdup(conn->encoding);
- Dprintf("qstring_prepare: set encoding to %s", conn->encoding);
+ self->encoding = strdup(conn->codec);
+ Dprintf("qstring_prepare: set encoding to %s", conn->codec);
}
Py_CLEAR(self->conn);
diff --git a/psycopg/adapter_qstring.h b/psycopg/adapter_qstring.h
index db986be..d825fe0 100644
--- a/psycopg/adapter_qstring.h
+++ b/psycopg/adapter_qstring.h
@@ -37,6 +37,10 @@ typedef struct {
PyObject *wrapped;
PyObject *buffer;
+ /* NOTE: this used to be a PostgreSQL encoding: changed in 2.3.2 to be a
+ * Python codec name. I don't expect there has been any user for this
+ * object other than adapting str/unicode, so I don't expect client code
+ * broken for this reason. */
char *encoding;
PyObject *conn;
diff --git a/psycopg/connection.h b/psycopg/connection.h
index 477c690..41e0cdc 100644
--- a/psycopg/connection.h
+++ b/psycopg/connection.h
@@ -83,6 +83,7 @@ typedef struct {
char *dsn; /* data source name */
char *critical; /* critical error on this connection */
char *encoding; /* current backend encoding */
+ char *codec; /* python codec name for encoding */
long int closed; /* 1 means connection has been closed;
2 that something horrible happened */
diff --git a/psycopg/connection_int.c b/psycopg/connection_int.c
index e04d510..aa4eca7 100644
--- a/psycopg/connection_int.c
+++ b/psycopg/connection_int.c
@@ -212,38 +212,89 @@ conn_get_standard_conforming_strings(PGconn *pgconn)
return equote;
}
-/* Return a string containing the client_encoding setting.
+/* Convert a PostgreSQL encoding to a Python codec.
*
- * Return a new string allocated by malloc(): use free() to free it.
- * Return NULL in case of failure.
+ * Return a new copy of the codec name allocated on the Python heap,
+ * NULL with exception in case of error.
*/
static char *
-conn_get_encoding(PGconn *pgconn)
+conn_encoding_to_codec(const char *enc)
{
- const char *tmp, *i;
- char *encoding, *j;
+ char *tmp;
+ Py_ssize_t size;
+ PyObject *pyenc;
+ char *rv = NULL;
+
+ if (!(pyenc = PyDict_GetItemString(psycoEncodings, enc))) {
+ PyErr_Format(OperationalError,
+ "no Python codec for client encoding '%s'", enc);
+ goto exit;
+ }
+ if (-1 == PyString_AsStringAndSize(pyenc, &tmp, &size)) {
+ goto exit;
+ }
+
+ /* have our own copy of the python codec name */
+ rv = psycopg_strdup(tmp, size);
+
+exit:
+ /* pyenc is borrowed: no decref. */
+ return rv;
+}
+
+/* Read the client encoding from the connection.
+ *
+ * Store the encoding in the connection's encoding field (self->encoding)
+ * and the name of the matching Python codec in self->codec. Both buffers
+ * are allocated on the Python heap.
+ *
+ * Return 0 on success, else nonzero.
+ */
+static int
+conn_read_encoding(connectionObject *self, PGconn *pgconn)
+{
+ char *enc = NULL, *codec = NULL, *j;
+ const char *tmp;
+ int rv = -1;
tmp = PQparameterStatus(pgconn, "client_encoding");
Dprintf("conn_connect: client encoding: %s", tmp ? tmp : "(none)");
if (!tmp) {
PyErr_SetString(OperationalError,
"server didn't return client encoding");
- return NULL;
+ goto exit;
}
- encoding = malloc(strlen(tmp)+1);
- if (encoding == NULL) {
+ if (!(enc = PyMem_Malloc(strlen(tmp)+1))) {
PyErr_NoMemory();
- return NULL;
+ goto exit;
}
- /* return in uppercase */
- i = tmp;
- j = encoding;
- while (*i) { *j++ = toupper(*i++); }
+ /* convert the encoding name to uppercase */
+ j = enc;
+ while (*tmp) { *j++ = toupper(*tmp++); }
*j = '\0';
- return encoding;
+ /* Look for this encoding in Python codecs. */
+ if (!(codec = conn_encoding_to_codec(enc))) {
+ goto exit;
+ }
+
+ /* Good, success: store the encoding/codec in the connection. */
+ PyMem_Free(self->encoding);
+ self->encoding = enc;
+ enc = NULL;
+
+ PyMem_Free(self->codec);
+ self->codec = codec;
+ codec = NULL;
+
+ rv = 0;
+
+exit:
+ PyMem_Free(enc);
+ PyMem_Free(codec);
+ return rv;
}
int
@@ -319,9 +370,8 @@ conn_setup(connectionObject *self, PGconn *pgconn)
PyErr_SetString(InterfaceError, "only protocol 3 supported");
return -1;
}
- /* conn_get_encoding returns a malloc'd string */
- self->encoding = conn_get_encoding(pgconn);
- if (self->encoding == NULL) {
+
+ if (conn_read_encoding(self, pgconn)) {
return -1;
}
@@ -651,9 +701,7 @@ _conn_poll_setup_async(connectionObject *self)
PyErr_SetString(InterfaceError, "only protocol 3 supported");
break;
}
- /* conn_get_encoding returns a malloc'd string */
- self->encoding = conn_get_encoding(self->pgconn);
- if (self->encoding == NULL) {
+ if (conn_read_encoding(self, self->pgconn)) {
break;
}
self->cancel = conn_get_cancel(self->pgconn);
@@ -873,11 +921,15 @@ conn_set_client_encoding(connectionObject *self, const char *enc)
char *error = NULL;
char query[48];
int res = 0;
+ char *codec;
/* If the current encoding is equal to the requested one we don't
issue any query to the backend */
if (strcmp(self->encoding, enc) == 0) return 0;
+ /* We must know what python codec this encoding is. */
+ if (!(codec = conn_encoding_to_codec(enc))) { return -1; }
+
Py_BEGIN_ALLOW_THREADS;
pthread_mutex_lock(&self->lock);
@@ -886,19 +938,29 @@ conn_set_client_encoding(connectionObject *self, const char *enc)
/* abort the current transaction, to set the encoding ouside of
transactions */
- res = pq_abort_locked(self, &pgres, &error, &_save);
-
- if (res == 0) {
- res = pq_execute_command_locked(self, query, &pgres, &error, &_save);
- if (res == 0) {
- /* no error, we can proceeed and store the new encoding */
- if (self->encoding) free(self->encoding);
- self->encoding = strdup(enc);
- Dprintf("conn_set_client_encoding: set encoding to %s",
- self->encoding);
- }
+ if ((res = pq_abort_locked(self, &pgres, &error, &_save))) {
+ goto endlock;
+ }
+
+ if ((res = pq_execute_command_locked(self, query, &pgres, &error, &_save))) {
+ goto endlock;
}
+ /* no error, we can proceed and store the new encoding */
+ PyMem_Free(self->encoding);
+ if (!(self->encoding = psycopg_strdup(enc, 0))) {
+ res = 1; /* don't call pq_complete_error below */
+ goto endlock;
+ }
+
+ /* Store the python codec too. */
+ PyMem_Free(self->codec);
+ self->codec = codec;
+
+ Dprintf("conn_set_client_encoding: set encoding to %s (codec: %s)",
+ self->encoding, self->codec);
+
+endlock:
pthread_mutex_unlock(&self->lock);
Py_END_ALLOW_THREADS;
diff --git a/psycopg/connection_type.c b/psycopg/connection_type.c
index 7bfdd34..9e799be 100644
--- a/psycopg/connection_type.c
+++ b/psycopg/connection_type.c
@@ -425,35 +425,38 @@ psyco_conn_set_isolation_level(connectionObject *self, PyObject *args)
static PyObject *
psyco_conn_set_client_encoding(connectionObject *self, PyObject *args)
{
- const char *enc = NULL;
- char *buffer;
- size_t i, j;
+ const char *enc;
+ char *buffer, *dest;
+ PyObject *rv = NULL;
+ Py_ssize_t len;
EXC_IF_CONN_CLOSED(self);
EXC_IF_CONN_ASYNC(self, set_client_encoding);
EXC_IF_TPC_PREPARED(self, set_client_encoding);
- if (!PyArg_ParseTuple(args, "s", &enc)) return NULL;
+ if (!PyArg_ParseTuple(args, "s#", &enc, &len)) return NULL;
/* convert to upper case and remove '-' and '_' from string */
- buffer = PyMem_Malloc(strlen(enc)+1);
- for (i=j=0 ; i < strlen(enc) ; i++) {
- if (enc[i] == '_' || enc[i] == '-')
- continue;
- else
- buffer[j++] = toupper(enc[i]);
+ if (!(dest = buffer = PyMem_Malloc(len+1))) {
+ return PyErr_NoMemory();
+ }
+
+ while (*enc) {
+ if (*enc == '_' || *enc == '-') {
+ ++enc;
+ }
+ else {
+ *dest++ = toupper(*enc++);
+ }
}
- buffer[j] = '\0';
+ *dest = '\0';
if (conn_set_client_encoding(self, buffer) == 0) {
- PyMem_Free(buffer);
Py_INCREF(Py_None);
- return Py_None;
- }
- else {
- PyMem_Free(buffer);
- return NULL;
+ rv = Py_None;
}
+ PyMem_Free(buffer);
+ return rv;
}
/* get_transaction_status method - Get backend transaction status */
@@ -892,7 +895,8 @@ connection_dealloc(PyObject* obj)
conn_notice_clean(self);
if (self->dsn) free(self->dsn);
- if (self->encoding) free(self->encoding);
+ PyMem_Free(self->encoding);
+ PyMem_Free(self->codec);
if (self->critical) free(self->critical);
Py_CLEAR(self->async_cursor);
diff --git a/psycopg/cursor_type.c b/psycopg/cursor_type.c
index cd62d50..d9e925f 100644
--- a/psycopg/cursor_type.c
+++ b/psycopg/cursor_type.c
@@ -272,21 +272,11 @@ static PyObject *_psyco_curs_validate_sql_basic(
Py_INCREF(sql);
}
else if (PyUnicode_Check(sql)) {
- PyObject *enc = PyDict_GetItemString(psycoEncodings,
- self->conn->encoding);
- /* enc is a borrowed reference; we won't decref it */
-
- if (enc) {
- sql = PyUnicode_AsEncodedString(sql, PyString_AsString(enc), NULL);
- /* if there was an error during the encoding from unicode to the
- target encoding, we just let the exception propagate */
- if (sql == NULL) { goto fail; }
- } else {
- PyErr_Format(InterfaceError,
- "can't encode unicode SQL statement to %s",
- self->conn->encoding);
- goto fail;
- }
+ char *enc = self->conn->codec;
+ sql = PyUnicode_AsEncodedString(sql, enc, NULL);
+ /* if there was an error during the encoding from unicode to the
+ target encoding, we just let the exception propagate */
+ if (sql == NULL) { goto fail; }
}
else {
/* the is not unicode or string, raise an error */
diff --git a/psycopg/typecast_basic.c b/psycopg/typecast_basic.c
index e9ad527..634fc45 100644
--- a/psycopg/typecast_basic.c
+++ b/psycopg/typecast_basic.c
@@ -82,21 +82,12 @@ typecast_STRING_cast(const char *s, Py_ssize_t len, PyObject *curs)
static PyObject *
typecast_UNICODE_cast(const char *s, Py_ssize_t len, PyObject *curs)
{
- PyObject *enc;
+ char *enc;
if (s == NULL) {Py_INCREF(Py_None); return Py_None;}
- enc = PyDict_GetItemString(psycoEncodings,
- ((cursorObject*)curs)->conn->encoding);
- if (enc) {
- return PyUnicode_Decode(s, len, PyString_AsString(enc), NULL);
- }
- else {
- PyErr_Format(InterfaceError,
- "can't decode into unicode string from %s",
- ((cursorObject*)curs)->conn->encoding);
- return NULL;
- }
+ enc = ((cursorObject*)curs)->conn->codec;
+ return PyUnicode_Decode(s, len, enc, NULL);
}
/** BOOLEAN - cast boolean value into right python object **/