Merge branch 'devel' (tag: 2_4_1)

author: Federico Di Gregorio <fog@initd.org> 2011-05-11 09:58:49 +0200
committer: Federico Di Gregorio <fog@initd.org> 2011-05-11 09:58:49 +0200
commit: ab685c2fc0a04651041957af7419a1ecfeeb9e53 (patch)
tree: 409828624d0d78257928497c22951c206e099265 /psycopg
parent: 29f83f05c4f6565ea67d8b5424b1ec66d55c0858 (diff)
parent: 9080b30741e9a6f6b80a6700197445bff48df917 (diff)
download: psycopg2-ab685c2fc0a04651041957af7419a1ecfeeb9e53.tar.gz
4 files changed, 234 insertions, 114 deletions
diff --git a/psycopg/connection_int.c b/psycopg/connection_int.c
index fa714f6..22c5bc5 100644
--- a/psycopg/connection_int.c
+++ b/psycopg/connection_int.c
@@ -236,10 +236,45 @@ conn_get_standard_conforming_strings(PGconn *pgconn)
     return equote;
 }
 
+
+/* Normalize an encoding name: drop every non-alphanumeric char (such as
+ * '-' and '_') and convert what is left to upper case.
+ *
+ * Return a new buffer allocated with PyMem_Malloc (the caller must release
+ * it with PyMem_Free); on error return NULL with an exception set.
+ */
+static char *
+clean_encoding_name(const char *enc)
+{
+    const char *i = enc;
+    char *rv, *j;
+
+    /* the result can't be longer than the input: strlen(enc) + 1 is enough */
+    if (!(j = rv = PyMem_Malloc(strlen(enc) + 1))) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+
+    /* <ctype.h> functions are undefined on negative values other than EOF,
+     * so a plain (possibly signed) char must go through unsigned char. */
+    for (; *i; ++i) {
+        const unsigned char c = (unsigned char)*i;
+        if (isalnum(c)) {
+            *j++ = (char)toupper(c);
+        }
+    }
+    *j = '\0';
+
+    Dprintf("clean_encoding_name: %s -> %s", enc, rv);
+    return rv;
+}
+
 /* Convert a PostgreSQL encoding to a Python codec.
  *
  * Return a new copy of the codec name allocated on the Python heap,
  * NULL with exception in case of error.
+ *
+ * 'enc' should be already normalized (uppercase, no - or _).
  */
 static char *
 conn_encoding_to_codec(const char *enc)
@@ -285,7 +320,7 @@ exit:
 static int
 conn_read_encoding(connectionObject *self, PGconn *pgconn)
 {
-    char *enc = NULL, *codec = NULL, *j;
+    char *enc = NULL, *codec = NULL;
     const char *tmp;
     int rv = -1;
 
@@ -297,16 +332,10 @@ conn_read_encoding(connectionObject *self, PGconn *pgconn)
         goto exit;
     }
 
-    if (!(enc = PyMem_Malloc(strlen(tmp)+1))) {
-        PyErr_NoMemory();
+    if (!(enc = clean_encoding_name(tmp))) {
         goto exit;
     }
 
-    /* turn encoding in uppercase */
-    j = enc;
-    while (*tmp) { *j++ = toupper(*tmp++); }
-    *j = '\0';
-
     /* Look for this encoding in Python codecs. */
     if (!(codec = conn_encoding_to_codec(enc))) {
         goto exit;
@@ -965,21 +994,23 @@ conn_set_client_encoding(connectionObject *self, const char *enc)
     PGresult *pgres = NULL;
     char *error = NULL;
     char query[48];
-    int res = 0;
-    char *codec;
+    int res = 1;
+    char *codec = NULL;
+    char *clean_enc = NULL;
 
     /* If the current encoding is equal to the requested one we don't
        issue any query to the backend */
     if (strcmp(self->encoding, enc) == 0) return 0;
 
     /* We must know what python codec this encoding is. */
-    if (!(codec = conn_encoding_to_codec(enc))) { return -1; }
+    if (!(clean_enc = clean_encoding_name(enc))) { goto exit; }
+    if (!(codec = conn_encoding_to_codec(clean_enc))) { goto exit; }
 
     Py_BEGIN_ALLOW_THREADS;
     pthread_mutex_lock(&self->lock);
 
     /* set encoding, no encoding string is longer than 24 bytes */
-    PyOS_snprintf(query, 47, "SET client_encoding = '%s'", enc);
+    PyOS_snprintf(query, 47, "SET client_encoding = '%s'", clean_enc);
 
     /* abort the current transaction, to set the encoding ouside of
        transactions */
@@ -994,21 +1025,18 @@ conn_set_client_encoding(connectionObject *self, const char *enc)
     /* no error, we can proceeed and store the new encoding */
     {
         char *tmp = self->encoding;
-        self->encoding = NULL;
+        self->encoding = clean_enc;
         PyMem_Free(tmp);
-    }
-    if (!(self->encoding = psycopg_strdup(enc, 0))) {
-        res = 1;  /* don't call pq_complete_error below */
-        goto endlock;
+        clean_enc = NULL;
     }
 
     /* Store the python codec too. */
     {
         char *tmp = self->codec;
-        self->codec = NULL;
+        self->codec = codec;
         PyMem_Free(tmp);
+        codec = NULL;
     }
-    self->codec = codec;
 
     Dprintf("conn_set_client_encoding: set encoding to %s (codec: %s)",
             self->encoding, self->codec);
@@ -1021,6 +1049,10 @@ endlock:
     if (res < 0)
         pq_complete_error(self, &pgres, &error);
 
+exit:
+    PyMem_Free(clean_enc);
+    PyMem_Free(codec);
+
     return res;
 }
 
diff --git a/psycopg/connection_type.c b/psycopg/connection_type.c
index b0c9ddc..7ca395d 100644
--- a/psycopg/connection_type.c
+++ b/psycopg/connection_type.c
@@ -423,36 +423,18 @@ static PyObject *
 psyco_conn_set_client_encoding(connectionObject *self, PyObject *args)
 {
     const char *enc;
-    char *buffer, *dest;
     PyObject *rv = NULL;
-    Py_ssize_t len;
 
     EXC_IF_CONN_CLOSED(self);
     EXC_IF_CONN_ASYNC(self, set_client_encoding);
     EXC_IF_TPC_PREPARED(self, set_client_encoding);
 
-    if (!PyArg_ParseTuple(args, "s#", &enc, &len)) return NULL;
+    if (!PyArg_ParseTuple(args, "s", &enc)) return NULL;
 
-    /* convert to upper case and remove '-' and '_' from string */
-    if (!(dest = buffer = PyMem_Malloc(len+1))) {
-        return PyErr_NoMemory();
-    }
-
-    while (*enc) {
-        if (*enc == '_' || *enc == '-') {
-            ++enc;
-        }
-        else {
-            *dest++ = toupper(*enc++);
-        }
-    }
-    *dest = '\0';
-
-    if (conn_set_client_encoding(self, buffer) == 0) {
+    if (conn_set_client_encoding(self, enc) == 0) {
         Py_INCREF(Py_None);
         rv = Py_None;
     }
-    PyMem_Free(buffer);
     return rv;
 }
 
diff --git a/psycopg/pqpath.c b/psycopg/pqpath.c
index 8136d0a..6a6d05a 100644
--- a/psycopg/pqpath.c
+++ b/psycopg/pqpath.c
@@ -172,16 +172,19 @@ pq_raise(connectionObject *conn, cursorObject *curs, PGresult *pgres)
     if (pgres) {
         err = PQresultErrorMessage(pgres);
         if (err != NULL) {
+            Dprintf("pq_raise: PQresultErrorMessage: err=%s", err);
             code = PQresultErrorField(pgres, PG_DIAG_SQLSTATE);
         }
     }
-    if (err == NULL)
+    if (err == NULL) {
         err = PQerrorMessage(conn->pgconn);
+        Dprintf("pq_raise: PQerrorMessage: err=%s", err);
+    }
 
     /* if the is no error message we probably called pq_raise without reason:
        we need to set an exception anyway because the caller will probably
        raise and a meaningful message is better than an empty one */
-    if (err == NULL) {
+    if (err == NULL || err[0] == '\0') {
         PyErr_SetString(Error, "psycopg went psycotic without error set");
         return;
     }
@@ -191,9 +194,15 @@ pq_raise(connectionObject *conn, cursorObject *curs, PGresult *pgres)
     if (code != NULL) {
         exc = exception_from_sqlstate(code);
     }
+    else {
+        /* Fallback if there is no exception code (reported happening e.g.
+         * when the connection is closed). */
+        exc = DatabaseError;
+    }
 
     /* try to remove the initial "ERROR: " part from the postgresql error */
     err2 = strip_severity(err);
+    Dprintf("pq_raise: err2=%s", err2);
 
     psyco_set_error(exc, curs, err2, err, code);
 }
@@ -1355,6 +1364,13 @@ pq_fetch(cursorObject *curs)
         /* don't clear curs->pgres, because it contains the results! */
         break;
 
+    case PGRES_EMPTY_QUERY:
+        PyErr_SetString(ProgrammingError,
+            "can't execute an empty query");
+        IFCLEARPGRES(curs->pgres);
+        ex = -1;
+        break;
+
     default:
         Dprintf("pq_fetch: uh-oh, something FAILED: pgconn = %p", curs->conn);
         pq_raise(curs->conn, curs, NULL);
diff --git a/psycopg/typecast_binary.c b/psycopg/typecast_binary.c
index fa371e2..b145b1b 100644
--- a/psycopg/typecast_binary.c
+++ b/psycopg/typecast_binary.c
@@ -40,7 +40,7 @@ chunk_dealloc(chunkObject *self)
         FORMAT_CODE_PY_SSIZE_T,
         self->base, self->len
       );
-    PQfreemem(self->base);
+    PyMem_Free(self->base);
     Py_TYPE(self)->tp_free((PyObject *)self);
 }
 
@@ -127,95 +127,185 @@ PyTypeObject chunkType = {
     chunk_doc                   /* tp_doc */
 };
 
-static PyObject *
+
+static char *psycopg_parse_hex(
+        const char *bufin, Py_ssize_t sizein, Py_ssize_t *sizeout);
+static char *psycopg_parse_escape(
+        const char *bufin, Py_ssize_t sizein, Py_ssize_t *sizeout);
+
+/* The function is not static and not hidden as we use ctypes to test it. */
+PyObject *
 typecast_BINARY_cast(const char *s, Py_ssize_t l, PyObject *curs)
 {
     chunkObject *chunk = NULL;
     PyObject *res = NULL;
-    char *str = NULL, *buffer = NULL;
-    size_t len;
+    char *buffer = NULL;
+    Py_ssize_t len;
 
     if (s == NULL) {Py_INCREF(Py_None); return Py_None;}
 
-    /* PQunescapeBytea absolutely wants a 0-terminated string and we don't
-       want to copy the whole buffer, right? Wrong, but there isn't any other
-       way <g> */
-    if (s[l] != '\0') {
-        if ((buffer = PyMem_Malloc(l+1)) == NULL) {
-            PyErr_NoMemory();
-            goto fail;
+    if (s[0] == '\\' && s[1] == 'x') {
+        /* This is a buffer escaped in hex format: libpq before 9.0 can't
+         * parse it and we can't detect reliably the libpq version at runtime.
+         * So the only robust option is to parse it ourselves - luckily it's
+         * an easy format.
+         */
+        if (NULL == (buffer = psycopg_parse_hex(s, l, &len))) {
+            goto exit;
         }
-        /* Py_ssize_t->size_t cast is safe, as long as the Py_ssize_t is
-         * >= 0: */
-        assert (l >= 0);
-        strncpy(buffer, s, (size_t) l);
-
-        buffer[l] = '\0';
-        s = buffer;
-    }
-    str = (char*)PQunescapeBytea((unsigned char*)s, &len);
-    Dprintf("typecast_BINARY_cast: unescaped " FORMAT_CODE_SIZE_T " bytes",
-      len);
-
-    /* The type of the second parameter to PQunescapeBytea is size_t *, so it's
-     * possible (especially with Python < 2.5) to get a return value too large
-     * to fit into a Python container. */
-    if (len > (size_t) PY_SSIZE_T_MAX) {
-      PyErr_SetString(PyExc_IndexError, "PG buffer too large to fit in Python"
-                                        " buffer.");
-      goto fail;
     }
-
-    /* Check the escaping was successful */
-    if (s[0] == '\\' && s[1] == 'x'     /* input encoded in hex format */
-        && str[0] == 'x'                /* output resulted in an 'x' */
-        && s[2] != '7' && s[3] != '8')  /* input wasn't really an x (0x78) */
-    {
-        PyErr_SetString(InterfaceError,
-            "can't receive bytea data from server >= 9.0 with the current "
-            "libpq client library: please update the libpq to at least 9.0 "
-            "or set bytea_output to 'escape' in the server config "
-            "or with a query");
-        goto fail;
+    else {
+        /* This is a buffer in the classic bytea format. So we can hand it
+         * to PQunescapeBytea to have it parsed, right? ...Wrong. We
+         * could, but then we'd have to record whether the buffer was allocated
+         * by Python or by the libpq to dispose of it properly. Furthermore the
+         * PQunescapeBytea interface is not the most brilliant, as it wants a
+         * null-terminated string even if we already know its length, thus
+         * requiring a useless memcpy and strlen.
+         * So we'll just use our own, better integrated parser and be done
+         * with it.
+         */
+        if (NULL == (buffer = psycopg_parse_escape(s, l, &len))) {
+            goto exit;
+        }
     }
 
     chunk = (chunkObject *) PyObject_New(chunkObject, &chunkType);
-    if (chunk == NULL) goto fail;
+    if (chunk == NULL) goto exit;
 
-    /* **Transfer** ownership of str's memory to the chunkObject: */
-    chunk->base = str;
-    str = NULL;
+    /* **Transfer** ownership of buffer's memory to the chunkObject: */
+    chunk->base = buffer;
+    buffer = NULL;
+    chunk->len = (Py_ssize_t)len;
 
-    /* size_t->Py_ssize_t cast was validated above: */
-    chunk->len = (Py_ssize_t) len;
 #if PY_MAJOR_VERSION < 3
     if ((res = PyBuffer_FromObject((PyObject *)chunk, 0, chunk->len)) == NULL)
-        goto fail;
+        goto exit;
 #else
     if ((res = PyMemoryView_FromObject((PyObject*)chunk)) == NULL)
-        goto fail;
+        goto exit;
 #endif
-    /* PyBuffer_FromObject() created a new reference.  We'll release our
-     * reference held in 'chunk' in the 'cleanup' clause. */
-
-    goto cleanup;
-    fail:
-      assert (PyErr_Occurred());
-      if (res != NULL) {
-          Py_DECREF(res);
-          res = NULL;
-      }
-      /* Fall through to cleanup: */
-    cleanup:
-      if (chunk != NULL) {
-          Py_DECREF((PyObject *) chunk);
-      }
-      if (str != NULL) {
-          /* str's mem was allocated by PQunescapeBytea; must use PQfreemem: */
-          PQfreemem(str);
-      }
-      /* We allocated buffer with PyMem_Malloc; must use PyMem_Free: */
-      PyMem_Free(buffer);
-
-      return res;
+
+exit:
+    Py_XDECREF((PyObject *)chunk);
+    PyMem_Free(buffer);
+
+    return res;
+}
+
+
+/* Value of each 7-bit char as a hex digit, -1 if it is not a hex digit.
+ * Explicitly 'signed char': where plain char is unsigned the -1 markers
+ * would read back as 255 and never compare equal to -1. */
+static const signed char hex_lut[128] = {
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+     0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
+    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+};
+
+/* Parse a bytea output buffer encoded in 'hex' format.
+ *
+ * the format is described in
+ * http://www.postgresql.org/docs/9.0/static/datatype-binary.html
+ *
+ * Parse the buffer in 'bufin', whose length is 'sizein'.
+ * Return a new buffer allocated by PyMem_Malloc and set 'sizeout' to its size.
+ * In case of error set an exception and return NULL.
+ * Chars that are not hex digits are skipped; an unpaired trailing digit is
+ * dropped.
+ */
+static char *
+psycopg_parse_hex(const char *bufin, Py_ssize_t sizein, Py_ssize_t *sizeout)
+{
+    char *ret = NULL;
+    const char *bufend = bufin + sizein;
+    const char *pi = bufin + 2;     /* past the \x */
+    char *bufout;
+    char *po;
+    int hi = -1;                    /* pending high nibble, -1 if none */
+
+    po = bufout = PyMem_Malloc((sizein - 2) >> 1);   /* output size upper bound */
+    if (NULL == bufout) {
+        PyErr_NoMemory();
+        goto exit;
+    }
+
+    /* Implementation note: we call this function upon database response, not
+     * user input (because we are parsing the output format of a buffer) so we
+     * don't expect errors. On bad input we reserve the right to return a bad
+     * output, not an error - but never to read or write out of bounds: a
+     * byte is stored only once both its digits are found before 'bufend'. */
+    for (; pi < bufend; ++pi) {
+        const signed char c = hex_lut[*pi & '\x7f'];
+        if (c < 0) { continue; }                /* not a hex digit: skip */
+        if (hi < 0) { hi = c; continue; }       /* first digit of a pair */
+        *po++ = (char)((hi << 4) | c);
+        hi = -1;
+    }
+
+    ret = bufout;
+    *sizeout = po - bufout;
+
+exit:
+    return ret;
+}
+
+/* Parse a bytea output buffer encoded in 'escape' format.
+ *
+ * The format is described in
+ * http://www.postgresql.org/docs/9.0/static/datatype-binary.html
+ *
+ * Parse the buffer in 'bufin', whose length is 'sizein', never reading past
+ * its end. Return a new buffer allocated by PyMem_Malloc and set 'sizeout'
+ * to its size. In case of error set an exception and return NULL.
+ */
+static char *
+psycopg_parse_escape(const char *bufin, Py_ssize_t sizein, Py_ssize_t *sizeout)
+{
+    char *ret = NULL;
+    const char *bufend = bufin + sizein;
+    const char *pi = bufin;
+    char *bufout, *po;
+
+    po = bufout = PyMem_Malloc(sizein);   /* output size upper bound */
+    if (NULL == bufout) {
+        PyErr_NoMemory();
+        goto exit;
+    }
+
+    while (pi < bufend) {
+        const Py_ssize_t left = bufend - pi;    /* input left, *pi included */
+        if (*pi != '\\') {
+            /* Unescaped char */
+            *po++ = *pi++;
+        }
+        else if (left >= 4 &&
+            (pi[1] >= '0' && pi[1] <= '3') &&
+            (pi[2] >= '0' && pi[2] <= '7') &&
+            (pi[3] >= '0' && pi[3] <= '7'))
+        {
+            /* Escaped octal value: only looked at if all 4 chars are there */
+            *po++ = ((pi[1] - '0') << 6) | ((pi[2] - '0') << 3) |
+                    ((pi[3] - '0'));
+            pi += 4;
+        }
+        else if (left >= 2) {
+            /* Escaped char */
+            *po++ = pi[1];
+            pi += 2;
+        }
+        else { break; }     /* lone backslash ending the input: drop it */
+    }
+
+    ret = bufout;
+    *sizeout = po - bufout;
+
+exit:
+    return ret;
+}
+
author	Federico Di Gregorio <fog@initd.org>	2011-05-11 09:58:49 +0200
committer	Federico Di Gregorio <fog@initd.org>	2011-05-11 09:58:49 +0200
commit	ab685c2fc0a04651041957af7419a1ecfeeb9e53 (patch)
tree	409828624d0d78257928497c22951c206e099265 /psycopg
parent	29f83f05c4f6565ea67d8b5424b1ec66d55c0858 (diff)
parent	9080b30741e9a6f6b80a6700197445bff48df917 (diff)
download	psycopg2-ab685c2fc0a04651041957af7419a1ecfeeb9e53.tar.gz