summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniele Varrazzo <daniele.varrazzo@gmail.com>2011-03-26 13:02:57 +0000
committerDaniele Varrazzo <daniele.varrazzo@gmail.com>2011-03-26 13:02:57 +0000
commit90536a187dbe6bd978d3bfd5a0db5359ad7f78ad (patch)
tree768a4b0c13e7e571e07736ebb3c3f27e06833134
parentf34e44b3f426bb62103578704d66eac699ee752e (diff)
parentda58bee70af5ee84362a000ab2ab726e20f12df8 (diff)
downloadpsycopg2-90536a187dbe6bd978d3bfd5a0db5359ad7f78ad.tar.gz
Merge branch 'bytea-parser' into devel
-rw-r--r--NEWS2
-rw-r--r--doc/src/faq.rst4
-rw-r--r--doc/src/usage.rst23
-rw-r--r--psycopg/typecast_binary.c236
-rw-r--r--tests/testutils.py18
-rwxr-xr-xtests/types_basic.py96
6 files changed, 267 insertions, 112 deletions
diff --git a/NEWS b/NEWS
index 1eea8af..8aef82e 100644
--- a/NEWS
+++ b/NEWS
@@ -1,6 +1,8 @@
What's new in psycopg 2.4.1
---------------------------
+ - Use own parser for bytea output: libpq 9.0 is no longer required
+ to parse the hex format.
- Correctly detect an empty query sent to the backend (ticket #46).
diff --git a/doc/src/faq.rst b/doc/src/faq.rst
index 642c3e7..4ebf15a 100644
--- a/doc/src/faq.rst
+++ b/doc/src/faq.rst
@@ -97,7 +97,9 @@ Psycopg converts :sql:`decimal`\/\ :sql:`numeric` database types into Python `!D
Transferring binary data from PostgreSQL 9.0 doesn't work.
PostgreSQL 9.0 uses by default `the "hex" format`__ to transfer
:sql:`bytea` data: the format can't be parsed by the libpq 8.4 and
- earlier. Three options to solve the problem are:
+ earlier. The problem is solved in Psycopg 2.4.1, which uses its own parser
+ for the :sql:`bytea` format. For previous Psycopg releases, three options
+ to solve the problem are:
- set the bytea_output__ parameter to ``escape`` in the server;
- execute the database command ``SET bytea_output TO escape;`` in the
diff --git a/doc/src/usage.rst b/doc/src/usage.rst
index 47b78be..4d039de 100644
--- a/doc/src/usage.rst
+++ b/doc/src/usage.rst
@@ -271,6 +271,10 @@ the SQL string that would be sent to the database.
.. versionchanged:: 2.4
only strings were supported before.
+ .. versionchanged:: 2.4.1
+ can parse the 'hex' format from 9.0 servers without relying on the
+ version of the client library.
+
.. note::
In Python 2, if you have binary data in a `!str` object, you can pass them
@@ -282,18 +286,15 @@ the SQL string that would be sent to the database.
.. warning::
- PostgreSQL 9 uses by default `a new "hex" format`__ to emit :sql:`bytea`
- fields. Unfortunately this format can't be parsed by libpq versions
- before 9.0. This means that using a library client with version lesser
- than 9.0 to talk with a server 9.0 or later you may have problems
- receiving :sql:`bytea` data. To work around this problem you can set the
- `bytea_output`__ parameter to ``escape``, either in the server
- configuration or in the client session using a query such as ``SET
- bytea_output TO escape;`` before trying to receive binary data.
+ Since version 9.0 PostgreSQL uses by default `a new "hex" format`__ to
+ emit :sql:`bytea` fields. Starting from Psycopg 2.4.1 the format is
+ correctly supported. If you use a previous version you will need some
+ extra care when receiving bytea from PostgreSQL: you must have at least
+ libpq 9.0 installed on the client or, alternatively, you can set the
+ `bytea_output`__ configuration parameter to ``escape``, either in the
+ server configuration file or in the client session (using a query such as
+ ``SET bytea_output TO escape;``) before receiving binary data.
- Starting from Psycopg 2.4 this condition is detected and signaled with a
- `~psycopg2.InterfaceError`.
-
.. __: http://www.postgresql.org/docs/9.0/static/datatype-binary.html
.. __: http://www.postgresql.org/docs/9.0/static/runtime-config-client.html#GUC-BYTEA-OUTPUT
diff --git a/psycopg/typecast_binary.c b/psycopg/typecast_binary.c
index fa371e2..b145b1b 100644
--- a/psycopg/typecast_binary.c
+++ b/psycopg/typecast_binary.c
@@ -40,7 +40,7 @@ chunk_dealloc(chunkObject *self)
FORMAT_CODE_PY_SSIZE_T,
self->base, self->len
);
- PQfreemem(self->base);
+ PyMem_Free(self->base);
Py_TYPE(self)->tp_free((PyObject *)self);
}
@@ -127,95 +127,185 @@ PyTypeObject chunkType = {
chunk_doc /* tp_doc */
};
-static PyObject *
+
+static char *psycopg_parse_hex(
+ const char *bufin, Py_ssize_t sizein, Py_ssize_t *sizeout);
+static char *psycopg_parse_escape(
+ const char *bufin, Py_ssize_t sizein, Py_ssize_t *sizeout);
+
+/* The function is not static and not hidden as we use ctypes to test it. */
+PyObject *
typecast_BINARY_cast(const char *s, Py_ssize_t l, PyObject *curs)
{
chunkObject *chunk = NULL;
PyObject *res = NULL;
- char *str = NULL, *buffer = NULL;
- size_t len;
+ char *buffer = NULL;
+ Py_ssize_t len;
if (s == NULL) {Py_INCREF(Py_None); return Py_None;}
- /* PQunescapeBytea absolutely wants a 0-terminated string and we don't
- want to copy the whole buffer, right? Wrong, but there isn't any other
- way <g> */
- if (s[l] != '\0') {
- if ((buffer = PyMem_Malloc(l+1)) == NULL) {
- PyErr_NoMemory();
- goto fail;
+ if (s[0] == '\\' && s[1] == 'x') {
+ /* This is a buffer escaped in hex format: libpq before 9.0 can't
+ * parse it and we can't detect reliably the libpq version at runtime.
+ * So the only robust option is to parse it ourselves - luckily it's
+ * an easy format.
+ */
+ if (NULL == (buffer = psycopg_parse_hex(s, l, &len))) {
+ goto exit;
}
- /* Py_ssize_t->size_t cast is safe, as long as the Py_ssize_t is
- * >= 0: */
- assert (l >= 0);
- strncpy(buffer, s, (size_t) l);
-
- buffer[l] = '\0';
- s = buffer;
- }
- str = (char*)PQunescapeBytea((unsigned char*)s, &len);
- Dprintf("typecast_BINARY_cast: unescaped " FORMAT_CODE_SIZE_T " bytes",
- len);
-
- /* The type of the second parameter to PQunescapeBytea is size_t *, so it's
- * possible (especially with Python < 2.5) to get a return value too large
- * to fit into a Python container. */
- if (len > (size_t) PY_SSIZE_T_MAX) {
- PyErr_SetString(PyExc_IndexError, "PG buffer too large to fit in Python"
- " buffer.");
- goto fail;
}
-
- /* Check the escaping was successful */
- if (s[0] == '\\' && s[1] == 'x' /* input encoded in hex format */
- && str[0] == 'x' /* output resulted in an 'x' */
- && s[2] != '7' && s[3] != '8') /* input wasn't really an x (0x78) */
- {
- PyErr_SetString(InterfaceError,
- "can't receive bytea data from server >= 9.0 with the current "
- "libpq client library: please update the libpq to at least 9.0 "
- "or set bytea_output to 'escape' in the server config "
- "or with a query");
- goto fail;
+ else {
+ /* This is a buffer in the classic bytea format. So we can hand it
+ * to PQunescapeBytea to have it parsed, right? ...Wrong. We
+ * could, but then we'd have to record whether buffer was allocated by
+ * Python or by the libpq to dispose it properly. Furthermore the
+ * PQunescapeBytea interface is not the most brilliant as it wants a
+ * null-terminated string even though we already know its length, thus
+ * requiring a useless memcpy and strlen.
+ * So we'll just have our better integrated parser, let's finish this
+ * story.
+ */
+ if (NULL == (buffer = psycopg_parse_escape(s, l, &len))) {
+ goto exit;
+ }
}
chunk = (chunkObject *) PyObject_New(chunkObject, &chunkType);
- if (chunk == NULL) goto fail;
+ if (chunk == NULL) goto exit;
- /* **Transfer** ownership of str's memory to the chunkObject: */
- chunk->base = str;
- str = NULL;
+ /* **Transfer** ownership of buffer's memory to the chunkObject: */
+ chunk->base = buffer;
+ buffer = NULL;
+ chunk->len = (Py_ssize_t)len;
- /* size_t->Py_ssize_t cast was validated above: */
- chunk->len = (Py_ssize_t) len;
#if PY_MAJOR_VERSION < 3
if ((res = PyBuffer_FromObject((PyObject *)chunk, 0, chunk->len)) == NULL)
- goto fail;
+ goto exit;
#else
if ((res = PyMemoryView_FromObject((PyObject*)chunk)) == NULL)
- goto fail;
+ goto exit;
#endif
- /* PyBuffer_FromObject() created a new reference. We'll release our
- * reference held in 'chunk' in the 'cleanup' clause. */
-
- goto cleanup;
- fail:
- assert (PyErr_Occurred());
- if (res != NULL) {
- Py_DECREF(res);
- res = NULL;
- }
- /* Fall through to cleanup: */
- cleanup:
- if (chunk != NULL) {
- Py_DECREF((PyObject *) chunk);
- }
- if (str != NULL) {
- /* str's mem was allocated by PQunescapeBytea; must use PQfreemem: */
- PQfreemem(str);
- }
- /* We allocated buffer with PyMem_Malloc; must use PyMem_Free: */
- PyMem_Free(buffer);
-
- return res;
+
+exit:
+ Py_XDECREF((PyObject *)chunk);
+ PyMem_Free(buffer);
+
+ return res;
+}
+
+
+static const char hex_lut[128] = {
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
+ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+};
+
+/* Parse a bytea output buffer encoded in 'hex' format.
+ *
+ * the format is described in
+ * http://www.postgresql.org/docs/9.0/static/datatype-binary.html
+ *
+ * Parse the buffer in 'bufin', whose length is 'sizein'.
+ * Return a new buffer allocated by PyMem_Malloc and set 'sizeout' to its size.
+ * In case of error set an exception and return NULL.
+ */
+static char *
+psycopg_parse_hex(const char *bufin, Py_ssize_t sizein, Py_ssize_t *sizeout)
+{
+ char *ret = NULL;
+ const char *bufend = bufin + sizein;
+ const char *pi = bufin + 2; /* past the \x */
+ char *bufout;
+ char *po;
+
+ po = bufout = PyMem_Malloc((sizein - 2) >> 1); /* output size upper bound */
+ if (NULL == bufout) {
+ PyErr_NoMemory();
+ goto exit;
+ }
+
+ /* Implementation note: we call this function upon database response, not
+ * user input (because we are parsing the output format of a buffer) so we
+ * don't expect errors. On bad input we reserve the right to return a bad
+ * output, not an error.
+ */
+ while (pi < bufend) {
+ char c;
+ while (-1 == (c = hex_lut[*pi++ & '\x7f'])) {
+ if (pi >= bufend) { goto endloop; }
+ }
+ *po = c << 4;
+
+ while (-1 == (c = hex_lut[*pi++ & '\x7f'])) {
+ if (pi >= bufend) { goto endloop; }
+ }
+ *po++ |= c;
+ }
+endloop:
+
+ ret = bufout;
+ *sizeout = po - bufout;
+
+exit:
+ return ret;
+}
+
+/* Parse a bytea output buffer encoded in 'escape' format.
+ *
+ * the format is described in
+ * http://www.postgresql.org/docs/9.0/static/datatype-binary.html
+ *
+ * Parse the buffer in 'bufin', whose length is 'sizein'.
+ * Return a new buffer allocated by PyMem_Malloc and set 'sizeout' to its size.
+ * In case of error set an exception and return NULL.
+ */
+static char *
+psycopg_parse_escape(const char *bufin, Py_ssize_t sizein, Py_ssize_t *sizeout)
+{
+ char *ret = NULL;
+ const char *bufend = bufin + sizein;
+ const char *pi = bufin;
+ char *bufout;
+ char *po;
+
+ po = bufout = PyMem_Malloc(sizein); /* output size upper bound */
+ if (NULL == bufout) {
+ PyErr_NoMemory();
+ goto exit;
+ }
+
+ while (pi < bufend) {
+ if (*pi != '\\') {
+ /* Unescaped char */
+ *po++ = *pi++;
+ continue;
+ }
+ if ((pi[1] >= '0' && pi[1] <= '3') &&
+ (pi[2] >= '0' && pi[2] <= '7') &&
+ (pi[3] >= '0' && pi[3] <= '7'))
+ {
+ /* Escaped octal value */
+ *po++ = ((pi[1] - '0') << 6) |
+ ((pi[2] - '0') << 3) |
+ ((pi[3] - '0'));
+ pi += 4;
+ }
+ else {
+ /* Escaped char */
+ *po++ = pi[1];
+ pi += 2;
+ }
+ }
+
+ ret = bufout;
+ *sizeout = po - bufout;
+
+exit:
+ return ret;
}
+
diff --git a/tests/testutils.py b/tests/testutils.py
index 2459894..26551d4 100644
--- a/tests/testutils.py
+++ b/tests/testutils.py
@@ -140,24 +140,6 @@ def skip_if_no_namedtuple(f):
return skip_if_no_namedtuple_
-def skip_if_broken_hex_binary(f):
- """Decorator to detect libpq < 9.0 unable to parse bytea in hex format"""
- def cope_with_hex_binary_(self):
- from psycopg2 import InterfaceError
- try:
- return f(self)
- except InterfaceError, e:
- if '9.0' in str(e) and self.conn.server_version >= 90000:
- return self.skipTest(
- # FIXME: we are only assuming the libpq is older here,
- # but we don't have a reliable way to detect the libpq
- # version, not pre-9 at least.
- "bytea broken with server >= 9.0, libpq < 9")
- else:
- raise
-
- return cope_with_hex_binary_
-
def skip_if_no_iobase(f):
"""Skip a test if io.TextIOBase is not available."""
def skip_if_no_iobase_(self):
diff --git a/tests/types_basic.py b/tests/types_basic.py
index 4010631..0eb6ac4 100755
--- a/tests/types_basic.py
+++ b/tests/types_basic.py
@@ -28,7 +28,7 @@ except:
pass
import sys
import testutils
-from testutils import unittest, skip_if_broken_hex_binary
+from testutils import unittest
from testconfig import dsn
import psycopg2
@@ -116,7 +116,6 @@ class TypesBasicTests(unittest.TestCase):
s = self.execute("SELECT %s AS foo", (float("-inf"),))
self.failUnless(str(s) == "-inf", "wrong float quoting: " + str(s))
- @skip_if_broken_hex_binary
def testBinary(self):
if sys.version_info[0] < 3:
s = ''.join([chr(x) for x in range(256)])
@@ -143,7 +142,6 @@ class TypesBasicTests(unittest.TestCase):
b = psycopg2.Binary(bytes([]))
self.assertEqual(str(b), "''::bytea")
- @skip_if_broken_hex_binary
def testBinaryRoundTrip(self):
# test to make sure buffers returned by psycopg2 are
# understood by execute:
@@ -191,7 +189,6 @@ class TypesBasicTests(unittest.TestCase):
s = self.execute("SELECT '{}'::text AS foo")
self.failUnlessEqual(s, "{}")
- @skip_if_broken_hex_binary
@testutils.skip_from_python(3)
def testTypeRoundtripBuffer(self):
o1 = buffer("".join(map(chr, range(256))))
@@ -204,7 +201,6 @@ class TypesBasicTests(unittest.TestCase):
self.assertEqual(type(o1), type(o2))
self.assertEqual(str(o1), str(o2))
- @skip_if_broken_hex_binary
@testutils.skip_from_python(3)
def testTypeRoundtripBufferArray(self):
o1 = buffer("".join(map(chr, range(256))))
@@ -213,7 +209,6 @@ class TypesBasicTests(unittest.TestCase):
self.assertEqual(type(o1[0]), type(o2[0]))
self.assertEqual(str(o1[0]), str(o2[0]))
- @skip_if_broken_hex_binary
@testutils.skip_before_python(3)
def testTypeRoundtripBytes(self):
o1 = bytes(range(256))
@@ -225,7 +220,6 @@ class TypesBasicTests(unittest.TestCase):
o2 = self.execute("select %s;", (o1,))
self.assertEqual(memoryview, type(o2))
- @skip_if_broken_hex_binary
@testutils.skip_before_python(3)
def testTypeRoundtripBytesArray(self):
o1 = bytes(range(256))
@@ -233,7 +227,6 @@ class TypesBasicTests(unittest.TestCase):
o2 = self.execute("select %s;", (o1,))
self.assertEqual(memoryview, type(o2[0]))
- @skip_if_broken_hex_binary
@testutils.skip_before_python(2, 6)
def testAdaptBytearray(self):
o1 = bytearray(range(256))
@@ -258,7 +251,6 @@ class TypesBasicTests(unittest.TestCase):
else:
self.assertEqual(memoryview, type(o2))
- @skip_if_broken_hex_binary
@testutils.skip_before_python(2, 7)
def testAdaptMemoryview(self):
o1 = memoryview(bytearray(range(256)))
@@ -335,6 +327,92 @@ class AdaptSubclassTest(unittest.TestCase):
del psycopg2.extensions.adapters[A, psycopg2.extensions.ISQLQuote]
+class ByteaParserTest(unittest.TestCase):
+ """Unit test for our bytea format parser."""
+ def setUp(self):
+ try:
+ self._cast = self._import_cast()
+ except Exception, e:
+ return self.skipTest("can't test bytea parser: %s - %s"
+ % (e.__class__.__name__, e))
+
+ def _import_cast(self):
+ """Use ctypes to access the C function.
+
+ Raise any sort of error: we just support this where ctypes works as
+ expected.
+ """
+ import ctypes
+ lib = ctypes.cdll.LoadLibrary(psycopg2._psycopg.__file__)
+ cast = lib.typecast_BINARY_cast
+ cast.argtypes = [ctypes.c_char_p, ctypes.c_size_t, ctypes.py_object]
+ cast.restype = ctypes.py_object
+ return cast
+
+ def cast(self, buffer):
+ """Cast a buffer from the output format"""
+ l = buffer and len(buffer) or 0
+ rv = self._cast(buffer, l, None)
+
+ if rv is None:
+ return None
+
+ if sys.version_info[0] < 3:
+ return str(rv)
+ else:
+ return rv.tobytes()
+
+ def test_null(self):
+ rv = self.cast(None)
+ self.assertEqual(rv, None)
+
+ def test_blank(self):
+ rv = self.cast(b(''))
+ self.assertEqual(rv, b(''))
+
+ def test_blank_hex(self):
+ # Reported as problematic in ticket #48
+ rv = self.cast(b('\\x'))
+ self.assertEqual(rv, b(''))
+
+ def test_full_hex(self, upper=False):
+ buf = ''.join(("%02x" % i) for i in range(256))
+ if upper: buf = buf.upper()
+ buf = '\\x' + buf
+ rv = self.cast(b(buf))
+ if sys.version_info[0] < 3:
+ self.assertEqual(rv, ''.join(map(chr, range(256))))
+ else:
+ self.assertEqual(rv, bytes(range(256)))
+
+ def test_full_hex_upper(self):
+ return self.test_full_hex(upper=True)
+
+ def test_full_escaped_octal(self):
+ buf = ''.join(("\\%03o" % i) for i in range(256))
+ rv = self.cast(b(buf))
+ if sys.version_info[0] < 3:
+ self.assertEqual(rv, ''.join(map(chr, range(256))))
+ else:
+ self.assertEqual(rv, bytes(range(256)))
+
+ def test_escaped_mixed(self):
+ import string
+ buf = ''.join(("\\%03o" % i) for i in range(32))
+ buf += string.ascii_letters
+ buf += ''.join('\\' + c for c in string.ascii_letters)
+ buf += '\\\\'
+ rv = self.cast(b(buf))
+ if sys.version_info[0] < 3:
+ tgt = ''.join(map(chr, range(32))) \
+ + string.ascii_letters * 2 + '\\'
+ else:
+ tgt = bytes(range(32)) + \
+ (string.ascii_letters * 2 + '\\').encode('ascii')
+
+ self.assertEqual(rv, tgt)
+
+
def test_suite():
return unittest.TestLoader().loadTestsFromName(__name__)