summary refs log tree commit diff
diff options
context:
space:
mode:
author Guido van Rossum <guido@python.org> 2007-04-13 01:39:34 +0000
committer Guido van Rossum <guido@python.org> 2007-04-13 01:39:34 +0000
commit ad7d8d10b70b62b25fc8ebd1a6bfef0c008a232a (patch)
tree 2889cee1ac3e392aac63e7fc848e314d7cf2cda2
parent dc0b1a106981ee204936221f4e0863bd1d7a6ba6 (diff)
download cpython-git-ad7d8d10b70b62b25fc8ebd1a6bfef0c008a232a.tar.gz
Rough and dirty job -- allow concatenation of bytes and arbitrary
buffer-supporting objects (Unicode always excluded), and also of str and bytes. (For some reason u"" + b"" doesn't fail; I'll investigate later.)
-rw-r--r-- Include/bytesobject.h 1
-rw-r--r-- Lib/test/test_bytes.py 54
-rw-r--r-- Objects/bytesobject.c 224
-rw-r--r-- Objects/object.c 3
-rw-r--r-- Objects/stringobject.c 2
5 files changed, 212 insertions, 72 deletions
diff --git a/Include/bytesobject.h b/Include/bytesobject.h
index 849078d4f8..bc385c1dd7 100644
--- a/Include/bytesobject.h
+++ b/Include/bytesobject.h
@@ -34,6 +34,7 @@ PyAPI_DATA(PyTypeObject) PyBytes_Type;
/* Direct API functions */
PyAPI_FUNC(PyObject *) PyBytes_FromObject(PyObject *);
+PyAPI_FUNC(PyObject *) PyBytes_Concat(PyObject *, PyObject *);
PyAPI_FUNC(PyObject *) PyBytes_FromStringAndSize(const char *, Py_ssize_t);
PyAPI_FUNC(Py_ssize_t) PyBytes_Size(PyObject *);
PyAPI_FUNC(char *) PyBytes_AsString(PyObject *);
diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py
index 682f5d7fb3..1d826b6380 100644
--- a/Lib/test/test_bytes.py
+++ b/Lib/test/test_bytes.py
@@ -225,7 +225,7 @@ class BytesTest(unittest.TestCase):
# Skip step 0 (invalid)
for step in indices[1:]:
self.assertEqual(b[start:stop:step], bytes(L[start:stop:step]))
-
+
def test_regexps(self):
def by(s):
return bytes(map(ord, s))
@@ -298,7 +298,7 @@ class BytesTest(unittest.TestCase):
b[3:5] = [3, 4, 5, 6]
self.assertEqual(b, bytes(range(10)))
-
+
b[3:0] = [42, 42, 42]
self.assertEqual(b, bytes([0, 1, 2, 42, 42, 42, 3, 4, 5, 6, 7, 8, 9]))
@@ -317,7 +317,7 @@ class BytesTest(unittest.TestCase):
L[start:stop:step] = data
b[start:stop:step] = data
self.assertEquals(b, bytes(L))
-
+
del L[start:stop:step]
del b[start:stop:step]
self.assertEquals(b, bytes(L))
@@ -371,8 +371,10 @@ class BytesTest(unittest.TestCase):
b1 = bytes("abc")
b2 = bytes("def")
self.assertEqual(b1 + b2, bytes("abcdef"))
- self.assertRaises(TypeError, lambda: b1 + "def")
- self.assertRaises(TypeError, lambda: "abc" + b2)
+ self.assertEqual(b1 + "def", bytes("abcdef"))
+ self.assertEqual("def" + b1, bytes("defabc"))
+ self.assertRaises(TypeError, lambda: b1 + u"def")
+ ##self.assertRaises(TypeError, lambda: u"abc" + b2) # XXX FIXME
def test_repeat(self):
b = bytes("abc")
@@ -393,6 +395,14 @@ class BytesTest(unittest.TestCase):
self.assertEqual(b, bytes("abcdef"))
self.assertEqual(b, b1)
self.failUnless(b is b1)
+ b += "xyz"
+ self.assertEqual(b, b"abcdefxyz")
+ try:
+ b += u""
+ except TypeError:
+ pass
+ else:
+ self.fail("bytes += unicode didn't raise TypeError")
def test_irepeat(self):
b = bytes("abc")
@@ -490,7 +500,7 @@ class BytesTest(unittest.TestCase):
a.extend(a)
self.assertEqual(a, orig + orig)
self.assertEqual(a[5:], orig)
-
+
def test_remove(self):
b = b'hello'
b.remove(ord('l'))
@@ -643,14 +653,36 @@ class BytesTest(unittest.TestCase):
q = pm.loads(ps)
self.assertEqual(b, q)
+ def test_strip(self):
+ b = b'mississippi'
+ self.assertEqual(b.strip(b'i'), b'mississipp')
+ self.assertEqual(b.strip(b'm'), b'ississippi')
+ self.assertEqual(b.strip(b'pi'), b'mississ')
+ self.assertEqual(b.strip(b'im'), b'ssissipp')
+ self.assertEqual(b.strip(b'pim'), b'ssiss')
+
+ def test_lstrip(self):
+ b = b'mississippi'
+ self.assertEqual(b.lstrip(b'i'), b'mississippi')
+ self.assertEqual(b.lstrip(b'm'), b'ississippi')
+ self.assertEqual(b.lstrip(b'pi'), b'mississippi')
+ self.assertEqual(b.lstrip(b'im'), b'ssissippi')
+ self.assertEqual(b.lstrip(b'pim'), b'ssissippi')
+
+ def test_rstrip(self):
+ b = b'mississippi'
+ self.assertEqual(b.rstrip(b'i'), b'mississipp')
+ self.assertEqual(b.rstrip(b'm'), b'mississippi')
+ self.assertEqual(b.rstrip(b'pi'), b'mississ')
+ self.assertEqual(b.rstrip(b'im'), b'mississipp')
+ self.assertEqual(b.rstrip(b'pim'), b'mississ')
+
# Optimizations:
# __iter__? (optimization)
# __reversed__? (optimization)
- # XXX Some string methods? (Those that don't use character properties)
- # lstrip, rstrip, strip?? (currently un-pepped)
- # join
-
+ # XXX More string methods? (Those that don't use character properties)
+
# There are tests in string_tests.py that are more
# comprehensive for things like split, partition, etc.
# Unfortunately they are all bundled with tests that
@@ -675,7 +707,7 @@ class BytesAsStringTest(test.string_tests.BaseTest):
getattr(bytes, methodname),
object,
*args
- )
+ )
# Currently the bytes containment testing uses a single integer
# value. This may not be the final design, but until then the
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index 6d257a585a..213dbfc891 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -31,7 +31,10 @@ PyBytes_Init(void)
/* end nullbytes support */
-static int _getbytevalue(PyObject* arg, int *value)
+/* Helpers */
+
+static int
+_getbytevalue(PyObject* arg, int *value)
{
PyObject *intarg = PyNumber_Int(arg);
if (! intarg)
@@ -45,6 +48,24 @@ static int _getbytevalue(PyObject* arg, int *value)
return 1;
}
+Py_ssize_t
+_getbuffer(PyObject *obj, void **ptr)
+{
+ PyBufferProcs *buffer = obj->ob_type->tp_as_buffer;
+
+ if (buffer == NULL ||
+ PyUnicode_Check(obj) ||
+ buffer->bf_getreadbuffer == NULL ||
+ buffer->bf_getsegcount == NULL ||
+ buffer->bf_getsegcount(obj, NULL) != 1)
+ {
+ *ptr = NULL;
+ return -1;
+ }
+
+ return buffer->bf_getreadbuffer(obj, 0, ptr);
+}
+
/* Direct API functions */
PyObject *
@@ -140,56 +161,63 @@ PyBytes_Resize(PyObject *self, Py_ssize_t size)
return 0;
}
-/* Functions stuffed into the type object */
-
-static Py_ssize_t
-bytes_length(PyBytesObject *self)
-{
- return self->ob_size;
-}
-
-static PyObject *
-bytes_concat(PyBytesObject *self, PyObject *other)
+PyObject *
+PyBytes_Concat(PyObject *a, PyObject *b)
{
+ Py_ssize_t asize, bsize, size;
+ void *aptr, *bptr;
PyBytesObject *result;
- Py_ssize_t mysize;
- Py_ssize_t size;
- if (!PyBytes_Check(other)) {
- PyErr_Format(PyExc_TypeError,
- "can't concat bytes to %.100s", other->ob_type->tp_name);
+ asize = _getbuffer(a, &aptr);
+ bsize = _getbuffer(b, &bptr);
+ if (asize < 0 || bsize < 0) {
+ PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
+ a->ob_type->tp_name, b->ob_type->tp_name);
return NULL;
}
- mysize = self->ob_size;
- size = mysize + ((PyBytesObject *)other)->ob_size;
+ size = asize + bsize;
if (size < 0)
return PyErr_NoMemory();
+
result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, size);
if (result != NULL) {
- memcpy(result->ob_bytes, self->ob_bytes, self->ob_size);
- memcpy(result->ob_bytes + self->ob_size,
- ((PyBytesObject *)other)->ob_bytes,
- ((PyBytesObject *)other)->ob_size);
+ memcpy(result->ob_bytes, aptr, asize);
+ memcpy(result->ob_bytes + asize, bptr, bsize);
}
return (PyObject *)result;
}
+/* Functions stuffed into the type object */
+
+static Py_ssize_t
+bytes_length(PyBytesObject *self)
+{
+ return self->ob_size;
+}
+
+static PyObject *
+bytes_concat(PyBytesObject *self, PyObject *other)
+{
+ return PyBytes_Concat((PyObject *)self, other);
+}
+
static PyObject *
bytes_iconcat(PyBytesObject *self, PyObject *other)
{
- Py_ssize_t mysize;
+ void *optr;
Py_ssize_t osize;
+ Py_ssize_t mysize;
Py_ssize_t size;
- if (!PyBytes_Check(other)) {
+ osize = _getbuffer(other, &optr);
+ if (osize < 0) {
PyErr_Format(PyExc_TypeError,
"can't concat bytes to %.100s", other->ob_type->tp_name);
return NULL;
}
mysize = self->ob_size;
- osize = ((PyBytesObject *)other)->ob_size;
size = mysize + osize;
if (size < 0)
return PyErr_NoMemory();
@@ -197,7 +225,7 @@ bytes_iconcat(PyBytesObject *self, PyObject *other)
self->ob_size = size;
else if (PyBytes_Resize((PyObject *)self, size) < 0)
return NULL;
- memcpy(self->ob_bytes + mysize, ((PyBytesObject *)other)->ob_bytes, osize);
+ memcpy(self->ob_bytes + mysize, optr, osize);
Py_INCREF(self);
return (PyObject *)self;
}
@@ -366,15 +394,10 @@ static int
bytes_setslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi,
PyObject *values)
{
- int avail;
- int needed;
- char *bytes;
+ Py_ssize_t avail, needed;
+ void *bytes;
- if (values == NULL) {
- bytes = NULL;
- needed = 0;
- }
- else if (values == (PyObject *)self || !PyBytes_Check(values)) {
+ if (values == (PyObject *)self) {
/* Make a copy and call this function recursively */
int err;
values = PyBytes_FromObject(values);
@@ -384,10 +407,19 @@ bytes_setslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi,
Py_DECREF(values);
return err;
}
+ if (values == NULL) {
+ /* del b[lo:hi] */
+ bytes = NULL;
+ needed = 0;
+ }
else {
- assert(PyBytes_Check(values));
- bytes = ((PyBytesObject *)values)->ob_bytes;
- needed = ((PyBytesObject *)values)->ob_size;
+ needed = _getbuffer(values, &bytes);
+ if (needed < 0) {
+ PyErr_Format(PyExc_TypeError,
+ "can't set bytes slice from %.100s",
+ values->ob_type->tp_name);
+ return -1;
+ }
}
if (lo < 0)
@@ -840,42 +872,26 @@ bytes_str(PyBytesObject *self)
static PyObject *
bytes_richcompare(PyObject *self, PyObject *other, int op)
{
- PyBufferProcs *self_buffer, *other_buffer;
Py_ssize_t self_size, other_size;
void *self_bytes, *other_bytes;
PyObject *res;
Py_ssize_t minsize;
int cmp;
- /* For backwards compatibility, bytes can be compared to anything that
- supports the (binary) buffer API. Except Unicode. */
-
- if (PyUnicode_Check(self) || PyUnicode_Check(other)) {
- Py_INCREF(Py_NotImplemented);
- return Py_NotImplemented;
- }
+ /* Bytes can be compared to anything that supports the (binary) buffer
+ API. Except Unicode. */
- self_buffer = self->ob_type->tp_as_buffer;
- if (self_buffer == NULL ||
- self_buffer->bf_getreadbuffer == NULL ||
- self_buffer->bf_getsegcount == NULL ||
- self_buffer->bf_getsegcount(self, NULL) != 1)
- {
+ self_size = _getbuffer(self, &self_bytes);
+ if (self_size < 0) {
Py_INCREF(Py_NotImplemented);
return Py_NotImplemented;
}
- self_size = self_buffer->bf_getreadbuffer(self, 0, &self_bytes);
- other_buffer = other->ob_type->tp_as_buffer;
- if (other_buffer == NULL ||
- other_buffer->bf_getreadbuffer == NULL ||
- other_buffer->bf_getsegcount == NULL ||
- other_buffer->bf_getsegcount(self, NULL) != 1)
- {
+ other_size = _getbuffer(other, &other_bytes);
+ if (other_size < 0) {
Py_INCREF(Py_NotImplemented);
return Py_NotImplemented;
}
- other_size = other_buffer->bf_getreadbuffer(other, 0, &other_bytes);
if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
/* Shortcut: if the lengths differ, the objects differ */
@@ -2435,6 +2451,93 @@ bytes_remove(PyBytesObject *self, PyObject *arg)
Py_RETURN_NONE;
}
+/* XXX These two helpers could be optimized if argsize == 1 */
+
+Py_ssize_t
+lstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
+ void *argptr, Py_ssize_t argsize)
+{
+ Py_ssize_t i = 0;
+ while (i < mysize && memchr(argptr, myptr[i], argsize))
+ i++;
+ return i;
+}
+
+Py_ssize_t
+rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
+ void *argptr, Py_ssize_t argsize)
+{
+ Py_ssize_t i = mysize - 1;
+ while (i >= 0 && memchr(argptr, myptr[i], argsize))
+ i--;
+ return i + 1;
+}
+
+PyDoc_STRVAR(strip__doc__,
+"B.strip(bytes) -> bytes\n\
+\n\
+Strip leading and trailing bytes contained in the argument.");
+static PyObject *
+bytes_strip(PyBytesObject *self, PyObject *arg)
+{
+ Py_ssize_t left, right, mysize, argsize;
+ void *myptr, *argptr;
+ if (arg == NULL || !PyBytes_Check(arg)) {
+ PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument");
+ return NULL;
+ }
+ myptr = self->ob_bytes;
+ mysize = self->ob_size;
+ argptr = ((PyBytesObject *)arg)->ob_bytes;
+ argsize = ((PyBytesObject *)arg)->ob_size;
+ left = lstrip_helper(myptr, mysize, argptr, argsize);
+ right = rstrip_helper(myptr, mysize, argptr, argsize);
+ return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
+}
+
+PyDoc_STRVAR(lstrip__doc__,
+"B.lstrip(bytes) -> bytes\n\
+\n\
+Strip leading bytes contained in the argument.");
+static PyObject *
+bytes_lstrip(PyBytesObject *self, PyObject *arg)
+{
+ Py_ssize_t left, right, mysize, argsize;
+ void *myptr, *argptr;
+ if (arg == NULL || !PyBytes_Check(arg)) {
+ PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument");
+ return NULL;
+ }
+ myptr = self->ob_bytes;
+ mysize = self->ob_size;
+ argptr = ((PyBytesObject *)arg)->ob_bytes;
+ argsize = ((PyBytesObject *)arg)->ob_size;
+ left = lstrip_helper(myptr, mysize, argptr, argsize);
+ right = mysize;
+ return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
+}
+
+PyDoc_STRVAR(rstrip__doc__,
+"B.rstrip(bytes) -> bytes\n\
+\n\
+Strip trailing bytes contained in the argument.");
+static PyObject *
+bytes_rstrip(PyBytesObject *self, PyObject *arg)
+{
+ Py_ssize_t left, right, mysize, argsize;
+ void *myptr, *argptr;
+ if (arg == NULL || !PyBytes_Check(arg)) {
+ PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument");
+ return NULL;
+ }
+ myptr = self->ob_bytes;
+ mysize = self->ob_size;
+ argptr = ((PyBytesObject *)arg)->ob_bytes;
+ argsize = ((PyBytesObject *)arg)->ob_size;
+ left = 0;
+ right = rstrip_helper(myptr, mysize, argptr, argsize);
+ return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
+}
PyDoc_STRVAR(decode_doc,
"B.decode([encoding[,errors]]) -> unicode obect.\n\
@@ -2659,6 +2762,9 @@ bytes_methods[] = {
{"reverse", (PyCFunction)bytes_reverse, METH_NOARGS, reverse__doc__},
{"pop", (PyCFunction)bytes_pop, METH_VARARGS, pop__doc__},
{"remove", (PyCFunction)bytes_remove, METH_O, remove__doc__},
+ {"strip", (PyCFunction)bytes_strip, METH_O, strip__doc__},
+ {"lstrip", (PyCFunction)bytes_lstrip, METH_O, lstrip__doc__},
+ {"rstrip", (PyCFunction)bytes_rstrip, METH_O, rstrip__doc__},
{"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
{"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc},
{"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
diff --git a/Objects/object.c b/Objects/object.c
index f4ae4f3dfb..0bf0c60555 100644
--- a/Objects/object.c
+++ b/Objects/object.c
@@ -1347,7 +1347,7 @@ merge_class_dict(PyObject* dict, PyObject* aclass)
/* Helper for PyObject_Dir without arguments: returns the local scope. */
static PyObject *
-_dir_locals()
+_dir_locals(void)
{
PyObject *names;
PyObject *locals = PyEval_GetLocals();
@@ -1892,4 +1892,3 @@ _PyTrash_destroy_chain(void)
#ifdef __cplusplus
}
#endif
-
diff --git a/Objects/stringobject.c b/Objects/stringobject.c
index 7212df904e..94943f60e7 100644
--- a/Objects/stringobject.c
+++ b/Objects/stringobject.c
@@ -948,6 +948,8 @@ string_concat(register PyStringObject *a, register PyObject *bb)
if (PyUnicode_Check(bb))
return PyUnicode_Concat((PyObject *)a, bb);
#endif
+ if (PyBytes_Check(bb))
+ return PyBytes_Concat((PyObject *)a, bb);
PyErr_Format(PyExc_TypeError,
"cannot concatenate 'str' and '%.200s' objects",
bb->ob_type->tp_name);