| author | Tim Peters <tim.peters@gmail.com> | 2003-02-11 22:43:24 +0000 |
|---|---|---|
| committer | Tim Peters <tim.peters@gmail.com> | 2003-02-11 22:43:24 +0000 |
| commit | 42f08ac1e303117ea789a8ad2a1326db75f923f8 | |
| tree | ee82ccd3ada46a1ee43cded38425bf3a4146b67d | |
| parent | e7b33db22d52424ac99ff02f102f83d3d953e93b | |
| download | cpython-git-42f08ac1e303117ea789a8ad2a1326db75f923f8.tar.gz | |
Implemented batching for dicts in cPickle.  This is after two failed
attempts to merge the C list-batch and dict-batch code -- they worked, but
it was a godawful mess to read.
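
As context for the change (not part of the commit): the observable effect is that a dict with more entries than the 1000-element batch size is pickled as several `MARK ... SETITEMS` runs instead of one, which is what the new `test_dict_chunking` test checks. A minimal sketch of one way to see this, counting `SETITEMS` opcodes with `pickletools` (assuming a Python where `pickletools` is available):

```python
import pickle
import pickletools

# 2500 entries span three batches of at most 1000 pairs each, so a
# protocol >= 1 pickle should contain at least two SETITEMS opcodes
# (the same condition the new test asserts).
data = dict.fromkeys(range(2500))
s = pickle.dumps(data, 2)
num_setitems = sum(1 for opcode, arg, pos in pickletools.genops(s)
                   if opcode.name == "SETITEMS")
assert num_setitems >= 2
```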
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | Lib/pickle.py | 3 |
| -rw-r--r-- | Lib/test/pickletester.py | 34 |
| -rw-r--r-- | Modules/cPickle.c | 149 |

3 files changed, 138 insertions, 48 deletions
diff --git a/Lib/pickle.py b/Lib/pickle.py
index 0173c1f83a..00f5834bea 100644
--- a/Lib/pickle.py
+++ b/Lib/pickle.py
@@ -612,7 +612,8 @@ class Pickler:
 
     dispatch[ListType] = save_list
 
-    # Keep in synch with cPickle's BATCHSIZE.
+    # Keep in synch with cPickle's BATCHSIZE.  Nothing will break if it gets
+    # out of synch, though.
     _BATCHSIZE = 1000
 
     def _batch_appends(self, items):
diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py
index 6ed29b1b42..734f2a30c0 100644
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py
@@ -694,23 +694,6 @@ class AbstractPickleTests(unittest.TestCase):
             else:
                 self.failUnless(num_appends >= 2)
 
-# XXX Temporary hack, so long as the C implementation of pickle protocol
-# XXX 2 isn't ready.  When it is, move the methods in TempAbstractPickleTests
-# XXX into AbstractPickleTests above, and get rid of TempAbstractPickleTests
-# XXX along with the references to it in test_pickle.py.
-class TempAbstractPickleTests(unittest.TestCase):
-
-    def test_newobj_list_slots(self):
-        x = SlotList([1, 2, 3])
-        x.foo = 42
-        x.bar = "hello"
-        s = self.dumps(x, 2)
-        y = self.loads(s)
-        self.assertEqual(list(x), list(y))
-        self.assertEqual(x.__dict__, y.__dict__)
-        self.assertEqual(x.foo, y.foo)
-        self.assertEqual(x.bar, y.bar)
-
     def test_dict_chunking(self):
         n = 10  # too small to chunk
         x = dict.fromkeys(range(n))
@@ -733,6 +716,23 @@ class TempAbstractPickleTests(unittest.TestCase):
             else:
                 self.failUnless(num_setitems >= 2)
 
+# XXX Temporary hack, so long as the C implementation of pickle protocol
+# XXX 2 isn't ready.  When it is, move the methods in TempAbstractPickleTests
+# XXX into AbstractPickleTests above, and get rid of TempAbstractPickleTests
+# XXX along with the references to it in test_pickle.py.
+class TempAbstractPickleTests(unittest.TestCase):
+
+    def test_newobj_list_slots(self):
+        x = SlotList([1, 2, 3])
+        x.foo = 42
+        x.bar = "hello"
+        s = self.dumps(x, 2)
+        y = self.loads(s)
+        self.assertEqual(list(x), list(y))
+        self.assertEqual(x.__dict__, y.__dict__)
+        self.assertEqual(x.foo, y.foo)
+        self.assertEqual(x.bar, y.bar)
+
 class MyInt(int):
     sample = 1
 
diff --git a/Modules/cPickle.c b/Modules/cPickle.c
index a35905d788..6af4afd3a7 100644
--- a/Modules/cPickle.c
+++ b/Modules/cPickle.c
@@ -88,7 +88,9 @@ PyDoc_STRVAR(cPickle_module_documentation,
 #define FALSE       "I00\n"
 
 /* Keep in synch with pickle.Pickler._BATCHSIZE.  This is how many elements
- * batch_{list, dict} pump out before doing APPENDS/SETITEMS.
+ * batch_list/dict() pumps out before doing APPENDS/SETITEMS.  Nothing will
+ * break if this gets out of synch with pickle.py, but it's unclear that
+ * would help anything either.
  */
 #define BATCHSIZE 1000
 
@@ -1709,7 +1711,6 @@ save_list(Picklerobject *self, PyObject *args)
 	int len;
 	PyObject *iter;
 
-
 	if (self->fast && !fast_save_enter(self, args))
 		goto finally;
 
@@ -1756,18 +1757,123 @@ save_list(Picklerobject *self, PyObject *args)
 }
 
 
+/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
+ *     MARK key value ... key value SETITEMS
+ * opcode sequences.  Calling code should have arranged to first create an
+ * empty dict, or dict-like object, for the SETITEMS to operate on.
+ * Returns 0 on success, <0 on error.
+ *
+ * This is very much like batch_list().  The difference between saving
+ * elements directly, and picking apart two-tuples, is so long-winded at
+ * the C level, though, that attempts to combine these routines were too
+ * ugly to bear.
+ */
+static int
+batch_dict(Picklerobject *self, PyObject *iter)
+{
+	PyObject *p;
+	PyObject *slice[BATCHSIZE];
+	int i, n;
+
+	static char setitem = SETITEM;
+	static char setitems = SETITEMS;
+
+	assert(iter != NULL);
+
+	if (self->proto == 0) {
+		/* SETITEMS isn't available; do one at a time. */
+		for (;;) {
+			p = PyIter_Next(iter);
+			if (p == NULL) {
+				if (PyErr_Occurred())
+					return -1;
+				break;
+			}
+			if (!PyTuple_Check(p) || PyTuple_Size(p) != 2) {
+				PyErr_SetString(PyExc_TypeError, "dict items "
+					"iterator must return 2-tuples");
+				return -1;
+			}
+			i = save(self, PyTuple_GET_ITEM(p, 0), 0);
+			if (i >= 0)
+				i = save(self, PyTuple_GET_ITEM(p, 1), 0);
+			Py_DECREF(p);
+			if (i < 0)
+				return -1;
+			if (self->write_func(self, &setitem, 1) < 0)
+				return -1;
+		}
+		return 0;
+	}
+
+	/* proto > 0:  write in batches of BATCHSIZE. */
+	do {
+		/* Get next group of (no more than) BATCHSIZE elements. */
+		for (n = 0; n < BATCHSIZE; ++n) {
+			p = PyIter_Next(iter);
+			if (p == NULL) {
+				if (PyErr_Occurred())
+					goto BatchFailed;
+				break;
+			}
+			if (!PyTuple_Check(p) || PyTuple_Size(p) != 2) {
+				PyErr_SetString(PyExc_TypeError, "dict items "
+					"iterator must return 2-tuples");
+				goto BatchFailed;
+			}
+			slice[n] = p;
+		}
+
+		if (n > 1) {
+			/* Pump out MARK, slice[0:n], SETITEMS. */
+			if (self->write_func(self, &MARKv, 1) < 0)
+				goto BatchFailed;
+			for (i = 0; i < n; ++i) {
+				p = slice[i];
+				if (save(self, PyTuple_GET_ITEM(p, 0), 0) < 0)
+					goto BatchFailed;
+				if (save(self, PyTuple_GET_ITEM(p, 1), 0) < 0)
+					goto BatchFailed;
+			}
+			if (self->write_func(self, &setitems, 1) < 0)
+				goto BatchFailed;
+		}
+		else if (n == 1) {
+			p = slice[0];
+			if (save(self, PyTuple_GET_ITEM(p, 0), 0) < 0)
+				goto BatchFailed;
+			if (save(self, PyTuple_GET_ITEM(p, 1), 0) < 0)
+				goto BatchFailed;
+			if (self->write_func(self, &setitem, 1) < 0)
+				goto BatchFailed;
+		}
+
+		for (i = 0; i < n; ++i) {
+			Py_DECREF(slice[i]);
+		}
+	} while (n == BATCHSIZE);
+	return 0;
+
+BatchFailed:
+	while (--n >= 0) {
+		Py_DECREF(slice[n]);
+	}
+	return -1;
+}
+
 static int
 save_dict(Picklerobject *self, PyObject *args)
 {
-	PyObject *key = 0, *value = 0;
-	int i, len, res = -1, using_setitems;
+	int res = -1;
 	char s[3];
-
-	static char setitem = SETITEM, setitems = SETITEMS;
+	int len;
+	PyObject *iter;
 
 	if (self->fast && !fast_save_enter(self, args))
 		goto finally;
 
+	/* Create an empty dict. */
 	if (self->bin) {
 		s[0] = EMPTY_DICT;
 		len = 1;
@@ -1781,6 +1887,7 @@ save_dict(Picklerobject *self, PyObject *args)
 	if (self->write_func(self, s, len) < 0)
 		goto finally;
 
+	/* Get dict size, and bow out early if empty. */
 	if ((len = PyDict_Size(args)) < 0)
 		goto finally;
 
@@ -1793,30 +1900,12 @@ save_dict(Picklerobject *self, PyObject *args)
 			goto finally;
 	}
 
-	if ((using_setitems = (self->bin && (PyDict_Size(args) > 1))))
-		if (self->write_func(self, &MARKv, 1) < 0)
-			goto finally;
-
-	i = 0;
-	while (PyDict_Next(args, &i, &key, &value)) {
-		if (save(self, key, 0) < 0)
-			goto finally;
-
-		if (save(self, value, 0) < 0)
-			goto finally;
-
-		if (!using_setitems) {
-			if (self->write_func(self, &setitem, 1) < 0)
-				goto finally;
-		}
-	}
-
-	if (using_setitems) {
-		if (self->write_func(self, &setitems, 1) < 0)
-			goto finally;
-	}
-
-	res = 0;
+	/* Materialize the dict items. */
+	iter = PyObject_CallMethod(args, "iteritems", "()");
+	if (iter == NULL)
+		goto finally;
+	res = batch_dict(self, iter);
+	Py_DECREF(iter);
 
   finally:
 	if (self->fast && !fast_save_leave(self, args))
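
For readers who prefer Python to C, the loop below is a rough pure-Python rendering of the batching pattern `batch_dict()` implements. It is a sketch only: the `write` and `save` parameters stand in for the Pickler internals and are assumptions, not the code pickle.py actually ships; the opcode constants and the 1000-element batch size do come from the real modules.

```python
from itertools import islice
from pickle import MARK, SETITEM, SETITEMS

_BATCHSIZE = 1000  # keep in synch with cPickle's BATCHSIZE

def batch_setitems_sketch(write, save, items, proto):
    """Emit SETITEM/SETITEMS opcodes for an iterator of (key, value) pairs.

    Assumes an empty dict has already been written for the opcodes to
    operate on, as save_dict() arranges before calling batch_dict().
    """
    if proto == 0:
        # SETITEMS isn't available; emit one SETITEM per pair.
        for k, v in items:
            save(k)
            save(v)
            write(SETITEM)
        return
    it = iter(items)
    while True:
        # Grab the next group of (no more than) _BATCHSIZE pairs.
        batch = list(islice(it, _BATCHSIZE))
        n = len(batch)
        if n > 1:
            write(MARK)            # MARK k1 v1 ... kn vn SETITEMS
            for k, v in batch:
                save(k)
                save(v)
            write(SETITEMS)
        elif n == 1:
            k, v = batch[0]
            save(k)
            save(v)
            write(SETITEM)
        if n < _BATCHSIZE:         # iterator exhausted
            return
```

The same structure explains why the C version stayed separate from `batch_list()`: the only real difference is pulling each 2-tuple apart before saving, which is trivial here but long-winded at the C level.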
