summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTim Peters <tim.peters@gmail.com>2001-06-12 01:22:22 +0000
committerTim Peters <tim.peters@gmail.com>2001-06-12 01:22:22 +0000
commitafc86946634d396cc0ce2d297ddbdf9beeaa2423 (patch)
treebcbb22f7f71ed9cc395dd04dbfd2b72ac952ea53
parentda5c1bfafeaa93aa85b77b461185267ce7fc756e (diff)
downloadcpython-afc86946634d396cc0ce2d297ddbdf9beeaa2423.tar.gz
Added q/Q standard (x-platform 8-byte ints) mode in struct module.
This completes the q/Q project. longobject.c _PyLong_AsByteArray: The original code had a gross bug: the most-significant Python digit doesn't necessarily have SHIFT significant bits, and you really need to count how many copies of the sign bit it has else spurious overflow errors result. test_struct.py: This now does exhaustive std q/Q testing at, and on both sides of, all relevant power-of-2 boundaries, both positive and negative. NEWS: Added brief dict news while I was at it.
-rw-r--r--Doc/lib/libstruct.tex17
-rw-r--r--Lib/test/test_struct.py171
-rw-r--r--Misc/NEWS9
-rw-r--r--Modules/structmodule.c192
-rw-r--r--Objects/longobject.c25
5 files changed, 337 insertions, 77 deletions
diff --git a/Doc/lib/libstruct.tex b/Doc/lib/libstruct.tex
index 9a1942da60..f8056a2af9 100644
--- a/Doc/lib/libstruct.tex
+++ b/Doc/lib/libstruct.tex
@@ -72,7 +72,8 @@ Notes:
\item[(1)]
The \character{q} and \character{Q} conversion codes are available in
native mode only if the platform C compiler supports C \ctype{long long},
- or, on Windows, \ctype{__int64}.
+ or, on Windows, \ctype{__int64}. They're always available in standard
+ modes.
\end{description}
@@ -100,8 +101,8 @@ passed in to \function{pack()} is too long, the stored representation
is truncated. If the string is too short, padding is used to ensure
that exactly enough bytes are used to satisfy the count.
-For the \character{I} and \character{L} format characters, the return
-value is a Python long integer.
+For the \character{I}, \character{L}, \character{q} and \character{Q}
+format characters, the return value is a Python long integer.
For the \character{P} format character, the return value is a Python
integer or long integer, depending on the size needed to hold a
@@ -139,10 +140,12 @@ Native size and alignment are determined using the C compiler's
order.
Standard size and alignment are as follows: no alignment is required
-for any type (so you have to use pad bytes); \ctype{short} is 2 bytes;
-\ctype{int} and \ctype{long} are 4 bytes. \ctype{float} and
-\ctype{double} are 32-bit and 64-bit IEEE floating point numbers,
-respectively.
+for any type (so you have to use pad bytes);
+\ctype{short} is 2 bytes;
+\ctype{int} and \ctype{long} are 4 bytes;
+\ctype{long long} (\ctype{__int64} on Windows) is 8 bytes;
+\ctype{float} and \ctype{double} are 32-bit and 64-bit
+IEEE floating point numbers, respectively.
Note the difference between \character{@} and \character{=}: both use
native byte order, but the size and alignment of the latter is
diff --git a/Lib/test/test_struct.py b/Lib/test/test_struct.py
index c977913dfc..e6c8bb24c6 100644
--- a/Lib/test/test_struct.py
+++ b/Lib/test/test_struct.py
@@ -12,6 +12,16 @@ def simple_err(func, *args):
func.__name__, args)
## pdb.set_trace()
+def any_err(func, *args):
+ try:
+ apply(func, args)
+ except (struct.error, OverflowError, TypeError):
+ pass
+ else:
+ raise TestFailed, "%s%s did not raise error" % (
+ func.__name__, args)
+## pdb.set_trace()
+
simple_err(struct.calcsize, 'Z')
sz = struct.calcsize('i')
@@ -113,7 +123,8 @@ for fmt, arg, big, lil, asy in tests:
raise TestFailed, "unpack(%s, %s) -> (%s,) # expected (%s,)" % (
`fmt`, `res`, `rev`, `arg`)
-# Some q/Q sanity checks.
+###########################################################################
+# q/Q tests.
has_native_qQ = 1
try:
@@ -124,18 +135,22 @@ except struct.error:
if verbose:
print "Platform has native q/Q?", has_native_qQ and "Yes." or "No."
-simple_err(struct.pack, "Q", -1) # can't pack -1 as unsigned regardless
+any_err(struct.pack, "Q", -1) # can't pack -1 as unsigned regardless
simple_err(struct.pack, "q", "a") # can't pack string as 'q' regardless
simple_err(struct.pack, "Q", "a") # ditto, but 'Q'
+def string_reverse(s):
+ chars = list(s)
+ chars.reverse()
+ return "".join(chars)
+
def bigendian_to_native(value):
if isbigendian:
return value
- chars = list(value)
- chars.reverse()
- return "".join(chars)
+ else:
+ return string_reverse(value)
-if has_native_qQ:
+def test_native_qQ():
bytes = struct.calcsize('q')
# The expected values here are in big-endian format, primarily because
# I'm on a little-endian machine and so this is the clearest way (for
@@ -156,3 +171,147 @@ if has_native_qQ:
verify(retrieved == input,
"%r-unpack of %r gave %r, not %r" %
(format, got, retrieved, input))
+
+if has_native_qQ:
+ test_native_qQ()
+
+# Standard q/Q (8 bytes; should work on all platforms).
+
+MIN_Q, MAX_Q = 0, 2L**64 - 1
+MIN_q, MAX_q = -(2L**63), 2L**63 - 1
+
+import binascii
+def test_one_qQ(x, pack=struct.pack,
+ unpack=struct.unpack,
+ unhexlify=binascii.unhexlify):
+ if verbose:
+ print "trying std q/Q on", x, "==", hex(x)
+
+ # Try 'q'.
+ if MIN_q <= x <= MAX_q:
+ # Try '>q'.
+ expected = long(x)
+ if x < 0:
+ expected += 1L << 64
+ assert expected > 0
+ expected = hex(expected)[2:-1] # chop "0x" and trailing 'L'
+ if len(expected) & 1:
+ expected = "0" + expected
+ expected = unhexlify(expected)
+ expected = "\x00" * (8 - len(expected)) + expected
+
+ # >q pack work?
+ got = pack(">q", x)
+ verify(got == expected,
+ "'>q'-pack of %r gave %r, not %r" %
+ (x, got, expected))
+
+ # >q unpack work?
+ retrieved = unpack(">q", got)[0]
+ verify(x == retrieved,
+ "'>q'-unpack of %r gave %r, not %r" %
+ (got, retrieved, x))
+
+ # Adding any byte should cause a "too big" error.
+ any_err(unpack, ">q", '\x01' + got)
+
+ # Try '<q'.
+ expected = string_reverse(expected)
+
+ # <q pack work?
+ got = pack("<q", x)
+ verify(got == expected,
+ "'<q'-pack of %r gave %r, not %r" %
+ (x, got, expected))
+
+ # <q unpack work?
+ retrieved = unpack("<q", got)[0]
+ verify(x == retrieved,
+ "'<q'-unpack of %r gave %r, not %r" %
+ (got, retrieved, x))
+
+ # Adding any byte should cause a "too big" error.
+ any_err(unpack, "<q", '\x01' + got)
+
+ else:
+ # x is out of q's range -- verify pack realizes that.
+ any_err(pack, '>q', x)
+ any_err(pack, '<q', x)
+
+ # Much the same for 'Q'.
+ if MIN_Q <= x <= MAX_Q:
+ # Try '>Q'.
+ expected = long(x)
+ expected = hex(expected)[2:-1] # chop "0x" and trailing 'L'
+ if len(expected) & 1:
+ expected = "0" + expected
+ expected = unhexlify(expected)
+ expected = "\x00" * (8 - len(expected)) + expected
+
+ # >Q pack work?
+ got = pack(">Q", x)
+ verify(got == expected,
+ "'>Q'-pack of %r gave %r, not %r" %
+ (x, got, expected))
+
+ # >Q unpack work?
+ retrieved = unpack(">Q", got)[0]
+ verify(x == retrieved,
+ "'>Q'-unpack of %r gave %r, not %r" %
+ (got, retrieved, x))
+
+ # Adding any byte should cause a "too big" error.
+ any_err(unpack, ">Q", '\x01' + got)
+
+ # Try '<Q'.
+ expected = string_reverse(expected)
+
+ # <Q pack work?
+ got = pack("<Q", x)
+ verify(got == expected,
+ "'<Q'-pack of %r gave %r, not %r" %
+ (x, got, expected))
+
+ # <Q unpack work?
+ retrieved = unpack("<Q", got)[0]
+ verify(x == retrieved,
+ "'<Q'-unpack of %r gave %r, not %r" %
+ (got, retrieved, x))
+
+ # Adding any byte should cause a "too big" error.
+ any_err(unpack, "<Q", '\x01' + got)
+
+ else:
+ # x is out of Q's range -- verify pack realizes that.
+ any_err(pack, '>Q', x)
+ any_err(pack, '<Q', x)
+
+def test_std_qQ():
+ from random import randrange
+
+ # Create all interesting powers of 2.
+ values = []
+ for exp in range(70):
+ values.append(1L << exp)
+
+ # Add some random 64-bit values.
+ for i in range(50):
+ val = 0L
+ for j in range(8):
+ val = (val << 8) | randrange(256)
+ values.append(val)
+
+ # Try all those, and their negations, and +-1 from them. Note
+ # that this tests all power-of-2 boundaries in range, and a few out
+ # of range, plus +-(2**n +- 1).
+ for base in values:
+ for val in -base, base:
+ for incr in -1, 0, 1:
+ x = val + incr
+ try:
+ x = int(x)
+ except OverflowError:
+ pass
+ test_one_qQ(x)
+
+test_std_qQ()
diff --git a/Misc/NEWS b/Misc/NEWS
index 18a87e9cb2..16850aac68 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -84,6 +84,9 @@ Core
sortdict(dict) function for a simple way to display a dict in sorted
order.
+- Many other small changes to dicts were made, resulting in faster
+ operation along the most common code paths.
+
- Dictionary objects now support the "in" operator: "x in dict" means
the same as dict.has_key(x).
@@ -119,7 +122,7 @@ Core
- Collisions in dicts are resolved via a new approach, which can help
dramatically in bad cases. For example, looking up every key in a dict
- d with d.keys() = [i << 16 for i in range(20000)] is approximately 500x
+ d with d.keys() == [i << 16 for i in range(20000)] is approximately 500x
faster now. Thanks to Christian Tismer for pointing out the cause and
the nature of an effective cure (last December! better late than never).
@@ -145,8 +148,8 @@ Library
native mode, these can be used only when the platform C compiler supports
these types (when HAVE_LONG_LONG is #define'd by the Python config
process), and then they inherit the sizes and alignments of the C types.
- XXX TODO In standard mode, 'q' and 'Q' are supported on all platforms, and
- XXX TODO are 8-byte integral types.
+ In standard mode, 'q' and 'Q' are supported on all platforms, and are
+ 8-byte integral types.
Tests
diff --git a/Modules/structmodule.c b/Modules/structmodule.c
index 9b79978181..4a8886f8be 100644
--- a/Modules/structmodule.c
+++ b/Modules/structmodule.c
@@ -80,6 +80,34 @@ typedef struct { char c; LONG_LONG x; } s_long_long;
#pragma options align=reset
#endif
+/* Helper to get a PyLongObject by hook or by crook. Caller should decref. */
+
+static PyObject *
+get_pylong(PyObject *v)
+{
+ PyNumberMethods *m;
+
+ assert(v != NULL);
+ if (PyInt_Check(v))
+ return PyLong_FromLong(PyInt_AS_LONG(v));
+ if (PyLong_Check(v)) {
+ Py_INCREF(v);
+ return v;
+ }
+ m = v->ob_type->tp_as_number;
+ if (m != NULL && m->nb_long != NULL) {
+ v = m->nb_long(v);
+ if (v == NULL)
+ return NULL;
+ if (PyLong_Check(v))
+ return v;
+ Py_DECREF(v);
+ }
+ PyErr_SetString(StructError,
+ "cannot convert argument to long");
+ return NULL;
+}
+
/* Helper routine to get a Python integer and raise the appropriate error
if it isn't one */
@@ -123,33 +151,13 @@ static int
get_longlong(PyObject *v, LONG_LONG *p)
{
LONG_LONG x;
- int v_needs_decref = 0;
- if (PyInt_Check(v)) {
- x = (LONG_LONG)PyInt_AS_LONG(v);
- *p = x;
- return 0;
- }
- if (!PyLong_Check(v)) {
- PyNumberMethods *m = v->ob_type->tp_as_number;
- if (m != NULL && m->nb_long != NULL) {
- v = m->nb_long(v);
- if (v == NULL)
- return -1;
- v_needs_decref = 1;
- }
- if (!PyLong_Check(v)) {
- PyErr_SetString(StructError,
- "cannot convert argument to long");
- if (v_needs_decref)
- Py_DECREF(v);
- return -1;
- }
- }
+ v = get_pylong(v);
+ if (v == NULL)
+ return -1;
assert(PyLong_Check(v));
x = PyLong_AsLongLong(v);
- if (v_needs_decref)
- Py_DECREF(v);
+ Py_DECREF(v);
if (x == (LONG_LONG)-1 && PyErr_Occurred())
return -1;
*p = x;
@@ -162,39 +170,13 @@ static int
get_ulonglong(PyObject *v, unsigned LONG_LONG *p)
{
unsigned LONG_LONG x;
- int v_needs_decref = 0;
- if (PyInt_Check(v)) {
- long i = PyInt_AS_LONG(v);
- if (i < 0) {
- PyErr_SetString(StructError, "can't convert negative "
- "int to unsigned");
- return -1;
- }
- x = (unsigned LONG_LONG)i;
- *p = x;
- return 0;
- }
- if (!PyLong_Check(v)) {
- PyNumberMethods *m = v->ob_type->tp_as_number;
- if (m != NULL && m->nb_long != NULL) {
- v = m->nb_long(v);
- if (v == NULL)
- return -1;
- v_needs_decref = 1;
- }
- if (!PyLong_Check(v)) {
- PyErr_SetString(StructError,
- "cannot convert argument to long");
- if (v_needs_decref)
- Py_DECREF(v);
- return -1;
- }
- }
+ v = get_pylong(v);
+ if (v == NULL)
+ return -1;
assert(PyLong_Check(v));
x = PyLong_AsUnsignedLongLong(v);
- if (v_needs_decref)
- Py_DECREF(v);
+ Py_DECREF(v);
if (x == (unsigned LONG_LONG)-1 && PyErr_Occurred())
return -1;
*p = x;
@@ -500,7 +482,7 @@ typedef struct _formatdef {
TYPE is one of char, byte, ubyte, etc.
*/
-/* Native mode routines. */
+/* Native mode routines. ****************************************************/
static PyObject *
nu_char(const char *p, const formatdef *f)
@@ -797,6 +779,8 @@ static formatdef native_table[] = {
{0}
};
+/* Big-endian routines. *****************************************************/
+
static PyObject *
bu_int(const char *p, const formatdef *f)
{
@@ -826,6 +810,24 @@ bu_uint(const char *p, const formatdef *f)
}
static PyObject *
+bu_longlong(const char *p, const formatdef *f)
+{
+ return _PyLong_FromByteArray((const unsigned char *)p,
+ 8,
+ 0, /* little-endian */
+ 1 /* signed */);
+}
+
+static PyObject *
+bu_ulonglong(const char *p, const formatdef *f)
+{
+ return _PyLong_FromByteArray((const unsigned char *)p,
+ 8,
+ 0, /* little-endian */
+ 0 /* signed */);
+}
+
+static PyObject *
bu_float(const char *p, const formatdef *f)
{
return unpack_float(p, 1);
@@ -868,6 +870,34 @@ bp_uint(char *p, PyObject *v, const formatdef *f)
}
+/* Pack v into the 8 bytes at p as a signed big-endian integer.
+   v is first coerced to a Python long with get_pylong(), which returns a
+   new reference, or NULL with an exception set when v can't be converted.
+   That NULL must be caught here: passing it on to _PyLong_AsByteArray()
+   and Py_DECREF() would dereference a null pointer.
+   Returns the result of _PyLong_AsByteArray(), or -1 if coercion failed. */
static int
+bp_longlong(char *p, PyObject *v, const formatdef *f)
+{
+    int res;
+
+    v = get_pylong(v);
+    if (v == NULL)
+        return -1;
+    res = _PyLong_AsByteArray((PyLongObject *)v,
+                              (unsigned char *)p,
+                              8,
+                              0, /* little_endian */
+                              1  /* signed */);
+    Py_DECREF(v);   /* drop the reference get_pylong() handed us */
+    return res;
+}
+
+/* Pack v into the 8 bytes at p as an unsigned big-endian integer.
+   v is first coerced to a Python long with get_pylong(), which returns a
+   new reference, or NULL with an exception set when v can't be converted.
+   That NULL must be caught here: passing it on to _PyLong_AsByteArray()
+   and Py_DECREF() would dereference a null pointer.
+   Returns the result of _PyLong_AsByteArray(), or -1 if coercion failed. */
+static int
+bp_ulonglong(char *p, PyObject *v, const formatdef *f)
+{
+    int res;
+
+    v = get_pylong(v);
+    if (v == NULL)
+        return -1;
+    res = _PyLong_AsByteArray((PyLongObject *)v,
+                              (unsigned char *)p,
+                              8,
+                              0, /* little_endian */
+                              0  /* signed */);
+    Py_DECREF(v);   /* drop the reference get_pylong() handed us */
+    return res;
+}
+
+static int
bp_float(char *p, PyObject *v, const formatdef *f)
{
double x = PyFloat_AsDouble(v);
@@ -904,11 +934,15 @@ static formatdef bigendian_table[] = {
{'I', 4, 0, bu_uint, bp_uint},
{'l', 4, 0, bu_int, bp_int},
{'L', 4, 0, bu_uint, bp_uint},
+ {'q', 8, 0, bu_longlong, bp_longlong},
+ {'Q', 8, 0, bu_ulonglong, bp_ulonglong},
{'f', 4, 0, bu_float, bp_float},
{'d', 8, 0, bu_double, bp_double},
{0}
};
+/* Little-endian routines. *****************************************************/
+
static PyObject *
lu_int(const char *p, const formatdef *f)
{
@@ -938,6 +972,24 @@ lu_uint(const char *p, const formatdef *f)
}
static PyObject *
+lu_longlong(const char *p, const formatdef *f)
+{
+ return _PyLong_FromByteArray((const unsigned char *)p,
+ 8,
+ 1, /* little-endian */
+ 1 /* signed */);
+}
+
+static PyObject *
+lu_ulonglong(const char *p, const formatdef *f)
+{
+ return _PyLong_FromByteArray((const unsigned char *)p,
+ 8,
+ 1, /* little-endian */
+ 0 /* signed */);
+}
+
+static PyObject *
lu_float(const char *p, const formatdef *f)
{
return unpack_float(p+3, -1);
@@ -980,6 +1032,34 @@ lp_uint(char *p, PyObject *v, const formatdef *f)
}
+/* Pack v into the 8 bytes at p as a signed little-endian integer.
+   v is first coerced to a Python long with get_pylong(), which returns a
+   new reference, or NULL with an exception set when v can't be converted.
+   That NULL must be caught here: passing it on to _PyLong_AsByteArray()
+   and Py_DECREF() would dereference a null pointer.
+   Returns the result of _PyLong_AsByteArray(), or -1 if coercion failed. */
static int
+lp_longlong(char *p, PyObject *v, const formatdef *f)
+{
+    int res;
+
+    v = get_pylong(v);
+    if (v == NULL)
+        return -1;
+    res = _PyLong_AsByteArray((PyLongObject *)v,
+                              (unsigned char *)p,
+                              8,
+                              1, /* little_endian */
+                              1  /* signed */);
+    Py_DECREF(v);   /* drop the reference get_pylong() handed us */
+    return res;
+}
+
+/* Pack v into the 8 bytes at p as an unsigned little-endian integer.
+   v is first coerced to a Python long with get_pylong(), which returns a
+   new reference, or NULL with an exception set when v can't be converted.
+   That NULL must be caught here: passing it on to _PyLong_AsByteArray()
+   and Py_DECREF() would dereference a null pointer.
+   Returns the result of _PyLong_AsByteArray(), or -1 if coercion failed. */
+static int
+lp_ulonglong(char *p, PyObject *v, const formatdef *f)
+{
+    int res;
+
+    v = get_pylong(v);
+    if (v == NULL)
+        return -1;
+    res = _PyLong_AsByteArray((PyLongObject *)v,
+                              (unsigned char *)p,
+                              8,
+                              1, /* little_endian */
+                              0  /* signed */);
+    Py_DECREF(v);   /* drop the reference get_pylong() handed us */
+    return res;
+}
+
+static int
lp_float(char *p, PyObject *v, const formatdef *f)
{
double x = PyFloat_AsDouble(v);
@@ -1016,6 +1096,8 @@ static formatdef lilendian_table[] = {
{'I', 4, 0, lu_uint, lp_uint},
{'l', 4, 0, lu_int, lp_int},
{'L', 4, 0, lu_uint, lp_uint},
+ {'q', 8, 0, lu_longlong, lp_longlong},
+ {'Q', 8, 0, lu_ulonglong, lp_ulonglong},
{'f', 4, 0, lu_float, lp_float},
{'d', 8, 0, lu_double, lp_double},
{0}
diff --git a/Objects/longobject.c b/Objects/longobject.c
index 92f8b046a0..fac8bb648b 100644
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -364,20 +364,33 @@ _PyLong_AsByteArray(PyLongObject* v,
accumbits = 0;
carry = do_twos_comp ? 1 : 0;
for (i = 0; i < ndigits; ++i) {
+ unsigned int oldaccumbits = accumbits;
twodigits thisdigit = v->ob_digit[i];
if (do_twos_comp) {
thisdigit = (thisdigit ^ MASK) + carry;
carry = thisdigit >> SHIFT;
thisdigit &= MASK;
}
+ if (i < ndigits - 1)
+ accumbits += SHIFT;
+ else {
+ /* The most-significant digit may be partly empty. */
+ twodigits bitmask = 1 << (SHIFT - 1);
+ twodigits signbit = do_twos_comp << (SHIFT - 1);
+ unsigned int nsignbits = 0;
+ while ((thisdigit & bitmask) == signbit && bitmask) {
+ ++nsignbits;
+ bitmask >>= 1;
+ signbit >>= 1;
+ }
+ accumbits += SHIFT - nsignbits;
+ }
/* Because we're going LSB to MSB, thisdigit is more
significant than what's already in accum, so needs to be
prepended to accum. */
- accum |= thisdigit << accumbits;
- accumbits += SHIFT;
+ accum |= thisdigit << oldaccumbits;
/* Store as many bytes as possible. */
- assert(accumbits >= 8);
- do {
+ while (accumbits >= 8) {
if (j >= n)
goto Overflow;
++j;
@@ -385,13 +398,13 @@ _PyLong_AsByteArray(PyLongObject* v,
p += pincr;
accumbits -= 8;
accum >>= 8;
- } while (accumbits >= 8);
+ }
}
/* Store the straggler (if any). */
assert(accumbits < 8);
assert(carry == 0); /* else do_twos_comp and *every* digit was 0 */
- if (accum) {
+ if (accumbits > 0) {
if (j >= n)
goto Overflow;
++j;