diff options
Diffstat (limited to 'mercurial/parsers.c')
-rw-r--r-- | mercurial/parsers.c | 1402 |
1 files changed, 134 insertions, 1268 deletions
diff --git a/mercurial/parsers.c b/mercurial/parsers.c index 6710ea6..66060d1 100644 --- a/mercurial/parsers.c +++ b/mercurial/parsers.c @@ -13,10 +13,8 @@ #include "util.h" -static inline int hexdigit(const char *p, Py_ssize_t off) +static int hexdigit(char c) { - char c = p[off]; - if (c >= '0' && c <= '9') return c - '0'; if (c >= 'a' && c <= 'f') @@ -34,8 +32,8 @@ static inline int hexdigit(const char *p, Py_ssize_t off) static PyObject *unhexlify(const char *str, int len) { PyObject *ret; + const char *c; char *d; - int i; ret = PyBytes_FromStringAndSize(NULL, len / 2); @@ -44,9 +42,9 @@ static PyObject *unhexlify(const char *str, int len) d = PyBytes_AsString(ret); - for (i = 0; i < len;) { - int hi = hexdigit(str, i++); - int lo = hexdigit(str, i++); + for (c = str; c < str + len;) { + int hi = hexdigit(*c++); + int lo = hexdigit(*c++); *d++ = (hi << 4) | lo; } @@ -137,6 +135,33 @@ quit: return NULL; } +#ifdef _WIN32 +#ifdef _MSC_VER +/* msvc 6.0 has problems */ +#define inline __inline +typedef unsigned long uint32_t; +typedef unsigned __int64 uint64_t; +#else +#include <stdint.h> +#endif +static uint32_t ntohl(uint32_t x) +{ + return ((x & 0x000000ffUL) << 24) | + ((x & 0x0000ff00UL) << 8) | + ((x & 0x00ff0000UL) >> 8) | + ((x & 0xff000000UL) >> 24); +} +#else +/* not windows */ +#include <sys/types.h> +#if defined __BEOS__ && !defined __HAIKU__ +#include <ByteOrder.h> +#else +#include <arpa/inet.h> +#endif +#include <inttypes.h> +#endif + static PyObject *parse_dirstate(PyObject *self, PyObject *args) { PyObject *dmap, *cmap, *parents = NULL, *ret = NULL; @@ -145,6 +170,7 @@ static PyObject *parse_dirstate(PyObject *self, PyObject *args) int state, mode, size, mtime; unsigned int flen; int len; + uint32_t decode[4]; /* for alignment */ if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate", &PyDict_Type, &dmap, @@ -167,10 +193,11 @@ static PyObject *parse_dirstate(PyObject *self, PyObject *args) while (cur < end - 17) { /* unpack header */ state = *cur; - mode = getbe32(cur + 1); - size = getbe32(cur + 5); - mtime = getbe32(cur + 9); - flen = getbe32(cur + 13); + memcpy(decode, cur + 1, 16); + mode = ntohl(decode[0]); + size = ntohl(decode[1]); + mtime = ntohl(decode[2]); + flen = ntohl(decode[3]); cur += 17; if (cur + flen > end || cur + flen < cur) { PyErr_SetString(PyExc_ValueError, "overflow in dirstate"); @@ -214,240 +241,10 @@ quit: return ret; } -static inline int getintat(PyObject *tuple, int off, uint32_t *v) -{ - PyObject *o = PyTuple_GET_ITEM(tuple, off); - long val; - - if (PyInt_Check(o)) - val = PyInt_AS_LONG(o); - else if (PyLong_Check(o)) { - val = PyLong_AsLong(o); - if (val == -1 && PyErr_Occurred()) - return -1; - } else { - PyErr_SetString(PyExc_TypeError, "expected an int or long"); - return -1; - } - if (LONG_MAX > INT_MAX && (val > INT_MAX || val < INT_MIN)) { - PyErr_SetString(PyExc_OverflowError, - "Python value to large to convert to uint32_t"); - return -1; - } - *v = (uint32_t)val; - return 0; -} - -static PyObject *dirstate_unset; - -/* - * Efficiently pack a dirstate object into its on-disk format. - */ -static PyObject *pack_dirstate(PyObject *self, PyObject *args) -{ - PyObject *packobj = NULL; - PyObject *map, *copymap, *pl; - Py_ssize_t nbytes, pos, l; - PyObject *k, *v, *pn; - char *p, *s; - double now; - - if (!PyArg_ParseTuple(args, "O!O!Od:pack_dirstate", - &PyDict_Type, &map, &PyDict_Type, ©map, - &pl, &now)) - return NULL; - - if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) { - PyErr_SetString(PyExc_TypeError, "expected 2-element sequence"); - return NULL; - } - - /* Figure out how much we need to allocate. */ - for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) { - PyObject *c; - if (!PyString_Check(k)) { - PyErr_SetString(PyExc_TypeError, "expected string key"); - goto bail; - } - nbytes += PyString_GET_SIZE(k) + 17; - c = PyDict_GetItem(copymap, k); - if (c) { - if (!PyString_Check(c)) { - PyErr_SetString(PyExc_TypeError, - "expected string key"); - goto bail; - } - nbytes += PyString_GET_SIZE(c) + 1; - } - } - - packobj = PyString_FromStringAndSize(NULL, nbytes); - if (packobj == NULL) - goto bail; - - p = PyString_AS_STRING(packobj); - - pn = PySequence_ITEM(pl, 0); - if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) { - PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash"); - goto bail; - } - memcpy(p, s, l); - p += 20; - pn = PySequence_ITEM(pl, 1); - if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) { - PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash"); - goto bail; - } - memcpy(p, s, l); - p += 20; - - for (pos = 0; PyDict_Next(map, &pos, &k, &v); ) { - uint32_t mode, size, mtime; - Py_ssize_t len, l; - PyObject *o; - char *s, *t; - - if (!PyTuple_Check(v) || PyTuple_GET_SIZE(v) != 4) { - PyErr_SetString(PyExc_TypeError, "expected a 4-tuple"); - goto bail; - } - o = PyTuple_GET_ITEM(v, 0); - if (PyString_AsStringAndSize(o, &s, &l) == -1 || l != 1) { - PyErr_SetString(PyExc_TypeError, "expected one byte"); - goto bail; - } - *p++ = *s; - if (getintat(v, 1, &mode) == -1) - goto bail; - if (getintat(v, 2, &size) == -1) - goto bail; - if (getintat(v, 3, &mtime) == -1) - goto bail; - if (*s == 'n' && mtime == (uint32_t)now) { - /* See dirstate.py:write for why we do this. */ - if (PyDict_SetItem(map, k, dirstate_unset) == -1) - goto bail; - mode = 0, size = -1, mtime = -1; - } - putbe32(mode, p); - putbe32(size, p + 4); - putbe32(mtime, p + 8); - t = p + 12; - p += 16; - len = PyString_GET_SIZE(k); - memcpy(p, PyString_AS_STRING(k), len); - p += len; - o = PyDict_GetItem(copymap, k); - if (o) { - *p++ = '\0'; - l = PyString_GET_SIZE(o); - memcpy(p, PyString_AS_STRING(o), l); - p += l; - len += l + 1; - } - putbe32((uint32_t)len, t); - } - - pos = p - PyString_AS_STRING(packobj); - if (pos != nbytes) { - PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld", - (long)pos, (long)nbytes); - goto bail; - } +const char nullid[20]; +const int nullrev = -1; - return packobj; -bail: - Py_XDECREF(packobj); - return NULL; -} - -/* - * A base-16 trie for fast node->rev mapping. - * - * Positive value is index of the next node in the trie - * Negative value is a leaf: -(rev + 1) - * Zero is empty - */ -typedef struct { - int children[16]; -} nodetree; - -/* - * This class has two behaviours. - * - * When used in a list-like way (with integer keys), we decode an - * entry in a RevlogNG index file on demand. Our last entry is a - * sentinel, always a nullid. We have limited support for - * integer-keyed insert and delete, only at elements right before the - * sentinel. - * - * With string keys, we lazily perform a reverse mapping from node to - * rev, using a base-16 trie. - */ -typedef struct { - PyObject_HEAD - /* Type-specific fields go here. */ - PyObject *data; /* raw bytes of index */ - PyObject **cache; /* cached tuples */ - const char **offsets; /* populated on demand */ - Py_ssize_t raw_length; /* original number of elements */ - Py_ssize_t length; /* current number of elements */ - PyObject *added; /* populated on demand */ - PyObject *headrevs; /* cache, invalidated on changes */ - nodetree *nt; /* base-16 trie */ - int ntlength; /* # nodes in use */ - int ntcapacity; /* # nodes allocated */ - int ntdepth; /* maximum depth of tree */ - int ntsplits; /* # splits performed */ - int ntrev; /* last rev scanned */ - int ntlookups; /* # lookups */ - int ntmisses; /* # lookups that miss the cache */ - int inlined; -} indexObject; - -static Py_ssize_t index_length(const indexObject *self) -{ - if (self->added == NULL) - return self->length; - return self->length + PyList_GET_SIZE(self->added); -} - -static PyObject *nullentry; -static const char nullid[20]; - -static long inline_scan(indexObject *self, const char **offsets); - -#if LONG_MAX == 0x7fffffffL -static char *tuple_format = "Kiiiiiis#"; -#else -static char *tuple_format = "kiiiiiis#"; -#endif - -/* A RevlogNG v1 index entry is 64 bytes long. */ -static const long v1_hdrsize = 64; - -/* - * Return a pointer to the beginning of a RevlogNG record. - */ -static const char *index_deref(indexObject *self, Py_ssize_t pos) -{ - if (self->inlined && pos > 0) { - if (self->offsets == NULL) { - self->offsets = malloc(self->raw_length * - sizeof(*self->offsets)); - if (self->offsets == NULL) - return (const char *)PyErr_NoMemory(); - inline_scan(self, self->offsets); - } - return self->offsets[pos]; - } - - return PyString_AS_STRING(self->data) + pos * v1_hdrsize; -} - -/* - * RevlogNG format (all in big endian, data may be inlined): +/* RevlogNG format (all in big endian, data may be inlined): * 6 bytes: offset * 2 bytes: flags * 4 bytes: compressed length @@ -458,1076 +255,147 @@ static const char *index_deref(indexObject *self, Py_ssize_t pos) * 4 bytes: parent 2 revision * 32 bytes: nodeid (only 20 bytes used) */ -static PyObject *index_get(indexObject *self, Py_ssize_t pos) +static int _parse_index_ng(const char *data, int size, int inlined, + PyObject *index) { + PyObject *entry; + int n = 0, err; uint64_t offset_flags; int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2; const char *c_node_id; - const char *data; - Py_ssize_t length = index_length(self); - PyObject *entry; - - if (pos < 0) - pos += length; - - if (pos < 0 || pos >= length) { - PyErr_SetString(PyExc_IndexError, "revlog index out of range"); - return NULL; - } - - if (pos == length - 1) { - Py_INCREF(nullentry); - return nullentry; - } - - if (pos >= self->length - 1) { - PyObject *obj; - obj = PyList_GET_ITEM(self->added, pos - self->length + 1); - Py_INCREF(obj); - return obj; - } - - if (self->cache) { - if (self->cache[pos]) { - Py_INCREF(self->cache[pos]); - return self->cache[pos]; + const char *end = data + size; + uint32_t decode[8]; /* to enforce alignment with inline data */ + + while (data < end) { + unsigned int step; + + memcpy(decode, data, 32); + offset_flags = ntohl(decode[1]); + if (n == 0) /* mask out version number for the first entry */ + offset_flags &= 0xFFFF; + else { + uint32_t offset_high = ntohl(decode[0]); + offset_flags |= ((uint64_t)offset_high) << 32; } - } else { - self->cache = calloc(self->raw_length, sizeof(PyObject *)); - if (self->cache == NULL) - return PyErr_NoMemory(); - } - - data = index_deref(self, pos); - if (data == NULL) - return NULL; - - offset_flags = getbe32(data + 4); - if (pos == 0) /* mask out version number for the first entry */ - offset_flags &= 0xFFFF; - else { - uint32_t offset_high = getbe32(data); - offset_flags |= ((uint64_t)offset_high) << 32; - } - comp_len = getbe32(data + 8); - uncomp_len = getbe32(data + 12); - base_rev = getbe32(data + 16); - link_rev = getbe32(data + 20); - parent_1 = getbe32(data + 24); - parent_2 = getbe32(data + 28); - c_node_id = data + 32; + comp_len = ntohl(decode[2]); + uncomp_len = ntohl(decode[3]); + base_rev = ntohl(decode[4]); + link_rev = ntohl(decode[5]); + parent_1 = ntohl(decode[6]); + parent_2 = ntohl(decode[7]); + c_node_id = data + 32; - entry = Py_BuildValue(tuple_format, offset_flags, comp_len, + entry = Py_BuildValue("Liiiiiis#", offset_flags, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2, c_node_id, 20); - if (entry) - PyObject_GC_UnTrack(entry); - - self->cache[pos] = entry; - Py_INCREF(entry); - - return entry; -} - -/* - * Return the 20-byte SHA of the node corresponding to the given rev. - */ -static const char *index_node(indexObject *self, Py_ssize_t pos) -{ - Py_ssize_t length = index_length(self); - const char *data; - - if (pos == length - 1 || pos == INT_MAX) - return nullid; - - if (pos >= length) - return NULL; - - if (pos >= self->length - 1) { - PyObject *tuple, *str; - tuple = PyList_GET_ITEM(self->added, pos - self->length + 1); - str = PyTuple_GetItem(tuple, 7); - return str ? PyString_AS_STRING(str) : NULL; - } - - data = index_deref(self, pos); - return data ? data + 32 : NULL; -} - -static int nt_insert(indexObject *self, const char *node, int rev); - -static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen) -{ - if (PyString_AsStringAndSize(obj, node, nodelen) == -1) - return -1; - if (*nodelen == 20) - return 0; - PyErr_SetString(PyExc_ValueError, "20-byte hash required"); - return -1; -} - -static PyObject *index_insert(indexObject *self, PyObject *args) -{ - PyObject *obj; - char *node; - long offset; - Py_ssize_t len, nodelen; - - if (!PyArg_ParseTuple(args, "lO", &offset, &obj)) - return NULL; - - if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) { - PyErr_SetString(PyExc_TypeError, "8-tuple required"); - return NULL; - } - - if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1) - return NULL; - - len = index_length(self); - - if (offset < 0) - offset += len; - - if (offset != len - 1) { - PyErr_SetString(PyExc_IndexError, - "insert only supported at index -1"); - return NULL; - } - - if (offset > INT_MAX) { - PyErr_SetString(PyExc_ValueError, - "currently only 2**31 revs supported"); - return NULL; - } - - if (self->added == NULL) { - self->added = PyList_New(0); - if (self->added == NULL) - return NULL; - } - - if (PyList_Append(self->added, obj) == -1) - return NULL; - - if (self->nt) - nt_insert(self, node, (int)offset); - - Py_CLEAR(self->headrevs); - Py_RETURN_NONE; -} - -static void _index_clearcaches(indexObject *self) -{ - if (self->cache) { - Py_ssize_t i; - - for (i = 0; i < self->raw_length; i++) - Py_CLEAR(self->cache[i]); - free(self->cache); - self->cache = NULL; - } - if (self->offsets) { - free(self->offsets); - self->offsets = NULL; - } - if (self->nt) { - free(self->nt); - self->nt = NULL; - } - Py_CLEAR(self->headrevs); -} - -static PyObject *index_clearcaches(indexObject *self) -{ - _index_clearcaches(self); - self->ntlength = self->ntcapacity = 0; - self->ntdepth = self->ntsplits = 0; - self->ntrev = -1; - self->ntlookups = self->ntmisses = 0; - Py_RETURN_NONE; -} - -static PyObject *index_stats(indexObject *self) -{ - PyObject *obj = PyDict_New(); - - if (obj == NULL) - return NULL; - -#define istat(__n, __d) \ - if (PyDict_SetItemString(obj, __d, PyInt_FromSsize_t(self->__n)) == -1) \ - goto bail; - - if (self->added) { - Py_ssize_t len = PyList_GET_SIZE(self->added); - if (PyDict_SetItemString(obj, "index entries added", - PyInt_FromSsize_t(len)) == -1) - goto bail; - } - - if (self->raw_length != self->length - 1) - istat(raw_length, "revs on disk"); - istat(length, "revs in memory"); - istat(ntcapacity, "node trie capacity"); - istat(ntdepth, "node trie depth"); - istat(ntlength, "node trie count"); - istat(ntlookups, "node trie lookups"); - istat(ntmisses, "node trie misses"); - istat(ntrev, "node trie last rev scanned"); - istat(ntsplits, "node trie splits"); - -#undef istat - - return obj; - -bail: - Py_XDECREF(obj); - return NULL; -} - -/* - * When we cache a list, we want to be sure the caller can't mutate - * the cached copy. - */ -static PyObject *list_copy(PyObject *list) -{ - Py_ssize_t len = PyList_GET_SIZE(list); - PyObject *newlist = PyList_New(len); - Py_ssize_t i; - - if (newlist == NULL) - return NULL; - - for (i = 0; i < len; i++) { - PyObject *obj = PyList_GET_ITEM(list, i); - Py_INCREF(obj); - PyList_SET_ITEM(newlist, i, obj); - } - - return newlist; -} - -static PyObject *index_headrevs(indexObject *self) -{ - Py_ssize_t i, len, addlen; - char *nothead = NULL; - PyObject *heads; - - if (self->headrevs) - return list_copy(self->headrevs); - - len = index_length(self) - 1; - heads = PyList_New(0); - if (heads == NULL) - goto bail; - if (len == 0) { - PyObject *nullid = PyInt_FromLong(-1); - if (nullid == NULL || PyList_Append(heads, nullid) == -1) { - Py_XDECREF(nullid); - goto bail; - } - goto done; - } - - nothead = calloc(len, 1); - if (nothead == NULL) - goto bail; - - for (i = 0; i < self->raw_length; i++) { - const char *data = index_deref(self, i); - int parent_1 = getbe32(data + 24); - int parent_2 = getbe32(data + 28); - if (parent_1 >= 0) - nothead[parent_1] = 1; - if (parent_2 >= 0) - nothead[parent_2] = 1; - } - - addlen = self->added ? PyList_GET_SIZE(self->added) : 0; - - for (i = 0; i < addlen; i++) { - PyObject *rev = PyList_GET_ITEM(self->added, i); - PyObject *p1 = PyTuple_GET_ITEM(rev, 5); - PyObject *p2 = PyTuple_GET_ITEM(rev, 6); - long parent_1, parent_2; - - if (!PyInt_Check(p1) || !PyInt_Check(p2)) { - PyErr_SetString(PyExc_TypeError, - "revlog parents are invalid"); - goto bail; - } - parent_1 = PyInt_AS_LONG(p1); - parent_2 = PyInt_AS_LONG(p2); - if (parent_1 >= 0) - nothead[parent_1] = 1; - if (parent_2 >= 0) - nothead[parent_2] = 1; - } - - for (i = 0; i < len; i++) { - PyObject *head; - - if (nothead[i]) - continue; - head = PyInt_FromLong(i); - if (head == NULL || PyList_Append(heads, head) == -1) { - Py_XDECREF(head); - goto bail; - } - } - -done: - self->headrevs = heads; - free(nothead); - return list_copy(self->headrevs); -bail: - Py_XDECREF(heads); - free(nothead); - return NULL; -} - -static inline int nt_level(const char *node, Py_ssize_t level) -{ - int v = node[level>>1]; - if (!(level & 1)) - v >>= 4; - return v & 0xf; -} - -/* - * Return values: - * - * -4: match is ambiguous (multiple candidates) - * -2: not found - * rest: valid rev - */ -static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen, - int hex) -{ - int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level; - int level, maxlevel, off; - - if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0) - return -1; - - if (self->nt == NULL) - return -2; - - if (hex) - maxlevel = nodelen > 40 ? 40 : (int)nodelen; - else - maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2); - - for (level = off = 0; level < maxlevel; level++) { - int k = getnybble(node, level); - nodetree *n = &self->nt[off]; - int v = n->children[k]; - - if (v < 0) { - const char *n; - Py_ssize_t i; - - v = -v - 1; - n = index_node(self, v); - if (n == NULL) - return -2; - for (i = level; i < maxlevel; i++) - if (getnybble(node, i) != nt_level(n, i)) - return -2; - return v; - } - if (v == 0) - return -2; - off = v; - } - /* multiple matches against an ambiguous prefix */ - return -4; -} - -static int nt_new(indexObject *self) -{ - if (self->ntlength == self->ntcapacity) { - self->ntcapacity *= 2; - self->nt = realloc(self->nt, - self->ntcapacity * sizeof(nodetree)); - if (self->nt == NULL) { - PyErr_SetString(PyExc_MemoryError, "out of memory"); - return -1; - } - memset(&self->nt[self->ntlength], 0, - sizeof(nodetree) * (self->ntcapacity - self->ntlength)); - } - return self->ntlength++; -} - -static int nt_insert(indexObject *self, const char *node, int rev) -{ - int level = 0; - int off = 0; - - while (level < 40) { - int k = nt_level(node, level); - nodetree *n; - int v; - - n = &self->nt[off]; - v = n->children[k]; - - if (v == 0) { - n->children[k] = -rev - 1; + if (!entry) return 0; - } - if (v < 0) { - const char *oldnode = index_node(self, -v - 1); - int noff; - - if (!oldnode || !memcmp(oldnode, node, 20)) { - n->children[k] = -rev - 1; - return 0; - } - noff = nt_new(self); - if (noff == -1) - return -1; - /* self->nt may have been changed by realloc */ - self->nt[off].children[k] = noff; - off = noff; - n = &self->nt[off]; - n->children[nt_level(oldnode, ++level)] = v; - if (level > self->ntdepth) - self->ntdepth = level; - self->ntsplits += 1; - } else { - level += 1; - off = v; - } - } - - return -1; -} - -static int nt_init(indexObject *self) -{ - if (self->nt == NULL) { - self->ntcapacity = self->raw_length < 4 - ? 4 : self->raw_length / 2; - self->nt = calloc(self->ntcapacity, sizeof(nodetree)); - if (self->nt == NULL) { - PyErr_NoMemory(); - return -1; - } - self->ntlength = 1; - self->ntrev = (int)index_length(self) - 1; - self->ntlookups = 1; - self->ntmisses = 0; - if (nt_insert(self, nullid, INT_MAX) == -1) - return -1; - } - return 0; -} - -/* - * Return values: - * - * -3: error (exception set) - * -2: not found (no exception set) - * rest: valid rev - */ -static int index_find_node(indexObject *self, - const char *node, Py_ssize_t nodelen) -{ - int rev; - - self->ntlookups++; - rev = nt_find(self, node, nodelen, 0); - if (rev >= -1) - return rev; - - if (nt_init(self) == -1) - return -3; - - /* - * For the first handful of lookups, we scan the entire index, - * and cache only the matching nodes. This optimizes for cases - * like "hg tip", where only a few nodes are accessed. - * - * After that, we cache every node we visit, using a single - * scan amortized over multiple lookups. This gives the best - * bulk performance, e.g. for "hg log". - */ - if (self->ntmisses++ < 4) { - for (rev = self->ntrev - 1; rev >= 0; rev--) { - const char *n = index_node(self, rev); - if (n == NULL) - return -2; - if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) { - if (nt_insert(self, n, rev) == -1) - return -3; - break; - } - } - } else { - for (rev = self->ntrev - 1; rev >= 0; rev--) { - const char *n = index_node(self, rev); - if (n == NULL) { - self->ntrev = rev + 1; - return -2; - } - if (nt_insert(self, n, rev) == -1) { - self->ntrev = rev + 1; - return -3; - } - if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) { - break; - } - } - self->ntrev = rev; - } - - if (rev >= 0) - return rev; - return -2; -} - -static PyObject *raise_revlog_error(void) -{ - static PyObject *errclass; - PyObject *mod = NULL, *errobj; - - if (errclass == NULL) { - PyObject *dict; - - mod = PyImport_ImportModule("mercurial.error"); - if (mod == NULL) - goto classfail; - - dict = PyModule_GetDict(mod); - if (dict == NULL) - goto classfail; - - errclass = PyDict_GetItemString(dict, "RevlogError"); - if (errclass == NULL) { - PyErr_SetString(PyExc_SystemError, - "could not find RevlogError"); - goto classfail; - } - Py_INCREF(errclass); - } - - errobj = PyObject_CallFunction(errclass, NULL); - if (errobj == NULL) - return NULL; - PyErr_SetObject(errclass, errobj); - return errobj; - -classfail: - Py_XDECREF(mod); - return NULL; -} - -static PyObject *index_getitem(indexObject *self, PyObject *value) -{ - char *node; - Py_ssize_t nodelen; - int rev; - - if (PyInt_Check(value)) - return index_get(self, PyInt_AS_LONG(value)); - - if (node_check(value, &node, &nodelen) == -1) - return NULL; - rev = index_find_node(self, node, nodelen); - if (rev >= -1) - return PyInt_FromLong(rev); - if (rev == -2) - raise_revlog_error(); - return NULL; -} - -static int nt_partialmatch(indexObject *self, const char *node, - Py_ssize_t nodelen) -{ - int rev; - - if (nt_init(self) == -1) - return -3; - - if (self->ntrev > 0) { - /* ensure that the radix tree is fully populated */ - for (rev = self->ntrev - 1; rev >= 0; rev--) { - const char *n = index_node(self, rev); - if (n == NULL) - return -2; - if (nt_insert(self, n, rev) == -1) - return -3; - } - self->ntrev = rev; - } - - return nt_find(self, node, nodelen, 1); -} - -static PyObject *index_partialmatch(indexObject *self, PyObject *args) -{ - const char *fullnode; - int nodelen; - char *node; - int rev, i; - - if (!PyArg_ParseTuple(args, "s#", &node, &nodelen)) - return NULL; - - if (nodelen < 4) { - PyErr_SetString(PyExc_ValueError, "key too short"); - return NULL; - } - - if (nodelen > 40) - nodelen = 40; - - for (i = 0; i < nodelen; i++) - hexdigit(node, i); - if (PyErr_Occurred()) { - /* input contains non-hex characters */ - PyErr_Clear(); - Py_RETURN_NONE; - } - - rev = nt_partialmatch(self, node, nodelen); - - switch (rev) { - case -4: - raise_revlog_error(); - case -3: - return NULL; - case -2: - Py_RETURN_NONE; - case -1: - return PyString_FromStringAndSize(nullid, 20); - } - - fullnode = index_node(self, rev); - if (fullnode == NULL) { - PyErr_Format(PyExc_IndexError, - "could not access rev %d", rev); - return NULL; - } - return PyString_FromStringAndSize(fullnode, 20); -} - -static PyObject *index_m_get(indexObject *self, PyObject *args) -{ - Py_ssize_t nodelen; - PyObject *val; - char *node; - int rev; - - if (!PyArg_ParseTuple(args, "O", &val)) - return NULL; - if (node_check(val, &node, &nodelen) == -1) - return NULL; - rev = index_find_node(self, node, nodelen); - if (rev == -3) - return NULL; - if (rev == -2) - Py_RETURN_NONE; - return PyInt_FromLong(rev); -} - -static int index_contains(indexObject *self, PyObject *value) -{ - char *node; - Py_ssize_t nodelen; - - if (PyInt_Check(value)) { - long rev = PyInt_AS_LONG(value); - return rev >= -1 && rev < index_length(self); - } - - if (node_check(value, &node, &nodelen) == -1) - return -1; - switch (index_find_node(self, node, nodelen)) { - case -3: - return -1; - case -2: - return 0; - default: - return 1; - } -} - -/* - * Invalidate any trie entries introduced by added revs. - */ -static void nt_invalidate_added(indexObject *self, Py_ssize_t start) -{ - Py_ssize_t i, len = PyList_GET_SIZE(self->added); - - for (i = start; i < len; i++) { - PyObject *tuple = PyList_GET_ITEM(self->added, i); - PyObject *node = PyTuple_GET_ITEM(tuple, 7); - - nt_insert(self, PyString_AS_STRING(node), -1); - } - - if (start == 0) - Py_CLEAR(self->added); -} - -/* - * Delete a numeric range of revs, which must be at the end of the - * range, but exclude the sentinel nullid entry. - */ -static int index_slice_del(indexObject *self, PyObject *item) -{ - Py_ssize_t start, stop, step, slicelength; - Py_ssize_t length = index_length(self); - int ret = 0; - - if (PySlice_GetIndicesEx((PySliceObject*)item, length, - &start, &stop, &step, &slicelength) < 0) - return -1; - - if (slicelength <= 0) - return 0; - - if ((step < 0 && start < stop) || (step > 0 && start > stop)) - stop = start; - - if (step < 0) { - stop = start + 1; - start = stop + step*(slicelength - 1) - 1; - step = -step; - } - - if (step != 1) { - PyErr_SetString(PyExc_ValueError, - "revlog index delete requires step size of 1"); - return -1; - } - - if (stop != length - 1) { - PyErr_SetString(PyExc_IndexError, - "revlog index deletion indices are invalid"); - return -1; - } - - if (start < self->length - 1) { - if (self->nt) { - Py_ssize_t i; - - for (i = start + 1; i < self->length - 1; i++) { - const char *node = index_node(self, i); - - if (node) - nt_insert(self, node, -1); - } - if (self->added) - nt_invalidate_added(self, 0); - if (self->ntrev > start) - self->ntrev = (int)start; - } - self->length = start + 1; - if (start < self->raw_length) - self->raw_length = start; - goto done; - } - - if (self->nt) { - nt_invalidate_added(self, start - self->length + 1); - if (self->ntrev > start) - self->ntrev = (int)start; - } - if (self->added) - ret = PyList_SetSlice(self->added, start - self->length + 1, - PyList_GET_SIZE(self->added), NULL); -done: - Py_CLEAR(self->headrevs); - return ret; -} - -/* - * Supported ops: - * - * slice deletion - * string assignment (extend node->rev mapping) - * string deletion (shrink node->rev mapping) - */ -static int index_assign_subscript(indexObject *self, PyObject *item, - PyObject *value) -{ - char *node; - Py_ssize_t nodelen; - long rev; - - if (PySlice_Check(item) && value == NULL) - return index_slice_del(self, item); - - if (node_check(item, &node, &nodelen) == -1) - return -1; + PyObject_GC_UnTrack(entry); /* don't waste time with this */ - if (value == NULL) - return self->nt ? nt_insert(self, node, -1) : 0; - rev = PyInt_AsLong(value); - if (rev > INT_MAX || rev < 0) { - if (!PyErr_Occurred()) - PyErr_SetString(PyExc_ValueError, "rev out of range"); - return -1; - } - return nt_insert(self, node, (int)rev); -} + if (inlined) { + err = PyList_Append(index, entry); + Py_DECREF(entry); + if (err) + return 0; + } else + PyList_SET_ITEM(index, n, entry); /* steals reference */ -/* - * Find all RevlogNG entries in an index that has inline data. Update - * the optional "offsets" table with those entries. - */ -static long inline_scan(indexObject *self, const char **offsets) -{ - const char *data = PyString_AS_STRING(self->data); - const char *end = data + PyString_GET_SIZE(self->data); - long incr = v1_hdrsize; - Py_ssize_t len = 0; - - while (data + v1_hdrsize <= end) { - uint32_t comp_len; - const char *old_data; - /* 3rd element of header is length of compressed inline data */ - comp_len = getbe32(data + 8); - incr = v1_hdrsize + comp_len; - if (incr < v1_hdrsize) - break; - if (offsets) - offsets[len] = data; - len++; - old_data = data; - data += incr; - if (data <= old_data) + n++; + step = 64 + (inlined ? comp_len : 0); + if (data + step > end || data + step < data) break; + data += step; } - - if (data != end && data + v1_hdrsize != end) { + if (data != end) { if (!PyErr_Occurred()) PyErr_SetString(PyExc_ValueError, "corrupt index file"); - return -1; + return 0; } - return len; -} + /* create the magic nullid entry in the index at [-1] */ + entry = Py_BuildValue("Liiiiiis#", (uint64_t)0, 0, 0, -1, -1, -1, -1, nullid, 20); -static int index_init(indexObject *self, PyObject *args) -{ - PyObject *data_obj, *inlined_obj; - Py_ssize_t size; - - if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj)) - return -1; - if (!PyString_Check(data_obj)) { - PyErr_SetString(PyExc_TypeError, "data is not a string"); - return -1; - } - size = PyString_GET_SIZE(data_obj); - - self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj); - self->data = data_obj; - self->cache = NULL; - - self->added = NULL; - self->headrevs = NULL; - self->offsets = NULL; - self->nt = NULL; - self->ntlength = self->ntcapacity = 0; - self->ntdepth = self->ntsplits = 0; - self->ntlookups = self->ntmisses = 0; - self->ntrev = -1; - Py_INCREF(self->data); - - if (self->inlined) { - long len = inline_scan(self, NULL); - if (len == -1) - goto bail; - self->raw_length = len; - self->length = len + 1; - } else { - if (size % v1_hdrsize) { - PyErr_SetString(PyExc_ValueError, "corrupt index file"); - goto bail; - } - self->raw_length = size / v1_hdrsize; - self->length = self->raw_length + 1; - } + if (!entry) + return 0; - return 0; -bail: - return -1; -} + PyObject_GC_UnTrack(entry); /* don't waste time with this */ -static PyObject *index_nodemap(indexObject *self) -{ - Py_INCREF(self); - return (PyObject *)self; -} + if (inlined) { + err = PyList_Append(index, entry); + Py_DECREF(entry); + if (err) + return 0; + } else + PyList_SET_ITEM(index, n, entry); /* steals reference */ -static void index_dealloc(indexObject *self) -{ - _index_clearcaches(self); - Py_DECREF(self->data); - Py_XDECREF(self->added); - PyObject_Del(self); + return 1; } -static PySequenceMethods index_sequence_methods = { - (lenfunc)index_length, /* sq_length */ - 0, /* sq_concat */ - 0, /* sq_repeat */ - (ssizeargfunc)index_get, /* sq_item */ - 0, /* sq_slice */ - 0, /* sq_ass_item */ - 0, /* sq_ass_slice */ - (objobjproc)index_contains, /* sq_contains */ -}; - -static PyMappingMethods index_mapping_methods = { - (lenfunc)index_length, /* mp_length */ - (binaryfunc)index_getitem, /* mp_subscript */ - (objobjargproc)index_assign_subscript, /* mp_ass_subscript */ -}; - -static PyMethodDef index_methods[] = { - {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS, - "clear the index caches"}, - {"get", (PyCFunction)index_m_get, METH_VARARGS, - "get an index entry"}, - {"headrevs", (PyCFunction)index_headrevs, METH_NOARGS, - "get head revisions"}, - {"insert", (PyCFunction)index_insert, METH_VARARGS, - "insert an index entry"}, - {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS, - "match a potentially ambiguous node ID"}, - {"stats", (PyCFunction)index_stats, METH_NOARGS, - "stats for the index"}, - {NULL} /* Sentinel */ -}; - -static PyGetSetDef index_getset[] = { - {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL}, - {NULL} /* Sentinel */ -}; - -static PyTypeObject indexType = { - PyObject_HEAD_INIT(NULL) - 0, /* ob_size */ - "parsers.index", /* tp_name */ - sizeof(indexObject), /* tp_basicsize */ - 0, /* tp_itemsize */ - (destructor)index_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_compare */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - &index_sequence_methods, /* tp_as_sequence */ - &index_mapping_methods, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ - 0, /* tp_str */ - 0, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT, /* tp_flags */ - "revlog index", /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - index_methods, /* tp_methods */ - 0, /* tp_members */ - index_getset, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - (initproc)index_init, /* tp_init */ - 0, /* tp_alloc */ -}; - -/* - * returns a tuple of the form (index, index, cache) with elements as - * follows: +/* This function parses a index file and returns a Python tuple of the + * following format: (index, cache) * - * index: an index object that lazily parses RevlogNG records - * cache: if data is inlined, a tuple (index_file_content, 0), else None - * - * added complications are for backwards compatibility + * index: a list of tuples containing the RevlogNG records + * cache: if data is inlined, a tuple (index_file_content, 0) else None */ static PyObject *parse_index2(PyObject *self, PyObject *args) { - PyObject *tuple = NULL, *cache = NULL; - indexObject *idx; - int ret; + const char *data; + int size, inlined; + PyObject *rval = NULL, *index = NULL, *cache = NULL; + PyObject *data_obj = NULL, *inlined_obj; - idx = PyObject_New(indexObject, &indexType); - if (idx == NULL) - goto bail; + if (!PyArg_ParseTuple(args, "s#O", &data, &size, &inlined_obj)) + return NULL; + inlined = inlined_obj && PyObject_IsTrue(inlined_obj); - ret = index_init(idx, args); - if (ret == -1) - goto bail; + /* If no data is inlined, we know the size of the index list in + * advance: size divided by the size of one revlog record (64 bytes) + * plus one for nullid */ + index = inlined ? PyList_New(0) : PyList_New(size / 64 + 1); + if (!index) + goto quit; - if (idx->inlined) { - cache = Py_BuildValue("iO", 0, idx->data); - if (cache == NULL) - goto bail; + /* set up the cache return value */ + if (inlined) { + /* Note that the reference to data_obj is only borrowed */ + data_obj = PyTuple_GET_ITEM(args, 0); + cache = Py_BuildValue("iO", 0, data_obj); + if (!cache) + goto quit; } else { cache = Py_None; - Py_INCREF(cache); + Py_INCREF(Py_None); } - tuple = Py_BuildValue("NN", idx, cache); - if (!tuple) - goto bail; - return tuple; + /* actually populate the index with data */ + if (!_parse_index_ng(data, size, inlined, index)) + goto quit; + + rval = Py_BuildValue("NN", index, cache); + if (!rval) + goto quit; + return rval; -bail: - Py_XDECREF(idx); +quit: + Py_XDECREF(index); Py_XDECREF(cache); - Py_XDECREF(tuple); + Py_XDECREF(rval); return NULL; } + static char parsers_doc[] = "Efficient content parsing."; static PyMethodDef methods[] = { - {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"}, {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"}, {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"}, {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"}, {NULL, NULL} }; -static void module_init(PyObject *mod) -{ - indexType.tp_new = PyType_GenericNew; - if (PyType_Ready(&indexType) < 0) - return; - Py_INCREF(&indexType); - - PyModule_AddObject(mod, "index", (PyObject *)&indexType); - - nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0, - -1, -1, -1, -1, nullid, 20); - if (nullentry) - PyObject_GC_UnTrack(nullentry); - - dirstate_unset = Py_BuildValue("ciii", 'n', 0, -1, -1); -} - #ifdef IS_PY3K static struct PyModuleDef parsers_module = { PyModuleDef_HEAD_INIT, @@ -1539,14 +407,12 @@ static struct PyModuleDef parsers_module = { PyMODINIT_FUNC PyInit_parsers(void) { - PyObject *mod = PyModule_Create(&parsers_module); - module_init(mod); - return mod; + return PyModule_Create(&parsers_module); } #else PyMODINIT_FUNC initparsers(void) { - PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc); - module_init(mod); + Py_InitModule3("parsers", methods, parsers_doc); } #endif + |