summaryrefslogtreecommitdiff
path: root/Modules/regexmodule.c
diff options
context:
space:
mode:
Diffstat (limited to 'Modules/regexmodule.c')
-rw-r--r--Modules/regexmodule.c690
1 files changed, 0 insertions, 690 deletions
diff --git a/Modules/regexmodule.c b/Modules/regexmodule.c
deleted file mode 100644
index 2fb4198207..0000000000
--- a/Modules/regexmodule.c
+++ /dev/null
@@ -1,690 +0,0 @@
-/*
-XXX support range parameter on search
-XXX support mstop parameter on search
-*/
-
-
-/* Regular expression objects */
-/* This uses Tatu Ylonen's copyleft-free reimplementation of
- GNU regular expressions */
-
-#include "Python.h"
-
-#include <ctype.h>
-
-#include "regexpr.h"
-
-static PyObject *RegexError; /* Exception */
-
-typedef struct {
- PyObject_HEAD
- struct re_pattern_buffer re_patbuf; /* The compiled expression */
- struct re_registers re_regs; /* The registers from the last match */
- char re_fastmap[256]; /* Storage for fastmap */
- PyObject *re_translate; /* String object for translate table */
- PyObject *re_lastok; /* String object last matched/searched */
- PyObject *re_groupindex; /* Group name to index dictionary */
- PyObject *re_givenpat; /* Pattern with symbolic groups */
- PyObject *re_realpat; /* Pattern without symbolic groups */
-} regexobject;
-
-/* Regex object methods */
-
-static void
-reg_dealloc(regexobject *re)
-{
- if (re->re_patbuf.buffer)
- free(re->re_patbuf.buffer);
- Py_XDECREF(re->re_translate);
- Py_XDECREF(re->re_lastok);
- Py_XDECREF(re->re_groupindex);
- Py_XDECREF(re->re_givenpat);
- Py_XDECREF(re->re_realpat);
- PyObject_Del(re);
-}
-
-static PyObject *
-makeresult(struct re_registers *regs)
-{
- PyObject *v;
- int i;
- static PyObject *filler = NULL;
-
- if (filler == NULL) {
- filler = Py_BuildValue("(ii)", -1, -1);
- if (filler == NULL)
- return NULL;
- }
- v = PyTuple_New(RE_NREGS);
- if (v == NULL)
- return NULL;
-
- for (i = 0; i < RE_NREGS; i++) {
- int lo = regs->start[i];
- int hi = regs->end[i];
- PyObject *w;
- if (lo == -1 && hi == -1) {
- w = filler;
- Py_INCREF(w);
- }
- else
- w = Py_BuildValue("(ii)", lo, hi);
- if (w == NULL || PyTuple_SetItem(v, i, w) < 0) {
- Py_DECREF(v);
- return NULL;
- }
- }
- return v;
-}
-
-static PyObject *
-regobj_match(regexobject *re, PyObject *args)
-{
- PyObject *argstring;
- char *buffer;
- int size;
- int offset = 0;
- int result;
-
- if (!PyArg_ParseTuple(args, "O|i:match", &argstring, &offset))
- return NULL;
- if (!PyArg_Parse(argstring, "t#", &buffer, &size))
- return NULL;
-
- if (offset < 0 || offset > size) {
- PyErr_SetString(RegexError, "match offset out of range");
- return NULL;
- }
- Py_XDECREF(re->re_lastok);
- re->re_lastok = NULL;
- result = _Py_re_match(&re->re_patbuf, (unsigned char *)buffer, size, offset,
- &re->re_regs);
- if (result < -1) {
- /* Serious failure of some sort; if re_match didn't
- set an exception, raise a generic error */
- if (!PyErr_Occurred())
- PyErr_SetString(RegexError, "match failure");
- return NULL;
- }
- if (result >= 0) {
- Py_INCREF(argstring);
- re->re_lastok = argstring;
- }
- return PyInt_FromLong((long)result); /* Length of the match or -1 */
-}
-
-static PyObject *
-regobj_search(regexobject *re, PyObject *args)
-{
- PyObject *argstring;
- char *buffer;
- int size;
- int offset = 0;
- int range;
- int result;
-
- if (!PyArg_ParseTuple(args, "O|i:search", &argstring, &offset))
- return NULL;
- if (!PyArg_Parse(argstring, "t#:search", &buffer, &size))
- return NULL;
-
- if (offset < 0 || offset > size) {
- PyErr_SetString(RegexError, "search offset out of range");
- return NULL;
- }
- /* NB: In Emacs 18.57, the documentation for re_search[_2] and
- the implementation don't match: the documentation states that
- |range| positions are tried, while the code tries |range|+1
- positions. It seems more productive to believe the code! */
- range = size - offset;
- Py_XDECREF(re->re_lastok);
- re->re_lastok = NULL;
- result = _Py_re_search(&re->re_patbuf, (unsigned char *)buffer, size, offset, range,
- &re->re_regs);
- if (result < -1) {
- /* Serious failure of some sort; if re_match didn't
- set an exception, raise a generic error */
- if (!PyErr_Occurred())
- PyErr_SetString(RegexError, "match failure");
- return NULL;
- }
- if (result >= 0) {
- Py_INCREF(argstring);
- re->re_lastok = argstring;
- }
- return PyInt_FromLong((long)result); /* Position of the match or -1 */
-}
-
-/* get the group from the regex where index can be a string (group name) or
- an integer index [0 .. 99]
- */
-static PyObject*
-group_from_index(regexobject *re, PyObject *index)
-{
- int i, a, b;
- char *v;
-
- if (PyString_Check(index))
- if (re->re_groupindex == NULL ||
- !(index = PyDict_GetItem(re->re_groupindex, index)))
- {
- PyErr_SetString(RegexError,
- "group() group name doesn't exist");
- return NULL;
- }
-
- i = PyInt_AsLong(index);
- if (i == -1 && PyErr_Occurred())
- return NULL;
-
- if (i < 0 || i >= RE_NREGS) {
- PyErr_SetString(RegexError, "group() index out of range");
- return NULL;
- }
- if (re->re_lastok == NULL) {
- PyErr_SetString(RegexError,
- "group() only valid after successful match/search");
- return NULL;
- }
- a = re->re_regs.start[i];
- b = re->re_regs.end[i];
- if (a < 0 || b < 0) {
- Py_INCREF(Py_None);
- return Py_None;
- }
-
- if (!(v = PyString_AsString(re->re_lastok)))
- return NULL;
-
- return PyString_FromStringAndSize(v+a, b-a);
-}
-
-
-static PyObject *
-regobj_group(regexobject *re, PyObject *args)
-{
- int n = PyTuple_Size(args);
- int i;
- PyObject *res = NULL;
-
- if (n < 0)
- return NULL;
- if (n == 0) {
- PyErr_SetString(PyExc_TypeError, "not enough arguments");
- return NULL;
- }
- if (n == 1) {
- /* return value is a single string */
- PyObject *index = PyTuple_GetItem(args, 0);
- if (!index)
- return NULL;
-
- return group_from_index(re, index);
- }
-
- /* return value is a tuple */
- if (!(res = PyTuple_New(n)))
- return NULL;
-
- for (i = 0; i < n; i++) {
- PyObject *index = PyTuple_GetItem(args, i);
- PyObject *group = NULL;
-
- if (!index)
- goto finally;
- if (!(group = group_from_index(re, index)))
- goto finally;
- if (PyTuple_SetItem(res, i, group) < 0)
- goto finally;
- }
- return res;
-
- finally:
- Py_DECREF(res);
- return NULL;
-}
-
-
-static struct PyMethodDef reg_methods[] = {
- {"match", (PyCFunction)regobj_match, METH_VARARGS},
- {"search", (PyCFunction)regobj_search, METH_VARARGS},
- {"group", (PyCFunction)regobj_group, METH_VARARGS},
- {NULL, NULL} /* sentinel */
-};
-
-
-
-static char* members[] = {
- "last", "regs", "translate",
- "groupindex", "realpat", "givenpat",
- NULL
-};
-
-
-static PyObject *
-regobj_getattr(regexobject *re, char *name)
-{
- if (strcmp(name, "regs") == 0) {
- if (re->re_lastok == NULL) {
- Py_INCREF(Py_None);
- return Py_None;
- }
- return makeresult(&re->re_regs);
- }
- if (strcmp(name, "last") == 0) {
- if (re->re_lastok == NULL) {
- Py_INCREF(Py_None);
- return Py_None;
- }
- Py_INCREF(re->re_lastok);
- return re->re_lastok;
- }
- if (strcmp(name, "translate") == 0) {
- if (re->re_translate == NULL) {
- Py_INCREF(Py_None);
- return Py_None;
- }
- Py_INCREF(re->re_translate);
- return re->re_translate;
- }
- if (strcmp(name, "groupindex") == 0) {
- if (re->re_groupindex == NULL) {
- Py_INCREF(Py_None);
- return Py_None;
- }
- Py_INCREF(re->re_groupindex);
- return re->re_groupindex;
- }
- if (strcmp(name, "realpat") == 0) {
- if (re->re_realpat == NULL) {
- Py_INCREF(Py_None);
- return Py_None;
- }
- Py_INCREF(re->re_realpat);
- return re->re_realpat;
- }
- if (strcmp(name, "givenpat") == 0) {
- if (re->re_givenpat == NULL) {
- Py_INCREF(Py_None);
- return Py_None;
- }
- Py_INCREF(re->re_givenpat);
- return re->re_givenpat;
- }
- if (strcmp(name, "__members__") == 0) {
- int i = 0;
- PyObject *list = NULL;
-
- /* okay, so it's unlikely this list will change that often.
- still, it's easier to change it in just one place.
- */
- while (members[i])
- i++;
- if (!(list = PyList_New(i)))
- return NULL;
-
- i = 0;
- while (members[i]) {
- PyObject* v = PyString_FromString(members[i]);
- if (!v || PyList_SetItem(list, i, v) < 0) {
- Py_DECREF(list);
- return NULL;
- }
- i++;
- }
- return list;
- }
- return Py_FindMethod(reg_methods, (PyObject *)re, name);
-}
-
-static PyTypeObject Regextype = {
- PyObject_HEAD_INIT(NULL)
- 0, /*ob_size*/
- "regex.regex", /*tp_name*/
- sizeof(regexobject), /*tp_size*/
- 0, /*tp_itemsize*/
- /* methods */
- (destructor)reg_dealloc, /*tp_dealloc*/
- 0, /*tp_print*/
- (getattrfunc)regobj_getattr, /*tp_getattr*/
- 0, /*tp_setattr*/
- 0, /*tp_compare*/
- 0, /*tp_repr*/
-};
-
-/* reference counting invariants:
- pattern: borrowed
- translate: borrowed
- givenpat: borrowed
- groupindex: transferred
-*/
-static PyObject *
-newregexobject(PyObject *pattern, PyObject *translate, PyObject *givenpat, PyObject *groupindex)
-{
- regexobject *re;
- char *pat;
- int size;
-
- if (!PyArg_Parse(pattern, "t#", &pat, &size))
- return NULL;
-
- if (translate != NULL && PyString_Size(translate) != 256) {
- PyErr_SetString(RegexError,
- "translation table must be 256 bytes");
- return NULL;
- }
- re = PyObject_New(regexobject, &Regextype);
- if (re != NULL) {
- char *error;
- re->re_patbuf.buffer = NULL;
- re->re_patbuf.allocated = 0;
- re->re_patbuf.fastmap = (unsigned char *)re->re_fastmap;
- if (translate) {
- re->re_patbuf.translate = (unsigned char *)PyString_AsString(translate);
- if (!re->re_patbuf.translate)
- goto finally;
- Py_INCREF(translate);
- }
- else
- re->re_patbuf.translate = NULL;
- re->re_translate = translate;
- re->re_lastok = NULL;
- re->re_groupindex = groupindex;
- Py_INCREF(pattern);
- re->re_realpat = pattern;
- Py_INCREF(givenpat);
- re->re_givenpat = givenpat;
- error = _Py_re_compile_pattern((unsigned char *)pat, size, &re->re_patbuf);
- if (error != NULL) {
- PyErr_SetString(RegexError, error);
- goto finally;
- }
- }
- return (PyObject *)re;
- finally:
- Py_DECREF(re);
- return NULL;
-}
-
-static PyObject *
-regex_compile(PyObject *self, PyObject *args)
-{
- PyObject *pat = NULL;
- PyObject *tran = NULL;
-
- if (!PyArg_ParseTuple(args, "S|S:compile", &pat, &tran))
- return NULL;
- return newregexobject(pat, tran, pat, NULL);
-}
-
-static PyObject *
-symcomp(PyObject *pattern, PyObject *gdict)
-{
- char *opat, *oend, *o, *n, *g, *v;
- int group_count = 0;
- int sz;
- int escaped = 0;
- char name_buf[128];
- PyObject *npattern;
- int require_escape = re_syntax & RE_NO_BK_PARENS ? 0 : 1;
-
- if (!(opat = PyString_AsString(pattern)))
- return NULL;
-
- if ((sz = PyString_Size(pattern)) < 0)
- return NULL;
-
- oend = opat + sz;
- o = opat;
-
- if (oend == opat) {
- Py_INCREF(pattern);
- return pattern;
- }
-
- if (!(npattern = PyString_FromStringAndSize((char*)NULL, sz)) ||
- !(n = PyString_AsString(npattern)))
- return NULL;
-
- while (o < oend) {
- if (*o == '(' && escaped == require_escape) {
- char *backtrack;
- escaped = 0;
- ++group_count;
- *n++ = *o;
- if (++o >= oend || *o != '<')
- continue;
- /* *o == '<' */
- if (o+1 < oend && *(o+1) == '>')
- continue;
- backtrack = o;
- g = name_buf;
- for (++o; o < oend;) {
- if (*o == '>') {
- PyObject *group_name = NULL;
- PyObject *group_index = NULL;
- *g++ = '\0';
- group_name = PyString_FromString(name_buf);
- group_index = PyInt_FromLong(group_count);
- if (group_name == NULL ||
- group_index == NULL ||
- PyDict_SetItem(gdict, group_name,
- group_index) != 0)
- {
- Py_XDECREF(group_name);
- Py_XDECREF(group_index);
- Py_XDECREF(npattern);
- return NULL;
- }
- Py_DECREF(group_name);
- Py_DECREF(group_index);
- ++o; /* eat the '>' */
- break;
- }
- if (!isalnum(Py_CHARMASK(*o)) && *o != '_') {
- o = backtrack;
- break;
- }
- *g++ = *o++;
- }
- }
- else if (*o == '[' && !escaped) {
- *n++ = *o;
- ++o; /* eat the char following '[' */
- *n++ = *o;
- while (o < oend && *o != ']') {
- ++o;
- *n++ = *o;
- }
- if (o < oend)
- ++o;
- }
- else if (*o == '\\') {
- escaped = 1;
- *n++ = *o;
- ++o;
- }
- else {
- escaped = 0;
- *n++ = *o;
- ++o;
- }
- }
-
- if (!(v = PyString_AsString(npattern))) {
- Py_DECREF(npattern);
- return NULL;
- }
- /* _PyString_Resize() decrements npattern on failure */
- _PyString_Resize(&npattern, n - v);
- return npattern;
-
-}
-
-static PyObject *
-regex_symcomp(PyObject *self, PyObject *args)
-{
- PyObject *pattern;
- PyObject *tran = NULL;
- PyObject *gdict = NULL;
- PyObject *npattern;
- PyObject *retval = NULL;
-
- if (!PyArg_ParseTuple(args, "S|S:symcomp", &pattern, &tran))
- return NULL;
-
- gdict = PyDict_New();
- if (gdict == NULL || (npattern = symcomp(pattern, gdict)) == NULL) {
- Py_XDECREF(gdict);
- return NULL;
- }
- retval = newregexobject(npattern, tran, pattern, gdict);
- Py_DECREF(npattern);
- return retval;
-}
-
-
-static PyObject *cache_pat;
-static PyObject *cache_prog;
-
-static int
-update_cache(PyObject *pat)
-{
- PyObject *tuple = PyTuple_Pack(1, pat);
- int status = 0;
-
- if (!tuple)
- return -1;
-
- if (pat != cache_pat) {
- Py_XDECREF(cache_pat);
- cache_pat = NULL;
- Py_XDECREF(cache_prog);
- cache_prog = regex_compile((PyObject *)NULL, tuple);
- if (cache_prog == NULL) {
- status = -1;
- goto finally;
- }
- cache_pat = pat;
- Py_INCREF(cache_pat);
- }
- finally:
- Py_DECREF(tuple);
- return status;
-}
-
-static PyObject *
-regex_match(PyObject *self, PyObject *args)
-{
- PyObject *pat, *string;
- PyObject *tuple, *v;
-
- if (!PyArg_ParseTuple(args, "SS:match", &pat, &string))
- return NULL;
- if (update_cache(pat) < 0)
- return NULL;
-
- if (!(tuple = Py_BuildValue("(S)", string)))
- return NULL;
- v = regobj_match((regexobject *)cache_prog, tuple);
- Py_DECREF(tuple);
- return v;
-}
-
-static PyObject *
-regex_search(PyObject *self, PyObject *args)
-{
- PyObject *pat, *string;
- PyObject *tuple, *v;
-
- if (!PyArg_ParseTuple(args, "SS:search", &pat, &string))
- return NULL;
- if (update_cache(pat) < 0)
- return NULL;
-
- if (!(tuple = Py_BuildValue("(S)", string)))
- return NULL;
- v = regobj_search((regexobject *)cache_prog, tuple);
- Py_DECREF(tuple);
- return v;
-}
-
-static PyObject *
-regex_set_syntax(PyObject *self, PyObject *args)
-{
- int syntax;
- if (!PyArg_ParseTuple(args, "i:set_syntax", &syntax))
- return NULL;
- syntax = re_set_syntax(syntax);
- /* wipe the global pattern cache */
- Py_XDECREF(cache_pat);
- cache_pat = NULL;
- Py_XDECREF(cache_prog);
- cache_prog = NULL;
- return PyInt_FromLong((long)syntax);
-}
-
-static PyObject *
-regex_get_syntax(PyObject *self)
-{
- return PyInt_FromLong((long)re_syntax);
-}
-
-
-static struct PyMethodDef regex_global_methods[] = {
- {"compile", regex_compile, METH_VARARGS},
- {"symcomp", regex_symcomp, METH_VARARGS},
- {"match", regex_match, METH_VARARGS},
- {"search", regex_search, METH_VARARGS},
- {"set_syntax", regex_set_syntax, METH_VARARGS},
- {"get_syntax", (PyCFunction)regex_get_syntax, METH_NOARGS},
- {NULL, NULL} /* sentinel */
-};
-
-PyMODINIT_FUNC
-initregex(void)
-{
- PyObject *m, *d, *v;
- int i;
- char *s;
-
- /* Initialize object type */
- Regextype.ob_type = &PyType_Type;
-
- m = Py_InitModule("regex", regex_global_methods);
- if (m == NULL)
- return;
- d = PyModule_GetDict(m);
-
- if (PyErr_Warn(PyExc_DeprecationWarning,
- "the regex module is deprecated; "
- "please use the re module") < 0)
- return;
-
- /* Initialize regex.error exception */
- v = RegexError = PyErr_NewException("regex.error", NULL, NULL);
- if (v == NULL || PyDict_SetItemString(d, "error", v) != 0)
- goto finally;
-
- /* Initialize regex.casefold constant */
- if (!(v = PyString_FromStringAndSize((char *)NULL, 256)))
- goto finally;
-
- if (!(s = PyString_AsString(v)))
- goto finally;
-
- for (i = 0; i < 256; i++) {
- if (isupper(i))
- s[i] = tolower(i);
- else
- s[i] = i;
- }
- if (PyDict_SetItemString(d, "casefold", v) < 0)
- goto finally;
- Py_DECREF(v);
-
- if (!PyErr_Occurred())
- return;
- finally:
- /* Nothing */ ;
-}