diff options
author | Paul Eggert <eggert@cs.ucla.edu> | 2018-08-05 18:41:20 -0700 |
---|---|---|
committer | Paul Eggert <eggert@cs.ucla.edu> | 2018-08-05 19:36:09 -0700 |
commit | 3a6abe65c1324361bf0efcb65df61d22a39cfaaf (patch) | |
tree | 90ecb27f9ecbb8a0f8d9b24cf67a809b52b0b32d /src/regex-emacs.c | |
parent | d904cc83f3036db96107a3976cee1a0112547de6 (diff) | |
download | emacs-3a6abe65c1324361bf0efcb65df61d22a39cfaaf.tar.gz |
Simplify regex-emacs code by assuming Emacs
* src/regex-emacs.c: Omit no-longer-needed AIX code.
Don’t ignore GCC warnings.
Include regex-emacs.h immediately after config.h,
to test that it’s independent.
Omit the "#ifndef emacs", "#ifdef REGEX_MALLOC",
"#if WIDE_CHAR_SUPPORT" and "#ifdef _REGEX_RE_COMP"
code, as we are no longer interested in compiling outside
Emacs (with or without debugging or native wide char support)
or in avoiding alloca.
(REGEX_EMACS_DEBUG, regex_emacs_debug): Rename from DEBUG and debug,
to avoid collision with other DEBUGS. All uses changed.
In debugging output, change %ld and %zd to %zu when appropriate.
No need to include stddef.h, stdlib.h, sys/types.h, wchar.h,
wctype.h, locale/localeinfo.h, locale/elem-hash.h, langinfo.h,
libintl.h, unistd.h, stdbool.h, string.h, stdio.h, assert.h.
All uses of assert changed to eassert.
(RE_DUP_MAX, reg_syntax_t, RE_BACKSLASH_ESCAPE_IN_LISTS)
(RE_BK_PLUS_QM, RE_CHAR_CLASSES, RE_CONTEXT_INDEP_ANCHORS)
(RE_CONTEXT_INDEP_OPS, RE_CONTEXT_INVALID_OPS, RE_DOT_NEWLINE)
(RE_DOT_NOT_NULL, RE_HAT_LISTS_NOT_NEWLINE, RE_INTERVALS)
(RE_LIMITED_OPS, RE_NEWLINE_ALT, RE_NO_BK_BRACES)
(RE_NO_BK_PARENS, RE_NO_BK_REFS, RE_NO_BK_VBAR)
(RE_NO_EMPTY_RANGES, RE_UNMATCHED_RIGHT_PAREN_ORD)
(RE_NO_POSIX_BACKTRACKING, RE_NO_GNU_OPS, RE_FRUGAL)
(RE_SHY_GROUPS, RE_NO_NEWLINE_ANCHOR, RE_SYNTAX_EMACS)
(REG_NOERROR, REG_NOMATCH, REG_BADPAT, REG_ECOLLATE)
(REG_ECTYPE, REG_EESCAPE, REG_ESUBREG, REG_EBRACK, REG_EPAREN)
(REG_EBRACE, REG_BADBR, REG_ERANGE, REG_ESPACE, REG_BADRPT)
(REG_EEND, REG_ESIZE, REG_ERPAREN, REG_ERANGEX, REG_ESIZEBR)
(reg_errcode_t, REGS_UNALLOCATED, REGS_REALLOCATE, REGS_FIXED)
(RE_NREGS, RE_TRANSLATE, RE_TRANSLATE_P):
Move here from regex-emacs.h.
(RE_NREGS): Define unconditionally.
(boolean): Remove. All uses replaced by bool.
(WIDE_CHAR_SUPPORT, regfree, regexec, regcomp, regerror)
(re_set_syntax, re_syntax_options, WEAK_ALIAS, gettext, gettext_noop):
Remove. All uses removed.
(malloc, realloc, free): Do not redefine. Adjust all callers
to use xmalloc, xrealloc, xfree instead.
(re_error_msgid): Use C99 designated initializers to avoid the
need to keep entries in the same order as reg_errcode_t.
(REGEX_USE_SAFE_ALLOCA): Simplify by using USE_SAFE_ALLOCA.
(REGEX_ALLOCATE, REGEX_REALLOCATE, REGEX_FREE, REGEX_ALLOCATE_STACK)
(REGEX_REALLOCATE_STACK, REGEX_FREE_STACK): Remove.
All callers changed to use the non-REGEX_MALLOC version.
(REGEX_TALLOC): Remove. All callers changed to use SAFE_ALLOCA.
(re_set_syntax): Remove; unused.
(MATCH_MAY_ALLOCATE): Remove; now always true. All uses simplified.
(INIT_FAILURE_ALLOC): Define unconditionally.
(re_compile_fastmap): Now static.
(re_compile_pattern): Avoid unnecessary cast.
* src/regex-emacs.h (EMACS_REGEX_H): Rename from _REGEX_H to
avoid possible collision with glibc.
Don’t include sys/types.h. All uses of ssize_t changed to ptrdiff_t.
Don’t worry about C++ or VMS.
Assume emacs is defined and that _REGEX_RE_COMP and WIDE_CHAR_SUPPORT
are not.
Define struct re_registers before including lisp.h.
(REG_ENOSYS, RE_TRANSLATE_TYPE): Remove; all uses replaced by
Lisp_Object.
(regoff_t): Remove. All uses replaced with ptrdiff_t.
(re_match, regcomp, regexec, regerror, regfree):
Remove decl of nonexistent functions.
(RE_DEBUG, RE_SYNTAX_AWK, RE_SYNTAX_GNU_AWK)
(RE_SYNTAX_POSIX_AWK, RE_SYNTAX_GREP, RE_SYNTAX_EGREP)
(RE_SYNTAX_POSIX_EGREP, RE_SYNTAX_ED, RE_SYNTAX_SED)
(_RE_SYNTAX_POSIX_COMMON, RE_SYNTAX_POSIX_BASIC)
(RE_SYNTAX_POSIX_MINIMAL_BASIC, RE_SYNTAX_POSIX_EXTENDED)
(RE_SYNTAX_POSIX_MINIMAL_EXTENDED, REG_EXTENDED, REG_ICASE)
(REG_NEWLINE, REG_NOSUB, REG_NOTBOL, REG_NOTEOL, regmatch_t):
Remove; unused.
* src/search.c (Fset_match_data): Simplify range test now that
we know it’s ptrdiff_t.
Diffstat (limited to 'src/regex-emacs.c')
-rw-r--r-- | src/regex-emacs.c | 2013 |
1 files changed, 442 insertions, 1571 deletions
diff --git a/src/regex-emacs.c b/src/regex-emacs.c index 08fc8c67f1c..eb5970ffcf1 100644 --- a/src/regex-emacs.c +++ b/src/regex-emacs.c @@ -21,159 +21,187 @@ - structure the opcode space into opcode+flag. - merge with glibc's regex.[ch]. - replace (succeed_n + jump_n + set_number_at) with something that doesn't - need to modify the compiled regexp so that re_match can be reentrant. + need to modify the compiled regexp so that re_search can be reentrant. - get rid of on_failure_jump_smart by doing the optimization in re_comp - rather than at run-time, so that re_match can be reentrant. + rather than at run-time, so that re_search can be reentrant. */ -/* AIX requires this to be the first thing in the file. */ -#if defined _AIX && !defined REGEX_MALLOC - #pragma alloca -#endif - -/* Ignore some GCC warnings for now. This section should go away - once the Emacs and Gnulib regex code is merged. */ -#if 4 < __GNUC__ + (5 <= __GNUC_MINOR__) || defined __clang__ -# pragma GCC diagnostic ignored "-Wstrict-overflow" -# ifndef emacs -# pragma GCC diagnostic ignored "-Wunused-function" -# pragma GCC diagnostic ignored "-Wunused-macros" -# pragma GCC diagnostic ignored "-Wunused-result" -# pragma GCC diagnostic ignored "-Wunused-variable" -# endif -#endif - -#if 4 < __GNUC__ + (6 <= __GNUC_MINOR__) && ! defined __clang__ -# pragma GCC diagnostic ignored "-Wunused-but-set-variable" -#endif - #include <config.h> -#include <stddef.h> -#include <stdlib.h> - -#ifdef emacs -/* We need this for `regex-emacs.h', and perhaps for the Emacs include - files. */ -# include <sys/types.h> -#endif - -/* Whether to use ISO C Amendment 1 wide char functions. - Those should not be used for Emacs since it uses its own. */ -#if defined _LIBC -#define WIDE_CHAR_SUPPORT 1 -#else -#define WIDE_CHAR_SUPPORT \ - (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC && !emacs) -#endif +/* Get the interface, including the syntax bits. 
*/ +#include "regex-emacs.h" -/* For platform which support the ISO C amendment 1 functionality we - support user defined character classes. */ -#if WIDE_CHAR_SUPPORT -/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */ -# include <wchar.h> -# include <wctype.h> -#endif +#include <stdlib.h> -#ifdef _LIBC -/* We have to keep the namespace clean. */ -# define regfree(preg) __regfree (preg) -# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef) -# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags) -# define regerror(err_code, preg, errbuf, errbuf_size) \ - __regerror (err_code, preg, errbuf, errbuf_size) -# define re_set_registers(bu, re, nu, st, en) \ - __re_set_registers (bu, re, nu, st, en) -# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \ - __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) -# define re_match(bufp, string, size, pos, regs) \ - __re_match (bufp, string, size, pos, regs) -# define re_search(bufp, string, size, startpos, range, regs) \ - __re_search (bufp, string, size, startpos, range, regs) -# define re_compile_pattern(pattern, length, bufp) \ - __re_compile_pattern (pattern, length, bufp) -# define re_set_syntax(syntax) __re_set_syntax (syntax) -# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \ - __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop) -# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp) - -/* Make sure we call libc's function even if the user overrides them. */ -# define btowc __btowc -# define iswctype __iswctype -# define wctype __wctype - -# define WEAK_ALIAS(a,b) weak_alias (a, b) - -/* We are also using some library internals. */ -# include <locale/localeinfo.h> -# include <locale/elem-hash.h> -# include <langinfo.h> -#else -# define WEAK_ALIAS(a,b) -#endif +#include "character.h" +#include "buffer.h" -/* This is for other GNU distributions with internationalized messages. 
*/ -#if HAVE_LIBINTL_H || defined _LIBC -# include <libintl.h> -#else -# define gettext(msgid) (msgid) -#endif +#include "syntax.h" +#include "category.h" -#ifndef gettext_noop -/* This define is so xgettext can find the internationalizable - strings. */ -# define gettext_noop(String) String +/* Maximum number of duplicates an interval can allow. Some systems + define this in other header files, but we want our + value, so remove any previous define. */ +#ifdef RE_DUP_MAX +# undef RE_DUP_MAX #endif - -/* The `emacs' switch turns on certain matching commands - that make sense only in Emacs. */ -#ifdef emacs - -# include "lisp.h" -# include "character.h" -# include "buffer.h" - -# include "syntax.h" -# include "category.h" +/* Repeat counts are stored in opcodes as 2 byte integers. This was + previously limited to 7fff because the parsing code uses signed + ints. But Emacs only runs on 32 bit platforms anyway. */ +#define RE_DUP_MAX (0xffff) + +/* The following bits are used to determine the regexp syntax we + recognize. The set/not-set meanings where historically chosen so + that Emacs syntax had the value 0. + The bits are given in alphabetical order, and + the definitions shifted by one from the previous bit; thus, when we + add or remove a bit, only one other definition need change. */ +typedef unsigned long reg_syntax_t; + +/* If this bit is not set, then \ inside a bracket expression is literal. + If set, then such a \ quotes the following character. */ +#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1) + +/* If this bit is not set, then + and ? are operators, and \+ and \? are + literals. + If set, then \+ and \? are operators and + and ? are literals. */ +#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1) + +/* If this bit is set, then character classes are supported. They are: + [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:], + [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:]. 
+ If not set, then character classes are not supported. */ +#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1) + +/* If this bit is set, then ^ and $ are always anchors (outside bracket + expressions, of course). + If this bit is not set, then it depends: + ^ is an anchor if it is at the beginning of a regular + expression or after an open-group or an alternation operator; + $ is an anchor if it is at the end of a regular expression, or + before a close-group or an alternation operator. + + This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because + POSIX draft 11.2 says that * etc. in leading positions is undefined. + We already implemented a previous draft which made those constructs + invalid, though, so we haven't changed the code back. */ +#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1) + +/* If this bit is set, then special characters are always special + regardless of where they are in the pattern. + If this bit is not set, then special characters are special only in + some contexts; otherwise they are ordinary. Specifically, + * + ? and intervals are only special when not after the beginning, + open-group, or alternation operator. */ +#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1) + +/* If this bit is set, then *, +, ?, and { cannot be first in an re or + immediately after an alternation or begin-group operator. */ +#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1) + +/* If this bit is set, then . matches newline. + If not set, then it doesn't. */ +#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1) + +/* If this bit is set, then . doesn't match NUL. + If not set, then it does. */ +#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1) + +/* If this bit is set, nonmatching lists [^...] do not match newline. + If not set, they do. */ +#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1) + +/* If this bit is set, either \{...\} or {...} defines an + interval, depending on RE_NO_BK_BRACES. 
+ If not set, \{, \}, {, and } are literals. */ +#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1) + +/* If this bit is set, +, ? and | aren't recognized as operators. + If not set, they are. */ +#define RE_LIMITED_OPS (RE_INTERVALS << 1) + +/* If this bit is set, newline is an alternation operator. + If not set, newline is literal. */ +#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1) + +/* If this bit is set, then `{...}' defines an interval, and \{ and \} + are literals. + If not set, then `\{...\}' defines an interval. */ +#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1) + +/* If this bit is set, (...) defines a group, and \( and \) are literals. + If not set, \(...\) defines a group, and ( and ) are literals. */ +#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1) + +/* If this bit is set, then \<digit> matches <digit>. + If not set, then \<digit> is a back-reference. */ +#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1) + +/* If this bit is set, then | is an alternation operator, and \| is literal. + If not set, then \| is an alternation operator, and | is literal. */ +#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1) + +/* If this bit is set, then an ending range point collating higher + than the starting range point, as in [z-a], is invalid. + If not set, then when ending range point collates higher than the + starting range point, the range is ignored. */ +#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1) + +/* If this bit is set, then an unmatched ) is ordinary. + If not set, then an unmatched ) is invalid. */ +#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1) + +/* If this bit is set, succeed as soon as we match the whole pattern, + without further backtracking. */ +#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1) + +/* If this bit is set, do not process the GNU regex operators. + If not set, then the GNU regex operators are recognized. */ +#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1) + +/* If this bit is set, then *?, +? and ?? 
match non greedily. */ +#define RE_FRUGAL (RE_NO_GNU_OPS << 1) + +/* If this bit is set, then (?:...) is treated as a shy group. */ +#define RE_SHY_GROUPS (RE_FRUGAL << 1) + +/* If this bit is set, ^ and $ only match at beg/end of buffer. */ +#define RE_NO_NEWLINE_ANCHOR (RE_SHY_GROUPS << 1) + +/* This global variable defines the particular regexp syntax to use (for + some interfaces). When a regexp is compiled, the syntax used is + stored in the pattern buffer, so changing this does not affect + already-compiled regexps. */ +/* extern reg_syntax_t re_syntax_options; */ +/* Define combinations of the above bits for the standard possibilities. */ +#define RE_SYNTAX_EMACS \ + (RE_CHAR_CLASSES | RE_INTERVALS | RE_SHY_GROUPS | RE_FRUGAL) /* Make syntax table lookup grant data in gl_state. */ -# define SYNTAX(c) syntax_property (c, 1) - -# ifdef malloc -# undef malloc -# endif -# define malloc xmalloc -# ifdef realloc -# undef realloc -# endif -# define realloc xrealloc -# ifdef free -# undef free -# endif -# define free xfree +#define SYNTAX(c) syntax_property (c, 1) /* Converts the pointer to the char to BEG-based offset from the start. */ -# define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d)) +#define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d)) /* Strings are 0-indexed, buffers are 1-indexed; we pun on the boolean result to get the right base index. */ -# define POS_AS_IN_BUFFER(p) \ +#define POS_AS_IN_BUFFER(p) \ ((p) + (NILP (gl_state.object) || BUFFERP (gl_state.object))) -# define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte) -# define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte) -# define RE_STRING_CHAR(p, multibyte) \ +#define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte) +#define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte) +#define RE_STRING_CHAR(p, multibyte) \ (multibyte ? 
(STRING_CHAR (p)) : (*(p))) -# define RE_STRING_CHAR_AND_LENGTH(p, len, multibyte) \ +#define RE_STRING_CHAR_AND_LENGTH(p, len, multibyte) \ (multibyte ? (STRING_CHAR_AND_LENGTH (p, len)) : ((len) = 1, *(p))) -# define RE_CHAR_TO_MULTIBYTE(c) UNIBYTE_TO_CHAR (c) +#define RE_CHAR_TO_MULTIBYTE(c) UNIBYTE_TO_CHAR (c) -# define RE_CHAR_TO_UNIBYTE(c) CHAR_TO_BYTE_SAFE (c) +#define RE_CHAR_TO_UNIBYTE(c) CHAR_TO_BYTE_SAFE (c) /* Set C a (possibly converted to multibyte) character before P. P points into a string which is the virtual concatenation of STR1 (which ends at END1) or STR2 (which ends at END2). */ -# define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \ +#define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \ do { \ if (target_multibyte) \ { \ @@ -191,7 +219,7 @@ /* Set C a (possibly converted to multibyte) character at P, and set LEN to the byte length of that character. */ -# define GET_CHAR_AFTER(c, p, len) \ +#define GET_CHAR_AFTER(c, p, len) \ do { \ if (target_multibyte) \ (c) = STRING_CHAR_AND_LENGTH (p, len); \ @@ -202,235 +230,66 @@ (c) = RE_CHAR_TO_MULTIBYTE (c); \ } \ } while (0) - -#else /* not emacs */ - -/* If we are not linking with Emacs proper, - we can't use the relocating allocator - even if config.h says that we can. */ -# undef REL_ALLOC - -# include <unistd.h> - -/* When used in Emacs's lib-src, we need xmalloc and xrealloc. */ - -static ATTRIBUTE_MALLOC void * -xmalloc (size_t size) -{ - void *val = malloc (size); - if (!val && size) - { - write (STDERR_FILENO, "virtual memory exhausted\n", 25); - exit (1); - } - return val; -} - -static void * -xrealloc (void *block, size_t size) -{ - void *val; - /* We must call malloc explicitly when BLOCK is 0, since some - reallocs don't do this. */ - if (! 
block) - val = malloc (size); - else - val = realloc (block, size); - if (!val && size) - { - write (STDERR_FILENO, "virtual memory exhausted\n", 25); - exit (1); - } - return val; -} - -# ifdef malloc -# undef malloc -# endif -# define malloc xmalloc -# ifdef realloc -# undef realloc -# endif -# define realloc xrealloc - -# include <stdbool.h> -# include <string.h> - -/* Define the syntax stuff for \<, \>, etc. */ - -/* Sword must be nonzero for the wordchar pattern commands in re_match_2. */ -enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 }; - -/* Dummy macros for non-Emacs environments. */ -# define MAX_MULTIBYTE_LENGTH 1 -# define RE_MULTIBYTE_P(x) 0 -# define RE_TARGET_MULTIBYTE_P(x) 0 -# define WORD_BOUNDARY_P(c1, c2) (0) -# define BYTES_BY_CHAR_HEAD(p) (1) -# define PREV_CHAR_BOUNDARY(p, limit) ((p)--) -# define STRING_CHAR(p) (*(p)) -# define RE_STRING_CHAR(p, multibyte) STRING_CHAR (p) -# define CHAR_STRING(c, s) (*(s) = (c), 1) -# define STRING_CHAR_AND_LENGTH(p, actual_len) ((actual_len) = 1, *(p)) -# define RE_STRING_CHAR_AND_LENGTH(p, len, multibyte) STRING_CHAR_AND_LENGTH (p, len) -# define RE_CHAR_TO_MULTIBYTE(c) (c) -# define RE_CHAR_TO_UNIBYTE(c) (c) -# define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \ - (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1))) -# define GET_CHAR_AFTER(c, p, len) \ - (c = *p, len = 1) -# define CHAR_BYTE8_P(c) (0) -# define CHAR_LEADING_CODE(c) (c) - -#endif /* not emacs */ - -#ifndef RE_TRANSLATE -# define RE_TRANSLATE(TBL, C) ((unsigned char)(TBL)[C]) -# define RE_TRANSLATE_P(TBL) (TBL) -#endif -/* Get the interface, including the syntax bits. */ -#include "regex-emacs.h" - /* isalpha etc. are used for the character classes. */ #include <ctype.h> -#ifdef emacs - /* 1 if C is an ASCII character. */ -# define IS_REAL_ASCII(c) ((c) < 0200) +#define IS_REAL_ASCII(c) ((c) < 0200) /* 1 if C is a unibyte character. 
*/ -# define ISUNIBYTE(c) (SINGLE_BYTE_CHAR_P ((c))) +#define ISUNIBYTE(c) (SINGLE_BYTE_CHAR_P ((c))) /* The Emacs definitions should not be directly affected by locales. */ /* In Emacs, these are only used for single-byte characters. */ -# define ISDIGIT(c) ((c) >= '0' && (c) <= '9') -# define ISCNTRL(c) ((c) < ' ') -# define ISXDIGIT(c) (0 <= char_hexdigit (c)) +#define ISDIGIT(c) ((c) >= '0' && (c) <= '9') +#define ISCNTRL(c) ((c) < ' ') +#define ISXDIGIT(c) (0 <= char_hexdigit (c)) /* The rest must handle multibyte characters. */ -# define ISBLANK(c) (IS_REAL_ASCII (c) \ +#define ISBLANK(c) (IS_REAL_ASCII (c) \ ? ((c) == ' ' || (c) == '\t') \ : blankp (c)) -# define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c) \ +#define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c) \ ? (c) > ' ' && !((c) >= 0177 && (c) <= 0240) \ : graphicp (c)) -# define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c) \ +#define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c) \ ? (c) >= ' ' && !((c) >= 0177 && (c) <= 0237) \ : printablep (c)) -# define ISALNUM(c) (IS_REAL_ASCII (c) \ +#define ISALNUM(c) (IS_REAL_ASCII (c) \ ? (((c) >= 'a' && (c) <= 'z') \ || ((c) >= 'A' && (c) <= 'Z') \ || ((c) >= '0' && (c) <= '9')) \ : alphanumericp (c)) -# define ISALPHA(c) (IS_REAL_ASCII (c) \ +#define ISALPHA(c) (IS_REAL_ASCII (c) \ ? (((c) >= 'a' && (c) <= 'z') \ || ((c) >= 'A' && (c) <= 'Z')) \ : alphabeticp (c)) -# define ISLOWER(c) lowercasep (c) +#define ISLOWER(c) lowercasep (c) -# define ISPUNCT(c) (IS_REAL_ASCII (c) \ +#define ISPUNCT(c) (IS_REAL_ASCII (c) \ ? ((c) > ' ' && (c) < 0177 \ && !(((c) >= 'a' && (c) <= 'z') \ || ((c) >= 'A' && (c) <= 'Z') \ || ((c) >= '0' && (c) <= '9'))) \ : SYNTAX (c) != Sword) -# define ISSPACE(c) (SYNTAX (c) == Swhitespace) +#define ISSPACE(c) (SYNTAX (c) == Swhitespace) -# define ISUPPER(c) uppercasep (c) - -# define ISWORD(c) (SYNTAX (c) == Sword) - -#else /* not emacs */ - -/* 1 if C is an ASCII character. */ -# define IS_REAL_ASCII(c) ((c) < 0200) - -/* This distinction is not meaningful, except in Emacs. 
*/ -# define ISUNIBYTE(c) 1 - -# ifdef isblank -# define ISBLANK(c) isblank (c) -# else -# define ISBLANK(c) ((c) == ' ' || (c) == '\t') -# endif -# ifdef isgraph -# define ISGRAPH(c) isgraph (c) -# else -# define ISGRAPH(c) (isprint (c) && !isspace (c)) -# endif - -/* Solaris defines ISPRINT so we must undefine it first. */ -# undef ISPRINT -# define ISPRINT(c) isprint (c) -# define ISDIGIT(c) isdigit (c) -# define ISALNUM(c) isalnum (c) -# define ISALPHA(c) isalpha (c) -# define ISCNTRL(c) iscntrl (c) -# define ISLOWER(c) islower (c) -# define ISPUNCT(c) ispunct (c) -# define ISSPACE(c) isspace (c) -# define ISUPPER(c) isupper (c) -# define ISXDIGIT(c) isxdigit (c) - -# define ISWORD(c) ISALPHA (c) - -# ifdef _tolower -# define TOLOWER(c) _tolower (c) -# else -# define TOLOWER(c) tolower (c) -# endif - -/* How many characters in the character set. */ -# define CHAR_SET_SIZE 256 - -# ifdef SYNTAX_TABLE - -extern char *re_syntax_table; - -# else /* not SYNTAX_TABLE */ - -static char re_syntax_table[CHAR_SET_SIZE]; - -static void -init_syntax_once (void) -{ - register int c; - static int done = 0; - - if (done) - return; - - memset (re_syntax_table, 0, sizeof re_syntax_table); - - for (c = 0; c < CHAR_SET_SIZE; ++c) - if (ISALNUM (c)) - re_syntax_table[c] = Sword; - - re_syntax_table['_'] = Ssymbol; - - done = 1; -} +#define ISUPPER(c) uppercasep (c) -# endif /* not SYNTAX_TABLE */ - -# define SYNTAX(c) re_syntax_table[(c)] - -#endif /* not emacs */ +#define ISWORD(c) (SYNTAX (c) == Sword) #define SIGN_EXTEND_CHAR(c) ((signed char) (c)) -/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we - use `alloca' instead of `malloc'. This is because using malloc in +/* Use alloca instead of malloc. 
This is because using malloc in re_search* or re_match* could cause memory leaks when C-g is used in Emacs (note that SAFE_ALLOCA could also call malloc, but does so via `record_xmalloc' which uses `unwind_protect' to ensure the @@ -442,64 +301,17 @@ init_syntax_once (void) not functions -- `alloca'-allocated space disappears at the end of the function it is called in. */ -#ifdef REGEX_MALLOC - -# define REGEX_ALLOCATE malloc -# define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize) -# define REGEX_FREE free - -#else /* not REGEX_MALLOC */ - -# ifdef emacs /* This may be adjusted in main(), if the stack is successfully grown. */ ptrdiff_t emacs_re_safe_alloca = MAX_ALLOCA; /* Like USE_SAFE_ALLOCA, but use emacs_re_safe_alloca. */ -# define REGEX_USE_SAFE_ALLOCA \ - ptrdiff_t sa_avail = emacs_re_safe_alloca; \ - ptrdiff_t sa_count = SPECPDL_INDEX () - -# define REGEX_SAFE_FREE() SAFE_FREE () -# define REGEX_ALLOCATE SAFE_ALLOCA -# else -# include <alloca.h> -# define REGEX_ALLOCATE alloca -# endif +#define REGEX_USE_SAFE_ALLOCA \ + USE_SAFE_ALLOCA; sa_avail = emacs_re_safe_alloca /* Assumes a `char *destination' variable. */ -# define REGEX_REALLOCATE(source, osize, nsize) \ - (destination = REGEX_ALLOCATE (nsize), \ +#define REGEX_REALLOCATE(source, osize, nsize) \ + (destination = SAFE_ALLOCA (nsize), \ memcpy (destination, source, osize)) -/* No need to do anything to free, after alloca. */ -# define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */ - -#endif /* not REGEX_MALLOC */ - -#ifndef REGEX_USE_SAFE_ALLOCA -# define REGEX_USE_SAFE_ALLOCA ((void) 0) -# define REGEX_SAFE_FREE() ((void) 0) -#endif - -/* Define how to allocate the failure stack. 
*/ - -#if defined REL_ALLOC && defined REGEX_MALLOC - -# define REGEX_ALLOCATE_STACK(size) \ - r_alloc (&failure_stack_ptr, (size)) -# define REGEX_REALLOCATE_STACK(source, osize, nsize) \ - r_re_alloc (&failure_stack_ptr, (nsize)) -# define REGEX_FREE_STACK(ptr) \ - r_alloc_free (&failure_stack_ptr) - -#else /* not using relocating allocator */ - -# define REGEX_ALLOCATE_STACK(size) REGEX_ALLOCATE (size) -# define REGEX_REALLOCATE_STACK(source, o, n) REGEX_REALLOCATE (source, o, n) -# define REGEX_FREE_STACK(ptr) REGEX_FREE (ptr) - -#endif /* not using relocating allocator */ - - /* True if `size1' is non-NULL and PTR is pointing anywhere inside `string1' or just past its end. This works if PTR is NULL, which is a good thing. */ @@ -507,30 +319,21 @@ ptrdiff_t emacs_re_safe_alloca = MAX_ALLOCA; (size1 && string1 <= (ptr) && (ptr) <= string1 + size1) /* (Re)Allocate N items of type T using malloc, or fail. */ -#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t))) -#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) -#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) +#define TALLOC(n, t) ((t *) xmalloc ((n) * sizeof (t))) +#define RETALLOC(addr, n, t) ((addr) = (t *) xrealloc (addr, (n) * sizeof (t))) #define BYTEWIDTH 8 /* In bits. */ -#ifndef emacs -# undef max -# undef min -# define max(a, b) ((a) > (b) ? (a) : (b)) -# define min(a, b) ((a) < (b) ? (a) : (b)) -#endif - /* Type of source-pattern and string chars. */ typedef const unsigned char re_char; -typedef char boolean; - -static regoff_t re_match_2_internal (struct re_pattern_buffer *bufp, +static void re_compile_fastmap (struct re_pattern_buffer *); +static ptrdiff_t re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, size_t size1, re_char *string2, size_t size2, - ssize_t pos, + ptrdiff_t pos, struct re_registers *regs, - ssize_t stop); + ptrdiff_t stop); /* These are the command codes that appear in compiled regular expressions. 
Some opcodes are followed by argument bytes. A @@ -592,8 +395,7 @@ typedef enum /* Fail unless at end of line. */ endline, - /* Succeeds if at beginning of buffer (if emacs) or at beginning - of string to be matched (if not). */ + /* Succeeds if at beginning of buffer. */ begbuf, /* Analogously, for end of buffer/string. */ @@ -658,10 +460,9 @@ typedef enum syntaxspec, /* Matches any character whose syntax is not that specified. */ - notsyntaxspec + notsyntaxspec, -#ifdef emacs - , at_dot, /* Succeeds if at point. */ + at_dot, /* Succeeds if at point. */ /* Matches any character whose category-set contains the specified category. The operator is followed by a byte which contains a @@ -672,7 +473,6 @@ typedef enum specified category. The operator is followed by a byte which contains the category code (mnemonic ASCII character). */ notcategoryspec -#endif /* emacs */ } re_opcode_t; /* Common operations on the compiled pattern. */ @@ -760,12 +560,10 @@ extract_number_and_incr (re_char **source) and the 2 bytes of flags at the start of the range table. */ #define CHARSET_RANGE_TABLE(p) (&(p)[4 + CHARSET_BITMAP_SIZE (p)]) -#ifdef emacs /* Extract the bit flags that start a range table. */ #define CHARSET_RANGE_TABLE_BITS(p) \ ((p)[2 + CHARSET_BITMAP_SIZE (p)] \ + (p)[3 + CHARSET_BITMAP_SIZE (p)] * 0x100) -#endif /* Return the address of end of RANGE_TABLE. COUNT is number of ranges (which is a pair of (start, end)) in the RANGE_TABLE. `* 2' @@ -774,29 +572,23 @@ extract_number_and_incr (re_char **source) #define CHARSET_RANGE_TABLE_END(range_table, count) \ ((range_table) + (count) * 2 * 3) -/* If DEBUG is defined, Regex prints many voluminous messages about what - it is doing (if the variable `debug' is nonzero). If linked with the - main program in `iregex.c', you can enter patterns and strings - interactively. And if linked with the main program in `main.c' and - the other test files, you can run the already-written tests. 
*/ +/* If REGEX_EMACS_DEBUG is defined, print many voluminous messages + (if the variable regex_emacs_debug is positive). */ -#ifdef DEBUG +#ifdef REGEX_EMACS_DEBUG /* We use standard I/O for debugging. */ # include <stdio.h> -/* It is useful to test things that ``must'' be true when debugging. */ -# include <assert.h> - -static int debug = -100000; +static int regex_emacs_debug = -100000; # define DEBUG_STATEMENT(e) e -# define DEBUG_PRINT(...) if (debug > 0) printf (__VA_ARGS__) +# define DEBUG_PRINT(...) if (regex_emacs_debug > 0) printf (__VA_ARGS__) # define DEBUG_COMPILES_ARGUMENTS # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ - if (debug > 0) print_partial_compiled_pattern (s, e) + if (regex_emacs_debug > 0) print_partial_compiled_pattern (s, e) # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ - if (debug > 0) print_double_string (w, s1, sz1, s2, sz2) + if (regex_emacs_debug > 0) print_double_string (w, s1, sz1, s2, sz2) /* Print the fastmap in human-readable form. */ @@ -1085,7 +877,7 @@ print_compiled_pattern (struct re_pattern_buffer *bufp) re_char *buffer = bufp->buffer; print_partial_compiled_pattern (buffer, buffer + bufp->used); - printf ("%ld bytes used/%ld bytes allocated.\n", + printf ("%zu bytes used/%zu bytes allocated.\n", bufp->used, bufp->allocated); if (bufp->fastmap_accurate && bufp->fastmap) @@ -1131,146 +923,100 @@ print_double_string (re_char *where, re_char *string1, ssize_t size1, } } -#else /* not DEBUG */ - -# undef assert -# define assert(e) +#else /* not REGEX_EMACS_DEBUG */ # define DEBUG_STATEMENT(e) # define DEBUG_PRINT(...) # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) -#endif /* not DEBUG */ +#endif /* not REGEX_EMACS_DEBUG */ -#ifndef emacs - -/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can - also be assigned to arbitrarily: each pattern buffer stores its own - syntax, so it can be changed between regex compilations. 
*/ -/* This has no initializer because initialized variables in Emacs - become read-only after dumping. */ -reg_syntax_t re_syntax_options; - - -/* Specify the precise syntax of regexps for compilation. This provides - for compatibility for various utilities which historically have - different, incompatible syntaxes. - - The argument SYNTAX is a bit mask comprised of the various bits - defined in regex-emacs.h. We return the old syntax. */ - -reg_syntax_t -re_set_syntax (reg_syntax_t syntax) +typedef enum { - reg_syntax_t ret = re_syntax_options; - - re_syntax_options = syntax; - return ret; -} -WEAK_ALIAS (__re_set_syntax, re_set_syntax) - -#endif - -/* This table gives an error message for each of the error codes listed - in regex-emacs.h. Obviously the order here has to be same as there. - POSIX doesn't require that we do anything for REG_NOERROR, - but why not be nice? */ + REG_NOERROR = 0, /* Success. */ + REG_NOMATCH, /* Didn't find a match (for regexec). */ + + /* POSIX regcomp return error codes. (In the order listed in the + standard.) An older version of this code supported the POSIX + API; this version continues to use these names internally. */ + REG_BADPAT, /* Invalid pattern. */ + REG_ECOLLATE, /* Not implemented. */ + REG_ECTYPE, /* Invalid character class name. */ + REG_EESCAPE, /* Trailing backslash. */ + REG_ESUBREG, /* Invalid back reference. */ + REG_EBRACK, /* Unmatched left bracket. */ + REG_EPAREN, /* Parenthesis imbalance. */ + REG_EBRACE, /* Unmatched \{. */ + REG_BADBR, /* Invalid contents of \{\}. */ + REG_ERANGE, /* Invalid range end. */ + REG_ESPACE, /* Ran out of memory. */ + REG_BADRPT, /* No preceding re for repetition op. */ + + /* Error codes we've added. */ + REG_EEND, /* Premature end. */ + REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */ + REG_ERPAREN, /* Unmatched ) or \); not returned from regcomp. */ + REG_ERANGEX, /* Range striding over charsets. 
*/ + REG_ESIZEBR /* n or m too big in \{n,m\} */ +} reg_errcode_t; static const char *re_error_msgid[] = { - gettext_noop ("Success"), /* REG_NOERROR */ - gettext_noop ("No match"), /* REG_NOMATCH */ - gettext_noop ("Invalid regular expression"), /* REG_BADPAT */ - gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */ - gettext_noop ("Invalid character class name"), /* REG_ECTYPE */ - gettext_noop ("Trailing backslash"), /* REG_EESCAPE */ - gettext_noop ("Invalid back reference"), /* REG_ESUBREG */ - gettext_noop ("Unmatched [ or [^"), /* REG_EBRACK */ - gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */ - gettext_noop ("Unmatched \\{"), /* REG_EBRACE */ - gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */ - gettext_noop ("Invalid range end"), /* REG_ERANGE */ - gettext_noop ("Memory exhausted"), /* REG_ESPACE */ - gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */ - gettext_noop ("Premature end of regular expression"), /* REG_EEND */ - gettext_noop ("Regular expression too big"), /* REG_ESIZE */ - gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */ - gettext_noop ("Range striding over charsets"), /* REG_ERANGEX */ - gettext_noop ("Invalid content of \\{\\}, repetitions too big") /* REG_ESIZEBR */ + [REG_NOERROR] = "Success", + [REG_NOMATCH] = "No match", + [REG_BADPAT] = "Invalid regular expression", + [REG_ECOLLATE] = "Invalid collation character", + [REG_ECTYPE] = "Invalid character class name", + [REG_EESCAPE] = "Trailing backslash", + [REG_ESUBREG] = "Invalid back reference", + [REG_EBRACK] = "Unmatched [ or [^", + [REG_EPAREN] = "Unmatched ( or \\(", + [REG_EBRACE] = "Unmatched \\{", + [REG_BADBR] = "Invalid content of \\{\\}", + [REG_ERANGE] = "Invalid range end", + [REG_ESPACE] = "Memory exhausted", + [REG_BADRPT] = "Invalid preceding regular expression", + [REG_EEND] = "Premature end of regular expression", + [REG_ESIZE] = "Regular expression too big", + [REG_ERPAREN] = "Unmatched ) or \\)", + [REG_ERANGEX ] = 
"Range striding over charsets", + [REG_ESIZEBR ] = "Invalid content of \\{\\}", }; - -/* Whether to allocate memory during matching. */ - -/* Define MATCH_MAY_ALLOCATE to allow the searching and matching - functions allocate memory for the failure stack and registers. - Normally should be defined, because otherwise searching and - matching routines will have much smaller memory resources at their - disposal, and therefore might fail to handle complex regexps. - Therefore undefine MATCH_MAY_ALLOCATE only in the following - exceptional situations: - - . When running on a system where memory is at premium. - . When alloca cannot be used at all, perhaps due to bugs in - its implementation, or its being unavailable, or due to a - very small stack size. This requires to define REGEX_MALLOC - to use malloc instead, which in turn could lead to memory - leaks if search is interrupted by a signal. (For these - reasons, defining REGEX_MALLOC when building Emacs - automatically undefines MATCH_MAY_ALLOCATE, but outside - Emacs you may not care about memory leaks.) If you want to - prevent the memory leaks, undefine MATCH_MAY_ALLOCATE. - . When code that calls the searching and matching functions - cannot allow memory allocation, for whatever reasons. */ - -/* Normally, this is fine. */ -#define MATCH_MAY_ALLOCATE - -/* The match routines may not allocate if (1) they would do it with malloc - and (2) it's not safe for them to use malloc. - Note that if REL_ALLOC is defined, matching would not use malloc for the - failure stack, but we would still use it for the register vectors; - so REL_ALLOC should not affect this. 
*/ -#if defined REGEX_MALLOC && defined emacs -# undef MATCH_MAY_ALLOCATE -#endif -/* While regex matching of a single compiled pattern isn't reentrant - (because we compile regexes to bytecode programs, and the bytecode - programs are self-modifying), the regex machinery must nevertheless - be reentrant with respect to _different_ patterns, and we do that - by avoiding global variables and using MATCH_MAY_ALLOCATE. */ -#if !defined MATCH_MAY_ALLOCATE && defined emacs -# error "Emacs requires MATCH_MAY_ALLOCATE" -#endif +/* For 'regs_allocated'. */ +enum { REGS_UNALLOCATED, REGS_REALLOCATE, REGS_FIXED }; +/* If 'regs_allocated' is REGS_UNALLOCATED in the pattern buffer, + 're_match_2' returns information about at least this many registers + the first time a `regs' structure is passed. */ +enum { RE_NREGS = 30 }; +/* The searching and matching functions allocate memory for the + failure stack and registers. Otherwise searching and matching + routines would have much smaller memory resources at their + disposal, and therefore might fail to handle complex regexps. */ + /* Failure stack declarations and macros; both re_compile_fastmap and re_match_2 use a failure stack. These have to be macros because of - REGEX_ALLOCATE_STACK. */ + SAFE_ALLOCA. */ /* Approximate number of failure points for which to initially allocate space when matching. If this number is exceeded, we allocate more space, so it is not a hard limit. */ -#ifndef INIT_FAILURE_ALLOC -# define INIT_FAILURE_ALLOC 20 -#endif +#define INIT_FAILURE_ALLOC 20 /* Roughly the maximum number of failure points on the stack. Would be exactly that if always used TYPICAL_FAILURE_SIZE items each time we failed. This is a variable only so users of regex can assign to it; we never change it ourselves. We always multiply it by TYPICAL_FAILURE_SIZE before using it, so it should probably be a byte-count instead. 
*/ -# if defined MATCH_MAY_ALLOCATE /* Note that 4400 was enough to cause a crash on Alpha OSF/1, whose default stack limit is 2mb. In order for a larger value to work reliably, you have to try to make it accord with the process stack limit. */ size_t emacs_re_max_failures = 40000; -# else -size_t emacs_re_max_failures = 4000; -# endif union fail_stack_elt { @@ -1292,33 +1038,17 @@ typedef struct #define FAIL_STACK_EMPTY() (fail_stack.frame == 0) -/* Define macros to initialize and free the failure stack. - Do `return -2' if the alloc fails. */ +/* Define macros to initialize and free the failure stack. */ -#ifdef MATCH_MAY_ALLOCATE -# define INIT_FAIL_STACK() \ +#define INIT_FAIL_STACK() \ do { \ fail_stack.stack = \ - REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE \ - * sizeof (fail_stack_elt_t)); \ - \ - if (fail_stack.stack == NULL) \ - return -2; \ - \ + SAFE_ALLOCA (INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE \ + * sizeof (fail_stack_elt_t)); \ fail_stack.size = INIT_FAILURE_ALLOC; \ fail_stack.avail = 0; \ fail_stack.frame = 0; \ } while (0) -#else -# define INIT_FAIL_STACK() \ - do { \ - fail_stack.avail = 0; \ - fail_stack.frame = 0; \ - } while (0) - -# define RETALLOC_IF(addr, n, t) \ - if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t) -#endif /* Double the size of FAIL_STACK, up to a limit @@ -1327,7 +1057,7 @@ typedef struct Return 1 if succeeds, and 0 if either ran out of memory allocating space for it or it was already too large. - REGEX_REALLOCATE_STACK requires `destination' be declared. */ + REGEX_REALLOCATE requires `destination' be declared. */ /* Factor to increase the failure stack size by when we increase it. @@ -1340,18 +1070,15 @@ typedef struct (((fail_stack).size >= emacs_re_max_failures * TYPICAL_FAILURE_SIZE) \ ? 
0 \ : ((fail_stack).stack \ - = REGEX_REALLOCATE_STACK ((fail_stack).stack, \ + = REGEX_REALLOCATE ((fail_stack).stack, \ (fail_stack).size * sizeof (fail_stack_elt_t), \ min (emacs_re_max_failures * TYPICAL_FAILURE_SIZE, \ ((fail_stack).size * FAIL_STACK_GROWTH_FACTOR)) \ * sizeof (fail_stack_elt_t)), \ - \ - (fail_stack).stack == NULL \ - ? 0 \ - : ((fail_stack).size \ - = (min (emacs_re_max_failures * TYPICAL_FAILURE_SIZE, \ - ((fail_stack).size * FAIL_STACK_GROWTH_FACTOR))), \ - 1))) + ((fail_stack).size \ + = (min (emacs_re_max_failures * TYPICAL_FAILURE_SIZE, \ + ((fail_stack).size * FAIL_STACK_GROWTH_FACTOR)))), \ + 1)) /* Push a pointer value onto the failure stack. @@ -1385,8 +1112,8 @@ typedef struct while (REMAINING_AVAIL_SLOTS <= space) { \ if (!GROW_FAIL_STACK (fail_stack)) \ return -2; \ - DEBUG_PRINT ("\n Doubled stack; size now: %zd\n", (fail_stack).size);\ - DEBUG_PRINT (" slots available: %zd\n", REMAINING_AVAIL_SLOTS);\ + DEBUG_PRINT ("\n Doubled stack; size now: %zu\n", (fail_stack).size);\ + DEBUG_PRINT (" slots available: %zu\n", REMAINING_AVAIL_SLOTS);\ } /* Push register NUM onto the stack. */ @@ -1424,7 +1151,7 @@ do { \ if (pfreg == -1) \ { \ /* It's a counter. */ \ - /* Here, we discard `const', making re_match non-reentrant. */ \ + /* Discard 'const', making re_search non-reentrant. */ \ unsigned char *ptr = (unsigned char *) POP_FAILURE_POINTER (); \ pfreg = POP_FAILURE_INT (); \ STORE_NUMBER (ptr, pfreg); \ @@ -1442,14 +1169,14 @@ do { \ /* Check that we are not stuck in an infinite loop. 
*/ #define CHECK_INFINITE_LOOP(pat_cur, string_place) \ do { \ - ssize_t failure = TOP_FAILURE_HANDLE (); \ + ptrdiff_t failure = TOP_FAILURE_HANDLE (); \ /* Check for infinite matching loops */ \ while (failure > 0 \ && (FAILURE_STR (failure) == string_place \ || FAILURE_STR (failure) == NULL)) \ { \ - assert (FAILURE_PAT (failure) >= bufp->buffer \ - && FAILURE_PAT (failure) <= bufp->buffer + bufp->used); \ + eassert (FAILURE_PAT (failure) >= bufp->buffer \ + && FAILURE_PAT (failure) <= bufp->buffer + bufp->used); \ if (FAILURE_PAT (failure) == pat_cur) \ { \ cycle = 1; \ @@ -1478,14 +1205,14 @@ do { \ \ DEBUG_STATEMENT (nfailure_points_pushed++); \ DEBUG_PRINT ("\nPUSH_FAILURE_POINT:\n"); \ - DEBUG_PRINT (" Before push, next avail: %zd\n", (fail_stack).avail); \ - DEBUG_PRINT (" size: %zd\n", (fail_stack).size);\ + DEBUG_PRINT (" Before push, next avail: %zu\n", (fail_stack).avail); \ + DEBUG_PRINT (" size: %zu\n", (fail_stack).size);\ \ ENSURE_FAIL_STACK (NUM_NONREG_ITEMS); \ \ DEBUG_PRINT ("\n"); \ \ - DEBUG_PRINT (" Push frame index: %zd\n", fail_stack.frame); \ + DEBUG_PRINT (" Push frame index: %zu\n", fail_stack.frame); \ PUSH_FAILURE_INT (fail_stack.frame); \ \ DEBUG_PRINT (" Push string %p: \"", string_place); \ @@ -1523,12 +1250,12 @@ do { \ #define POP_FAILURE_POINT(str, pat) \ do { \ - assert (!FAIL_STACK_EMPTY ()); \ + eassert (!FAIL_STACK_EMPTY ()); \ \ /* Remove failure points and point to how many regs pushed. */ \ DEBUG_PRINT ("POP_FAILURE_POINT:\n"); \ - DEBUG_PRINT (" Before pop, next avail: %zd\n", fail_stack.avail); \ - DEBUG_PRINT (" size: %zd\n", fail_stack.size); \ + DEBUG_PRINT (" Before pop, next avail: %zu\n", fail_stack.avail); \ + DEBUG_PRINT (" size: %zu\n", fail_stack.size); \ \ /* Pop the saved registers. 
*/ \ while (fail_stack.frame < fail_stack.avail) \ @@ -1547,10 +1274,10 @@ do { \ DEBUG_PRINT ("\"\n"); \ \ fail_stack.frame = POP_FAILURE_INT (); \ - DEBUG_PRINT (" Popping frame index: %zd\n", fail_stack.frame); \ + DEBUG_PRINT (" Popping frame index: %zu\n", fail_stack.frame); \ \ - assert (fail_stack.avail >= 0); \ - assert (fail_stack.frame <= fail_stack.avail); \ + eassert (fail_stack.avail >= 0); \ + eassert (fail_stack.frame <= fail_stack.avail); \ \ DEBUG_STATEMENT (nfailure_points_popped++); \ } while (0) /* POP_FAILURE_POINT */ @@ -1563,12 +1290,8 @@ do { \ /* Subroutine declarations and macros for regex_compile. */ static reg_errcode_t regex_compile (re_char *pattern, size_t size, -#ifdef emacs bool posix_backtracking, const char *whitespace_regexp, -#else - reg_syntax_t syntax, -#endif struct re_pattern_buffer *bufp); static void store_op1 (re_opcode_t op, unsigned char *loc, int arg); static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2); @@ -1576,10 +1299,10 @@ static void insert_op1 (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end); static void insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, unsigned char *end); -static boolean at_begline_loc_p (re_char *pattern, re_char *p, - reg_syntax_t syntax); -static boolean at_endline_loc_p (re_char *p, re_char *pend, - reg_syntax_t syntax); +static bool at_begline_loc_p (re_char *pattern, re_char *p, + reg_syntax_t syntax); +static bool at_endline_loc_p (re_char *p, re_char *pend, + reg_syntax_t syntax); static re_char *skip_one_char (re_char *p); static int analyze_first (re_char *p, re_char *pend, char *fastmap, const int multibyte); @@ -1595,14 +1318,15 @@ static int analyze_first (re_char *p, re_char *pend, } while (0) -/* If `translate' is non-null, return translate[D], else just D. 
We +#define RE_TRANSLATE(TBL, C) char_table_translate (TBL, C) +#define RE_TRANSLATE_P(TBL) (!EQ (TBL, make_number (0))) + +/* If `translate' is non-zero, return translate[D], else just D. We cast the subscript to translate because some data is declared as `char *', to avoid warnings when a string constant is passed. But when we use a character as a subscript we must make it unsigned. */ -#ifndef TRANSLATE -# define TRANSLATE(d) \ +#define TRANSLATE(d) \ (RE_TRANSLATE_P (translate) ? RE_TRANSLATE (translate, (d)) : (d)) -#endif /* Macros for outputting the compiled pattern into `buffer'. */ @@ -1677,8 +1401,6 @@ static int analyze_first (re_char *p, re_char *pend, if (laststart_set) laststart_off = laststart - old_buffer; \ if (pending_exact_set) pending_exact_off = pending_exact - old_buffer; \ RETALLOC (bufp->buffer, bufp->allocated, unsigned char); \ - if (bufp->buffer == NULL) \ - return REG_ESPACE; \ unsigned char *new_buffer = bufp->buffer; \ b = new_buffer + b_off; \ begalt = new_buffer + begalt_off; \ @@ -1729,12 +1451,6 @@ typedef struct /* The next available element. */ #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) - -/* Explicit quit checking is needed for Emacs, which uses polling to - process input events. */ -#ifndef emacs -static void maybe_quit (void) {} -#endif /* Structure to manage work area for range table. */ struct range_table_work_area @@ -1745,8 +1461,6 @@ struct range_table_work_area int bits; /* flag to record character classes */ }; -#ifdef emacs - /* Make sure that WORK_AREA can hold more N multibyte characters. This is used only in set_image_of_range and set_image_of_range_1. It expects WORK_AREA to be a pointer. @@ -1773,13 +1487,11 @@ struct range_table_work_area (work_area).table[(work_area).used++] = (range_end); \ } while (0) -#endif /* emacs */ - /* Free allocated memory for WORK_AREA. 
*/ #define FREE_RANGE_TABLE_WORK_AREA(work_area) \ do { \ if ((work_area).table) \ - free ((work_area).table); \ + xfree ((work_area).table); \ } while (0) #define CLEAR_RANGE_TABLE_WORK_USED(work_area) ((work_area).used = 0, (work_area).bits = 0) @@ -1807,8 +1519,6 @@ struct range_table_work_area #define SET_LIST_BIT(c) (b[((c)) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH)) -#ifdef emacs - /* Store characters in the range FROM to TO in the bitmap at B (for ASCII and unibyte characters) and WORK_AREA (for multibyte characters) while translating them and paying attention to the @@ -1912,8 +1622,6 @@ struct range_table_work_area } \ } while (0) -#endif /* emacs */ - /* Get the next unsigned number in the uncompiled pattern. */ #define GET_INTERVAL_COUNT(num) \ do { \ @@ -1936,8 +1644,6 @@ struct range_table_work_area } \ } while (0) -#if ! WIDE_CHAR_SUPPORT - /* Parse a character class, i.e. string such as "[:name:]". *strp points to the string to be parsed and limit is length, in bytes, of that string. @@ -2031,7 +1737,7 @@ re_wctype_parse (const unsigned char **strp, unsigned limit) } /* True if CH is in the char class CC. */ -boolean +bool re_iswctype (int ch, re_wctype_t cc) { switch (cc) @@ -2084,7 +1790,6 @@ re_wctype_to_bit (re_wctype_t cc) abort (); } } -#endif /* Filling in the work area of a range. */ @@ -2094,288 +1799,16 @@ static void extend_range_table_work_area (struct range_table_work_area *work_area) { work_area->allocated += 16 * sizeof (int); - work_area->table = realloc (work_area->table, work_area->allocated); + work_area->table = xrealloc (work_area->table, work_area->allocated); } - -#if 0 -#ifdef emacs - -/* Carefully find the ranges of codes that are equivalent - under case conversion to the range start..end when passed through - TRANSLATE. Handle the case where non-letters can come in between - two upper-case letters (which happens in Latin-1). - Also handle the case of groups of more than 2 case-equivalent chars. 
- - The basic method is to look at consecutive characters and see - if they can form a run that can be handled as one. - - Returns -1 if successful, REG_ESPACE if ran out of space. */ - -static int -set_image_of_range_1 (struct range_table_work_area *work_area, - re_wchar_t start, re_wchar_t end, - RE_TRANSLATE_TYPE translate) -{ - /* `one_case' indicates a character, or a run of characters, - each of which is an isolate (no case-equivalents). - This includes all ASCII non-letters. - - `two_case' indicates a character, or a run of characters, - each of which has two case-equivalent forms. - This includes all ASCII letters. - - `strange' indicates a character that has more than one - case-equivalent. */ - - enum case_type {one_case, two_case, strange}; - - /* Describe the run that is in progress, - which the next character can try to extend. - If run_type is strange, that means there really is no run. - If run_type is one_case, then run_start...run_end is the run. - If run_type is two_case, then the run is run_start...run_end, - and the case-equivalents end at run_eqv_end. */ - - enum case_type run_type = strange; - int run_start, run_end, run_eqv_end; - - Lisp_Object eqv_table; - - if (!RE_TRANSLATE_P (translate)) - { - EXTEND_RANGE_TABLE (work_area, 2); - work_area->table[work_area->used++] = (start); - work_area->table[work_area->used++] = (end); - return -1; - } - - eqv_table = XCHAR_TABLE (translate)->extras[2]; - - for (; start <= end; start++) - { - enum case_type this_type; - int eqv = RE_TRANSLATE (eqv_table, start); - int minchar, maxchar; - - /* Classify this character */ - if (eqv == start) - this_type = one_case; - else if (RE_TRANSLATE (eqv_table, eqv) == start) - this_type = two_case; - else - this_type = strange; - - if (start < eqv) - minchar = start, maxchar = eqv; - else - minchar = eqv, maxchar = start; - - /* Can this character extend the run in progress? 
*/ - if (this_type == strange || this_type != run_type - || !(minchar == run_end + 1 - && (run_type == two_case - ? maxchar == run_eqv_end + 1 : 1))) - { - /* No, end the run. - Record each of its equivalent ranges. */ - if (run_type == one_case) - { - EXTEND_RANGE_TABLE (work_area, 2); - work_area->table[work_area->used++] = run_start; - work_area->table[work_area->used++] = run_end; - } - else if (run_type == two_case) - { - EXTEND_RANGE_TABLE (work_area, 4); - work_area->table[work_area->used++] = run_start; - work_area->table[work_area->used++] = run_end; - work_area->table[work_area->used++] - = RE_TRANSLATE (eqv_table, run_start); - work_area->table[work_area->used++] - = RE_TRANSLATE (eqv_table, run_end); - } - run_type = strange; - } - - if (this_type == strange) - { - /* For a strange character, add each of its equivalents, one - by one. Don't start a range. */ - do - { - EXTEND_RANGE_TABLE (work_area, 2); - work_area->table[work_area->used++] = eqv; - work_area->table[work_area->used++] = eqv; - eqv = RE_TRANSLATE (eqv_table, eqv); - } - while (eqv != start); - } - - /* Add this char to the run, or start a new run. */ - else if (run_type == strange) - { - /* Initialize a new range. */ - run_type = this_type; - run_start = start; - run_end = start; - run_eqv_end = RE_TRANSLATE (eqv_table, run_end); - } - else - { - /* Extend a running range. */ - run_end = minchar; - run_eqv_end = RE_TRANSLATE (eqv_table, run_end); - } - } - - /* If a run is still in progress at the end, finish it now - by recording its equivalent ranges. 
*/ - if (run_type == one_case) - { - EXTEND_RANGE_TABLE (work_area, 2); - work_area->table[work_area->used++] = run_start; - work_area->table[work_area->used++] = run_end; - } - else if (run_type == two_case) - { - EXTEND_RANGE_TABLE (work_area, 4); - work_area->table[work_area->used++] = run_start; - work_area->table[work_area->used++] = run_end; - work_area->table[work_area->used++] - = RE_TRANSLATE (eqv_table, run_start); - work_area->table[work_area->used++] - = RE_TRANSLATE (eqv_table, run_end); - } - - return -1; -} - -#endif /* emacs */ - -/* Record the image of the range start..end when passed through - TRANSLATE. This is not necessarily TRANSLATE(start)..TRANSLATE(end) - and is not even necessarily contiguous. - Normally we approximate it with the smallest contiguous range that contains - all the chars we need. However, for Latin-1 we go to extra effort - to do a better job. - - This function is not called for ASCII ranges. - - Returns -1 if successful, REG_ESPACE if ran out of space. */ - -static int -set_image_of_range (struct range_table_work_area *work_area, - re_wchar_t start, re_wchar_t end, - RE_TRANSLATE_TYPE translate) -{ - re_wchar_t cmin, cmax; - -#ifdef emacs - /* For Latin-1 ranges, use set_image_of_range_1 - to get proper handling of ranges that include letters and nonletters. - For a range that includes the whole of Latin-1, this is not necessary. - For other character sets, we don't bother to get this right. 
*/ - if (RE_TRANSLATE_P (translate) && start < 04400 - && !(start < 04200 && end >= 04377)) - { - int newend; - int tem; - newend = end; - if (newend > 04377) - newend = 04377; - tem = set_image_of_range_1 (work_area, start, newend, translate); - if (tem > 0) - return tem; - - start = 04400; - if (end < 04400) - return -1; - } -#endif - - EXTEND_RANGE_TABLE (work_area, 2); - work_area->table[work_area->used++] = (start); - work_area->table[work_area->used++] = (end); - - cmin = -1, cmax = -1; - - if (RE_TRANSLATE_P (translate)) - { - int ch; - - for (ch = start; ch <= end; ch++) - { - re_wchar_t c = TRANSLATE (ch); - if (! (start <= c && c <= end)) - { - if (cmin == -1) - cmin = c, cmax = c; - else - { - cmin = min (cmin, c); - cmax = max (cmax, c); - } - } - } - - if (cmin != -1) - { - EXTEND_RANGE_TABLE (work_area, 2); - work_area->table[work_area->used++] = (cmin); - work_area->table[work_area->used++] = (cmax); - } - } - - return -1; -} -#endif /* 0 */ - -#ifndef MATCH_MAY_ALLOCATE - -/* If we cannot allocate large objects within re_match_2_internal, - we make the fail stack and register vectors global. - The fail stack, we grow to the maximum size when a regexp - is compiled. - The register vectors, we adjust in size each time we - compile a regexp, according to the number of registers it needs. */ - -static fail_stack_type fail_stack; - -/* Size with which the following vectors are currently allocated. - That is so we can make them bigger as needed, - but never make them smaller. */ -static int regs_allocated_size; - -static re_char ** regstart, ** regend; -static re_char **best_regstart, **best_regend; - -/* Make the register vectors big enough for NUM_REGS registers, - but don't make them smaller. 
*/ - -static -regex_grow_registers (int num_regs) -{ - if (num_regs > regs_allocated_size) - { - RETALLOC_IF (regstart, num_regs, re_char *); - RETALLOC_IF (regend, num_regs, re_char *); - RETALLOC_IF (best_regstart, num_regs, re_char *); - RETALLOC_IF (best_regend, num_regs, re_char *); - - regs_allocated_size = num_regs; - } -} - -#endif /* not MATCH_MAY_ALLOCATE */ -static boolean group_in_compile_stack (compile_stack_type compile_stack, - regnum_t regnum); +static bool group_in_compile_stack (compile_stack_type, regnum_t); /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. Returns one of error codes defined in `regex-emacs.h', or zero for success. - If WHITESPACE_REGEXP is given (only #ifdef emacs), it is used instead of - a space character in PATTERN. + If WHITESPACE_REGEXP is given, it is used instead of a space + character in PATTERN. Assumes the `allocated' (and perhaps `buffer') and `translate' fields are set in BUFP on entry. @@ -2404,42 +1837,33 @@ do { \ #define FREE_STACK_RETURN(value) \ do { \ FREE_RANGE_TABLE_WORK_AREA (range_table_work); \ - free (compile_stack.stack); \ + xfree (compile_stack.stack); \ return value; \ } while (0) static reg_errcode_t regex_compile (re_char *pattern, size_t size, -#ifdef emacs -# define syntax RE_SYNTAX_EMACS bool posix_backtracking, const char *whitespace_regexp, -#else - reg_syntax_t syntax, -# define posix_backtracking (!(syntax & RE_NO_POSIX_BACKTRACKING)) -#endif struct re_pattern_buffer *bufp) { + reg_syntax_t syntax = RE_SYNTAX_EMACS; + /* We fetch characters from PATTERN here. */ - register re_wchar_t c, c1; + int c, c1; /* Points to the end of the buffer, where we should append. */ - register unsigned char *b; + unsigned char *b; /* Keeps track of unclosed groups. */ compile_stack_type compile_stack; /* Points to the current (ending) position in the pattern. */ -#ifdef AIX - /* `const' makes AIX compiler fail. 
*/ - unsigned char *p = pattern; -#else re_char *p = pattern; -#endif re_char *pend = pattern + size; /* How to translate the characters in the pattern. */ - RE_TRANSLATE_TYPE translate = bufp->translate; + Lisp_Object translate = bufp->translate; /* Address of the count-byte of the most recently inserted `exactn' command. This makes it possible to tell if a new exact-match @@ -2468,9 +1892,8 @@ regex_compile (re_char *pattern, size_t size, struct range_table_work_area range_table_work; /* If the object matched can contain multibyte characters. */ - const boolean multibyte = RE_MULTIBYTE_P (bufp); + bool multibyte = RE_MULTIBYTE_P (bufp); -#ifdef emacs /* Nonzero if we have pushed down into a subpattern. */ int in_subpattern = 0; @@ -2479,26 +1902,22 @@ regex_compile (re_char *pattern, size_t size, re_char *main_p; re_char *main_pattern; re_char *main_pend; -#endif -#ifdef DEBUG - debug++; +#ifdef REGEX_EMACS_DEBUG + regex_emacs_debug++; DEBUG_PRINT ("\nCompiling pattern: "); - if (debug > 0) + if (regex_emacs_debug > 0) { - unsigned debug_count; + size_t debug_count; for (debug_count = 0; debug_count < size; debug_count++) putchar (pattern[debug_count]); putchar ('\n'); } -#endif /* DEBUG */ +#endif /* Initialize the compile stack. */ compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); - if (compile_stack.stack == NULL) - return REG_ESPACE; - compile_stack.size = INIT_COMPILE_STACK_SIZE; compile_stack.avail = 0; @@ -2506,9 +1925,6 @@ regex_compile (re_char *pattern, size_t size, range_table_work.allocated = 0; /* Initialize the pattern buffer. */ -#ifndef emacs - bufp->syntax = syntax; -#endif bufp->fastmap_accurate = 0; bufp->not_bol = bufp->not_eol = 0; bufp->used_syntax = 0; @@ -2521,11 +1937,6 @@ regex_compile (re_char *pattern, size_t size, /* Always count groups, whether or not bufp->no_sub is set. */ bufp->re_nsub = 0; -#if !defined emacs && !defined SYNTAX_TABLE - /* Initialize the syntax table. 
*/ - init_syntax_once (); -#endif - if (bufp->allocated == 0) { if (bufp->buffer) @@ -2538,8 +1949,6 @@ regex_compile (re_char *pattern, size_t size, { /* Caller did not allocate a buffer. Do it for them. */ bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); } - if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE); - bufp->allocated = INIT_BUF_SIZE; } @@ -2550,7 +1959,6 @@ regex_compile (re_char *pattern, size_t size, { if (p == pend) { -#ifdef emacs /* If this is the end of an included regexp, pop back to the main regexp and try again. */ if (in_subpattern) @@ -2561,7 +1969,6 @@ regex_compile (re_char *pattern, size_t size, pend = main_pend; continue; } -#endif /* If this is the end of the main regexp, we are done. */ break; } @@ -2570,7 +1977,6 @@ regex_compile (re_char *pattern, size_t size, switch (c) { -#ifdef emacs case ' ': { re_char *p1 = p; @@ -2603,7 +2009,6 @@ regex_compile (re_char *pattern, size_t size, pend = p + strlen (whitespace_regexp); break; } -#endif case '^': { @@ -2654,8 +2059,8 @@ regex_compile (re_char *pattern, size_t size, { /* 1 means zero (many) matches is allowed. */ - boolean zero_times_ok = 0, many_times_ok = 0; - boolean greedy = 1; + bool zero_times_ok = false, many_times_ok = false; + bool greedy = true; /* If there is a sequence of repetition chars, collapse it down to just one (the right one). We can't combine @@ -2666,7 +2071,7 @@ regex_compile (re_char *pattern, size_t size, { if ((syntax & RE_FRUGAL) && c == '?' && (zero_times_ok || many_times_ok)) - greedy = 0; + greedy = false; else { zero_times_ok |= c != '+'; @@ -2705,13 +2110,13 @@ regex_compile (re_char *pattern, size_t size, { if (many_times_ok) { - boolean simple = skip_one_char (laststart) == b; + bool simple = skip_one_char (laststart) == b; size_t startoffset = 0; re_opcode_t ofj = /* Check if the loop can match the empty string. */ (simple || !analyze_first (laststart, b, NULL, 0)) ? 
on_failure_jump : on_failure_jump_loop; - assert (skip_one_char (laststart) <= b); + eassert (skip_one_char (laststart) <= b); if (!zero_times_ok && simple) { /* Since simple * loops can be made faster by using @@ -2744,7 +2149,7 @@ regex_compile (re_char *pattern, size_t size, else { /* A simple ? pattern. */ - assert (zero_times_ok); + eassert (zero_times_ok); GET_BUFFER_SPACE (3); INSERT_JUMP (on_failure_jump, laststart, b + 3); b += 3; @@ -2756,7 +2161,7 @@ regex_compile (re_char *pattern, size_t size, GET_BUFFER_SPACE (7); /* We might use less. */ if (many_times_ok) { - boolean emptyp = analyze_first (laststart, b, NULL, 0); + bool emptyp = analyze_first (laststart, b, NULL, 0); /* The non-greedy multiple match looks like a repeat..until: we only need a conditional jump @@ -2831,10 +2236,9 @@ regex_compile (re_char *pattern, size_t size, /* Read in characters and ranges, setting map bits. */ for (;;) { - boolean escaped_char = false; const unsigned char *p2 = p; re_wctype_t cc; - re_wchar_t ch; + int ch; if (p == pend) FREE_STACK_RETURN (REG_EBRACK); @@ -2849,15 +2253,6 @@ regex_compile (re_char *pattern, size_t size, if (p == pend) FREE_STACK_RETURN (REG_EBRACK); -#ifndef emacs - for (ch = 0; ch < (1 << BYTEWIDTH); ++ch) - if (re_iswctype (btowc (ch), cc)) - { - c = TRANSLATE (ch); - if (c < (1 << BYTEWIDTH)) - SET_LIST_BIT (c); - } -#else /* emacs */ /* Most character classes in a multibyte match just set a flag. Exceptions are is_blank, is_digit, is_cntrl, and is_xdigit, since they can only match ASCII characters. 
@@ -2884,7 +2279,7 @@ regex_compile (re_char *pattern, size_t size, } SET_RANGE_TABLE_WORK_AREA_BIT (range_table_work, re_wctype_to_bit (cc)); -#endif /* emacs */ + /* In most cases the matching rule for char classes only uses the syntax table for multibyte chars, so that the content of the syntax-table is not hardcoded in the @@ -2908,7 +2303,6 @@ regex_compile (re_char *pattern, size_t size, if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); PATFETCH (c); - escaped_char = true; } else { @@ -2927,13 +2321,12 @@ regex_compile (re_char *pattern, size_t size, /* Fetch the character which ends the range. */ PATFETCH (c1); -#ifdef emacs + if (CHAR_BYTE8_P (c1) && ! ASCII_CHAR_P (c) && ! CHAR_BYTE8_P (c)) /* Treat the range from a multibyte character to raw-byte character as empty. */ c = c1 + 1; -#endif /* emacs */ } else /* Range from C to C. */ @@ -2947,15 +2340,6 @@ regex_compile (re_char *pattern, size_t size, } else { -#ifndef emacs - /* Set the range into bitmap */ - for (; c <= c1; c++) - { - ch = TRANSLATE (c); - if (ch < (1 << BYTEWIDTH)) - SET_LIST_BIT (ch); - } -#else /* emacs */ if (c < 128) { ch = min (127, c1); @@ -2982,7 +2366,6 @@ regex_compile (re_char *pattern, size_t size, SETUP_UNIBYTE_RANGE (range_table_work, c, c1); } } -#endif /* emacs */ } } @@ -3007,8 +2390,7 @@ regex_compile (re_char *pattern, size_t size, /* Indicate the existence of range table. */ laststart[1] |= 0x80; - /* Store the character class flag bits into the range table. - If not in emacs, these flag bits are always 0. */ + /* Store the character class flag bits into the range table. 
*/ *b++ = RANGE_TABLE_WORK_BITS (range_table_work) & 0xff; *b++ = RANGE_TABLE_WORK_BITS (range_table_work) >> 8; @@ -3127,8 +2509,6 @@ regex_compile (re_char *pattern, size_t size, { RETALLOC (compile_stack.stack, compile_stack.size << 1, compile_stack_elt_t); - if (compile_stack.stack == NULL) return REG_ESPACE; - compile_stack.size <<= 1; } @@ -3184,7 +2564,7 @@ regex_compile (re_char *pattern, size_t size, /* Since we just checked for an empty stack above, this ``can't happen''. */ - assert (compile_stack.avail != 0); + eassert (compile_stack.avail != 0); { /* We don't just want to restore into `regnum', because later groups should continue to be numbered higher, @@ -3410,7 +2790,7 @@ regex_compile (re_char *pattern, size_t size, unfetch_interval: /* If an invalid interval, match the characters as literals. */ - assert (beg_interval); + eassert (beg_interval); p = beg_interval; beg_interval = NULL; @@ -3419,13 +2799,12 @@ regex_compile (re_char *pattern, size_t size, if (!(syntax & RE_NO_BK_BRACES)) { - assert (p > pattern && p[-1] == '\\'); + eassert (p > pattern && p[-1] == '\\'); goto normal_backslash; } else goto normal_char; -#ifdef emacs case '=': laststart = b; BUF_PUSH (at_dot); @@ -3454,8 +2833,6 @@ regex_compile (re_char *pattern, size_t size, PATFETCH (c); BUF_PUSH_2 (notcategoryspec, c); break; -#endif /* emacs */ - case 'w': if (syntax & RE_NO_GNU_OPS) @@ -3607,7 +2984,7 @@ regex_compile (re_char *pattern, size_t size, c1 = RE_CHAR_TO_MULTIBYTE (c); if (! CHAR_BYTE8_P (c1)) { - re_wchar_t c2 = TRANSLATE (c1); + int c2 = TRANSLATE (c1); if (c1 != c2 && (c1 = RE_CHAR_TO_UNIBYTE (c2)) >= 0) c = c1; @@ -3638,41 +3015,18 @@ regex_compile (re_char *pattern, size_t size, /* We have succeeded; set the length of the buffer. 
*/ bufp->used = b - bufp->buffer; -#ifdef DEBUG - if (debug > 0) +#ifdef REGEX_EMACS_DEBUG + if (regex_emacs_debug > 0) { re_compile_fastmap (bufp); DEBUG_PRINT ("\nCompiled pattern: \n"); print_compiled_pattern (bufp); } - debug--; -#endif /* DEBUG */ - -#ifndef MATCH_MAY_ALLOCATE - /* Initialize the failure stack to the largest possible stack. This - isn't necessary unless we're trying to avoid calling alloca in - the search and match routines. */ - { - int num_regs = bufp->re_nsub + 1; - - if (fail_stack.size < emacs_re_max_failures * TYPICAL_FAILURE_SIZE) - { - fail_stack.size = emacs_re_max_failures * TYPICAL_FAILURE_SIZE; - falk_stack.stack = realloc (fail_stack.stack, - fail_stack.size * sizeof *falk_stack.stack); - } - - regex_grow_registers (num_regs); - } -#endif /* not MATCH_MAY_ALLOCATE */ + regex_emacs_debug--; +#endif FREE_STACK_RETURN (REG_NOERROR); -#ifdef emacs -# undef syntax -#else -# undef posix_backtracking -#endif } /* regex_compile */ /* Subroutines for `regex_compile'. */ @@ -3733,11 +3087,11 @@ insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, unsigned cha after an alternative or a begin-subexpression. We assume there is at least one character before the ^. */ -static boolean +static bool at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax) { re_char *prev = p - 2; - boolean odd_backslashes; + bool odd_backslashes; /* After a subexpression? */ if (*prev == '(') @@ -3774,11 +3128,11 @@ at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax) /* The dual of at_begline_loc_p. This one is for $. We assume there is at least one character after the $, i.e., `P < PEND'. */ -static boolean +static bool at_endline_loc_p (re_char *p, re_char *pend, reg_syntax_t syntax) { re_char *next = p; - boolean next_backslash = *next == '\\'; + bool next_backslash = *next == '\\'; re_char *next_next = p + 1 < pend ? 
p + 1 : 0; return @@ -3794,10 +3148,10 @@ at_endline_loc_p (re_char *p, re_char *pend, reg_syntax_t syntax) /* Returns true if REGNUM is in one of COMPILE_STACK's elements and false if it's not. */ -static boolean +static bool group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum) { - ssize_t this_element; + ptrdiff_t this_element; for (this_element = compile_stack.avail - 1; this_element >= 0; @@ -3823,13 +3177,13 @@ analyze_first (re_char *p, re_char *pend, char *fastmap, const int multibyte) { int j, k; - boolean not; + bool not; /* If all elements for base leading-codes in fastmap is set, this flag is set true. */ - boolean match_any_multibyte_characters = false; + bool match_any_multibyte_characters = false; - assert (p); + eassert (p); /* The loop below works as follows: - It has a working-list kept in the PATTERN_STACK and which basically @@ -3920,7 +3274,6 @@ analyze_first (re_char *p, re_char *pend, char *fastmap, if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not) fastmap[j] = 1; -#ifdef emacs if (/* Any leading code can possibly start a character which doesn't match the specified set of characters. */ not @@ -3966,20 +3319,11 @@ analyze_first (re_char *p, re_char *pend, char *fastmap, fastmap[j] = 1; } } -#endif break; case syntaxspec: case notsyntaxspec: if (!fastmap) break; -#ifndef emacs - not = (re_opcode_t)p[-1] == notsyntaxspec; - k = *p++; - for (j = 0; j < (1 << BYTEWIDTH); j++) - if ((SYNTAX (j) == (enum syntaxcode) k) ^ not) - fastmap[j] = 1; - break; -#else /* emacs */ /* This match depends on text properties. These end with aborting optimizations. */ return -1; @@ -4008,7 +3352,6 @@ analyze_first (re_char *p, re_char *pend, char *fastmap, `continue'. */ case at_dot: -#endif /* !emacs */ case no_op: case begline: case endline: @@ -4066,7 +3409,7 @@ analyze_first (re_char *p, re_char *pend, char *fastmap, case jump_n: /* This code simply does not properly handle forward jump_n. 
*/ - DEBUG_STATEMENT (EXTRACT_NUMBER (j, p); assert (j < 0)); + DEBUG_STATEMENT (EXTRACT_NUMBER (j, p); eassert (j < 0)); p += 4; /* jump_n can either jump or fall through. The (backward) jump case has already been handled, so we only need to look at the @@ -4075,7 +3418,7 @@ analyze_first (re_char *p, re_char *pend, char *fastmap, case succeed_n: /* If N == 0, it should be an on_failure_jump_loop instead. */ - DEBUG_STATEMENT (EXTRACT_NUMBER (j, p + 2); assert (j > 0)); + DEBUG_STATEMENT (EXTRACT_NUMBER (j, p + 2); eassert (j > 0)); p += 4; /* We only care about one iteration of the loop, so we don't need to consider the case where this behaves like an @@ -4126,13 +3469,13 @@ analyze_first (re_char *p, re_char *pend, char *fastmap, Returns 0 if we succeed, -2 if an internal error. */ -int +static void re_compile_fastmap (struct re_pattern_buffer *bufp) { char *fastmap = bufp->fastmap; int analysis; - assert (fastmap && bufp->buffer); + eassert (fastmap && bufp->buffer); memset (fastmap, 0, 1 << BYTEWIDTH); /* Assume nothing's valid. */ bufp->fastmap_accurate = 1; /* It will be when we're done. */ @@ -4140,14 +3483,13 @@ re_compile_fastmap (struct re_pattern_buffer *bufp) analysis = analyze_first (bufp->buffer, bufp->buffer + bufp->used, fastmap, RE_MULTIBYTE_P (bufp)); bufp->can_be_null = (analysis != 0); - return 0; } /* re_compile_fastmap */ /* Set REGS to hold NUM_REGS registers, storing them in STARTS and ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use this memory for recording register information. STARTS and ENDS must be allocated using the malloc library routine, and must each - be at least NUM_REGS * sizeof (regoff_t) bytes long. + be at least NUM_REGS * sizeof (ptrdiff_t) bytes long. If NUM_REGS == 0, then subsequent matches should allocate their own register data. @@ -4157,7 +3499,8 @@ re_compile_fastmap (struct re_pattern_buffer *bufp) freeing the old data. 
*/ void -re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, unsigned int num_regs, regoff_t *starts, regoff_t *ends) +re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, + unsigned int num_regs, ptrdiff_t *starts, ptrdiff_t *ends) { if (num_regs) { @@ -4173,21 +3516,19 @@ re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, uns regs->start = regs->end = 0; } } -WEAK_ALIAS (__re_set_registers, re_set_registers) /* Searching routines. */ /* Like re_search_2, below, but only one string is specified, and doesn't let you say where to stop matching. */ -regoff_t +ptrdiff_t re_search (struct re_pattern_buffer *bufp, const char *string, size_t size, - ssize_t startpos, ssize_t range, struct re_registers *regs) + ptrdiff_t startpos, ptrdiff_t range, struct re_registers *regs) { return re_search_2 (bufp, NULL, 0, string, size, startpos, range, regs, size); } -WEAK_ALIAS (__re_search, re_search) /* Head address of virtual concatenation of string. */ #define HEAD_ADDR_VSTRING(P) \ @@ -4218,21 +3559,21 @@ WEAK_ALIAS (__re_search, re_search) found, -1 if no match, or -2 if error (such as failure stack overflow). 
*/ -regoff_t +ptrdiff_t re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1, - const char *str2, size_t size2, ssize_t startpos, ssize_t range, - struct re_registers *regs, ssize_t stop) + const char *str2, size_t size2, ptrdiff_t startpos, ptrdiff_t range, + struct re_registers *regs, ptrdiff_t stop) { - regoff_t val; + ptrdiff_t val; re_char *string1 = (re_char *) str1; re_char *string2 = (re_char *) str2; - register char *fastmap = bufp->fastmap; - register RE_TRANSLATE_TYPE translate = bufp->translate; + char *fastmap = bufp->fastmap; + Lisp_Object translate = bufp->translate; size_t total_size = size1 + size2; - ssize_t endpos = startpos + range; - boolean anchored_start; + ptrdiff_t endpos = startpos + range; + bool anchored_start; /* Nonzero if we are searching multibyte string. */ - const boolean multibyte = RE_TARGET_MULTIBYTE_P (bufp); + bool multibyte = RE_TARGET_MULTIBYTE_P (bufp); /* Check for out-of-range STARTPOS. */ if (startpos < 0 || startpos > total_size) @@ -4256,7 +3597,6 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1, range = 0; } -#ifdef emacs /* In a forward search for something that starts with \=. don't keep searching past point. */ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0) @@ -4265,7 +3605,6 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1, if (range < 0) return -1; } -#endif /* emacs */ /* Update the fastmap now if not correct already. */ if (fastmap && !bufp->fastmap_accurate) @@ -4274,14 +3613,12 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1, /* See whether the pattern is anchored. */ anchored_start = (bufp->buffer[0] == begline); -#ifdef emacs gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. 
*/ { - ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (startpos)); + ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (startpos)); SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1); } -#endif /* Loop through the string, looking for a place to start matching. */ for (;;) @@ -4304,14 +3641,14 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1, the first null string. */ if (fastmap && startpos < total_size && !bufp->can_be_null) { - register re_char *d; - register re_wchar_t buf_ch; + re_char *d; + int buf_ch; d = POS_ADDR_VSTRING (startpos); if (range > 0) /* Searching forwards. */ { - ssize_t irange = range, lim = 0; + ptrdiff_t irange = range, lim = 0; if (startpos < size1 && startpos + range >= size1) lim = range - (size1 - startpos); @@ -4336,11 +3673,9 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1, else while (range > lim) { - register re_wchar_t ch, translated; - buf_ch = *d; - ch = RE_CHAR_TO_MULTIBYTE (buf_ch); - translated = RE_TRANSLATE (translate, ch); + int ch = RE_CHAR_TO_MULTIBYTE (buf_ch); + int translated = RE_TRANSLATE (translate, ch); if (translated != ch && (ch = RE_CHAR_TO_UNIBYTE (translated)) >= 0) buf_ch = ch; @@ -4383,11 +3718,9 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1, } else { - register re_wchar_t ch, translated; - buf_ch = *d; - ch = RE_CHAR_TO_MULTIBYTE (buf_ch); - translated = TRANSLATE (ch); + int ch = RE_CHAR_TO_MULTIBYTE (buf_ch); + int translated = TRANSLATE (ch); if (translated != ch && (ch = RE_CHAR_TO_UNIBYTE (translated)) >= 0) buf_ch = ch; @@ -4457,13 +3790,12 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1, } return -1; } /* re_search_2 */ -WEAK_ALIAS (__re_search_2, re_search_2) /* Declarations and macros for re_match_2. 
*/ static int bcmp_translate (re_char *s1, re_char *s2, - register ssize_t len, - RE_TRANSLATE_TYPE translate, + ptrdiff_t len, + Lisp_Object translate, const int multibyte); /* This converts PTR, a pointer into one of the search strings `string1' @@ -4531,29 +3863,6 @@ static int bcmp_translate (re_char *s1, re_char *s2, || WORDCHAR_P (d - 1) != WORDCHAR_P (d)) #endif -/* Free everything we malloc. */ -#ifdef MATCH_MAY_ALLOCATE -# define FREE_VAR(var) \ - do { \ - if (var) \ - { \ - REGEX_FREE (var); \ - var = NULL; \ - } \ - } while (0) -# define FREE_VARIABLES() \ - do { \ - REGEX_FREE_STACK (fail_stack.stack); \ - FREE_VAR (regstart); \ - FREE_VAR (regend); \ - FREE_VAR (best_regstart); \ - FREE_VAR (best_regend); \ - REGEX_SAFE_FREE (); \ - } while (0) -#else -# define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */ -#endif /* not MATCH_MAY_ALLOCATE */ - /* Optimization routines. */ @@ -4586,10 +3895,8 @@ skip_one_char (re_char *p) case syntaxspec: case notsyntaxspec: -#ifdef emacs case categoryspec: case notcategoryspec: -#endif /* emacs */ p++; break; @@ -4623,7 +3930,7 @@ skip_noops (re_char *p, re_char *pend) return p; } } - assert (p == pend); + eassert (p == pend); return p; } @@ -4656,11 +3963,10 @@ execute_charset (re_char **pp, unsigned c, unsigned corig, bool unibyte) && p[2 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) return !not; } -#ifdef emacs else if (rtp) { int class_bits = CHARSET_RANGE_TABLE_BITS (p); - re_wchar_t range_start, range_end; + int range_start, range_end; /* Sort tests by the most commonly used classes with some adjustment to which tests are easiest to perform. 
Take a look at comment in re_wctype_parse @@ -4691,7 +3997,7 @@ execute_charset (re_char **pp, unsigned c, unsigned corig, bool unibyte) return !not; } } -#endif /* emacs */ + return not; } @@ -4701,11 +4007,11 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1, re_char *p2) { re_opcode_t op2; - const boolean multibyte = RE_MULTIBYTE_P (bufp); + bool multibyte = RE_MULTIBYTE_P (bufp); unsigned char *pend = bufp->buffer + bufp->used; - assert (p1 >= bufp->buffer && p1 < pend - && p2 >= bufp->buffer && p2 <= pend); + eassert (p1 >= bufp->buffer && p1 < pend + && p2 >= bufp->buffer && p2 <= pend); /* Skip over open/close-group commands. If what follows this loop is a ...+ construct, @@ -4716,8 +4022,8 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1, is only used in the case where p1 is a simple match operator. */ /* p1 = skip_noops (p1, pend); */ - assert (p1 >= bufp->buffer && p1 < pend - && p2 >= bufp->buffer && p2 <= pend); + eassert (p1 >= bufp->buffer && p1 < pend + && p2 >= bufp->buffer && p2 <= pend); op2 = p2 == pend ? succeed : *p2; @@ -4736,7 +4042,7 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1, case endline: case exactn: { - register re_wchar_t c + int c = (re_opcode_t) *p2 == endline ? '\n' : RE_STRING_CHAR (p2 + 2, multibyte); @@ -4866,12 +4172,10 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1, || (re_opcode_t) *p1 == syntaxspec) && p1[1] == Sword); -#ifdef emacs case categoryspec: return ((re_opcode_t) *p1 == notcategoryspec && p1[1] == p2[1]); case notcategoryspec: return ((re_opcode_t) *p1 == categoryspec && p1[1] == p2[1]); -#endif /* emacs */ default: ; @@ -4884,20 +4188,6 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1, /* Matching routines. */ -#ifndef emacs /* Emacs never uses this. */ -/* re_match is like re_match_2 except it takes only a single string. 
*/ - -regoff_t -re_match (struct re_pattern_buffer *bufp, const char *string, - size_t size, ssize_t pos, struct re_registers *regs) -{ - regoff_t result = re_match_2_internal (bufp, NULL, 0, (re_char *) string, - size, pos, regs, size); - return result; -} -WEAK_ALIAS (__re_match, re_match) -#endif /* not emacs */ - /* re_match_2 matches the compiled pattern in BUFP against the the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 and SIZE2, respectively). We start matching at POS, and stop @@ -4911,34 +4201,31 @@ WEAK_ALIAS (__re_match, re_match) failure stack overflowing). Otherwise, we return the length of the matched substring. */ -regoff_t +ptrdiff_t re_match_2 (struct re_pattern_buffer *bufp, const char *string1, - size_t size1, const char *string2, size_t size2, ssize_t pos, - struct re_registers *regs, ssize_t stop) + size_t size1, const char *string2, size_t size2, ptrdiff_t pos, + struct re_registers *regs, ptrdiff_t stop) { - regoff_t result; + ptrdiff_t result; -#ifdef emacs - ssize_t charpos; + ptrdiff_t charpos; gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. */ charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (pos)); SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1); -#endif result = re_match_2_internal (bufp, (re_char *) string1, size1, (re_char *) string2, size2, pos, regs, stop); return result; } -WEAK_ALIAS (__re_match_2, re_match_2) /* This is a separate function so that we can force an alloca cleanup afterwards. */ -static regoff_t +static ptrdiff_t re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, size_t size1, re_char *string2, size_t size2, - ssize_t pos, struct re_registers *regs, ssize_t stop) + ptrdiff_t pos, struct re_registers *regs, ptrdiff_t stop) { /* General temporaries. 
*/ int mcnt; @@ -4965,13 +4252,13 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, re_char *pend = p + bufp->used; /* We use this to map every character in the string. */ - RE_TRANSLATE_TYPE translate = bufp->translate; + Lisp_Object translate = bufp->translate; - /* Nonzero if BUFP is setup from a multibyte regex. */ - const boolean multibyte = RE_MULTIBYTE_P (bufp); + /* True if BUFP is setup from a multibyte regex. */ + bool multibyte = RE_MULTIBYTE_P (bufp); - /* Nonzero if STRING1/STRING2 are multibyte. */ - const boolean target_multibyte = RE_TARGET_MULTIBYTE_P (bufp); + /* True if STRING1/STRING2 are multibyte. */ + bool target_multibyte = RE_TARGET_MULTIBYTE_P (bufp); /* Failure point stack. Each place that can handle a failure further down the line pushes a failure point on this stack. It consists of @@ -4980,19 +4267,11 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, registers, and, finally, two char *'s. The first char * is where to resume scanning the pattern; the second one is where to resume scanning the strings. */ -#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ fail_stack_type fail_stack; -#endif #ifdef DEBUG_COMPILES_ARGUMENTS unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0; #endif -#if defined REL_ALLOC && defined REGEX_MALLOC - /* This holds the pointer to the failure stack, when - it is allocated relocatably. */ - fail_stack_elt_t *failure_stack_ptr; -#endif - /* We fill all the registers internally, independent of what we return, for use in backreferences. The number here includes an element for register zero. */ @@ -5005,18 +4284,14 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, matching and the regnum-th regend points to right after where we stopped matching the regnum-th subexpression. (The zeroth register keeps track of what the whole pattern matches.) */ -#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. 
*/ - re_char **regstart, **regend; -#endif + re_char **regstart UNINIT, **regend UNINIT; /* The following record the register info as found in the above variables when we find a match better than any we've seen before. This happens as we backtrack through the failure points, which in turn happens only if we have not yet matched the entire string. */ unsigned best_regs_set = false; -#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ - re_char **best_regstart, **best_regend; -#endif + re_char **best_regstart UNINIT, **best_regend UNINIT; /* Logically, this is `best_regend[0]'. But we don't want to have to allocate space for that if we're not allocating space for anything @@ -5039,7 +4314,6 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, INIT_FAIL_STACK (); -#ifdef MATCH_MAY_ALLOCATE /* Do not bother to initialize all the register variables if there are no groups in the pattern, as it takes a fair amount of time. If there are groups, we include space for register 0 (the whole @@ -5047,29 +4321,16 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, array indexing. We should fix this. */ if (bufp->re_nsub) { - regstart = REGEX_TALLOC (num_regs, re_char *); - regend = REGEX_TALLOC (num_regs, re_char *); - best_regstart = REGEX_TALLOC (num_regs, re_char *); - best_regend = REGEX_TALLOC (num_regs, re_char *); - - if (!(regstart && regend && best_regstart && best_regend)) - { - FREE_VARIABLES (); - return -2; - } + regstart = SAFE_ALLOCA (num_regs * 4 * sizeof *regstart); + regend = regstart + num_regs; + best_regstart = regend + num_regs; + best_regend = best_regstart + num_regs; } - else - { - /* We must initialize all our variables to NULL, so that - `FREE_VARIABLES' doesn't try to free them. */ - regstart = regend = best_regstart = best_regend = NULL; - } -#endif /* MATCH_MAY_ALLOCATE */ /* The starting position is bogus. 
*/ if (pos < 0 || pos > size1 + size2) { - FREE_VARIABLES (); + SAFE_FREE (); return -1; } @@ -5229,13 +4490,8 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, extra element beyond `num_regs' for the `-1' marker GNU code uses. */ regs->num_regs = max (RE_NREGS, num_regs + 1); - regs->start = TALLOC (regs->num_regs, regoff_t); - regs->end = TALLOC (regs->num_regs, regoff_t); - if (regs->start == NULL || regs->end == NULL) - { - FREE_VARIABLES (); - return -2; - } + regs->start = TALLOC (regs->num_regs, ptrdiff_t); + regs->end = TALLOC (regs->num_regs, ptrdiff_t); bufp->regs_allocated = REGS_REALLOCATE; } else if (bufp->regs_allocated == REGS_REALLOCATE) @@ -5245,21 +4501,12 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, if (regs->num_regs < num_regs + 1) { regs->num_regs = num_regs + 1; - RETALLOC (regs->start, regs->num_regs, regoff_t); - RETALLOC (regs->end, regs->num_regs, regoff_t); - if (regs->start == NULL || regs->end == NULL) - { - FREE_VARIABLES (); - return -2; - } + RETALLOC (regs->start, regs->num_regs, ptrdiff_t); + RETALLOC (regs->end, regs->num_regs, ptrdiff_t); } } else - { - /* These braces fend off a "empty body in an else-statement" - warning under GCC when assert expands to nothing. */ - assert (bufp->regs_allocated == REGS_FIXED); - } + eassert (bufp->regs_allocated == REGS_FIXED); /* Convert the pointer data in `regstart' and `regend' to indices. Register zero has to be set differently, @@ -5301,7 +4548,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, DEBUG_PRINT ("Returning %td from re_match_2.\n", dcnt); - FREE_VARIABLES (); + SAFE_FREE (); return dcnt; } @@ -5328,33 +4575,6 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, /* Remember the start point to rollback upon failure. */ dfail = d; -#ifndef emacs - /* This is written out as an if-else so we don't waste time - testing `translate' inside the loop. 
*/ - if (RE_TRANSLATE_P (translate)) - do - { - PREFETCH (); - if (RE_TRANSLATE (translate, *d) != *p++) - { - d = dfail; - goto fail; - } - d++; - } - while (--mcnt); - else - do - { - PREFETCH (); - if (*d++ != *p++) - { - d = dfail; - goto fail; - } - } - while (--mcnt); -#else /* emacs */ /* The cost of testing `translate' is comparatively small. */ if (target_multibyte) do @@ -5419,7 +4639,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, d++; } while (--mcnt); -#endif + break; @@ -5427,7 +4647,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, case anychar: { int buf_charlen; - re_wchar_t buf_ch; + int buf_ch; reg_syntax_t syntax; DEBUG_PRINT ("EXECUTING anychar.\n"); @@ -5437,11 +4657,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, target_multibyte); buf_ch = TRANSLATE (buf_ch); -#ifdef emacs syntax = RE_SYNTAX_EMACS; -#else - syntax = bufp->syntax; -#endif if ((!(syntax & RE_DOT_NEWLINE) && buf_ch == '\n') || ((syntax & RE_DOT_NOT_NULL) && buf_ch == '\000')) @@ -5460,7 +4676,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, int len; /* Whether matching against a unibyte character. */ - boolean unibyte_char = false; + bool unibyte_char = false; DEBUG_PRINT ("EXECUTING charset%s.\n", (re_opcode_t) *(p - 1) == charset_not ? "_not" : ""); @@ -5530,10 +4746,10 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, case stop_memory: DEBUG_PRINT ("EXECUTING stop_memory %d:\n", *p); - assert (!REG_UNSET (regstart[*p])); + eassert (!REG_UNSET (regstart[*p])); /* Strictly speaking, there should be code such as: - assert (REG_UNSET (regend[*p])); + eassert (REG_UNSET (regend[*p])); PUSH_FAILURE_REGSTOP ((unsigned int)*p); But the only info to be pushed is regend[*p] and it is known to @@ -5557,7 +4773,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, followed by the numeric value of <digit> as the register number. 
*/ case duplicate: { - register re_char *d2, *dend2; + re_char *d2, *dend2; int regno = *p++; /* Get which register to match against. */ DEBUG_PRINT ("EXECUTING duplicate %d.\n", regno); @@ -5719,7 +4935,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, DEBUG_PRINT ("EXECUTING on_failure_jump_nastyloop %d (to %p):\n", mcnt, p + mcnt); - assert ((re_opcode_t)p[-4] == no_op); + eassert ((re_opcode_t)p[-4] == no_op); { int cycle = 0; CHECK_INFINITE_LOOP (p - 4, d); @@ -5788,7 +5004,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, mcnt, p + mcnt); { re_char *p1 = p; /* Next operation. */ - /* Here, we discard `const', making re_match non-reentrant. */ + /* Discard 'const', making re_search non-reentrant. */ unsigned char *p2 = (unsigned char *) p + mcnt; /* Jump dest. */ unsigned char *p3 = (unsigned char *) p - 3; /* opcode location. */ @@ -5799,9 +5015,9 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, /* Ensure this is indeed the trivial kind of loop we are expecting. */ - assert (skip_one_char (p1) == p2 - 3); - assert ((re_opcode_t) p2[-3] == jump && p2 + mcnt == p); - DEBUG_STATEMENT (debug += 2); + eassert (skip_one_char (p1) == p2 - 3); + eassert ((re_opcode_t) p2[-3] == jump && p2 + mcnt == p); + DEBUG_STATEMENT (regex_emacs_debug += 2); if (mutually_exclusive_p (bufp, p1, p2)) { /* Use a fast `on_failure_keep_string_jump' loop. */ @@ -5815,7 +5031,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, DEBUG_PRINT (" smart default => slow loop.\n"); *p3 = (unsigned char) on_failure_jump; } - DEBUG_STATEMENT (debug -= 2); + DEBUG_STATEMENT (regex_emacs_debug -= 2); } break; @@ -5840,7 +5056,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, /* Originally, mcnt is how many times we HAVE to succeed. */ if (mcnt != 0) { - /* Here, we discard `const', making re_match non-reentrant. */ + /* Discard 'const', making re_search non-reentrant. 
*/ unsigned char *p2 = (unsigned char *) p + 2; /* counter loc. */ mcnt--; p += 4; @@ -5859,7 +5075,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, /* Originally, this is how many times we CAN jump. */ if (mcnt != 0) { - /* Here, we discard `const', making re_match non-reentrant. */ + /* Discard 'const', making re_search non-reentrant. */ unsigned char *p2 = (unsigned char *) p + 2; /* counter loc. */ mcnt--; PUSH_NUMBER (p2, mcnt); @@ -5876,7 +5092,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, DEBUG_PRINT ("EXECUTING set_number_at.\n"); EXTRACT_NUMBER_AND_INCR (mcnt, p); - /* Here, we discard `const', making re_match non-reentrant. */ + /* Discard 'const', making re_search non-reentrant. */ p2 = (unsigned char *) p + mcnt; /* Signedness doesn't matter since we only copy MCNT's bits. */ EXTRACT_NUMBER_AND_INCR (mcnt, p); @@ -5888,7 +5104,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, case wordbound: case notwordbound: { - boolean not = (re_opcode_t) *(p - 1) == notwordbound; + bool not = (re_opcode_t) *(p - 1) == notwordbound; DEBUG_PRINT ("EXECUTING %swordbound.\n", not ? "not" : ""); /* We SUCCEED (or FAIL) in one of the following cases: */ @@ -5900,19 +5116,15 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, { /* C1 is the character before D, S1 is the syntax of C1, C2 is the character at D, and S2 is the syntax of C2. 
*/ - re_wchar_t c1, c2; + int c1, c2; int s1, s2; int dummy; -#ifdef emacs - ssize_t offset = PTR_TO_OFFSET (d - 1); - ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); + ptrdiff_t offset = PTR_TO_OFFSET (d - 1); + ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); UPDATE_SYNTAX_TABLE (charpos); -#endif GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); s1 = SYNTAX (c1); -#ifdef emacs UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1); -#endif PREFETCH_NOLIMIT (); GET_CHAR_AFTER (c2, d, dummy); s2 = SYNTAX (c2); @@ -5942,14 +5154,12 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, { /* C1 is the character before D, S1 is the syntax of C1, C2 is the character at D, and S2 is the syntax of C2. */ - re_wchar_t c1, c2; + int c1, c2; int s1, s2; int dummy; -#ifdef emacs - ssize_t offset = PTR_TO_OFFSET (d); - ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); + ptrdiff_t offset = PTR_TO_OFFSET (d); + ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); UPDATE_SYNTAX_TABLE (charpos); -#endif PREFETCH (); GET_CHAR_AFTER (c2, d, dummy); s2 = SYNTAX (c2); @@ -5962,9 +5172,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, if (!AT_STRINGS_BEG (d)) { GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); -#ifdef emacs UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1); -#endif s1 = SYNTAX (c1); /* ... and S1 is Sword, and WORD_BOUNDARY_P (C1, C2) @@ -5987,14 +5195,12 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, { /* C1 is the character before D, S1 is the syntax of C1, C2 is the character at D, and S2 is the syntax of C2. 
*/ - re_wchar_t c1, c2; + int c1, c2; int s1, s2; int dummy; -#ifdef emacs - ssize_t offset = PTR_TO_OFFSET (d) - 1; - ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); + ptrdiff_t offset = PTR_TO_OFFSET (d) - 1; + ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); UPDATE_SYNTAX_TABLE (charpos); -#endif GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); s1 = SYNTAX (c1); @@ -6007,9 +5213,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, { PREFETCH_NOLIMIT (); GET_CHAR_AFTER (c2, d, dummy); -#ifdef emacs UPDATE_SYNTAX_TABLE_FORWARD (charpos); -#endif s2 = SYNTAX (c2); /* ... and S2 is Sword, and WORD_BOUNDARY_P (C1, C2) @@ -6032,13 +5236,11 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, { /* C1 is the character before D, S1 is the syntax of C1, C2 is the character at D, and S2 is the syntax of C2. */ - re_wchar_t c1, c2; + int c1, c2; int s1, s2; -#ifdef emacs - ssize_t offset = PTR_TO_OFFSET (d); - ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); + ptrdiff_t offset = PTR_TO_OFFSET (d); + ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); UPDATE_SYNTAX_TABLE (charpos); -#endif PREFETCH (); c2 = RE_STRING_CHAR (d, target_multibyte); s2 = SYNTAX (c2); @@ -6051,9 +5253,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, if (!AT_STRINGS_BEG (d)) { GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); -#ifdef emacs UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1); -#endif s1 = SYNTAX (c1); /* ... and S1 is Sword or Ssymbol. */ @@ -6075,13 +5275,11 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, { /* C1 is the character before D, S1 is the syntax of C1, C2 is the character at D, and S2 is the syntax of C2. 
*/ - re_wchar_t c1, c2; + int c1, c2; int s1, s2; -#ifdef emacs - ssize_t offset = PTR_TO_OFFSET (d) - 1; - ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); + ptrdiff_t offset = PTR_TO_OFFSET (d) - 1; + ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); UPDATE_SYNTAX_TABLE (charpos); -#endif GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); s1 = SYNTAX (c1); @@ -6094,9 +5292,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, { PREFETCH_NOLIMIT (); c2 = RE_STRING_CHAR (d, target_multibyte); -#ifdef emacs UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1); -#endif s2 = SYNTAX (c2); /* ... and S2 is Sword or Ssymbol. */ @@ -6109,21 +5305,19 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, case syntaxspec: case notsyntaxspec: { - boolean not = (re_opcode_t) *(p - 1) == notsyntaxspec; + bool not = (re_opcode_t) *(p - 1) == notsyntaxspec; mcnt = *p++; DEBUG_PRINT ("EXECUTING %ssyntaxspec %d.\n", not ? "not" : "", mcnt); PREFETCH (); -#ifdef emacs { - ssize_t offset = PTR_TO_OFFSET (d); - ssize_t pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (offset); + ptrdiff_t offset = PTR_TO_OFFSET (d); + ptrdiff_t pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (offset); UPDATE_SYNTAX_TABLE (pos1); } -#endif { int len; - re_wchar_t c; + int c; GET_CHAR_AFTER (c, d, len); if ((SYNTAX (c) != (enum syntaxcode) mcnt) ^ not) @@ -6133,7 +5327,6 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, } break; -#ifdef emacs case at_dot: DEBUG_PRINT ("EXECUTING at_dot.\n"); if (PTR_BYTE_POS (d) != PT_BYTE) @@ -6143,7 +5336,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, case categoryspec: case notcategoryspec: { - boolean not = (re_opcode_t) *(p - 1) == notcategoryspec; + bool not = (re_opcode_t) *(p - 1) == notcategoryspec; mcnt = *p++; DEBUG_PRINT ("EXECUTING %scategoryspec %d.\n", not ? 
"not" : "", mcnt); @@ -6151,7 +5344,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, { int len; - re_wchar_t c; + int c; GET_CHAR_AFTER (c, d, len); if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not) goto fail; @@ -6160,8 +5353,6 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, } break; -#endif /* emacs */ - default: abort (); } @@ -6180,11 +5371,11 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, switch (*pat++) { case on_failure_keep_string_jump: - assert (str == NULL); + eassert (str == NULL); goto continue_failure_jump; case on_failure_jump_nastyloop: - assert ((re_opcode_t)pat[-2] == no_op); + eassert ((re_opcode_t)pat[-2] == no_op); PUSH_FAILURE_POINT (pat - 2, str); FALLTHROUGH; case on_failure_jump_loop: @@ -6204,7 +5395,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, abort (); } - assert (p >= bufp->buffer && p <= pend); + eassert (p >= bufp->buffer && p <= pend); if (d >= string1 && d <= end1) dend = end_match_1; @@ -6216,7 +5407,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, if (best_regs_set) goto restore_best_regs; - FREE_VARIABLES (); + SAFE_FREE (); return -1; /* Failure to match. */ } @@ -6227,8 +5418,8 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, bytes; nonzero otherwise. 
*/ static int -bcmp_translate (re_char *s1, re_char *s2, ssize_t len, - RE_TRANSLATE_TYPE translate, const int target_multibyte) +bcmp_translate (re_char *s1, re_char *s2, ptrdiff_t len, + Lisp_Object translate, int target_multibyte) { re_char *p1 = s1, *p2 = s2; re_char *p1_end = s1 + len; @@ -6239,7 +5430,7 @@ bcmp_translate (re_char *s1, re_char *s2, ssize_t len, while (p1 < p1_end && p2 < p2_end) { int p1_charlen, p2_charlen; - re_wchar_t p1_ch, p2_ch; + int p1_ch, p2_ch; GET_CHAR_AFTER (p1_ch, p1, p1_charlen); GET_CHAR_AFTER (p2_ch, p2, p2_charlen); @@ -6270,9 +5461,7 @@ bcmp_translate (re_char *s1, re_char *s2, ssize_t len, const char * re_compile_pattern (const char *pattern, size_t length, -#ifdef emacs bool posix_backtracking, const char *whitespace_regexp, -#endif struct re_pattern_buffer *bufp) { reg_errcode_t ret; @@ -6282,334 +5471,16 @@ re_compile_pattern (const char *pattern, size_t length, bufp->regs_allocated = REGS_UNALLOCATED; /* And GNU code determines whether or not to get register information - by passing null for the REGS argument to re_match, etc., not by + by passing null for the REGS argument to re_search, etc., not by setting no_sub. */ bufp->no_sub = 0; ret = regex_compile ((re_char *) pattern, length, -#ifdef emacs posix_backtracking, whitespace_regexp, -#else - re_syntax_options, -#endif bufp); if (!ret) return NULL; - return gettext (re_error_msgid[(int) ret]); -} -WEAK_ALIAS (__re_compile_pattern, re_compile_pattern) - -/* Entry points compatible with 4.2 BSD regex library. We don't define - them unless specifically requested. */ - -#if defined _REGEX_RE_COMP || defined _LIBC - -/* BSD has one and only one pattern buffer. */ -static struct re_pattern_buffer re_comp_buf; - -char * -# ifdef _LIBC -/* Make these definitions weak in libc, so POSIX programs can redefine - these names if they don't use our functions, and still use - regcomp/regexec below without link errors. 
*/ -weak_function -# endif -re_comp (const char *s) -{ - reg_errcode_t ret; - - if (!s) - { - if (!re_comp_buf.buffer) - /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */ - return (char *) gettext ("No previous regular expression"); - return 0; - } - - if (!re_comp_buf.buffer) - { - re_comp_buf.buffer = malloc (200); - if (re_comp_buf.buffer == NULL) - /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */ - return (char *) gettext (re_error_msgid[(int) REG_ESPACE]); - re_comp_buf.allocated = 200; - - re_comp_buf.fastmap = malloc (1 << BYTEWIDTH); - if (re_comp_buf.fastmap == NULL) - /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */ - return (char *) gettext (re_error_msgid[(int) REG_ESPACE]); - } - - /* Since `re_exec' always passes NULL for the `regs' argument, we - don't need to initialize the pattern buffer fields which affect it. */ - - ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf); - - if (!ret) - return NULL; - - /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */ - return (char *) gettext (re_error_msgid[(int) ret]); -} - - -int -# ifdef _LIBC -weak_function -# endif -re_exec (const char *s) -{ - const size_t len = strlen (s); - return re_search (&re_comp_buf, s, len, 0, len, 0) >= 0; + return re_error_msgid[ret]; } -#endif /* _REGEX_RE_COMP */ - -/* POSIX.2 functions. Don't define these for Emacs. */ - -#ifndef emacs - -/* regcomp takes a regular expression as a string and compiles it. - - PREG is a regex_t *. We do not expect any fields to be initialized, - since POSIX says we shouldn't. Thus, we set - - `buffer' to the compiled pattern; - `used' to the length of the compiled pattern; - `syntax' to RE_SYNTAX_POSIX_EXTENDED if the - REG_EXTENDED bit in CFLAGS is set; otherwise, to - RE_SYNTAX_POSIX_BASIC; - `fastmap' to an allocated space for the fastmap; - `fastmap_accurate' to zero; - `re_nsub' to the number of subexpressions in PATTERN. - - PATTERN is the address of the pattern string. 
- - CFLAGS is a series of bits which affect compilation. - - If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we - use POSIX basic syntax. - - If REG_NEWLINE is set, then . and [^...] don't match newline. - Also, regexec will try a match beginning after every newline. - - If REG_ICASE is set, then we considers upper- and lowercase - versions of letters to be equivalent when matching. - - If REG_NOSUB is set, then when PREG is passed to regexec, that - routine will report only success or failure, and nothing about the - registers. - - It returns 0 if it succeeds, nonzero if it doesn't. (See regex-emacs.h for - the return codes and their meanings.) */ - -reg_errcode_t -regcomp (regex_t *_Restrict_ preg, const char *_Restrict_ pattern, - int cflags) -{ - reg_errcode_t ret; - reg_syntax_t syntax - = (cflags & REG_EXTENDED) ? - RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC; - - /* regex_compile will allocate the space for the compiled pattern. */ - preg->buffer = 0; - preg->allocated = 0; - preg->used = 0; - - /* Try to allocate space for the fastmap. */ - preg->fastmap = malloc (1 << BYTEWIDTH); - - if (cflags & REG_ICASE) - { - unsigned i; - - preg->translate = malloc (CHAR_SET_SIZE * sizeof *preg->translate); - if (preg->translate == NULL) - return (int) REG_ESPACE; - - /* Map uppercase characters to corresponding lowercase ones. */ - for (i = 0; i < CHAR_SET_SIZE; i++) - preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i; - } - else - preg->translate = NULL; - - /* If REG_NEWLINE is set, newlines are treated differently. */ - if (cflags & REG_NEWLINE) - { /* REG_NEWLINE implies neither . nor [^...] match newline. */ - syntax &= ~RE_DOT_NEWLINE; - syntax |= RE_HAT_LISTS_NOT_NEWLINE; - } - else - syntax |= RE_NO_NEWLINE_ANCHOR; - - preg->no_sub = !!(cflags & REG_NOSUB); - - /* POSIX says a null character in the pattern terminates it, so we - can use strlen here in compiling the pattern. 
*/ - ret = regex_compile ((re_char *) pattern, strlen (pattern), syntax, preg); - - /* POSIX doesn't distinguish between an unmatched open-group and an - unmatched close-group: both are REG_EPAREN. */ - if (ret == REG_ERPAREN) - ret = REG_EPAREN; - - if (ret == REG_NOERROR && preg->fastmap) - { /* Compute the fastmap now, since regexec cannot modify the pattern - buffer. */ - re_compile_fastmap (preg); - if (preg->can_be_null) - { /* The fastmap can't be used anyway. */ - free (preg->fastmap); - preg->fastmap = NULL; - } - } - return ret; -} -WEAK_ALIAS (__regcomp, regcomp) - - -/* regexec searches for a given pattern, specified by PREG, in the - string STRING. - - If NMATCH is zero or REG_NOSUB was set in the cflags argument to - `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at - least NMATCH elements, and we set them to the offsets of the - corresponding matched substrings. - - EFLAGS specifies `execution flags' which affect matching: if - REG_NOTBOL is set, then ^ does not match at the beginning of the - string; if REG_NOTEOL is set, then $ does not match at the end. - - We return 0 if we find a match and REG_NOMATCH if not. */ - -reg_errcode_t -regexec (const regex_t *_Restrict_ preg, const char *_Restrict_ string, - size_t nmatch, regmatch_t pmatch[_Restrict_arr_], int eflags) -{ - regoff_t ret; - struct re_registers regs; - regex_t private_preg; - size_t len = strlen (string); - boolean want_reg_info = !preg->no_sub && nmatch > 0 && pmatch; - - private_preg = *preg; - - private_preg.not_bol = !!(eflags & REG_NOTBOL); - private_preg.not_eol = !!(eflags & REG_NOTEOL); - - /* The user has told us exactly how many registers to return - information about, via `nmatch'. We have to pass that on to the - matching routines. 
*/ - private_preg.regs_allocated = REGS_FIXED; - - if (want_reg_info) - { - regs.num_regs = nmatch; - regs.start = TALLOC (nmatch * 2, regoff_t); - if (regs.start == NULL) - return REG_NOMATCH; - regs.end = regs.start + nmatch; - } - - /* Instead of using not_eol to implement REG_NOTEOL, we could simply - pass (&private_preg, string, len + 1, 0, len, ...) pretending the string - was a little bit longer but still only matching the real part. - This works because the `endline' will check for a '\n' and will find a - '\0', correctly deciding that this is not the end of a line. - But it doesn't work out so nicely for REG_NOTBOL, since we don't have - a convenient '\0' there. For all we know, the string could be preceded - by '\n' which would throw things off. */ - - /* Perform the searching operation. */ - ret = re_search (&private_preg, string, len, - /* start: */ 0, /* range: */ len, - want_reg_info ? &regs : 0); - - /* Copy the register information to the POSIX structure. */ - if (want_reg_info) - { - if (ret >= 0) - { - unsigned r; - - for (r = 0; r < nmatch; r++) - { - pmatch[r].rm_so = regs.start[r]; - pmatch[r].rm_eo = regs.end[r]; - } - } - - /* If we needed the temporary register info, free the space now. */ - free (regs.start); - } - - /* We want zero return to mean success, unlike `re_search'. */ - return ret >= 0 ? REG_NOERROR : REG_NOMATCH; -} -WEAK_ALIAS (__regexec, regexec) - - -/* Returns a message corresponding to an error code, ERR_CODE, returned - from either regcomp or regexec. We don't use PREG here. - - ERR_CODE was previously called ERRCODE, but that name causes an - error with msvc8 compiler. */ - -size_t -regerror (int err_code, const regex_t *preg, char *errbuf, size_t errbuf_size) -{ - const char *msg; - size_t msg_size; - - if (err_code < 0 - || err_code >= (sizeof (re_error_msgid) / sizeof (re_error_msgid[0]))) - /* Only error codes returned by the rest of the code should be passed - to this routine. 
If we are given anything else, or if other regex - code generates an invalid error code, then the program has a bug. - Dump core so we can fix it. */ - abort (); - - msg = gettext (re_error_msgid[err_code]); - - msg_size = strlen (msg) + 1; /* Includes the null. */ - - if (errbuf_size != 0) - { - if (msg_size > errbuf_size) - { - memcpy (errbuf, msg, errbuf_size - 1); - errbuf[errbuf_size - 1] = 0; - } - else - strcpy (errbuf, msg); - } - - return msg_size; -} -WEAK_ALIAS (__regerror, regerror) - - -/* Free dynamically allocated space used by PREG. */ - -void -regfree (regex_t *preg) -{ - free (preg->buffer); - preg->buffer = NULL; - - preg->allocated = 0; - preg->used = 0; - - free (preg->fastmap); - preg->fastmap = NULL; - preg->fastmap_accurate = 0; - - free (preg->translate); - preg->translate = NULL; -} -WEAK_ALIAS (__regfree, regfree) - -#endif /* not emacs */ |