summaryrefslogtreecommitdiff
path: root/src/search.c
diff options
context:
space:
mode:
authorDaniel Colascione <dancol@dancol.org>2018-06-16 13:46:10 -0700
committerDaniel Colascione <dancol@dancol.org>2018-06-16 13:46:38 -0700
commit938d252d1c6c5e2027aa250c649deb024154f936 (patch)
treec0ee02c8cb40d0b7fc9bb0fa55ac8569491197c9 /src/search.c
parent1502b377d35d6db623301829549ebcab9a2777e6 (diff)
downloademacs-938d252d1c6c5e2027aa250c649deb024154f936.tar.gz
Make regex matching reentrant; update syntax during match
* src/lisp.h (compile_pattern): Remove prototype of now-internal function. * src/regex.c (POS_AS_IN_BUFFER): Consult gl_state instead of re_match_object: the latter can change in Lisp. (re_match_2_internal): Switch back to UPDATE_SYNTAX_* FROM UPDATE_SYNTAX_FAST*, allowing calls into Lisp. * src/regex.h (re_match_object): Uncomment declaration. * src/search.c (struct regexp_cache): Add `busy' field. (thaw_buffer_relocation): Delete; rely on unbind. (compile_pattern_1): Assert pattern isn't busy. (shrink_regexp_cache): Don't shrink busy patterns. (clear_regexp_cache): Don't nuke busy patterns. (unfreeze_pattern, freeze_pattern): New functions. (compile_pattern): Return a regexp_cache pointer instead of the re_pattern_buffer, allowing callers to use `freeze_pattern' if needed. Do not consider busy patterns as cache hit candidates; error if we run out of non-busy cache entries. (looking_at_1, fast_looking_at): Snapshot Vinhibit_changing_match_data; mark pattern busy while we're matching it; unbind. (string_match_1, fast_string_match_internal) (fast_c_string_match_ignore_case): Adjust for compile_pattern return type. (search_buffer_re): Regex code from old search_buffer moved here; snapshot Vinhibit_changing_match_data; mark pattern busy while we're matching it; unbind. (search_buffer_non_re): Non-regex code from old search_buffer moved here. (search_buffer): Split into search_buffer_re, search_buffer_non_re. (syms_of_search): Staticpro re_match_object, even though we really shouldn't have to. * src/syntax.h (UPDATE_SYNTAX_TABLE_FORWARD_FAST): (UPDATE_SYNTAX_TABLE_FAST): Remove. * src/thread.h (struct thread_state): Remove m_re_match_object, which is global again. (It never needs to be preserved across thread switch.)
Diffstat (limited to 'src/search.c')
-rw-r--r--src/search.c795
1 files changed, 417 insertions, 378 deletions
diff --git a/src/search.c b/src/search.c
index a21c01ca4b4..ccdb659776d 100644
--- a/src/search.c
+++ b/src/search.c
@@ -48,6 +48,8 @@ struct regexp_cache
char fastmap[0400];
/* True means regexp was compiled to do full POSIX backtracking. */
bool posix;
+ /* True means we're inside a buffer match. */
+ bool busy;
};
/* The instances of that struct. */
@@ -93,6 +95,8 @@ static EMACS_INT search_buffer (Lisp_Object, ptrdiff_t, ptrdiff_t,
ptrdiff_t, ptrdiff_t, EMACS_INT, int,
Lisp_Object, Lisp_Object, bool);
+Lisp_Object re_match_object;
+
static _Noreturn void
matcher_overflow (void)
{
@@ -110,14 +114,6 @@ freeze_buffer_relocation (void)
#endif
}
-static void
-thaw_buffer_relocation (void)
-{
-#ifdef REL_ALLOC
- unbind_to (SPECPDL_INDEX () - 1, Qnil);
-#endif
-}
-
/* Compile a regexp and signal a Lisp error if anything goes wrong.
PATTERN is the pattern to compile.
CP is the place to put the result.
@@ -134,6 +130,7 @@ compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern,
const char *whitespace_regexp;
char *val;
+ eassert (!cp->busy);
cp->regexp = Qnil;
cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
cp->posix = posix;
@@ -170,10 +167,11 @@ shrink_regexp_cache (void)
struct regexp_cache *cp;
for (cp = searchbuf_head; cp != 0; cp = cp->next)
- {
- cp->buf.allocated = cp->buf.used;
- cp->buf.buffer = xrealloc (cp->buf.buffer, cp->buf.used);
- }
+ if (!cp->busy)
+ {
+ cp->buf.allocated = cp->buf.used;
+ cp->buf.buffer = xrealloc (cp->buf.buffer, cp->buf.used);
+ }
}
/* Clear the regexp cache w.r.t. a particular syntax table,
@@ -190,10 +188,25 @@ clear_regexp_cache (void)
/* It's tempting to compare with the syntax-table we've actually changed,
but it's not sufficient because char-table inheritance means that
modifying one syntax-table can change others at the same time. */
- if (!EQ (searchbufs[i].syntax_table, Qt))
+ if (!searchbufs[i].busy && !EQ (searchbufs[i].syntax_table, Qt))
searchbufs[i].regexp = Qnil;
}
+static void
+unfreeze_pattern (void *arg)
+{
+ struct regexp_cache *searchbuf = arg;
+ searchbuf->busy = false;
+}
+
+static void
+freeze_pattern (struct regexp_cache *searchbuf)
+{
+ eassert (!searchbuf->busy);
+ record_unwind_protect_ptr (unfreeze_pattern, searchbuf);
+ searchbuf->busy = true;
+}
+
/* Compile a regexp if necessary, but first check to see if there's one in
the cache.
PATTERN is the pattern to compile.
@@ -205,7 +218,7 @@ clear_regexp_cache (void)
POSIX is true if we want full backtracking (POSIX style) for this pattern.
False means backtrack only enough to get a valid match. */
-struct re_pattern_buffer *
+static struct regexp_cache *
compile_pattern (Lisp_Object pattern, struct re_registers *regp,
Lisp_Object translate, bool posix, bool multibyte)
{
@@ -222,6 +235,7 @@ compile_pattern (Lisp_Object pattern, struct re_registers *regp,
if (NILP (cp->regexp))
goto compile_it;
if (SCHARS (cp->regexp) == SCHARS (pattern)
+ && !cp->busy
&& STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
&& !NILP (Fstring_equal (cp->regexp, pattern))
&& EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
@@ -237,7 +251,10 @@ compile_pattern (Lisp_Object pattern, struct re_registers *regp,
string value. */
if (cp->next == 0)
{
+ if (cp->busy)
+ error ("Too much matching reentrancy");
compile_it:
+ eassert (!cp->busy);
compile_pattern_1 (cp, pattern, translate, posix);
break;
}
@@ -258,8 +275,7 @@ compile_pattern (Lisp_Object pattern, struct re_registers *regp,
/* The compiled pattern can be used both for multibyte and unibyte
target. But, we have to tell which the pattern is used for. */
cp->buf.target_multibyte = multibyte;
-
- return &cp->buf;
+ return cp;
}
@@ -270,7 +286,6 @@ looking_at_1 (Lisp_Object string, bool posix)
unsigned char *p1, *p2;
ptrdiff_t s1, s2;
register ptrdiff_t i;
- struct re_pattern_buffer *bufp;
if (running_asynch_code)
save_search_regs ();
@@ -280,13 +295,17 @@ looking_at_1 (Lisp_Object string, bool posix)
BVAR (current_buffer, case_eqv_table));
CHECK_STRING (string);
- bufp = compile_pattern (string,
- (NILP (Vinhibit_changing_match_data)
- ? &search_regs : NULL),
- (!NILP (BVAR (current_buffer, case_fold_search))
- ? BVAR (current_buffer, case_canon_table) : Qnil),
- posix,
- !NILP (BVAR (current_buffer, enable_multibyte_characters)));
+
+ /* Snapshot in case Lisp changes the value. */
+ bool preserve_match_data = NILP (Vinhibit_changing_match_data);
+
+ struct regexp_cache *cache_entry = compile_pattern (
+ string,
+ preserve_match_data ? &search_regs : NULL,
+ (!NILP (BVAR (current_buffer, case_fold_search))
+ ? BVAR (current_buffer, case_canon_table) : Qnil),
+ posix,
+ !NILP (BVAR (current_buffer, enable_multibyte_characters)));
/* Do a pending quit right away, to avoid paradoxical behavior */
maybe_quit ();
@@ -310,21 +329,20 @@ looking_at_1 (Lisp_Object string, bool posix)
s2 = 0;
}
- re_match_object = Qnil;
-
+ ptrdiff_t count = SPECPDL_INDEX ();
freeze_buffer_relocation ();
- i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
+ freeze_pattern (cache_entry);
+ re_match_object = Qnil;
+ i = re_match_2 (&cache_entry->buf, (char *) p1, s1, (char *) p2, s2,
PT_BYTE - BEGV_BYTE,
- (NILP (Vinhibit_changing_match_data)
- ? &search_regs : NULL),
+ preserve_match_data ? &search_regs : NULL,
ZV_BYTE - BEGV_BYTE);
- thaw_buffer_relocation ();
if (i == -2)
matcher_overflow ();
val = (i >= 0 ? Qt : Qnil);
- if (NILP (Vinhibit_changing_match_data) && i >= 0)
+ if (preserve_match_data && i >= 0)
{
for (i = 0; i < search_regs.num_regs; i++)
if (search_regs.start[i] >= 0)
@@ -338,7 +356,7 @@ looking_at_1 (Lisp_Object string, bool posix)
XSETBUFFER (last_thing_searched, current_buffer);
}
- return val;
+ return unbind_to (count, val);
}
DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
@@ -396,15 +414,14 @@ string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
BVAR (current_buffer, case_eqv_table));
- bufp = compile_pattern (regexp,
- (NILP (Vinhibit_changing_match_data)
- ? &search_regs : NULL),
- (!NILP (BVAR (current_buffer, case_fold_search))
- ? BVAR (current_buffer, case_canon_table) : Qnil),
- posix,
- STRING_MULTIBYTE (string));
+ bufp = &compile_pattern (regexp,
+ (NILP (Vinhibit_changing_match_data)
+ ? &search_regs : NULL),
+ (!NILP (BVAR (current_buffer, case_fold_search))
+ ? BVAR (current_buffer, case_canon_table) : Qnil),
+ posix,
+ STRING_MULTIBYTE (string))->buf;
re_match_object = string;
-
val = re_search (bufp, SSDATA (string),
SBYTES (string), pos_byte,
SBYTES (string) - pos_byte,
@@ -471,10 +488,9 @@ fast_string_match_internal (Lisp_Object regexp, Lisp_Object string,
ptrdiff_t val;
struct re_pattern_buffer *bufp;
- bufp = compile_pattern (regexp, 0, table,
- 0, STRING_MULTIBYTE (string));
+ bufp = &compile_pattern (regexp, 0, table,
+ 0, STRING_MULTIBYTE (string))->buf;
re_match_object = string;
-
val = re_search (bufp, SSDATA (string),
SBYTES (string), 0,
SBYTES (string), 0);
@@ -494,10 +510,10 @@ fast_c_string_match_ignore_case (Lisp_Object regexp,
struct re_pattern_buffer *bufp;
regexp = string_make_unibyte (regexp);
+ bufp = &compile_pattern (regexp, 0,
+ Vascii_canon_table, 0,
+ 0)->buf;
re_match_object = Qt;
- bufp = compile_pattern (regexp, 0,
- Vascii_canon_table, 0,
- 0);
val = re_search (bufp, string, len, 0, len, 0);
return val;
}
@@ -513,7 +529,6 @@ fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte,
ptrdiff_t limit, ptrdiff_t limit_byte, Lisp_Object string)
{
bool multibyte;
- struct re_pattern_buffer *buf;
unsigned char *p1, *p2;
ptrdiff_t s1, s2;
ptrdiff_t len;
@@ -528,7 +543,6 @@ fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte,
s1 = 0;
p2 = SDATA (string);
s2 = SBYTES (string);
- re_match_object = string;
multibyte = STRING_MULTIBYTE (string);
}
else
@@ -554,16 +568,19 @@ fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte,
s1 = ZV_BYTE - BEGV_BYTE;
s2 = 0;
}
- re_match_object = Qnil;
multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
}
- buf = compile_pattern (regexp, 0, Qnil, 0, multibyte);
+ struct regexp_cache *cache_entry =
+ compile_pattern (regexp, 0, Qnil, 0, multibyte);
+ ptrdiff_t count = SPECPDL_INDEX ();
freeze_buffer_relocation ();
- len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
+ freeze_pattern (cache_entry);
+ re_match_object = STRINGP (string) ? string : Qnil;
+ len = re_match_2 (&cache_entry->buf, (char *) p1, s1, (char *) p2, s2,
pos_byte, NULL, limit_byte);
- thaw_buffer_relocation ();
+ unbind_to (count, Qnil);
return len;
}
@@ -1151,355 +1168,372 @@ while (0)
static struct re_registers search_regs_1;
static EMACS_INT
-search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
- ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
- int RE, Lisp_Object trt, Lisp_Object inverse_trt, bool posix)
+search_buffer_re (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
+ ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
+ Lisp_Object trt, Lisp_Object inverse_trt, bool posix)
{
- ptrdiff_t len = SCHARS (string);
- ptrdiff_t len_byte = SBYTES (string);
- register ptrdiff_t i;
+ unsigned char *p1, *p2;
+ ptrdiff_t s1, s2;
- if (running_asynch_code)
- save_search_regs ();
+ /* Snapshot in case Lisp changes the value. */
+ bool preserve_match_data = NILP (Vinhibit_changing_match_data);
- /* Searching 0 times means don't move. */
- /* Null string is found at starting position. */
- if (len == 0 || n == 0)
+ struct regexp_cache *cache_entry =
+ compile_pattern (string,
+ preserve_match_data ? &search_regs : &search_regs_1,
+ trt, posix,
+ !NILP (BVAR (current_buffer, enable_multibyte_characters)));
+ struct re_pattern_buffer *bufp = &cache_entry->buf;
+
+ maybe_quit (); /* Do a pending quit right away,
+ to avoid paradoxical behavior */
+ /* Get pointers and sizes of the two strings
+ that make up the visible portion of the buffer. */
+
+ p1 = BEGV_ADDR;
+ s1 = GPT_BYTE - BEGV_BYTE;
+ p2 = GAP_END_ADDR;
+ s2 = ZV_BYTE - GPT_BYTE;
+ if (s1 < 0)
{
- set_search_regs (pos_byte, 0);
- return pos;
+ p2 = p1;
+ s2 = ZV_BYTE - BEGV_BYTE;
+ s1 = 0;
}
-
- if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
+ if (s2 < 0)
{
- unsigned char *p1, *p2;
- ptrdiff_t s1, s2;
- struct re_pattern_buffer *bufp;
+ s1 = ZV_BYTE - BEGV_BYTE;
+ s2 = 0;
+ }
- bufp = compile_pattern (string,
- (NILP (Vinhibit_changing_match_data)
- ? &search_regs : &search_regs_1),
- trt, posix,
- !NILP (BVAR (current_buffer, enable_multibyte_characters)));
+ ptrdiff_t count = SPECPDL_INDEX ();
+ freeze_buffer_relocation ();
+ freeze_pattern (cache_entry);
- maybe_quit (); /* Do a pending quit right away,
- to avoid paradoxical behavior */
- /* Get pointers and sizes of the two strings
- that make up the visible portion of the buffer. */
+ while (n < 0)
+ {
+ ptrdiff_t val;
- p1 = BEGV_ADDR;
- s1 = GPT_BYTE - BEGV_BYTE;
- p2 = GAP_END_ADDR;
- s2 = ZV_BYTE - GPT_BYTE;
- if (s1 < 0)
- {
- p2 = p1;
- s2 = ZV_BYTE - BEGV_BYTE;
- s1 = 0;
- }
- if (s2 < 0)
- {
- s1 = ZV_BYTE - BEGV_BYTE;
- s2 = 0;
- }
re_match_object = Qnil;
+ val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
+ pos_byte - BEGV_BYTE, lim_byte - pos_byte,
+ preserve_match_data ? &search_regs : &search_regs_1,
+ /* Don't allow match past current point */
+ pos_byte - BEGV_BYTE);
+ if (val == -2)
+ {
+ matcher_overflow ();
+ }
+ if (val >= 0)
+ {
+ if (preserve_match_data)
+ {
+ pos_byte = search_regs.start[0] + BEGV_BYTE;
+ for (ptrdiff_t i = 0; i < search_regs.num_regs; i++)
+ if (search_regs.start[i] >= 0)
+ {
+ search_regs.start[i]
+ = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
+ search_regs.end[i]
+ = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
+ }
+ XSETBUFFER (last_thing_searched, current_buffer);
+ /* Set pos to the new position. */
+ pos = search_regs.start[0];
+ }
+ else
+ {
+ pos_byte = search_regs_1.start[0] + BEGV_BYTE;
+ /* Set pos to the new position. */
+ pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
+ }
+ }
+ else
+ {
+ unbind_to (count, Qnil);
+ return (n);
+ }
+ n++;
+ maybe_quit ();
+ }
+ while (n > 0)
+ {
+ ptrdiff_t val;
- freeze_buffer_relocation ();
+ re_match_object = Qnil;
+ val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
+ pos_byte - BEGV_BYTE, lim_byte - pos_byte,
+ preserve_match_data ? &search_regs : &search_regs_1,
+ lim_byte - BEGV_BYTE);
+ if (val == -2)
+ {
+ matcher_overflow ();
+ }
+ if (val >= 0)
+ {
+ if (preserve_match_data)
+ {
+ pos_byte = search_regs.end[0] + BEGV_BYTE;
+ for (ptrdiff_t i = 0; i < search_regs.num_regs; i++)
+ if (search_regs.start[i] >= 0)
+ {
+ search_regs.start[i]
+ = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
+ search_regs.end[i]
+ = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
+ }
+ XSETBUFFER (last_thing_searched, current_buffer);
+ pos = search_regs.end[0];
+ }
+ else
+ {
+ pos_byte = search_regs_1.end[0] + BEGV_BYTE;
+ pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
+ }
+ }
+ else
+ {
+ unbind_to (count, Qnil);
+ return (0 - n);
+ }
+ n--;
+ maybe_quit ();
+ }
+ unbind_to (count, Qnil);
+ return (pos);
+}
- while (n < 0)
- {
- ptrdiff_t val;
-
- val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
- pos_byte - BEGV_BYTE, lim_byte - pos_byte,
- (NILP (Vinhibit_changing_match_data)
- ? &search_regs : &search_regs_1),
- /* Don't allow match past current point */
- pos_byte - BEGV_BYTE);
- if (val == -2)
- {
- matcher_overflow ();
- }
- if (val >= 0)
- {
- if (NILP (Vinhibit_changing_match_data))
- {
- pos_byte = search_regs.start[0] + BEGV_BYTE;
- for (i = 0; i < search_regs.num_regs; i++)
- if (search_regs.start[i] >= 0)
- {
- search_regs.start[i]
- = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
- search_regs.end[i]
- = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
- }
- XSETBUFFER (last_thing_searched, current_buffer);
- /* Set pos to the new position. */
- pos = search_regs.start[0];
- }
- else
- {
- pos_byte = search_regs_1.start[0] + BEGV_BYTE;
- /* Set pos to the new position. */
- pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
- }
- }
- else
- {
- thaw_buffer_relocation ();
- return (n);
- }
- n++;
- maybe_quit ();
- }
- while (n > 0)
- {
- ptrdiff_t val;
-
- val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
- pos_byte - BEGV_BYTE, lim_byte - pos_byte,
- (NILP (Vinhibit_changing_match_data)
- ? &search_regs : &search_regs_1),
- lim_byte - BEGV_BYTE);
- if (val == -2)
- {
- matcher_overflow ();
- }
- if (val >= 0)
- {
- if (NILP (Vinhibit_changing_match_data))
- {
- pos_byte = search_regs.end[0] + BEGV_BYTE;
- for (i = 0; i < search_regs.num_regs; i++)
- if (search_regs.start[i] >= 0)
- {
- search_regs.start[i]
- = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
- search_regs.end[i]
- = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
- }
- XSETBUFFER (last_thing_searched, current_buffer);
- pos = search_regs.end[0];
- }
- else
- {
- pos_byte = search_regs_1.end[0] + BEGV_BYTE;
- pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
- }
- }
- else
- {
- thaw_buffer_relocation ();
- return (0 - n);
- }
- n--;
- maybe_quit ();
- }
- thaw_buffer_relocation ();
- return (pos);
+static EMACS_INT
+search_buffer_non_re (Lisp_Object string, ptrdiff_t pos,
+ ptrdiff_t pos_byte, ptrdiff_t lim, ptrdiff_t lim_byte,
+ EMACS_INT n, int RE, Lisp_Object trt, Lisp_Object inverse_trt,
+ bool posix)
+{
+ unsigned char *raw_pattern, *pat;
+ ptrdiff_t raw_pattern_size;
+ ptrdiff_t raw_pattern_size_byte;
+ unsigned char *patbuf;
+ bool multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
+ unsigned char *base_pat;
+ /* Set to positive if we find a non-ASCII char that need
+ translation. Otherwise set to zero later. */
+ int char_base = -1;
+ bool boyer_moore_ok = 1;
+ USE_SAFE_ALLOCA;
+
+ /* MULTIBYTE says whether the text to be searched is multibyte.
+ We must convert PATTERN to match that, or we will not really
+ find things right. */
+
+ if (multibyte == STRING_MULTIBYTE (string))
+ {
+ raw_pattern = SDATA (string);
+ raw_pattern_size = SCHARS (string);
+ raw_pattern_size_byte = SBYTES (string);
}
- else /* non-RE case */
+ else if (multibyte)
{
- unsigned char *raw_pattern, *pat;
- ptrdiff_t raw_pattern_size;
- ptrdiff_t raw_pattern_size_byte;
- unsigned char *patbuf;
- bool multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
- unsigned char *base_pat;
- /* Set to positive if we find a non-ASCII char that need
- translation. Otherwise set to zero later. */
- int char_base = -1;
- bool boyer_moore_ok = 1;
- USE_SAFE_ALLOCA;
-
- /* MULTIBYTE says whether the text to be searched is multibyte.
- We must convert PATTERN to match that, or we will not really
- find things right. */
-
- if (multibyte == STRING_MULTIBYTE (string))
- {
- raw_pattern = SDATA (string);
- raw_pattern_size = SCHARS (string);
- raw_pattern_size_byte = SBYTES (string);
- }
- else if (multibyte)
- {
- raw_pattern_size = SCHARS (string);
- raw_pattern_size_byte
- = count_size_as_multibyte (SDATA (string),
- raw_pattern_size);
- raw_pattern = SAFE_ALLOCA (raw_pattern_size_byte + 1);
- copy_text (SDATA (string), raw_pattern,
- SCHARS (string), 0, 1);
- }
- else
- {
- /* Converting multibyte to single-byte.
-
- ??? Perhaps this conversion should be done in a special way
- by subtracting nonascii-insert-offset from each non-ASCII char,
- so that only the multibyte chars which really correspond to
- the chosen single-byte character set can possibly match. */
- raw_pattern_size = SCHARS (string);
- raw_pattern_size_byte = SCHARS (string);
- raw_pattern = SAFE_ALLOCA (raw_pattern_size + 1);
- copy_text (SDATA (string), raw_pattern,
- SBYTES (string), 1, 0);
- }
+ raw_pattern_size = SCHARS (string);
+ raw_pattern_size_byte
+ = count_size_as_multibyte (SDATA (string),
+ raw_pattern_size);
+ raw_pattern = SAFE_ALLOCA (raw_pattern_size_byte + 1);
+ copy_text (SDATA (string), raw_pattern,
+ SCHARS (string), 0, 1);
+ }
+ else
+ {
+ /* Converting multibyte to single-byte.
+
+ ??? Perhaps this conversion should be done in a special way
+ by subtracting nonascii-insert-offset from each non-ASCII char,
+ so that only the multibyte chars which really correspond to
+ the chosen single-byte character set can possibly match. */
+ raw_pattern_size = SCHARS (string);
+ raw_pattern_size_byte = SCHARS (string);
+ raw_pattern = SAFE_ALLOCA (raw_pattern_size + 1);
+ copy_text (SDATA (string), raw_pattern,
+ SBYTES (string), 1, 0);
+ }
- /* Copy and optionally translate the pattern. */
- len = raw_pattern_size;
- len_byte = raw_pattern_size_byte;
- SAFE_NALLOCA (patbuf, MAX_MULTIBYTE_LENGTH, len);
- pat = patbuf;
- base_pat = raw_pattern;
- if (multibyte)
- {
- /* Fill patbuf by translated characters in STRING while
- checking if we can use boyer-moore search. If TRT is
- non-nil, we can use boyer-moore search only if TRT can be
- represented by the byte array of 256 elements. For that,
- all non-ASCII case-equivalents of all case-sensitive
- characters in STRING must belong to the same character
- group (two characters belong to the same group iff their
- multibyte forms are the same except for the last byte;
- i.e. every 64 characters form a group; U+0000..U+003F,
- U+0040..U+007F, U+0080..U+00BF, ...). */
-
- while (--len >= 0)
- {
- unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
- int c, translated, inverse;
- int in_charlen, charlen;
-
- /* If we got here and the RE flag is set, it's because we're
- dealing with a regexp known to be trivial, so the backslash
- just quotes the next character. */
- if (RE && *base_pat == '\\')
- {
- len--;
- raw_pattern_size--;
- len_byte--;
- base_pat++;
- }
+ /* Copy and optionally translate the pattern. */
+ ptrdiff_t len = raw_pattern_size;
+ ptrdiff_t len_byte = raw_pattern_size_byte;
+ SAFE_NALLOCA (patbuf, MAX_MULTIBYTE_LENGTH, len);
+ pat = patbuf;
+ base_pat = raw_pattern;
+ if (multibyte)
+ {
+ /* Fill patbuf by translated characters in STRING while
+ checking if we can use boyer-moore search. If TRT is
+ non-nil, we can use boyer-moore search only if TRT can be
+ represented by the byte array of 256 elements. For that,
+ all non-ASCII case-equivalents of all case-sensitive
+ characters in STRING must belong to the same character
+ group (two characters belong to the same group iff their
+ multibyte forms are the same except for the last byte;
+ i.e. every 64 characters form a group; U+0000..U+003F,
+ U+0040..U+007F, U+0080..U+00BF, ...). */
+
+ while (--len >= 0)
+ {
+ unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
+ int c, translated, inverse;
+ int in_charlen, charlen;
+
+ /* If we got here and the RE flag is set, it's because we're
+ dealing with a regexp known to be trivial, so the backslash
+ just quotes the next character. */
+ if (RE && *base_pat == '\\')
+ {
+ len--;
+ raw_pattern_size--;
+ len_byte--;
+ base_pat++;
+ }
- c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen);
+ c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen);
- if (NILP (trt))
- {
- str = base_pat;
- charlen = in_charlen;
- }
- else
- {
- /* Translate the character. */
- TRANSLATE (translated, trt, c);
- charlen = CHAR_STRING (translated, str_base);
- str = str_base;
-
- /* Check if C has any other case-equivalents. */
- TRANSLATE (inverse, inverse_trt, c);
- /* If so, check if we can use boyer-moore. */
- if (c != inverse && boyer_moore_ok)
- {
- /* Check if all equivalents belong to the same
- group of characters. Note that the check of C
- itself is done by the last iteration. */
- int this_char_base = -1;
+ if (NILP (trt))
+ {
+ str = base_pat;
+ charlen = in_charlen;
+ }
+ else
+ {
+ /* Translate the character. */
+ TRANSLATE (translated, trt, c);
+ charlen = CHAR_STRING (translated, str_base);
+ str = str_base;
+
+ /* Check if C has any other case-equivalents. */
+ TRANSLATE (inverse, inverse_trt, c);
+ /* If so, check if we can use boyer-moore. */
+ if (c != inverse && boyer_moore_ok)
+ {
+ /* Check if all equivalents belong to the same
+ group of characters. Note that the check of C
+ itself is done by the last iteration. */
+ int this_char_base = -1;
+
+ while (boyer_moore_ok)
+ {
+ if (ASCII_CHAR_P (inverse))
+ {
+ if (this_char_base > 0)
+ boyer_moore_ok = 0;
+ else
+ this_char_base = 0;
+ }
+ else if (CHAR_BYTE8_P (inverse))
+ /* Boyer-moore search can't handle a
+ translation of an eight-bit
+ character. */
+ boyer_moore_ok = 0;
+ else if (this_char_base < 0)
+ {
+ this_char_base = inverse & ~0x3F;
+ if (char_base < 0)
+ char_base = this_char_base;
+ else if (this_char_base != char_base)
+ boyer_moore_ok = 0;
+ }
+ else if ((inverse & ~0x3F) != this_char_base)
+ boyer_moore_ok = 0;
+ if (c == inverse)
+ break;
+ TRANSLATE (inverse, inverse_trt, inverse);
+ }
+ }
+ }
- while (boyer_moore_ok)
- {
- if (ASCII_CHAR_P (inverse))
- {
- if (this_char_base > 0)
- boyer_moore_ok = 0;
- else
- this_char_base = 0;
- }
- else if (CHAR_BYTE8_P (inverse))
- /* Boyer-moore search can't handle a
- translation of an eight-bit
- character. */
- boyer_moore_ok = 0;
- else if (this_char_base < 0)
- {
- this_char_base = inverse & ~0x3F;
- if (char_base < 0)
- char_base = this_char_base;
- else if (this_char_base != char_base)
- boyer_moore_ok = 0;
- }
- else if ((inverse & ~0x3F) != this_char_base)
- boyer_moore_ok = 0;
- if (c == inverse)
- break;
- TRANSLATE (inverse, inverse_trt, inverse);
- }
- }
- }
+ /* Store this character into the translated pattern. */
+ memcpy (pat, str, charlen);
+ pat += charlen;
+ base_pat += in_charlen;
+ len_byte -= in_charlen;
+ }
- /* Store this character into the translated pattern. */
- memcpy (pat, str, charlen);
- pat += charlen;
- base_pat += in_charlen;
- len_byte -= in_charlen;
- }
+ /* If char_base is still negative we didn't find any translated
+ non-ASCII characters. */
+ if (char_base < 0)
+ char_base = 0;
+ }
+ else
+ {
+ /* Unibyte buffer. */
+ char_base = 0;
+ while (--len >= 0)
+ {
+ int c, translated, inverse;
- /* If char_base is still negative we didn't find any translated
- non-ASCII characters. */
- if (char_base < 0)
- char_base = 0;
- }
- else
- {
- /* Unibyte buffer. */
- char_base = 0;
- while (--len >= 0)
- {
- int c, translated, inverse;
+ /* If we got here and the RE flag is set, it's because we're
+ dealing with a regexp known to be trivial, so the backslash
+ just quotes the next character. */
+ if (RE && *base_pat == '\\')
+ {
+ len--;
+ raw_pattern_size--;
+ base_pat++;
+ }
+ c = *base_pat++;
+ TRANSLATE (translated, trt, c);
+ *pat++ = translated;
+ /* Check that none of C's equivalents violates the
+ assumptions of boyer_moore. */
+ TRANSLATE (inverse, inverse_trt, c);
+ while (1)
+ {
+ if (inverse >= 0200)
+ {
+ boyer_moore_ok = 0;
+ break;
+ }
+ if (c == inverse)
+ break;
+ TRANSLATE (inverse, inverse_trt, inverse);
+ }
+ }
+ }
- /* If we got here and the RE flag is set, it's because we're
- dealing with a regexp known to be trivial, so the backslash
- just quotes the next character. */
- if (RE && *base_pat == '\\')
- {
- len--;
- raw_pattern_size--;
- base_pat++;
- }
- c = *base_pat++;
- TRANSLATE (translated, trt, c);
- *pat++ = translated;
- /* Check that none of C's equivalents violates the
- assumptions of boyer_moore. */
- TRANSLATE (inverse, inverse_trt, c);
- while (1)
- {
- if (inverse >= 0200)
- {
- boyer_moore_ok = 0;
- break;
- }
- if (c == inverse)
- break;
- TRANSLATE (inverse, inverse_trt, inverse);
- }
- }
- }
+ len_byte = pat - patbuf;
+ pat = base_pat = patbuf;
+
+ EMACS_INT result
+ = (boyer_moore_ok
+ ? boyer_moore (n, pat, len_byte, trt, inverse_trt,
+ pos_byte, lim_byte,
+ char_base)
+ : simple_search (n, pat, raw_pattern_size, len_byte, trt,
+ pos, pos_byte, lim, lim_byte));
+ SAFE_FREE ();
+ return result;
+}
+
+static EMACS_INT
+search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
+ ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
+ int RE, Lisp_Object trt, Lisp_Object inverse_trt, bool posix)
+{
+ if (running_asynch_code)
+ save_search_regs ();
- len_byte = pat - patbuf;
- pat = base_pat = patbuf;
-
- EMACS_INT result
- = (boyer_moore_ok
- ? boyer_moore (n, pat, len_byte, trt, inverse_trt,
- pos_byte, lim_byte,
- char_base)
- : simple_search (n, pat, raw_pattern_size, len_byte, trt,
- pos, pos_byte, lim, lim_byte));
- SAFE_FREE ();
- return result;
+ /* Searching 0 times means don't move. */
+ /* Null string is found at starting position. */
+ if (n == 0 || SCHARS (string) == 0)
+ {
+ set_search_regs (pos_byte, 0);
+ return pos;
}
+
+ if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
+ pos = search_buffer_re (string, pos, pos_byte, lim, lim_byte,
+ n, trt, inverse_trt, posix);
+ else
+ pos = search_buffer_non_re (string, pos, pos_byte, lim, lim_byte,
+ n, RE, trt, inverse_trt, posix);
+
+ return pos;
}
/* Do a simple string search N times for the string PAT,
@@ -3353,6 +3387,7 @@ the buffer. If the buffer doesn't have a cache, the value is nil. */)
return val;
}
+
void
syms_of_search (void)
{
@@ -3365,6 +3400,7 @@ syms_of_search (void)
searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
searchbufs[i].regexp = Qnil;
searchbufs[i].f_whitespace_regexp = Qnil;
+ searchbufs[i].busy = false;
searchbufs[i].syntax_table = Qnil;
staticpro (&searchbufs[i].regexp);
staticpro (&searchbufs[i].f_whitespace_regexp);
@@ -3405,6 +3441,9 @@ syms_of_search (void)
saved_last_thing_searched = Qnil;
staticpro (&saved_last_thing_searched);
+ re_match_object = Qnil;
+ staticpro (&re_match_object);
+
DEFVAR_LISP ("search-spaces-regexp", Vsearch_spaces_regexp,
doc: /* Regexp to substitute for bunches of spaces in regexp search.
Some commands use this for user-specified regexps.