summary refs log tree commit diff
diff options
context:
space:
mode:
author Arnold D. Robbins <arnold@skeeve.com> 2014-02-21 13:48:17 +0200
committer Arnold D. Robbins <arnold@skeeve.com> 2014-02-21 13:48:17 +0200
commit 0a66ebd338de385b08e8cf80c39b99f5abc205db (patch)
tree c876c36ea654da5a37cd37a18bbddf4acd2c2f2e
parent c0aeaf87402aa2670e76a32a888053fb64b879b0 (diff)
download gawk-0a66ebd338de385b08e8cf80c39b99f5abc205db.tar.gz
Sync dfa with grep.
-rw-r--r-- ChangeLog 4
-rw-r--r-- dfa.c 43
-rw-r--r-- dfa.h 2
3 files changed, 29 insertions, 20 deletions
diff --git a/ChangeLog b/ChangeLog
index 9621c3c6..a6736b21 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2014-02-21 Arnold D. Robbins <arnold@skeeve.com>
+
+ * dfa.h, dfa.c (parse_bracket_exp): Sync with grep.
+
2014-02-20 Arnold D. Robbins <arnold@skeeve.com>
* regex.h, regex.c, regex_internal.c, regex_internal.h: Sync
diff --git a/dfa.c b/dfa.c
index 19ca737f..03a61878 100644
--- a/dfa.c
+++ b/dfa.c
@@ -791,7 +791,7 @@ setbit_case_fold_c (int b, charclass c)
/* UTF-8 encoding allows some optimizations that we can't otherwise
assume in a multibyte encoding. */
-static inline int
+int
using_utf8 (void)
{
static int utf8 = -1;
@@ -1160,28 +1160,31 @@ parse_bracket_exp (void)
#else
/* Defer to the system regex library about the meaning
of range expressions. */
- regex_t re;
- char pattern[6] = { '[', 0, '-', 0, ']', 0 };
- char subject[2] = { 0, 0 };
- c1 = c;
- if (case_fold)
- {
- c1 = tolower (c1);
- c2 = tolower (c2);
- }
-
- pattern[1] = c1;
- pattern[3] = c2;
- regcomp (&re, pattern, REG_NOSUB);
- for (c = 0; c < NOTCHAR; ++c)
+ struct re_pattern_buffer re = { 0 };
+ char const *compile_msg;
+#if 199901 <= __STDC_VERSION__
+ char pattern[] = { '[', '\\', c, '-', '\\', c2, ']' };
+#else
+ char pattern[] = { '[', '\\', 0, '-', '\\', 0, ']' };
+ pattern[2] = c;
+ pattern[5] = c2;
+#endif
+ re_set_syntax (syntax_bits | RE_BACKSLASH_ESCAPE_IN_LISTS);
+ compile_msg = re_compile_pattern (pattern, sizeof pattern, &re);
+ if (compile_msg)
+ dfaerror (compile_msg);
+ for (c = 0; c < NOTCHAR; c++)
{
- if ((case_fold && isupper (c)))
- continue;
- subject[0] = c;
- if (regexec (&re, subject, 0, NULL, 0) != REG_NOMATCH)
- setbit_case_fold_c (c, ccl);
+ char subject = c;
+ switch (re_match (&re, &subject, 1, 0, NULL))
+ {
+ case 1: setbit (c, ccl); break;
+ case -1: break;
+ default: xalloc_die ();
+ }
}
regfree (&re);
+ re_set_syntax (syntax_bits);
#endif
}
diff --git a/dfa.h b/dfa.h
index bacd4894..7e0674fc 100644
--- a/dfa.h
+++ b/dfa.h
@@ -99,3 +99,5 @@ extern void dfawarn (const char *);
takes a single argument, a NUL-terminated string describing the error.
The user must supply a dfaerror. */
extern _Noreturn void dfaerror (const char *);
+
+extern int using_utf8 (void);