summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author ksaito <ksaito@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2004-04-27 15:53:16 +0000
committer ksaito <ksaito@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2004-04-27 15:53:16 +0000
commit bb5b3f4f8fa4d4a4d959caa9de0e2dd75b8bf821 (patch)
tree ce288a93749b089da1e289e9613a1fe95afe1401
parent 3617d129fe30a0c4cd097a5ad4b273b3cbd8bcb7 (diff)
download ruby-bb5b3f4f8fa4d4a4d959caa9de0e2dd75b8bf821.tar.gz
imported Oni Guruma 2.2.7.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/KOSAKO@6225 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- oniguruma.h 8
-rw-r--r-- regcomp.c 29
-rw-r--r-- regexec.c 57
-rw-r--r-- regint.h 28
-rw-r--r-- regparse.c 194
5 files changed, 86 insertions, 230 deletions
diff --git a/oniguruma.h b/oniguruma.h
index 3daec9d49b..3fd9f4c395 100644
--- a/oniguruma.h
+++ b/oniguruma.h
@@ -11,7 +11,7 @@
#define ONIGURUMA
#define ONIGURUMA_VERSION_MAJOR 2
#define ONIGURUMA_VERSION_MINOR 2
-#define ONIGURUMA_VERSION_TEENY 6
+#define ONIGURUMA_VERSION_TEENY 8
#ifndef P_
#if defined(__STDC__) || defined(_WIN32)
@@ -401,7 +401,7 @@ ONIG_EXTERN OnigSyntaxType OnigSyntaxJava;
ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl;
ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby;
-/* predefined syntaxes (see regparse.c) */
+/* predefined syntaxes (see regsyntax.c) */
#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic)
#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended)
#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs)
@@ -716,6 +716,10 @@ void onig_set_syntax_options P_((OnigSyntaxType* syntax, OnigOptionType options)
ONIG_EXTERN
int onig_set_meta_char P_((unsigned int what, OnigCodePoint code));
ONIG_EXTERN
+unsigned int onig_get_match_stack_limit_size P_((void));
+ONIG_EXTERN
+int onig_set_match_stack_limit_size P_((unsigned int size));
+ONIG_EXTERN
int onig_end P_((void));
ONIG_EXTERN
const char* onig_version P_((void));
diff --git a/regcomp.c b/regcomp.c
index 24d44dd1b8..de44cfe037 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -3021,7 +3021,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
NSTRING_SET_CASE_AMBIG(node);
break;
}
- p++;
+ p += enc_len(reg->enc, *p);
}
}
break;
@@ -3950,22 +3950,17 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
}
}
- if (IS_NULL(cc->mbuf)) {
- if (cc->not) {
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- add_char_opt_map_info(&opt->map, i);
- }
- mb_found = 1;
- }
- }
- else {
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- z = ONIGENC_IS_MBC_HEAD(env->enc, i);
- if (z) {
- mb_found = 1;
- add_char_opt_map_info(&opt->map, i);
- }
- }
+ if (! ONIGENC_IS_SINGLEBYTE(env->enc)) {
+ if (! IS_NULL(cc->mbuf) ||
+ (cc->not != 0 && found != 0)) {
+ for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+ z = ONIGENC_IS_MBC_HEAD(env->enc, i);
+ if (z) {
+ mb_found = 1;
+ add_char_opt_map_info(&opt->map, i);
+ }
+ }
+ }
}
if (mb_found) {
diff --git a/regexec.c b/regexec.c
index 870a6535bd..1bae0d9516 100644
--- a/regexec.c
+++ b/regexec.c
@@ -362,11 +362,26 @@ typedef struct {
};\
} while(0)
+static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;
+
+extern unsigned int
+onig_get_match_stack_limit_size(void)
+{
+ return MatchStackLimitSize;
+}
+
+extern int
+onig_set_match_stack_limit_size(unsigned int size)
+{
+ MatchStackLimitSize = size;
+ return 0;
+}
+
static int
stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
StackType** arg_stk, StackType* stk_alloc, MatchArg* msa)
{
- int n;
+ unsigned int n;
StackType *x, *stk_base, *stk_end, *stk;
stk_base = *arg_stk_base;
@@ -385,7 +400,12 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
}
else {
n *= 2;
- if (n > MATCH_STACK_LIMIT_SIZE) return ONIGERR_MATCH_STACK_LIMIT_OVER;
+ if (MatchStackLimitSize != 0 && n > MatchStackLimitSize) {
+ if ((unsigned int )(stk_end - stk_base) == MatchStackLimitSize)
+ return ONIGERR_MATCH_STACK_LIMIT_OVER;
+ else
+ n = MatchStackLimitSize;
+ }
x = (StackType* )xrealloc(stk_base, sizeof(StackType) * n);
if (IS_NULL(x)) {
STACK_SAVE;
@@ -1171,10 +1191,9 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
goto fail; /* for retry */
}
}
- else {
- /* default behavior: return first-matching result. */
- goto finish;
- }
+
+ /* default behavior: return first-matching result. */
+ goto finish;
break;
case OP_EXACT1: STAT_OP_IN(OP_EXACT1);
@@ -2574,11 +2593,13 @@ bm_search_notrev(regex_t* reg, UChar* target, UChar* target_end,
if (t < target) return p + 1;
skip = reg->map[*s];
- p++;
+ p = s + 1;
+ if (p >= text_end) return (UChar* )NULL;
t = p;
- while ((p - t) < skip) {
+ do {
p += enc_len(reg->enc, *p);
- }
+ } while ((p - t) < skip && p < text_end);
+
s += (p - t);
}
}
@@ -2592,11 +2613,13 @@ bm_search_notrev(regex_t* reg, UChar* target, UChar* target_end,
if (t < target) return p + 1;
skip = reg->int_map[*s];
- p++;
+ p = s + 1;
+ if (p >= text_end) return (UChar* )NULL;
t = p;
- while ((p - t) < skip) {
+ do {
p += enc_len(reg->enc, *p);
- }
+ } while ((p - t) < skip && p < text_end);
+
s += (p - t);
}
}
@@ -3288,13 +3311,3 @@ onig_get_syntax(regex_t* reg)
{
return reg->syntax;
}
-
-extern const char*
-onig_version(void)
-{
-#define MSTR(a) # a
-
- return (MSTR(ONIGURUMA_VERSION_MAJOR) "."
- MSTR(ONIGURUMA_VERSION_MINOR) "."
- MSTR(ONIGURUMA_VERSION_TEENY));
-}
diff --git a/regint.h b/regint.h
index dacc0400be..bcc5fa5fc4 100644
--- a/regint.h
+++ b/regint.h
@@ -46,13 +46,12 @@
#define USE_QUALIFIER_PEEK_NEXT
#define INIT_MATCH_STACK_SIZE 160
-#define MATCH_STACK_LIMIT_SIZE 500000
+#define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */
/* interface to external system */
#ifdef NOT_RUBY /* gived from Makefile */
#include "config.h"
#define USE_VARIABLE_META_CHARS
-#define USE_VARIABLE_SYNTAX
#define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */
#define USE_POSIX_REGION_OPTION /* needed for POSIX API support */
#define THREAD_ATOMIC_START /* depend on thread system */
@@ -654,6 +653,31 @@ extern OnigMetaCharTableType OnigMetaCharTable;
#define MC_ONE_OR_MORE_TIME OnigMetaCharTable.one_or_more_time
#define MC_ANYCHAR_ANYTIME OnigMetaCharTable.anychar_anytime
+#define SYN_POSIX_COMMON_OP \
+ ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_POSIX_BRACKET | \
+ ONIG_SYN_OP_DECIMAL_BACKREF | \
+ ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_ASTERISK_ZERO_INF | \
+ ONIG_SYN_OP_LINE_ANCHOR | \
+ ONIG_SYN_OP_ESC_CONTROL_CHARS )
+
+#define SYN_GNU_REGEX_OP \
+ ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | \
+ ONIG_SYN_OP_POSIX_BRACKET | ONIG_SYN_OP_DECIMAL_BACKREF | \
+ ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_LPAREN_SUBEXP | \
+ ONIG_SYN_OP_VBAR_ALT | \
+ ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF | \
+ ONIG_SYN_OP_QMARK_ZERO_ONE | \
+ ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR | ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR | \
+ ONIG_SYN_OP_ESC_W_WORD | \
+ ONIG_SYN_OP_ESC_B_WORD_BOUND | ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | \
+ ONIG_SYN_OP_ESC_S_WHITE_SPACE | ONIG_SYN_OP_ESC_D_DIGIT | \
+ ONIG_SYN_OP_LINE_ANCHOR )
+
+#define SYN_GNU_REGEX_BV \
+ ( ONIG_SYN_CONTEXT_INDEP_ANCHORS | ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | \
+ ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | ONIG_SYN_ALLOW_INVALID_INTERVAL | \
+ ONIG_SYN_BACKSLASH_ESCAPE_IN_CC | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
+
#ifdef ONIG_DEBUG
diff --git a/regparse.c b/regparse.c
index f59c289565..632e15c30a 100644
--- a/regparse.c
+++ b/regparse.c
@@ -9,114 +9,6 @@
#define WARN_BUFSIZE 256
-#define SYN_POSIX_COMMON_OP \
- ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_POSIX_BRACKET | \
- ONIG_SYN_OP_DECIMAL_BACKREF | \
- ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_ASTERISK_ZERO_INF | \
- ONIG_SYN_OP_LINE_ANCHOR | \
- ONIG_SYN_OP_ESC_CONTROL_CHARS )
-
-#define SYN_GNU_REGEX_OP \
- ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | \
- ONIG_SYN_OP_POSIX_BRACKET | ONIG_SYN_OP_DECIMAL_BACKREF | \
- ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_LPAREN_SUBEXP | \
- ONIG_SYN_OP_VBAR_ALT | \
- ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF | \
- ONIG_SYN_OP_QMARK_ZERO_ONE | \
- ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR | ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR | \
- ONIG_SYN_OP_ESC_W_WORD | \
- ONIG_SYN_OP_ESC_B_WORD_BOUND | ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | \
- ONIG_SYN_OP_ESC_S_WHITE_SPACE | ONIG_SYN_OP_ESC_D_DIGIT | \
- ONIG_SYN_OP_LINE_ANCHOR )
-
-#define SYN_GNU_REGEX_BV \
- ( ONIG_SYN_CONTEXT_INDEP_ANCHORS | ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | \
- ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | ONIG_SYN_ALLOW_INVALID_INTERVAL | \
- ONIG_SYN_BACKSLASH_ESCAPE_IN_CC | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
-
-#ifdef USE_VARIABLE_SYNTAX
-OnigSyntaxType OnigSyntaxPosixBasic = {
- ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
- ONIG_SYN_OP_ESC_BRACE_INTERVAL )
- , 0
- , 0
- , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
-};
-
-OnigSyntaxType OnigSyntaxPosixExtended = {
- ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_LPAREN_SUBEXP |
- ONIG_SYN_OP_BRACE_INTERVAL |
- ONIG_SYN_OP_PLUS_ONE_INF | ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_VBAR_ALT )
- , 0
- , ( ONIG_SYN_CONTEXT_INDEP_ANCHORS |
- ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS |
- ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP |
- ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
- , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
-};
-
-OnigSyntaxType OnigSyntaxEmacs = {
- ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC |
- ONIG_SYN_OP_ESC_BRACE_INTERVAL |
- ONIG_SYN_OP_ESC_LPAREN_SUBEXP | ONIG_SYN_OP_ESC_VBAR_ALT |
- ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF |
- ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_DECIMAL_BACKREF |
- ONIG_SYN_OP_LINE_ANCHOR | ONIG_SYN_OP_ESC_CONTROL_CHARS )
- , ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR
- , ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC
- , ONIG_OPTION_NONE
-};
-
-OnigSyntaxType OnigSyntaxGrep = {
- ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_POSIX_BRACKET |
- ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
- ONIG_SYN_OP_ESC_VBAR_ALT |
- ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_ESC_PLUS_ONE_INF |
- ONIG_SYN_OP_ESC_QMARK_ZERO_ONE | ONIG_SYN_OP_LINE_ANCHOR |
- ONIG_SYN_OP_ESC_W_WORD | ONIG_SYN_OP_ESC_B_WORD_BOUND |
- ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | ONIG_SYN_OP_DECIMAL_BACKREF )
- , 0
- , ( ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC | ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC )
- , ONIG_OPTION_NONE
-};
-
-OnigSyntaxType OnigSyntaxGnuRegex = {
- SYN_GNU_REGEX_OP
- , 0
- , SYN_GNU_REGEX_BV
- , ONIG_OPTION_NONE
-};
-
-OnigSyntaxType OnigSyntaxJava = {
- (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
- ONIG_SYN_OP_ESC_CONTROL_CHARS | ONIG_SYN_OP_ESC_C_CONTROL |
- ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 )
- & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
- , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
- ONIG_SYN_OP2_OPTION_PERL | ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
- ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL | ONIG_SYN_OP2_CCLASS_SET_OP |
- ONIG_SYN_OP2_ESC_V_VTAB | ONIG_SYN_OP2_ESC_U_HEX4 |
- ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY )
- , ( SYN_GNU_REGEX_BV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND )
- , ONIG_OPTION_SINGLELINE
-};
-
-OnigSyntaxType OnigSyntaxPerl = {
- (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
- ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
- ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
- ONIG_SYN_OP_ESC_C_CONTROL )
- & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
- , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
- ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
- ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
- ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
- ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS )
- , SYN_GNU_REGEX_BV
- , ONIG_OPTION_SINGLELINE
-};
-#endif /* USE_VARIABLE_SYNTAX */
-
OnigSyntaxType OnigSyntaxRuby = {
(( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
@@ -142,48 +34,6 @@ OnigSyntaxType OnigSyntaxRuby = {
OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_RUBY;
-#ifdef USE_VARIABLE_SYNTAX
-extern int
-onig_set_default_syntax(OnigSyntaxType* syntax)
-{
- if (IS_NULL(syntax))
- syntax = ONIG_SYNTAX_RUBY;
-
- OnigDefaultSyntax = syntax;
- return 0;
-}
-
-extern void
-onig_copy_syntax(OnigSyntaxType* to, OnigSyntaxType* from)
-{
- *to = *from;
-}
-
-extern void
-onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op)
-{
- syntax->op = op;
-}
-
-extern void
-onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2)
-{
- syntax->op2 = op2;
-}
-
-extern void
-onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior)
-{
- syntax->behavior = behavior;
-}
-
-extern void
-onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options)
-{
- syntax->options = options;
-}
-#endif
-
OnigMetaCharTableType OnigMetaCharTable = {
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )0 /* anychar '.' */
@@ -193,41 +43,6 @@ OnigMetaCharTableType OnigMetaCharTable = {
, (OnigCodePoint )0 /* anychar anytime */
};
-#ifdef USE_VARIABLE_META_CHARS
-extern int onig_set_meta_char(unsigned int what, OnigCodePoint code)
-{
- if (code >= 256) { /* restricted by current implementation. */
- return ONIGERR_INVALID_ARGUMENT;
- }
-
- switch (what) {
- case ONIG_META_CHAR_ESCAPE:
- OnigMetaCharTable.esc = (UChar )code;
- break;
- case ONIG_META_CHAR_ANYCHAR:
- OnigMetaCharTable.anychar = (UChar )code;
- break;
- case ONIG_META_CHAR_ANYTIME:
- OnigMetaCharTable.anytime = (UChar )code;
- break;
- case ONIG_META_CHAR_ZERO_OR_ONE_TIME:
- OnigMetaCharTable.zero_or_one_time = (UChar )code;
- break;
- case ONIG_META_CHAR_ONE_OR_MORE_TIME:
- OnigMetaCharTable.one_or_more_time = (UChar )code;
- break;
- case ONIG_META_CHAR_ANYCHAR_ANYTIME:
- OnigMetaCharTable.anychar_anytime = (UChar )code;
- break;
- default:
- return ONIGERR_INVALID_ARGUMENT;
- break;
- }
- return 0;
-}
-#endif /* USE_VARIABLE_META_CHARS */
-
-
extern void onig_null_warn(char* s) { }
#ifdef DEFAULT_WARN_FUNCTION
@@ -3197,7 +3012,12 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
while (1) {
if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
PFETCH(c);
- if (c == ')') break;
+ if (c == MC_ESC) {
+ if (!PEND) PFETCH(c);
+ }
+ else {
+ if (c == ')') break;
+ }
}
goto start;
}
@@ -4316,7 +4136,7 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
if (onig_verb_warn != onig_null_warn) {
onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
env->pattern, env->pattern_end,
- "nested repeat operator '%s and %s' should be replaced with '%s'",
+ "nested repeat operator '%s and %s' was replaced with '%s'",
PopularQStr[targetq_num], PopularQStr[nestq_num],
ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
(*onig_verb_warn)(buf);