diff options
author | ksaito <ksaito@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2004-04-27 15:53:16 +0000 |
---|---|---|
committer | ksaito <ksaito@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2004-04-27 15:53:16 +0000 |
commit | bb5b3f4f8fa4d4a4d959caa9de0e2dd75b8bf821 (patch) | |
tree | ce288a93749b089da1e289e9613a1fe95afe1401 | |
parent | 3617d129fe30a0c4cd097a5ad4b273b3cbd8bcb7 (diff) | |
download | ruby-bb5b3f4f8fa4d4a4d959caa9de0e2dd75b8bf821.tar.gz |
imported Oni Guruma 2.2.7.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/KOSAKO@6225 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- | oniguruma.h | 8 | ||||
-rw-r--r-- | regcomp.c | 29 | ||||
-rw-r--r-- | regexec.c | 57 | ||||
-rw-r--r-- | regint.h | 28 | ||||
-rw-r--r-- | regparse.c | 194 |
5 files changed, 86 insertions, 230 deletions
diff --git a/oniguruma.h b/oniguruma.h index 3daec9d49b..3fd9f4c395 100644 --- a/oniguruma.h +++ b/oniguruma.h @@ -11,7 +11,7 @@ #define ONIGURUMA #define ONIGURUMA_VERSION_MAJOR 2 #define ONIGURUMA_VERSION_MINOR 2 -#define ONIGURUMA_VERSION_TEENY 6 +#define ONIGURUMA_VERSION_TEENY 8 #ifndef P_ #if defined(__STDC__) || defined(_WIN32) @@ -401,7 +401,7 @@ ONIG_EXTERN OnigSyntaxType OnigSyntaxJava; ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl; ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby; -/* predefined syntaxes (see regparse.c) */ +/* predefined syntaxes (see regsyntax.c) */ #define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic) #define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended) #define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs) @@ -716,6 +716,10 @@ void onig_set_syntax_options P_((OnigSyntaxType* syntax, OnigOptionType options) ONIG_EXTERN int onig_set_meta_char P_((unsigned int what, OnigCodePoint code)); ONIG_EXTERN +unsigned int onig_get_match_stack_limit_size P_((void)); +ONIG_EXTERN +int onig_set_match_stack_limit_size P_((unsigned int size)); +ONIG_EXTERN int onig_end P_((void)); ONIG_EXTERN const char* onig_version P_((void)); @@ -3021,7 +3021,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) NSTRING_SET_CASE_AMBIG(node); break; } - p++; + p += enc_len(reg->enc, *p); } } break; @@ -3950,22 +3950,17 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } } - if (IS_NULL(cc->mbuf)) { - if (cc->not) { - for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - add_char_opt_map_info(&opt->map, i); - } - mb_found = 1; - } - } - else { - for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - z = ONIGENC_IS_MBC_HEAD(env->enc, i); - if (z) { - mb_found = 1; - add_char_opt_map_info(&opt->map, i); - } - } + if (! ONIGENC_IS_SINGLEBYTE(env->enc)) { + if (! IS_NULL(cc->mbuf) || + (cc->not != 0 && found != 0)) { + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + z = ONIGENC_IS_MBC_HEAD(env->enc, i); + if (z) { + mb_found = 1; + add_char_opt_map_info(&opt->map, i); + } + } + } } if (mb_found) { @@ -362,11 +362,26 @@ typedef struct { };\ } while(0) +static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE; + +extern unsigned int +onig_get_match_stack_limit_size(void) +{ + return MatchStackLimitSize; +} + +extern int +onig_set_match_stack_limit_size(unsigned int size) +{ + MatchStackLimitSize = size; + return 0; +} + static int stack_double(StackType** arg_stk_base, StackType** arg_stk_end, StackType** arg_stk, StackType* stk_alloc, MatchArg* msa) { - int n; + unsigned int n; StackType *x, *stk_base, *stk_end, *stk; stk_base = *arg_stk_base; @@ -385,7 +400,12 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, } else { n *= 2; - if (n > MATCH_STACK_LIMIT_SIZE) return ONIGERR_MATCH_STACK_LIMIT_OVER; + if (MatchStackLimitSize != 0 && n > MatchStackLimitSize) { + if ((unsigned int )(stk_end - stk_base) == MatchStackLimitSize) + return ONIGERR_MATCH_STACK_LIMIT_OVER; + else + n = MatchStackLimitSize; + } x = (StackType* )xrealloc(stk_base, sizeof(StackType) * n); if (IS_NULL(x)) { STACK_SAVE; @@ -1171,10 +1191,9 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, goto fail; /* for retry */ } } - else { - /* default behavior: return first-matching result. */ - goto finish; - } + + /* default behavior: return first-matching result. */ + goto finish; break; case OP_EXACT1: STAT_OP_IN(OP_EXACT1); @@ -2574,11 +2593,13 @@ bm_search_notrev(regex_t* reg, UChar* target, UChar* target_end, if (t < target) return p + 1; skip = reg->map[*s]; - p++; + p = s + 1; + if (p >= text_end) return (UChar* )NULL; t = p; - while ((p - t) < skip) { + do { p += enc_len(reg->enc, *p); - } + } while ((p - t) < skip && p < text_end); + s += (p - t); } } @@ -2592,11 +2613,13 @@ bm_search_notrev(regex_t* reg, UChar* target, UChar* target_end, if (t < target) return p + 1; skip = reg->int_map[*s]; - p++; + p = s + 1; + if (p >= text_end) return (UChar* )NULL; t = p; - while ((p - t) < skip) { + do { p += enc_len(reg->enc, *p); - } + } while ((p - t) < skip && p < text_end); + s += (p - t); } } @@ -3288,13 +3311,3 @@ onig_get_syntax(regex_t* reg) { return reg->syntax; } - -extern const char* -onig_version(void) -{ -#define MSTR(a) # a - - return (MSTR(ONIGURUMA_VERSION_MAJOR) "." - MSTR(ONIGURUMA_VERSION_MINOR) "." - MSTR(ONIGURUMA_VERSION_TEENY)); -} @@ -46,13 +46,12 @@ #define USE_QUALIFIER_PEEK_NEXT #define INIT_MATCH_STACK_SIZE 160 -#define MATCH_STACK_LIMIT_SIZE 500000 +#define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */ /* interface to external system */ #ifdef NOT_RUBY /* gived from Makefile */ #include "config.h" #define USE_VARIABLE_META_CHARS -#define USE_VARIABLE_SYNTAX #define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */ #define USE_POSIX_REGION_OPTION /* needed for POSIX API support */ #define THREAD_ATOMIC_START /* depend on thread system */ @@ -654,6 +653,31 @@ extern OnigMetaCharTableType OnigMetaCharTable; #define MC_ONE_OR_MORE_TIME OnigMetaCharTable.one_or_more_time #define MC_ANYCHAR_ANYTIME OnigMetaCharTable.anychar_anytime +#define SYN_POSIX_COMMON_OP \ + ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_POSIX_BRACKET | \ + ONIG_SYN_OP_DECIMAL_BACKREF | \ + ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_ASTERISK_ZERO_INF | \ + ONIG_SYN_OP_LINE_ANCHOR | \ + ONIG_SYN_OP_ESC_CONTROL_CHARS ) + +#define SYN_GNU_REGEX_OP \ + ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | \ + ONIG_SYN_OP_POSIX_BRACKET | ONIG_SYN_OP_DECIMAL_BACKREF | \ + ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_LPAREN_SUBEXP | \ + ONIG_SYN_OP_VBAR_ALT | \ + ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF | \ + ONIG_SYN_OP_QMARK_ZERO_ONE | \ + ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR | ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR | \ + ONIG_SYN_OP_ESC_W_WORD | \ + ONIG_SYN_OP_ESC_B_WORD_BOUND | ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | \ + ONIG_SYN_OP_ESC_S_WHITE_SPACE | ONIG_SYN_OP_ESC_D_DIGIT | \ + ONIG_SYN_OP_LINE_ANCHOR ) + +#define SYN_GNU_REGEX_BV \ + ( ONIG_SYN_CONTEXT_INDEP_ANCHORS | ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | \ + ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | ONIG_SYN_ALLOW_INVALID_INTERVAL | \ + ONIG_SYN_BACKSLASH_ESCAPE_IN_CC | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC ) + #ifdef ONIG_DEBUG diff --git a/regparse.c b/regparse.c index f59c289565..632e15c30a 100644 --- a/regparse.c +++ b/regparse.c @@ -9,114 +9,6 @@ #define WARN_BUFSIZE 256 -#define SYN_POSIX_COMMON_OP \ - ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_POSIX_BRACKET | \ - ONIG_SYN_OP_DECIMAL_BACKREF | \ - ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_ASTERISK_ZERO_INF | \ - ONIG_SYN_OP_LINE_ANCHOR | \ - ONIG_SYN_OP_ESC_CONTROL_CHARS ) - -#define SYN_GNU_REGEX_OP \ - ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | \ - ONIG_SYN_OP_POSIX_BRACKET | ONIG_SYN_OP_DECIMAL_BACKREF | \ - ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_LPAREN_SUBEXP | \ - ONIG_SYN_OP_VBAR_ALT | \ - ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF | \ - ONIG_SYN_OP_QMARK_ZERO_ONE | \ - ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR | ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR | \ - ONIG_SYN_OP_ESC_W_WORD | \ - ONIG_SYN_OP_ESC_B_WORD_BOUND | ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | \ - ONIG_SYN_OP_ESC_S_WHITE_SPACE | ONIG_SYN_OP_ESC_D_DIGIT | \ - ONIG_SYN_OP_LINE_ANCHOR ) - -#define SYN_GNU_REGEX_BV \ - ( ONIG_SYN_CONTEXT_INDEP_ANCHORS | ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | \ - ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | ONIG_SYN_ALLOW_INVALID_INTERVAL | \ - ONIG_SYN_BACKSLASH_ESCAPE_IN_CC | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC ) - -#ifdef USE_VARIABLE_SYNTAX -OnigSyntaxType OnigSyntaxPosixBasic = { - ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_ESC_LPAREN_SUBEXP | - ONIG_SYN_OP_ESC_BRACE_INTERVAL ) - , 0 - , 0 - , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE ) -}; - -OnigSyntaxType OnigSyntaxPosixExtended = { - ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_LPAREN_SUBEXP | - ONIG_SYN_OP_BRACE_INTERVAL | - ONIG_SYN_OP_PLUS_ONE_INF | ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_VBAR_ALT ) - , 0 - , ( ONIG_SYN_CONTEXT_INDEP_ANCHORS | - ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | - ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP | - ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC ) - , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE ) -}; - -OnigSyntaxType OnigSyntaxEmacs = { - ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | - ONIG_SYN_OP_ESC_BRACE_INTERVAL | - ONIG_SYN_OP_ESC_LPAREN_SUBEXP | ONIG_SYN_OP_ESC_VBAR_ALT | - ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF | - ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_DECIMAL_BACKREF | - ONIG_SYN_OP_LINE_ANCHOR | ONIG_SYN_OP_ESC_CONTROL_CHARS ) - , ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR - , ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC - , ONIG_OPTION_NONE -}; - -OnigSyntaxType OnigSyntaxGrep = { - ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_POSIX_BRACKET | - ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_ESC_LPAREN_SUBEXP | - ONIG_SYN_OP_ESC_VBAR_ALT | - ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_ESC_PLUS_ONE_INF | - ONIG_SYN_OP_ESC_QMARK_ZERO_ONE | ONIG_SYN_OP_LINE_ANCHOR | - ONIG_SYN_OP_ESC_W_WORD | ONIG_SYN_OP_ESC_B_WORD_BOUND | - ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | ONIG_SYN_OP_DECIMAL_BACKREF ) - , 0 - , ( ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC | ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC ) - , ONIG_OPTION_NONE -}; - -OnigSyntaxType OnigSyntaxGnuRegex = { - SYN_GNU_REGEX_OP - , 0 - , SYN_GNU_REGEX_BV - , ONIG_OPTION_NONE -}; - -OnigSyntaxType OnigSyntaxJava = { - (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | - ONIG_SYN_OP_ESC_CONTROL_CHARS | ONIG_SYN_OP_ESC_C_CONTROL | - ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 ) - & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) - , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | ONIG_SYN_OP2_QMARK_GROUP_EFFECT | - ONIG_SYN_OP2_OPTION_PERL | ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT | - ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL | ONIG_SYN_OP2_CCLASS_SET_OP | - ONIG_SYN_OP2_ESC_V_VTAB | ONIG_SYN_OP2_ESC_U_HEX4 | - ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY ) - , ( SYN_GNU_REGEX_BV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND ) - , ONIG_OPTION_SINGLELINE -}; - -OnigSyntaxType OnigSyntaxPerl = { - (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | - ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 | - ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS | - ONIG_SYN_OP_ESC_C_CONTROL ) - & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) - , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | - ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL | - ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY | - ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT | - ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS ) - , SYN_GNU_REGEX_BV - , ONIG_OPTION_SINGLELINE -}; -#endif /* USE_VARIABLE_SYNTAX */ - OnigSyntaxType OnigSyntaxRuby = { (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 | @@ -142,48 +34,6 @@ OnigSyntaxType OnigSyntaxRuby = { OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_RUBY; -#ifdef USE_VARIABLE_SYNTAX -extern int -onig_set_default_syntax(OnigSyntaxType* syntax) -{ - if (IS_NULL(syntax)) - syntax = ONIG_SYNTAX_RUBY; - - OnigDefaultSyntax = syntax; - return 0; -} - -extern void -onig_copy_syntax(OnigSyntaxType* to, OnigSyntaxType* from) -{ - *to = *from; -} - -extern void -onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op) -{ - syntax->op = op; -} - -extern void -onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2) -{ - syntax->op2 = op2; -} - -extern void -onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior) -{ - syntax->behavior = behavior; -} - -extern void -onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options) -{ - syntax->options = options; -} -#endif - OnigMetaCharTableType OnigMetaCharTable = { (OnigCodePoint )'\\' /* esc */ , (OnigCodePoint )0 /* anychar '.' */ @@ -193,41 +43,6 @@ OnigMetaCharTableType OnigMetaCharTable = { , (OnigCodePoint )0 /* anychar anytime */ }; -#ifdef USE_VARIABLE_META_CHARS -extern int onig_set_meta_char(unsigned int what, OnigCodePoint code) -{ - if (code >= 256) { /* restricted by current implementation. */ - return ONIGERR_INVALID_ARGUMENT; - } - - switch (what) { - case ONIG_META_CHAR_ESCAPE: - OnigMetaCharTable.esc = (UChar )code; - break; - case ONIG_META_CHAR_ANYCHAR: - OnigMetaCharTable.anychar = (UChar )code; - break; - case ONIG_META_CHAR_ANYTIME: - OnigMetaCharTable.anytime = (UChar )code; - break; - case ONIG_META_CHAR_ZERO_OR_ONE_TIME: - OnigMetaCharTable.zero_or_one_time = (UChar )code; - break; - case ONIG_META_CHAR_ONE_OR_MORE_TIME: - OnigMetaCharTable.one_or_more_time = (UChar )code; - break; - case ONIG_META_CHAR_ANYCHAR_ANYTIME: - OnigMetaCharTable.anychar_anytime = (UChar )code; - break; - default: - return ONIGERR_INVALID_ARGUMENT; - break; - } - return 0; -} -#endif /* USE_VARIABLE_META_CHARS */ - - extern void onig_null_warn(char* s) { } #ifdef DEFAULT_WARN_FUNCTION @@ -3197,7 +3012,12 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) while (1) { if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; PFETCH(c); - if (c == ')') break; + if (c == MC_ESC) { + if (!PEND) PFETCH(c); + } + else { + if (c == ')') break; + } } goto start; } @@ -4316,7 +4136,7 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env) if (onig_verb_warn != onig_null_warn) { onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, env->pattern, env->pattern_end, - "nested repeat operator '%s and %s' should be replaced with '%s'", + "nested repeat operator '%s and %s' was replaced with '%s'", PopularQStr[targetq_num], PopularQStr[nestq_num], ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]); (*onig_verb_warn)(buf); |