diff options
author | Rui Hirokawa <hirokawa@php.net> | 2008-07-13 01:25:22 +0000 |
---|---|---|
committer | Rui Hirokawa <hirokawa@php.net> | 2008-07-13 01:25:22 +0000 |
commit | 7aab46a2f1c5a4dd21029cd9f78e8f4a6bb3abb2 (patch) | |
tree | 27afeef14546945094becdbf874c7196de2ebda3 | |
parent | 64cf2c2f50bd0afc1ad72a5ce613b6c457bfd926 (diff) | |
download | php-git-7aab46a2f1c5a4dd21029cd9f78e8f4a6bb3abb2.tar.gz |
updated bundled oniguruma from 4.4.0 to 4.7.1
46 files changed, 1035 insertions, 1352 deletions
diff --git a/ext/mbstring/oniguruma/HISTORY b/ext/mbstring/oniguruma/HISTORY index 6c824a697b..a1debefa49 100644 --- a/ext/mbstring/oniguruma/HISTORY +++ b/ext/mbstring/oniguruma/HISTORY @@ -1,5 +1,98 @@ History +2007/08/16: Version 4.7.1 + +2007/08/16: [test] success in ruby 1.9.0 (2007-04-06) [i686-linux]. +2007/07/04: [spec] (thanks K.Takata) + ONIG_OPTION_SINGLELINE: '$' -> '\Z' (as Perl) +2007/07/04: [dist] (thanks K.Takata) + fix documents API and API.ja. + +2007/06/18: Version 4.7.0 + +2007/06/18: [test] success in ruby 1.9.0 (2007-04-06) [i686-linux]. +2007/06/18: [bug] (thanks KUBO Takehiro) + WORD_ALIGNMENT_SIZE must be sizeof(OnigCodePoint). +2007/06/05: [impl] add #ifndef vsnprintf in regint.h. +2007/06/05: [bug] should check USE_CRNL_AS_LINE_TERMINATOR case + in onig_search(). + +2007/04/12: Version 4.6.2 + +2007/04/09: [impl] change STATE_CHECK_BUFF_MAX_SIZE value from 0x8000 + to 0x4000. +2007/03/26: [impl] add 'void' to function declarations. + +2007/03/06: Version 4.6.1 + +2007/03/06: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux]. +2007/03/06: [bug] add #include <malloc.h> for bcc32. + (In bcc32, alloca() is declared in malloc.h.) +2007/03/06: [impl] remove including version.h of Ruby. +2007/03/02: [bug] invalid optimization for semi-end-buf in onig_search(). + ex. /\n\Z/.match("aaaaaaaaaa\n") +2007/03/02: [impl] move range > start check position in end_buf process. + +2007/02/08: Version 4.6.0 + +2007/02/08: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux]. +2007/01/09: [tune] select_opt_exact_info() didn't work for empty info. + ex. /.a/ make MAP info instead of EXACT info. +2006/12/29: [impl] add print_enc_string() for ONIG_DEBUG mode. +2006/12/22: [spec] should check too short multibyte char in parse_exp(). + add USE_PAD_TO_SHORT_BYTE_CHAR. + ex. /\x00/ in UTF16 should be error. + +2006/11/17: Version 4.5.1 + +2006/11/17: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux]. +2006/11/15: [impl] remove CHECK_INTERRUPT. +2006/11/10: [bug] 0x24, 0x2b, 0x3c, 0x3d, 0x3e, 0x5e, 0x60, 0x7c, 0x7e + should be [:punct:]. +2006/11/08: [impl] rename QUALIFIER -> QUANTIFIER. +2006/11/07: [bug] (thanks Byte) + add 0xa3 <=> 0xb3 to CaseFoldMap[] for KOI8-R. + +2006/11/06: Version 4.5.0 + +2006/11/06: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux]. +2006/11/06: [API] remove ONIGENC_AMBIGUOUS_MATCH_COMPOUND. +2006/11/06: [spec] change ONIG_OPTION_FIND_LONGEST to search all of + the string range. + add USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE. + +2006/10/30: Version 4.4.6 + +2006/10/30: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux]. +2006/10/30: [impl] (thanks K.Takata) + add THREAD_SYSTEM_INIT and THREAD_SYSTEM_END. +2006/10/30: [bug] (thanks Wolfgang Nadasi-Donner) + invalid offset value was used in STATE_CHECK_BUFF_INIT(). + +2006/10/24: Version 4.4.5 + +2006/10/24: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux]. +2006/10/24: [impl] escape -Wall warning. +2006/10/24: [tune] (thanks Kornelius Kalnbach) + String#scan for long string needs long time compare with + old Ruby + by initialization time for combination explosion check + ex. ("test " * 100_000).scan(/\w*\s?/) + change STATE_CHECK_BUFF_MAX_SIZE from 0x8000000 to 0x8000. + reduce initialization area of state_check_buff. +2006/10/16: [bug] (thanks Akinori Musha) + first argument of rb_warn() should be format string. +2006/10/10: [impl] add msa.state_check_buff_size initialization + in onig_search(). +2006/10/10: [bug] should call onig_st_free_table() in + onig_free_shared_cclass_table(). +2006/10/10: [impl] remove OP_WORD_SB and OP_WORD_MB. +2006/09/29: [impl] initialize state_check_buff_size in STATE_CHECK_BUFF_INIT(). + make valgrind happy. +2006/09/22: [impl] convert to ascii for parameter string in + onig_error_code_to_str(). + add enc member into OnigErrorInfo. + 2006/09/19: Version 4.4.4 2006/09/19: [test] success in ruby 1.9.0 (2006-08-22) [i686-linux]. @@ -1717,15 +1810,6 @@ History [test: test] [memo: memo] -- -<branch> -svn mkdir http://localhost/repos/branches -m "" -svn mkdir http://localhost/repos/branches/oniguruma -m "" -svn copy http://localhost/repos/trunk/oniguruma http://localhost/repos/branches/oniguruma/2.X -m "branch for 8-bit encodings only" - -<create tag> -svn copy http://localhost/repos/trunk/oniguruma http://localhost/repos/tags/oniguruma/X.X.X -m "onigdXXXXXXXX" - - <CVS: show all tags> cvs history -T diff --git a/ext/mbstring/oniguruma/README b/ext/mbstring/oniguruma/README index f2cc7c9815..dff7fba562 100644 --- a/ext/mbstring/oniguruma/README +++ b/ext/mbstring/oniguruma/README @@ -1,9 +1,8 @@ -README 2006/05/15 +README 2007/06/18 Oniguruma ---- (C) K.Kosako <sndgk393 AT ybb DOT ne DOT jp> http://www.geocities.jp/kosako3/oniguruma/ -http://www.ruby-lang.org/cgi-bin/cvsweb.cgi/oniguruma/ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/ Oniguruma is a regular expressions library. @@ -183,18 +182,8 @@ API differences with Japanized GNU regex(version 0.12) of Ruby 1.8/1.6 + re_alloc_pattern() is added. -ToDo - ? ignore case in full code point range of Unicode. - ? Unicode Property. - ? ambig-flag Katakana <-> Hiragana. - ? add ONIG_OPTION_NOTBOS/NOTEOS. (\A, \z, \Z) - ?? \X (== \PM\pM*) - ?? implement syntax behavior ONIG_SYN_CONTEXT_INDEP_ANCHORS. - ?? variable line separator. - ?? transmission stopper. (return ONIG_STOP from match_at()) - -and I'm thankful to Akinori MUSHA. +I'm thankful to Akinori MUSHA. Mail Address: K.Kosako <sndgk393 AT ybb DOT ne DOT jp> diff --git a/ext/mbstring/oniguruma/README.ja b/ext/mbstring/oniguruma/README.ja index 2394e958ff..2dee793cae 100644 --- a/ext/mbstring/oniguruma/README.ja +++ b/ext/mbstring/oniguruma/README.ja @@ -1,9 +1,8 @@ -README.ja 2006/05/15 +README.ja 2007/06/18 鬼車 ---- (C) K.Kosako <sndgk393 AT ybb DOT ne DOT jp> http://www.geocities.jp/kosako3/oniguruma/ -http://www.ruby-lang.org/cgi-bin/cvsweb.cgi/oniguruma/ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/ 鬼車は正規表現ライブラリである。 @@ -187,18 +186,7 @@ Ruby 1.8/1.6の日本語化GNU regexとのAPIの違い + re_alloc_pattern() が追加された。 -残件 - - ? Unicode全コードポイント領域での大文字小文字照合 - ? Unicodeプロパティ - ? ambig-flag Katakana <-> Hiragana - ? ONIG_OPTION_NOTBOS/NOTEOS追加 (\A, \z, \Z) - ?? \X (== \PM\pM*) - ?? 文法要素 ONIG_SYN_CONTEXT_INDEP_ANCHORSの実装 - ?? 改行文字(文字列)を変更できる - ?? 検索位置移動停止演算子 (match_at()からONIG_STOPを返す) - -and I'm thankful to Akinori MUSHA. +I'm thankful to Akinori MUSHA. アドレス: K.Kosako <sndgk393 AT ybb DOT ne DOT jp> diff --git a/ext/mbstring/oniguruma/doc/API b/ext/mbstring/oniguruma/doc/API index 7374f65bd4..2f66287d49 100644 --- a/ext/mbstring/oniguruma/doc/API +++ b/ext/mbstring/oniguruma/doc/API @@ -1,4 +1,4 @@ -Oniguruma API Version 4.1.0 2006/05/15 +Oniguruma API Version 4.7.1 2007/07/04 #include <oniguruma.h> @@ -63,7 +63,7 @@ Oniguruma API Version 4.1.0 2006/05/15 4 option: compile time options. ONIG_OPTION_NONE no option - ONIG_OPTION_SINGLELINE '^' -> '\A', '$' -> '\z', '\Z' -> '\z' + ONIG_OPTION_SINGLELINE '^' -> '\A', '$' -> '\Z' ONIG_OPTION_MULTILINE '.' match with newline ONIG_OPTION_IGNORECASE ambiguity match on ONIG_OPTION_EXTEND extended pattern form @@ -159,7 +159,6 @@ Oniguruma API Version 4.1.0 2006/05/15 ONIGENC_AMBIGUOUS_MATCH_NONE: exact ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE: ignore case for ASCII ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE: ignore case for non-ASCII - ONIGENC_AMBIGUOUS_MATCH_COMPOUND: grapheme cluster as a char ONIGENC_AMBIGUOUS_MATCH_FULL: all ambiguity on ONIGENC_AMBIGUOUS_MATCH_DEFAULT: (ASCII | NONASCII) onig_set_default_ambig_flag() diff --git a/ext/mbstring/oniguruma/doc/API.ja b/ext/mbstring/oniguruma/doc/API.ja index 2682da4808..f2a8bd6f10 100644 --- a/ext/mbstring/oniguruma/doc/API.ja +++ b/ext/mbstring/oniguruma/doc/API.ja @@ -1,4 +1,4 @@ -鬼車インターフェース Version 4.1.0 2006/05/15 +鬼車インターフェース Version 4.7.1 2007/07/04 #include <oniguruma.h> @@ -64,7 +64,7 @@ 4 option: 正規表現コンパイル時オプション ONIG_OPTION_NONE オプションなし - ONIG_OPTION_SINGLELINE '^' -> '\A', '$' -> '\z', '\Z' -> '\z' + ONIG_OPTION_SINGLELINE '^' -> '\A', '$' -> '\Z' ONIG_OPTION_MULTILINE '.'が改行にマッチする ONIG_OPTION_IGNORECASE 曖昧マッチ オン ONIG_OPTION_EXTEND パターン拡張形式 @@ -159,7 +159,6 @@ ONIGENC_AMBIGUOUS_MATCH_NONE: 曖昧無し ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE: ASCIIの大文字小文字 ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE: ASCII以外の大文字小文字 - ONIGENC_AMBIGUOUS_MATCH_COMPOUND: 合成文字 ONIGENC_AMBIGUOUS_MATCH_FULL: 全ての曖昧フラグ有効 ONIGENC_AMBIGUOUS_MATCH_DEFAULT: (ASCII | NONASCII) onig_set_default_ambig_flag() diff --git a/ext/mbstring/oniguruma/doc/FAQ b/ext/mbstring/oniguruma/doc/FAQ index 1621a359eb..dccf242c8d 100644 --- a/ext/mbstring/oniguruma/doc/FAQ +++ b/ext/mbstring/oniguruma/doc/FAQ @@ -1,4 +1,4 @@ -FAQ 2006/05/15 +FAQ 2006/10/30 1. Lognest match @@ -19,6 +19,10 @@ FAQ 2006/05/15 THREAD_ATOMIC_END THREAD_PASS + THREAD_SYSTEM_INIT + THREAD_SYSTEM_END + + (B) Application Layer The plural threads should not do simultaneously that making diff --git a/ext/mbstring/oniguruma/doc/FAQ.ja b/ext/mbstring/oniguruma/doc/FAQ.ja index 5f61b09554..5582765ee6 100644 --- a/ext/mbstring/oniguruma/doc/FAQ.ja +++ b/ext/mbstring/oniguruma/doc/FAQ.ja @@ -1,4 +1,4 @@ -FAQ 2006/05/15 +FAQ 2006/10/30 1. 最長マッチ @@ -20,6 +20,11 @@ FAQ 2006/05/15 THREAD_ATOMIC_END THREAD_PASS + 何らかの初期化/終了処理が必要であれば、以下のマクロに定義する。 + THREAD_SYSTEM_INIT + THREAD_SYSTEM_END + + (B) Application Layer 同時に複数のスレッドが、正規表現オブジェクトを作成する、 @@ -85,6 +90,8 @@ Rubyは自分自身で独自のスレッド機能を実装しています。 なります。 #define USE_MULTI_THREAD_SYSTEM +#define THREAD_SYSTEM_INIT +#define THREAD_SYSTEM_END #define THREAD_ATOMIC_START DEFER_INTS #define THREAD_ATOMIC_END ENABLE_INTS #define THREAD_PASS rb_thread_schedule() diff --git a/ext/mbstring/oniguruma/enc/iso8859_1.c b/ext/mbstring/oniguruma/enc/iso8859_1.c index 4dd708d841..5646f26c10 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_1.c +++ b/ext/mbstring/oniguruma/enc/iso8859_1.c @@ -2,7 +2,7 @@ iso8859_1.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -37,18 +37,18 @@ static const unsigned short EncISO_8859_1_CtypeTable[256] = { 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, @@ -72,16 +72,6 @@ iso_8859_1_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* e { const UChar* p = *pp; - if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - *lower = 0xdf; - (*pp) += 2; - return 1; - } - } - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && @@ -101,22 +91,6 @@ iso_8859_1_is_mbc_ambiguous(OnigAmbigType flag, { const UChar* p = *pp; - if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 1) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - (*pp) += 2; - return TRUE; - } - } - - if (*p == 0xdf) { - (*pp)++; - return TRUE; - } - } - (*pp)++; if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || @@ -153,8 +127,7 @@ OnigEncodingType OnigEncodingISO_8859_1 = { 1, /* max enc length */ 1, /* min enc length */ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), { (OnigCodePoint )'\\' /* esc */ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ diff --git a/ext/mbstring/oniguruma/enc/iso8859_10.c b/ext/mbstring/oniguruma/enc/iso8859_10.c index e317f49752..8081ef8010 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_10.c +++ b/ext/mbstring/oniguruma/enc/iso8859_10.c @@ -2,7 +2,7 @@ iso8859_10.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -73,18 +73,18 @@ static const unsigned short EncISO_8859_10_CtypeTable[256] = { 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, @@ -109,16 +109,6 @@ iso_8859_10_mbc_to_normalize(OnigAmbigType flag, { const UChar* p = *pp; - if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - *lower = 0xdf; - (*pp) += 2; - return 1; - } - } - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && @@ -138,22 +128,6 @@ iso_8859_10_is_mbc_ambiguous(OnigAmbigType flag, { const UChar* p = *pp; - if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 1) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - (*pp) += 2; - return TRUE; - } - } - - if (*p == 0xdf) { - (*pp)++; - return TRUE; - } - } - (*pp)++; if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || @@ -302,8 +276,7 @@ OnigEncodingType OnigEncodingISO_8859_10 = { 1, /* max enc length */ 1, /* min enc length */ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), { (OnigCodePoint )'\\' /* esc */ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ diff --git a/ext/mbstring/oniguruma/enc/iso8859_11.c b/ext/mbstring/oniguruma/enc/iso8859_11.c index 6afaa27f41..de9bb3b825 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_11.c +++ b/ext/mbstring/oniguruma/enc/iso8859_11.c @@ -37,18 +37,18 @@ static const unsigned short EncISO_8859_11_CtypeTable[256] = { 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, diff --git a/ext/mbstring/oniguruma/enc/iso8859_13.c b/ext/mbstring/oniguruma/enc/iso8859_13.c index abd7644527..69316edfc3 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_13.c +++ b/ext/mbstring/oniguruma/enc/iso8859_13.c @@ -2,7 +2,7 @@ iso8859_13.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -73,18 +73,18 @@ static const unsigned short EncISO_8859_13_CtypeTable[256] = { 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, @@ -104,21 +104,11 @@ static const unsigned short EncISO_8859_13_CtypeTable[256] = { }; static int -iso_8859_13_mbc_to_normalize(OnigAmbigType flag, - const UChar** pp, const UChar* end, UChar* lower) +mbc_to_normalize(OnigAmbigType flag, + const UChar** pp, const UChar* end, UChar* lower) { const UChar* p = *pp; - if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - *lower = 0xdf; - (*pp) += 2; - return 1; - } - } - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && @@ -133,27 +123,10 @@ iso_8859_13_mbc_to_normalize(OnigAmbigType flag, } static int -iso_8859_13_is_mbc_ambiguous(OnigAmbigType flag, - const UChar** pp, const UChar* end) +is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) { const UChar* p = *pp; - if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 1) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - (*pp) += 2; - return TRUE; - } - } - - if (*p == 0xdf) { - (*pp)++; - return TRUE; - } - } - (*pp)++; if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || @@ -176,7 +149,7 @@ iso_8859_13_is_mbc_ambiguous(OnigAmbigType flag, } static int -iso_8859_13_is_code_ctype(OnigCodePoint code, unsigned int ctype) +is_code_ctype(OnigCodePoint code, unsigned int ctype) { if (code < 256) return ENC_IS_ISO_8859_13_CTYPE(code, ctype); @@ -185,74 +158,73 @@ iso_8859_13_is_code_ctype(OnigCodePoint code, unsigned int ctype) } static int -iso_8859_13_get_all_pair_ambig_codes(OnigAmbigType flag, - const OnigPairAmbigCodes** ccs) +get_all_pair_ambig_codes(OnigAmbigType flag, const OnigPairAmbigCodes** ccs) { - static const OnigPairAmbigCodes cc[] = { - { 0xc0, 0xe0 }, - { 0xc1, 0xe1 }, - { 0xc2, 0xe2 }, - { 0xc3, 0xe3 }, - { 0xc4, 0xe4 }, - { 0xc5, 0xe5 }, - { 0xc6, 0xe6 }, - { 0xc7, 0xe7 }, - { 0xc8, 0xe8 }, - { 0xc9, 0xe9 }, - { 0xca, 0xea }, - { 0xcb, 0xeb }, - { 0xcc, 0xec }, - { 0xcd, 0xed }, - { 0xce, 0xee }, - { 0xcf, 0xef }, + static const OnigPairAmbigCodes cc[] = { + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, - { 0xd0, 0xf0 }, - { 0xd1, 0xf1 }, - { 0xd2, 0xf2 }, - { 0xd3, 0xf3 }, - { 0xd4, 0xf4 }, - { 0xd5, 0xf5 }, - { 0xd6, 0xf6 }, - { 0xd8, 0xf8 }, - { 0xd9, 0xf9 }, - { 0xda, 0xfa }, - { 0xdb, 0xfb }, - { 0xdc, 0xfc }, - { 0xdd, 0xfd }, - { 0xde, 0xfe }, + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe }, - { 0xe0, 0xc0 }, - { 0xe1, 0xc1 }, - { 0xe2, 0xc2 }, - { 0xe3, 0xc3 }, - { 0xe4, 0xc4 }, - { 0xe5, 0xc5 }, - { 0xe6, 0xc6 }, - { 0xe7, 0xc7 }, - { 0xe8, 0xc8 }, - { 0xe9, 0xc9 }, - { 0xea, 0xca }, - { 0xeb, 0xcb }, - { 0xec, 0xcc }, - { 0xed, 0xcd }, - { 0xee, 0xce }, - { 0xef, 0xcf }, + { 0xe0, 0xc0 }, + { 0xe1, 0xc1 }, + { 0xe2, 0xc2 }, + { 0xe3, 0xc3 }, + { 0xe4, 0xc4 }, + { 0xe5, 0xc5 }, + { 0xe6, 0xc6 }, + { 0xe7, 0xc7 }, + { 0xe8, 0xc8 }, + { 0xe9, 0xc9 }, + { 0xea, 0xca }, + { 0xeb, 0xcb }, + { 0xec, 0xcc }, + { 0xed, 0xcd }, + { 0xee, 0xce }, + { 0xef, 0xcf }, - { 0xf0, 0xd0 }, - { 0xf1, 0xd1 }, - { 0xf2, 0xd2 }, - { 0xf3, 0xd3 }, - { 0xf4, 0xd4 }, - { 0xf5, 0xd5 }, - { 0xf6, 0xd6 }, - { 0xf8, 0xd8 }, - { 0xf9, 0xd9 }, - { 0xfa, 0xda }, - { 0xfb, 0xdb }, - { 0xfc, 0xdc }, - { 0xfd, 0xdd }, - { 0xfe, 0xde } - }; + { 0xf0, 0xd0 }, + { 0xf1, 0xd1 }, + { 0xf2, 0xd2 }, + { 0xf3, 0xd3 }, + { 0xf4, 0xd4 }, + { 0xf5, 0xd5 }, + { 0xf6, 0xd6 }, + { 0xf8, 0xd8 }, + { 0xf9, 0xd9 }, + { 0xfa, 0xda }, + { 0xfb, 0xdb }, + { 0xfc, 0xdc }, + { 0xfd, 0xdd }, + { 0xfe, 0xde } + }; if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { *ccs = OnigAsciiPairAmbigCodes; @@ -272,8 +244,7 @@ OnigEncodingType OnigEncodingISO_8859_13 = { 1, /* max enc length */ 1, /* min enc length */ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), { (OnigCodePoint )'\\' /* esc */ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ @@ -286,11 +257,11 @@ OnigEncodingType OnigEncodingISO_8859_13 = { onigenc_single_byte_mbc_to_code, onigenc_single_byte_code_to_mbclen, onigenc_single_byte_code_to_mbc, - iso_8859_13_mbc_to_normalize, - iso_8859_13_is_mbc_ambiguous, - iso_8859_13_get_all_pair_ambig_codes, + mbc_to_normalize, + is_mbc_ambiguous, + get_all_pair_ambig_codes, onigenc_ess_tsett_get_all_comp_ambig_codes, - iso_8859_13_is_code_ctype, + is_code_ctype, onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match diff --git a/ext/mbstring/oniguruma/enc/iso8859_14.c b/ext/mbstring/oniguruma/enc/iso8859_14.c index d76771a1cf..44638cf13a 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_14.c +++ b/ext/mbstring/oniguruma/enc/iso8859_14.c @@ -2,7 +2,7 @@ iso8859_14.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -73,18 +73,18 @@ static const unsigned short EncISO_8859_14_CtypeTable[256] = { 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, @@ -104,21 +104,11 @@ static const unsigned short EncISO_8859_14_CtypeTable[256] = { }; static int -iso_8859_14_mbc_to_normalize(OnigAmbigType flag, - const UChar** pp, const UChar* end, UChar* lower) +mbc_to_normalize(OnigAmbigType flag, + const UChar** pp, const UChar* end, UChar* lower) { const UChar* p = *pp; - if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - *lower = 0xdf; - (*pp) += 2; - return 1; - } - } - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && @@ -133,27 +123,10 @@ iso_8859_14_mbc_to_normalize(OnigAmbigType flag, } static int -iso_8859_14_is_mbc_ambiguous(OnigAmbigType flag, - const UChar** pp, const UChar* end) +is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) { const UChar* p = *pp; - if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 1) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - (*pp) += 2; - return TRUE; - } - } - - if (*p == 0xdf) { - (*pp)++; - return TRUE; - } - } - (*pp)++; if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || @@ -176,7 +149,7 @@ iso_8859_14_is_mbc_ambiguous(OnigAmbigType flag, } static int -iso_8859_14_is_code_ctype(OnigCodePoint code, unsigned int ctype) +is_code_ctype(OnigCodePoint code, unsigned int ctype) { if (code < 256) return ENC_IS_ISO_8859_14_CTYPE(code, ctype); @@ -185,103 +158,102 @@ iso_8859_14_is_code_ctype(OnigCodePoint code, unsigned int ctype) } static int -iso_8859_14_get_all_pair_ambig_codes(OnigAmbigType flag, - const OnigPairAmbigCodes** ccs) +get_all_pair_ambig_codes(OnigAmbigType flag, const OnigPairAmbigCodes** ccs) { static const OnigPairAmbigCodes cc[] = { - { 0xa1, 0xa2 }, - { 0xa2, 0xa1 }, - { 0xa4, 0xa5 }, - { 0xa5, 0xa4 }, - { 0xa6, 0xab }, - { 0xa8, 0xb8 }, - { 0xaa, 0xba }, - { 0xab, 0xa6 }, - { 0xac, 0xbc }, - { 0xaf, 0xff }, + { 0xa1, 0xa2 }, + { 0xa2, 0xa1 }, + { 0xa4, 0xa5 }, + { 0xa5, 0xa4 }, + { 0xa6, 0xab }, + { 0xa8, 0xb8 }, + { 0xaa, 0xba }, + { 0xab, 0xa6 }, + { 0xac, 0xbc }, + { 0xaf, 0xff }, - { 0xb0, 0xb1 }, - { 0xb1, 0xb0 }, - { 0xb2, 0xb3 }, - { 0xb3, 0xb2 }, - { 0xb4, 0xb5 }, - { 0xb5, 0xb4 }, - { 0xb7, 0xb9 }, - { 0xb8, 0xa8 }, - { 0xb9, 0xb7 }, - { 0xba, 0xaa }, - { 0xbb, 0xbf }, - { 0xbc, 0xac }, - { 0xbd, 0xbe }, - { 0xbe, 0xbd }, - { 0xbf, 0xbb }, + { 0xb0, 0xb1 }, + { 0xb1, 0xb0 }, + { 0xb2, 0xb3 }, + { 0xb3, 0xb2 }, + { 0xb4, 0xb5 }, + { 0xb5, 0xb4 }, + { 0xb7, 0xb9 }, + { 0xb8, 0xa8 }, + { 0xb9, 0xb7 }, + { 0xba, 0xaa }, + { 0xbb, 0xbf }, + { 0xbc, 0xac }, + { 0xbd, 0xbe }, + { 0xbe, 0xbd }, + { 0xbf, 0xbb }, - { 0xc0, 0xe0 }, - { 0xc1, 0xe1 }, - { 0xc2, 0xe2 }, - { 0xc3, 0xe3 }, - { 0xc4, 0xe4 }, - { 0xc5, 0xe5 }, - { 0xc6, 0xe6 }, - { 0xc7, 0xe7 }, - { 0xc8, 0xe8 }, - { 0xc9, 0xe9 }, - { 0xca, 0xea }, - { 0xcb, 0xeb }, - { 0xcc, 0xec }, - { 0xcd, 0xed }, - { 0xce, 0xee }, - { 0xcf, 0xef }, + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, - { 0xd0, 0xf0 }, - { 0xd1, 0xf1 }, - { 0xd2, 0xf2 }, - { 0xd3, 0xf3 }, - { 0xd4, 0xf4 }, - { 0xd5, 0xf5 }, - { 0xd6, 0xf6 }, - { 0xd7, 0xf7 }, - { 0xd8, 0xf8 }, - { 0xd9, 0xf9 }, - { 0xda, 0xfa }, - { 0xdb, 0xfb }, - { 0xdc, 0xfc }, - { 0xdd, 0xfd }, - { 0xde, 0xfe }, + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd7, 0xf7 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe }, - { 0xe0, 0xc0 }, - { 0xe1, 0xc1 }, - { 0xe2, 0xc2 }, - { 0xe3, 0xc3 }, - { 0xe4, 0xc4 }, - { 0xe5, 0xc5 }, - { 0xe6, 0xc6 }, - { 0xe7, 0xc7 }, - { 0xe8, 0xc8 }, - { 0xe9, 0xc9 }, - { 0xea, 0xca }, - { 0xeb, 0xcb }, - { 0xec, 0xcc }, - { 0xed, 0xcd }, - { 0xee, 0xce }, - { 0xef, 0xcf }, + { 0xe0, 0xc0 }, + { 0xe1, 0xc1 }, + { 0xe2, 0xc2 }, + { 0xe3, 0xc3 }, + { 0xe4, 0xc4 }, + { 0xe5, 0xc5 }, + { 0xe6, 0xc6 }, + { 0xe7, 0xc7 }, + { 0xe8, 0xc8 }, + { 0xe9, 0xc9 }, + { 0xea, 0xca }, + { 0xeb, 0xcb }, + { 0xec, 0xcc }, + { 0xed, 0xcd }, + { 0xee, 0xce }, + { 0xef, 0xcf }, - { 0xf0, 0xd0 }, - { 0xf1, 0xd1 }, - { 0xf2, 0xd2 }, - { 0xf3, 0xd3 }, - { 0xf4, 0xd4 }, - { 0xf5, 0xd5 }, - { 0xf6, 0xd6 }, - { 0xf7, 0xd7 }, - { 0xf8, 0xd8 }, - { 0xf9, 0xd9 }, - { 0xfa, 0xda }, - { 0xfb, 0xdb }, - { 0xfc, 0xdc }, - { 0xfd, 0xdd }, - { 0xfe, 0xde }, - { 0xff, 0xaf } + { 0xf0, 0xd0 }, + { 0xf1, 0xd1 }, + { 0xf2, 0xd2 }, + { 0xf3, 0xd3 }, + { 0xf4, 0xd4 }, + { 0xf5, 0xd5 }, + { 0xf6, 0xd6 }, + { 0xf7, 0xd7 }, + { 0xf8, 0xd8 }, + { 0xf9, 0xd9 }, + { 0xfa, 0xda }, + { 0xfb, 0xdb }, + { 0xfc, 0xdc }, + { 0xfd, 0xdd }, + { 0xfe, 0xde }, + { 0xff, 0xaf } }; if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { @@ -302,8 +274,7 @@ OnigEncodingType OnigEncodingISO_8859_14 = { 1, /* max enc length */ 1, /* min enc length */ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), { (OnigCodePoint )'\\' /* esc */ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ @@ -316,11 +287,11 @@ OnigEncodingType OnigEncodingISO_8859_14 = { onigenc_single_byte_mbc_to_code, onigenc_single_byte_code_to_mbclen, onigenc_single_byte_code_to_mbc, - iso_8859_14_mbc_to_normalize, - iso_8859_14_is_mbc_ambiguous, - iso_8859_14_get_all_pair_ambig_codes, + mbc_to_normalize, + is_mbc_ambiguous, + get_all_pair_ambig_codes, onigenc_ess_tsett_get_all_comp_ambig_codes, - iso_8859_14_is_code_ctype, + is_code_ctype, onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match diff --git a/ext/mbstring/oniguruma/enc/iso8859_15.c b/ext/mbstring/oniguruma/enc/iso8859_15.c index d6611ed290..f643b895df 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_15.c +++ b/ext/mbstring/oniguruma/enc/iso8859_15.c @@ -2,7 +2,7 @@ iso8859_15.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -73,18 +73,18 @@ static const unsigned short EncISO_8859_15_CtypeTable[256] = { 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, @@ -104,21 +104,11 @@ static const unsigned short EncISO_8859_15_CtypeTable[256] = { }; static int -iso_8859_15_mbc_to_normalize(OnigAmbigType flag, - const UChar** pp, const UChar* end, UChar* lower) +mbc_to_normalize(OnigAmbigType flag, + const UChar** pp, const UChar* end, UChar* lower) { const UChar* p = *pp; - if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - *lower = 0xdf; - (*pp) += 2; - return 1; - } - } - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && @@ -133,27 +123,10 @@ iso_8859_15_mbc_to_normalize(OnigAmbigType flag, } static int -iso_8859_15_is_mbc_ambiguous(OnigAmbigType flag, - const UChar** pp, const UChar* end) +is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) { const UChar* p = *pp; - if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 1) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - (*pp) += 2; - return TRUE; - } - } - - if (*p == 0xdf) { - (*pp)++; - return TRUE; - } - } - (*pp)++; if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || @@ -176,7 +149,7 @@ iso_8859_15_is_mbc_ambiguous(OnigAmbigType flag, } static int -iso_8859_15_is_code_ctype(OnigCodePoint code, unsigned int ctype) +is_code_ctype(OnigCodePoint code, unsigned int ctype) { if (code < 256) return ENC_IS_ISO_8859_15_CTYPE(code, ctype); @@ -185,8 +158,8 @@ iso_8859_15_is_code_ctype(OnigCodePoint code, unsigned int ctype) } static int -iso_8859_15_get_all_pair_ambig_codes(OnigAmbigType flag, - const OnigPairAmbigCodes** ccs) +get_all_pair_ambig_codes(OnigAmbigType flag, + const OnigPairAmbigCodes** ccs) { static const OnigPairAmbigCodes cc[] = { { 0xa6, 0xa8 }, @@ -282,8 +255,7 @@ OnigEncodingType OnigEncodingISO_8859_15 = { 1, /* max enc length */ 1, /* min enc length */ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), { (OnigCodePoint )'\\' /* esc */ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ @@ -296,11 +268,11 @@ OnigEncodingType OnigEncodingISO_8859_15 = { onigenc_single_byte_mbc_to_code, onigenc_single_byte_code_to_mbclen, onigenc_single_byte_code_to_mbc, - iso_8859_15_mbc_to_normalize, - iso_8859_15_is_mbc_ambiguous, - iso_8859_15_get_all_pair_ambig_codes, + mbc_to_normalize, + is_mbc_ambiguous, + get_all_pair_ambig_codes, onigenc_ess_tsett_get_all_comp_ambig_codes, - iso_8859_15_is_code_ctype, + is_code_ctype, onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match diff --git a/ext/mbstring/oniguruma/enc/iso8859_16.c b/ext/mbstring/oniguruma/enc/iso8859_16.c index 23b868065c..921ae36d9d 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_16.c +++ b/ext/mbstring/oniguruma/enc/iso8859_16.c @@ -2,7 +2,7 @@ iso8859_16.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -73,18 +73,18 @@ static const unsigned short EncISO_8859_16_CtypeTable[256] = { 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, @@ -104,21 +104,11 @@ static const unsigned short EncISO_8859_16_CtypeTable[256] = { }; static int -iso_8859_16_mbc_to_normalize(OnigAmbigType flag, - const UChar** pp, const UChar* end, UChar* lower) +mbc_to_normalize(OnigAmbigType flag, + const UChar** pp, const UChar* end, UChar* lower) { const UChar* p = *pp; - if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - *lower = 0xdf; - (*pp) += 2; - return 1; - } - } - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && @@ -133,27 +123,10 @@ iso_8859_16_mbc_to_normalize(OnigAmbigType flag, } static int -iso_8859_16_is_mbc_ambiguous(OnigAmbigType flag, - const UChar** pp, const UChar* end) +is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) { const UChar* p = *pp; - if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 1) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - (*pp) += 2; - return TRUE; - } - } - - if (*p == 0xdf) { - (*pp)++; - return TRUE; - } - } - (*pp)++; if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || @@ -176,7 +149,7 @@ iso_8859_16_is_mbc_ambiguous(OnigAmbigType flag, } static int -iso_8859_16_is_code_ctype(OnigCodePoint code, unsigned int ctype) +is_code_ctype(OnigCodePoint code, unsigned int ctype) { if (code < 256) return ENC_IS_ISO_8859_16_CTYPE(code, ctype); @@ -185,97 +158,96 @@ iso_8859_16_is_code_ctype(OnigCodePoint code, unsigned int ctype) } static int -iso_8859_16_get_all_pair_ambig_codes(OnigAmbigType flag, - const OnigPairAmbigCodes** ccs) +get_all_pair_ambig_codes(OnigAmbigType flag, const OnigPairAmbigCodes** ccs) { static const OnigPairAmbigCodes cc[] = { - { 0xa1, 0xa2 }, - { 0xa2, 0xa1 }, - { 0xa3, 0xb3 }, - { 0xa6, 0xa8 }, - { 0xa8, 0xa6 }, - { 0xaa, 0xba }, - { 0xac, 0xae }, - { 0xae, 0xac }, - { 0xaf, 0xbf }, + { 0xa1, 0xa2 }, + { 0xa2, 0xa1 }, + { 0xa3, 0xb3 }, + { 0xa6, 0xa8 }, + { 0xa8, 0xa6 }, + { 0xaa, 0xba }, + { 0xac, 0xae }, + { 0xae, 0xac }, + { 0xaf, 0xbf }, - { 0xb2, 0xb9 }, - { 0xb3, 0xa3 }, - { 0xb4, 0xb8 }, - { 0xb8, 0xb4 }, - { 0xb9, 0xb2 }, - { 0xba, 0xaa }, - { 0xbc, 0xbd }, - { 0xbd, 0xbc }, - { 0xbe, 0xff }, - { 0xbf, 0xaf }, + { 0xb2, 0xb9 }, + { 0xb3, 0xa3 }, + { 0xb4, 0xb8 }, + { 0xb8, 0xb4 }, + { 0xb9, 0xb2 }, + { 0xba, 0xaa }, + { 0xbc, 0xbd }, + { 0xbd, 0xbc }, + { 0xbe, 0xff }, + { 0xbf, 0xaf }, - { 0xc0, 0xe0 }, - { 0xc1, 0xe1 }, - { 0xc2, 0xe2 }, - { 0xc3, 0xe3 }, - { 0xc4, 0xe4 }, - { 0xc5, 0xe5 }, - { 0xc6, 0xe6 }, - { 0xc7, 0xe7 }, - { 0xc8, 0xe8 }, - { 0xc9, 0xe9 }, - { 0xca, 0xea }, - { 0xcb, 0xeb }, - { 0xcc, 0xec }, - { 0xcd, 0xed }, - { 0xce, 0xee }, - { 0xcf, 0xef }, + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, - { 0xd0, 0xf0 }, - { 0xd1, 0xf1 }, - { 0xd2, 0xf2 }, - { 0xd3, 0xf3 }, - { 0xd4, 0xf4 }, - { 0xd5, 0xf5 }, - { 0xd6, 0xf6 }, - { 0xd7, 0xf7 }, - { 0xd8, 0xf8 }, - { 0xd9, 0xf9 }, - { 0xda, 0xfa }, - { 0xdb, 0xfb }, - { 0xdc, 0xfc }, - { 0xdd, 0xfd }, - { 0xde, 0xfe }, + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd7, 0xf7 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe }, - { 0xe0, 0xc0 }, - { 0xe1, 0xc1 }, - { 0xe2, 0xc2 }, - { 0xe3, 0xc3 }, - { 0xe4, 0xc4 }, - { 0xe5, 0xc5 }, - { 0xe6, 0xc6 }, - { 0xe7, 0xc7 }, - { 0xe8, 0xc8 }, - { 0xe9, 0xc9 }, - { 0xea, 0xca }, - { 0xeb, 0xcb }, - { 0xec, 0xcc }, - { 0xed, 0xcd }, - { 0xee, 0xce }, - { 0xef, 0xcf }, + { 0xe0, 0xc0 }, + { 0xe1, 0xc1 }, + { 0xe2, 0xc2 }, + { 0xe3, 0xc3 }, + { 0xe4, 0xc4 }, + { 0xe5, 0xc5 }, + { 0xe6, 0xc6 }, + { 0xe7, 0xc7 }, + { 0xe8, 0xc8 }, + { 0xe9, 0xc9 }, + { 0xea, 0xca }, + { 0xeb, 0xcb }, + { 0xec, 0xcc }, + { 0xed, 0xcd }, + { 0xee, 0xce }, + { 0xef, 0xcf }, - { 0xf0, 0xd0 }, - { 0xf1, 0xd1 }, - { 0xf2, 0xd2 }, - { 0xf3, 0xd3 }, - { 0xf4, 0xd4 }, - { 0xf5, 0xd5 }, - { 0xf6, 0xd6 }, - { 0xf7, 0xd7 }, - { 0xf8, 0xd8 }, - { 0xf9, 0xd9 }, - { 0xfa, 0xda }, - { 0xfb, 0xdb }, - { 0xfc, 0xdc }, - { 0xfd, 0xdd }, - { 0xfe, 0xde }, - { 0xff, 0xbe } + { 0xf0, 0xd0 }, + { 0xf1, 0xd1 }, + { 0xf2, 0xd2 }, + { 0xf3, 0xd3 }, + { 0xf4, 0xd4 }, + { 0xf5, 0xd5 }, + { 0xf6, 0xd6 }, + { 0xf7, 0xd7 }, + { 0xf8, 0xd8 }, + { 0xf9, 0xd9 }, + { 0xfa, 0xda }, + { 0xfb, 0xdb }, + { 0xfc, 0xdc }, + { 0xfd, 0xdd }, + { 0xfe, 0xde }, + { 0xff, 0xbe } }; if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { @@ -296,8 +268,7 @@ OnigEncodingType OnigEncodingISO_8859_16 = { 1, /* max enc length */ 1, /* min enc length */ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), { (OnigCodePoint )'\\' /* esc */ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ @@ -310,11 +281,11 @@ OnigEncodingType OnigEncodingISO_8859_16 = { onigenc_single_byte_mbc_to_code, onigenc_single_byte_code_to_mbclen, onigenc_single_byte_code_to_mbc, - iso_8859_16_mbc_to_normalize, - iso_8859_16_is_mbc_ambiguous, - iso_8859_16_get_all_pair_ambig_codes, + mbc_to_normalize, + is_mbc_ambiguous, + get_all_pair_ambig_codes, onigenc_ess_tsett_get_all_comp_ambig_codes, - iso_8859_16_is_code_ctype, + is_code_ctype, onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match diff --git a/ext/mbstring/oniguruma/enc/iso8859_2.c b/ext/mbstring/oniguruma/enc/iso8859_2.c index 5f21ff78ae..f8cb3756f2 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_2.c +++ b/ext/mbstring/oniguruma/enc/iso8859_2.c @@ -2,7 +2,7 @@ iso8859_2.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -73,18 +73,18 @@ static const unsigned short EncISO_8859_2_CtypeTable[256] = { 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, @@ -109,16 +109,6 @@ iso_8859_2_mbc_to_normalize(OnigAmbigType flag, { const UChar* p = *pp; - if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - *lower = 0xdf; - (*pp) += 2; - return 1; - } - } - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && @@ -138,22 +128,6 @@ iso_8859_2_is_mbc_ambiguous(OnigAmbigType flag, { const UChar* p = *pp; - if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 1) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - (*pp) += 2; - return TRUE; - } - } - - if (*p == 0xdf) { - (*pp)++; - return TRUE; - } - } - (*pp)++; if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || @@ -294,8 +268,7 @@ OnigEncodingType OnigEncodingISO_8859_2 = { 1, /* max enc length */ 1, /* min enc length */ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), { (OnigCodePoint )'\\' /* esc */ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ diff --git a/ext/mbstring/oniguruma/enc/iso8859_3.c b/ext/mbstring/oniguruma/enc/iso8859_3.c index 9ac3dab179..e62d20de7b 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_3.c +++ b/ext/mbstring/oniguruma/enc/iso8859_3.c @@ -2,7 +2,7 @@ iso8859_3.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -73,18 +73,18 @@ static const unsigned short EncISO_8859_3_CtypeTable[256] = { 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, @@ -109,16 +109,6 @@ iso_8859_3_mbc_to_normalize(OnigAmbigType flag, { const UChar* p = *pp; - if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - *lower = 0xdf; - (*pp) += 2; - return 1; - } - } - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && @@ -138,22 +128,6 @@ iso_8859_3_is_mbc_ambiguous(OnigAmbigType flag, { const UChar* p = *pp; - if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 1) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - (*pp) += 2; - return TRUE; - } - } - - if (*p == 0xdf) { - (*pp)++; - return TRUE; - } - } - (*pp)++; if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || @@ -283,8 +257,7 @@ OnigEncodingType OnigEncodingISO_8859_3 = { 1, /* max enc length */ 1, /* min enc length */ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), { (OnigCodePoint )'\\' /* esc */ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ diff --git a/ext/mbstring/oniguruma/enc/iso8859_4.c b/ext/mbstring/oniguruma/enc/iso8859_4.c index c54a2fa149..dd6bd7dfe3 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_4.c +++ b/ext/mbstring/oniguruma/enc/iso8859_4.c @@ -2,7 +2,7 @@ iso8859_4.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -73,18 +73,18 @@ static const unsigned short EncISO_8859_4_CtypeTable[256] = { 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, @@ -109,16 +109,6 @@ iso_8859_4_mbc_to_normalize(OnigAmbigType flag, { const UChar* p = *pp; - if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - *lower = 0xdf; - (*pp) += 2; - return 1; - } - } - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && @@ -138,22 +128,6 @@ iso_8859_4_is_mbc_ambiguous(OnigAmbigType flag, { const UChar* p = *pp; - if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 1) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - (*pp) += 2; - return TRUE; - } - } - - if (*p == 0xdf) { - (*pp)++; - return TRUE; - } - } - (*pp)++; if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || @@ -292,8 +266,7 @@ OnigEncodingType OnigEncodingISO_8859_4 = { 1, /* max enc length */ 1, /* min enc length */ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), { (OnigCodePoint )'\\' /* esc */ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ diff --git a/ext/mbstring/oniguruma/enc/iso8859_5.c b/ext/mbstring/oniguruma/enc/iso8859_5.c index 5b941e2eb9..87b7fb8a29 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_5.c +++ b/ext/mbstring/oniguruma/enc/iso8859_5.c @@ -73,18 +73,18 @@ static const unsigned short EncISO_8859_5_CtypeTable[256] = { 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, diff --git a/ext/mbstring/oniguruma/enc/iso8859_6.c b/ext/mbstring/oniguruma/enc/iso8859_6.c index bb5515d30b..fffcd0e7d1 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_6.c +++ b/ext/mbstring/oniguruma/enc/iso8859_6.c @@ -37,18 +37,18 @@ static const unsigned short EncISO_8859_6_CtypeTable[256] = { 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, diff --git a/ext/mbstring/oniguruma/enc/iso8859_7.c b/ext/mbstring/oniguruma/enc/iso8859_7.c index 2529dae666..e87661d84b 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_7.c +++ b/ext/mbstring/oniguruma/enc/iso8859_7.c @@ -73,18 +73,18 @@ static const unsigned short EncISO_8859_7_CtypeTable[256] = { 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, diff --git a/ext/mbstring/oniguruma/enc/iso8859_8.c b/ext/mbstring/oniguruma/enc/iso8859_8.c index d7f0fc5947..e76966c667 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_8.c +++ b/ext/mbstring/oniguruma/enc/iso8859_8.c @@ -37,18 +37,18 @@ static const unsigned short EncISO_8859_8_CtypeTable[256] = { 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, diff --git a/ext/mbstring/oniguruma/enc/iso8859_9.c b/ext/mbstring/oniguruma/enc/iso8859_9.c index f4bcac1ae3..16a30c5f24 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_9.c +++ b/ext/mbstring/oniguruma/enc/iso8859_9.c @@ -2,7 +2,7 @@ iso8859_9.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -73,18 +73,18 @@ static const unsigned short EncISO_8859_9_CtypeTable[256] = { 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, @@ -109,16 +109,6 @@ iso_8859_9_mbc_to_normalize(OnigAmbigType flag, { const UChar* p = *pp; - if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - *lower = 0xdf; - (*pp) += 2; - return 1; - } - } - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && @@ -138,22 +128,6 @@ iso_8859_9_is_mbc_ambiguous(OnigAmbigType flag, { const UChar* p = *pp; - if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 1) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - (*pp) += 2; - return TRUE; - } - } - - if (*p == 0xdf) { - (*pp)++; - return TRUE; - } - } - (*pp)++; if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || @@ -272,8 +246,7 @@ OnigEncodingType OnigEncodingISO_8859_9 = { 1, /* max enc length */ 1, /* min enc length */ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), { (OnigCodePoint )'\\' /* esc */ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ diff --git a/ext/mbstring/oniguruma/enc/koi8.c b/ext/mbstring/oniguruma/enc/koi8.c index 27f97f3072..d7277e862e 100644 --- a/ext/mbstring/oniguruma/enc/koi8.c +++ b/ext/mbstring/oniguruma/enc/koi8.c @@ -73,18 +73,18 @@ static const unsigned short EncKOI8_CtypeTable[256] = { 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, diff --git a/ext/mbstring/oniguruma/enc/koi8_r.c b/ext/mbstring/oniguruma/enc/koi8_r.c index d2a4440f2c..1010f5ff93 100644 --- a/ext/mbstring/oniguruma/enc/koi8_r.c +++ b/ext/mbstring/oniguruma/enc/koi8_r.c @@ -2,7 +2,7 @@ koi8_r.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -73,18 +73,18 @@ static const unsigned short EncKOI8_R_CtypeTable[256] = { 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, @@ -153,6 +153,9 @@ koi8_r_get_all_pair_ambig_codes(OnigAmbigType flag, const OnigPairAmbigCodes** ccs) { static const OnigPairAmbigCodes cc[] = { + { 0xa3, 0xb3 }, + { 0xb3, 0xa3 }, + { 0xc0, 0xe0 }, { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, diff --git a/ext/mbstring/oniguruma/enc/mktable.c b/ext/mbstring/oniguruma/enc/mktable.c index 6b9ef4c5b5..fcf057423c 100644 --- a/ext/mbstring/oniguruma/enc/mktable.c +++ b/ext/mbstring/oniguruma/enc/mktable.c @@ -2,7 +2,7 @@ mktable.c **********************************************************************/ /*- - * Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -27,6 +27,7 @@ * SUCH DAMAGE. */ +#include <stdlib.h> #include <stdio.h> #define NOT_RUBY @@ -614,15 +615,10 @@ static int IsPunct(int enc, int c) if (c >= 0x3c && c <= 0x3e) return 1; } - if (c >= 0x21 && c <= 0x23) return 1; - if (c >= 0x25 && c <= 0x2a) return 1; - if (c >= 0x2c && c <= 0x2f) return 1; - if (c >= 0x3a && c <= 0x3b) return 1; - if (c >= 0x3f && c <= 0x40) return 1; - if (c >= 0x5b && c <= 0x5d) return 1; - if (c == 0x5f) return 1; - if (c == 0x7b) return 1; - if (c == 0x7d) return 1; + if (c >= 0x21 && c <= 0x2f) return 1; + if (c >= 0x3a && c <= 0x40) return 1; + if (c >= 0x5b && c <= 0x60) return 1; + if (c >= 0x7b && c <= 0x7e) return 1; switch (enc) { case ISO_8859_1: diff --git a/ext/mbstring/oniguruma/enc/utf16_be.c b/ext/mbstring/oniguruma/enc/utf16_be.c index 536d107d6d..6ab80a6c1c 100755 --- a/ext/mbstring/oniguruma/enc/utf16_be.c +++ b/ext/mbstring/oniguruma/enc/utf16_be.c @@ -2,7 +2,7 @@ utf16_be.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -126,18 +126,6 @@ utf16be_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, if (*p == 0) { p++; - if (end > p + 2 && - (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 && - ((*p == 's' && *(p+2) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+2) == 'S'))) && - *(p+1) == 0) { - *lower++ = '\0'; - *lower = 0xdf; - (*pp) += 4; - return 2; - } - *lower++ = '\0'; if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || @@ -177,20 +165,6 @@ utf16be_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) int c, v; p++; - if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 2 && - ((*p == 's' && *(p+2) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+2) == 'S'))) && - *(p+1) == 0) { - (*pp) += 2; - return TRUE; - } - else if (*p == 0xdf) { - return TRUE; - } - } - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && @@ -234,8 +208,7 @@ OnigEncodingType OnigEncodingUTF16_BE = { 4, /* max byte length */ 2, /* min byte length */ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), { (OnigCodePoint )'\\' /* esc */ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ diff --git a/ext/mbstring/oniguruma/enc/utf16_le.c b/ext/mbstring/oniguruma/enc/utf16_le.c index cefcb60ad1..2248e4910f 100755 --- a/ext/mbstring/oniguruma/enc/utf16_le.c +++ b/ext/mbstring/oniguruma/enc/utf16_le.c @@ -2,7 +2,7 @@ utf16_le.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -128,18 +128,6 @@ utf16le_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, const UChar* p = *pp; if (*(p+1) == 0) { - if (end > p + 3 && - (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 && - ((*p == 's' && *(p+2) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+2) == 'S'))) && - *(p+3) == 0) { - *lower++ = 0xdf; - *lower = '\0'; - (*pp) += 4; - return 2; - } - *(lower+1) = '\0'; if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || @@ -176,17 +164,6 @@ utf16le_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) if (*(p+1) == 0) { int c, v; - if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 3 && - ((*p == 's' && *(p+2) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+2) == 'S'))) && - *(p+3) == 0) { - (*pp) += 2; - return TRUE; - } - } - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && @@ -229,8 +206,7 @@ OnigEncodingType OnigEncodingUTF16_LE = { 4, /* max byte length */ 2, /* min byte length */ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), { (OnigCodePoint )'\\' /* esc */ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ diff --git a/ext/mbstring/oniguruma/enc/utf32_be.c b/ext/mbstring/oniguruma/enc/utf32_be.c index c1f0668977..75133ca262 100755 --- a/ext/mbstring/oniguruma/enc/utf32_be.c +++ b/ext/mbstring/oniguruma/enc/utf32_be.c @@ -2,7 +2,7 @@ utf32_be.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -85,20 +85,6 @@ utf32be_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, if (*(p+2) == 0 && *(p+1) == 0 && *p == 0) { p += 3; - if (end > p + 4 && - (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 && - ((*p == 's' && *(p+4) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+4) == 'S'))) && - *(p+3) == 0 && *(p+2) == 0 && *(p+1) == 0) { - *lower++ = '\0'; - *lower++ = '\0'; - *lower++ = '\0'; - *lower = 0xdf; - (*pp) += 8; - return 4; - } - *lower++ = '\0'; *lower++ = '\0'; *lower++ = '\0'; @@ -139,20 +125,6 @@ utf32be_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) int c, v; p += 3; - if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 4 && - ((*p == 's' && *(p+4) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+4) == 'S'))) && - *(p+3) == 0 && *(p+2) == 0 && *(p+1) == 0) { - (*pp) += 4; - return TRUE; - } - else if (*p == 0xdf) { - return TRUE; - } - } - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && @@ -191,8 +163,7 @@ OnigEncodingType OnigEncodingUTF32_BE = { 4, /* max byte length */ 4, /* min byte length */ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), { (OnigCodePoint )'\\' /* esc */ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ diff --git a/ext/mbstring/oniguruma/enc/utf32_le.c b/ext/mbstring/oniguruma/enc/utf32_le.c index c10cbfa777..21dca10c11 100755 --- a/ext/mbstring/oniguruma/enc/utf32_le.c +++ b/ext/mbstring/oniguruma/enc/utf32_le.c @@ -2,7 +2,7 @@ utf32_le.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -84,20 +84,6 @@ utf32le_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, const UChar* p = *pp; if (*(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) { - if (end > p + 7 && - (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 && - ((*p == 's' && *(p+4) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+4) == 'S'))) && - *(p+5) == 0 && *(p+6) == 0 && *(p+7) == 0) { - *lower++ = 0xdf; - *lower++ = '\0'; - *lower++ = '\0'; - *lower = '\0'; - (*pp) += 8; - return 4; - } - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && @@ -137,20 +123,6 @@ utf32le_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) if (*(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) { int c, v; - if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 7 && - ((*p == 's' && *(p+4) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+4) == 'S'))) && - *(p+5) == 0 && *(p+6) == 0 && *(p+7) == 0) { - (*pp) += 4; - return TRUE; - } - else if (*p == 0xdf) { - return TRUE; - } - } - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && @@ -189,8 +161,7 @@ OnigEncodingType OnigEncodingUTF32_LE = { 4, /* max byte length */ 4, /* min byte length */ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), { (OnigCodePoint )'\\' /* esc */ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ diff --git a/ext/mbstring/oniguruma/enc/utf8.c b/ext/mbstring/oniguruma/enc/utf8.c index 42ce807f91..c7481d7050 100644 --- a/ext/mbstring/oniguruma/enc/utf8.c +++ b/ext/mbstring/oniguruma/enc/utf8.c @@ -2,7 +2,7 @@ utf8.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -223,17 +223,6 @@ utf8_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, UC const UChar* p = *pp; if (ONIGENC_IS_MBC_ASCII(p)) { - if (end > p + 1 && - (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 && - ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S')))) { - *lower++ = '\303'; - *lower = '\237'; - (*pp) += 2; - return 2; - } - if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) { *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); } @@ -258,15 +247,6 @@ utf8_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, UC return 2; } } -#if 0 - else if (c == (UChar )'\237' && - (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - *lower++ = '\303'; - *lower = '\237'; - (*pp) += 2; - return 2; - } -#endif } } @@ -288,15 +268,6 @@ utf8_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) const UChar* p = *pp; if (ONIGENC_IS_MBC_ASCII(p)) { - if (end > p + 1 && - (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 && - ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S')))) { - (*pp) += 2; - return TRUE; - } - (*pp)++; if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) { return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p); @@ -318,10 +289,6 @@ utf8_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) return TRUE; } } - else if (c == (UChar )'\237' && - (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - return TRUE; - } } } } @@ -3739,8 +3706,7 @@ OnigEncodingType OnigEncodingUTF8 = { 6, /* max byte length */ 1, /* min byte length */ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), { (OnigCodePoint )'\\' /* esc */ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ diff --git a/ext/mbstring/oniguruma/index.html b/ext/mbstring/oniguruma/index.html index fbf4fc0955..d55f1cc94f 100755 --- a/ext/mbstring/oniguruma/index.html +++ b/ext/mbstring/oniguruma/index.html @@ -5,53 +5,50 @@ </head> <body BGCOLOR="#ffffff" VLINK="#808040" TEXT="#696969"> -<h1>Oniguruma</h1> +<h1>Oniguruma</h1> (<a href="index_ja.html">Japanese</a>) <p> -2006/09/19 (C) K.Kosako +(c) K.Kosako, updated at: 2007/08/16 </p> +<dl> +<font color="orange"> +<dt><b>What's new</b> +</font> +<ul> +<li>2007/08/16: Version 4.7.1 released.</li> +<li>2007/07/14: Version 5.9.0 released.</li> +<li>2007/06/20: Version 2.5.9 released.</li> +<li>2007/06/20: Maintainer of 2.x was changed.</li> +</ul> +</dl> +<hr> + <p> Oniguruma is a regular expressions library.<br> The characteristics of this library is that different character encoding <br>for every regular expression object can be specified. +<br>(supported APIs: GNU regex, POSIX and Oniguruma native) </p> <dl> <dt><b>Supported character encodings:</b><br> ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,<br> EUC-JP, EUC-TW, EUC-KR, EUC-CN,<br> -Shift_JIS, Big5, GB 18030, KOI8-R, KOI8,<br> +Shift_JIS, Big5, GB18030, KOI8-R, CP1251,<br> ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,<br> ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,<br> ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16<br> -<font color="red"> -(GB 18030 encoding was contributed by KUBO Takehiro) -</font> -</p> -</dl> -<p> - -<dl> <font color="orange"> -<dt><b>What's new</b> +(GB18030 encoding was contributed by KUBO Takehiro)<br> +(CP1251 encoding was contributed by Byte) </font> -<ul> -<li>Version 4.4.4 released. (2006/09/19) -<li>Version 2.5.7 released. (2006/07/28) -</ul> +</p> </dl> <hr> -<dl> -<dt>There are two ways of using of it in this program. -<ul> -<li> (1) C library (supported APIs: GNU regex, POSIX and Oniguruma native) -<li> (2) Built-in regular expressions engine of <a href="http://www.ruby-lang.org/">Ruby</a> 1.6/1.8/1.9 <br> - In Ruby 1.9, Oniguruma is already integrated by Kazuo Saito. -</ul> -</dl> +<dt><b>License:</b> BSD license. <dl> <dt><b>Platform:</b> @@ -62,32 +59,27 @@ ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16<br> </ul> <br> -<dt><b>License:</b><br> -When this software is partly used or it is distributed with Ruby, -this of Ruby follows the license of Ruby.<br> -It follows the BSD license in the case of the one except for it. -</p> <dt><b>Download:</b> <ul> -<li> <a href="archive/onig-4.4.4.tar.gz">Latest release version 4.4.4</a> (2006/09/19) <a href="HISTORY_4X.txt">Change Log</a> -<li> <a href="archive/onig-4.4.3.tar.gz">4.4.3</a> (2006/09/15) -<li> <a href="archive/onig-4.4.2.tar.gz">4.4.2</a> (2006/09/08) -<li> <a href="archive/onig-4.4.1.tar.gz">4.4.1</a> (2006/08/29) -<li> <a href="archive/onigd2_5_7.tar.gz">Latest release version 2.5.7</a> (2006/07/28) <a href="HISTORY_2X.txt">Change Log</a> -<li> <a href="archive/onigd2_5_6.tar.gz">2.5.6</a> (2006/05/29) -<li> <a href="archive/onigd2_5_5.tar.gz">2.5.5</a> (2006/05/08) +<li> <a href="archive/onig-5.9.0.tar.gz">Latest release version 5.9.0</a> (2007/07/14) <a href="HISTORY_5X.txt">Change Log</a> +<li> <a href="archive/onig-5.8.0.tar.gz">5.8.0</a> (2007/06/04) +<li> <a href="archive/onig-5.7.0.tar.gz">5.7.0</a> (2007/04/27) +<li> <a href="archive/onig-4.7.1.tar.gz">Latest release version 4.7.1</a> (2007/08/16) <a href="HISTORY_4X.txt">Change Log</a> +<li> <a href="archive/onig-4.7.0.tar.gz">4.7.0</a> (2007/06/18) +<li> <a href="archive/onigd2_5_9.tar.gz">Latest release version 2.5.9</a> (2007/06/20) <a href="HISTORY_2X.txt">Change Log</a> </ul> <br> <font color="red"> -* 4.X.X supports UTF-16/UTF-32, Ruby 1.9.X.<br> -* 2.X.X does not support UTF-16/UTF-32, supports Ruby 1.6/1.8.[2-4] +Maintainer of 2.x was changed to Hannes Wyss <hwyss AT ywesee.com>.<br> +About 2.x, please contact him.<br> </font> +* 5.x supports Unicode Property/Script.<br> +* 2.x supports Ruby1.6/1.8.<br> <br> -<br> -<dt><b>Documents:</b> (version 4.4.4) +<dt><b>Documents:</b> (version 5.9.0) <ul> <li> <a href="doc/RE.txt">Regular Expressions</a> <a href="doc/RE.ja.txt">(Japanese: EUC-JP)</a> @@ -103,63 +95,66 @@ It follows the BSD license in the case of the one except for it. </ul> <br> -<dt><b>Links:</b> +<dt><b>Site Links:</b> <ul> -<li> <a href="http://www.ruby-lang.org/cgi-bin/cvsweb.cgi/oniguruma/">Oniguruma in Ruby CVS</a> (old version) -<li> <a href="http://raa.ruby-lang.org/project/oniguruma/">Oniguruma in RAA</a> (Ruby Application Archive) <li> <a href="http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/">FreeBSD ports</a> <li> <a href="http://www.softantenna.com/lib/1953/index.html">SoftAntenna > Lib > Oniguruma</a> (Japanese page) -<li> <a href="http://homepage3.nifty.com/k-takata/mysoft/bregonig.html">bregonig.dll</a> (Japanese page) +</ul> + +<br> +<dt><b>Links:</b> +<ul> +<li> <a href="http://homepage3.nifty.com/k-takata/mysoft/bregonig.html">bregonig.dll (Win32)</a> (Japanese page) <li> <a href="http://www.halbiz.com/osaru/cnregex.html">cnRegex 4D Plugin (Mac OS X)</a> (Japanese page) -<li> <a href="http://kmaebashi.com/">new script language crowbar</a> (Japanese page) +<li> <a href="http://kmaebashi.com/">crowbar</a> (Japanese page) +<li> <a href="http://oniguruma5.darwinports.com">Darwin Ports (Mac OS X)</a> <li> <a href="http://homepage2.nifty.com/Km/onig.htm">Delphi interface (Win32)</a> (Japanese page) <li> <a href="http://pyxis-project.net/ensemble/">Ensemble (Mac OS X)</a> (Japanese page) +<li> <a href="http://www.srcw.net/FaEdit/">FaEdit (Win32)</a> (Japanese page) <li> <a href="http://www.tom.sfc.keio.ac.jp/~sakai/d/?date=20050209">GHC patch</a> Masahiro Sakai (Japanese Blog) <li> <a href="http://www.gyazsquare.com/gyazmail/index.php">GyazMail (Mac OS X)</a> +<li> <a href="http://www5d.biglobe.ne.jp/~f-taste/knt3/jcref3.html">J-cref v3</a> (Japanese page) <li> <a href="http://www.artman21.net/">Jedit X (Mac OS X)</a> -<li> <a href="http://www.chitora.jp/lhaz.html">Lhaz</a> (Japanese page) +<li> <a href="http://www.chitora.jp/lhaz.html">Lhaz (Win32)</a> (Japanese page) <li> <a href="http://limechat.net/">LimeChat</a> (Japanese page) +<li> <a href="http://medb.enhiro.com/">meDB</a> (Japanese page) +<li> <a href="http://monaos.org/">Mona OS</a> +<li> <a href="http://mongoose.jp/">mongoose</a> (Japanese page) <li> <a href="http://www.irori.org/tool/mregexp.html">mregexp</a> (Japanese page) <li> <a href="http://ochusha.sourceforge.jp/">Ochusha</a> (Japanese page) <li> <a href="http://www8.ocn.ne.jp/%7esonoisa/OgreKit/index.html">OgreKit (Mac OS X)</a> Regular Expression Framework for Cocoa (Japanese page) <li> <a href="http://www.kanetaka.net/4dapi/wiki4d.dll/4dcgi/wiki.cgi?plugins-oniguruma">OnigRegexp</a> (Japanese page) -<li> <a href="http://www.moriq.com/onig/">Oniguruma / FireBird (Win32)</a> +<li> <a href="http://rubyforge.org/projects/oniguruma">Oniguruma for Ruby</a> <li> <a href="http://openspace.timedia.co.jp/~yasuyuki/wiliki/wiliki.cgi?Oniguruma-mysqld&l=jp">Oniguruma-mysqld</a> <li> <a href="http://www.void.in/wiki/OnigPP">OnigPP</a> (Japanese page) <li> <a href="http://www.kt.rim.or.jp/~kbk/sed/index.html">Onigsed (Win32)</a> (Japanese page) +<li> <a href="http://glozer.net/code.html#oregexp">oregexp</a> Erlang binding <li> <a href="http://www.kt.rim.or.jp/~kbk/yagrep/index.html">yagrep (Win32)</a> (Japanese page) <li> <a href="http://www.php.gr.jp/">Japan PHP User Group</a> PHP 5.0 mb_ereg (Japanese page) <li> <a href="http://yatsu.info/wiki/Pufui/">Pufui (Mac OS X)</a> (Japanese page) +<li> <a href="http://ultrapop.jp/?q2ch">q2ch</a> (Japanese page) <li> <a href="http://harumune.s56.xrea.com/assari/index.php?RSSTyping">RSSTyping</a> (Japanese page) -<li> <a href="http://www.ruby-lang.org/">Ruby</a> <li> <a href="http://tobysoft.net/wiki/index.php?Ruby%2Fruby-win32-oniguruma">ruby-win32-oniguruma</a> (Japanese page) <li> <a href="http://quux.s74.xrea.com/">SevenFour (Mac OS X)</a> (Japanese page) <li> <a href="http://storklab.cyber-ninja.jp/">Stork Lab. Products (Mac OS X)</a> (Japanese page) -<li> <a href="http://sourceforge.jp/projects/ttssh2/">TeraTerm</a> -<li> <a href="http://macromates.com/">TextMate (Mac OS X)</a> +<li> <a href="http://sourceforge.jp/projects/ttssh2/">TeraTerm (Win32)</a> <li> <a href="http://www8.ocn.ne.jp/~sonoisa/TiddlyWikiPod/">TiddlyWikiPod (Mac OS X)</a> <li> <a href="http://www.cyanworks.net/mac.html">TunesTEXT (Mac OS X)</a> <li> <a href="http://sourceforge.jp/projects/frogger/">XML parser</a> -<li> <a href="http://www.yokkasoft.net/">YokkaSoft</a> +<li> <a href="http://www.yokkasoft.net/">YokkaSoft (Win32)</a> (Japanese page) </ul> <br> <dt><b>References:</b> <ul> <li> <a href="http://www.ruby-lang.org/ja/man/index.cgi?cmd=view;name=%C0%B5%B5%AC%C9%BD%B8%BD">Ruby Reference Manual Regexp</a> (Japanese page) -<li> <a href="http://www.perldoc.com/perl5.8.0/pod/perlre.html">Perl regular expressions</a> +<li> <a href="http://www.perl.com/doc/manual/html/pod/perlre.html">Perl regular expressions</a> <li> <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html">java.util.regex.Pattern (J2SE 1.4.2)</a> <li> <a href="http://www.opengroup.org/onlinepubs/007908799/xbd/re.html">The Open Group</a> <li> <a href="http://regex.info/">Mastering Regular Expressions</a> <li> <a href="http://www.unicode.org/">Unicode Home Page</a> -</ul> - -<br> -<dt><b>Resources:</b> -<ul> <li> <a href="http://www.kt.rim.or.jp/~kbk/regex/regex.html">Regular expressions memo</a> (Japanese page) <li> <a href="http://www.din.or.jp/~ohzaki/regex.htm">Regular expressions technique</a> (Japanese page) -<li> <a href="http://staff.aist.go.jp/tanaka-akira/textprocess/">"Text Processing" Lecture documents (Tanaka Akira)</a> (Japanese page) </ul> <br> @@ -173,11 +168,13 @@ and I'm thankful to Akinori MUSHA. <dt><b>Other Libraries:</b> <ul> <li> <a href="http://www.boost.org/libs/regex/doc/">Boost.Regex</a> -<li> <a href="http://www.pcre.org/">PCRE</a> <li> <a href="http://arglist.com/regex/">A copy of Henry Spencer's</a> +<li> <a href="http://directory.fsf.org/regex.html">GNU regex</a> +<li> <a href="http://www.pcre.org/">PCRE</a> <li> <a href="http://re2c.org/">re2c</a> <li> <a href="http://tiny-rex.sourceforge.net/">T-Rex</a> <li> <a href="http://laurikari.net/tre/">TRE</a> +<li> <a href="http://jregex.sourceforge.net/">JRegex (Java)</a> <li> <a href="http://www.cacas.org/java/gnu/regexp/">gnu.regexp for Java</a> <li> <a href="http://jakarta.apache.org/regexp/index.html">Jakarta Project Regexp</a> <li> <a href="http://jakarta.apache.org/oro/">Jakarta Project ORO</a> diff --git a/ext/mbstring/oniguruma/oniguruma.h b/ext/mbstring/oniguruma/oniguruma.h index 5fb6ec154c..68c8fdf834 100644 --- a/ext/mbstring/oniguruma/oniguruma.h +++ b/ext/mbstring/oniguruma/oniguruma.h @@ -4,7 +4,7 @@ oniguruma.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -37,8 +37,8 @@ extern "C" { #define ONIGURUMA #define ONIGURUMA_VERSION_MAJOR 4 -#define ONIGURUMA_VERSION_MINOR 4 -#define ONIGURUMA_VERSION_TEENY 4 +#define ONIGURUMA_VERSION_MINOR 7 +#define ONIGURUMA_VERSION_TEENY 1 #ifdef __cplusplus # ifndef HAVE_PROTOTYPES @@ -106,17 +106,11 @@ ONIG_EXTERN OnigAmbigType OnigDefaultAmbigFlag; #define ONIGENC_AMBIGUOUS_MATCH_NONE 0 #define ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE (1<<0) #define ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE (1<<1) -/* #define ONIGENC_AMBIGUOUS_MATCH_ACCENT (1<<2) */ -/* #define ONIGENC_AMBIGUOUS_MATCH_HIRAGANA_KATAKANA (1<<3) */ -/* #define ONIGENC_AMBIGUOUS_MATCH_KATAKANA_WIDTH (1<<4) */ #define ONIGENC_AMBIGUOUS_MATCH_LIMIT (1<<1) -#define ONIGENC_AMBIGUOUS_MATCH_COMPOUND (1<<30) #define ONIGENC_AMBIGUOUS_MATCH_FULL \ - ( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | \ - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | \ - ONIGENC_AMBIGUOUS_MATCH_COMPOUND ) + ( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ) #define ONIGENC_AMBIGUOUS_MATCH_DEFAULT OnigDefaultAmbigFlag @@ -427,11 +421,11 @@ OnigUChar* onigenc_step_back P_((OnigEncoding enc, const OnigUChar* start, const /* encoding API */ ONIG_EXTERN -int onigenc_init P_(()); +int onigenc_init P_((void)); ONIG_EXTERN int onigenc_set_default_encoding P_((OnigEncoding enc)); ONIG_EXTERN -OnigEncoding onigenc_get_default_encoding P_(()); +OnigEncoding onigenc_get_default_encoding P_((void)); ONIG_EXTERN void onigenc_set_default_caseconv_table P_((const OnigUChar* table)); ONIG_EXTERN @@ -717,6 +711,7 @@ struct re_registers { typedef struct re_registers OnigRegion; typedef struct { + OnigEncoding enc; OnigUChar* par; OnigUChar* par_end; } OnigErrorInfo; @@ -889,7 +884,7 @@ int onig_set_meta_char P_((OnigEncoding enc, unsigned int what, OnigCodePoint co ONIG_EXTERN void onig_copy_encoding P_((OnigEncoding to, OnigEncoding from)); ONIG_EXTERN -OnigAmbigType onig_get_default_ambig_flag P_(()); +OnigAmbigType onig_get_default_ambig_flag P_((void)); ONIG_EXTERN int onig_set_default_ambig_flag P_((OnigAmbigType ambig_flag)); ONIG_EXTERN diff --git a/ext/mbstring/oniguruma/regcomp.c b/ext/mbstring/oniguruma/regcomp.c index a679a20b26..6a0976dee2 100644 --- a/ext/mbstring/oniguruma/regcomp.c +++ b/ext/mbstring/oniguruma/regcomp.c @@ -2,7 +2,7 @@ regcomp.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -34,7 +34,7 @@ OnigAmbigType OnigDefaultAmbigFlag = ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE); extern OnigAmbigType -onig_get_default_ambig_flag() +onig_get_default_ambig_flag(void) { return OnigDefaultAmbigFlag; } @@ -47,10 +47,6 @@ onig_set_default_ambig_flag(OnigAmbigType ambig_flag) } -#ifndef PLATFORM_UNALIGNED_WORD_ACCESS -static unsigned char PadBuf[WORD_ALIGNMENT_SIZE]; -#endif - static UChar* k_strdup(UChar* s, UChar* end) { @@ -539,6 +535,8 @@ add_multi_byte_cclass(BBuf* mbuf, regex_t* reg) add_length(reg, mbuf->used); return add_bytes(reg, mbuf->p, mbuf->used); #else + static unsigned char PadBuf[WORD_ALIGNMENT_SIZE]; + int r, pad_size; UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH; @@ -660,7 +658,7 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper) } static int -compile_range_repeat_node(QualifierNode* qn, int target_len, int empty_info, +compile_range_repeat_node(QuantifierNode* qn, int target_len, int empty_info, regex_t* reg) { int r; @@ -684,7 +682,7 @@ compile_range_repeat_node(QualifierNode* qn, int target_len, int empty_info, #ifdef USE_SUBEXP_CALL reg->num_call > 0 || #endif - IS_QUALIFIER_IN_REPEAT(qn)) { + IS_QUANTIFIER_IN_REPEAT(qn)) { r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG); } else { @@ -696,7 +694,7 @@ compile_range_repeat_node(QualifierNode* qn, int target_len, int empty_info, } static int -is_anychar_star_qualifier(QualifierNode* qn) +is_anychar_star_quantifier(QuantifierNode* qn) { if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) && NTYPE(qn->target) == N_ANYCHAR) @@ -705,13 +703,13 @@ is_anychar_star_qualifier(QualifierNode* qn) return 0; } -#define QUALIFIER_EXPAND_LIMIT_SIZE 50 +#define QUANTIFIER_EXPAND_LIMIT_SIZE 50 #define CKN_ON (ckn > 0) #ifdef USE_COMBINATION_EXPLOSION_CHECK static int -compile_length_qualifier_node(QualifierNode* qn, regex_t* reg) +compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg) { int len, mod_tlen, cklen; int ckn; @@ -791,7 +789,7 @@ compile_length_qualifier_node(QualifierNode* qn, regex_t* reg) } static int -compile_qualifier_node(QualifierNode* qn, regex_t* reg) +compile_quantifier_node(QuantifierNode* qn, regex_t* reg) { int r, mod_tlen; int ckn; @@ -803,7 +801,7 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg) ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0); - if (is_anychar_star_qualifier(qn)) { + if (is_anychar_star_quantifier(qn)) { r = compile_tree_n_times(qn->target, qn->lower, reg); if (r) return r; if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) { @@ -945,7 +943,7 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg) #else /* USE_COMBINATION_EXPLOSION_CHECK */ static int -compile_length_qualifier_node(QualifierNode* qn, regex_t* reg) +compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg) { int len, mod_tlen; int infinite = IS_REPEAT_INFINITE(qn->upper); @@ -970,8 +968,8 @@ compile_length_qualifier_node(QualifierNode* qn, regex_t* reg) mod_tlen = tlen; if (infinite && - (qn->lower <= 1 || tlen * qn->lower <= QUALIFIER_EXPAND_LIMIT_SIZE)) { - if (qn->lower == 1 && tlen > QUALIFIER_EXPAND_LIMIT_SIZE) { + (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { + if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { len = SIZE_OP_JUMP; } else { @@ -994,7 +992,7 @@ compile_length_qualifier_node(QualifierNode* qn, regex_t* reg) } else if (!infinite && qn->greedy && (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper - <= QUALIFIER_EXPAND_LIMIT_SIZE)) { + <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { len = tlen * qn->lower; len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower); } @@ -1010,7 +1008,7 @@ compile_length_qualifier_node(QualifierNode* qn, regex_t* reg) } static int -compile_qualifier_node(QualifierNode* qn, regex_t* reg) +compile_quantifier_node(QuantifierNode* qn, regex_t* reg) { int i, r, mod_tlen; int infinite = IS_REPEAT_INFINITE(qn->upper); @@ -1019,7 +1017,7 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg) if (tlen < 0) return tlen; - if (is_anychar_star_qualifier(qn)) { + if (is_anychar_star_quantifier(qn)) { r = compile_tree_n_times(qn->target, qn->lower, reg); if (r) return r; if (IS_NOT_NULL(qn->next_head_exact)) { @@ -1044,8 +1042,8 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg) mod_tlen = tlen; if (infinite && - (qn->lower <= 1 || tlen * qn->lower <= QUALIFIER_EXPAND_LIMIT_SIZE)) { - if (qn->lower == 1 && tlen > QUALIFIER_EXPAND_LIMIT_SIZE) { + (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { + if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { if (qn->greedy) { if (IS_NOT_NULL(qn->head_exact)) r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1); @@ -1109,7 +1107,7 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg) } else if (!infinite && qn->greedy && (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper - <= QUALIFIER_EXPAND_LIMIT_SIZE)) { + <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { int n = qn->upper - qn->lower; r = compile_tree_n_times(qn->target, qn->lower, reg); @@ -1227,7 +1225,7 @@ compile_length_effect_node(EffectNode* node, regex_t* reg) case EFFECT_STOP_BACKTRACK: if (IS_EFFECT_STOP_BT_SIMPLE_REPEAT(node)) { - QualifierNode* qn = &NQUALIFIER(node->target); + QuantifierNode* qn = &NQUANTIFIER(node->target); tlen = compile_length_tree(qn->target, reg); if (tlen < 0) return tlen; @@ -1317,7 +1315,7 @@ compile_effect_node(EffectNode* node, regex_t* reg) case EFFECT_STOP_BACKTRACK: if (IS_EFFECT_STOP_BT_SIMPLE_REPEAT(node)) { - QualifierNode* qn = &NQUALIFIER(node->target); + QuantifierNode* qn = &NQUANTIFIER(node->target); r = compile_tree_n_times(qn->target, qn->lower, reg); if (r) return r; @@ -1540,8 +1538,8 @@ compile_length_tree(Node* node, regex_t* reg) break; #endif - case N_QUALIFIER: - r = compile_length_qualifier_node(&(NQUALIFIER(node)), reg); + case N_QUANTIFIER: + r = compile_length_quantifier_node(&(NQUANTIFIER(node)), reg); break; case N_EFFECT: @@ -1703,8 +1701,8 @@ compile_tree(Node* node, regex_t* reg) break; #endif - case N_QUALIFIER: - r = compile_qualifier_node(&(NQUALIFIER(node)), reg); + case N_QUANTIFIER: + r = compile_quantifier_node(&(NQUANTIFIER(node)), reg); break; case N_EFFECT: @@ -1741,13 +1739,13 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter) } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); break; - case N_QUALIFIER: + case N_QUANTIFIER: { - Node** ptarget = &(NQUALIFIER(node).target); + Node** ptarget = &(NQUANTIFIER(node).target); Node* old = *ptarget; r = noname_disable_map(ptarget, map, counter); - if (*ptarget != old && NTYPE(*ptarget) == N_QUALIFIER) { - onig_reduce_nested_qualifier(node, *ptarget); + if (*ptarget != old && NTYPE(*ptarget) == N_QUANTIFIER) { + onig_reduce_nested_quantifier(node, *ptarget); } } break; @@ -1821,8 +1819,8 @@ renumber_by_map(Node* node, GroupNumRemap* map) r = renumber_by_map(NCONS(node).left, map); } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); break; - case N_QUALIFIER: - r = renumber_by_map(NQUALIFIER(node).target, map); + case N_QUANTIFIER: + r = renumber_by_map(NQUANTIFIER(node).target, map); break; case N_EFFECT: r = renumber_by_map(NEFFECT(node).target, map); @@ -1851,8 +1849,8 @@ numbered_ref_check(Node* node) r = numbered_ref_check(NCONS(node).left); } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); break; - case N_QUALIFIER: - r = numbered_ref_check(NQUALIFIER(node).target); + case N_QUANTIFIER: + r = numbered_ref_check(NQUANTIFIER(node).target); break; case N_EFFECT: r = numbered_ref_check(NEFFECT(node).target); @@ -1933,7 +1931,7 @@ unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg) #ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK static int -qualifiers_memory_node_info(Node* node) +quantifiers_memory_node_info(Node* node) { int r = 0; @@ -1943,7 +1941,7 @@ qualifiers_memory_node_info(Node* node) { int v; do { - v = qualifiers_memory_node_info(NCONS(node).left); + v = quantifiers_memory_node_info(NCONS(node).left); if (v > r) r = v; } while (v >= 0 && IS_NOT_NULL(node = NCONS(node).right)); } @@ -1955,15 +1953,15 @@ qualifiers_memory_node_info(Node* node) return NQ_TARGET_IS_EMPTY_REC; /* tiny version */ } else - r = qualifiers_memory_node_info(NCALL(node).target); + r = quantifiers_memory_node_info(NCALL(node).target); break; #endif - case N_QUALIFIER: + case N_QUANTIFIER: { - QualifierNode* qn = &(NQUALIFIER(node)); + QuantifierNode* qn = &(NQUANTIFIER(node)); if (qn->upper != 0) { - r = qualifiers_memory_node_info(qn->target); + r = quantifiers_memory_node_info(qn->target); } } break; @@ -1978,7 +1976,7 @@ qualifiers_memory_node_info(Node* node) case EFFECT_OPTION: case EFFECT_STOP_BACKTRACK: - r = qualifiers_memory_node_info(en->target); + r = quantifiers_memory_node_info(en->target); break; default: break; @@ -2083,9 +2081,9 @@ get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env) *min = 1; break; - case N_QUALIFIER: + case N_QUANTIFIER: { - QualifierNode* qn = &(NQUALIFIER(node)); + QuantifierNode* qn = &(NQUANTIFIER(node)); if (qn->lower > 0) { r = get_min_match_length(qn->target, min, env); @@ -2204,9 +2202,9 @@ get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env) break; #endif - case N_QUALIFIER: + case N_QUANTIFIER: { - QualifierNode* qn = &(NQUALIFIER(node)); + QuantifierNode* qn = &(NQUANTIFIER(node)); if (qn->upper != 0) { r = get_max_match_length(qn->target, max, env); @@ -2311,9 +2309,9 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) } break; - case N_QUALIFIER: + case N_QUANTIFIER: { - QualifierNode* qn = &(NQUALIFIER(node)); + QuantifierNode* qn = &(NQUANTIFIER(node)); if (qn->lower == qn->upper) { r = get_char_length_tree1(qn->target, reg, &tlen, level); if (r == 0) @@ -2623,9 +2621,9 @@ get_head_value_node(Node* node, int exact, regex_t* reg) } break; - case N_QUALIFIER: + case N_QUANTIFIER: { - QualifierNode* qn = &(NQUALIFIER(node)); + QuantifierNode* qn = &(NQUANTIFIER(node)); if (qn->lower > 0) { if (IS_NOT_NULL(qn->head_exact)) n = qn->head_exact; @@ -2686,8 +2684,8 @@ check_type_tree(Node* node, int type_mask, int effect_mask, int anchor_mask) } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); break; - case N_QUALIFIER: - r = check_type_tree(NQUALIFIER(node).target, type_mask, effect_mask, + case N_QUANTIFIER: + r = check_type_tree(NQUANTIFIER(node).target, type_mask, effect_mask, anchor_mask); break; @@ -2762,10 +2760,10 @@ subexp_inf_recursive_check(Node* node, ScanEnv* env, int head) } break; - case N_QUALIFIER: - r = subexp_inf_recursive_check(NQUALIFIER(node).target, env, head); + case N_QUANTIFIER: + r = subexp_inf_recursive_check(NQUANTIFIER(node).target, env, head); if (r == RECURSION_EXIST) { - if (NQUALIFIER(node).lower == 0) r = 0; + if (NQUANTIFIER(node).lower == 0) r = 0; } break; @@ -2821,8 +2819,8 @@ subexp_inf_recursive_check_trav(Node* node, ScanEnv* env) } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); break; - case N_QUALIFIER: - r = subexp_inf_recursive_check_trav(NQUALIFIER(node).target, env); + case N_QUANTIFIER: + r = subexp_inf_recursive_check_trav(NQUANTIFIER(node).target, env); break; case N_ANCHOR: @@ -2876,8 +2874,8 @@ subexp_recursive_check(Node* node) } while (IS_NOT_NULL(node = NCONS(node).right)); break; - case N_QUALIFIER: - r = subexp_recursive_check(NQUALIFIER(node).target); + case N_QUANTIFIER: + r = subexp_recursive_check(NQUANTIFIER(node).target); break; case N_ANCHOR: @@ -2941,11 +2939,11 @@ subexp_recursive_check_trav(Node* node, ScanEnv* env) } break; - case N_QUALIFIER: - r = subexp_recursive_check_trav(NQUALIFIER(node).target, env); - if (NQUALIFIER(node).upper == 0) { + case N_QUANTIFIER: + r = subexp_recursive_check_trav(NQUANTIFIER(node).target, env); + if (NQUANTIFIER(node).upper == 0) { if (r == FOUND_CALLED_NODE) - NQUALIFIER(node).is_refered = 1; + NQUANTIFIER(node).is_refered = 1; } break; @@ -3008,8 +3006,8 @@ setup_subexp_call(Node* node, ScanEnv* env) } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); break; - case N_QUALIFIER: - r = setup_subexp_call(NQUALIFIER(node).target, env); + case N_QUANTIFIER: + r = setup_subexp_call(NQUANTIFIER(node).target, env); break; case N_EFFECT: r = setup_subexp_call(NEFFECT(node).target, env); @@ -3158,10 +3156,10 @@ next_setup(Node* node, Node* next_node, regex_t* reg) retry: type = NTYPE(node); - if (type == N_QUALIFIER) { - QualifierNode* qn = &(NQUALIFIER(node)); + if (type == N_QUANTIFIER) { + QuantifierNode* qn = &(NQUANTIFIER(node)); if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) { -#ifdef USE_QUALIFIER_PEEK_NEXT +#ifdef USE_QUANTIFIER_PEEK_NEXT qn->next_head_exact = get_head_value_node(next_node, 1, reg); #endif /* automatic posseivation a*b ==> (?>a*)b */ @@ -3327,11 +3325,11 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env) } break; - case N_QUALIFIER: + case N_QUANTIFIER: { int child_state = state; int add_state = 0; - QualifierNode* qn = &(NQUALIFIER(node)); + QuantifierNode* qn = &(NQUANTIFIER(node)); Node* target = qn->target; int var_num; @@ -3345,8 +3343,8 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env) if (NTYPE(qn->target) == N_EFFECT) { EffectNode* en = &(NEFFECT(qn->target)); if (en->type == EFFECT_MEMORY) { - if (NTYPE(en->target) == N_QUALIFIER) { - QualifierNode* q = &(NQUALIFIER(en->target)); + if (NTYPE(en->target) == N_QUANTIFIER) { + QuantifierNode* q = &(NQUANTIFIER(en->target)); if (IS_REPEAT_INFINITE(q->upper) && q->greedy == qn->greedy) { qn->upper = (qn->lower == 0 ? 1 : qn->lower); @@ -3509,10 +3507,10 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) } break; - case N_QUALIFIER: + case N_QUANTIFIER: { OnigDistance d; - QualifierNode* qn = &(NQUALIFIER(node)); + QuantifierNode* qn = &(NQUANTIFIER(node)); Node* target = qn->target; if ((state & IN_REPEAT) != 0) { @@ -3525,7 +3523,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) if (d == 0) { qn->target_empty_info = NQ_TARGET_IS_EMPTY; #ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK - r = qualifiers_memory_node_info(target); + r = quantifiers_memory_node_info(target); if (r < 0) break; if (r > 0) { qn->target_empty_info = r; @@ -3567,15 +3565,15 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) if (r) break; } onig_node_free(target); - break; /* break case N_QUALIFIER: */ + break; /* break case N_QUANTIFIER: */ } } } #ifdef USE_OP_PUSH_OR_JUMP_EXACT if (qn->greedy && (qn->target_empty_info != 0)) { - if (NTYPE(target) == N_QUALIFIER) { - QualifierNode* tqn = &(NQUALIFIER(target)); + if (NTYPE(target) == N_QUANTIFIER) { + QuantifierNode* tqn = &(NQUANTIFIER(target)); if (IS_NOT_NULL(tqn->head_exact)) { qn->head_exact = tqn->head_exact; tqn->head_exact = NULL; @@ -3615,8 +3613,8 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) { Node* target = en->target; r = setup_tree(target, reg, state, env); - if (NTYPE(target) == N_QUALIFIER) { - QualifierNode* tqn = &(NQUALIFIER(target)); + if (NTYPE(target) == N_QUANTIFIER) { + QuantifierNode* tqn = &(NQUANTIFIER(target)); if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 && tqn->greedy != 0) { /* (?>a*), a*+ etc... */ int qtype = NTYPE(tqn->target); @@ -3645,7 +3643,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) /* allowed node types in look-behind */ #define ALLOWED_TYPE_IN_LB \ ( N_LIST | N_ALT | N_STRING | N_CCLASS | N_CTYPE | \ - N_ANYCHAR | N_ANCHOR | N_EFFECT | N_QUALIFIER | N_CALL ) + N_ANYCHAR | N_ANCHOR | N_EFFECT | N_QUANTIFIER | N_CALL ) #define ALLOWED_EFFECT_IN_LB ( EFFECT_MEMORY ) #define ALLOWED_EFFECT_IN_LB_NOT 0 @@ -4080,7 +4078,14 @@ select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt) v1 = now->len; v2 = alt->len; - if (v1 <= 2 && v2 <= 2) { + if (v2 == 0) { + return ; + } + else if (v1 == 0) { + copy_opt_exact_info(now, alt); + return ; + } + else if (v1 <= 2 && v2 <= 2) { /* ByteValTable[x] is big value --> low price */ v2 = map_position_value(enc, now->s[0]); v1 = map_position_value(enc, alt->s[0]); @@ -4143,10 +4148,9 @@ static int add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end, OnigEncoding enc, OnigAmbigType ambig_flag) { - int i, j, n, len; + int i, n, len; UChar buf[ONIGENC_MBC_NORMALIZE_MAXLEN]; - OnigCodePoint code, ccode; - const OnigCompAmbigCodes* ccs; + OnigCodePoint code; const OnigPairAmbigCodes* pccs; OnigAmbigType amb; @@ -4164,21 +4168,6 @@ add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end, add_char_opt_map_info(map, buf[0], enc); } } - - if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - n = ONIGENC_GET_ALL_COMP_AMBIG_CODES(enc, amb, &ccs); - for (i = 0; i < n; i++) { - if (ccs[i].code == code) { - for (j = 0; j < ccs[i].n; j++) { - ccode = ccs[i].items[j].code[0]; - len = ONIGENC_CODE_TO_MBC(enc, ccode, buf); - if (len < 0) return len; - add_char_opt_map_info(map, buf[0], enc); - } - break; - } - } - } } return 0; } @@ -4572,12 +4561,12 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) break; #endif - case N_QUALIFIER: + case N_QUANTIFIER: { int i; OnigDistance min, max; NodeOptInfo nopt; - QualifierNode* qn = &(NQUALIFIER(node)); + QuantifierNode* qn = &(NQUANTIFIER(node)); r = optimize_node_left(qn->target, &nopt, env); if (r) break; @@ -4831,6 +4820,38 @@ clear_optimize_info(regex_t* reg) #ifdef ONIG_DEBUG +static void print_enc_string(FILE* fp, OnigEncoding enc, + const UChar *s, const UChar *end) +{ + fprintf(fp, "\nPATTERN: /"); + + if (ONIGENC_MBC_MINLEN(enc) > 1) { + const UChar *p; + OnigCodePoint code; + + p = s; + while (p < end) { + code = ONIGENC_MBC_TO_CODE(enc, p, end); + if (code >= 0x80) { + fprintf(fp, " 0x%04x ", (int )code); + } + else { + fputc((int )code, fp); + } + + p += enc_len(enc, p); + } + } + else { + while (s < end) { + fputc((int )*s, fp); + s++; + } + } + + fprintf(fp, "/\n"); +} + static void print_distance_range(FILE* f, OnigDistance a, OnigDistance b) { @@ -5122,6 +5143,10 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, reg->state = ONIG_STATE_COMPILING; +#ifdef ONIG_DEBUG + print_enc_string(stderr, reg->enc, pattern, pattern_end); +#endif + if (reg->alloc == 0) { init_size = (pattern_end - pattern) * 2; if (init_size <= 0) init_size = COMPILE_INIT_SIZE; @@ -5277,6 +5302,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, err: if (IS_NOT_NULL(scan_env.error)) { if (IS_NOT_NULL(einfo)) { + einfo->enc = scan_env.enc; einfo->par = scan_env.error; einfo->par_end = scan_env.error_end; } @@ -5379,13 +5405,14 @@ onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end, } extern int -onig_init() +onig_init(void) { if (onig_inited != 0) return 0; onig_inited = 1; + THREAD_SYSTEM_INIT; THREAD_ATOMIC_START; onigenc_init(); @@ -5401,9 +5428,9 @@ onig_init() extern int -onig_end() +onig_end(void) { - extern int onig_free_shared_cclass_table(); + extern int onig_free_shared_cclass_table(void); THREAD_ATOMIC_START; @@ -5422,6 +5449,7 @@ onig_end() onig_inited = 0; THREAD_ATOMIC_END; + THREAD_SYSTEM_END; return 0; } @@ -5470,8 +5498,6 @@ OnigOpInfoType OnigOpInfo[] = { { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL }, { OP_WORD, "word", ARG_NON }, { OP_NOT_WORD, "not-word", ARG_NON }, - { OP_WORD_SB, "word-sb", ARG_NON }, - { OP_WORD_MB, "word-mb", ARG_NON }, { OP_WORD_BOUND, "word-bound", ARG_NON }, { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON }, { OP_WORD_BEGIN, "word-begin", ARG_NON }, @@ -5969,11 +5995,11 @@ print_indent_tree(FILE* f, Node* node, int indent) break; #endif - case N_QUALIFIER: - fprintf(f, "<qualifier:%x>{%d,%d}%s\n", (int )node, - NQUALIFIER(node).lower, NQUALIFIER(node).upper, - (NQUALIFIER(node).greedy ? "" : "?")); - print_indent_tree(f, NQUALIFIER(node).target, indent + add); + case N_QUANTIFIER: + fprintf(f, "<quantifier:%x>{%d,%d}%s\n", (int )node, + NQUANTIFIER(node).lower, NQUANTIFIER(node).upper, + (NQUANTIFIER(node).greedy ? "" : "?")); + print_indent_tree(f, NQUANTIFIER(node).target, indent + add); break; case N_EFFECT: @@ -6002,7 +6028,7 @@ print_indent_tree(FILE* f, Node* node, int indent) break; } - if (type != N_LIST && type != N_ALT && type != N_QUALIFIER && + if (type != N_LIST && type != N_ALT && type != N_QUANTIFIER && type != N_EFFECT) fprintf(f, "\n"); fflush(f); diff --git a/ext/mbstring/oniguruma/regenc.c b/ext/mbstring/oniguruma/regenc.c index bbbf1a2f94..958917e122 100644 --- a/ext/mbstring/oniguruma/regenc.c +++ b/ext/mbstring/oniguruma/regenc.c @@ -2,7 +2,7 @@ regenc.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -32,13 +32,13 @@ OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT; extern int -onigenc_init() +onigenc_init(void) { return 0; } extern OnigEncoding -onigenc_get_default_encoding() +onigenc_get_default_encoding(void) { return OnigEncDefaultCharEncoding; } diff --git a/ext/mbstring/oniguruma/regenc.h b/ext/mbstring/oniguruma/regenc.h index 833dd2af46..58ee3e7f22 100644 --- a/ext/mbstring/oniguruma/regenc.h +++ b/ext/mbstring/oniguruma/regenc.h @@ -4,7 +4,7 @@ regenc.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/regerror.c b/ext/mbstring/oniguruma/regerror.c index 46dc6ae15a..d6ec91856d 100644 --- a/ext/mbstring/oniguruma/regerror.c +++ b/ext/mbstring/oniguruma/regerror.c @@ -2,7 +2,7 @@ regerror.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -183,6 +183,48 @@ onig_error_code_to_format(int code) } +static int to_ascii(OnigEncoding enc, UChar *s, UChar *end, + UChar buf[], int buf_size, int *is_over) +{ + int len; + UChar *p; + OnigCodePoint code; + + if (ONIGENC_MBC_MINLEN(enc) > 1) { + p = s; + len = 0; + while (p < end) { + code = ONIGENC_MBC_TO_CODE(enc, p, end); + if (code >= 0x80) { + if (len + 5 <= buf_size) { + sprintf((char* )(&(buf[len])), "\\%03o", + (unsigned int)(code & 0377)); + len += 5; + } + else { + break; + } + } + else { + buf[len++] = (UChar )code; + } + + p += enc_len(enc, p); + if (len >= buf_size) break; + } + + *is_over = ((p < end) ? 1 : 0); + } + else { + len = MIN((end - s), buf_size); + xmemcpy(buf, s, (size_t )len); + *is_over = ((buf_size < (end - s)) ? 1 : 0); + } + + return len; +} + + /* for ONIG_MAX_ERROR_MESSAGE_LEN */ #define MAX_ERROR_PAR_LEN 30 @@ -198,7 +240,8 @@ onig_error_code_to_str(s, code, va_alist) { UChar *p, *q; OnigErrorInfo* einfo; - int len; + int len, is_over; + UChar parbuf[MAX_ERROR_PAR_LEN]; va_list vargs; va_init_list(vargs, code); @@ -212,23 +255,20 @@ onig_error_code_to_str(s, code, va_alist) case ONIGERR_INVALID_CHAR_IN_GROUP_NAME: case ONIGERR_INVALID_CHAR_PROPERTY_NAME: einfo = va_arg(vargs, OnigErrorInfo*); - len = einfo->par_end - einfo->par; + len = to_ascii(einfo->enc, einfo->par, einfo->par_end, + parbuf, MAX_ERROR_PAR_LEN - 3, &is_over); q = onig_error_code_to_format(code); p = s; while (*q != '\0') { if (*q == '%') { q++; if (*q == 'n') { /* '%n': name */ - if (len > MAX_ERROR_PAR_LEN) { - xmemcpy(p, einfo->par, MAX_ERROR_PAR_LEN - 3); - p += (MAX_ERROR_PAR_LEN - 3); + xmemcpy(p, parbuf, len); + p += len; + if (is_over != 0) { xmemcpy(p, "...", 3); p += 3; } - else { - xmemcpy(p, einfo->par, len); - p += len; - } q++; } else @@ -278,9 +318,6 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) va_init_list(args, fmt); n = vsnprintf((char* )buf, bufsize, (const char* )fmt, args); - if (n < 0 || n >= bufsize) { - n = bufsize - 1; - } va_end(args); need = (pat_end - pat) * 4 + 4; diff --git a/ext/mbstring/oniguruma/regexec.c b/ext/mbstring/oniguruma/regexec.c index a1685c8c65..918aa67aa8 100644 --- a/ext/mbstring/oniguruma/regexec.c +++ b/ext/mbstring/oniguruma/regexec.c @@ -2,7 +2,7 @@ regexec.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -76,7 +76,7 @@ history_root_free(OnigRegion* r) } static OnigCaptureTreeNode* -history_node_new() +history_node_new(void) { OnigCaptureTreeNode* node; @@ -233,7 +233,7 @@ onig_region_init(OnigRegion* region) } extern OnigRegion* -onig_region_new() +onig_region_new(void) { OnigRegion* r; @@ -371,37 +371,59 @@ typedef struct { OnigOptionType options; OnigRegion* region; const UChar* start; /* search start position (for \G: BEGIN_POSITION) */ +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE + int best_len; /* for ONIG_OPTION_FIND_LONGEST */ + UChar* best_s; +#endif #ifdef USE_COMBINATION_EXPLOSION_CHECK void* state_check_buff; int state_check_buff_size; #endif } MatchArg; +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE +#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\ + (msa).stack_p = (void* )0;\ + (msa).options = (arg_option);\ + (msa).region = (arg_region);\ + (msa).start = (arg_start);\ + (msa).best_len = ONIG_MISMATCH;\ +} while (0) +#else #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\ - (msa).stack_p = (void* )0;\ - (msa).options = (arg_option);\ - (msa).region = (arg_region);\ - (msa).start = (arg_start);\ + (msa).stack_p = (void* )0;\ + (msa).options = (arg_option);\ + (msa).region = (arg_region);\ + (msa).start = (arg_start);\ } while (0) +#endif #ifdef USE_COMBINATION_EXPLOSION_CHECK #define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16 -#define STATE_CHECK_BUFF_INIT(msa, str_len, state_num) do { \ - (msa).state_check_buff = (void* )0;\ - (msa).state_check_buff_size = 0;\ +#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \ if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\ - int size = ((int )((str_len) + 1) * (state_num) + 7) / 8;\ - (msa).state_check_buff_size = size; \ - if (size > 0 && size < STATE_CHECK_BUFF_MAX_SIZE) {\ + unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\ + offset = ((offset) * (state_num)) >> 3;\ + if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\ if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) \ (msa).state_check_buff = (void* )xmalloc(size);\ else \ (msa).state_check_buff = (void* )xalloca(size);\ - xmemset((msa).state_check_buff, 0, (size_t )size);\ + xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \ + (size_t )(size - (offset))); \ + (msa).state_check_buff_size = size;\ + }\ + else {\ + (msa).state_check_buff = (void* )0;\ + (msa).state_check_buff_size = 0;\ }\ }\ + else {\ + (msa).state_check_buff = (void* )0;\ + (msa).state_check_buff_size = 0;\ + }\ } while (0) #define MATCH_ARG_FREE(msa) do {\ @@ -411,7 +433,7 @@ typedef struct { }\ } while (0); #else -#define STATE_CHECK_BUFF_INIT(msa, str_len, state_num) +#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) #define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p) #endif @@ -1283,14 +1305,14 @@ static int MaxStackDepth = 0; /* * :nodoc: */ -static VALUE onig_stat_print() +static VALUE onig_stat_print(void) { onig_print_statistics(stderr); return Qnil; } #endif -extern void onig_statistics_init() +extern void onig_statistics_init(void) { int i; for (i = 0; i < 256; i++) { @@ -1476,8 +1498,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, case OP_END: STAT_OP_IN(OP_END); n = s - sstart; if (n > best_len) { - OnigRegion* region = msa->region; + OnigRegion* region; +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE + if (IS_FIND_LONGEST(option)) { + if (n > msa->best_len) { + msa->best_len = n; + msa->best_s = (UChar* )sstart; + } + else + goto end_best_len; + } +#endif best_len = n; + region = msa->region; if (region) { #ifdef USE_POSIX_REGION_OPTION if (IS_POSIX_REGION(msa->options)) { @@ -1553,6 +1586,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, #endif } /* if (region) */ } /* n > best_len */ + +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE + end_best_len: +#endif STAT_OP_OUT; if (IS_FIND_CONDITION(option)) { @@ -1590,24 +1627,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, ss = s; sp = p; - exact1_ic_retry: len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf); DATA_ENSURE(0); q = lowbuf; while (len-- > 0) { if (*p != *q) { -#if 1 - if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND; - s = ss; - p = sp; - goto exact1_ic_retry; - } - else - goto fail; -#else goto fail; -#endif } p++; q++; } @@ -1696,24 +1721,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, ss = s; sp = p; - exactn_ic_retry: len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf); DATA_ENSURE(0); q = lowbuf; while (len-- > 0) { if (*p != *q) { -#if 1 - if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND; - s = ss; - p = sp; - goto exactn_ic_retry; - } - else - goto fail; -#else goto fail; -#endif } p++; q++; } @@ -2949,20 +2962,12 @@ str_lower_case_match(OnigEncoding enc, int ambig_flag, tsave = t; psave = p; - retry: while (t < tend) { lowlen = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &p, end, lowbuf); q = lowbuf; while (lowlen > 0) { if (*t++ != *q++) { - if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND; - t = tsave; - p = psave; - goto retry; - } - else - return 0; + return 0; } lowlen--; } @@ -3262,7 +3267,12 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On #endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ MATCH_ARG_INIT(msa, option, region, at); - STATE_CHECK_BUFF_INIT(msa, end - str, reg->num_comb_exp_check); +#ifdef USE_COMBINATION_EXPLOSION_CHECK + { + int offset = at - str; + STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check); + } +#endif if (region #ifdef USE_POSIX_REGION_OPTION @@ -3567,16 +3577,31 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, if (start > end || start < str) goto mismatch_no_msa; +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE #define MATCH_AND_RETURN_CHECK \ r = match_at(reg, str, end, s, prev, &msa);\ if (r != ONIG_MISMATCH) {\ - if (r >= 0) goto match;\ - goto finish; /* error */ \ + if (r >= 0) {\ + if (! IS_FIND_LONGEST(reg->options)) {\ + goto match;\ + }\ + }\ + else goto finish; /* error */ \ + } +#else +#define MATCH_AND_RETURN_CHECK \ + r = match_at(reg, str, end, s, prev, &msa);\ + if (r != ONIG_MISMATCH) {\ + if (r >= 0) {\ + goto match;\ + }\ + else goto finish; /* error */ \ } +#endif /* anchor optimize: resume search range */ if (reg->anchor != 0 && str < end) { - UChar* semi_end; + UChar *min_semi_end, *max_semi_end; if (reg->anchor & ANCHOR_BEGIN_POSITION) { /* search start-position only */ @@ -3602,49 +3627,58 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, } } else if (reg->anchor & ANCHOR_END_BUF) { - semi_end = (UChar* )end; + min_semi_end = max_semi_end = (UChar* )end; end_buf: - if ((OnigDistance )(semi_end - str) < reg->anchor_dmin) + if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin) goto mismatch_no_msa; if (range > start) { - if ((OnigDistance )(semi_end - start) > reg->anchor_dmax) { - start = semi_end - reg->anchor_dmax; + if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) { + start = min_semi_end - reg->anchor_dmax; if (start < end) start = onigenc_get_right_adjust_char_head(reg->enc, str, start); else { /* match with empty at end */ start = onigenc_get_prev_char_head(reg->enc, str, end); } } - if ((OnigDistance )(semi_end - (range - 1)) < reg->anchor_dmin) { - range = semi_end - reg->anchor_dmin + 1; + if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) { + range = max_semi_end - reg->anchor_dmin + 1; } if (start >= range) goto mismatch_no_msa; } else { - if ((OnigDistance )(semi_end - range) > reg->anchor_dmax) { - range = semi_end - reg->anchor_dmax; + if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) { + range = min_semi_end - reg->anchor_dmax; } - if ((OnigDistance )(semi_end - start) < reg->anchor_dmin) { - start = semi_end - reg->anchor_dmin; + if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) { + start = max_semi_end - reg->anchor_dmin; start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start); - if (range > start) goto mismatch_no_msa; } + if (range > start) goto mismatch_no_msa; } } else if (reg->anchor & ANCHOR_SEMI_END_BUF) { UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1); + max_semi_end = (UChar* )end; if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) { - semi_end = pre_end; - if (semi_end > str && start <= semi_end) { + min_semi_end = pre_end; + +#ifdef USE_CRNL_AS_LINE_TERMINATOR + pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, 1); + if (IS_NOT_NULL(pre_end) && + ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) { + min_semi_end = pre_end; + } +#endif + if (min_semi_end > str && start <= min_semi_end) { goto end_buf; } } else { - semi_end = (UChar* )end; + min_semi_end = (UChar* )end; goto end_buf; } } @@ -3666,7 +3700,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, MATCH_ARG_INIT(msa, option, region, start); #ifdef USE_COMBINATION_EXPLOSION_CHECK - msa.state_check_buff = (void* )0; + msa.state_check_buff = (void* )0; msa.state_check_buff_size = 0; #endif MATCH_AND_RETURN_CHECK; @@ -3681,7 +3715,12 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, #endif MATCH_ARG_INIT(msa, option, region, orig_start); - STATE_CHECK_BUFF_INIT(msa, end - str, reg->num_comb_exp_check); +#ifdef USE_COMBINATION_EXPLOSION_CHECK + { + int offset = (MIN(start, range) - str); + STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check); + } +#endif s = (UChar* )start; if (range > start) { /* forward search */ @@ -3809,6 +3848,14 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, } mismatch: +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE + if (IS_FIND_LONGEST(reg->options)) { + if (msa.best_len >= 0) { + s = msa.best_s; + goto match; + } + } +#endif r = ONIG_MISMATCH; finish: diff --git a/ext/mbstring/oniguruma/regext.c b/ext/mbstring/oniguruma/regext.c index 1586012b8e..f5ad1f35a2 100755 --- a/ext/mbstring/oniguruma/regext.c +++ b/ext/mbstring/oniguruma/regext.c @@ -2,7 +2,7 @@ regext.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/reggnu.c b/ext/mbstring/oniguruma/reggnu.c index fdc7974ae6..248957c9d9 100644 --- a/ext/mbstring/oniguruma/reggnu.c +++ b/ext/mbstring/oniguruma/reggnu.c @@ -2,7 +2,7 @@ reggnu.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/regint.h b/ext/mbstring/oniguruma/regint.h index 936148036f..e7ddad8c5d 100644 --- a/ext/mbstring/oniguruma/regint.h +++ b/ext/mbstring/oniguruma/regint.h @@ -4,7 +4,7 @@ regint.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -71,7 +71,7 @@ /* internal config */ #define USE_RECYCLE_NODE #define USE_OP_PUSH_OR_JUMP_EXACT -#define USE_QUALIFIER_PEEK_NEXT +#define USE_QUANTIFIER_PEEK_NEXT #define USE_ST_HASH_TABLE #define USE_SHARED_CCLASS_TABLE @@ -86,34 +86,29 @@ #define USE_VARIABLE_META_CHARS #define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */ #define USE_POSIX_REGION_OPTION /* needed for POSIX API support */ +#define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE /* #define USE_COMBINATION_EXPLOSION_CHECK */ /* (X*)* */ /* #define USE_MULTI_THREAD_SYSTEM */ +#define THREAD_SYSTEM_INIT /* depend on thread system */ +#define THREAD_SYSTEM_END /* depend on thread system */ #define THREAD_ATOMIC_START /* depend on thread system */ #define THREAD_ATOMIC_END /* depend on thread system */ #define THREAD_PASS /* depend on thread system */ -#define CHECK_INTERRUPT /* depend on application */ #define xmalloc malloc #define xrealloc realloc #define xcalloc calloc #define xfree free #else #include "ruby.h" -#include "version.h" #include "rubysig.h" /* for DEFER_INTS, ENABLE_INTS */ #define USE_COMBINATION_EXPLOSION_CHECK /* (X*)* */ #define USE_MULTI_THREAD_SYSTEM - +#define THREAD_SYSTEM_INIT +#define THREAD_SYSTEM_END #define THREAD_ATOMIC_START DEFER_INTS #define THREAD_ATOMIC_END ENABLE_INTS #define THREAD_PASS rb_thread_schedule() -#define CHECK_INTERRUPT do {\ - if (rb_trap_pending) {\ - if (! rb_prohibit_interrupt) {\ - rb_trap_exec();\ - }\ - }\ -} while (0) #define DEFAULT_WARN_FUNCTION onig_rb_warn #define DEFAULT_VERB_WARN_FUNCTION onig_rb_warning @@ -121,7 +116,7 @@ #endif /* else NOT_RUBY */ #define STATE_CHECK_STRING_THRESHOLD_LEN 7 -#define STATE_CHECK_BUFF_MAX_SIZE 0x08000000 +#define STATE_CHECK_BUFF_MAX_SIZE 0x4000 #define THREAD_PASS_LIMIT_COUNT 8 #define xmemset memset @@ -129,16 +124,13 @@ #define xmemmove memmove #if defined(_WIN32) && !defined(__GNUC__) #define xalloca _alloca -#ifdef NOT_RUBY -#if _MSC_VER < 1500 -# define vsnprintf _vsnprintf -#endif +#ifndef vsnprintf +#define vsnprintf _vsnprintf #endif #else #define xalloca alloca #endif - #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) #define ONIG_STATE_INC(reg) (reg)->state++ #define ONIG_STATE_DEC(reg) (reg)->state-- @@ -235,6 +227,10 @@ #endif #endif +#ifdef __BORLANDC__ +#include <malloc.h> +#endif + #ifdef ONIG_DEBUG # include <stdio.h> #endif @@ -258,7 +254,8 @@ #define NULL_UCHARP ((UChar* )0) #ifndef PLATFORM_UNALIGNED_WORD_ACCESS -#define WORD_ALIGNMENT_SIZE SIZEOF_INT +/* sizeof(OnigCodePoint) */ +#define WORD_ALIGNMENT_SIZE SIZEOF_LONG #define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\ (pad_size) = WORD_ALIGNMENT_SIZE \ @@ -586,8 +583,6 @@ enum OpCode { OP_WORD, OP_NOT_WORD, - OP_WORD_SB, - OP_WORD_MB, OP_WORD_BOUND, OP_NOT_WORD_BOUND, OP_WORD_BEGIN, diff --git a/ext/mbstring/oniguruma/regparse.c b/ext/mbstring/oniguruma/regparse.c index 26fc35b119..af5c8593e6 100644 --- a/ext/mbstring/oniguruma/regparse.c +++ b/ext/mbstring/oniguruma/regparse.c @@ -2,7 +2,7 @@ regparse.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -64,13 +64,13 @@ extern void onig_null_warn(const char* s) { } extern void onig_rb_warn(const char* s) { - rb_warn(s); + rb_warn("%s", s); } extern void onig_rb_warning(const char* s) { - rb_warning(s); + rb_warning("%s", s); } #endif @@ -1051,9 +1051,9 @@ onig_node_free(Node* node) } break; - case N_QUALIFIER: - if (NQUALIFIER(node).target) - onig_node_free(NQUALIFIER(node).target); + case N_QUANTIFIER: + if (NQUANTIFIER(node).target) + onig_node_free(NQUANTIFIER(node).target); break; case N_EFFECT: @@ -1088,7 +1088,7 @@ onig_node_free(Node* node) #ifdef USE_RECYCLE_NODE extern int -onig_free_node_list() +onig_free_node_list(void) { FreeNode* n; @@ -1104,7 +1104,7 @@ onig_free_node_list() #endif static Node* -node_new() +node_new(void) { Node* node; @@ -1133,7 +1133,7 @@ initialize_cclass(CClassNode* cc) } static Node* -node_new_cclass() +node_new_cclass(void) { Node* node = node_new(); CHECK_NULL_RETURN(node); @@ -1202,7 +1202,7 @@ node_new_ctype(int type) } static Node* -node_new_anychar() +node_new_anychar(void) { Node* node = node_new(); CHECK_NULL_RETURN(node); @@ -1318,25 +1318,25 @@ node_new_call(UChar* name, UChar* name_end) #endif static Node* -node_new_qualifier(int lower, int upper, int by_number) +node_new_quantifier(int lower, int upper, int by_number) { Node* node = node_new(); CHECK_NULL_RETURN(node); - node->type = N_QUALIFIER; - NQUALIFIER(node).state = 0; - NQUALIFIER(node).target = NULL; - NQUALIFIER(node).lower = lower; - NQUALIFIER(node).upper = upper; - NQUALIFIER(node).greedy = 1; - NQUALIFIER(node).target_empty_info = NQ_TARGET_ISNOT_EMPTY; - NQUALIFIER(node).head_exact = NULL_NODE; - NQUALIFIER(node).next_head_exact = NULL_NODE; - NQUALIFIER(node).is_refered = 0; + node->type = N_QUANTIFIER; + NQUANTIFIER(node).state = 0; + NQUANTIFIER(node).target = NULL; + NQUANTIFIER(node).lower = lower; + NQUANTIFIER(node).upper = upper; + NQUANTIFIER(node).greedy = 1; + NQUANTIFIER(node).target_empty_info = NQ_TARGET_ISNOT_EMPTY; + NQUANTIFIER(node).head_exact = NULL_NODE; + NQUANTIFIER(node).next_head_exact = NULL_NODE; + NQUANTIFIER(node).is_refered = 0; if (by_number != 0) - NQUALIFIER(node).state |= NST_BY_NUMBER; + NQUANTIFIER(node).state |= NST_BY_NUMBER; #ifdef USE_COMBINATION_EXPLOSION_CHECK - NQUALIFIER(node).comb_exp_check_num = 0; + NQUANTIFIER(node).comb_exp_check_num = 0; #endif return node; @@ -1481,6 +1481,7 @@ onig_node_new_str(const UChar* s, const UChar* end) return node_new_str(s, end); } +#ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG static Node* node_new_str_raw(UChar* s, UChar* end) { @@ -1488,20 +1489,21 @@ node_new_str_raw(UChar* s, UChar* end) NSTRING_SET_RAW(node); return node; } +#endif static Node* -node_new_empty() +node_new_empty(void) { return node_new_str(NULL, NULL); } static Node* -node_new_str_raw_char(UChar c) +node_new_str_char(UChar c) { UChar p[1]; p[0] = c; - return node_new_str_raw(p, p + 1); + return node_new_str(p, p + 1); } static Node* @@ -1531,6 +1533,24 @@ str_node_can_be_split(StrNode* sn, OnigEncoding enc) return 0; } +#ifdef USE_PAD_TO_SHORT_BYTE_CHAR +static int +node_str_head_pad(StrNode* sn, int num, UChar val) +{ + UChar buf[NODE_STR_BUF_SIZE]; + int i, len; + + len = sn->end - sn->s; + onig_strcpy(buf, sn->s, sn->end); + onig_strcpy(&(sn->s[num]), buf, buf + len); + sn->end += num; + + for (i = 0; i < num; i++) { + sn->s[i] = val; + } +} +#endif + extern int onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc) { @@ -1925,29 +1945,6 @@ and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf) } static int -clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc) -{ - BBuf *tbuf; - int r; - - if (IS_CCLASS_NOT(cc)) { - bitset_invert(cc->bs); - - if (! ONIGENC_IS_SINGLEBYTE(enc)) { - r = not_code_range_buf(enc, cc->mbuf, &tbuf); - if (r != 0) return r; - - bbuf_free(cc->mbuf); - cc->mbuf = tbuf; - } - - CCLASS_CLEAR_NOT(cc); - } - - return 0; -} - -static int and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc) { int r, not1, not2; @@ -2089,7 +2086,7 @@ conv_backslash_value(int c, ScanEnv* env) } static int -is_invalid_qualifier_target(Node* node) +is_invalid_quantifier_target(Node* node) { switch (NTYPE(node)) { case N_ANCHOR: @@ -2098,19 +2095,19 @@ is_invalid_qualifier_target(Node* node) case N_EFFECT: if (NEFFECT(node).type == EFFECT_OPTION) - return is_invalid_qualifier_target(NEFFECT(node).target); + return is_invalid_quantifier_target(NEFFECT(node).target); break; case N_LIST: /* ex. (?:\G\A)* */ do { - if (! is_invalid_qualifier_target(NCONS(node).left)) return 0; + if (! is_invalid_quantifier_target(NCONS(node).left)) return 0; } while (IS_NOT_NULL(node = NCONS(node).right)); return 0; break; case N_ALT: /* ex. (?:abc|\A)* */ do { - if (is_invalid_qualifier_target(NCONS(node).left)) return 1; + if (is_invalid_quantifier_target(NCONS(node).left)) return 1; } while (IS_NOT_NULL(node = NCONS(node).right)); break; @@ -2122,7 +2119,7 @@ is_invalid_qualifier_target(Node* node) /* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */ static int -popular_qualifier_num(QualifierNode* qf) +popular_quantifier_num(QuantifierNode* qf) { if (qf->greedy) { if (qf->lower == 0) { @@ -2166,15 +2163,15 @@ static enum ReduceType ReduceTypeTable[6][6] = { }; extern void -onig_reduce_nested_qualifier(Node* pnode, Node* cnode) +onig_reduce_nested_quantifier(Node* pnode, Node* cnode) { int pnum, cnum; - QualifierNode *p, *c; + QuantifierNode *p, *c; - p = &(NQUALIFIER(pnode)); - c = &(NQUALIFIER(cnode)); - pnum = popular_qualifier_num(p); - cnum = popular_qualifier_num(c); + p = &(NQUANTIFIER(pnode)); + c = &(NQUANTIFIER(cnode)); + pnum = popular_quantifier_num(p); + cnum = popular_quantifier_num(c); switch(ReduceTypeTable[cnum][pnum]) { case RQ_DEL: @@ -2282,7 +2279,7 @@ typedef struct { static int -fetch_range_qualifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env) +fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env) { int low, up, syn_allow, non_low = 0; int r = 0; @@ -3035,7 +3032,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) case '{': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break; - r = fetch_range_qualifier(&p, end, tok, env); + r = fetch_range_quantifier(&p, end, tok, env); if (r < 0) return r; /* error */ if (r == 0) goto greedy_check; else if (r == 2) { /* {n} */ @@ -3454,7 +3451,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) case '{': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break; - r = fetch_range_qualifier(&p, end, tok, env); + r = fetch_range_quantifier(&p, end, tok, env); if (r < 0) return r; /* error */ if (r == 0) goto greedy_check; else if (r == 2) { /* {n} */ @@ -3512,7 +3509,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break; tok->type = TK_ANCHOR; tok->u.subtype = (IS_SINGLELINE(env->option) - ? ANCHOR_END_BUF : ANCHOR_END_LINE); + ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE); break; case '[': @@ -4619,11 +4616,11 @@ static const char* ReduceQStr[] = { }; static int -set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env) +set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env) { - QualifierNode* qn; + QuantifierNode* qn; - qn = &(NQUALIFIER(qnode)); + qn = &(NQUANTIFIER(qnode)); if (qn->lower == 1 && qn->upper == 1) { return 1; } @@ -4642,15 +4639,15 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env) } break; - case N_QUALIFIER: + case N_QUANTIFIER: { /* check redundant double repeat. */ /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */ - QualifierNode* qnt = &(NQUALIFIER(target)); - int nestq_num = popular_qualifier_num(qn); - int targetq_num = popular_qualifier_num(qnt); + QuantifierNode* qnt = &(NQUANTIFIER(target)); + int nestq_num = popular_quantifier_num(qn); + int targetq_num = popular_quantifier_num(qnt); #ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR - if (!IS_QUALIFIER_BY_NUMBER(qn) && !IS_QUALIFIER_BY_NUMBER(qnt) && + if (!IS_QUANTIFIER_BY_NUMBER(qn) && !IS_QUANTIFIER_BY_NUMBER(qnt) && IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) { UChar buf[WARN_BUFSIZE]; @@ -4686,7 +4683,7 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env) #endif if (targetq_num >= 0) { if (nestq_num >= 0) { - onig_reduce_nested_qualifier(qnode, target); + onig_reduce_nested_quantifier(qnode, target); goto q_exit; } else if (targetq_num == 1 || targetq_num == 2) { /* * or + */ @@ -4708,61 +4705,6 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env) return 0; } -static int -make_compound_alt_node_from_cc(OnigAmbigType ambig_flag, OnigEncoding enc, - CClassNode* cc, Node** root) -{ - int r, i, j, k, clen, len, ncode, n; - UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; - Node **ptail, *snode = NULL_NODE; - const OnigCompAmbigCodes* ccs; - const OnigCompAmbigCodeItem* ci; - OnigAmbigType amb; - - n = 0; - *root = NULL_NODE; - ptail = root; - - - for (amb = 0x01; amb <= ONIGENC_AMBIGUOUS_MATCH_LIMIT; amb <<= 1) { - if ((amb & ambig_flag) == 0) continue; - - ncode = ONIGENC_GET_ALL_COMP_AMBIG_CODES(enc, amb, &ccs); - for (i = 0; i < ncode; i++) { - if (onig_is_code_in_cc(enc, ccs[i].code, cc)) { - for (j = 0; j < ccs[i].n; j++) { - ci = &(ccs[i].items[j]); - if (ci->len > 1) { /* compound only */ - if (IS_CCLASS_NOT(cc)) clear_not_flag_cclass(cc, enc); - - clen = ci->len; - for (k = 0; k < clen; k++) { - len = ONIGENC_CODE_TO_MBC(enc, ci->code[k], buf); - - if (k == 0) { - snode = node_new_str_raw(buf, buf + len); - CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY); - } - else { - r = onig_node_str_cat(snode, buf, buf + len); - if (r < 0) return r; - } - } - - *ptail = node_new_alt(snode, NULL_NODE); - CHECK_NULL_RETURN_VAL(*ptail, ONIGERR_MEMORY); - ptail = &(NCONS(*ptail).right); - n++; - } - } - } - } - } - - return n; -} - - #ifdef USE_SHARED_CCLASS_TABLE #define THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS 8 @@ -4826,11 +4768,11 @@ i_free_shared_class(type_cclass_key* key, Node* node, void* arg) } extern int -onig_free_shared_cclass_table() +onig_free_shared_cclass_table(void) { if (IS_NOT_NULL(OnigTypeCClassTable)) { onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0); - xfree(OnigTypeCClassTable); + onig_st_free_table(OnigTypeCClassTable); OnigTypeCClassTable = NULL; } @@ -4911,23 +4853,36 @@ parse_exp(Node** np, OnigToken* tok, int term, case TK_RAW_BYTE: tk_raw_byte: { - *np = node_new_str_raw_char((UChar )tok->u.c); + *np = node_new_str_char((UChar )tok->u.c); CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); len = 1; while (1) { + if (len >= ONIGENC_MBC_MINLEN(env->enc)) { + if (len == enc_len(env->enc, NSTRING(*np).s)) { + r = fetch_token(tok, src, end, env); + goto string_end; + } + } + r = fetch_token(tok, src, end, env); if (r < 0) return r; if (r != TK_RAW_BYTE) { -#ifndef NUMBERED_CHAR_IS_NOT_CASE_AMBIG - if (len >= enc_len(env->enc, NSTRING(*np).s)) { - NSTRING_CLEAR_RAW(*np); +#ifdef USE_PAD_TO_SHORT_BYTE_CHAR + int rem; + if (len < ONIGENC_MBC_MINLEN(env->enc)) { + rem = ONIGENC_MBC_MINLEN(env->enc) - len; + (void )node_str_head_pad(&NSTRING(*np), rem, (UChar )0); + if (len + rem == enc_len(env->enc, NSTRING(*np).s)) { + goto string_end; + } } #endif - goto string_end; + return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING; } r = node_str_cat_char(*np, (UChar )tok->u.c); if (r < 0) return r; + len++; } } @@ -5098,24 +5053,6 @@ parse_exp(Node** np, OnigToken* tok, int term, } } } - - if (IS_IGNORECASE(env->option) && - (env->ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - int res; - Node *alt_root, *work; - - res = make_compound_alt_node_from_cc(env->ambig_flag, env->enc, - cc, &alt_root); - if (res < 0) return res; - if (res > 0) { - work = node_new_alt(*np, alt_root); - if (IS_NULL(work)) { - onig_node_free(alt_root); - return ONIGERR_MEMORY; - } - *np = work; - } - } } break; @@ -5127,9 +5064,9 @@ parse_exp(Node** np, OnigToken* tok, int term, case TK_ANYCHAR_ANYTIME: *np = node_new_anychar(); CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); - qn = node_new_qualifier(0, REPEAT_INFINITE, 0); + qn = node_new_quantifier(0, REPEAT_INFINITE, 0); CHECK_NULL_RETURN_VAL(qn, ONIGERR_MEMORY); - NQUALIFIER(qn).target = *np; + NQUANTIFIER(qn).target = *np; *np = qn; break; @@ -5185,14 +5122,14 @@ parse_exp(Node** np, OnigToken* tok, int term, repeat: if (r == TK_OP_REPEAT || r == TK_INTERVAL) { - if (is_invalid_qualifier_target(*targetp)) + if (is_invalid_quantifier_target(*targetp)) return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID; - qn = node_new_qualifier(tok->u.repeat.lower, tok->u.repeat.upper, + qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper, (r == TK_INTERVAL ? 1 : 0)); CHECK_NULL_RETURN_VAL(qn, ONIGERR_MEMORY); - NQUALIFIER(qn).greedy = tok->u.repeat.greedy; - r = set_qualifier(qn, *targetp, group, env); + NQUANTIFIER(qn).greedy = tok->u.repeat.greedy; + r = set_quantifier(qn, *targetp, group, env); if (r < 0) return r; if (tok->u.repeat.possessive != 0) { diff --git a/ext/mbstring/oniguruma/regparse.h b/ext/mbstring/oniguruma/regparse.h index c0cdded7b4..b25618a33f 100644 --- a/ext/mbstring/oniguruma/regparse.h +++ b/ext/mbstring/oniguruma/regparse.h @@ -4,7 +4,7 @@ regparse.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -37,7 +37,7 @@ #define N_CTYPE (1<< 2) #define N_ANYCHAR (1<< 3) #define N_BACKREF (1<< 4) -#define N_QUALIFIER (1<< 5) +#define N_QUANTIFIER (1<< 5) #define N_EFFECT (1<< 6) #define N_ANCHOR (1<< 7) #define N_LIST (1<< 8) @@ -52,7 +52,7 @@ #define NSTRING(node) ((node)->u.str) #define NCCLASS(node) ((node)->u.cclass) #define NCTYPE(node) ((node)->u.ctype) -#define NQUALIFIER(node) ((node)->u.qualifier) +#define NQUANTIFIER(node) ((node)->u.quantifier) #define NANCHOR(node) ((node)->u.anchor) #define NBACKREF(node) ((node)->u.backref) #define NEFFECT(node) ((node)->u.effect) @@ -131,7 +131,7 @@ typedef struct { #ifdef USE_COMBINATION_EXPLOSION_CHECK int comb_exp_check_num; /* 1,2,3...: check, 0: no check */ #endif -} QualifierNode; +} QuantifierNode; /* status bits */ #define NST_MIN_FIXED (1<<0) @@ -170,8 +170,8 @@ typedef struct { #define IS_CALL_NAME_REF(cn) (((cn)->state & NST_NAME_REF) != 0) #define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0) #define IS_BACKREF_NEST_LEVEL(bn) (((bn)->state & NST_NEST_LEVEL) != 0) -#define IS_QUALIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0) -#define IS_QUALIFIER_BY_NUMBER(qn) (((qn)->state & NST_BY_NUMBER) != 0) +#define IS_QUANTIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0) +#define IS_QUANTIFIER_BY_NUMBER(qn) (((qn)->state & NST_BY_NUMBER) != 0) typedef struct { int state; @@ -230,15 +230,15 @@ typedef struct { typedef struct _Node { int type; union { - StrNode str; - CClassNode cclass; - QualifierNode qualifier; - EffectNode effect; + StrNode str; + CClassNode cclass; + QuantifierNode quantifier; + EffectNode effect; #ifdef USE_SUBEXP_CALL - CallNode call; + CallNode call; #endif - BackrefNode backref; - AnchorNode anchor; + BackrefNode backref; + AnchorNode anchor; struct { struct _Node* left; struct _Node* right; @@ -306,7 +306,7 @@ extern int onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map)); extern int onig_strncmp P_((const UChar* s1, const UChar* s2, int n)); extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end)); extern int onig_scan_unsigned_number P_((UChar** src, const UChar* end, OnigEncoding enc)); -extern void onig_reduce_nested_qualifier P_((Node* pnode, Node* cnode)); +extern void onig_reduce_nested_quantifier P_((Node* pnode, Node* cnode)); extern void onig_node_conv_to_str_node P_((Node* node, int raw)); extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end)); extern void onig_node_free P_((Node* node)); @@ -315,7 +315,7 @@ extern Node* onig_node_new_anchor P_((int type)); extern Node* onig_node_new_str P_((const UChar* s, const UChar* end)); extern Node* onig_node_new_list P_((Node* left, Node* right)); extern void onig_node_str_clear P_((Node* node)); -extern int onig_free_node_list(); +extern int onig_free_node_list P_((void)); extern int onig_names_free P_((regex_t* reg)); extern int onig_parse_make_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env)); diff --git a/ext/mbstring/oniguruma/regposerr.c b/ext/mbstring/oniguruma/regposerr.c index 38f8ed55a0..e54b5c4089 100644 --- a/ext/mbstring/oniguruma/regposerr.c +++ b/ext/mbstring/oniguruma/regposerr.c @@ -76,7 +76,7 @@ regerror(int posix_ecode, const regex_t* reg, char* buf, size_t size) s = ""; } else { - sprintf(tbuf, "undefined error code (%d)", posix_ecode); + sprintf(tbuf, "undefined error code (%d)", posix_ecode); s = tbuf; } diff --git a/ext/mbstring/oniguruma/regposix.c b/ext/mbstring/oniguruma/regposix.c index 23ae95693a..a3bacf722e 100644 --- a/ext/mbstring/oniguruma/regposix.c +++ b/ext/mbstring/oniguruma/regposix.c @@ -2,7 +2,7 @@ regposix.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/regsyntax.c b/ext/mbstring/oniguruma/regsyntax.c index 542e599c8f..9114e39e6b 100644 --- a/ext/mbstring/oniguruma/regsyntax.c +++ b/ext/mbstring/oniguruma/regsyntax.c @@ -2,7 +2,7 @@ regsyntax.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/regversion.c b/ext/mbstring/oniguruma/regversion.c index cbebdb026c..5fad0cc18c 100644 --- a/ext/mbstring/oniguruma/regversion.c +++ b/ext/mbstring/oniguruma/regversion.c @@ -2,7 +2,7 @@ regversion.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -47,7 +47,7 @@ onig_copyright(void) { static char s[58]; - sprintf(s, "Oniguruma %d.%d.%d : Copyright (C) 2002-2008 K.Kosako", + sprintf(s, "Oniguruma %d.%d.%d : Copyright (C) 2002-2006 K.Kosako", ONIGURUMA_VERSION_MAJOR, ONIGURUMA_VERSION_MINOR, ONIGURUMA_VERSION_TEENY); |