diff options
Diffstat (limited to 'ext/mbstring')
54 files changed, 1678 insertions, 898 deletions
diff --git a/ext/mbstring/config.m4 b/ext/mbstring/config.m4 index ad14720677..49e0a86171 100644 --- a/ext/mbstring/config.m4 +++ b/ext/mbstring/config.m4 @@ -149,6 +149,7 @@ esac oniguruma/enc/utf16_le.c oniguruma/enc/utf32_be.c oniguruma/enc/utf32_le.c + oniguruma/enc/gb18030.c ]) fi ]) diff --git a/ext/mbstring/config.w32 b/ext/mbstring/config.w32 index 4a3a7f0a1c..1ab4ce9e97 100644 --- a/ext/mbstring/config.w32 +++ b/ext/mbstring/config.w32 @@ -61,7 +61,7 @@ if (PHP_MBSTRING == "yes") { iso8859_7.c iso8859_8.c iso8859_9.c iso8859_10.c \ iso8859_11.c iso8859_13.c iso8859_14.c iso8859_15.c iso8859_16.c \ koi8.c koi8_r.c sjis.c utf8.c unicode.c utf16_be.c utf16_le.c \ - utf32_be.c utf32_le.c", "mbstring"); + utf32_be.c utf32_le.c gb18030.c", "mbstring"); ADD_SOURCES("ext/mbstring", "php_mbregex.c", "mbstring"); } } diff --git a/ext/mbstring/oniguruma/COPYING b/ext/mbstring/oniguruma/COPYING index ed3fa53b25..4d321bb93b 100644 --- a/ext/mbstring/oniguruma/COPYING +++ b/ext/mbstring/oniguruma/COPYING @@ -1,4 +1,4 @@ -OniGuruma LICENSE +Oniguruma LICENSE ----------------- When this software is partly used or it is distributed with Ruby, @@ -6,7 +6,7 @@ this of Ruby follows the license of Ruby. It follows the BSD license in the case of the one except for it. /*- - * Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/HISTORY b/ext/mbstring/oniguruma/HISTORY index c648c54551..17b696f84c 100644 --- a/ext/mbstring/oniguruma/HISTORY +++ b/ext/mbstring/oniguruma/HISTORY @@ -1,5 +1,267 @@ History +2006/07/18: Version 4.2.0 + +2006/07/18: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux]. +2006/07/18: [new] (thanks Wolfgang Nadasi-Donner) + add back reference with nest level. 
+ \k<name+n>, \k<name-n> +2006/07/11: [impl] change long to unsigned long for ONIG_OPTION_XXX + and ONIG_SYN_XXX number literals. + +2006/07/03: Version 4.1.2 + +2006/07/03: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux]. +2006/07/03: [spec] (thanks Wolfgang Nadasi-Donner) + allow \G in look-behind. + add ANCHOR_BEGIN_POSITION flag in setup_tree(). +2006/06/12: [impl] (thanks matz) + fix cast from char* to const char* + in onig_snprintf_with_pattern(). + fix cast from char* to const char* + for PopularQStr[] and ReduceQStr[]. + +2006/05/22: Version 4.1.1 + +2006/05/22: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux]. +2006/05/22: [impl] add position string argument to STACK_BASE_CHECK(). +2006/05/22: [bug] (thanks NARUSE, Yui) + add STK_NULL_CHECK_END to IS_TO_VOID_TARGET(). + ex. core dump in + /(?<pare>\(([^\(\)]++|\g<pare>)*+\))/.match('((a))') + +2006/05/15: Version 4.1.0 + +2006/05/15: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux]. +2006/05/15: [impl] thread atomic changes for onig_end() and + onig_free_node_list(). +2006/05/15: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux]. +2005/05/15: [dist] update API, API.ja, FAQ, FAQ.ja. +2006/05/15: [spec] remove onig_recompile(), onig_recompile_deluxe() + and re_recompile_pattern(). + add config USE_RECOMPILE_API. +2006/05/15: [impl] improved thread safe implementation of onig_search() + and onig_match(). + +2006/05/11: Version 4.0.4 + +2006/05/11: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux]. +2006/05/11: [bug] (thanks Yuji Kaneda) + dead-lock in onig_end(). +2006/05/11: [dist] update index.html. + +2006/05/08: Version 4.0.3 + +2006/05/08: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux]. +2006/05/08: [bug] (thanks Allan Odgaard) + Segmentation fault in backward search. + ex. /^\t.*$/ +2006/04/18: [dist] update index.html. +2006/04/05: [dist] update index.html. +2006/03/24: [dist] update doc/RE, doc/RE.ja. 
+ +2006/03/23: Version 4.0.2 + +2006/03/22: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux]. +2006/03/22: [impl] add both of ONIG_OPTION_DONT_CAPTURE_GROUP + and ONIG_OPTION_CAPTURE_GROUP check. +2006/03/22: [spec] add error code ONIGERR_INVALID_COMBINATION_OF_OPTIONS. +2006/03/22: [impl] remove USE_NAMED_GROUP condition from + ONIG_OPTION_DONT_CAPTURE_GROUP check in parse_effect(). +2006/03/22: [new] add API onig_noname_group_capture_is_active(). +2006/03/01: [spec] rename regex object type from regex_t to OnigRegexType. + add typedef OnigRegexType regex_t + unless ONIG_ESCAPE_REGEX_T_COLLISION is defined. +2006/02/27: [spec] change ONIG_MAX_MULTI_BYTE_RANGES_NUM from 1000 + to 10000. (for docdiff program) +2006/02/17: [dist] change COPYING year 2005 -> 2006. + +2006/02/07: Version 4.0.1 + +2006/02/07: [test] success in ruby 1.9.0 (2005-11-28) [i686-linux]. +2006/02/07: [bug] memory leaks in onig_free_shared_cclass_table(). +2006/02/03: [ruby] add -m 0644 option to install command in "make 19". +2006/02/03: [impl] rename ANCHOR_ANYCHAR_STAR_PL to ANCHOR_ANYCHAR_STAR_ML. + change from IS_POSIXLINE() to IS_MULTILINE() + for ANCHOR_ANYCHAR_START/_ML decision + in optimize_node_left(). +2006/01/26: [dist] update index.html for Oniguruma 2.5.3. +2006/01/25: [dist] update URL in index.html. + +2006/01/24: Version 4.0.0 + +2006/01/24: [test] success in ruby 1.9.0 (2005-11-28) [i386-cygwin]. +2006/01/24: [test] success in ruby 1.9.0 (2005-11-28) [i686-linux]. +2006/01/24: [dist] remove warnings from sample/encode.c. +2006/01/24: [dist] change install description in README(.ja). +2006/01/24: [dist] remove re.c.XXX.patch from distribution and CVS. +2006/01/24: [dist] --- support shared library --- + use GNU libtool/automake. + change configure.in and add Makefile.am, sample/Makefile.am. + add AUTHORS file. +2006/01/24: [dist] test programs return exit code -1 when test fails. +2006/01/24: [bug] (thanks KIMURA Koichi) + invalid syntax definition in ONIG_SYNTAX_GREP. 
+ ONIG_SYN_OP_BRACE_INTERVAL + -> ONIG_SYN_OP_ESC_BRACE_INTERVAL +2006/01/23: [dist] fix configure.in for onig-config. +2006/01/19: [new] add new config USE_UNICODE_ALL_LINE_TERMINATORS. + (U+000d, U+0085, U+2028, U+2029) +2005/12/29: [dist] change pmatch array size to 25 in testconv.rb. +2005/12/26: [dist] fix name in test.rb. +2005/12/26: [dist] update index.html for 2.5.1. + +2005/11/29: Version 3.9.1 + +2005/11/29: [test] success in ruby 1.9.0 (2005-11-28) [i686-linux]. +2005/11/24: [test] success in ruby 1.9.0 (2005-08-09) [i686-linux]. +2005/11/21: [test] success in ruby 1.9.0 (2005-11-20) [i386-cygwin]. +2005/11/21: [bug] (thanks Allan Odgaard) + utf-8 character comments in extended mode leads + invalid result. + ex. /(?x)(?<= # <any-utf-8 multibyte char>o\n~) / + fix onigenc_unicode_is_code_ctype() and + utf8_is_code_ctype(). +2005/11/20: [bug] (thanks MATSUMOTO Satoshi) (thanks Isao Sonobe) + begin-line anchor and BM search optimization leads + invalid result in UTF-16/32. + fix in set_optimize_exact_info(). + +2005/11/20: Version 3.9.0 + +2005/11/20: [test] success in ruby 1.9.0 (2005-11-20) [i386-cygwin]. +2005/11/20: [test] success in ruby 1.9.0 (2005-10-18) [i386-cygwin]. +2005/11/20: [new] add new config USE_CRNL_AS_LINE_TERMINATOR. + (!!! NO SUPPORT experimental option !!!) +2005/11/15: [bug] (thanks Allan Odgaard) + tok->escape was not cleared in fetch_token_in_cc(). + ex. [\s&&[^\n]] makes wrong result. +2005/10/18: [impl] (thanks nobu) + change sjis_mbc_enc_len() + and node_new_cclass_by_codepoint_range() scope to static. +2005/09/05: [dist] remove link to MultiFind. +2005/09/01: [dist] add link to yagrep. + +2005/08/23: Version 3.8.9 + +2005/08/23: [test] success in ruby 1.9.0 (2005-08-09) [i686-linux]. +2005/08/23: [inst] fix Makefile.in for make ctest/ptest. + +2005/08/23: Version 3.8.8 + +2005/08/23: [test] success in ruby 1.9.0 (2005-08-09) [i686-linux]. +2005/08/23: [impl] split is_code_in_cc() from onig_is_code_in_cc(). 
+2005/08/23: [impl] should check DATA_ENSURE() at OP_CCLASS_NODE in match_at(). +2005/08/23: [impl] (thanks akr) + add ONIG_OPTION_MAXBIT for escape conflict with + Ruby's option. +2005/08/22: [impl] escape GCC 4.0 warnings for testc.c. +2005/08/22: [bug] (thanks nobu, matz) [ruby-dev:26840] + UTF-8 0xFE, 0xFF handling bug in code_is_in_cclass_node(). + abort on /\S*/ =~ "\xfe" +2005/08/22: [impl] escape GCC 4.0 warnings for sample/*.c. +2005/08/22: [impl] fix testconvu.rb. +2005/08/22: [impl] escape GCC 4.0 warnings. + +2005/08/09: Version 3.8.7 + +2005/08/09: [test] success in ruby 1.9.0 (2005-08-09) [i686-linux]. +2005/08/09: [bug] (thanks Allan Odgaard) + should not call enc_len() for s == range + in onig_search(). +2005/08/01: [dist] add mkdir $prefix, mkdir $exec_prefix to make install. + +2005/07/27: Version 3.8.6 + +2005/07/27: [test] success in ruby 1.9.0 (2005-07-26) [i686-linux]. +2005/07/27: [impl] update onig-config.in. +2005/07/26: [new] (thanks Yen-Ju Chen) + add Oniguruma configuration check program. + (onig-config.in) + +2005/07/14: Version 3.8.5 + +2005/07/14: [test] success in ruby 1.9.0 (2005-07-14) [i686-linux]. +2005/07/11: [test] success in ruby 1.9.0 (2005-07-04) [i686-linux]. +2005/07/11: [bug] (thanks nobu) [ruby-dev:26505] + invalid handling for /\c\x/ and /\C-\x/. + fix fetch_escaped_value(). +2005/07/05: [impl] (thanks Alexey Zakhlestine) + escape GCC 4.0 warnings. + +2005/07/01: Version 3.8.4 + +2005/07/01: [test] success in ruby 1.9.0 (2005-07-01) [i686-linux]. +2005/06/30: [test] success in ruby 1.9.0 (2005-06-28) [i686-linux]. +2005/06/30: [dist] add GB 18030 test to sample/encode.c. +2005/06/30: [impl] escape warning of gb18030_left_adjust_char_head(). +2005/06/30: [new] (contributed by KUBO Takehiro) + add new character encoding ONIG_ENCODING_GB18030. +2005/06/30: [bug] invalid ctype check for multibyte encodings. + ("graph", "print") + fix onigenc_mb2/4_is_code_ctype(), + eucjp_is_code_ctype() and sjis_is_code_ctype(). 
+2005/06/30: [bug] invalid conversion from code point to mbc in + onigenc_mb4_code_to_mbc(). + +2005/06/28: Version 3.8.3 + +2005/06/28: [test] success in ruby 1.9.0 (2005-06-28) [i686-linux]. +2005/06/27: [test] success in ruby 1.9.0 (2005-05-31) [i686-linux]. +2005/06/27: [bug] (thanks Wolfgang Nadasi-Donner) + invalid check for never ending recursion. + lower zero quantifier should be treated as + a non-recursive call alternative. + ex. /(?<bal>[^()]*(\(\g<bal>\)[^()]*)*)/ +2005/06/15: [impl] add divide_ambig_string_node_sub(). +2005/06/15: [dist] add a test to sample/encode.c. +2005/06/10: [new] add ONIG_SYNTAX_PERL_NG. (Perl + named group) + +2005/06/01: Version 3.8.2 + +2005/06/01: [test] success in ruby 1.9.0 (2005-05-31) [i686-linux]. +2005/05/31: [dist] add doc/FAQ and doc/FAQ.ja. +2005/05/31: [impl] minor change in node_new(). +2005/05/30: [test] success in ruby 1.9.0 (2005-05-11) [i686-linux]. +2005/05/30: [bug] (thanks Allan Odgaard) + FreeNodeList null check should be on thread-atomic + in node_new(). + +2005/05/11: Version 3.8.1 + +2005/05/11: [test] success in ruby 1.9.0 (2005-05-11) [i386-mswin32]. +2005/05/11: [dist] update win32/Makefile (make 19). +2005/05/11: [test] success in ruby 1.9.0 (2005-05-11) [i686-linux]. +2005/05/06: [test] success in ruby 1.9.0 (2005-05-06) [i686-linux]. +2005/05/06: [impl] (thanks nobu) [ruby-core:4815] + add #ifdef USE_VARIABLE_META_CHARS to goto label. +2005/04/25: [test] success in ruby 1.9.0 (2005-04-25) [i686-linux]. +2005/04/25: [impl] change DEFAULT_WARN_FUNCTION and DEFAULT_VERB_WARN_FUNCTION + to onig_rb_warn() and onig_rb_warning(). + +2005/04/15: Version 3.8.0 + +2005/04/15: [test] success in ruby 1.9.0 (2005-04-14) [i686-linux]. +2005/04/01: [test] success in ruby 1.9.0 (2005-03-24) [i686-linux]. +2005/04/01: [impl] (thanks Joe Orton) + (thanks Moriyoshi Koizumi) + many const-ification to many *.[ch] files. + +2005/03/25: Version 3.7.2 + +2005/03/25: [test] success in ruby 1.9.0 (2005-03-24) [i686-linux]. 
+2005/03/23: [test] success in ruby 1.9.0 (2005-03-20) [i686-linux]. +2005/03/23: [test] success in ruby 1.9.0 (2005-03-08) [i686-linux]. +2005/03/23: [new] add ONIG_SYNTAX_ASIS. +2005/03/23: [new] add ONIG_SYN_OP2_INEFFECTIVE_ESCAPE. +2005/03/09: [spec] rename MBCTYPE_XXX to RE_MBCTYPE_XXX. (GNU API) +2005/03/08: [test] success in ruby 1.9.0 (2005-03-08) [i686-linux]. +2005/03/08: [impl] (thanks matz) [ruby-dev:25783] + should not allocate memory for key data in st.c. + move st_*_strend() functions from st.c. fixed some + potential memory leaks. + (imported from Ruby 1.9 2005-03-08) + 2005/03/07: Version 3.7.1 2005/03/07: [test] success in ruby 1.9.0 (2005-03-07) [i686-linux]. @@ -24,7 +286,7 @@ History remove reggnu.c from make 19. 2005/02/19: [dist] update doc/API and doc/API.ja. 2005/02/19: [test] success in ruby 1.9.0 (2005-02-19) [i386-cygwin]. -2005/02/19: [impl] (thanks Alexey Zakhlestin) +2005/02/19: [impl] (thanks Alexey Zakhlestine) change UChar* to const UChar* in oniguruma.h, regenc.h and regparse.h. 2005/02/13: [impl] change UChar* to const UChar* in oniguruma.h and @@ -1366,8 +1628,30 @@ svn copy http://localhost/repos/trunk/oniguruma http://localhost/repos/branches/ <create tag> svn copy http://localhost/repos/trunk/oniguruma http://localhost/repos/tags/oniguruma/X.X.X -m "onigdXXXXXXXX" -<show all tags> + +<CVS: show all tags> cvs history -T -<add tag> +<CVS: add tag> cvs rtag "VERSION_X_X_X" oniguruma + + +<GNU Autotools: bootstrap> +* write Makefile.am and configure.in. +> aclocal +> libtoolize +> automake --foreign --add-missing +> autoconf +> configure --with-rubydir=... 
CFLAGS="-O2 -Wall" + + +<GNU libtool: version management> + + VERSION = current:revision:age + + current: interface number (from 0) + revision: implementation number of same interface (from 0) + age: number of supported previous interfaces + (if current only supported then age == 0) + +//END diff --git a/ext/mbstring/oniguruma/README b/ext/mbstring/oniguruma/README index dc4fb3b64b..f2cc7c9815 100644 --- a/ext/mbstring/oniguruma/README +++ b/ext/mbstring/oniguruma/README @@ -1,4 +1,4 @@ -README 2005/02/04 +README 2006/05/15 Oniguruma ---- (C) K.Kosako <sndgk393 AT ybb DOT ne DOT jp> @@ -14,11 +14,12 @@ Supported character encodings: ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE, EUC-JP, EUC-TW, EUC-KR, EUC-CN, - Shift_JIS, Big5, KOI8-R, KOI8 (*), + Shift_JIS, Big5, GB 18030, KOI8-R, KOI8, ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5, ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10, ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16 +* GB 18030: contributed by KUBO Takehiro * KOI8 is not included in library archive by default setup. (need to edit Makefile if you want to use it.) ------------------------------------------------------------ @@ -31,15 +32,20 @@ Install 2. make 3. make install - library file: libonig.a + * uninstall - test (ASCII/EUC-JP) + make uninstall - make ctest + * test (ASCII/EUC-JP) - uninstall + make atest - make uninstall + * configuration check + + onig-config --cflags + onig-config --libs + onig-config --prefix + onig-config --exec-prefix @@ -73,8 +79,21 @@ Regular Expressions Usage - Include oniguruma.h in your program. (native API) - See doc/API for native API. + Include oniguruma.h in your program. (Oniguruma API) + See doc/API for Oniguruma API. + + If you want to disable UChar type (== unsigned char) definition + in oniguruma.h, define ONIG_ESCAPE_UCHAR_COLLISION and then + include oniguruma.h. 
+ + If you want to disable regex_t type definition in oniguruma.h, + define ONIG_ESCAPE_REGEX_T_COLLISION and then include oniguruma.h. + + Example of the compiling/linking command line in Unix or Cygwin, + (prefix == /usr/local case) + + cc sample.c -L/usr/local/lib -lonig + If you want to use static link library(onig_s.lib) in Win32, add option -DONIG_EXTERN=extern to C compiler. @@ -83,19 +102,20 @@ Usage Sample Programs - sample/simple.c example of the minimum (native API) + sample/simple.c example of the minimum (Oniguruma API) sample/names.c example of the named group callback. sample/encode.c example of some encodings. sample/listcap.c example of the capture history. sample/posix.c POSIX API sample. sample/sql.c example of the variable meta characters. (SQL-like pattern matching) - sample/syntax.c Perl and Java syntax test. + sample/syntax.c Perl, Java and ASIS syntax test. Source Files oniguruma.h Oniguruma API header file. (public) + onig-config.in configuration check program template. regenc.h character encodings framework header file. regint.h internal definitions @@ -125,9 +145,10 @@ Source Files enc/euc_tw.c EUC-TW encoding. enc/euc_kr.c EUC-KR, EUC-CN encoding. enc/sjis.c Shift_JIS encoding. - enc/big5.c Big5 encoding. - enc/koi8.c KOI8 encoding. - enc/koi8_r.c KOI8-R encoding. + enc/big5.c Big5 encoding. + enc/gb18030.c GB 18030 encoding (contributed by KUBO Takehiro) + enc/koi8.c KOI8 encoding. + enc/koi8_r.c KOI8-R encoding. enc/iso8859_1.c ISO-8859-1 encoding. (Latin-1) enc/iso8859_2.c ISO-8859-2 encoding. (Latin-2) enc/iso8859_3.c ISO-8859-3 encoding. (Latin-3) @@ -159,7 +180,6 @@ Source Files API differences with Japanized GNU regex(version 0.12) of Ruby 1.8/1.6 + re_compile_fastmap() is removed. - + re_recompile_pattern() is added. + re_alloc_pattern() is added. @@ -169,7 +189,6 @@ ToDo ? Unicode Property. ? ambig-flag Katakana <-> Hiragana. ? add ONIG_OPTION_NOTBOS/NOTEOS. (\A, \z, \Z) - ? add ONIG_SYNTAX_ASIS. ?? \X (== \PM\pM*) ?? 
implement syntax behavior ONIG_SYN_CONTEXT_INDEP_ANCHORS. ?? variable line separator. diff --git a/ext/mbstring/oniguruma/README.ja b/ext/mbstring/oniguruma/README.ja index 44553abfef..2394e958ff 100644 --- a/ext/mbstring/oniguruma/README.ja +++ b/ext/mbstring/oniguruma/README.ja @@ -1,4 +1,4 @@ -README.ja 2005/02/04 +README.ja 2006/05/15 µ´¼Ö ---- (C) K.Kosako <sndgk393 AT ybb DOT ne DOT jp> @@ -14,11 +14,12 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/ ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE, EUC-JP, EUC-TW, EUC-KR, EUC-CN, - Shift_JIS, Big5, KOI8-R, KOI8 (*), + Shift_JIS, Big5, GB 18030, KOI8-R, KOI8, ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5, ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10, ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16 +* GB 18030: µ×ÊÝ·òÍλáÄó¶¡ * KOI8¤Ï¥Ç¥Õ¥©¥ë¥È¤Î¥»¥Ã¥È¥¢¥Ã¥×¤Ç¤Ï¥é¥¤¥Ö¥é¥ê¤ÎÃæ¤Ë´Þ¤Þ¤ì¤Ê¤¤¡£ (ɬÍפǤ¢¤ì¤ÐMakefile¤òÊÔ½¸¤¹¤ë¤³¤È) ------------------------------------------------------------ @@ -31,15 +32,21 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/ 2. make 3. 
make install - ¥é¥¤¥Ö¥é¥ê¥Õ¥¡¥¤¥ë: libonig.a + ¥¢¥ó¥¤¥ó¥¹¥È¡¼¥ë + + make uninstall Æ°ºî¥Æ¥¹¥È (ASCII/EUC-JP) - make ctest + make atest - ¥¢¥ó¥¤¥ó¥¹¥È¡¼¥ë - make uninstall + ¹½À®³Îǧ + + onig-config --cflags + onig-config --libs + onig-config --prefix + onig-config --exec-prefix @@ -71,8 +78,28 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/ »ÈÍÑÊýË¡ - »ÈÍѤ¹¤ë¥×¥í¥°¥é¥à¤Ç¡¢oniguruma.h¤ò¥¤¥ó¥¯¥ë¡¼¥É¤¹¤ë(Native API¤Î¾ì¹ç)¡£ - Native API¤Ë¤Ä¤¤¤Æ¤Ï¡¢doc/API.ja¤ò»²¾È¡£ + »ÈÍѤ¹¤ë¥×¥í¥°¥é¥à¤Ç¡¢oniguruma.h¤ò¥¤¥ó¥¯¥ë¡¼¥É¤¹¤ë(Oniguruma API¤Î¾ì¹ç)¡£ + Oniguruma API¤Ë¤Ä¤¤¤Æ¤Ï¡¢doc/API.ja¤ò»²¾È¡£ + + oniguruma.h¤ÇÄêµÁ¤µ¤ì¤Æ¤¤¤ë·¿Ì¾UChar(== unsigned char)¤ò̵¸ú¤Ë¤·¤¿¤¤¾ì¹ç + ¤Ë¤Ï¡¢ONIG_ESCAPE_UCHAR_COLLISION¤òdefine¤·¤Æ¤«¤éoniguruma.h¤ò¥¤¥ó¥¯¥ë¡¼¥É + ¤¹¤ë¤³¤È¡£¤³¤Î¤È¤¤Ë¤ÏUChar¤ÏÄêµÁ¤µ¤ì¤º¡¢OnigUChar¤È¤¤¤¦Ì¾Á°¤ÎÄêµÁ¤Î¤ß¤¬ + ͸ú¤Ë¤Ê¤ë¡£ + + oniguruma.h¤ÇÄêµÁ¤µ¤ì¤Æ¤¤¤ë·¿Ì¾regex_t¤ò̵¸ú¤Ë¤·¤¿¤¤¾ì¹ç¤Ë¤Ï¡¢ + ONIG_ESCAPE_REGEX_T_COLLISION¤òdefine¤·¤Æ¤«¤éoniguruma.h¤ò¥¤¥ó¥¯¥ë¡¼¥É + ¤¹¤ë¤³¤È¡£¤³¤Î¤È¤¤Ë¤Ïregex_t¤ÏÄêµÁ¤µ¤ì¤º¡¢OnigRegexType, OnigRegex¤È¤¤¤¦ + ̾Á°¤ÎÄêµÁ¤Î¤ß¤¬Í¸ú¤Ë¤Ê¤ë¡£ + + Unix/Cygwin¾å¤Ç¥³¥ó¥Ñ¥¤¥ë¡¢¥ê¥ó¥¯¤¹¤ë¾ì¹ç¤ÎÎ㡧 + (prefix¤¬/usr/local¤Î¤È¤) + cc sample.c -L/usr/local/lib -lonig + + GNU libtool¤ò»ÈÍѤ·¤Æ¤¤¤ë¤Î¤Ç¡¢¥×¥é¥Ã¥È¥Õ¥©¡¼¥à¤¬¶¦Í¥é¥¤¥Ö¥é¥ê¤ò¥µ¥Ý¡¼¥È¤·¤Æ + ¤¤¤ì¤Ð¡¢»ÈÍѤǤ¤ë¤è¤¦¤Ë¤Ê¤Ã¤Æ¤¤¤ë¡£ + ÀÅŪ¥é¥¤¥Ö¥é¥ê¤È¶¦Í¥é¥¤¥Ö¥é¥ê¤Î¤É¤Á¤é¤ò»ÈÍѤ¹¤ë¤«¤ò»ØÄꤹ¤ëÊýË¡¡¢¼Â¹Ô»þÅÀ¤Ç¤Î + ´Ä¶ÀßÄêÊýË¡¤Ë¤Ä¤Æ¤Ï¡¢¼«Ê¬¤ÇÄ´¤Ù¤Æ²¼¤µ¤¤¡£ + Win32¤Ç¥¹¥¿¥Æ¥£¥Ã¥¯¥ê¥ó¥¯¥é¥¤¥Ö¥é¥ê(onig_s.lib)¤ò¥ê¥ó¥¯¤¹¤ë¾ì¹ç¤Ë¤Ï¡¢ ¥³¥ó¥Ñ¥¤¥ë¤¹¤ë¤È¤¤Ë -DONIG_EXTERN=extern ¤ò¥³¥ó¥Ñ¥¤¥ë°ú¿ô¤ËÄɲ乤뤳¤È¡£ @@ -80,18 +107,19 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/ »ÈÍÑÎã¥×¥í¥°¥é¥à - sample/simple.c ºÇ¾®Îã (native API) + sample/simple.c ºÇ¾®Îã (Oniguruma API) sample/names.c ̾Á°ÉÕ¤¥°¥ë¡¼¥×¥³¡¼¥ë¥Ð¥Ã¥¯»ÈÍÑÎã sample/encode.c ´ö¤Ä¤«¤Îʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°»ÈÍÑÎã sample/listcap.c Êá³ÍÍúÎòµ¡Ç½¤Î»ÈÍÑÎã sample/posix.c POSIX API»ÈÍÑÎã sample/sql.c ²ÄÊѥ᥿ʸ»úµ¡Ç½»ÈÍÑÎã (SQL-like ¥Ñ¥¿¡¼¥ó) - sample/syntax.c 
Perl¤ÈJavaʸˡ¤Î¥Æ¥¹¥È + sample/syntax.c Perl¡¢Java¡¢ASISʸˡ¤Î¥Æ¥¹¥È ¥½¡¼¥¹¥Õ¥¡¥¤¥ë oniguruma.h µ´¼ÖAPI¥Ø¥Ã¥À (¸ø³«) + onig-config.in onig-config¥×¥í¥°¥é¥à ¥Æ¥ó¥×¥ì¡¼¥È regenc.h ʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°ÏÈÁȤߥإåÀ regint.h ÆâÉôÀë¸À @@ -122,6 +150,7 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/ enc/euc_kr.c EUC-KR, EUC-CN ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° enc/sjis.c Shift_JIS ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° enc/big5.c Big5 ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° + enc/gb18030.c GB 18030 ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° (µ×ÊÝ·òÍλá Äó¶¡) enc/koi8.c KOI8 ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° enc/koi8_r.c KOI8-R ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° enc/iso8859_1.c ISO-8859-1 (Latin-1) @@ -155,7 +184,6 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/ Ruby 1.8/1.6¤ÎÆüËܸ첽GNU regex¤È¤ÎAPI¤Î°ã¤¤ + re_compile_fastmap() ¤Ïºï½ü¤µ¤ì¤¿¡£ - + re_recompile_pattern() ¤¬Äɲ䵤줿¡£ + re_alloc_pattern() ¤¬Äɲ䵤줿¡£ @@ -165,7 +193,6 @@ Ruby 1.8/1.6¤ÎÆüËܸ첽GNU regex¤È¤ÎAPI¤Î°ã¤¤ ? Unicode¥×¥í¥Ñ¥Æ¥£ ? ambig-flag Katakana <-> Hiragana ? ONIG_OPTION_NOTBOS/NOTEOSÄɲà (\A, \z, \Z) - ? ONIG_SYNTAX_ASISÄɲà ?? \X (== \PM\pM*) ?? ʸˡÍ×ÁÇ ONIG_SYN_CONTEXT_INDEP_ANCHORS¤Î¼ÂÁõ ?? ²þ¹Ôʸ»ú(ʸ»úÎó)¤òÊѹ¹¤Ç¤¤ë @@ -174,4 +201,4 @@ Ruby 1.8/1.6¤ÎÆüËܸ첽GNU regex¤È¤ÎAPI¤Î°ã¤¤ and I'm thankful to Akinori MUSHA. -Mail Address: K.Kosako <sndgk393 AT ybb DOT ne DOT jp> +¥¢¥É¥ì¥¹: K.Kosako <sndgk393 AT ybb DOT ne DOT jp> diff --git a/ext/mbstring/oniguruma/config.h.in b/ext/mbstring/oniguruma/config.h.in index 5ca2056fb3..6e97e15820 100644 --- a/ext/mbstring/oniguruma/config.h.in +++ b/ext/mbstring/oniguruma/config.h.in @@ -1,69 +1,105 @@ -/* config.h.in. Generated automatically from configure.in by autoheader. */ +/* config.h.in. Generated from configure.in by autoheader. */ -/* Define if using alloca.c. */ -#undef C_ALLOCA - -/* Define to empty if the keyword does not work. */ -#undef const - -/* Define to one of _getb67, GETB67, getb67 for Cray-2 and Cray-YMP systems. - This function is required for alloca.c support on those systems. 
*/ +/* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP + systems. This function is required for `alloca.c' support on those systems. + */ #undef CRAY_STACKSEG_END -/* Define if you have alloca, as a function or macro. */ +/* Define to 1 if using `alloca.c'. */ +#undef C_ALLOCA + +/* Define to 1 if you have `alloca', as a function or macro. */ #undef HAVE_ALLOCA -/* Define if you have <alloca.h> and it should be used (not on Ultrix). */ +/* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix). + */ #undef HAVE_ALLOCA_H -/* If using the C implementation of alloca, define if you know the - direction of stack growth for your system; otherwise it will be - automatically deduced at run-time. - STACK_DIRECTION > 0 => grows toward higher addresses - STACK_DIRECTION < 0 => grows toward lower addresses - STACK_DIRECTION = 0 => direction of growth unknown - */ -#undef STACK_DIRECTION +/* Define to 1 if you have the <dlfcn.h> header file. */ +#undef HAVE_DLFCN_H -/* Define if you have the ANSI C header files. */ -#undef STDC_HEADERS +/* Define to 1 if you have the <inttypes.h> header file. */ +#undef HAVE_INTTYPES_H -/* Define if you can safely include both <sys/time.h> and <time.h>. */ -#undef TIME_WITH_SYS_TIME +/* Define to 1 if you have the <memory.h> header file. */ +#undef HAVE_MEMORY_H -/* The number of bytes in a int. */ -#undef SIZEOF_INT +/* Define if compiler supports prototypes */ +#undef HAVE_PROTOTYPES -/* The number of bytes in a long. */ -#undef SIZEOF_LONG +/* Define if compiler supports stdarg prototypes */ +#undef HAVE_STDARG_PROTOTYPES -/* The number of bytes in a short. */ -#undef SIZEOF_SHORT +/* Define to 1 if you have the <stdint.h> header file. */ +#undef HAVE_STDINT_H -/* Define if you have the <stdlib.h> header file. */ +/* Define to 1 if you have the <stdlib.h> header file. */ #undef HAVE_STDLIB_H -/* Define if you have the <string.h> header file. */ +/* Define to 1 if you have the <strings.h> header file. 
*/ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the <string.h> header file. */ #undef HAVE_STRING_H -/* Define if you have the <strings.h> header file. */ -#undef HAVE_STRINGS_H +/* Define to 1 if you have the <sys/stat.h> header file. */ +#undef HAVE_SYS_STAT_H -/* Define if you have the <sys/types.h> header file. */ -#undef HAVE_SYS_TYPES_H +/* Define to 1 if you have the <sys/times.h> header file. */ +#undef HAVE_SYS_TIMES_H -/* Define if you have the <sys/time.h> header file. */ +/* Define to 1 if you have the <sys/time.h> header file. */ #undef HAVE_SYS_TIME_H -/* Define if you have the <sys/times.h> header file. */ -#undef HAVE_SYS_TIMES_H +/* Define to 1 if you have the <sys/types.h> header file. */ +#undef HAVE_SYS_TYPES_H -/* Define if you have the <unistd.h> header file. */ +/* Define to 1 if you have the <unistd.h> header file. */ #undef HAVE_UNISTD_H -/* Define if you have the function argument prototype */ -#undef HAVE_PROTOTYPES +/* Name of package */ +#undef PACKAGE -/* Define if you have the variable length prototypes and stdarg.h */ -#undef HAVE_STDARG_PROTOTYPES +/* Define to the address where bug reports for this package should be sent. */ +#undef PACKAGE_BUGREPORT + +/* Define to the full name of this package. */ +#undef PACKAGE_NAME + +/* Define to the full name and version of this package. */ +#undef PACKAGE_STRING + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the version of this package. */ +#undef PACKAGE_VERSION + +/* The size of a `int', as computed by sizeof. */ +#undef SIZEOF_INT + +/* The size of a `long', as computed by sizeof. */ +#undef SIZEOF_LONG + +/* The size of a `short', as computed by sizeof. */ +#undef SIZEOF_SHORT +/* If using the C implementation of alloca, define if you know the + direction of stack growth for your system; otherwise it will be + automatically deduced at run-time. 
+ STACK_DIRECTION > 0 => grows toward higher addresses + STACK_DIRECTION < 0 => grows toward lower addresses + STACK_DIRECTION = 0 => direction of growth unknown */ +#undef STACK_DIRECTION + +/* Define to 1 if you have the ANSI C header files. */ +#undef STDC_HEADERS + +/* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */ +#undef TIME_WITH_SYS_TIME + +/* Version number of package */ +#undef VERSION + +/* Define to empty if `const' does not conform to ANSI C. */ +#undef const diff --git a/ext/mbstring/oniguruma/enc/big5.c b/ext/mbstring/oniguruma/enc/big5.c index 763872e963..86792666a4 100644 --- a/ext/mbstring/oniguruma/enc/big5.c +++ b/ext/mbstring/oniguruma/enc/big5.c @@ -29,7 +29,7 @@ #include "regenc.h" -static int EncLen_BIG5[] = { +static const int EncLen_BIG5[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, diff --git a/ext/mbstring/oniguruma/enc/euc_jp.c b/ext/mbstring/oniguruma/enc/euc_jp.c index 5f13e33eb4..71c81ee9fe 100644 --- a/ext/mbstring/oniguruma/enc/euc_jp.c +++ b/ext/mbstring/oniguruma/enc/euc_jp.c @@ -31,7 +31,7 @@ #define eucjp_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1) -static int EncLen_EUCJP[] = { +static const int EncLen_EUCJP[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -158,20 +158,16 @@ eucjp_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) static int eucjp_is_code_ctype(OnigCodePoint code, unsigned int ctype) { - if ((ctype & ONIGENC_CTYPE_WORD) != 0) { - if (code < 128) - return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); - else + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else { + if ((ctype & (ONIGENC_CTYPE_WORD | + ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) { return (eucjp_code_to_mbclen(code) > 1 ? 
TRUE : FALSE); - - ctype &= ~ONIGENC_CTYPE_WORD; - if (ctype == 0) return FALSE; + } } - if (code < 128) - return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); - else - return FALSE; + return FALSE; } static UChar* diff --git a/ext/mbstring/oniguruma/enc/euc_kr.c b/ext/mbstring/oniguruma/enc/euc_kr.c index c1e83b7e66..57bf801536 100644 --- a/ext/mbstring/oniguruma/enc/euc_kr.c +++ b/ext/mbstring/oniguruma/enc/euc_kr.c @@ -29,7 +29,7 @@ #include "regenc.h" -static int EncLen_EUCKR[] = { +static const int EncLen_EUCKR[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, diff --git a/ext/mbstring/oniguruma/enc/euc_tw.c b/ext/mbstring/oniguruma/enc/euc_tw.c index 4e5851a451..6f396e75e6 100644 --- a/ext/mbstring/oniguruma/enc/euc_tw.c +++ b/ext/mbstring/oniguruma/enc/euc_tw.c @@ -29,7 +29,7 @@ #include "regenc.h" -static int EncLen_EUCTW[] = { +static const int EncLen_EUCTW[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, diff --git a/ext/mbstring/oniguruma/enc/iso8859_1.c b/ext/mbstring/oniguruma/enc/iso8859_1.c index 53ad52ee13..4dd708d841 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_1.c +++ b/ext/mbstring/oniguruma/enc/iso8859_1.c @@ -32,7 +32,7 @@ #define ENC_IS_ISO_8859_1_CTYPE(code,ctype) \ ((EncISO_8859_1_CtypeTable[code] & ctype) != 0) -static unsigned short EncISO_8859_1_CtypeTable[256] = { +static const unsigned short EncISO_8859_1_CtypeTable[256] = { 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, diff --git a/ext/mbstring/oniguruma/enc/iso8859_10.c b/ext/mbstring/oniguruma/enc/iso8859_10.c index a9331cebf3..e317f49752 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_10.c +++ b/ext/mbstring/oniguruma/enc/iso8859_10.c @@ -33,7 
+33,7 @@ #define ENC_IS_ISO_8859_10_CTYPE(code,ctype) \ ((EncISO_8859_10_CtypeTable[code] & ctype) != 0) -static UChar EncISO_8859_10_ToLowerCaseTable[256] = { +static const UChar EncISO_8859_10_ToLowerCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', @@ -68,7 +68,7 @@ static UChar EncISO_8859_10_ToLowerCaseTable[256] = { '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' }; -static unsigned short EncISO_8859_10_CtypeTable[256] = { +static const unsigned short EncISO_8859_10_CtypeTable[256] = { 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, @@ -186,9 +186,9 @@ iso_8859_10_is_code_ctype(OnigCodePoint code, unsigned int ctype) static int iso_8859_10_get_all_pair_ambig_codes(OnigAmbigType flag, - OnigPairAmbigCodes** ccs) + const OnigPairAmbigCodes** ccs) { - static OnigPairAmbigCodes cc[] = { + static const OnigPairAmbigCodes cc[] = { { 0xa1, 0xb1 }, { 0xa2, 0xb2 }, { 0xa3, 0xb3 }, diff --git a/ext/mbstring/oniguruma/enc/iso8859_11.c b/ext/mbstring/oniguruma/enc/iso8859_11.c index bb1098807a..6afaa27f41 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_11.c +++ b/ext/mbstring/oniguruma/enc/iso8859_11.c @@ -32,7 +32,7 @@ #define ENC_IS_ISO_8859_11_CTYPE(code,ctype) \ ((EncISO_8859_11_CtypeTable[code] & ctype) != 0) -static unsigned short EncISO_8859_11_CtypeTable[256] = { +static const unsigned short EncISO_8859_11_CtypeTable[256] = { 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, diff --git a/ext/mbstring/oniguruma/enc/iso8859_13.c b/ext/mbstring/oniguruma/enc/iso8859_13.c index 827ca508e8..abd7644527 100644 --- 
a/ext/mbstring/oniguruma/enc/iso8859_13.c +++ b/ext/mbstring/oniguruma/enc/iso8859_13.c @@ -33,7 +33,7 @@ #define ENC_IS_ISO_8859_13_CTYPE(code,ctype) \ ((EncISO_8859_13_CtypeTable[code] & ctype) != 0) -static UChar EncISO_8859_13_ToLowerCaseTable[256] = { +static const UChar EncISO_8859_13_ToLowerCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', @@ -68,7 +68,7 @@ static UChar EncISO_8859_13_ToLowerCaseTable[256] = { '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' }; -static unsigned short EncISO_8859_13_CtypeTable[256] = { +static const unsigned short EncISO_8859_13_CtypeTable[256] = { 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, @@ -186,9 +186,9 @@ iso_8859_13_is_code_ctype(OnigCodePoint code, unsigned int ctype) static int iso_8859_13_get_all_pair_ambig_codes(OnigAmbigType flag, - OnigPairAmbigCodes** ccs) + const OnigPairAmbigCodes** ccs) { - static OnigPairAmbigCodes cc[] = { + static const OnigPairAmbigCodes cc[] = { { 0xc0, 0xe0 }, { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, diff --git a/ext/mbstring/oniguruma/enc/iso8859_14.c b/ext/mbstring/oniguruma/enc/iso8859_14.c index 4fe5ab29d1..d76771a1cf 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_14.c +++ b/ext/mbstring/oniguruma/enc/iso8859_14.c @@ -33,7 +33,7 @@ #define ENC_IS_ISO_8859_14_CTYPE(code,ctype) \ ((EncISO_8859_14_CtypeTable[code] & ctype) != 0) -static UChar EncISO_8859_14_ToLowerCaseTable[256] = { +static const UChar EncISO_8859_14_ToLowerCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', @@ -68,7 +68,7 @@ static UChar 
EncISO_8859_14_ToLowerCaseTable[256] = { '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' }; -static unsigned short EncISO_8859_14_CtypeTable[256] = { +static const unsigned short EncISO_8859_14_CtypeTable[256] = { 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, @@ -186,9 +186,9 @@ iso_8859_14_is_code_ctype(OnigCodePoint code, unsigned int ctype) static int iso_8859_14_get_all_pair_ambig_codes(OnigAmbigType flag, - OnigPairAmbigCodes** ccs) + const OnigPairAmbigCodes** ccs) { - static OnigPairAmbigCodes cc[] = { + static const OnigPairAmbigCodes cc[] = { { 0xa1, 0xa2 }, { 0xa2, 0xa1 }, { 0xa4, 0xa5 }, diff --git a/ext/mbstring/oniguruma/enc/iso8859_15.c b/ext/mbstring/oniguruma/enc/iso8859_15.c index 1a8bd7b4c5..d6611ed290 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_15.c +++ b/ext/mbstring/oniguruma/enc/iso8859_15.c @@ -33,7 +33,7 @@ #define ENC_IS_ISO_8859_15_CTYPE(code,ctype) \ ((EncISO_8859_15_CtypeTable[code] & ctype) != 0) -static UChar EncISO_8859_15_ToLowerCaseTable[256] = { +static const UChar EncISO_8859_15_ToLowerCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', @@ -68,7 +68,7 @@ static UChar EncISO_8859_15_ToLowerCaseTable[256] = { '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' }; -static unsigned short EncISO_8859_15_CtypeTable[256] = { +static const unsigned short EncISO_8859_15_CtypeTable[256] = { 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, @@ -186,9 +186,9 @@ iso_8859_15_is_code_ctype(OnigCodePoint code, unsigned int ctype) static int 
iso_8859_15_get_all_pair_ambig_codes(OnigAmbigType flag, - OnigPairAmbigCodes** ccs) + const OnigPairAmbigCodes** ccs) { - static OnigPairAmbigCodes cc[] = { + static const OnigPairAmbigCodes cc[] = { { 0xa6, 0xa8 }, { 0xa8, 0xa6 }, diff --git a/ext/mbstring/oniguruma/enc/iso8859_16.c b/ext/mbstring/oniguruma/enc/iso8859_16.c index e283db17cc..23b868065c 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_16.c +++ b/ext/mbstring/oniguruma/enc/iso8859_16.c @@ -33,7 +33,7 @@ #define ENC_IS_ISO_8859_16_CTYPE(code,ctype) \ ((EncISO_8859_16_CtypeTable[code] & ctype) != 0) -static UChar EncISO_8859_16_ToLowerCaseTable[256] = { +static const UChar EncISO_8859_16_ToLowerCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', @@ -68,7 +68,7 @@ static UChar EncISO_8859_16_ToLowerCaseTable[256] = { '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' }; -static unsigned short EncISO_8859_16_CtypeTable[256] = { +static const unsigned short EncISO_8859_16_CtypeTable[256] = { 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, @@ -186,9 +186,9 @@ iso_8859_16_is_code_ctype(OnigCodePoint code, unsigned int ctype) static int iso_8859_16_get_all_pair_ambig_codes(OnigAmbigType flag, - OnigPairAmbigCodes** ccs) + const OnigPairAmbigCodes** ccs) { - static OnigPairAmbigCodes cc[] = { + static const OnigPairAmbigCodes cc[] = { { 0xa1, 0xa2 }, { 0xa2, 0xa1 }, { 0xa3, 0xb3 }, diff --git a/ext/mbstring/oniguruma/enc/iso8859_2.c b/ext/mbstring/oniguruma/enc/iso8859_2.c index e86415b9c9..5f21ff78ae 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_2.c +++ b/ext/mbstring/oniguruma/enc/iso8859_2.c @@ -33,7 +33,7 @@ #define ENC_IS_ISO_8859_2_CTYPE(code,ctype) \ ((EncISO_8859_2_CtypeTable[code] & 
ctype) != 0) -static UChar EncISO_8859_2_ToLowerCaseTable[256] = { +static const UChar EncISO_8859_2_ToLowerCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', @@ -68,7 +68,7 @@ static UChar EncISO_8859_2_ToLowerCaseTable[256] = { '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' }; -static unsigned short EncISO_8859_2_CtypeTable[256] = { +static const unsigned short EncISO_8859_2_CtypeTable[256] = { 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, @@ -177,9 +177,9 @@ iso_8859_2_is_mbc_ambiguous(OnigAmbigType flag, static int iso_8859_2_get_all_pair_ambig_codes(OnigAmbigType flag, - OnigPairAmbigCodes** ccs) + const OnigPairAmbigCodes** ccs) { - static OnigPairAmbigCodes cc[] = { + static const OnigPairAmbigCodes cc[] = { { 0xa1, 0xb1 }, { 0xa3, 0xb3 }, { 0xa5, 0xb5 }, diff --git a/ext/mbstring/oniguruma/enc/iso8859_3.c b/ext/mbstring/oniguruma/enc/iso8859_3.c index 76d2bec8a8..9ac3dab179 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_3.c +++ b/ext/mbstring/oniguruma/enc/iso8859_3.c @@ -33,7 +33,7 @@ #define ENC_IS_ISO_8859_3_CTYPE(code,ctype) \ ((EncISO_8859_3_CtypeTable[code] & ctype) != 0) -static UChar EncISO_8859_3_ToLowerCaseTable[256] = { +static const UChar EncISO_8859_3_ToLowerCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', @@ -68,7 +68,7 @@ static UChar EncISO_8859_3_ToLowerCaseTable[256] = { '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' }; -static unsigned short EncISO_8859_3_CtypeTable[256] = { +static const unsigned short EncISO_8859_3_CtypeTable[256] = { 0x2008, 
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, @@ -186,9 +186,9 @@ iso_8859_3_is_code_ctype(OnigCodePoint code, unsigned int ctype) static int iso_8859_3_get_all_pair_ambig_codes(OnigAmbigType flag, - OnigPairAmbigCodes** ccs) + const OnigPairAmbigCodes** ccs) { - static OnigPairAmbigCodes cc[] = { + static const OnigPairAmbigCodes cc[] = { { 0xa1, 0xb1 }, { 0xa6, 0xb6 }, { 0xa9, 0xb9 }, diff --git a/ext/mbstring/oniguruma/enc/iso8859_4.c b/ext/mbstring/oniguruma/enc/iso8859_4.c index 7569006725..c54a2fa149 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_4.c +++ b/ext/mbstring/oniguruma/enc/iso8859_4.c @@ -33,7 +33,7 @@ #define ENC_IS_ISO_8859_4_CTYPE(code,ctype) \ ((EncISO_8859_4_CtypeTable[code] & ctype) != 0) -static UChar EncISO_8859_4_ToLowerCaseTable[256] = { +static const UChar EncISO_8859_4_ToLowerCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', @@ -68,7 +68,7 @@ static UChar EncISO_8859_4_ToLowerCaseTable[256] = { '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' }; -static unsigned short EncISO_8859_4_CtypeTable[256] = { +static const unsigned short EncISO_8859_4_CtypeTable[256] = { 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, @@ -186,9 +186,9 @@ iso_8859_4_is_code_ctype(OnigCodePoint code, unsigned int ctype) static int iso_8859_4_get_all_pair_ambig_codes(OnigAmbigType flag, - OnigPairAmbigCodes** ccs) + const OnigPairAmbigCodes** ccs) { - static OnigPairAmbigCodes cc[] = { + static const OnigPairAmbigCodes cc[] = { { 0xa1, 0xb1 }, { 0xa3, 0xb3 }, { 0xa5, 0xb5 }, diff --git 
a/ext/mbstring/oniguruma/enc/iso8859_5.c b/ext/mbstring/oniguruma/enc/iso8859_5.c index 2f7677b3e7..5b941e2eb9 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_5.c +++ b/ext/mbstring/oniguruma/enc/iso8859_5.c @@ -33,7 +33,7 @@ #define ENC_IS_ISO_8859_5_CTYPE(code,ctype) \ ((EncISO_8859_5_CtypeTable[code] & ctype) != 0) -static UChar EncISO_8859_5_ToLowerCaseTable[256] = { +static const UChar EncISO_8859_5_ToLowerCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', @@ -68,7 +68,7 @@ static UChar EncISO_8859_5_ToLowerCaseTable[256] = { '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' }; -static unsigned short EncISO_8859_5_CtypeTable[256] = { +static const unsigned short EncISO_8859_5_CtypeTable[256] = { 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, @@ -152,9 +152,9 @@ iso_8859_5_is_code_ctype(OnigCodePoint code, unsigned int ctype) static int iso_8859_5_get_all_pair_ambig_codes(OnigAmbigType flag, - OnigPairAmbigCodes** ccs) + const OnigPairAmbigCodes** ccs) { - static OnigPairAmbigCodes cc[] = { + static const OnigPairAmbigCodes cc[] = { { 0xa1, 0xf1 }, { 0xa2, 0xf2 }, { 0xa3, 0xf3 }, diff --git a/ext/mbstring/oniguruma/enc/iso8859_6.c b/ext/mbstring/oniguruma/enc/iso8859_6.c index 0fcb9e8b83..bb5515d30b 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_6.c +++ b/ext/mbstring/oniguruma/enc/iso8859_6.c @@ -32,7 +32,7 @@ #define ENC_IS_ISO_8859_6_CTYPE(code,ctype) \ ((EncISO_8859_6_CtypeTable[code] & ctype) != 0) -static unsigned short EncISO_8859_6_CtypeTable[256] = { +static const unsigned short EncISO_8859_6_CtypeTable[256] = { 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, diff --git a/ext/mbstring/oniguruma/enc/iso8859_7.c b/ext/mbstring/oniguruma/enc/iso8859_7.c index 8b2cb9ec59..2529dae666 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_7.c +++ b/ext/mbstring/oniguruma/enc/iso8859_7.c @@ -33,7 +33,7 @@ #define ENC_IS_ISO_8859_7_CTYPE(code,ctype) \ ((EncISO_8859_7_CtypeTable[code] & ctype) != 0) -static UChar EncISO_8859_7_ToLowerCaseTable[256] = { +static const UChar EncISO_8859_7_ToLowerCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', @@ -68,7 +68,7 @@ static UChar EncISO_8859_7_ToLowerCaseTable[256] = { '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' }; -static unsigned short EncISO_8859_7_CtypeTable[256] = { +static const unsigned short EncISO_8859_7_CtypeTable[256] = { 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, @@ -159,9 +159,9 @@ iso_8859_7_is_code_ctype(OnigCodePoint code, unsigned int ctype) static int iso_8859_7_get_all_pair_ambig_codes(OnigAmbigType flag, - OnigPairAmbigCodes** ccs) + const OnigPairAmbigCodes** ccs) { - static OnigPairAmbigCodes cc[] = { + static const OnigPairAmbigCodes cc[] = { { 0xb6, 0xdc }, { 0xb8, 0xdd }, { 0xb9, 0xde }, diff --git a/ext/mbstring/oniguruma/enc/iso8859_8.c b/ext/mbstring/oniguruma/enc/iso8859_8.c index 3c95b9b137..d7f0fc5947 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_8.c +++ b/ext/mbstring/oniguruma/enc/iso8859_8.c @@ -32,7 +32,7 @@ #define ENC_IS_ISO_8859_8_CTYPE(code,ctype) \ ((EncISO_8859_8_CtypeTable[code] & ctype) != 0) -static unsigned short EncISO_8859_8_CtypeTable[256] = { +static const unsigned short EncISO_8859_8_CtypeTable[256] = { 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 
0x2008, 0x2008, 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, diff --git a/ext/mbstring/oniguruma/enc/iso8859_9.c b/ext/mbstring/oniguruma/enc/iso8859_9.c index 1b061ff6ea..f4bcac1ae3 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_9.c +++ b/ext/mbstring/oniguruma/enc/iso8859_9.c @@ -33,7 +33,7 @@ #define ENC_IS_ISO_8859_9_CTYPE(code,ctype) \ ((EncISO_8859_9_CtypeTable[code] & ctype) != 0) -static UChar EncISO_8859_9_ToLowerCaseTable[256] = { +static const UChar EncISO_8859_9_ToLowerCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', @@ -68,7 +68,7 @@ static UChar EncISO_8859_9_ToLowerCaseTable[256] = { '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' }; -static unsigned short EncISO_8859_9_CtypeTable[256] = { +static const unsigned short EncISO_8859_9_CtypeTable[256] = { 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, @@ -186,9 +186,9 @@ iso_8859_9_is_code_ctype(OnigCodePoint code, unsigned int ctype) static int iso_8859_9_get_all_pair_ambig_codes(OnigAmbigType flag, - OnigPairAmbigCodes** ccs) + const OnigPairAmbigCodes** ccs) { - static OnigPairAmbigCodes cc[] = { + static const OnigPairAmbigCodes cc[] = { { 0xc0, 0xe0 }, { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, diff --git a/ext/mbstring/oniguruma/enc/koi8.c b/ext/mbstring/oniguruma/enc/koi8.c index f8a5a1da61..27f97f3072 100644 --- a/ext/mbstring/oniguruma/enc/koi8.c +++ b/ext/mbstring/oniguruma/enc/koi8.c @@ -33,7 +33,7 @@ #define ENC_IS_KOI8_CTYPE(code,ctype) \ ((EncKOI8_CtypeTable[code] & ctype) != 0) -static UChar EncKOI8_ToLowerCaseTable[256] = { +static const UChar EncKOI8_ToLowerCaseTable[256] = { '\000', '\001', '\002', 
'\003', '\004', '\005', '\006', '\007', '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', @@ -68,7 +68,7 @@ static UChar EncKOI8_ToLowerCaseTable[256] = { '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337' }; -static unsigned short EncKOI8_CtypeTable[256] = { +static const unsigned short EncKOI8_CtypeTable[256] = { 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, @@ -105,9 +105,9 @@ static unsigned short EncKOI8_CtypeTable[256] = { static int koi8_mbc_to_normalize(OnigAmbigType flag, - const UChar** pp, const UChar* end, UChar* lower) + const OnigUChar** pp, const OnigUChar* end, OnigUChar* lower) { - UChar* p = (UChar *)*pp; + const OnigUChar* p = *pp; if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || @@ -123,9 +123,9 @@ koi8_mbc_to_normalize(OnigAmbigType flag, } static int -koi8_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) +koi8_is_mbc_ambiguous(OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end) { - UChar* p = (UChar *)*pp; + const OnigUChar* p = *pp; (*pp)++; if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && @@ -151,9 +151,9 @@ koi8_is_code_ctype(OnigCodePoint code, unsigned int ctype) static int koi8_get_all_pair_ambig_codes(OnigAmbigType flag, - OnigPairAmbigCodes** ccs) + const OnigPairAmbigCodes** ccs) { - static OnigPairAmbigCodes cc[] = { + static const OnigPairAmbigCodes cc[] = { { 0xc0, 0xe0 }, { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, diff --git a/ext/mbstring/oniguruma/enc/koi8_r.c b/ext/mbstring/oniguruma/enc/koi8_r.c index 7c626df616..d2a4440f2c 100644 --- a/ext/mbstring/oniguruma/enc/koi8_r.c +++ b/ext/mbstring/oniguruma/enc/koi8_r.c @@ -33,7 +33,7 @@ #define ENC_IS_KOI8_R_CTYPE(code,ctype) \ ((EncKOI8_R_CtypeTable[code] & ctype) != 0) -static 
UChar EncKOI8_R_ToLowerCaseTable[256] = { +static const UChar EncKOI8_R_ToLowerCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', @@ -68,7 +68,7 @@ static UChar EncKOI8_R_ToLowerCaseTable[256] = { '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337' }; -static unsigned short EncKOI8_R_CtypeTable[256] = { +static const unsigned short EncKOI8_R_CtypeTable[256] = { 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, @@ -150,9 +150,9 @@ koi8_r_is_code_ctype(OnigCodePoint code, unsigned int ctype) static int koi8_r_get_all_pair_ambig_codes(OnigAmbigType flag, - OnigPairAmbigCodes** ccs) + const OnigPairAmbigCodes** ccs) { - static OnigPairAmbigCodes cc[] = { + static const OnigPairAmbigCodes cc[] = { { 0xc0, 0xe0 }, { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, diff --git a/ext/mbstring/oniguruma/enc/sjis.c b/ext/mbstring/oniguruma/enc/sjis.c index e13407bccf..f7d7d52265 100644 --- a/ext/mbstring/oniguruma/enc/sjis.c +++ b/ext/mbstring/oniguruma/enc/sjis.c @@ -29,7 +29,7 @@ #include "regenc.h" -static int EncLen_SJIS[] = { +static const int EncLen_SJIS[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -76,7 +76,7 @@ sjis_mbc_enc_len(const UChar* p) return EncLen_SJIS[*p]; } -extern int +static int sjis_code_to_mbclen(OnigCodePoint code) { if (code < 256) { @@ -167,21 +167,16 @@ sjis_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) static int sjis_is_code_ctype(OnigCodePoint code, unsigned int ctype) { - if ((ctype & ONIGENC_CTYPE_WORD) != 0) { - if (code < 128) - return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); - else { + if (code < 128) + return 
ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else { + if ((ctype & (ONIGENC_CTYPE_WORD | + ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) { return (sjis_code_to_mbclen(code) > 1 ? TRUE : FALSE); } - - ctype &= ~ONIGENC_CTYPE_WORD; - if (ctype == 0) return FALSE; } - if (code < 128) - return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); - else - return FALSE; + return FALSE; } static UChar* diff --git a/ext/mbstring/oniguruma/enc/unicode.c b/ext/mbstring/oniguruma/enc/unicode.c index e3be9450a5..a8cf539014 100644 --- a/ext/mbstring/oniguruma/enc/unicode.c +++ b/ext/mbstring/oniguruma/enc/unicode.c @@ -30,7 +30,7 @@ #include "regenc.h" -unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = { +const unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = { 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x228c, 0x2289, 0x2288, 0x2288, 0x2288, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, @@ -65,7 +65,7 @@ unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = { 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2 }; -static OnigCodePoint CRAlnum[] = { +static const OnigCodePoint CRAlnum[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 414, #else @@ -490,7 +490,7 @@ static OnigCodePoint CRAlnum[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of CRAlnum */ -static OnigCodePoint CRAlpha[] = { +static const OnigCodePoint CRAlpha[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 396, #else @@ -897,7 +897,7 @@ static OnigCodePoint CRAlpha[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of CRAlpha */ -static OnigCodePoint CRBlank[] = { +static const OnigCodePoint CRBlank[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 9, #else @@ -917,7 +917,7 @@ static OnigCodePoint CRBlank[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of CRBlank */ -static OnigCodePoint CRCntrl[] = { +static const OnigCodePoint CRCntrl[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 19, #else @@ -947,7 +947,7 @@ static 
OnigCodePoint CRCntrl[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of CRCntrl */ -static OnigCodePoint CRDigit[] = { +static const OnigCodePoint CRDigit[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 23, #else @@ -981,7 +981,7 @@ static OnigCodePoint CRDigit[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of CRDigit */ -static OnigCodePoint CRGraph[] = { +static const OnigCodePoint CRGraph[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 405, #else @@ -1397,7 +1397,7 @@ static OnigCodePoint CRGraph[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of CRGraph */ -static OnigCodePoint CRLower[] = { +static const OnigCodePoint CRLower[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 424, #else @@ -1832,7 +1832,7 @@ static OnigCodePoint CRLower[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of CRLower */ -static OnigCodePoint CRPrint[] = { +static const OnigCodePoint CRPrint[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 405, #else @@ -2248,7 +2248,7 @@ static OnigCodePoint CRPrint[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of CRPrint */ -static OnigCodePoint CRPunct[] = { +static const OnigCodePoint CRPunct[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 86, #else @@ -2345,7 +2345,7 @@ static OnigCodePoint CRPunct[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of CRPunct */ -static OnigCodePoint CRSpace[] = { +static const OnigCodePoint CRSpace[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 11, #else @@ -2367,7 +2367,7 @@ static OnigCodePoint CRSpace[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of CRSpace */ -static OnigCodePoint CRUpper[] = { +static const OnigCodePoint CRUpper[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 421, #else @@ -2799,7 +2799,7 @@ static OnigCodePoint CRUpper[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of CRUpper */ -static OnigCodePoint CRXDigit[] = { +static const OnigCodePoint CRXDigit[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 3, #else @@ -2810,7 +2810,7 @@ static OnigCodePoint 
CRXDigit[] = { 0x0061, 0x0066 }; -static OnigCodePoint CRASCII[] = { +static const OnigCodePoint CRASCII[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 1, #else @@ -2819,7 +2819,7 @@ static OnigCodePoint CRASCII[] = { 0x0000, 0x007f }; -static OnigCodePoint CRWord[] = { +static const OnigCodePoint CRWord[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 436, #else @@ -3320,6 +3320,9 @@ onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype) case ONIGENC_CTYPE_ALNUM: return onig_is_in_code_range((UChar* )CRAlnum, code); break; + case ONIGENC_CTYPE_NEWLINE: + return FALSE; + break; default: return ONIGENCERR_TYPE_BUG; @@ -3337,9 +3340,9 @@ onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype) extern int onigenc_unicode_get_ctype_code_range(int ctype, - OnigCodePoint* sbr[], OnigCodePoint* mbr[]) + const OnigCodePoint* sbr[], const OnigCodePoint* mbr[]) { - static OnigCodePoint EmptyRange[] = { 0 }; + static const OnigCodePoint EmptyRange[] = { 0 }; #define CR_SET(list) do { \ *mbr = list; \ diff --git a/ext/mbstring/oniguruma/enc/utf16_be.c b/ext/mbstring/oniguruma/enc/utf16_be.c index ad33ddbeeb..0dd2832f70 100755 --- a/ext/mbstring/oniguruma/enc/utf16_be.c +++ b/ext/mbstring/oniguruma/enc/utf16_be.c @@ -2,7 +2,7 @@ utf16_be.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -32,7 +32,7 @@ #define UTF16_IS_SURROGATE_FIRST(c) (c >= 0xd8 && c <= 0xdb) #define UTF16_IS_SURROGATE_SECOND(c) (c >= 0xdc && c <= 0xdf) -static int EncLen_UTF16[] = { +static const int EncLen_UTF16[] = { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, @@ -63,6 +63,12 @@ utf16be_is_mbc_newline(const UChar* p, const UChar* end) if (p + 1 < end) { if (*(p+1) == 0x0a && *p == 0x00) return 1; +#ifdef USE_UNICODE_ALL_LINE_TERMINATORS + if ((*(p+1) == 0x0d || *(p+1) == 0x85) && *p == 0x00) + return 1; + if (*p == 0x20 && (*(p+1) == 0x29 || *(p+1) == 0x28)) + return 1; +#endif } return 0; } diff --git a/ext/mbstring/oniguruma/enc/utf16_le.c b/ext/mbstring/oniguruma/enc/utf16_le.c index db892dcd14..93cc6138a6 100755 --- a/ext/mbstring/oniguruma/enc/utf16_le.c +++ b/ext/mbstring/oniguruma/enc/utf16_le.c @@ -2,7 +2,7 @@ utf16_le.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -32,7 +32,7 @@ #define UTF16_IS_SURROGATE_FIRST(c) (c >= 0xd8 && c <= 0xdb) #define UTF16_IS_SURROGATE_SECOND(c) (c >= 0xdc && c <= 0xdf) -static int EncLen_UTF16[] = { +static const int EncLen_UTF16[] = { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, @@ -69,6 +69,12 @@ utf16le_is_mbc_newline(const UChar* p, const UChar* end) if (p + 1 < end) { if (*p == 0x0a && *(p+1) == 0x00) return 1; +#ifdef USE_UNICODE_ALL_LINE_TERMINATORS + if ((*p == 0x0d || *p == 0x85) && *(p+1) == 0x00) + return 1; + if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28)) + return 1; +#endif } return 0; } diff --git a/ext/mbstring/oniguruma/enc/utf32_be.c b/ext/mbstring/oniguruma/enc/utf32_be.c index 60feb040b8..36b477286c 100755 --- a/ext/mbstring/oniguruma/enc/utf32_be.c +++ b/ext/mbstring/oniguruma/enc/utf32_be.c @@ -2,7 +2,7 @@ utf32_be.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -41,6 +41,14 @@ utf32be_is_mbc_newline(const UChar* p, const UChar* end) if (p + 3 < end) { if (*(p+3) == 0x0a && *(p+2) == 0 && *(p+1) == 0 && *p == 0) return 1; +#ifdef USE_UNICODE_ALL_LINE_TERMINATORS + if ((*(p+3) == 0x0d || *(p+3) == 0x85) + && *(p+2) == 0 && *(p+1) == 0 && *p == 0x00) + return 1; + if (*(p+2) == 0x20 && (*(p+3) == 0x29 || *(p+3) == 0x28) + && *(p+1) == 0 && *p == 0) + return 1; +#endif } return 0; } diff --git a/ext/mbstring/oniguruma/enc/utf32_le.c b/ext/mbstring/oniguruma/enc/utf32_le.c index bba9689f76..1e9487d1d9 100755 --- a/ext/mbstring/oniguruma/enc/utf32_le.c +++ b/ext/mbstring/oniguruma/enc/utf32_le.c @@ -2,7 +2,7 @@ utf32_le.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -41,6 +41,14 @@ utf32le_is_mbc_newline(const UChar* p, const UChar* end) if (p + 3 < end) { if (*p == 0x0a && *(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) return 1; +#ifdef USE_UNICODE_ALL_LINE_TERMINATORS + if ((*p == 0x0d || *p == 0x85) && *(p+1) == 0x00 + && *(p+2) == 0x00 && *(p+3) == 0x00) /* U+000D, U+0085 */ + return 1; + if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28) + && *(p+2) == 0x00 && *(p+3) == 0x00) /* U+2028, U+2029 */ + return 1; +#endif } return 0; } diff --git a/ext/mbstring/oniguruma/enc/utf8.c b/ext/mbstring/oniguruma/enc/utf8.c index 592bebfe8f..0e816176ba 100644 --- a/ext/mbstring/oniguruma/enc/utf8.c +++ b/ext/mbstring/oniguruma/enc/utf8.c @@ -2,7 +2,7 @@ utf8.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved.
* * Redistribution and use in source and binary forms, with or without @@ -40,7 +40,7 @@ #define utf8_islead(c) ((UChar )((c) & 0xc0) != 0x80) -static int EncLen_UTF8[] = { +static const int EncLen_UTF8[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -65,6 +65,29 @@ utf8_mbc_enc_len(const UChar* p) return EncLen_UTF8[*p]; } +static int +utf8_is_mbc_newline(const UChar* p, const UChar* end) +{ + if (p < end) { + if (*p == 0x0a) return 1; + +#ifdef USE_UNICODE_ALL_LINE_TERMINATORS + if (*p == 0x0d) return 1; + if (p + 1 < end) { + if (*(p+1) == 0x85 && *p == 0xc2) /* U+0085 */ + return 1; + if (p + 2 < end) { + if ((*(p+2) == 0xa8 || *(p+2) == 0xa9) + && *(p+1) == 0x80 && *p == 0xe2) /* U+2028, U+2029 */ + return 1; + } + } +#endif + } + + return 0; +} + static OnigCodePoint utf8_mbc_to_code(const UChar* p, const UChar* end) { @@ -307,16 +330,16 @@ utf8_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) } -static OnigCodePoint EmptyRange[] = { 0 }; +static const OnigCodePoint EmptyRange[] = { 0 }; -static OnigCodePoint SBAlnum[] = { +static const OnigCodePoint SBAlnum[] = { 3, 0x0030, 0x0039, 0x0041, 0x005a, 0x0061, 0x007a }; -static OnigCodePoint MBAlnum[] = { +static const OnigCodePoint MBAlnum[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 411, #else @@ -738,13 +761,13 @@ static OnigCodePoint MBAlnum[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of MBAlnum */ -static OnigCodePoint SBAlpha[] = { +static const OnigCodePoint SBAlpha[] = { 2, 0x0041, 0x005a, 0x0061, 0x007a }; -static OnigCodePoint MBAlpha[] = { +static const OnigCodePoint MBAlpha[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 394, #else @@ -1149,13 +1172,13 @@ static OnigCodePoint MBAlpha[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of MBAlpha */ -static OnigCodePoint SBBlank[] = { +static const OnigCodePoint SBBlank[] = { 2, 0x0009, 0x0009, 0x0020, 0x0020 }; 
-static OnigCodePoint MBBlank[] = { +static const OnigCodePoint MBBlank[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 7, #else @@ -1173,13 +1196,13 @@ static OnigCodePoint MBBlank[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of MBBlank */ -static OnigCodePoint SBCntrl[] = { +static const OnigCodePoint SBCntrl[] = { 2, 0x0000, 0x001f, 0x007f, 0x007f }; -static OnigCodePoint MBCntrl[] = { +static const OnigCodePoint MBCntrl[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 18, #else @@ -1208,12 +1231,12 @@ static OnigCodePoint MBCntrl[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of MBCntrl */ -static OnigCodePoint SBDigit[] = { +static const OnigCodePoint SBDigit[] = { 1, 0x0030, 0x0039 }; -static OnigCodePoint MBDigit[] = { +static const OnigCodePoint MBDigit[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 22, #else @@ -1245,12 +1268,12 @@ static OnigCodePoint MBDigit[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of MBDigit */ -static OnigCodePoint SBGraph[] = { +static const OnigCodePoint SBGraph[] = { 1, 0x0021, 0x007e }; -static OnigCodePoint MBGraph[] = { +static const OnigCodePoint MBGraph[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 404, #else @@ -1665,12 +1688,12 @@ static OnigCodePoint MBGraph[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of MBGraph */ -static OnigCodePoint SBLower[] = { +static const OnigCodePoint SBLower[] = { 1, 0x0061, 0x007a }; -static OnigCodePoint MBLower[] = { +static const OnigCodePoint MBLower[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 423, #else @@ -2104,13 +2127,13 @@ static OnigCodePoint MBLower[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of MBLower */ -static OnigCodePoint SBPrint[] = { +static const OnigCodePoint SBPrint[] = { 2, 0x0009, 0x000d, 0x0020, 0x007e }; -static OnigCodePoint MBPrint[] = { +static const OnigCodePoint MBPrint[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 403, #else @@ -2524,7 +2547,7 @@ static OnigCodePoint MBPrint[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; 
/* end of MBPrint */ -static OnigCodePoint SBPunct[] = { +static const OnigCodePoint SBPunct[] = { 9, 0x0021, 0x0023, 0x0025, 0x002a, @@ -2537,7 +2560,7 @@ static OnigCodePoint SBPunct[] = { 0x007d, 0x007d }; /* end of SBPunct */ -static OnigCodePoint MBPunct[] = { +static const OnigCodePoint MBPunct[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 77, #else @@ -2625,13 +2648,13 @@ static OnigCodePoint MBPunct[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of MBPunct */ -static OnigCodePoint SBSpace[] = { +static const OnigCodePoint SBSpace[] = { 2, 0x0009, 0x000d, 0x0020, 0x0020 }; -static OnigCodePoint MBSpace[] = { +static const OnigCodePoint MBSpace[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 9, #else @@ -2651,12 +2674,12 @@ static OnigCodePoint MBSpace[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of MBSpace */ -static OnigCodePoint SBUpper[] = { +static const OnigCodePoint SBUpper[] = { 1, 0x0041, 0x005a }; -static OnigCodePoint MBUpper[] = { +static const OnigCodePoint MBUpper[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 420, #else @@ -3087,19 +3110,19 @@ static OnigCodePoint MBUpper[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of MBUpper */ -static OnigCodePoint SBXDigit[] = { +static const OnigCodePoint SBXDigit[] = { 3, 0x0030, 0x0039, 0x0041, 0x0046, 0x0061, 0x0066 }; -static OnigCodePoint SBASCII[] = { +static const OnigCodePoint SBASCII[] = { 1, 0x0000, 0x007f }; -static OnigCodePoint SBWord[] = { +static const OnigCodePoint SBWord[] = { 4, 0x0030, 0x0039, 0x0041, 0x005a, @@ -3107,7 +3130,7 @@ static OnigCodePoint SBWord[] = { 0x0061, 0x007a }; -static OnigCodePoint MBWord[] = { +static const OnigCodePoint MBWord[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 432, #else @@ -3554,7 +3577,7 @@ static OnigCodePoint MBWord[] = { static int utf8_get_ctype_code_range(int ctype, - OnigCodePoint* sbr[], OnigCodePoint* mbr[]) + const OnigCodePoint* sbr[], const OnigCodePoint* mbr[]) { #define CR_SET(sbl,mbl) do { \ *sbr = sbl; \ @@ -3622,7 
+3645,7 @@ static int utf8_is_code_ctype(OnigCodePoint code, unsigned int ctype) { #ifdef USE_UNICODE_FULL_RANGE_CTYPE - OnigCodePoint *range; + const OnigCodePoint *range; #endif if (code < 256) { @@ -3674,6 +3697,9 @@ utf8_is_code_ctype(OnigCodePoint code, unsigned int ctype) case ONIGENC_CTYPE_ALNUM: range = MBAlnum; break; + case ONIGENC_CTYPE_NEWLINE: + return FALSE; + break; default: return ONIGENCERR_TYPE_BUG; @@ -3723,7 +3749,7 @@ OnigEncodingType OnigEncodingUTF8 = { , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ }, - onigenc_is_mbc_newline_0x0a, + utf8_is_mbc_newline, utf8_mbc_to_code, utf8_code_to_mbclen, utf8_code_to_mbc, diff --git a/ext/mbstring/oniguruma/index.html b/ext/mbstring/oniguruma/index.html index 02e844c363..a2d6c97b97 100755 --- a/ext/mbstring/oniguruma/index.html +++ b/ext/mbstring/oniguruma/index.html @@ -5,18 +5,10 @@ </head> <body BGCOLOR="#ffffff" VLINK="#808040" TEXT="#696969"> -<!-- -<a href="http://miuras.net/matsushita.html"> -<img src="anti_matsushita.PNG" height="46" width="266"> -</a> ---> -<a href="http://miuras.net/matsushita.html">M</a> -<a href="http://www.micropac.co.jp/nec/">N</a> - <h1>Oniguruma</h1> <p> -2005/03/07 (C) K.Kosako +2006/07/18 (C) K.Kosako </p> <p> @@ -29,10 +21,13 @@ The characteristics of this library is that different character encoding <dt><b>Supported character encodings:</b><br> ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,<br> EUC-JP, EUC-TW, EUC-KR, EUC-CN,<br> -Shift_JIS, Big5, KOI8-R, KOI8,<br> +Shift_JIS, Big5, GB 18030, KOI8-R, KOI8,<br> ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,<br> ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,<br> -ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16 +ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16<br> +<font color="red"> +(GB 18030 encoding was contributed by KUBO Takehiro) +</font> </p> </dl> <p> @@ 
-42,8 +37,8 @@ ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16 <dt><b>What's new</b> </font> <ul> -<li>released Version 3.7.1 (2005/03/07) -<li>released Version 2.4.2 (2005/03/05) +<li>Version 4.2.0 released. (2006/07/18) +<li>Version 2.5.6 released. (2006/05/29) </ul> </dl> @@ -75,23 +70,24 @@ It follows the BSD license in the case of the one except for it. <dt><b>Download:</b> <ul> -<li> <a href="archive/onigd20050307.tar.gz">Latest release version 3.7.1</a> (2005/03/07) <a href="HISTORY_3X.txt">Change Log</a> -<li> <a href="archive/onigd20050219.tar.gz">3.7.0</a> (2005/02/19) -<li> <a href="archive/onigd20050204.tar.gz">3.6.0</a> (2005/02/04) -<li> <a href="archive/onigd2_4_2.tar.gz">Latest release version 2.4.2</a> (2005/03/05) <a href="HISTORY_2X.txt">Change Log</a> -<li> <a href="archive/onigd2_4_1.tar.gz">2.4.1</a> (2005/01/05) -<li> <a href="archive/onigd2_4_0.tar.gz">2.4.0</a> (2004/12/01) +<li> <a href="archive/onig-4.2.0.tar.gz">Latest release version 4.2.0</a> (2006/07/18) <a href="HISTORY_4X.txt">Change Log</a> +<li> <a href="archive/onig-4.1.2.tar.gz">4.1.2</a> (2006/07/03) +<li> <a href="archive/onig-4.1.1.tar.gz">4.1.1</a> (2006/05/22) +<li> <a href="archive/onig-4.1.0.tar.gz">4.1.0</a> (2006/05/15) +<li> <a href="archive/onigd2_5_6.tar.gz">Latest release version 2.5.6</a> (2006/05/29) <a href="HISTORY_2X.txt">Change Log</a> +<li> <a href="archive/onigd2_5_5.tar.gz">2.5.5</a> (2006/05/08) +<li> <a href="archive/onigd2_5_4.tar.gz">2.5.4</a> (2006/02/27) </ul> <br> <font color="red"> -* 3.X.X supports UTF-16/UTF-32, Ruby 1.9.X.<br> -* 2.X.X does not support UTF-16/UTF-32, supports Ruby 1.6/1.8. 
+* 4.X.X supports UTF-16/UTF-32, Ruby 1.9.X.<br> +* 2.X.X does not support UTF-16/UTF-32, supports Ruby 1.6/1.8.[2-4] </font> <br> <br> -<dt><b>Documents:</b> (version 3.7.1) +<dt><b>Documents:</b> (version 4.2.0) <ul> <li> <a href="doc/RE.txt">Regular Expressions</a> <a href="doc/RE.ja.txt">(Japanese: EUC-JP)</a> @@ -112,23 +108,37 @@ It follows the BSD license in the case of the one except for it. <li> <a href="http://www.ruby-lang.org/cgi-bin/cvsweb.cgi/oniguruma/">Oniguruma in Ruby CVS</a> (old version) <li> <a href="http://raa.ruby-lang.org/project/oniguruma/">Oniguruma in RAA</a> (Ruby Application Archive) <li> <a href="http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/">FreeBSD ports</a> -<li> <a href="http://www.halbiz.com/osaru/cnregex.html">cnRegex 4D Plugin</a> (Japanese page) +<li> <a href="http://www.softantenna.com/lib/1953/index.html">SoftAntenna > Lib > Oniguruma</a> (Japanese page) +<li> <a href="http://www.halbiz.com/osaru/cnregex.html">cnRegex 4D Plugin (Mac OS X)</a> (Japanese page) +<li> <a href="http://kmaebashi.com/">new script language crowbar</a> (Japanese page) +<li> <a href="http://homepage2.nifty.com/Km/onig.htm">Delphi interface (Win32)</a> (Japanese page) +<li> <a href="http://pyxis-project.net/ensemble/">Ensemble (Mac OS X)</a> (Japanese page) <li> <a href="http://www.tom.sfc.keio.ac.jp/~sakai/d/?date=20050209">GHC patch</a> Masahiro Sakai (Japanese Blog) -<li> <a href="http://www.gyazsquare.com/gyazmail/index.php">GyazMail</a> -<li> <a href="http://www.artman21.net/">Jedit X</a> +<li> <a href="http://www.gyazsquare.com/gyazmail/index.php">GyazMail (Mac OS X)</a> +<li> <a href="http://www.artman21.net/">Jedit X (Mac OS X)</a> <li> <a href="http://www.chitora.jp/lhaz.html">Lhaz</a> (Japanese page) +<li> <a href="http://limechat.net/">LimeChat</a> (Japanese page) <li> <a href="http://www.irori.org/tool/mregexp.html">mregexp</a> (Japanese page) -<li> <a href="http://www.trinity-site.net/wiki/index.php?MultiFind">MultiFind</a> 
(Japanese page) <li> <a href="http://ochusha.sourceforge.jp/">Ochusha</a> (Japanese page) -<li> <a href="http://www-gauge.scphys.kyoto-u.ac.jp/~sonobe/OgreKit/index.html">OgreKit</a> Regular Expression Framework for Cocoa (Japanese page) -<li> <a href ="http://www.kanetaka.net/4dapi/wiki4d.dll/4dcgi/wiki.cgi?plugins-oniguruma">OnigRegexp</a> (Japanese page) -<li> <a href ="http://www.moriq.com/onig/">Oniguruma / FireBird (Win32)</a> -<li> <a href ="http://openspace.timedia.co.jp/~yasuyuki/wiliki/wiliki.cgi?Oniguruma-mysqld&l=jp">Oniguruma-mysqld</a> -<li> <a href ="http://www.kt.rim.or.jp/~kbk/sed/index.html">Onigsed (Win32)</a> (Japanese page) +<li> <a href="http://www8.ocn.ne.jp/%7esonoisa/OgreKit/index.html">OgreKit (Mac OS X)</a> Regular Expression Framework for Cocoa (Japanese page) +<li> <a href="http://www.kanetaka.net/4dapi/wiki4d.dll/4dcgi/wiki.cgi?plugins-oniguruma">OnigRegexp</a> (Japanese page) +<li> <a href="http://www.moriq.com/onig/">Oniguruma / FireBird (Win32)</a> +<li> <a href="http://openspace.timedia.co.jp/~yasuyuki/wiliki/wiliki.cgi?Oniguruma-mysqld&l=jp">Oniguruma-mysqld</a> +<li> <a href="http://www.void.in/wiki/OnigPP">OnigPP</a> (Japanese page) +<li> <a href="http://www.kt.rim.or.jp/~kbk/sed/index.html">Onigsed (Win32)</a> (Japanese page) +<li> <a href="http://www.kt.rim.or.jp/~kbk/yagrep/index.html">yagrep (Win32)</a> (Japanese page) <li> <a href="http://www.php.gr.jp/">Japan PHP User Group</a> PHP 5.0 mb_ereg (Japanese page) +<li> <a href="http://yatsu.info/wiki/Pufui/">Pufui (Mac OS X)</a> (Japanese page) +<li> <a href="http://harumune.s56.xrea.com/assari/index.php?RSSTyping">RSSTyping</a> (Japanese page) <li> <a href="http://www.ruby-lang.org/">Ruby</a> -<li> <a href="http://quux.s74.xrea.com/">SevenFour</a> (Japanese page) -<li> <a href="http://www8.ocn.ne.jp/~sonoisa/TiddlyWikiPod/">TiddlyWikiPod</a> +<li> <a href="http://tobysoft.net/wiki/index.php?Ruby%2Fruby-win32-oniguruma">ruby-win32-oniguruma</a> (Japanese page) +<li> <a 
href="http://quux.s74.xrea.com/">SevenFour (Mac OS X)</a> (Japanese page) +<li> <a href="http://storklab.cyber-ninja.jp/">Stork Lab. Products (Mac OS X)</a> (Japanese page) +<li> <a href="http://sourceforge.jp/projects/ttssh2/">TeraTerm</a> +<li> <a href="http://macromates.com/">TextMate (Mac OS X)</a> +<li> <a href="http://www8.ocn.ne.jp/~sonoisa/TiddlyWikiPod/">TiddlyWikiPod (Mac OS X)</a> +<li> <a href="http://www.cyanworks.net/mac.html">TunesTEXT (Mac OS X)</a> +<li> <a href="http://sourceforge.jp/projects/frogger/">XML parser</a> </ul> <br> @@ -138,41 +148,42 @@ It follows the BSD license in the case of the one except for it. <li> <a href="http://www.perldoc.com/perl5.8.0/pod/perlre.html">Perl regular expressions</a> <li> <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html">java.util.regex.Pattern (J2SE 1.4.2)</a> <li> <a href="http://www.opengroup.org/onlinepubs/007908799/xbd/re.html">The Open Group</a> -<li> <a href="http://www.pcre.org/">PCRE</a> -<!-- -<li> <a href="http://www.jajakarta.org/regexp/">Jakarta Project Regexp</a> (Japanese page) -<li> <a href="http://www.jajakarta.org/oro/">Jakarta Project ORO</a> (Japanese page) ---> -<li> <a href="http://www.kt.rim.or.jp/~kbk/regex/regex.html">Regular expressions memo</a> (Japanese page) -<li> <a href="http://www.din.or.jp/~ohzaki/regex.htm">Regular expressions technique</a> (Japanese page) <li> <a href="http://regex.info/">Mastering Regular Expressions</a> +<li> <a href="http://www.unicode.org/">Unicode Home Page</a> </ul> <br> -<!-- -<dt><b>ToDo:</b> +<dt><b>Resources:</b> <ul> -<li> support character types for all code point range. 
+<li> <a href="http://www.kt.rim.or.jp/~kbk/regex/regex.html">Regular expressions memo</a> (Japanese page) +<li> <a href="http://www.din.or.jp/~ohzaki/regex.htm">Regular expressions technique</a> (Japanese page) +<li> <a href="http://staff.aist.go.jp/tanaka-akira/textprocess/">"Text Processing" Lecture documents (Tanaka Akira)</a> (Japanese page) +<li> <a href="resource/JRC2006_panel.pdf">"Regex library in Ruby 1.9/2.0" Japan Ruby Conference 2006 (K.Kosako)</a> (Japanese) </ul> ---> + +<br> </dl> <p> and I'm thankful to Akinori MUSHA. </p> -<!-- <hr> -<font color="red"> -2004-06-14<br> -To: "Greg A. Woods"<br> -I can't send mail to you. (rejected)<br> -Please set the nmatch argument of regexec() to 1, -and use Oniguruma 3.7.1 or 2.4.2.<br> -The nmatch argument should be array size of a pmatch.<br> -But I don't know whether this problem is related to the crash -that you reported. -</font> ---> +<dl> +<dt><b>Other Libraries:</b> +<ul> +<li> <a href="http://www.boost.org/libs/regex/doc/">Boost.Regex</a> +<li> <a href="http://www.pcre.org/">PCRE</a> +<li> <a href="http://arglist.com/regex/">A copy of Henry Spencer's</a> +<li> <a href="http://re2c.org/">re2c</a> +<li> <a href="http://tiny-rex.sourceforge.net/">T-Rex</a> +<li> <a href="http://laurikari.net/tre/">TRE</a> +<li> <a href="http://www.cacas.org/java/gnu/regexp/">gnu.regexp for Java</a> +<li> <a href="http://jakarta.apache.org/regexp/index.html">Jakarta Project Regexp</a> +<li> <a href="http://jakarta.apache.org/oro/">Jakarta Project ORO</a> +</ul> +</dl> + <hr> +<a href="../">Back to Home</a> </body> </html> diff --git a/ext/mbstring/oniguruma/onigcmpt200.h b/ext/mbstring/oniguruma/onigcmpt200.h index 4c029304b6..d9b1419146 100644 --- a/ext/mbstring/oniguruma/onigcmpt200.h +++ b/ext/mbstring/oniguruma/onigcmpt200.h @@ -29,6 +29,12 @@ #define REGCODE_EUCJP REG_ENCODING_EUC_JP #define REGCODE_SJIS REG_ENCODING_SJIS +/* Don't use REGCODE_XXXX. 
(obsoleted) */ +#define MBCTYPE_ASCII RE_MBCTYPE_ASCII +#define MBCTYPE_EUC RE_MBCTYPE_EUC +#define MBCTYPE_SJIS RE_MBCTYPE_SJIS +#define MBCTYPE_UTF8 RE_MBCTYPE_UTF8 + typedef unsigned char* RegTransTableType; #define RegOptionType OnigOptionType #define RegDistance OnigDistance diff --git a/ext/mbstring/oniguruma/oniggnu.h b/ext/mbstring/oniguruma/oniggnu.h index b203f6c8a3..3da9f235c2 100644 --- a/ext/mbstring/oniguruma/oniggnu.h +++ b/ext/mbstring/oniguruma/oniggnu.h @@ -35,10 +35,10 @@ extern "C" { #endif -#define MBCTYPE_ASCII 0 -#define MBCTYPE_EUC 1 -#define MBCTYPE_SJIS 2 -#define MBCTYPE_UTF8 3 +#define RE_MBCTYPE_ASCII 0 +#define RE_MBCTYPE_EUC 1 +#define RE_MBCTYPE_SJIS 2 +#define RE_MBCTYPE_UTF8 3 /* GNU regex options */ #ifndef RE_NREGS diff --git a/ext/mbstring/oniguruma/oniguruma.h b/ext/mbstring/oniguruma/oniguruma.h index 2790356108..ef8a49f7da 100644 --- a/ext/mbstring/oniguruma/oniguruma.h +++ b/ext/mbstring/oniguruma/oniguruma.h @@ -4,7 +4,7 @@ oniguruma.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -36,9 +36,9 @@ extern "C" { #endif #define ONIGURUMA -#define ONIGURUMA_VERSION_MAJOR 3 -#define ONIGURUMA_VERSION_MINOR 7 -#define ONIGURUMA_VERSION_TEENY 1 +#define ONIGURUMA_VERSION_MAJOR 4 +#define ONIGURUMA_VERSION_MINOR 2 +#define ONIGURUMA_VERSION_TEENY 0 #ifdef __cplusplus # ifndef HAVE_PROTOTYPES @@ -167,10 +167,10 @@ typedef struct { int (*code_to_mbc)(OnigCodePoint code, OnigUChar *buf); int (*mbc_to_normalize)(OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to); int (*is_mbc_ambiguous)(OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end); - int (*get_all_pair_ambig_codes)(OnigAmbigType flag, OnigPairAmbigCodes** acs); - int (*get_all_comp_ambig_codes)(OnigAmbigType flag, OnigCompAmbigCodes** acs); + int (*get_all_pair_ambig_codes)(OnigAmbigType flag, const OnigPairAmbigCodes** acs); + int (*get_all_comp_ambig_codes)(OnigAmbigType flag, const OnigCompAmbigCodes** acs); int (*is_code_ctype)(OnigCodePoint code, unsigned int ctype); - int (*get_ctype_code_range)(int ctype, OnigCodePoint* sb_range[], OnigCodePoint* mb_range[]); + int (*get_ctype_code_range)(int ctype, const OnigCodePoint* sb_range[], const OnigCodePoint* mb_range[]); OnigUChar* (*left_adjust_char_head)(const OnigUChar* start, const OnigUChar* p); int (*is_allowed_reverse_match)(const OnigUChar* p, const OnigUChar* end); } OnigEncodingType; @@ -206,6 +206,7 @@ ONIG_EXTERN OnigEncodingType OnigEncodingSJIS; ONIG_EXTERN OnigEncodingType OnigEncodingKOI8; ONIG_EXTERN OnigEncodingType OnigEncodingKOI8_R; ONIG_EXTERN OnigEncodingType OnigEncodingBIG5; +ONIG_EXTERN OnigEncodingType OnigEncodingGB18030; #define ONIG_ENCODING_ASCII (&OnigEncodingASCII) #define ONIG_ENCODING_ISO_8859_1 (&OnigEncodingISO_8859_1) @@ -236,6 +237,7 @@ ONIG_EXTERN OnigEncodingType OnigEncodingBIG5; #define ONIG_ENCODING_KOI8 (&OnigEncodingKOI8) #define ONIG_ENCODING_KOI8_R (&OnigEncodingKOI8_R) #define 
ONIG_ENCODING_BIG5 (&OnigEncodingBIG5) +#define ONIG_ENCODING_GB18030 (&OnigEncodingGB18030) #endif /* else RUBY && M17N */ @@ -448,7 +450,7 @@ int onigenc_str_bytelen_null P_((OnigEncoding enc, const OnigUChar* p)); #define ONIG_NREGION 10 #define ONIG_MAX_BACKREF_NUM 1000 #define ONIG_MAX_REPEAT_NUM 100000 -#define ONIG_MAX_MULTI_BYTE_RANGES_NUM 1000 +#define ONIG_MAX_MULTI_BYTE_RANGES_NUM 10000 /* constants */ #define ONIG_MAX_ERROR_MESSAGE_LEN 90 @@ -457,8 +459,8 @@ typedef unsigned int OnigOptionType; #define ONIG_OPTION_DEFAULT ONIG_OPTION_NONE /* options */ -#define ONIG_OPTION_NONE 0 -#define ONIG_OPTION_IGNORECASE 1L +#define ONIG_OPTION_NONE 0U +#define ONIG_OPTION_IGNORECASE 1U #define ONIG_OPTION_EXTEND (ONIG_OPTION_IGNORECASE << 1) #define ONIG_OPTION_MULTILINE (ONIG_OPTION_EXTEND << 1) #define ONIG_OPTION_SINGLELINE (ONIG_OPTION_MULTILINE << 1) @@ -471,6 +473,7 @@ typedef unsigned int OnigOptionType; #define ONIG_OPTION_NOTBOL (ONIG_OPTION_CAPTURE_GROUP << 1) #define ONIG_OPTION_NOTEOL (ONIG_OPTION_NOTBOL << 1) #define ONIG_OPTION_POSIX_REGION (ONIG_OPTION_NOTEOL << 1) +#define ONIG_OPTION_MAXBIT ONIG_OPTION_POSIX_REGION /* limit */ #define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt)) #define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt)) @@ -484,6 +487,7 @@ typedef struct { OnigOptionType options; /* default option */ } OnigSyntaxType; +ONIG_EXTERN OnigSyntaxType OnigSyntaxASIS; ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixBasic; ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixExtended; ONIG_EXTERN OnigSyntaxType OnigSyntaxEmacs; @@ -491,9 +495,11 @@ ONIG_EXTERN OnigSyntaxType OnigSyntaxGrep; ONIG_EXTERN OnigSyntaxType OnigSyntaxGnuRegex; ONIG_EXTERN OnigSyntaxType OnigSyntaxJava; ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl; +ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl_NG; ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby; /* predefined syntaxes (see regsyntax.c) */ +#define ONIG_SYNTAX_ASIS (&OnigSyntaxASIS) #define ONIG_SYNTAX_POSIX_BASIC 
(&OnigSyntaxPosixBasic) #define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended) #define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs) @@ -501,6 +507,7 @@ ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby; #define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex) #define ONIG_SYNTAX_JAVA (&OnigSyntaxJava) #define ONIG_SYNTAX_PERL (&OnigSyntaxPerl) +#define ONIG_SYNTAX_PERL_NG (&OnigSyntaxPerl_NG) #define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby) /* default syntax */ @@ -508,80 +515,81 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; #define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax /* syntax (operators) */ -#define ONIG_SYN_OP_VARIABLE_META_CHARACTERS (1<<0) -#define ONIG_SYN_OP_DOT_ANYCHAR (1<<1) /* . */ -#define ONIG_SYN_OP_ASTERISK_ZERO_INF (1<<2) /* * */ -#define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF (1<<3) -#define ONIG_SYN_OP_PLUS_ONE_INF (1<<4) /* + */ -#define ONIG_SYN_OP_ESC_PLUS_ONE_INF (1<<5) -#define ONIG_SYN_OP_QMARK_ZERO_ONE (1<<6) /* ? */ -#define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE (1<<7) -#define ONIG_SYN_OP_BRACE_INTERVAL (1<<8) /* {lower,upper} */ -#define ONIG_SYN_OP_ESC_BRACE_INTERVAL (1<<9) /* \{lower,upper\} */ -#define ONIG_SYN_OP_VBAR_ALT (1<<10) /* | */ -#define ONIG_SYN_OP_ESC_VBAR_ALT (1<<11) /* \| */ -#define ONIG_SYN_OP_LPAREN_SUBEXP (1<<12) /* (...) */ -#define ONIG_SYN_OP_ESC_LPAREN_SUBEXP (1<<13) /* \(...\) */ -#define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR (1<<14) /* \A, \Z, \z */ -#define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR (1<<15) /* \G */ -#define ONIG_SYN_OP_DECIMAL_BACKREF (1<<16) /* \num */ -#define ONIG_SYN_OP_BRACKET_CC (1<<17) /* [...] */ -#define ONIG_SYN_OP_ESC_W_WORD (1<<18) /* \w, \W */ -#define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END (1<<19) /* \<. 
\> */ -#define ONIG_SYN_OP_ESC_B_WORD_BOUND (1<<20) /* \b, \B */ -#define ONIG_SYN_OP_ESC_S_WHITE_SPACE (1<<21) /* \s, \S */ -#define ONIG_SYN_OP_ESC_D_DIGIT (1<<22) /* \d, \D */ -#define ONIG_SYN_OP_LINE_ANCHOR (1<<23) /* ^, $ */ -#define ONIG_SYN_OP_POSIX_BRACKET (1<<24) /* [:xxxx:] */ -#define ONIG_SYN_OP_QMARK_NON_GREEDY (1<<25) /* ??,*?,+?,{n,m}? */ -#define ONIG_SYN_OP_ESC_CONTROL_CHARS (1<<26) /* \n,\r,\t,\a ... */ -#define ONIG_SYN_OP_ESC_C_CONTROL (1<<27) /* \cx */ -#define ONIG_SYN_OP_ESC_OCTAL3 (1<<28) /* \OOO */ -#define ONIG_SYN_OP_ESC_X_HEX2 (1<<29) /* \xHH */ -#define ONIG_SYN_OP_ESC_X_BRACE_HEX8 (1<<30) /* \x{7HHHHHHH} */ - -#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE (1<<0) /* \Q...\E */ -#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT (1<<1) /* (?...) */ -#define ONIG_SYN_OP2_OPTION_PERL (1<<2) /* (?imsx),(?-imsx) */ -#define ONIG_SYN_OP2_OPTION_RUBY (1<<3) /* (?imx), (?-imx) */ -#define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT (1<<4) /* ?+,*+,++ */ -#define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL (1<<5) /* {n,m}+ */ -#define ONIG_SYN_OP2_CCLASS_SET_OP (1<<6) /* [...&&..[..]..] */ -#define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP (1<<7) /* (?<name>...) */ -#define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF (1<<8) /* \k<name> */ -#define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL (1<<9) /* \g<name>, \g<n> */ -#define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY (1<<10) /* (?@..),(?@<x>..) 
*/ -#define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL (1<<11) /* \C-x */ -#define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META (1<<12) /* \M-x */ -#define ONIG_SYN_OP2_ESC_V_VTAB (1<<13) /* \v as VTAB */ -#define ONIG_SYN_OP2_ESC_U_HEX4 (1<<14) /* \uHHHH */ -#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR (1<<15) /* \`, \' */ -#define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY (1<<16) /* \p{...}, \P{...} */ -#define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (1<<17) /* \p{^..}, \P{^..} */ -#define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1<<18) /* \p{IsXDigit} */ -#define ONIG_SYN_OP2_ESC_H_XDIGIT (1<<19) /* \h, \H */ +#define ONIG_SYN_OP_VARIABLE_META_CHARACTERS (1U<<0) +#define ONIG_SYN_OP_DOT_ANYCHAR (1U<<1) /* . */ +#define ONIG_SYN_OP_ASTERISK_ZERO_INF (1U<<2) /* * */ +#define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF (1U<<3) +#define ONIG_SYN_OP_PLUS_ONE_INF (1U<<4) /* + */ +#define ONIG_SYN_OP_ESC_PLUS_ONE_INF (1U<<5) +#define ONIG_SYN_OP_QMARK_ZERO_ONE (1U<<6) /* ? */ +#define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE (1U<<7) +#define ONIG_SYN_OP_BRACE_INTERVAL (1U<<8) /* {lower,upper} */ +#define ONIG_SYN_OP_ESC_BRACE_INTERVAL (1U<<9) /* \{lower,upper\} */ +#define ONIG_SYN_OP_VBAR_ALT (1U<<10) /* | */ +#define ONIG_SYN_OP_ESC_VBAR_ALT (1U<<11) /* \| */ +#define ONIG_SYN_OP_LPAREN_SUBEXP (1U<<12) /* (...) */ +#define ONIG_SYN_OP_ESC_LPAREN_SUBEXP (1U<<13) /* \(...\) */ +#define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR (1U<<14) /* \A, \Z, \z */ +#define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR (1U<<15) /* \G */ +#define ONIG_SYN_OP_DECIMAL_BACKREF (1U<<16) /* \num */ +#define ONIG_SYN_OP_BRACKET_CC (1U<<17) /* [...] */ +#define ONIG_SYN_OP_ESC_W_WORD (1U<<18) /* \w, \W */ +#define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END (1U<<19) /* \<. 
\> */ +#define ONIG_SYN_OP_ESC_B_WORD_BOUND (1U<<20) /* \b, \B */ +#define ONIG_SYN_OP_ESC_S_WHITE_SPACE (1U<<21) /* \s, \S */ +#define ONIG_SYN_OP_ESC_D_DIGIT (1U<<22) /* \d, \D */ +#define ONIG_SYN_OP_LINE_ANCHOR (1U<<23) /* ^, $ */ +#define ONIG_SYN_OP_POSIX_BRACKET (1U<<24) /* [:xxxx:] */ +#define ONIG_SYN_OP_QMARK_NON_GREEDY (1U<<25) /* ??,*?,+?,{n,m}? */ +#define ONIG_SYN_OP_ESC_CONTROL_CHARS (1U<<26) /* \n,\r,\t,\a ... */ +#define ONIG_SYN_OP_ESC_C_CONTROL (1U<<27) /* \cx */ +#define ONIG_SYN_OP_ESC_OCTAL3 (1U<<28) /* \OOO */ +#define ONIG_SYN_OP_ESC_X_HEX2 (1U<<29) /* \xHH */ +#define ONIG_SYN_OP_ESC_X_BRACE_HEX8 (1U<<30) /* \x{7HHHHHHH} */ + +#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE (1U<<0) /* \Q...\E */ +#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT (1U<<1) /* (?...) */ +#define ONIG_SYN_OP2_OPTION_PERL (1U<<2) /* (?imsx),(?-imsx) */ +#define ONIG_SYN_OP2_OPTION_RUBY (1U<<3) /* (?imx), (?-imx) */ +#define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT (1U<<4) /* ?+,*+,++ */ +#define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL (1U<<5) /* {n,m}+ */ +#define ONIG_SYN_OP2_CCLASS_SET_OP (1U<<6) /* [...&&..[..]..] */ +#define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP (1U<<7) /* (?<name>...) */ +#define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF (1U<<8) /* \k<name> */ +#define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL (1U<<9) /* \g<name>, \g<n> */ +#define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY (1U<<10) /* (?@..),(?@<x>..) 
*/ +#define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL (1U<<11) /* \C-x */ +#define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META (1U<<12) /* \M-x */ +#define ONIG_SYN_OP2_ESC_V_VTAB (1U<<13) /* \v as VTAB */ +#define ONIG_SYN_OP2_ESC_U_HEX4 (1U<<14) /* \uHHHH */ +#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR (1U<<15) /* \`, \' */ +#define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY (1U<<16) /* \p{...}, \P{...} */ +#define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (1U<<17) /* \p{^..}, \P{^..} */ +#define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1U<<18) /* \p{IsXDigit} */ +#define ONIG_SYN_OP2_ESC_H_XDIGIT (1U<<19) /* \h, \H */ +#define ONIG_SYN_OP2_INEFFECTIVE_ESCAPE (1U<<20) /* \ */ /* syntax (behavior) */ -#define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1<<31) /* not implemented */ -#define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS (1<<0) /* ?, *, +, {n,m} */ -#define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS (1<<1) /* error or ignore */ -#define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP (1<<2) /* ...)... */ -#define ONIG_SYN_ALLOW_INVALID_INTERVAL (1<<3) /* {??? */ -#define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV (1<<4) /* {,n} => {0,n} */ -#define ONIG_SYN_STRICT_CHECK_BACKREF (1<<5) /* /(\1)/,/\1()/ ..*/ -#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (1<<6) /* (?<=a|bc) */ -#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (1<<7) /* see doc/RE */ -#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1<<8) /* (?<x>)(?<x>) */ -#define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY (1<<9) /* a{n}?=(?:a{n})? */ +#define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1U<<31) /* not implemented */ +#define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS (1U<<0) /* ?, *, +, {n,m} */ +#define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS (1U<<1) /* error or ignore */ +#define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP (1U<<2) /* ...)... */ +#define ONIG_SYN_ALLOW_INVALID_INTERVAL (1U<<3) /* {??? 
*/ +#define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV (1U<<4) /* {,n} => {0,n} */ +#define ONIG_SYN_STRICT_CHECK_BACKREF (1U<<5) /* /(\1)/,/\1()/ ..*/ +#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (1U<<6) /* (?<=a|bc) */ +#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (1U<<7) /* see doc/RE */ +#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1U<<8) /* (?<x>)(?<x>) */ +#define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY (1U<<9) /* a{n}?=(?:a{n})? */ /* syntax (behavior) in char class [...] */ -#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1<<20) /* [^...] */ -#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC (1<<21) /* [..\w..] etc.. */ -#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC (1<<22) -#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (1<<23) /* [0-9-a]=[0-9\-a] */ +#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1U<<20) /* [^...] */ +#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC (1U<<21) /* [..\w..] etc.. */ +#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC (1U<<22) +#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (1U<<23) /* [0-9-a]=[0-9\-a] */ /* syntax (behavior) warning */ -#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (1<<24) /* [,-,] */ -#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1<<25) /* (?:a*)+ */ +#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (1U<<24) /* [,-,] */ +#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1U<<25) /* (?:a*)+ */ /* meta character specifiers (onig_set_meta_char()) */ #define ONIG_META_CHAR_ESCAPE 0 @@ -660,6 +668,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; #define ONIGERR_INVALID_WIDE_CHAR_VALUE -400 #define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401 #define ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION -402 +#define ONIGERR_INVALID_COMBINATION_OF_OPTIONS -403 /* errors related to thread */ #define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001 @@ -766,7 +775,13 @@ typedef struct re_pattern_buffer { /* regex_t link chain */ struct re_pattern_buffer* chain; /* escape compile-conflict */ -} regex_t; +} OnigRegexType; + +typedef OnigRegexType* OnigRegex; + +#ifndef ONIG_ESCAPE_REGEX_T_COLLISION + 
typedef OnigRegexType regex_t; +#endif typedef struct { @@ -788,19 +803,19 @@ void onig_set_warn_func P_((OnigWarnFunc f)); ONIG_EXTERN void onig_set_verb_warn_func P_((OnigWarnFunc f)); ONIG_EXTERN -int onig_new P_((regex_t**, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo)); +int onig_new P_((OnigRegex*, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo)); ONIG_EXTERN -int onig_new_deluxe P_((regex_t** reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo)); +int onig_new_deluxe P_((OnigRegex* reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo)); ONIG_EXTERN -void onig_free P_((regex_t*)); +void onig_free P_((OnigRegex)); ONIG_EXTERN -int onig_recompile P_((regex_t*, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo)); +int onig_recompile P_((OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo)); ONIG_EXTERN -int onig_recompile_deluxe P_((regex_t* reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo)); +int onig_recompile_deluxe P_((OnigRegex reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo)); ONIG_EXTERN -int onig_search P_((regex_t*, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option)); +int onig_search P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option)); ONIG_EXTERN -int onig_match P_((regex_t*, const 
OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option)); +int onig_match P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option)); ONIG_EXTERN OnigRegion* onig_region_new P_((void)); ONIG_EXTERN @@ -816,29 +831,31 @@ int onig_region_resize P_((OnigRegion* region, int n)); ONIG_EXTERN int onig_region_set P_((OnigRegion* region, int at, int beg, int end)); ONIG_EXTERN -int onig_name_to_group_numbers P_((regex_t* reg, const OnigUChar* name, const OnigUChar* name_end, int** nums)); +int onig_name_to_group_numbers P_((OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, int** nums)); ONIG_EXTERN -int onig_name_to_backref_number P_((regex_t* reg, const OnigUChar* name, const OnigUChar* name_end, OnigRegion *region)); +int onig_name_to_backref_number P_((OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, OnigRegion *region)); ONIG_EXTERN -int onig_foreach_name P_((regex_t* reg, int (*func)(const OnigUChar*, const OnigUChar*,int,int*,regex_t*,void*), void* arg)); +int onig_foreach_name P_((OnigRegex reg, int (*func)(const OnigUChar*, const OnigUChar*,int,int*,OnigRegex,void*), void* arg)); ONIG_EXTERN -int onig_number_of_names P_((regex_t* reg)); +int onig_number_of_names P_((OnigRegex reg)); ONIG_EXTERN -int onig_number_of_captures P_((regex_t* reg)); +int onig_number_of_captures P_((OnigRegex reg)); ONIG_EXTERN -int onig_number_of_capture_histories P_((regex_t* reg)); +int onig_number_of_capture_histories P_((OnigRegex reg)); ONIG_EXTERN OnigCaptureTreeNode* onig_get_capture_tree P_((OnigRegion* region)); ONIG_EXTERN int onig_capture_tree_traverse P_((OnigRegion* region, int at, int(*callback_func)(int,int,int,int,int,void*), void* arg)); ONIG_EXTERN -OnigEncoding onig_get_encoding P_((regex_t* reg)); +int onig_noname_group_capture_is_active P_((OnigRegex reg)); +ONIG_EXTERN +OnigEncoding onig_get_encoding P_((OnigRegex reg)); 
ONIG_EXTERN -OnigOptionType onig_get_options P_((regex_t* reg)); +OnigOptionType onig_get_options P_((OnigRegex reg)); ONIG_EXTERN -OnigAmbigType onig_get_ambig_flag P_((regex_t* reg)); +OnigAmbigType onig_get_ambig_flag P_((OnigRegex reg)); ONIG_EXTERN -OnigSyntaxType* onig_get_syntax P_((regex_t* reg)); +OnigSyntaxType* onig_get_syntax P_((OnigRegex reg)); ONIG_EXTERN int onig_set_default_syntax P_((OnigSyntaxType* syntax)); ONIG_EXTERN diff --git a/ext/mbstring/oniguruma/regcomp.c b/ext/mbstring/oniguruma/regcomp.c index a2315fcec5..db58be72f0 100644 --- a/ext/mbstring/oniguruma/regcomp.c +++ b/ext/mbstring/oniguruma/regcomp.c @@ -2,7 +2,7 @@ regcomp.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -1268,6 +1268,13 @@ compile_length_tree(Node* node, regex_t* reg) { BackrefNode* br = &(NBACKREF(node)); +#ifdef USE_BACKREF_AT_LEVEL + if (IS_BACKREF_NEST_LEVEL(br)) { + r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH + + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); + } + else +#endif if (br->back_num == 1) { r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 3) ? 
SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM)); @@ -1381,9 +1388,21 @@ compile_tree(Node* node, regex_t* reg) case N_BACKREF: { - int i; BackrefNode* br = &(NBACKREF(node)); +#ifdef USE_BACKREF_AT_LEVEL + if (IS_BACKREF_NEST_LEVEL(br)) { + r = add_opcode(reg, OP_BACKREF_AT_LEVEL); + if (r) return r; + r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE)); + if (r) return r; + r = add_length(reg, br->nest_level); + if (r) return r; + + goto add_bacref_mems; + } + else +#endif if (br->back_num == 1) { n = br->back_static[0]; if (IS_IGNORECASE(reg->options)) { @@ -1405,17 +1424,19 @@ compile_tree(Node* node, regex_t* reg) } } else { + int i; int* p; if (IS_IGNORECASE(reg->options)) { - add_opcode(reg, OP_BACKREF_MULTI_IC); + r = add_opcode(reg, OP_BACKREF_MULTI_IC); } else { - add_opcode(reg, OP_BACKREF_MULTI); + r = add_opcode(reg, OP_BACKREF_MULTI); } - if (r) return r; - add_length(reg, br->back_num); + + add_bacref_mems: + r = add_length(reg, br->back_num); if (r) return r; p = BACKREFS_P(br); for (i = br->back_num - 1; i >= 0; i--) { @@ -2120,29 +2141,6 @@ get_char_length_tree(Node* node, regex_t* reg, int* len) return get_char_length_tree1(node, reg, len, 0); } -extern int -onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc) -{ - int found; - - if (ONIGENC_MBC_MINLEN(enc) > 1 || (code >= SINGLE_BYTE_SIZE)) { - if (IS_NULL(cc->mbuf)) { - found = 0; - } - else { - found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0); - } - } - else { - found = (BITSET_AT(cc->bs, code) == 0 ? 
0 : 1); - } - - if (IS_CCLASS_NOT(cc)) - return !found; - else - return found; -} - /* x is not included y ==> 1 : 0 */ static int is_not_included(Node* x, Node* y, regex_t* reg) @@ -2516,6 +2514,9 @@ subexp_inf_recursive_check(Node* node, ScanEnv* env, int head) case N_QUALIFIER: r = subexp_inf_recursive_check(NQUALIFIER(node).target, env, head); + if (r == RECURSION_EXIST) { + if (NQUALIFIER(node).lower == 0) r = 0; + } break; case N_ANCHOR: @@ -2943,15 +2944,55 @@ next_setup(Node* node, Node* next_node, regex_t* reg) return 0; } + +static int +divide_ambig_string_node_sub(regex_t* reg, int prev_ambig, + UChar* prev_start, UChar* prev, + UChar* end, Node*** tailp, Node** root) +{ + UChar *tmp, *wp; + Node* snode; + + if (prev_ambig != 0) { + tmp = prev_start; + wp = prev_start; + while (tmp < prev) { + wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag, + &tmp, end, wp); + } + snode = onig_node_new_str(prev_start, wp); + CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY); + NSTRING_SET_AMBIG(snode); + if (wp != prev) NSTRING_SET_AMBIG_REDUCE(snode); + } + else { + snode = onig_node_new_str(prev_start, prev); + CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY); + } + + if (*tailp == (Node** )0) { + *root = onig_node_new_list(snode, NULL); + CHECK_NULL_RETURN_VAL(*root, ONIGERR_MEMORY); + *tailp = &(NCONS(*root).right); + } + else { + **tailp = onig_node_new_list(snode, NULL); + CHECK_NULL_RETURN_VAL(**tailp, ONIGERR_MEMORY); + *tailp = &(NCONS(**tailp).right); + } + + return 0; +} + static int divide_ambig_string_node(Node* node, regex_t* reg) { StrNode* sn = &NSTRING(node); int ambig, prev_ambig; UChar *prev, *p, *end, *prev_start, *start, *tmp, *wp; - Node *snode; Node *root = NULL_NODE; Node **tailp = (Node** )0; + int r; start = prev_start = p = sn->s; end = sn->end; @@ -2964,33 +3005,9 @@ divide_ambig_string_node(Node* node, regex_t* reg) if (prev_ambig != (ambig = ONIGENC_IS_MBC_AMBIGUOUS(reg->enc, reg->ambig_flag, &p, end))) { - if (prev_ambig != 0) { - tmp = 
prev_start; - wp = prev_start; - while (tmp < prev) { - wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag, - &tmp, end, wp); - } - snode = onig_node_new_str(prev_start, wp); - CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY); - NSTRING_SET_AMBIG(snode); - if (wp != prev) NSTRING_SET_AMBIG_REDUCE(snode); - } - else { - snode = onig_node_new_str(prev_start, prev); - CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY); - } - - if (tailp == (Node** )0) { - root = onig_node_new_list(snode, NULL); - CHECK_NULL_RETURN_VAL(root, ONIGERR_MEMORY); - tailp = &(NCONS(root).right); - } - else { - *tailp = onig_node_new_list(snode, NULL); - CHECK_NULL_RETURN_VAL(*tailp, ONIGERR_MEMORY); - tailp = &(NCONS(*tailp).right); - } + r = divide_ambig_string_node_sub(reg, prev_ambig, prev_start, prev, + end, &tailp, &root); + if (r != 0) return r; prev_ambig = ambig; prev_start = prev; @@ -3011,33 +3028,9 @@ divide_ambig_string_node(Node* node, regex_t* reg) } } else { - if (prev_ambig != 0) { - tmp = prev_start; - wp = prev_start; - while (tmp < end) { - wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag, - &tmp, end, wp); - } - snode = onig_node_new_str(prev_start, wp); - CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY); - NSTRING_SET_AMBIG(snode); - if (wp != end) NSTRING_SET_AMBIG_REDUCE(snode); - } - else { - snode = onig_node_new_str(prev_start, end); - CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY); - } - - if (tailp == (Node** )0) { - root = onig_node_new_list(snode, NULL); - CHECK_NULL_RETURN_VAL(root, ONIGERR_MEMORY); - tailp = &(NCONS(node).right); - } - else { - *tailp = onig_node_new_list(snode, NULL); - CHECK_NULL_RETURN_VAL(*tailp, ONIGERR_MEMORY); - tailp = &(NCONS(*tailp).right); - } + r = divide_ambig_string_node_sub(reg, prev_ambig, prev_start, end, + end, &tailp, &root); + if (r != 0) return r; swap_node(node, root); onig_node_str_clear(root); /* should be after swap! 
*/ @@ -3116,6 +3109,11 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; BIT_STATUS_ON_AT(env->backrefed_mem, p[i]); BIT_STATUS_ON_AT(env->bt_mem_start, p[i]); +#ifdef USE_BACKREF_AT_LEVEL + if (IS_BACKREF_NEST_LEVEL(br)) { + BIT_STATUS_ON_AT(env->bt_mem_end, p[i]); + } +#endif SET_EFFECT_STATUS(nodes[p[i]], NST_MEM_BACKREFED); } } @@ -3263,11 +3261,9 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) #define ALLOWED_EFFECT_IN_LB_NOT 0 #define ALLOWED_ANCHOR_IN_LB \ -( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF ) +( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION ) #define ALLOWED_ANCHOR_IN_LB_NOT \ -( ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF ) - /* can't allow all anchors, because \G in look-behind through Search(). - ex. /(?<=\G)zz/.match("azz") => success. */ +( ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION ) case ANCHOR_LOOK_BEHIND: { @@ -3383,7 +3379,7 @@ typedef struct { static int map_position_value(OnigEncoding enc, int i) { - static short int ByteValTable[] = { + static const short int ByteValTable[] = { 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, @@ -3408,7 +3404,7 @@ static int distance_value(MinMaxLen* mm) { /* 1000 / (min-max-dist + 1) */ - static short int dist_vals[] = { + static const short int dist_vals[] = { 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100, 91, 83, 77, 71, 67, 63, 59, 56, 53, 50, 48, 45, 43, 42, 40, 38, 37, 36, 34, 33, @@ -3711,7 +3707,7 @@ select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt) static void clear_opt_map_info(OptMapInfo* map) { - static OptMapInfo clean_info = { + static const OptMapInfo clean_info = { {0, 0}, {0, 0}, 0, { 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -3758,8 +3754,8 @@ add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end, int i, j, n, len; UChar buf[ONIGENC_MBC_NORMALIZE_MAXLEN]; OnigCodePoint code, ccode; - OnigCompAmbigCodes* ccs; - OnigPairAmbigCodes* pccs; + const OnigCompAmbigCodes* ccs; + const OnigPairAmbigCodes* pccs; OnigAmbigType amb; add_char_opt_map_info(map, p[0], enc); @@ -4197,8 +4193,8 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) { if (env->mmd.max == 0 && NTYPE(qn->target) == N_ANYCHAR && qn->greedy) { - if (IS_POSIXLINE(env->options)) - add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_PL); + if (IS_MULTILINE(env->options)) + add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML); else add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR); } @@ -4316,10 +4312,7 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e) CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY); reg->exact_end = reg->exact + e->len; - if (e->anc.left_anchor & ANCHOR_BEGIN_LINE) - allow_reverse = 1; - else - allow_reverse = + allow_reverse = ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end); if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { @@ -4391,7 +4384,7 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) if (r) return r; reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF | - ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_PL); + ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML); reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF); @@ -4503,7 +4496,7 @@ print_anchor(FILE* f, int anchor) q = 1; fprintf(f, "anychar-star"); } - if (anchor & ANCHOR_ANYCHAR_STAR_PL) { + if (anchor & ANCHOR_ANYCHAR_STAR_ML) { if (q) fprintf(f, ", "); fprintf(f, "anychar-star-pl"); } @@ -4514,8 +4507,8 @@ print_anchor(FILE* f, int anchor) static void print_optimize_info(FILE* f, regex_t* reg) { - static char* 
on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV", - "EXACT_IC", "MAP" }; + static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV", + "EXACT_IC", "MAP" }; fprintf(f, "optimize: %s\n", on[reg->optimize]); fprintf(f, " anchor: "); print_anchor(f, reg->anchor); @@ -4624,7 +4617,6 @@ onig_chain_reduce(regex_t* reg) { regex_t *head, *prev; - THREAD_ATOMIC_START; prev = reg; head = prev->chain; if (IS_NOT_NULL(head)) { @@ -4636,7 +4628,6 @@ onig_chain_reduce(regex_t* reg) prev->chain = (regex_t* )NULL; REGEX_TRANSFER(reg, head); } - THREAD_ATOMIC_END; } #if 0 @@ -4875,6 +4866,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, return r; } +#ifdef USE_RECOMPILE_API extern int onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, @@ -4893,6 +4885,7 @@ onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, } return 0; } +#endif static int onig_inited = 0; @@ -4906,6 +4899,11 @@ onig_alloc_init(regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag, if (ONIGENC_IS_UNDEF(enc)) return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED; + if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) + == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) { + return ONIGERR_INVALID_COMBINATION_OF_OPTIONS; + } + *reg = (regex_t* )xmalloc(sizeof(regex_t)); if (IS_NULL(*reg)) return ONIGERR_MEMORY; (*reg)->state = ONIG_STATE_MODIFY; @@ -4991,14 +4989,14 @@ onig_end() onig_print_statistics(stderr); #endif -#ifdef USE_RECYCLE_NODE - onig_free_node_list(); -#endif - #ifdef USE_SHARED_CCLASS_TABLE onig_free_shared_cclass_table(); #endif +#ifdef USE_RECYCLE_NODE + onig_free_node_list(); +#endif + onig_inited = 0; THREAD_ATOMIC_END; @@ -5052,35 +5050,36 @@ OnigOpInfoType OnigOpInfo[] = { { OP_END_LINE, "end-line", ARG_NON }, { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON }, { OP_BEGIN_POSITION, "begin-position", 
ARG_NON }, - { OP_BACKREF1, "backref1", ARG_NON }, - { OP_BACKREF2, "backref2", ARG_NON }, - { OP_BACKREF3, "backref3", ARG_NON }, - { OP_BACKREFN, "backrefn", ARG_MEMNUM }, - { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL }, - { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL }, - { OP_BACKREF_MULTI_IC, "backref_multi-ic",ARG_SPECIAL }, - { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM }, - { OP_MEMORY_START, "mem-start", ARG_MEMNUM }, + { OP_BACKREF1, "backref1", ARG_NON }, + { OP_BACKREF2, "backref2", ARG_NON }, + { OP_BACKREF3, "backref3", ARG_NON }, + { OP_BACKREFN, "backrefn", ARG_MEMNUM }, + { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL }, + { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL }, + { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL }, + { OP_BACKREF_AT_LEVEL, "backref_at_level", ARG_SPECIAL }, + { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM }, + { OP_MEMORY_START, "mem-start", ARG_MEMNUM }, { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM }, { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM }, { OP_MEMORY_END, "mem-end", ARG_MEMNUM }, { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM }, - { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION }, - { OP_SET_OPTION, "set-option", ARG_OPTION }, - { OP_FAIL, "fail", ARG_NON }, - { OP_JUMP, "jump", ARG_RELADDR }, - { OP_PUSH, "push", ARG_RELADDR }, - { OP_POP, "pop", ARG_NON }, - { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL }, - { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL }, - { OP_REPEAT, "repeat", ARG_SPECIAL }, - { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL }, - { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM }, - { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM }, - { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM }, - { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM }, - { OP_NULL_CHECK_START, "null-check-start",ARG_MEMNUM }, - { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM }, + { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION }, + { OP_SET_OPTION, 
"set-option", ARG_OPTION }, + { OP_FAIL, "fail", ARG_NON }, + { OP_JUMP, "jump", ARG_RELADDR }, + { OP_PUSH, "push", ARG_RELADDR }, + { OP_POP, "pop", ARG_NON }, + { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL }, + { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL }, + { OP_REPEAT, "repeat", ARG_SPECIAL }, + { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL }, + { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM }, + { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM }, + { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM }, + { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM }, + { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM }, + { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM }, { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM }, { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM }, { OP_PUSH_POS, "push-pos", ARG_NON }, @@ -5312,6 +5311,26 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, } break; + case OP_BACKREF_AT_LEVEL: + { + OnigOptionType option; + LengthType level; + + GET_OPTION_INC(option, bp); + fprintf(f, ":%d", option); + GET_LENGTH_INC(level, bp); + fprintf(f, ":%d", level); + + fputs(" ", f); + GET_LENGTH_INC(len, bp); + for (i = 0; i < len; i++) { + GET_MEMNUM_INC(mem, bp); + if (i > 0) fputs(", ", f); + fprintf(f, "%d", mem); + } + } + break; + case OP_REPEAT: case OP_REPEAT_NG: { diff --git a/ext/mbstring/oniguruma/regenc.c b/ext/mbstring/oniguruma/regenc.c index a767ca60b6..bbbf1a2f94 100644 --- a/ext/mbstring/oniguruma/regenc.c +++ b/ext/mbstring/oniguruma/regenc.c @@ -175,7 +175,7 @@ onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s) #define USE_APPLICATION_TO_LOWER_CASE_TABLE -unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = { +const unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = { 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x228c, 0x2289, 0x2288, 0x2288, 0x2288, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 
0x2008, 0x2008, 0x2008, @@ -251,7 +251,7 @@ static const UChar BuiltInAsciiToLowerCaseTable[] = { #endif /* not USE_APPLICATION_TO_LOWER_CASE_TABLE */ #ifdef USE_UPPER_CASE_TABLE -UChar OnigEncAsciiToUpperCaseTable[256] = { +const UChar OnigEncAsciiToUpperCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', @@ -287,7 +287,7 @@ UChar OnigEncAsciiToUpperCaseTable[256] = { }; #endif -unsigned short OnigEncAsciiCtypeTable[256] = { +const unsigned short OnigEncAsciiCtypeTable[256] = { 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, @@ -323,7 +323,7 @@ unsigned short OnigEncAsciiCtypeTable[256] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }; -UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = { +const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', @@ -359,7 +359,7 @@ UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = { }; #ifdef USE_UPPER_CASE_TABLE -UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = { +const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', @@ -417,7 +417,7 @@ onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UC return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s); } -OnigPairAmbigCodes OnigAsciiPairAmbigCodes[] = { +const OnigPairAmbigCodes OnigAsciiPairAmbigCodes[] = { { 0x41, 0x61 }, { 0x42, 0x62 }, { 0x43, 0x63 }, @@ -475,7 +475,7 @@ 
OnigPairAmbigCodes OnigAsciiPairAmbigCodes[] = { extern int onigenc_ascii_get_all_pair_ambig_codes(OnigAmbigType flag, - OnigPairAmbigCodes** ccs) + const OnigPairAmbigCodes** ccs) { if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { *ccs = OnigAsciiPairAmbigCodes; @@ -488,16 +488,16 @@ onigenc_ascii_get_all_pair_ambig_codes(OnigAmbigType flag, extern int onigenc_nothing_get_all_comp_ambig_codes(OnigAmbigType flag, - OnigCompAmbigCodes** ccs) + const OnigCompAmbigCodes** ccs) { return 0; } extern int onigenc_iso_8859_1_get_all_pair_ambig_codes(OnigAmbigType flag, - OnigPairAmbigCodes** ccs) + const OnigPairAmbigCodes** ccs) { - static OnigPairAmbigCodes cc[] = { + static const OnigPairAmbigCodes cc[] = { { 0xc0, 0xe0 }, { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, @@ -577,9 +577,9 @@ onigenc_iso_8859_1_get_all_pair_ambig_codes(OnigAmbigType flag, extern int onigenc_ess_tsett_get_all_comp_ambig_codes(OnigAmbigType flag, - OnigCompAmbigCodes** ccs) + const OnigCompAmbigCodes** ccs) { - static OnigCompAmbigCodes folds[] = { + static const OnigCompAmbigCodes folds[] = { { 2, 0xdf, {{ 2, { 0x53, 0x53 } }, { 2, { 0x73, 0x73} } } } }; @@ -593,7 +593,7 @@ onigenc_ess_tsett_get_all_comp_ambig_codes(OnigAmbigType flag, extern int onigenc_not_support_get_ctype_code_range(int ctype, - OnigCodePoint* sbr[], OnigCodePoint* mbr[]) + const OnigCodePoint* sbr[], const OnigCodePoint* mbr[]) { return ONIG_NO_SUPPORT_CONFIG; } @@ -830,10 +830,10 @@ onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) if ((code & 0xff000000) != 0) { *p++ = (UChar )((code >> 24) & 0xff); } - if ((code & 0xff0000) != 0) { + if ((code & 0xff0000) != 0 || p != buf) { *p++ = (UChar )((code >> 16) & 0xff); } - if ((code & 0xff00) != 0) { + if ((code & 0xff00) != 0 || p != buf) { *p++ = (UChar )((code >> 8) & 0xff); } *p++ = (UChar )(code & 0xff); @@ -849,40 +849,32 @@ extern int onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code, unsigned int ctype) { - if ((ctype & ONIGENC_CTYPE_WORD) != 
0) { - if (code < 128) - return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); - else + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else { + if ((ctype & (ONIGENC_CTYPE_WORD | + ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) { return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE); - - ctype &= ~ONIGENC_CTYPE_WORD; - if (ctype == 0) return FALSE; + } } - if (code < 128) - return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); - else - return FALSE; + return FALSE; } extern int onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code, unsigned int ctype) { - if ((ctype & ONIGENC_CTYPE_WORD) != 0) { - if (code < 128) - return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); - else + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else { + if ((ctype & (ONIGENC_CTYPE_WORD | + ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) { return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE); - - ctype &= ~ONIGENC_CTYPE_WORD; - if (ctype == 0) return FALSE; + } } - if (code < 128) - return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); - else - return FALSE; + return FALSE; } extern int diff --git a/ext/mbstring/oniguruma/regenc.h b/ext/mbstring/oniguruma/regenc.h index 510455146e..58ee3e7f22 100644 --- a/ext/mbstring/oniguruma/regenc.h +++ b/ext/mbstring/oniguruma/regenc.h @@ -4,7 +4,7 @@ regenc.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -65,15 +65,17 @@ #else /* ONIG_RUBY_M17N */ #define USE_UNICODE_FULL_RANGE_CTYPE +/* following must not use with USE_CRNL_AS_LINE_TERMINATOR */ +/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTF#18 */ #define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII /* for encoding system implementation (internal) */ -ONIG_EXTERN int onigenc_ascii_get_all_pair_ambig_codes P_((OnigAmbigType flag, OnigPairAmbigCodes** acs)); -ONIG_EXTERN int onigenc_nothing_get_all_comp_ambig_codes P_((OnigAmbigType flag, OnigCompAmbigCodes** acs)); -ONIG_EXTERN int onigenc_iso_8859_1_get_all_pair_ambig_codes P_((OnigAmbigType flag, OnigPairAmbigCodes** acs)); -ONIG_EXTERN int onigenc_ess_tsett_get_all_comp_ambig_codes P_((OnigAmbigType flag, OnigCompAmbigCodes** acs)); -ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((int ctype, OnigCodePoint* sbr[], OnigCodePoint* mbr[])); +ONIG_EXTERN int onigenc_ascii_get_all_pair_ambig_codes P_((OnigAmbigType flag, const OnigPairAmbigCodes** acs)); +ONIG_EXTERN int onigenc_nothing_get_all_comp_ambig_codes P_((OnigAmbigType flag, const OnigCompAmbigCodes** acs)); +ONIG_EXTERN int onigenc_iso_8859_1_get_all_pair_ambig_codes P_((OnigAmbigType flag, const OnigPairAmbigCodes** acs)); +ONIG_EXTERN int onigenc_ess_tsett_get_all_comp_ambig_codes P_((OnigAmbigType flag, const OnigCompAmbigCodes** acs)); +ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((int ctype, const OnigCodePoint* sbr[], const OnigCodePoint* mbr[])); ONIG_EXTERN int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end)); /* methods for single byte encoding */ @@ -105,7 +107,7 @@ ONIG_EXTERN int onigenc_get_all_fold_match_code_ss_0xdf P_((OnigCodePoint** code /* in enc/unicode.c */ ONIG_EXTERN int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype)); -ONIG_EXTERN int onigenc_unicode_get_ctype_code_range P_((int ctype, OnigCodePoint* sbr[], 
OnigCodePoint* mbr[])); +ONIG_EXTERN int onigenc_unicode_get_ctype_code_range P_((int ctype, const OnigCodePoint* sbr[], const OnigCodePoint* mbr[])); #define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \ @@ -115,10 +117,10 @@ ONIG_EXTERN int onigenc_unicode_get_ctype_code_range P_((int ctype, OnigCodePoin #define ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code,ctype) \ ((OnigEnc_Unicode_ISO_8859_1_CtypeTable[code] & ctype) != 0) -ONIG_EXTERN UChar OnigEncISO_8859_1_ToLowerCaseTable[]; -ONIG_EXTERN UChar OnigEncISO_8859_1_ToUpperCaseTable[]; -ONIG_EXTERN unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[]; -ONIG_EXTERN OnigPairAmbigCodes OnigAsciiPairAmbigCodes[]; +ONIG_EXTERN const UChar OnigEncISO_8859_1_ToLowerCaseTable[]; +ONIG_EXTERN const UChar OnigEncISO_8859_1_ToUpperCaseTable[]; +ONIG_EXTERN const unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[]; +ONIG_EXTERN const OnigPairAmbigCodes OnigAsciiPairAmbigCodes[]; #endif /* is not ONIG_RUBY_M17N */ @@ -133,7 +135,7 @@ extern int onig_is_in_code_range P_((const UChar* p, OnigCodePoint code)); ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding; ONIG_EXTERN const UChar* OnigEncAsciiToLowerCaseTable; ONIG_EXTERN const UChar OnigEncAsciiToUpperCaseTable[]; -ONIG_EXTERN unsigned short OnigEncAsciiCtypeTable[]; +ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[]; #define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) OnigEncAsciiToLowerCaseTable[c] #define ONIGENC_ASCII_CODE_TO_UPPER_CASE(c) OnigEncAsciiToUpperCaseTable[c] diff --git a/ext/mbstring/oniguruma/regerror.c b/ext/mbstring/oniguruma/regerror.c index 560b5e12c5..811ca2b012 100644 --- a/ext/mbstring/oniguruma/regerror.c +++ b/ext/mbstring/oniguruma/regerror.c @@ -2,7 +2,7 @@ regerror.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -38,12 +38,12 @@ #define va_init_list(a,b) va_start(a) #endif -extern char* +extern UChar* onig_error_code_to_format(int code) { char *p; - if (code >= 0) return (char* )0; + if (code >= 0) return (UChar* )0; switch (code) { case ONIG_MISMATCH: @@ -170,6 +170,8 @@ onig_error_code_to_format(int code) p = "invalid character property name {%n}"; break; case ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION: p = "not supported encoding combination"; break; + case ONIGERR_INVALID_COMBINATION_OF_OPTIONS: + p = "invalid combination of options"; break; case ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT: p = "over thread pass limit count"; break; @@ -177,7 +179,7 @@ onig_error_code_to_format(int code) p = "undefined error code"; break; } - return p; + return (UChar* )p; } @@ -256,36 +258,36 @@ onig_error_code_to_str(s, code, va_alist) void #ifdef HAVE_STDARG_PROTOTYPES -onig_snprintf_with_pattern(char buf[], int bufsize, OnigEncoding enc, - char* pat, char* pat_end, char *fmt, ...) +onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc, + UChar* pat, UChar* pat_end, const UChar *fmt, ...) 
#else onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) - char buf[]; + UChar buf[]; int bufsize; OnigEncoding enc; - char* pat; - char* pat_end; - const char *fmt; + UChar* pat; + UChar* pat_end; + const UChar *fmt; va_dcl #endif { int n, need, len; UChar *p, *s, *bp; - char bs[6]; + UChar bs[6]; va_list args; - va_init_list(args, fmt); - n = vsnprintf(buf, bufsize, fmt, args); + va_init_list(args, (const char* )fmt); + n = vsnprintf((char* )buf, bufsize, (const char* )fmt, args); va_end(args); need = (pat_end - pat) * 4 + 4; if (n + need < bufsize) { - strcat(buf, ": /"); + strcat((char* )buf, ": /"); s = buf + onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, buf); p = pat; - while (p < (UChar* )pat_end) { + while (p < pat_end) { if (*p == MC_ESC(enc)) { *s++ = *p++; len = enc_len(enc, p); @@ -304,7 +306,7 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) int blen; while (len-- > 0) { - sprintf(bs, "\\%03o", *p++ & 0377); + sprintf((char* )bs, "\\%03o", *p++ & 0377); blen = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs); bp = bs; while (blen-- > 0) *s++ = *bp++; @@ -313,7 +315,7 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) } else if (!ONIGENC_IS_CODE_PRINT(enc, *p) && !ONIGENC_IS_CODE_SPACE(enc, *p)) { - sprintf(bs, "\\%03o", *p++ & 0377); + sprintf((char* )bs, "\\%03o", *p++ & 0377); len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs); bp = bs; while (len-- > 0) *s++ = *bp++; diff --git a/ext/mbstring/oniguruma/regexec.c b/ext/mbstring/oniguruma/regexec.c index 25d97773fb..7a1a35a0e0 100644 --- a/ext/mbstring/oniguruma/regexec.c +++ b/ext/mbstring/oniguruma/regexec.c @@ -2,7 +2,7 @@ regexec.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -29,6 +29,12 @@ #include "regint.h" +#ifdef USE_CRNL_AS_LINE_TERMINATOR +#define ONIGENC_IS_MBC_CRNL(enc,p,end) \ + (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \ + ONIGENC_IS_MBC_NEWLINE(enc,(p+enc_len(enc,p)),end)) +#endif + #ifdef USE_CAPTURE_HISTORY static void history_tree_free(OnigCaptureTreeNode* node); @@ -354,7 +360,8 @@ typedef struct _StackType { /* stack type check mask */ #define STK_MASK_POP_USED 0x00ff #define IS_TO_VOID_TARGET(stk) \ - (((stk)->type & STK_MASK_POP_USED) || (stk)->type == STK_NULL_CHECK_START) + (((stk)->type & STK_MASK_POP_USED) || \ + (stk)->type == STK_NULL_CHECK_START || (stk)->type == STK_NULL_CHECK_END) typedef struct { void* stack_p; @@ -603,15 +610,18 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, #ifdef ONIG_DEBUG -#define STACK_BASE_CHECK(p) \ - if ((p) < stk_base) goto stack_error; +#define STACK_BASE_CHECK(p, at) \ + if ((p) < stk_base) {\ + fprintf(stderr, "at %s\n", at);\ + goto stack_error;\ + } #else -#define STACK_BASE_CHECK(p) +#define STACK_BASE_CHECK(p, at) #endif #define STACK_POP_ONE do {\ stk--;\ - STACK_BASE_CHECK(stk); \ + STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \ } while(0) #define STACK_POP do {\ @@ -619,14 +629,14 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, case STACK_POP_LEVEL_FREE:\ while (1) {\ stk--;\ - STACK_BASE_CHECK(stk); \ + STACK_BASE_CHECK(stk, "STACK_POP"); \ if ((stk->type & STK_MASK_POP_USED) != 0) break;\ }\ break;\ case STACK_POP_LEVEL_MEM_START:\ while (1) {\ stk--;\ - STACK_BASE_CHECK(stk); \ + STACK_BASE_CHECK(stk, "STACK_POP 2"); \ if ((stk->type & STK_MASK_POP_USED) != 0) break;\ else if (stk->type == STK_MEM_START) {\ mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ @@ -637,7 +647,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, default:\ while (1) {\ stk--;\ - STACK_BASE_CHECK(stk); \ + STACK_BASE_CHECK(stk, "STACK_POP 3"); \ if ((stk->type & 
STK_MASK_POP_USED) != 0) break;\ else if (stk->type == STK_MEM_START) {\ mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ @@ -658,7 +668,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, #define STACK_POP_TIL_POS_NOT do {\ while (1) {\ stk--;\ - STACK_BASE_CHECK(stk); \ + STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \ if (stk->type == STK_POS_NOT) break;\ else if (stk->type == STK_MEM_START) {\ mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ @@ -677,7 +687,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, #define STACK_POP_TIL_LOOK_BEHIND_NOT do {\ while (1) {\ stk--;\ - STACK_BASE_CHECK(stk); \ + STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \ if (stk->type == STK_LOOK_BEHIND_NOT) break;\ else if (stk->type == STK_MEM_START) {\ mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ @@ -697,7 +707,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, k = stk;\ while (1) {\ k--;\ - STACK_BASE_CHECK(k); \ + STACK_BASE_CHECK(k, "STACK_POS_END"); \ if (IS_TO_VOID_TARGET(k)) {\ k->type = STK_VOID;\ }\ @@ -712,7 +722,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, StackType *k = stk;\ while (1) {\ k--;\ - STACK_BASE_CHECK(k); \ + STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \ if (IS_TO_VOID_TARGET(k)) {\ k->type = STK_VOID;\ }\ @@ -727,7 +737,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, StackType* k = stk;\ while (1) {\ k--;\ - STACK_BASE_CHECK(k); \ + STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \ if (k->type == STK_NULL_CHECK_START) {\ if (k->u.null_check.num == (id)) {\ (isnull) = (k->u.null_check.pstr == (s));\ @@ -742,7 +752,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, StackType* k = stk;\ while (1) {\ k--;\ - STACK_BASE_CHECK(k); \ + STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \ if (k->type == STK_NULL_CHECK_START) {\ if (k->u.null_check.num == (id)) {\ if (level == 0) {\ @@ -762,7 +772,7 @@ stack_double(StackType** arg_stk_base, 
StackType** arg_stk_end, StackType* k = stk;\ while (1) {\ k--;\ - STACK_BASE_CHECK(k); \ + STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \ if (k->type == STK_NULL_CHECK_START) {\ if (k->u.null_check.num == (id)) {\ if (k->u.null_check.pstr != (s)) {\ @@ -802,7 +812,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, StackType* k = stk;\ while (1) {\ k--;\ - STACK_BASE_CHECK(k); \ + STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \ if (k->type == STK_NULL_CHECK_START) {\ if (k->u.null_check.num == (id)) {\ if (level == 0) {\ @@ -850,7 +860,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, k = stk;\ while (1) {\ k--;\ - STACK_BASE_CHECK(k); \ + STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \ if (k->type == STK_REPEAT) {\ if (level == 0) {\ if (k->u.repeat.num == (id)) {\ @@ -868,7 +878,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, StackType* k = stk;\ while (1) {\ k--;\ - STACK_BASE_CHECK(k); \ + STACK_BASE_CHECK(k, "STACK_RETURN"); \ if (k->type == STK_CALL_FRAME) {\ if (level == 0) {\ (addr) = k->u.call_frame.ret_addr;\ @@ -988,6 +998,77 @@ make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp, } #endif +#ifdef USE_BACKREF_AT_LEVEL +static int mem_is_in_memp(int mem, int num, UChar* memp) +{ + int i; + MemNumType m; + + for (i = 0; i < num; i++) { + GET_MEMNUM_INC(m, memp); + if (mem == (int )m) return 1; + } + return 0; +} + +static int backref_match_at_nested_level(regex_t* reg + , StackType* top, StackType* stk_base + , int ignore_case, int ambig_flag + , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send) +{ + UChar *ss, *p, *pstart, *pend = NULL_UCHARP; + int level; + StackType* k; + + level = 0; + k = top; + k--; + while (k >= stk_base) { + if (k->type == STK_CALL_FRAME) { + level--; + } + else if (k->type == STK_RETURN) { + level++; + } + else if (level == nest) { + if (k->type == STK_MEM_START) { + if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { + pstart = 
k->u.mem.pstr; + if (pend != NULL_UCHARP) { + if (pend - pstart > send - *s) return 0; /* or goto next_mem; */ + p = pstart; + ss = *s; + + if (ignore_case != 0) { + if (string_cmp_ic(reg->enc, ambig_flag, + pstart, &ss, (int )(pend - pstart)) == 0) + return 0; /* or goto next_mem; */ + } + else { + while (p < pend) { + if (*p++ != *ss++) return 0; /* or goto next_mem; */ + } + } + + *s = ss; + return 1; + } + } + } + else if (k->type == STK_MEM_END) { + if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { + pend = k->u.mem.pstr; + } + } + } + k--; + } + + return 0; +} +#endif /* USE_BACKREF_AT_LEVEL */ + + #ifdef RUBY_PLATFORM typedef struct { @@ -1003,7 +1084,7 @@ trap_ensure(VALUE arg) TrapEnsureArg* ta = (TrapEnsureArg* )arg; if (ta->state == 0) { /* trap_exec() is not normal return */ - ONIG_STATE_DEC(ta->reg); + ONIG_STATE_DEC_THREAD(ta->reg); if (! IS_NULL(ta->msa->stack_p) && ta->stk_base != ta->msa->stack_p) xfree(ta->stk_base); @@ -1165,27 +1246,43 @@ onig_is_in_code_range(const UChar* p, OnigCodePoint code) } static int -code_is_in_cclass_node(void* node, OnigCodePoint code, int enclen) +is_code_in_cc(int enclen, OnigCodePoint code, CClassNode* cc) { - unsigned int in_cc; - CClassNode* cc = (CClassNode* )node; + int found; - if (enclen == 1) { - in_cc = BITSET_AT(cc->bs, code); + if (enclen > 1 || (code >= SINGLE_BYTE_SIZE)) { + if (IS_NULL(cc->mbuf)) { + found = 0; + } + else { + found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0); + } } else { - UChar* p = ((BBuf* )(cc->mbuf))->p; - in_cc = onig_is_in_code_range(p, code); + found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1); } - if (IS_CCLASS_NOT(cc)) { - return (in_cc ? 0 : 1); + if (IS_CCLASS_NOT(cc)) + return !found; + else + return found; +} + +extern int +onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc) +{ + int len; + + if (ONIGENC_MBC_MINLEN(enc) > 1) { + len = 2; } else { - return (in_cc ? 
1 : 0); + len = ONIGENC_CODE_TO_MBCLEN(enc, code); } + return is_code_in_cc(len, code, cc); } + /* matching region of POSIX API */ typedef int regoff_t; @@ -1739,8 +1836,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, mb_len = enc_len(encode, s); ss = s; s += mb_len; + DATA_ENSURE(0); code = ONIGENC_MBC_TO_CODE(encode, ss, s); - if (code_is_in_cclass_node(node, code, mb_len) == 0) goto fail; + if (is_code_in_cc(mb_len, code, node) == 0) goto fail; } STAT_OP_OUT; break; @@ -1946,6 +2044,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, STAT_OP_OUT; continue; } +#ifdef USE_CRNL_AS_LINE_TERMINATOR + else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { + STAT_OP_OUT; + continue; + } +#endif goto fail; break; @@ -1966,6 +2070,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, STAT_OP_OUT; continue; } +#ifdef USE_CRNL_AS_LINE_TERMINATOR + else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { + UChar* ss = s + enc_len(encode, s); + if (ON_STR_END(ss + enc_len(encode, ss))) { + STAT_OP_OUT; + continue; + } + } +#endif goto fail; break; @@ -2188,6 +2301,35 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, continue; } break; + +#ifdef USE_BACKREF_AT_LEVEL + case OP_BACKREF_AT_LEVEL: + { + int len; + OnigOptionType ic; + LengthType level; + + GET_OPTION_INC(ic, p); + GET_LENGTH_INC(level, p); + GET_LENGTH_INC(tlen, p); + + sprev = s; + if (backref_match_at_nested_level(reg, stk, stk_base, ic, ambig_flag + , (int )level, (int )tlen, p, &s, end)) { + while (sprev + (len = enc_len(encode, sprev)) < s) + sprev += len; + + p += (SIZE_MEMNUM * tlen); + } + else + goto fail; + + STAT_OP_OUT; + continue; + } + + break; +#endif case OP_SET_OPTION_PUSH: STAT_OP_IN(OP_SET_OPTION_PUSH); GET_OPTION_INC(option, p); @@ -2915,7 +3057,9 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On UChar *prev; MatchArg msa; -#ifdef 
USE_MULTI_THREAD_SYSTEM +#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) + start: + THREAD_ATOMIC_START; if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) { ONIG_STATE_INC(reg); if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { @@ -2924,15 +3068,19 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On } } else { - int n = 0; + int n; + + THREAD_ATOMIC_END; + n = 0; while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) { if (++n > THREAD_PASS_LIMIT_COUNT) return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; THREAD_PASS; } - ONIG_STATE_INC(reg); + goto start; } -#endif /* USE_MULTI_THREAD_SYSTEM */ + THREAD_ATOMIC_END; +#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ MATCH_ARG_INIT(msa, option, region, at); @@ -2952,7 +3100,7 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On } MATCH_ARG_FREE(msa); - ONIG_STATE_DEC(reg); + ONIG_STATE_DEC_THREAD(reg); return r; } @@ -3029,7 +3177,11 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) goto retry_gate; } - else if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)) + else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end) +#ifdef USE_CRNL_AS_LINE_TERMINATOR + && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end) +#endif + ) goto retry_gate; break; } @@ -3132,7 +3284,7 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, switch (reg->sub_anchor) { case ANCHOR_BEGIN_LINE: if (!ON_STR_BEGIN(p)) { - prev = onigenc_get_prev_char_head(reg->enc, adjrange, p); + prev = onigenc_get_prev_char_head(reg->enc, str, p); if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { p = prev; goto retry; @@ -3149,7 +3301,11 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, goto retry; } } - else if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)) { + else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end) +#ifdef USE_CRNL_AS_LINE_TERMINATOR + && ! 
ONIGENC_IS_MBC_CRNL(reg->enc, p, end) +#endif + ) { p = onigenc_get_prev_char_head(reg->enc, adjrange, p); if (IS_NULL(p)) goto fail; goto retry; @@ -3188,7 +3344,9 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, UChar *s, *prev; MatchArg msa; -#ifdef USE_MULTI_THREAD_SYSTEM +#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) + start: + THREAD_ATOMIC_START; if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) { ONIG_STATE_INC(reg); if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { @@ -3197,15 +3355,19 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, } } else { - int n = 0; + int n; + + THREAD_ATOMIC_END; + n = 0; while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) { if (++n > THREAD_PASS_LIMIT_COUNT) return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; THREAD_PASS; } - ONIG_STATE_INC(reg); + goto start; } -#endif /* USE_MULTI_THREAD_SYSTEM */ + THREAD_ATOMIC_END; +#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ #ifdef ONIG_DEBUG_SEARCH fprintf(stderr, @@ -3305,12 +3467,12 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, goto end_buf; } } - else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_PL)) { + else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) { goto begin_position; } } else if (str == end) { /* empty string */ - static const UChar* address_for_empty_string = ""; + static const UChar* address_for_empty_string = (UChar* )""; #ifdef ONIG_DEBUG_SEARCH fprintf(stderr, "onig_search: empty string.\n"); @@ -3398,7 +3560,11 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, MATCH_AND_RETURN_CHECK; prev = s; s += enc_len(reg->enc, s); - } while (s <= range); /* exec s == range, because empty match with /$/. */ + } while (s < range); + + if (s == range) { /* because empty match with /$/. 
*/ + MATCH_AND_RETURN_CHECK; + } } else { /* backward search */ if (reg->optimize != ONIG_OPTIMIZE_NONE) { @@ -3461,7 +3627,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, finish: MATCH_ARG_FREE(msa); - ONIG_STATE_DEC(reg); + ONIG_STATE_DEC_THREAD(reg); /* If result is mismatch and no FIND_NOT_EMPTY option, then the region is not setted in match_at(). */ @@ -3482,7 +3648,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, mismatch_no_msa: r = ONIG_MISMATCH; finish_no_msa: - ONIG_STATE_DEC(reg); + ONIG_STATE_DEC_THREAD(reg); #ifdef ONIG_DEBUG if (r != ONIG_MISMATCH) fprintf(stderr, "onig_search: error %d\n", r); @@ -3490,7 +3656,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, return r; match: - ONIG_STATE_DEC(reg); + ONIG_STATE_DEC_THREAD(reg); MATCH_ARG_FREE(msa); return s - str; } diff --git a/ext/mbstring/oniguruma/regext.c b/ext/mbstring/oniguruma/regext.c index 6839708be7..f5ad1f35a2 100755 --- a/ext/mbstring/oniguruma/regext.c +++ b/ext/mbstring/oniguruma/regext.c @@ -2,7 +2,7 @@ regext.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -194,6 +194,7 @@ onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end, return r; } +#ifdef USE_RECOMPILE_API extern int onig_recompile_deluxe(regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo) @@ -211,3 +212,4 @@ onig_recompile_deluxe(regex_t* reg, const UChar* pattern, const UChar* pattern_e } return 0; } +#endif diff --git a/ext/mbstring/oniguruma/reggnu.c b/ext/mbstring/oniguruma/reggnu.c index 70e8582ff2..248957c9d9 100644 --- a/ext/mbstring/oniguruma/reggnu.c +++ b/ext/mbstring/oniguruma/reggnu.c @@ -2,7 +2,7 @@ reggnu.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -93,6 +93,7 @@ re_compile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf) return r; } +#ifdef USE_RECOMPILE_API extern int re_recompile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf) { @@ -113,6 +114,7 @@ re_recompile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf) } return r; } +#endif extern void re_free_pattern(regex_t* reg) @@ -151,16 +153,16 @@ re_mbcinit(int mb_code) OnigEncoding enc; switch (mb_code) { - case MBCTYPE_ASCII: + case RE_MBCTYPE_ASCII: enc = ONIG_ENCODING_ASCII; break; - case MBCTYPE_EUC: + case RE_MBCTYPE_EUC: enc = ONIG_ENCODING_EUC_JP; break; - case MBCTYPE_SJIS: + case RE_MBCTYPE_SJIS: enc = ONIG_ENCODING_SJIS; break; - case MBCTYPE_UTF8: + case RE_MBCTYPE_UTF8: enc = ONIG_ENCODING_UTF8; break; default: diff --git a/ext/mbstring/oniguruma/regint.h b/ext/mbstring/oniguruma/regint.h index 2bd514b7c3..4c4341c616 100644 --- a/ext/mbstring/oniguruma/regint.h +++ 
b/ext/mbstring/oniguruma/regint.h @@ -4,7 +4,7 @@ regint.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -59,9 +59,16 @@ /* #define USE_UNICODE_FULL_RANGE_CTYPE */ /* --> move to regenc.h */ #define USE_NAMED_GROUP #define USE_SUBEXP_CALL +#define USE_BACKREF_AT_LEVEL #define USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK /* /(?:()|())*\2/ */ #define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */ #define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR +/* #define USE_RECOMPILE_API */ +/* treat \r\n as line terminator. + !!! NO SUPPORT !!! + use this configuration on your own responsibility */ +/* #define USE_CRNL_AS_LINE_TERMINATOR */ + /* internal config */ #define USE_RECYCLE_NODE #define USE_OP_PUSH_OR_JUMP_EXACT @@ -105,8 +112,8 @@ }\ } while (0) -#define DEFAULT_WARN_FUNCTION rb_warn -#define DEFAULT_VERB_WARN_FUNCTION rb_warning +#define DEFAULT_WARN_FUNCTION onig_rb_warn +#define DEFAULT_VERB_WARN_FUNCTION onig_rb_warning #endif /* else NOT_RUBY */ @@ -124,13 +131,26 @@ #endif -#ifdef USE_MULTI_THREAD_SYSTEM -#define ONIG_STATE_INC(reg) (reg)->state++ -#define ONIG_STATE_DEC(reg) (reg)->state-- +#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) +#define ONIG_STATE_INC(reg) (reg)->state++ +#define ONIG_STATE_DEC(reg) (reg)->state-- + +#define ONIG_STATE_INC_THREAD(reg) do {\ + THREAD_ATOMIC_START;\ + (reg)->state++;\ + THREAD_ATOMIC_END;\ +} while(0) +#define ONIG_STATE_DEC_THREAD(reg) do {\ + THREAD_ATOMIC_START;\ + (reg)->state--;\ + THREAD_ATOMIC_END;\ +} while(0) #else -#define ONIG_STATE_INC(reg) /* Nothing */ -#define ONIG_STATE_DEC(reg) /* Nothing */ -#endif /* USE_MULTI_THREAD_SYSTEM */ +#define ONIG_STATE_INC(reg) /* 
Nothing */ +#define ONIG_STATE_DEC(reg) /* Nothing */ +#define ONIG_STATE_INC_THREAD(reg) /* Nothing */ +#define ONIG_STATE_DEC_THREAD(reg) /* Nothing */ +#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ #define onig_st_is_member st_is_member @@ -518,7 +538,7 @@ typedef struct _BBuf { #define ANCHOR_LOOK_BEHIND_NOT (1<<13) #define ANCHOR_ANYCHAR_STAR (1<<14) /* ".*" optimize info */ -#define ANCHOR_ANYCHAR_STAR_PL (1<<15) /* ".*" optimize info (posix-line) */ +#define ANCHOR_ANYCHAR_STAR_ML (1<<15) /* ".*" optimize info (multi-line) */ /* operation code */ enum OpCode { @@ -579,6 +599,7 @@ enum OpCode { OP_BACKREFN_IC, OP_BACKREF_MULTI, OP_BACKREF_MULTI_IC, + OP_BACKREF_AT_LEVEL, /* \k<xxx+n>, \k<xxx-n> */ OP_MEMORY_START, OP_MEMORY_START_PUSH, /* push back-tracker to stack */ @@ -721,6 +742,11 @@ typedef void* PointerType; #define MC_ONE_OR_MORE_TIME(enc) (enc)->meta_char_table.one_or_more_time #define MC_ANYCHAR_ANYTIME(enc) (enc)->meta_char_table.anychar_anytime +#define IS_MC_ESC_CODE(code, enc, syn) \ + ((code) == MC_ESC(enc) && \ + !IS_SYNTAX_OP2((syn), ONIG_SYN_OP2_INEFFECTIVE_ESCAPE)) + + #define SYN_POSIX_COMMON_OP \ ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_POSIX_BRACKET | \ ONIG_SYN_OP_DECIMAL_BACKREF | \ @@ -781,13 +807,14 @@ extern void onig_print_statistics P_((FILE* f)); #endif #endif -extern char* onig_error_code_to_format P_((int code)); -extern void onig_snprintf_with_pattern PV_((char buf[], int bufsize, OnigEncoding enc, char* pat, char* pat_end, char *fmt, ...)); +extern UChar* onig_error_code_to_format P_((int code)); +extern void onig_snprintf_with_pattern PV_((UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, ...)); extern int onig_bbuf_init P_((BBuf* buf, int size)); extern int onig_alloc_init P_((regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag, OnigEncoding enc, OnigSyntaxType* syntax)); extern int onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, 
OnigErrorInfo* einfo)); extern void onig_chain_reduce P_((regex_t* reg)); extern void onig_chain_link_add P_((regex_t* to, regex_t* add)); extern void onig_transfer P_((regex_t* to, regex_t* from)); +extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc)); #endif /* REGINT_H */ diff --git a/ext/mbstring/oniguruma/regparse.c b/ext/mbstring/oniguruma/regparse.c index 58e122f486..d70dbb6c3b 100644 --- a/ext/mbstring/oniguruma/regparse.c +++ b/ext/mbstring/oniguruma/regparse.c @@ -2,7 +2,7 @@ regparse.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -60,6 +60,20 @@ OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_RUBY; extern void onig_null_warn(const char* s) { } +#ifdef RUBY_PLATFORM +extern void +onig_rb_warn(const char* s) +{ + rb_warn(s); +} + +extern void +onig_rb_warning(const char* s) +{ + rb_warning(s); +} +#endif + #ifdef DEFAULT_WARN_FUNCTION static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION; #else @@ -305,6 +319,88 @@ typedef struct { #include "st.h" +typedef struct { + unsigned char* s; + unsigned char* end; +} st_strend_key; + +static int strend_cmp(st_strend_key*, st_strend_key*); +static int strend_hash(st_strend_key*); + +static struct st_hash_type type_strend_hash = { + strend_cmp, + strend_hash, +}; + +static st_table* +onig_st_init_strend_table_with_size(int size) +{ + return onig_st_init_table_with_size(&type_strend_hash, size); +} + +static int +onig_st_lookup_strend(st_table *table, const UChar* str_key, const UChar* end_key, st_data_t *value) +{ + st_strend_key key; + + key.s = (unsigned char* )str_key; + key.end = (unsigned char* )end_key; + + return onig_st_lookup(table, (st_data_t )(&key), 
value); +} + +static int +onig_st_insert_strend(st_table *table, const UChar* str_key, const UChar* end_key, st_data_t value) +{ + st_strend_key* key; + int result; + + key = (st_strend_key* )xmalloc(sizeof(st_strend_key)); + key->s = (unsigned char* )str_key; + key->end = (unsigned char* )end_key; + result = onig_st_insert(table, (st_data_t )key, value); + if (result) { + xfree(key); + } + return result; +} + +static int +strend_cmp(st_strend_key* x, st_strend_key* y) +{ + unsigned char *p, *q; + int c; + + if ((x->end - x->s) != (y->end - y->s)) + return 1; + + p = x->s; + q = y->s; + while (p < x->end) { + c = (int )*p - (int )*q; + if (c != 0) return c; + + p++; q++; + } + + return 0; +} + +static int +strend_hash(st_strend_key* x) +{ + int val; + unsigned char *p; + + val = 0; + p = x->s; + while (p < x->end) { + val = val * 997 + (int )*p++; + } + + return val + (val >> 5); +} + typedef st_table NameTable; typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */ @@ -350,8 +446,10 @@ onig_print_names(FILE* fp, regex_t* reg) static int i_free_name_entry(UChar* key, NameEntry* e, void* arg) { - xfree(e->name); /* == key */ + xfree(e->name); if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs); + xfree(key); + xfree(e); return ST_DELETE; } @@ -801,6 +899,23 @@ onig_number_of_names(regex_t* reg) } #endif /* else USE_NAMED_GROUP */ +extern int +onig_noname_group_capture_is_active(regex_t* reg) +{ + if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP)) + return 0; + +#ifdef USE_NAMED_GROUP + if (onig_number_of_names(reg) > 0 && + IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && + !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) { + return 0; + } +#endif + + return 1; +} + #define INIT_SCANENV_MEMNODES_ALLOC_SIZE 16 @@ -970,13 +1085,13 @@ onig_free_node_list() { FreeNode* n; - THREAD_ATOMIC_START; - while (FreeNodeList) { + /* THREAD_ATOMIC_START; */ + while (IS_NOT_NULL(FreeNodeList)) { n = FreeNodeList; 
FreeNodeList = FreeNodeList->next; xfree(n); } - THREAD_ATOMIC_END; + /* THREAD_ATOMIC_END; */ return 0; } #endif @@ -987,13 +1102,14 @@ node_new() Node* node; #ifdef USE_RECYCLE_NODE + THREAD_ATOMIC_START; if (IS_NOT_NULL(FreeNodeList)) { - THREAD_ATOMIC_START; node = (Node* )FreeNodeList; FreeNodeList = FreeNodeList->next; THREAD_ATOMIC_END; return node; } + THREAD_ATOMIC_END; #endif node = (Node* )xmalloc(sizeof(Node)); @@ -1020,9 +1136,9 @@ node_new_cclass() return node; } -extern Node* +static Node* node_new_cclass_by_codepoint_range(int not, - OnigCodePoint sbr[], OnigCodePoint mbr[]) + const OnigCodePoint sbr[], const OnigCodePoint mbr[]) { CClassNode* cc; int n, i, j; @@ -1128,7 +1244,11 @@ onig_node_new_anchor(int type) } static Node* -node_new_backref(int back_num, int* backrefs, int by_name, ScanEnv* env) +node_new_backref(int back_num, int* backrefs, int by_name, +#ifdef USE_BACKREF_AT_LEVEL + int exist_level, int nest_level, +#endif + ScanEnv* env) { int i; Node* node = node_new(); @@ -1141,6 +1261,13 @@ node_new_backref(int back_num, int* backrefs, int by_name, ScanEnv* env) if (by_name != 0) NBACKREF(node).state |= NST_NAME_REF; +#ifdef USE_BACKREF_AT_LEVEL + if (exist_level != 0) { + NBACKREF(node).state |= NST_NEST_LEVEL; + NBACKREF(node).nest_level = nest_level; + } +#endif + for (i = 0; i < back_num; i++) { if (backrefs[i] <= env->num_mem && IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) { @@ -2125,6 +2252,10 @@ typedef struct { int ref1; int* refs; int by_name; +#ifdef USE_BACKREF_AT_LEVEL + int exist_level; + int level; /* \k<name+n> */ +#endif } backref; struct { UChar* name; @@ -2274,15 +2405,17 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env) control: if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL; PFETCH(c); - if (c == MC_ESC(enc)) { - v = fetch_escaped_value(&p, end, env); - if (v < 0) return v; - c = (OnigCodePoint )v; - } - else if (c == '?') + if (c == '?') { c = 0177; - else + } + else { + if (c == MC_ESC(enc)) { + v = 
fetch_escaped_value(&p, end, env); + if (v < 0) return v; + c = (OnigCodePoint )v; + } c &= 0x9f; + } break; } /* fall through */ @@ -2302,6 +2435,89 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env) static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env); #ifdef USE_NAMED_GROUP +#ifdef USE_BACKREF_AT_LEVEL +/* + \k<name+n>, \k<name-n> +*/ +static int +fetch_name_with_level(UChar** src, UChar* end, UChar** rname_end + , ScanEnv* env, int* level) +{ + int r, exist_level = 0; + OnigCodePoint c = 0; + OnigCodePoint first_code; + OnigEncoding enc = env->enc; + UChar *name_end; + UChar *p = *src; + PFETCH_READY; + + name_end = end; + r = 0; + if (PEND) { + return ONIGERR_EMPTY_GROUP_NAME; + } + else { + PFETCH(c); + first_code = c; + if (c == '>') + return ONIGERR_EMPTY_GROUP_NAME; + + if (!ONIGENC_IS_CODE_WORD(enc, c)) { + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + } + } + + while (!PEND) { + name_end = p; + PFETCH(c); + if (c == '>' || c == ')' || c == '+' || c == '-') break; + + if (!ONIGENC_IS_CODE_WORD(enc, c)) { + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + } + } + + if (c != '>') { + if (c == '+' || c == '-') { + int num; + int flag = (c == '-' ? -1 : 1); + + PFETCH(c); + if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto err; + PUNFETCH; + num = onig_scan_unsigned_number(&p, end, enc); + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + *level = (num * flag); + exist_level = 1; + + PFETCH(c); + if (c == '>') + goto first_check; + } + + err: + r = ONIGERR_INVALID_GROUP_NAME; + name_end = end; + } + else { + first_check: + if (ONIGENC_IS_CODE_ASCII(first_code) && + ONIGENC_IS_CODE_UPPER(enc, first_code)) + r = ONIGERR_INVALID_GROUP_NAME; + } + + if (r == 0) { + *rname_end = name_end; + *src = p; + return (exist_level ? 
1 : 0); + } + else { + onig_scan_env_set_error_string(env, r, *src, name_end); + return r; + } +} +#endif /* USE_BACKREF_AT_LEVEL */ + /* def: 0 -> define name (don't allow number name) 1 -> reference name (allow number name) @@ -2428,11 +2644,11 @@ CC_ESC_WARN(ScanEnv* env, UChar *c) if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) && IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) { - char buf[WARN_BUFSIZE]; + UChar buf[WARN_BUFSIZE]; onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, env->pattern, env->pattern_end, - "character class has '%s' without escape", c); - (*onig_warn)(buf); + (UChar* )"character class has '%s' without escape", c); + (*onig_warn)((char* )buf); } } @@ -2442,11 +2658,11 @@ CCEND_ESC_WARN(ScanEnv* env, UChar* c) if (onig_warn == onig_null_warn) return ; if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) { - char buf[WARN_BUFSIZE]; + UChar buf[WARN_BUFSIZE]; onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc, (env)->pattern, (env)->pattern_end, - "regular expression has '%s' without escape", c); - (*onig_warn)(buf); + (UChar* )"regular expression has '%s' without escape", c); + (*onig_warn)((char* )buf); } } @@ -2537,6 +2753,8 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) tok->type = TK_CHAR; tok->base = 0; tok->u.c = c; + tok->escaped = 0; + if (c == ']') { tok->type = TK_CC_CLOSE; } @@ -2708,7 +2926,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) tok->type = TK_CC_CC_OPEN; } else { - CC_ESC_WARN(env, "["); + CC_ESC_WARN(env, (UChar* )"["); } } } @@ -2747,7 +2965,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) tok->backp = p; PFETCH(c); - if (c == MC_ESC(enc)) { + if (IS_MC_ESC_CODE(c, enc, syn)) { if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE; tok->backp = p; @@ -3012,6 +3230,9 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) tok->u.backref.num = 1; tok->u.backref.ref1 = num; 
tok->u.backref.by_name = 0; +#ifdef USE_BACKREF_AT_LEVEL + tok->u.backref.exist_level = 0; +#endif break; } @@ -3050,8 +3271,17 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) int* backs; prev = p; + +#ifdef USE_BACKREF_AT_LEVEL + name_end = NULL_UCHARP; /* no need. escape gcc warning. */ + r = fetch_name_with_level(&p, end, &name_end, env, &tok->u.backref.level); + if (r == 1) tok->u.backref.exist_level = 1; + else tok->u.backref.exist_level = 0; +#else r = fetch_name(&p, end, &name_end, env, 1); +#endif if (r < 0) return r; + num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs); if (num <= 0) { onig_scan_env_set_error_string(env, @@ -3170,13 +3400,17 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) switch (c) { case '.': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break; +#ifdef USE_VARIABLE_META_CHARS any_char: +#endif tok->type = TK_ANYCHAR; break; case '*': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break; +#ifdef USE_VARIABLE_META_CHARS anytime: +#endif tok->type = TK_OP_REPEAT; tok->u.repeat.lower = 0; tok->u.repeat.upper = REPEAT_INFINITE; @@ -3185,7 +3419,9 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) case '+': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break; +#ifdef USE_VARIABLE_META_CHARS one_or_more_time: +#endif tok->type = TK_OP_REPEAT; tok->u.repeat.lower = 1; tok->u.repeat.upper = REPEAT_INFINITE; @@ -3194,7 +3430,9 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) case '?': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break; +#ifdef USE_VARIABLE_META_CHARS zero_or_one_time: +#endif tok->type = TK_OP_REPEAT; tok->u.repeat.lower = 0; tok->u.repeat.upper = 1; @@ -3271,7 +3509,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) case ']': if (*src > env->pattern) /* /].../ is allowed. 
*/ - CCEND_ESC_WARN(env, "]"); + CCEND_ESC_WARN(env, (UChar* )"]"); break; case '#': @@ -3297,14 +3535,16 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) } } +#ifdef USE_VARIABLE_META_CHARS out: +#endif *src = p; return tok->type; } static int add_ctype_to_cc_by_range(CClassNode* cc, int ctype, int not, OnigEncoding enc, - OnigCodePoint sbr[], OnigCodePoint mbr[]) + const OnigCodePoint sbr[], const OnigCodePoint mbr[]) { int i, r; OnigCodePoint j; @@ -3368,7 +3608,7 @@ static int add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env) { int c, r; - OnigCodePoint *sbr, *mbr; + const OnigCodePoint *sbr, *mbr; OnigEncoding enc = env->enc; r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sbr, &mbr); @@ -3506,19 +3746,19 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env) #define POSIX_BRACKET_NAME_MAX_LEN 6 static PosixBracketEntryType PBS[] = { - { "alnum", ONIGENC_CTYPE_ALNUM, 5 }, - { "alpha", ONIGENC_CTYPE_ALPHA, 5 }, - { "blank", ONIGENC_CTYPE_BLANK, 5 }, - { "cntrl", ONIGENC_CTYPE_CNTRL, 5 }, - { "digit", ONIGENC_CTYPE_DIGIT, 5 }, - { "graph", ONIGENC_CTYPE_GRAPH, 5 }, - { "lower", ONIGENC_CTYPE_LOWER, 5 }, - { "print", ONIGENC_CTYPE_PRINT, 5 }, - { "punct", ONIGENC_CTYPE_PUNCT, 5 }, - { "space", ONIGENC_CTYPE_SPACE, 5 }, - { "upper", ONIGENC_CTYPE_UPPER, 5 }, - { "xdigit", ONIGENC_CTYPE_XDIGIT, 6 }, - { "ascii", ONIGENC_CTYPE_ASCII, 5 }, /* I don't know origin. Perl? 
*/ + { (UChar* )"alnum", ONIGENC_CTYPE_ALNUM, 5 }, + { (UChar* )"alpha", ONIGENC_CTYPE_ALPHA, 5 }, + { (UChar* )"blank", ONIGENC_CTYPE_BLANK, 5 }, + { (UChar* )"cntrl", ONIGENC_CTYPE_CNTRL, 5 }, + { (UChar* )"digit", ONIGENC_CTYPE_DIGIT, 5 }, + { (UChar* )"graph", ONIGENC_CTYPE_GRAPH, 5 }, + { (UChar* )"lower", ONIGENC_CTYPE_LOWER, 5 }, + { (UChar* )"print", ONIGENC_CTYPE_PRINT, 5 }, + { (UChar* )"punct", ONIGENC_CTYPE_PUNCT, 5 }, + { (UChar* )"space", ONIGENC_CTYPE_SPACE, 5 }, + { (UChar* )"upper", ONIGENC_CTYPE_UPPER, 5 }, + { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 }, + { (UChar* )"ascii", ONIGENC_CTYPE_ASCII, 5 }, { (UChar* )NULL, -1, 0 } }; @@ -3542,7 +3782,7 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env) for (pb = PBS; IS_NOT_NULL(pb->name); pb++) { if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) { p = (UChar* )onigenc_step(enc, p, end, pb->len); - if (onigenc_with_ascii_strncmp(enc, p, end, ":]", 2) != 0) + if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0) return ONIGERR_INVALID_POSIX_BRACKET_TYPE; r = add_ctype_to_cc(cc, pb->ctype, not, env); @@ -3577,19 +3817,19 @@ static int property_name_to_ctype(UChar* p, UChar* end, OnigEncoding enc) { static PosixBracketEntryType PBS[] = { - { "Alnum", ONIGENC_CTYPE_ALNUM, 5 }, - { "Alpha", ONIGENC_CTYPE_ALPHA, 5 }, - { "Blank", ONIGENC_CTYPE_BLANK, 5 }, - { "Cntrl", ONIGENC_CTYPE_CNTRL, 5 }, - { "Digit", ONIGENC_CTYPE_DIGIT, 5 }, - { "Graph", ONIGENC_CTYPE_GRAPH, 5 }, - { "Lower", ONIGENC_CTYPE_LOWER, 5 }, - { "Print", ONIGENC_CTYPE_PRINT, 5 }, - { "Punct", ONIGENC_CTYPE_PUNCT, 5 }, - { "Space", ONIGENC_CTYPE_SPACE, 5 }, - { "Upper", ONIGENC_CTYPE_UPPER, 5 }, - { "XDigit", ONIGENC_CTYPE_XDIGIT, 6 }, - { "ASCII", ONIGENC_CTYPE_ASCII, 5 }, + { (UChar* )"Alnum", ONIGENC_CTYPE_ALNUM, 5 }, + { (UChar* )"Alpha", ONIGENC_CTYPE_ALPHA, 5 }, + { (UChar* )"Blank", ONIGENC_CTYPE_BLANK, 5 }, + { (UChar* )"Cntrl", ONIGENC_CTYPE_CNTRL, 5 }, + { (UChar* 
)"Digit", ONIGENC_CTYPE_DIGIT, 5 }, + { (UChar* )"Graph", ONIGENC_CTYPE_GRAPH, 5 }, + { (UChar* )"Lower", ONIGENC_CTYPE_LOWER, 5 }, + { (UChar* )"Print", ONIGENC_CTYPE_PRINT, 5 }, + { (UChar* )"Punct", ONIGENC_CTYPE_PUNCT, 5 }, + { (UChar* )"Space", ONIGENC_CTYPE_SPACE, 5 }, + { (UChar* )"Upper", ONIGENC_CTYPE_UPPER, 5 }, + { (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 }, + { (UChar* )"ASCII", ONIGENC_CTYPE_ASCII, 5 }, { (UChar* )NULL, -1, 0 } }; @@ -3839,7 +4079,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, *src, env->pattern_end, 1, env->enc)) return ONIGERR_EMPTY_CHAR_CLASS; - CC_ESC_WARN(env, "]"); + CC_ESC_WARN(env, (UChar* )"]"); r = tok->type = TK_CHAR; /* allow []...] */ } @@ -3942,7 +4182,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, r = parse_posix_bracket(cc, &p, end, env); if (r < 0) goto err; if (r == 1) { /* is not POSIX bracket */ - CC_ESC_WARN(env, "["); + CC_ESC_WARN(env, (UChar* )"["); p = tok->backp; v = (OnigCodePoint )tok->u.c; in_israw = 0; @@ -3988,7 +4228,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, goto val_entry; } else if (r == TK_CC_AND) { - CC_ESC_WARN(env, "-"); + CC_ESC_WARN(env, (UChar* )"-"); goto range_end_val; } state = CCS_RANGE; @@ -4003,12 +4243,12 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, fetched = 1; /* [--x] or [a&&-x] is warned. 
*/ if (r == TK_CC_RANGE || and_start != 0) - CC_ESC_WARN(env, "-"); + CC_ESC_WARN(env, (UChar* )"-"); goto val_entry; } else if (state == CCS_RANGE) { - CC_ESC_WARN(env, "-"); + CC_ESC_WARN(env, (UChar* )"-"); goto sb_char; /* [!--x] is allowed */ } else { /* CCS_COMPLETE */ @@ -4017,12 +4257,12 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, fetched = 1; if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */ else if (r == TK_CC_AND) { - CC_ESC_WARN(env, "-"); + CC_ESC_WARN(env, (UChar* )"-"); goto range_end_val; } if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) { - CC_ESC_WARN(env, "-"); + CC_ESC_WARN(env, (UChar* )"-"); goto sb_char; /* [0-9-a] is allowed as [0-9\-a] */ } r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS; @@ -4326,10 +4566,9 @@ parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, } } else { -#ifdef USE_NAMED_GROUP if (ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_DONT_CAPTURE_GROUP)) goto group; -#endif + *np = node_new_effect_memory(env->option, 0); CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); num = scan_env_add_mem_entry(env); @@ -4358,11 +4597,11 @@ parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, return 0; } -static char* PopularQStr[] = { +static const char* PopularQStr[] = { "?", "*", "+", "??", "*?", "+?" }; -static char* ReduceQStr[] = { +static const char* ReduceQStr[] = { "", "", "*", "*?", "??", "+ and ??", "+? and ?" 
}; @@ -4399,7 +4638,7 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env) if (qn->by_number == 0 && qnt->by_number == 0 && IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) { int nestq_num, targetq_num; - char buf[WARN_BUFSIZE]; + UChar buf[WARN_BUFSIZE]; nestq_num = popular_qualifier_num(qn); targetq_num = popular_qualifier_num(qnt); @@ -4411,9 +4650,9 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env) case RQ_DEL: if (onig_verb_warn != onig_null_warn) { onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, - env->pattern, env->pattern_end, - "redundant nested repeat operator"); - (*onig_verb_warn)(buf); + env->pattern, env->pattern_end, + (UChar* )"redundant nested repeat operator"); + (*onig_verb_warn)((char* )buf); } goto warn_exit; break; @@ -4422,10 +4661,10 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env) if (onig_verb_warn != onig_null_warn) { onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, env->pattern, env->pattern_end, - "nested repeat operator %s and %s was replaced with '%s'", + (UChar* )"nested repeat operator %s and %s was replaced with '%s'", PopularQStr[targetq_num], PopularQStr[nestq_num], ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]); - (*onig_verb_warn)(buf); + (*onig_verb_warn)((char* )buf); } goto warn_exit; break; @@ -4457,8 +4696,8 @@ make_compound_alt_node_from_cc(OnigAmbigType ambig_flag, OnigEncoding enc, int r, i, j, k, clen, len, ncode, n; UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; Node **ptail, *snode = NULL_NODE; - OnigCompAmbigCodes* ccs; - OnigCompAmbigCodeItem* ci; + const OnigCompAmbigCodes* ccs; + const OnigCompAmbigCodeItem* ci; OnigAmbigType amb; n = 0; @@ -4546,27 +4785,9 @@ static int type_cclass_hash(type_cclass_key* key) return val + (val >> 5); } -static int type_cclass_key_free(st_data_t x) -{ - xfree((void* )x); - return 0; -} - -static st_data_t type_cclass_key_clone(st_data_t x) -{ - type_cclass_key* new_key; - type_cclass_key* key = 
(type_cclass_key* )x; - - new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key)); - *new_key = *key; - return (st_data_t )new_key; -} - static struct st_hash_type type_type_cclass_hash = { type_cclass_cmp, type_cclass_hash, - type_cclass_key_free, - type_cclass_key_clone }; static st_table* OnigTypeCClassTable; @@ -4580,6 +4801,8 @@ i_free_shared_class(type_cclass_key* key, Node* node, void* arg) if (IS_NOT_NULL(cc->mbuf)) xfree(cc->mbuf); xfree(node); } + + if (IS_NOT_NULL(key)) xfree(key); return ST_DELETE; } @@ -4588,6 +4811,8 @@ onig_free_shared_cclass_table() { if (IS_NOT_NULL(OnigTypeCClassTable)) { onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0); + xfree(OnigTypeCClassTable); + OnigTypeCClassTable = NULL; } return 0; @@ -4741,7 +4966,7 @@ parse_exp(Node** np, OnigToken* tok, int term, int ctype, not; #ifdef USE_SHARED_CCLASS_TABLE - OnigCodePoint *sbr, *mbr; + const OnigCodePoint *sbr, *mbr; ctype = parse_ctype_to_enc_ctype(tok->u.subtype, &not); r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, ctype, &sbr, &mbr); @@ -4823,7 +5048,7 @@ parse_exp(Node** np, OnigToken* tok, int term, if (IS_IGNORECASE(env->option)) { int i, n, in_cc; - OnigPairAmbigCodes* ccs; + const OnigPairAmbigCodes* ccs; BitSetRef bs = cc->bs; OnigAmbigType amb; @@ -4892,8 +5117,13 @@ parse_exp(Node** np, OnigToken* tok, int term, case TK_BACKREF: len = tok->u.backref.num; *np = node_new_backref(len, - (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)), - tok->u.backref.by_name, env); + (len > 1 ?
tok->u.backref.refs : &(tok->u.backref.ref1)), + tok->u.backref.by_name, +#ifdef USE_BACKREF_AT_LEVEL + tok->u.backref.exist_level, + tok->u.backref.level, +#endif + env); CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); break; diff --git a/ext/mbstring/oniguruma/regparse.h b/ext/mbstring/oniguruma/regparse.h index 1a4ac7dea2..0958c909bf 100644 --- a/ext/mbstring/oniguruma/regparse.h +++ b/ext/mbstring/oniguruma/regparse.h @@ -4,7 +4,7 @@ regparse.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -67,7 +67,7 @@ #define CTYPE_XDIGIT (1<<6) #define CTYPE_NOT_XDIGIT (1<<7) -#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_PL) +#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML) #define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF) #define EFFECT_MEMORY (1<<0) @@ -76,7 +76,7 @@ #define NODE_STR_MARGIN 16 #define NODE_STR_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */ -#define NODE_BACKREFS_SIZE 7 +#define NODE_BACKREFS_SIZE 6 #define NSTR_RAW (1<<0) /* by backslashed number */ #define NSTR_AMBIG (1<<1) @@ -145,6 +145,7 @@ typedef struct { #define NST_NAMED_GROUP (1<<10) #define NST_NAME_REF (1<<11) #define NST_IN_REPEAT (1<<12) /* STK_REPEAT is nested in stack. 
*/ +#define NST_NEST_LEVEL (1<<13) #define SET_EFFECT_STATUS(node,f) (node)->u.effect.state |= (f) #define CLEAR_EFFECT_STATUS(node,f) (node)->u.effect.state &= ~(f) @@ -165,6 +166,7 @@ typedef struct { #define IS_CALL_RECURSION(cn) (((cn)->state & NST_RECURSION) != 0) #define IS_CALL_NAME_REF(cn) (((cn)->state & NST_NAME_REF) != 0) #define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0) +#define IS_BACKREF_NEST_LEVEL(bn) (((bn)->state & NST_NEST_LEVEL) != 0) #define IS_QUALIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0) typedef struct { @@ -212,6 +214,7 @@ typedef struct { int back_num; int back_static[NODE_BACKREFS_SIZE]; int* back_dynamic; + int nest_level; } BackrefNode; typedef struct { @@ -290,7 +293,6 @@ typedef struct { extern int onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map)); #endif -extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc)); extern int onig_strncmp P_((const UChar* s1, const UChar* s2, int n)); extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end)); extern int onig_scan_unsigned_number P_((UChar** src, const UChar* end, OnigEncoding enc)); diff --git a/ext/mbstring/oniguruma/regposix.c b/ext/mbstring/oniguruma/regposix.c index 34cbeb9a46..fa7b5c4b24 100644 --- a/ext/mbstring/oniguruma/regposix.c +++ b/ext/mbstring/oniguruma/regposix.c @@ -55,7 +55,7 @@ typedef struct { static int onig2posix_error_code(int code) { - static O2PERR o2p[] = { + static const O2PERR o2p[] = { { ONIG_MISMATCH, REG_NOMATCH }, { ONIG_NO_SUPPORT_CONFIG, REG_EONIG_INTERNAL }, { ONIGERR_MEMORY, REG_ESPACE }, diff --git a/ext/mbstring/oniguruma/regsyntax.c b/ext/mbstring/oniguruma/regsyntax.c index a0f36b8c33..9114e39e6b 100644 --- a/ext/mbstring/oniguruma/regsyntax.c +++ b/ext/mbstring/oniguruma/regsyntax.c @@ -2,7 +2,7 @@ regsyntax.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * 
Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -29,6 +29,13 @@ #include "regint.h" +OnigSyntaxType OnigSyntaxASIS = { + 0 + , ONIG_SYN_OP2_INEFFECTIVE_ESCAPE + , 0 + , ONIG_OPTION_NONE +}; + OnigSyntaxType OnigSyntaxPosixBasic = { ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_ESC_LPAREN_SUBEXP | ONIG_SYN_OP_ESC_BRACE_INTERVAL ) @@ -63,7 +70,7 @@ OnigSyntaxType OnigSyntaxEmacs = { OnigSyntaxType OnigSyntaxGrep = { ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_POSIX_BRACKET | - ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_ESC_LPAREN_SUBEXP | + ONIG_SYN_OP_ESC_BRACE_INTERVAL | ONIG_SYN_OP_ESC_LPAREN_SUBEXP | ONIG_SYN_OP_ESC_VBAR_ALT | ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_ESC_PLUS_ONE_INF | ONIG_SYN_OP_ESC_QMARK_ZERO_ONE | ONIG_SYN_OP_LINE_ANCHOR | @@ -110,6 +117,28 @@ OnigSyntaxType OnigSyntaxPerl = { , ONIG_OPTION_SINGLELINE }; +/* Perl + named group */ +OnigSyntaxType OnigSyntaxPerl_NG = { + (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | + ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 | + ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS | + ONIG_SYN_OP_ESC_C_CONTROL ) + & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) + , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | + ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL | + ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY | + ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT | + ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS | + ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | + ONIG_SYN_OP2_ESC_K_NAMED_BACKREF | + ONIG_SYN_OP2_ESC_G_SUBEXP_CALL ) + , ( SYN_GNU_REGEX_BV | + ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP | + ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME ) + , ONIG_OPTION_SINGLELINE +}; + + extern int onig_set_default_syntax(OnigSyntaxType* syntax) diff --git a/ext/mbstring/oniguruma/regversion.c b/ext/mbstring/oniguruma/regversion.c index 
5f15c10e65..5fad0cc18c 100644 --- a/ext/mbstring/oniguruma/regversion.c +++ b/ext/mbstring/oniguruma/regversion.c @@ -2,7 +2,7 @@ regversion.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -47,7 +47,7 @@ onig_copyright(void) { static char s[58]; - sprintf(s, "Oniguruma %d.%d.%d : Copyright (C) 2002-2005 K.Kosako", + sprintf(s, "Oniguruma %d.%d.%d : Copyright (C) 2002-2006 K.Kosako", ONIGURUMA_VERSION_MAJOR, ONIGURUMA_VERSION_MINOR, ONIGURUMA_VERSION_TEENY); diff --git a/ext/mbstring/oniguruma/st.c b/ext/mbstring/oniguruma/st.c index 65c2cc58bd..2324da2635 100644 --- a/ext/mbstring/oniguruma/st.c +++ b/ext/mbstring/oniguruma/st.c @@ -56,8 +56,6 @@ static int numhash(long); static struct st_hash_type type_numhash = { numcmp, numhash, - st_nothing_key_free, - st_nothing_key_clone }; /* extern int strcmp(const char *, const char *); */ @@ -65,20 +63,6 @@ static int strhash(const char *); static struct st_hash_type type_strhash = { strcmp, strhash, - st_nothing_key_free, - st_nothing_key_clone -}; - -static int strend_cmp(st_strend_key*, st_strend_key*); -static int strend_hash(st_strend_key*); -static int strend_key_free(st_data_t key); -static st_data_t strend_key_clone(st_data_t x); - -static struct st_hash_type type_strend_hash = { - strend_cmp, - strend_hash, - strend_key_free, - strend_key_clone }; static void rehash(st_table *); @@ -100,7 +84,7 @@ static void rehash(st_table *); /* Table of prime numbers 2^n+a, 2<=n<=30. 
*/ -static long primes[] = { +static const long primes[] = { 8 + 3, 16 + 3, 32 + 5, @@ -228,13 +212,6 @@ st_init_strtable_with_size(size) return st_init_table_with_size(&type_strhash, size); } -st_table* -st_init_strend_table_with_size(size) - int size; -{ - return st_init_table_with_size(&type_strend_hash, size); -} - void st_free_table(table) st_table *table; @@ -246,7 +223,6 @@ st_free_table(table) ptr = table->bins[i]; while (ptr != 0) { next = ptr->next; - table->type->key_free(ptr->key); free(ptr); ptr = next; } @@ -297,21 +273,6 @@ st_lookup(table, key, value) } } -int -st_lookup_strend(table, str_key, end_key, value) - st_table *table; - const unsigned char* str_key; - const unsigned char* end_key; - st_data_t *value; -{ - st_strend_key key; - - key.s = (unsigned char* )str_key; - key.end = (unsigned char* )end_key; - - return st_lookup(table, (st_data_t )(&key), value); -} - #define ADD_DIRECT(table, key, value, hash_val, bin_pos)\ do {\ st_table_entry *entry;\ @@ -352,22 +313,6 @@ st_insert(table, key, value) } } -int -st_insert_strend(table, str_key, end_key, value) - st_table *table; - const unsigned char* str_key; - const unsigned char* end_key; - st_data_t value; -{ - st_strend_key* key; - - key = alloc(st_strend_key); - key->s = (unsigned char* )str_key; - key->end = (unsigned char* )end_key; - - return st_insert(table, (st_data_t )key, value); -} - void st_add_direct(table, key, value) st_table *table; @@ -381,21 +326,6 @@ st_add_direct(table, key, value) ADD_DIRECT(table, key, value, hash_val, bin_pos); } -void -st_add_direct_strend(table, str_key, end_key, value) - st_table *table; - const unsigned char* str_key; - const unsigned char* end_key; - st_data_t value; -{ - st_strend_key* key; - - key = alloc(st_strend_key); - key->s = (unsigned char* )str_key; - key->end = (unsigned char* )end_key; - st_add_direct(table, (st_data_t )key, value); -} - static void rehash(table) register st_table *table; @@ -455,7 +385,6 @@ st_copy(old_table) return 0; } 
*entry = *ptr; - entry->key = old_table->type->key_clone(ptr->key); entry->next = new_table->bins[i]; new_table->bins[i] = entry; ptr = ptr->next; @@ -556,7 +485,7 @@ st_cleanup_safe(table, never) table->num_entries = num_entries; } -void +int st_foreach(table, func, arg) st_table *table; int (*func)(); @@ -569,7 +498,7 @@ st_foreach(table, func, arg) for(i = 0; i < table->num_bins; i++) { last = 0; for(ptr = table->bins[i]; ptr != 0;) { - retval = (*func)(ptr->key, ptr->record, arg, 0); + retval = (*func)(ptr->key, ptr->record, arg); switch (retval) { case ST_CHECK: /* check if hash is modified during iteration */ tmp = 0; @@ -580,8 +509,7 @@ st_foreach(table, func, arg) } if (!tmp) { /* call func with error notice */ - retval = (*func)(0, 0, arg, 1); - return; + return 1; } /* fall through */ case ST_CONTINUE: @@ -589,7 +517,7 @@ st_foreach(table, func, arg) ptr = ptr->next; break; case ST_STOP: - return; + return 0; case ST_DELETE: tmp = ptr; if (last == 0) { @@ -599,12 +527,12 @@ st_foreach(table, func, arg) last->next = ptr->next; } ptr = ptr->next; - table->type->key_free(tmp->key); free(tmp); table->num_entries--; } } } + return 0; } static int @@ -659,59 +587,3 @@ numhash(n) { return n; } - -extern int -st_nothing_key_free(st_data_t key) { return 0; } - -extern st_data_t -st_nothing_key_clone(st_data_t x) { return x; } - -static int strend_cmp(st_strend_key* x, st_strend_key* y) -{ - unsigned char *p, *q; - int c; - - if ((x->end - x->s) != (y->end - y->s)) - return 1; - - p = x->s; - q = y->s; - while (p < x->end) { - c = (int )*p - (int )*q; - if (c != 0) return c; - - p++; q++; - } - - return 0; -} - -static int strend_hash(st_strend_key* x) -{ - int val; - unsigned char *p; - - val = 0; - p = x->s; - while (p < x->end) { - val = val * 997 + (int )*p++; - } - - return val + (val >> 5); -} - -static int strend_key_free(st_data_t x) -{ - xfree((void* )x); - return 0; -} - -static st_data_t strend_key_clone(st_data_t x) -{ - st_strend_key* new_key; - 
st_strend_key* key = (st_strend_key* )x; - - new_key = alloc(st_strend_key); - *new_key = *key; - return (st_data_t )new_key; -} diff --git a/ext/mbstring/oniguruma/st.h b/ext/mbstring/oniguruma/st.h index c5cc4e625e..da65e7fef8 100644 --- a/ext/mbstring/oniguruma/st.h +++ b/ext/mbstring/oniguruma/st.h @@ -14,8 +14,6 @@ typedef struct st_table st_table; struct st_hash_type { int (*compare)(); int (*hash)(); - int (*key_free)(); - st_data_t (*key_clone)(); }; struct st_table { @@ -25,11 +23,6 @@ struct st_table { struct st_table_entry **bins; }; -typedef struct { - unsigned char* s; - unsigned char* end; -} st_strend_key; - #define st_is_member(table,key) st_lookup(table,key,(st_data_t *)0) enum st_retval {ST_CONTINUE, ST_STOP, ST_DELETE, ST_CHECK}; @@ -51,23 +44,16 @@ st_table *st_init_numtable _((void)); st_table *st_init_numtable_with_size _((int)); st_table *st_init_strtable _((void)); st_table *st_init_strtable_with_size _((int)); -st_table *st_init_strend_table_with_size _((int)); int st_delete _((st_table *, st_data_t *, st_data_t *)); int st_delete_safe _((st_table *, st_data_t *, st_data_t *, st_data_t)); int st_insert _((st_table *, st_data_t, st_data_t)); -int st_insert_strend _((st_table *, const unsigned char*, const unsigned char*, st_data_t)); int st_lookup _((st_table *, st_data_t, st_data_t *)); -int st_lookup_strend _((st_table *, const unsigned char*, const unsigned char*, st_data_t*)); -void st_foreach _((st_table *, int (*)(ANYARGS), st_data_t)); +int st_foreach _((st_table *, int (*)(ANYARGS), st_data_t)); void st_add_direct _((st_table *, st_data_t, st_data_t)); -void st_add_direct_strend _((st_table *, const unsigned char*, const unsigned char*, st_data_t)); void st_free_table _((st_table *)); void st_cleanup_safe _((st_table *, st_data_t)); st_table *st_copy _((st_table *)); -extern st_data_t st_nothing_key_clone _((st_data_t key)); -extern int st_nothing_key_free _((st_data_t key)); - #define ST_NUMCMP ((int (*)()) 0) #define ST_NUMHASH 
((int (*)()) -2) |