diff options
Diffstat (limited to 'src/3rdparty/javascriptcore/JavaScriptCore/pcre/pcre_internal.h')
-rw-r--r-- | src/3rdparty/javascriptcore/JavaScriptCore/pcre/pcre_internal.h | 455 |
1 files changed, 0 insertions, 455 deletions
diff --git a/src/3rdparty/javascriptcore/JavaScriptCore/pcre/pcre_internal.h b/src/3rdparty/javascriptcore/JavaScriptCore/pcre/pcre_internal.h deleted file mode 100644 index 0016bb5..0000000 --- a/src/3rdparty/javascriptcore/JavaScriptCore/pcre/pcre_internal.h +++ /dev/null @@ -1,455 +0,0 @@ -/* This is JavaScriptCore's variant of the PCRE library. While this library -started out as a copy of PCRE, many of the features of PCRE have been -removed. This library now supports only the regular expression features -required by the JavaScript language specification, and has only the functions -needed by JavaScriptCore and the rest of WebKit. - - Originally written by Philip Hazel - Copyright (c) 1997-2006 University of Cambridge - Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - -/* This header contains definitions that are shared between the different -modules, but which are not relevant to the exported API. This includes some -functions whose names all begin with "_pcre_". */ - -#ifndef PCRE_INTERNAL_H -#define PCRE_INTERNAL_H - -/* Bit definitions for entries in the pcre_ctypes table. */ - -#define ctype_space 0x01 -#define ctype_xdigit 0x08 -#define ctype_word 0x10 /* alphameric or '_' */ - -/* Offsets for the bitmap tables in pcre_cbits. Each table contains a set -of bits for a class map. Some classes are built by combining these tables. */ - -#define cbit_space 0 /* \s */ -#define cbit_digit 32 /* \d */ -#define cbit_word 64 /* \w */ -#define cbit_length 96 /* Length of the cbits table */ - -/* Offsets of the various tables from the base tables pointer, and -total length. */ - -#define lcc_offset 0 -#define fcc_offset 128 -#define cbits_offset 256 -#define ctypes_offset (cbits_offset + cbit_length) -#define tables_length (ctypes_offset + 128) - -#ifndef DFTABLES - -// Change the following to 1 to dump used regular expressions at process exit time. -#define REGEXP_HISTOGRAM 0 - -#include "Assertions.h" - -#if COMPILER(MSVC) -#pragma warning(disable: 4232) -#pragma warning(disable: 4244) -#endif - -#include "pcre.h" - -/* The value of LINK_SIZE determines the number of bytes used to store links as -offsets within the compiled regex. The default is 2, which allows for compiled -patterns up to 64K long. */ - -#define LINK_SIZE 3 - -/* Define DEBUG to get debugging output on stdout. */ - -#if 0 -#define DEBUG -#endif - -/* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef -inline, and there are *still* stupid compilers about that don't like indented -pre-processor statements, or at least there were when I first wrote this. After -all, it had only been about 10 years then... */ - -#ifdef DEBUG -#define DPRINTF(p) printf p -#else -#define DPRINTF(p) /*nothing*/ -#endif - -/* PCRE keeps offsets in its compiled code as 2-byte quantities (always stored -in big-endian order) by default. These are used, for example, to link from the -start of a subpattern to its alternatives and its end. The use of 2 bytes per -offset limits the size of the compiled regex to around 64K, which is big enough -for almost everybody. However, I received a request for an even bigger limit. -For this reason, and also to make the code easier to maintain, the storing and -loading of offsets from the byte string is now handled by the functions that are -defined here. */ - -/* PCRE uses some other 2-byte quantities that do not change when the size of -offsets changes. There are used for repeat counts and for other things such as -capturing parenthesis numbers in back references. */ - -static inline void put2ByteValue(unsigned char* opcodePtr, int value) -{ - ASSERT(value >= 0 && value <= 0xFFFF); - opcodePtr[0] = value >> 8; - opcodePtr[1] = value; -} - -static inline void put3ByteValue(unsigned char* opcodePtr, int value) -{ - ASSERT(value >= 0 && value <= 0xFFFFFF); - opcodePtr[0] = value >> 16; - opcodePtr[1] = value >> 8; - opcodePtr[2] = value; -} - -static inline int get2ByteValue(const unsigned char* opcodePtr) -{ - return (opcodePtr[0] << 8) | opcodePtr[1]; -} - -static inline int get3ByteValue(const unsigned char* opcodePtr) -{ - return (opcodePtr[0] << 16) | (opcodePtr[1] << 8) | opcodePtr[2]; -} - -static inline void put2ByteValueAndAdvance(unsigned char*& opcodePtr, int value) -{ - put2ByteValue(opcodePtr, value); - opcodePtr += 2; -} - -static inline void put3ByteValueAndAdvance(unsigned char*& opcodePtr, int value) -{ - put3ByteValue(opcodePtr, value); - opcodePtr += 3; -} - -static inline void putLinkValueAllowZero(unsigned char* opcodePtr, int value) -{ -#if LINK_SIZE == 3 - put3ByteValue(opcodePtr, value); -#elif LINK_SIZE == 2 - put2ByteValue(opcodePtr, value); -#else -# error LINK_SIZE not supported. -#endif -} - -static inline int getLinkValueAllowZero(const unsigned char* opcodePtr) -{ -#if LINK_SIZE == 3 - return get3ByteValue(opcodePtr); -#elif LINK_SIZE == 2 - return get2ByteValue(opcodePtr); -#else -# error LINK_SIZE not supported. -#endif -} - -#define MAX_PATTERN_SIZE 1024 * 1024 // Derived by empirical testing of compile time in PCRE and WREC. -COMPILE_ASSERT(MAX_PATTERN_SIZE < (1 << (8 * LINK_SIZE)), pcre_max_pattern_fits_in_bytecode); - -static inline void putLinkValue(unsigned char* opcodePtr, int value) -{ - ASSERT(value); - putLinkValueAllowZero(opcodePtr, value); -} - -static inline int getLinkValue(const unsigned char* opcodePtr) -{ - int value = getLinkValueAllowZero(opcodePtr); - ASSERT(value); - return value; -} - -static inline void putLinkValueAndAdvance(unsigned char*& opcodePtr, int value) -{ - putLinkValue(opcodePtr, value); - opcodePtr += LINK_SIZE; -} - -static inline void putLinkValueAllowZeroAndAdvance(unsigned char*& opcodePtr, int value) -{ - putLinkValueAllowZero(opcodePtr, value); - opcodePtr += LINK_SIZE; -} - -// FIXME: These are really more of a "compiled regexp state" than "regexp options" -enum RegExpOptions { - UseFirstByteOptimizationOption = 0x40000000, /* firstByte is set */ - UseRequiredByteOptimizationOption = 0x20000000, /* reqByte is set */ - UseMultiLineFirstByteOptimizationOption = 0x10000000, /* start after \n for multiline */ - IsAnchoredOption = 0x02000000, /* can't use partial with this regex */ - IgnoreCaseOption = 0x00000001, - MatchAcrossMultipleLinesOption = 0x00000002 -}; - -/* Flags added to firstByte or reqByte; a "non-literal" item is either a -variable-length repeat, or a anything other than literal characters. */ - -#define REQ_IGNORE_CASE 0x0100 /* indicates should ignore case */ -#define REQ_VARY 0x0200 /* reqByte followed non-literal item */ - -/* Miscellaneous definitions */ - -/* Flag bits and data types for the extended class (OP_XCLASS) for classes that -contain UTF-8 characters with values greater than 255. */ - -#define XCL_NOT 0x01 /* Flag: this is a negative class */ -#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */ - -#define XCL_END 0 /* Marks end of individual items */ -#define XCL_SINGLE 1 /* Single item (one multibyte char) follows */ -#define XCL_RANGE 2 /* A range (two multibyte chars) follows */ - -/* These are escaped items that aren't just an encoding of a particular data -value such as \n. They must have non-zero values, as check_escape() returns -their negation. Also, they must appear in the same order as in the opcode -definitions below, up to ESC_w. The final one must be -ESC_REF as subsequent values are used for \1, \2, \3, etc. There is are two -tests in the code for an escape > ESC_b and <= ESC_w to -detect the types that may be repeated. These are the types that consume -characters. If any new escapes are put in between that don't consume a -character, that code will have to change. */ - -enum { ESC_B = 1, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W, ESC_w, ESC_REF }; - -/* Opcode table: OP_BRA must be last, as all values >= it are used for brackets -that extract substrings. Starting from 1 (i.e. after OP_END), the values up to -OP_EOD must correspond in order to the list of escapes immediately above. -Note that whenever this list is updated, the two macro definitions that follow -must also be updated to match. */ - -#define FOR_EACH_OPCODE(macro) \ - macro(END) \ - \ - macro(NOT_WORD_BOUNDARY) \ - macro(WORD_BOUNDARY) \ - macro(NOT_DIGIT) \ - macro(DIGIT) \ - macro(NOT_WHITESPACE) \ - macro(WHITESPACE) \ - macro(NOT_WORDCHAR) \ - macro(WORDCHAR) \ - \ - macro(NOT_NEWLINE) \ - \ - macro(CIRC) \ - macro(DOLL) \ - macro(BOL) \ - macro(EOL) \ - macro(CHAR) \ - macro(CHAR_IGNORING_CASE) \ - macro(ASCII_CHAR) \ - macro(ASCII_LETTER_IGNORING_CASE) \ - macro(NOT) \ - \ - macro(STAR) \ - macro(MINSTAR) \ - macro(PLUS) \ - macro(MINPLUS) \ - macro(QUERY) \ - macro(MINQUERY) \ - macro(UPTO) \ - macro(MINUPTO) \ - macro(EXACT) \ - \ - macro(NOTSTAR) \ - macro(NOTMINSTAR) \ - macro(NOTPLUS) \ - macro(NOTMINPLUS) \ - macro(NOTQUERY) \ - macro(NOTMINQUERY) \ - macro(NOTUPTO) \ - macro(NOTMINUPTO) \ - macro(NOTEXACT) \ - \ - macro(TYPESTAR) \ - macro(TYPEMINSTAR) \ - macro(TYPEPLUS) \ - macro(TYPEMINPLUS) \ - macro(TYPEQUERY) \ - macro(TYPEMINQUERY) \ - macro(TYPEUPTO) \ - macro(TYPEMINUPTO) \ - macro(TYPEEXACT) \ - \ - macro(CRSTAR) \ - macro(CRMINSTAR) \ - macro(CRPLUS) \ - macro(CRMINPLUS) \ - macro(CRQUERY) \ - macro(CRMINQUERY) \ - macro(CRRANGE) \ - macro(CRMINRANGE) \ - \ - macro(CLASS) \ - macro(NCLASS) \ - macro(XCLASS) \ - \ - macro(REF) \ - \ - macro(ALT) \ - macro(KET) \ - macro(KETRMAX) \ - macro(KETRMIN) \ - \ - macro(ASSERT) \ - macro(ASSERT_NOT) \ - \ - macro(BRAZERO) \ - macro(BRAMINZERO) \ - macro(BRANUMBER) \ - macro(BRA) - -#define OPCODE_ENUM_VALUE(opcode) OP_##opcode, -enum { FOR_EACH_OPCODE(OPCODE_ENUM_VALUE) }; - -/* WARNING WARNING WARNING: There is an implicit assumption in pcre.c and -study.c that all opcodes are less than 128 in value. This makes handling UTF-8 -character sequences easier. */ - -/* The highest extraction number before we have to start using additional -bytes. (Originally PCRE didn't have support for extraction counts higher than -this number.) The value is limited by the number of opcodes left after OP_BRA, -i.e. 255 - OP_BRA. We actually set it a bit lower to leave room for additional -opcodes. */ - -/* FIXME: Note that OP_BRA + 100 is > 128, so the two comments above -are in conflict! */ - -#define EXTRACT_BASIC_MAX 100 - -/* The code vector runs on as long as necessary after the end. */ - -struct JSRegExp { - unsigned options; - - unsigned short topBracket; - unsigned short topBackref; - - unsigned short firstByte; - unsigned short reqByte; - -#if REGEXP_HISTOGRAM - size_t stringOffset; - size_t stringLength; -#endif -}; - -/* Internal shared data tables. These are tables that are used by more than one - of the exported public functions. They have to be "external" in the C sense, - but are not part of the PCRE public API. The data for these tables is in the - pcre_tables.c module. */ - -#define jsc_pcre_utf8_table1_size 6 - -extern const int jsc_pcre_utf8_table1[6]; -extern const int jsc_pcre_utf8_table2[6]; -extern const int jsc_pcre_utf8_table3[6]; -extern const unsigned char jsc_pcre_utf8_table4[0x40]; - -extern const unsigned char jsc_pcre_default_tables[tables_length]; - -static inline unsigned char toLowerCase(unsigned char c) -{ - static const unsigned char* lowerCaseChars = jsc_pcre_default_tables + lcc_offset; - return lowerCaseChars[c]; -} - -static inline unsigned char flipCase(unsigned char c) -{ - static const unsigned char* flippedCaseChars = jsc_pcre_default_tables + fcc_offset; - return flippedCaseChars[c]; -} - -static inline unsigned char classBitmapForChar(unsigned char c) -{ - static const unsigned char* charClassBitmaps = jsc_pcre_default_tables + cbits_offset; - return charClassBitmaps[c]; -} - -static inline unsigned char charTypeForChar(unsigned char c) -{ - const unsigned char* charTypeMap = jsc_pcre_default_tables + ctypes_offset; - return charTypeMap[c]; -} - -static inline bool isWordChar(UChar c) -{ - return c < 128 && (charTypeForChar(c) & ctype_word); -} - -static inline bool isSpaceChar(UChar c) -{ - return (c < 128 && (charTypeForChar(c) & ctype_space)) || c == 0x00A0; -} - -static inline bool isNewline(UChar nl) -{ - return (nl == 0xA || nl == 0xD || nl == 0x2028 || nl == 0x2029); -} - -static inline bool isBracketStartOpcode(unsigned char opcode) -{ - if (opcode >= OP_BRA) - return true; - switch (opcode) { - case OP_ASSERT: - case OP_ASSERT_NOT: - return true; - default: - return false; - } -} - -static inline void advanceToEndOfBracket(const unsigned char*& opcodePtr) -{ - ASSERT(isBracketStartOpcode(*opcodePtr) || *opcodePtr == OP_ALT); - do - opcodePtr += getLinkValue(opcodePtr + 1); - while (*opcodePtr == OP_ALT); -} - -/* Internal shared functions. These are functions that are used in more -that one of the source files. They have to have external linkage, but -but are not part of the public API and so not exported from the library. */ - -extern int jsc_pcre_ucp_othercase(unsigned); -extern bool jsc_pcre_xclass(int, const unsigned char*); - -#endif - -#endif - -/* End of pcre_internal.h */ |