diff options
author | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-06-27 06:07:23 +0000 |
---|---|---|
committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-06-27 06:07:23 +0000 |
commit | 1bf1084f2b10c3b47fd1a588d85d21ed0eb41d0c (patch) | |
tree | 46dcd36c86e7fbc6e5df36deb463b33e9967a6f7 /Source/JavaScriptCore/yarr/YarrPattern.h | |
parent | 32761a6cee1d0dee366b885b7b9c777e67885688 (diff) | |
download | WebKitGtk-tarball-master.tar.gz |
webkitgtk-2.16.5HEADwebkitgtk-2.16.5master
Diffstat (limited to 'Source/JavaScriptCore/yarr/YarrPattern.h')
-rw-r--r-- | Source/JavaScriptCore/yarr/YarrPattern.h | 189 |
1 files changed, 133 insertions, 56 deletions
diff --git a/Source/JavaScriptCore/yarr/YarrPattern.h b/Source/JavaScriptCore/yarr/YarrPattern.h index d42b0f979..2db32d94b 100644 --- a/Source/JavaScriptCore/yarr/YarrPattern.h +++ b/Source/JavaScriptCore/yarr/YarrPattern.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2009, 2013 Apple Inc. All rights reserved. + * Copyright (C) 2009, 2013-2014, 2016 Apple Inc. All rights reserved. * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged * * Redistribution and use in source and binary forms, with or without @@ -24,26 +24,22 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef YarrPattern_h -#define YarrPattern_h +#pragma once +#include "RegExpKey.h" #include <wtf/CheckedArithmetic.h> -#include <wtf/OwnPtr.h> -#include <wtf/PassOwnPtr.h> -#include <wtf/RefCounted.h> #include <wtf/Vector.h> #include <wtf/text/WTFString.h> -#include <wtf/unicode/Unicode.h> namespace JSC { namespace Yarr { struct PatternDisjunction; struct CharacterRange { - UChar begin; - UChar end; + UChar32 begin; + UChar32 end; - CharacterRange(UChar begin, UChar end) + CharacterRange(UChar32 begin, UChar32 end) : begin(begin) , end(end) { @@ -65,9 +61,9 @@ public: , m_tableInverted(inverted) { } - Vector<UChar> m_matches; + Vector<UChar32> m_matches; Vector<CharacterRange> m_ranges; - Vector<UChar> m_matchesUnicode; + Vector<UChar32> m_matchesUnicode; Vector<CharacterRange> m_rangesUnicode; const char* m_table; @@ -96,7 +92,7 @@ struct PatternTerm { bool m_capture :1; bool m_invert :1; union { - UChar patternCharacter; + UChar32 patternCharacter; CharacterClass* characterClass; unsigned backReferenceSubpatternId; struct { @@ -112,18 +108,19 @@ struct PatternTerm { } anchors; }; QuantifierType quantityType; - Checked<unsigned> quantityCount; - int inputPosition; + Checked<unsigned> quantityMinCount; + Checked<unsigned> quantityMaxCount; + unsigned inputPosition; unsigned frameLocation; - PatternTerm(UChar ch) + PatternTerm(UChar32 ch) : type(PatternTerm::TypePatternCharacter) , m_capture(false) , m_invert(false) { patternCharacter = ch; quantityType = QuantifierFixedCount; - quantityCount = 1; + quantityMinCount = quantityMaxCount = 1; } PatternTerm(CharacterClass* charClass, bool invert) @@ -133,7 +130,7 @@ struct PatternTerm { { characterClass = charClass; quantityType = QuantifierFixedCount; - quantityCount = 1; + quantityMinCount = quantityMaxCount = 1; } PatternTerm(Type type, unsigned subpatternId, PatternDisjunction* disjunction, bool capture = false, bool invert = false) @@ -146,7 +143,7 @@ struct PatternTerm { parentheses.isCopy = false; parentheses.isTerminal = false; quantityType = QuantifierFixedCount; - quantityCount = 1; + quantityMinCount = quantityMaxCount = 1; } PatternTerm(Type type, bool invert = false) @@ -155,7 +152,7 @@ struct PatternTerm { , m_invert(invert) { quantityType = QuantifierFixedCount; - quantityCount = 1; + quantityMinCount = quantityMaxCount = 1; } PatternTerm(unsigned spatternId) @@ -165,7 +162,7 @@ struct PatternTerm { { backReferenceSubpatternId = spatternId; quantityType = QuantifierFixedCount; - quantityCount = 1; + quantityMinCount = quantityMaxCount = 1; } PatternTerm(bool bolAnchor, bool eolAnchor) @@ -176,7 +173,7 @@ struct PatternTerm { anchors.bolAnchor = bolAnchor; anchors.eolAnchor = eolAnchor; quantityType = QuantifierFixedCount; - quantityCount = 1; + quantityMinCount = quantityMaxCount = 1; } static PatternTerm ForwardReference() @@ -211,7 +208,18 @@ struct PatternTerm { void quantify(unsigned count, QuantifierType type) { - quantityCount = count; + quantityMinCount = 0; + quantityMaxCount = count; + quantityType = type; + } + + void quantify(unsigned minCount, unsigned maxCount, QuantifierType type) + { + // Currently only Parentheses can specify a non-zero min with a different max. + ASSERT(this->type == TypeParenthesesSubpattern || !minCount || minCount == maxCount); + ASSERT(minCount <= maxCount); + quantityMinCount = minCount; + quantityMaxCount = maxCount; quantityType = type; } }; @@ -270,12 +278,11 @@ public: PatternAlternative* addNewAlternative() { - PatternAlternative* alternative = new PatternAlternative(this); - m_alternatives.append(adoptPtr(alternative)); - return alternative; + m_alternatives.append(std::make_unique<PatternAlternative>(this)); + return static_cast<PatternAlternative*>(m_alternatives.last().get()); } - Vector<OwnPtr<PatternAlternative>> m_alternatives; + Vector<std::unique_ptr<PatternAlternative>> m_alternatives; PatternAlternative* m_parent; unsigned m_minimumSize; unsigned m_callFrameSize; @@ -286,13 +293,15 @@ public: // (please to be calling newlineCharacterClass() et al on your // friendly neighborhood YarrPattern instance to get nicely // cached copies). -CharacterClass* newlineCreate(); -CharacterClass* digitsCreate(); -CharacterClass* spacesCreate(); -CharacterClass* wordcharCreate(); -CharacterClass* nondigitsCreate(); -CharacterClass* nonspacesCreate(); -CharacterClass* nonwordcharCreate(); +std::unique_ptr<CharacterClass> newlineCreate(); +std::unique_ptr<CharacterClass> digitsCreate(); +std::unique_ptr<CharacterClass> spacesCreate(); +std::unique_ptr<CharacterClass> wordcharCreate(); +std::unique_ptr<CharacterClass> wordUnicodeIgnoreCaseCharCreate(); +std::unique_ptr<CharacterClass> nondigitsCreate(); +std::unique_ptr<CharacterClass> nonspacesCreate(); +std::unique_ptr<CharacterClass> nonwordcharCreate(); +std::unique_ptr<CharacterClass> nonwordUnicodeIgnoreCaseCharCreate(); struct TermChain { TermChain(PatternTerm term) @@ -303,8 +312,31 @@ struct TermChain { Vector<TermChain> hotTerms; }; + struct YarrPattern { - JS_EXPORT_PRIVATE YarrPattern(const String& pattern, bool ignoreCase, bool multiline, const char** error); + JS_EXPORT_PRIVATE YarrPattern(const String& pattern, RegExpFlags, const char** error, void* stackLimit = nullptr); + + enum ErrorCode { + NoError, + PatternTooLarge, + QuantifierOutOfOrder, + QuantifierWithoutAtom, + QuantifierTooLarge, + MissingParentheses, + ParenthesesUnmatched, + ParenthesesTypeInvalid, + CharacterClassUnmatched, + CharacterClassOutOfOrder, + EscapeUnterminated, + InvalidUnicodeEscape, + InvalidIdentityEscape, + TooManyDisjunctions, + OffsetTooLarge, + InvalidRegularExpressionFlags, + NumberOfErrorCodes + }; + + JS_EXPORT_PRIVATE static const char* errorMessage(ErrorCode); void reset() { @@ -313,14 +345,18 @@ struct YarrPattern { m_containsBackreferences = false; m_containsBOL = false; + m_containsUnsignedLengthPattern = false; + m_hasCopiedParenSubexpressions = false; newlineCached = 0; digitsCached = 0; spacesCached = 0; wordcharCached = 0; + wordUnicodeIgnoreCaseCharCached = 0; nondigitsCached = 0; nonspacesCached = 0; nonwordcharCached = 0; + nonwordUnicodeIgnoreCasecharCached = 0; m_disjunctions.clear(); m_userCharacterClasses.clear(); @@ -331,71 +367,112 @@ struct YarrPattern { return m_maxBackReference > m_numSubpatterns; } + bool containsUnsignedLengthPattern() + { + return m_containsUnsignedLengthPattern; + } + CharacterClass* newlineCharacterClass() { - if (!newlineCached) - m_userCharacterClasses.append(adoptPtr(newlineCached = newlineCreate())); + if (!newlineCached) { + m_userCharacterClasses.append(newlineCreate()); + newlineCached = m_userCharacterClasses.last().get(); + } return newlineCached; } CharacterClass* digitsCharacterClass() { - if (!digitsCached) - m_userCharacterClasses.append(adoptPtr(digitsCached = digitsCreate())); + if (!digitsCached) { + m_userCharacterClasses.append(digitsCreate()); + digitsCached = m_userCharacterClasses.last().get(); + } return digitsCached; } CharacterClass* spacesCharacterClass() { - if (!spacesCached) - m_userCharacterClasses.append(adoptPtr(spacesCached = spacesCreate())); + if (!spacesCached) { + m_userCharacterClasses.append(spacesCreate()); + spacesCached = m_userCharacterClasses.last().get(); + } return spacesCached; } CharacterClass* wordcharCharacterClass() { - if (!wordcharCached) - m_userCharacterClasses.append(adoptPtr(wordcharCached = wordcharCreate())); + if (!wordcharCached) { + m_userCharacterClasses.append(wordcharCreate()); + wordcharCached = m_userCharacterClasses.last().get(); + } return wordcharCached; } + CharacterClass* wordUnicodeIgnoreCaseCharCharacterClass() + { + if (!wordUnicodeIgnoreCaseCharCached) { + m_userCharacterClasses.append(wordUnicodeIgnoreCaseCharCreate()); + wordUnicodeIgnoreCaseCharCached = m_userCharacterClasses.last().get(); + } + return wordUnicodeIgnoreCaseCharCached; + } CharacterClass* nondigitsCharacterClass() { - if (!nondigitsCached) - m_userCharacterClasses.append(adoptPtr(nondigitsCached = nondigitsCreate())); + if (!nondigitsCached) { + m_userCharacterClasses.append(nondigitsCreate()); + nondigitsCached = m_userCharacterClasses.last().get(); + } return nondigitsCached; } CharacterClass* nonspacesCharacterClass() { - if (!nonspacesCached) - m_userCharacterClasses.append(adoptPtr(nonspacesCached = nonspacesCreate())); + if (!nonspacesCached) { + m_userCharacterClasses.append(nonspacesCreate()); + nonspacesCached = m_userCharacterClasses.last().get(); + } return nonspacesCached; } CharacterClass* nonwordcharCharacterClass() { - if (!nonwordcharCached) - m_userCharacterClasses.append(adoptPtr(nonwordcharCached = nonwordcharCreate())); + if (!nonwordcharCached) { + m_userCharacterClasses.append(nonwordcharCreate()); + nonwordcharCached = m_userCharacterClasses.last().get(); + } return nonwordcharCached; } + CharacterClass* nonwordUnicodeIgnoreCaseCharCharacterClass() + { + if (!nonwordUnicodeIgnoreCasecharCached) { + m_userCharacterClasses.append(nonwordUnicodeIgnoreCaseCharCreate()); + nonwordUnicodeIgnoreCasecharCached = m_userCharacterClasses.last().get(); + } + return nonwordUnicodeIgnoreCasecharCached; + } + + bool ignoreCase() const { return m_flags & FlagIgnoreCase; } + bool multiline() const { return m_flags & FlagMultiline; } + bool sticky() const { return m_flags & FlagSticky; } + bool unicode() const { return m_flags & FlagUnicode; } - bool m_ignoreCase : 1; - bool m_multiline : 1; bool m_containsBackreferences : 1; bool m_containsBOL : 1; + bool m_containsUnsignedLengthPattern : 1; + bool m_hasCopiedParenSubexpressions : 1; + RegExpFlags m_flags; unsigned m_numSubpatterns; unsigned m_maxBackReference; PatternDisjunction* m_body; - Vector<OwnPtr<PatternDisjunction>, 4> m_disjunctions; - Vector<OwnPtr<CharacterClass>> m_userCharacterClasses; + Vector<std::unique_ptr<PatternDisjunction>, 4> m_disjunctions; + Vector<std::unique_ptr<CharacterClass>> m_userCharacterClasses; private: - const char* compile(const String& patternString); + const char* compile(const String& patternString, void* stackLimit); CharacterClass* newlineCached; CharacterClass* digitsCached; CharacterClass* spacesCached; CharacterClass* wordcharCached; + CharacterClass* wordUnicodeIgnoreCaseCharCached; CharacterClass* nondigitsCached; CharacterClass* nonspacesCached; CharacterClass* nonwordcharCached; + CharacterClass* nonwordUnicodeIgnoreCasecharCached; }; } } // namespace JSC::Yarr - -#endif // YarrPattern_h |