diff options
Diffstat (limited to 'Source/JavaScriptCore/yarr/YarrInterpreter.h')
-rw-r--r-- | Source/JavaScriptCore/yarr/YarrInterpreter.h | 97 |
1 files changed, 56 insertions, 41 deletions
diff --git a/Source/JavaScriptCore/yarr/YarrInterpreter.h b/Source/JavaScriptCore/yarr/YarrInterpreter.h index f37309436..43dcb1f40 100644 --- a/Source/JavaScriptCore/yarr/YarrInterpreter.h +++ b/Source/JavaScriptCore/yarr/YarrInterpreter.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2009, 2010 Apple Inc. All rights reserved. + * Copyright (C) 2009, 2010-2012, 2014, 2016 Apple Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -23,12 +23,10 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef YarrInterpreter_h -#define YarrInterpreter_h +#pragma once +#include "ConcurrentJSLock.h" #include "YarrPattern.h" -#include <wtf/PassOwnPtr.h> -#include <wtf/unicode/Unicode.h> namespace WTF { class BumpPointerAllocator; @@ -76,10 +74,10 @@ struct ByteTerm { union { struct { union { - UChar patternCharacter; + UChar32 patternCharacter; struct { - UChar lo; - UChar hi; + UChar32 lo; + UChar32 hi; } casedCharacter; CharacterClass* characterClass; unsigned subpatternId; @@ -89,7 +87,8 @@ struct ByteTerm { unsigned parenthesesWidth; }; QuantifierType quantityType; - unsigned quantityCount; + unsigned quantityMinCount; + unsigned quantityMaxCount; } atom; struct { int next; @@ -107,11 +106,17 @@ struct ByteTerm { bool m_invert : 1; unsigned inputPosition; - ByteTerm(UChar ch, int inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) + ByteTerm(UChar32 ch, unsigned inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) : frameLocation(frameLocation) , m_capture(false) , m_invert(false) { + atom.patternCharacter = ch; + atom.quantityType = quantityType; + atom.quantityMinCount = quantityCount.unsafeGet(); + atom.quantityMaxCount = quantityCount.unsafeGet(); + inputPosition = inputPos; + switch (quantityType) { case QuantifierFixedCount: type = (quantityCount 
== 1) ? ByteTerm::TypePatternCharacterOnce : ByteTerm::TypePatternCharacterFixed; @@ -123,14 +128,9 @@ struct ByteTerm { type = ByteTerm::TypePatternCharacterNonGreedy; break; } - - atom.patternCharacter = ch; - atom.quantityType = quantityType; - atom.quantityCount = quantityCount.unsafeGet(); - inputPosition = inputPos; } - ByteTerm(UChar lo, UChar hi, int inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) + ByteTerm(UChar32 lo, UChar32 hi, unsigned inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) : frameLocation(frameLocation) , m_capture(false) , m_invert(false) @@ -150,22 +150,24 @@ struct ByteTerm { atom.casedCharacter.lo = lo; atom.casedCharacter.hi = hi; atom.quantityType = quantityType; - atom.quantityCount = quantityCount.unsafeGet(); + atom.quantityMinCount = quantityCount.unsafeGet(); + atom.quantityMaxCount = quantityCount.unsafeGet(); inputPosition = inputPos; } - ByteTerm(CharacterClass* characterClass, bool invert, int inputPos) + ByteTerm(CharacterClass* characterClass, bool invert, unsigned inputPos) : type(ByteTerm::TypeCharacterClass) , m_capture(false) , m_invert(invert) { atom.characterClass = characterClass; atom.quantityType = QuantifierFixedCount; - atom.quantityCount = 1; + atom.quantityMinCount = 1; + atom.quantityMaxCount = 1; inputPosition = inputPos; } - ByteTerm(Type type, unsigned subpatternId, ByteDisjunction* parenthesesInfo, bool capture, int inputPos) + ByteTerm(Type type, unsigned subpatternId, ByteDisjunction* parenthesesInfo, bool capture, unsigned inputPos) : type(type) , m_capture(capture) , m_invert(false) @@ -173,7 +175,8 @@ struct ByteTerm { atom.subpatternId = subpatternId; atom.parenthesesDisjunction = parenthesesInfo; atom.quantityType = QuantifierFixedCount; - atom.quantityCount = 1; + atom.quantityMinCount = 1; + atom.quantityMaxCount = 1; inputPosition = inputPos; } @@ -183,21 +186,23 @@ struct ByteTerm { , m_invert(invert) { 
atom.quantityType = QuantifierFixedCount; - atom.quantityCount = 1; + atom.quantityMinCount = 1; + atom.quantityMaxCount = 1; } - ByteTerm(Type type, unsigned subpatternId, bool capture, bool invert, int inputPos) + ByteTerm(Type type, unsigned subpatternId, bool capture, bool invert, unsigned inputPos) : type(type) , m_capture(capture) , m_invert(invert) { atom.subpatternId = subpatternId; atom.quantityType = QuantifierFixedCount; - atom.quantityCount = 1; + atom.quantityMinCount = 1; + atom.quantityMaxCount = 1; inputPosition = inputPos; } - static ByteTerm BOL(int inputPos) + static ByteTerm BOL(unsigned inputPos) { ByteTerm term(TypeAssertionBOL); term.inputPosition = inputPos; @@ -218,21 +223,21 @@ struct ByteTerm { return term; } - static ByteTerm EOL(int inputPos) + static ByteTerm EOL(unsigned inputPos) { ByteTerm term(TypeAssertionEOL); term.inputPosition = inputPos; return term; } - static ByteTerm WordBoundary(bool invert, int inputPos) + static ByteTerm WordBoundary(bool invert, unsigned inputPos) { ByteTerm term(TypeAssertionWordBoundary, invert); term.inputPosition = inputPos; return term; } - static ByteTerm BackReference(unsigned subpatternId, int inputPos) + static ByteTerm BackReference(unsigned subpatternId, unsigned inputPos) { return ByteTerm(TypeBackReference, subpatternId, false, false, inputPos); } @@ -329,6 +334,8 @@ public: { } + size_t estimatedSizeInBytes() const { return terms.capacity() * sizeof(ByteTerm); } + Vector<ByteTerm> terms; unsigned m_numSubpatterns; unsigned m_frameSize; @@ -337,16 +344,19 @@ public: struct BytecodePattern { WTF_MAKE_FAST_ALLOCATED; public: - BytecodePattern(PassOwnPtr<ByteDisjunction> body, Vector<OwnPtr<ByteDisjunction>>& parenthesesInfoToAdopt, YarrPattern& pattern, BumpPointerAllocator* allocator) - : m_body(body) - , m_ignoreCase(pattern.m_ignoreCase) - , m_multiline(pattern.m_multiline) + BytecodePattern(std::unique_ptr<ByteDisjunction> body, Vector<std::unique_ptr<ByteDisjunction>>& 
parenthesesInfoToAdopt, YarrPattern& pattern, BumpPointerAllocator* allocator, ConcurrentJSLock* lock) + : m_body(WTFMove(body)) + , m_flags(pattern.m_flags) , m_allocator(allocator) + , m_lock(lock) { m_body->terms.shrinkToFit(); newlineCharacterClass = pattern.newlineCharacterClass(); - wordcharCharacterClass = pattern.wordcharCharacterClass(); + if (unicode() && ignoreCase()) + wordcharCharacterClass = pattern.wordUnicodeIgnoreCaseCharCharacterClass(); + else + wordcharCharacterClass = pattern.wordcharCharacterClass(); m_allParenthesesInfo.swap(parenthesesInfoToAdopt); m_allParenthesesInfo.shrinkToFit(); @@ -355,26 +365,31 @@ public: m_userCharacterClasses.shrinkToFit(); } - OwnPtr<ByteDisjunction> m_body; - bool m_ignoreCase; - bool m_multiline; + size_t estimatedSizeInBytes() const { return m_body->estimatedSizeInBytes(); } + + bool ignoreCase() const { return m_flags & FlagIgnoreCase; } + bool multiline() const { return m_flags & FlagMultiline; } + bool sticky() const { return m_flags & FlagSticky; } + bool unicode() const { return m_flags & FlagUnicode; } + + std::unique_ptr<ByteDisjunction> m_body; + RegExpFlags m_flags; // Each BytecodePattern is associated with a RegExp, each RegExp is associated // with a VM. Cache a pointer to our VM's m_regExpAllocator. 
BumpPointerAllocator* m_allocator; + ConcurrentJSLock* m_lock; CharacterClass* newlineCharacterClass; CharacterClass* wordcharCharacterClass; private: - Vector<OwnPtr<ByteDisjunction>> m_allParenthesesInfo; - Vector<OwnPtr<CharacterClass>> m_userCharacterClasses; + Vector<std::unique_ptr<ByteDisjunction>> m_allParenthesesInfo; + Vector<std::unique_ptr<CharacterClass>> m_userCharacterClasses; }; -JS_EXPORT_PRIVATE PassOwnPtr<BytecodePattern> byteCompile(YarrPattern&, BumpPointerAllocator*); +JS_EXPORT_PRIVATE std::unique_ptr<BytecodePattern> byteCompile(YarrPattern&, BumpPointerAllocator*, ConcurrentJSLock* = nullptr); JS_EXPORT_PRIVATE unsigned interpret(BytecodePattern*, const String& input, unsigned start, unsigned* output); unsigned interpret(BytecodePattern*, const LChar* input, unsigned length, unsigned start, unsigned* output); unsigned interpret(BytecodePattern*, const UChar* input, unsigned length, unsigned start, unsigned* output); } } // namespace JSC::Yarr - -#endif // YarrInterpreter_h |