diff options
author | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-06-27 06:07:23 +0000 |
---|---|---|
committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-06-27 06:07:23 +0000 |
commit | 1bf1084f2b10c3b47fd1a588d85d21ed0eb41d0c (patch) | |
tree | 46dcd36c86e7fbc6e5df36deb463b33e9967a6f7 /Source/JavaScriptCore/runtime/RegExp.cpp | |
parent | 32761a6cee1d0dee366b885b7b9c777e67885688 (diff) | |
download | WebKitGtk-tarball-master.tar.gz |
webkitgtk-2.16.5HEADwebkitgtk-2.16.5master
Diffstat (limited to 'Source/JavaScriptCore/runtime/RegExp.cpp')
-rw-r--r-- | Source/JavaScriptCore/runtime/RegExp.cpp | 297 |
1 files changed, 98 insertions, 199 deletions
diff --git a/Source/JavaScriptCore/runtime/RegExp.cpp b/Source/JavaScriptCore/runtime/RegExp.cpp index 30c105d29..a372360ca 100644 --- a/Source/JavaScriptCore/runtime/RegExp.cpp +++ b/Source/JavaScriptCore/runtime/RegExp.cpp @@ -1,6 +1,6 @@ /* * Copyright (C) 1999-2001, 2004 Harri Porten (porten@kde.org) - * Copyright (c) 2007, 2008 Apple Inc. All rights reserved. + * Copyright (c) 2007, 2008, 2016 Apple Inc. All rights reserved. * Copyright (C) 2009 Torch Mobile, Inc. * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged * @@ -24,23 +24,16 @@ #include "RegExp.h" #include "Lexer.h" -#include "Operations.h" +#include "JSCInlines.h" #include "RegExpCache.h" +#include "RegExpInlines.h" #include "Yarr.h" #include "YarrJIT.h" #include <wtf/Assertions.h> -#define REGEXP_FUNC_TEST_DATA_GEN 0 - -#if REGEXP_FUNC_TEST_DATA_GEN -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#endif - namespace JSC { -const ClassInfo RegExp::s_info = { "RegExp", 0, 0, 0, CREATE_METHOD_TABLE(RegExp) }; +const ClassInfo RegExp::s_info = { "RegExp", 0, 0, CREATE_METHOD_TABLE(RegExp) }; RegExpFlags regExpFlags(const String& string) { @@ -66,6 +59,18 @@ RegExpFlags regExpFlags(const String& string) flags = static_cast<RegExpFlags>(flags | FlagMultiline); break; + case 'u': + if (flags & FlagUnicode) + return InvalidFlags; + flags = static_cast<RegExpFlags>(flags | FlagUnicode); + break; + + case 'y': + if (flags & FlagSticky) + return InvalidFlags; + flags = static_cast<RegExpFlags>(flags | FlagSticky); + break; + default: return InvalidFlags; } @@ -75,33 +80,6 @@ RegExpFlags regExpFlags(const String& string) } #if REGEXP_FUNC_TEST_DATA_GEN -class RegExpFunctionalTestCollector { - // This class is not thread safe. -protected: - static const char* const s_fileName; - -public: - static RegExpFunctionalTestCollector* get(); - - ~RegExpFunctionalTestCollector(); - - void outputOneTest(RegExp*, String, int, int*, int); - void clearRegExp(RegExp* regExp) - { - if (regExp == m_lastRegExp) - m_lastRegExp = 0; - } - -private: - RegExpFunctionalTestCollector(); - - void outputEscapedString(const String&, bool escapeSlash = false); - - static RegExpFunctionalTestCollector* s_instance; - FILE* m_file; - RegExp* m_lastRegExp; -}; - const char* const RegExpFunctionalTestCollector::s_fileName = "/tmp/RegExpTestsData"; RegExpFunctionalTestCollector* RegExpFunctionalTestCollector::s_instance = 0; @@ -113,7 +91,7 @@ RegExpFunctionalTestCollector* RegExpFunctionalTestCollector::get() return s_instance; } -void RegExpFunctionalTestCollector::outputOneTest(RegExp* regExp, String s, int startOffset, int* ovector, int result) +void RegExpFunctionalTestCollector::outputOneTest(RegExp* regExp, const String& s, int startOffset, int* ovector, int result) { if ((!m_lastRegExp) || (m_lastRegExp != regExp)) { m_lastRegExp = regExp; @@ -126,6 +104,10 @@ void RegExpFunctionalTestCollector::outputOneTest(RegExp* regExp, String s, int fputc('i', m_file); if (regExp->multiline()) fputc('m', m_file); + if (regExp->sticky()) + fputc('y', m_file); + if (regExp->unicode()) + fputc('u', m_file); fprintf(m_file, "\n"); } @@ -227,6 +209,10 @@ RegExp::RegExp(VM& vm, const String& patternString, RegExpFlags flags) , m_constructionError(0) , m_numSubpatterns(0) #if ENABLE(REGEXP_TRACING) + , m_rtMatchOnlyTotalSubjectStringLen(0.0) + , m_rtMatchTotalSubjectStringLen(0.0) + , m_rtMatchOnlyCallCount(0) + , m_rtMatchOnlyFoundCount(0) , m_rtMatchCallCount(0) , m_rtMatchFoundCount(0) #endif @@ -236,8 +222,8 @@ RegExp::RegExp(VM& vm, const String& patternString, RegExpFlags flags) void RegExp::finishCreation(VM& vm) { Base::finishCreation(vm); - Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError); - if (m_constructionError) + Yarr::YarrPattern pattern(m_patternString, m_flags, &m_constructionError, vm.stackLimit()); + if (!isValid()) m_state = ParseError; else m_numSubpatterns = pattern.m_numSubpatterns; @@ -252,6 +238,16 @@ void RegExp::destroy(JSCell* cell) thisObject->RegExp::~RegExp(); } +size_t RegExp::estimatedSize(JSCell* cell) +{ + RegExp* thisObject = static_cast<RegExp*>(cell); + size_t regexDataSize = thisObject->m_regExpBytecode ? thisObject->m_regExpBytecode->estimatedSizeInBytes() : 0; +#if ENABLE(YARR_JIT) + regexDataSize += thisObject->m_regExpJITCode.size(); +#endif + return Base::estimatedSize(cell) + regexDataSize; +} + RegExp* RegExp::createWithoutCaching(VM& vm, const String& patternString, RegExpFlags flags) { RegExp* regExp = new (NotNull, allocateCell<RegExp>(vm.heap)) RegExp(vm, patternString, flags); @@ -266,11 +262,15 @@ RegExp* RegExp::create(VM& vm, const String& patternString, RegExpFlags flags) void RegExp::compile(VM* vm, Yarr::YarrCharSize charSize) { - Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError); + ConcurrentJSLocker locker(m_lock); + + Yarr::YarrPattern pattern(m_patternString, m_flags, &m_constructionError, vm->stackLimit()); if (m_constructionError) { RELEASE_ASSERT_NOT_REACHED(); +#if COMPILER_QUIRK(CONSIDERS_UNREACHABLE_CODE) m_state = ParseError; return; +#endif } ASSERT(m_numSubpatterns == pattern.m_numSubpatterns); @@ -281,118 +281,49 @@ void RegExp::compile(VM* vm, Yarr::YarrCharSize charSize) } #if ENABLE(YARR_JIT) - if (!pattern.m_containsBackreferences && vm->canUseRegExpJIT()) { + if (!pattern.m_containsBackreferences && !pattern.containsUnsignedLengthPattern() && !unicode() && vm->canUseRegExpJIT()) { Yarr::jitCompile(pattern, charSize, vm, m_regExpJITCode); -#if ENABLE(YARR_JIT_DEBUG) - if (!m_regExpJITCode.isFallBack()) - m_state = JITCode; - else - m_state = ByteCode; -#else if (!m_regExpJITCode.isFallBack()) { m_state = JITCode; return; } -#endif } #else UNUSED_PARAM(charSize); #endif - m_regExpBytecode = Yarr::byteCompile(pattern, &vm->m_regExpAllocator); + m_state = ByteCode; + m_regExpBytecode = Yarr::byteCompile(pattern, &vm->m_regExpAllocator, &vm->m_regExpAllocatorLock); } -void RegExp::compileIfNecessary(VM& vm, Yarr::YarrCharSize charSize) +int RegExp::match(VM& vm, const String& s, unsigned startOffset, Vector<int>& ovector) { - if (hasCode()) { -#if ENABLE(YARR_JIT) - if (m_state != JITCode) - return; - if ((charSize == Yarr::Char8) && (m_regExpJITCode.has8BitCode())) - return; - if ((charSize == Yarr::Char16) && (m_regExpJITCode.has16BitCode())) - return; -#else - return; -#endif - } - - compile(&vm, charSize); + return matchInline(vm, s, startOffset, ovector); } -int RegExp::match(VM& vm, const String& s, unsigned startOffset, Vector<int, 32>& ovector) +bool RegExp::matchConcurrently( + VM& vm, const String& s, unsigned startOffset, int& position, Vector<int>& ovector) { -#if ENABLE(REGEXP_TRACING) - m_rtMatchCallCount++; -#endif - - ASSERT(m_state != ParseError); - compileIfNecessary(vm, s.is8Bit() ? Yarr::Char8 : Yarr::Char16); + ConcurrentJSLocker locker(m_lock); - int offsetVectorSize = (m_numSubpatterns + 1) * 2; - ovector.resize(offsetVectorSize); - int* offsetVector = ovector.data(); + if (!hasCodeFor(s.is8Bit() ? Yarr::Char8 : Yarr::Char16)) + return false; - int result; -#if ENABLE(YARR_JIT) - if (m_state == JITCode) { - if (s.is8Bit()) - result = m_regExpJITCode.execute(s.characters8(), startOffset, s.length(), offsetVector).start; - else - result = m_regExpJITCode.execute(s.characters16(), startOffset, s.length(), offsetVector).start; -#if ENABLE(YARR_JIT_DEBUG) - matchCompareWithInterpreter(s, startOffset, offsetVector, result); -#endif - } else -#endif - result = Yarr::interpret(m_regExpBytecode.get(), s, startOffset, reinterpret_cast<unsigned*>(offsetVector)); - - // FIXME: The YARR engine should handle unsigned or size_t length matches. - // The YARR Interpreter is "unsigned" clean, while the YARR JIT hasn't been addressed. - // The offset vector handling needs to change as well. - // Right now we convert a match where the offsets overflowed into match failure. - // There are two places in WebCore that call the interpreter directly that need to - // have their offsets changed to int as well. They are yarr/RegularExpression.cpp - // and inspector/ContentSearchUtilities.cpp - if (s.length() > INT_MAX) { - bool overflowed = false; - - if (result < -1) - overflowed = true; - - for (unsigned i = 0; i <= m_numSubpatterns; i++) { - if ((offsetVector[i*2] < -1) || ((offsetVector[i*2] >= 0) && (offsetVector[i*2+1] < -1))) { - overflowed = true; - offsetVector[i*2] = -1; - offsetVector[i*2+1] = -1; - } - } - - if (overflowed) - result = -1; - } - - ASSERT(result >= -1); - -#if REGEXP_FUNC_TEST_DATA_GEN - RegExpFunctionalTestCollector::get()->outputOneTest(this, s, startOffset, offsetVector, result); -#endif - -#if ENABLE(REGEXP_TRACING) - if (result != -1) - m_rtMatchFoundCount++; -#endif - - return result; + position = match(vm, s, startOffset, ovector); + return true; } void RegExp::compileMatchOnly(VM* vm, Yarr::YarrCharSize charSize) { - Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError); + ConcurrentJSLocker locker(m_lock); + + Yarr::YarrPattern pattern(m_patternString, m_flags, &m_constructionError, vm->stackLimit()); if (m_constructionError) { RELEASE_ASSERT_NOT_REACHED(); +#if COMPILER_QUIRK(CONSIDERS_UNREACHABLE_CODE) m_state = ParseError; return; +#endif } ASSERT(m_numSubpatterns == pattern.m_numSubpatterns); @@ -403,103 +334,55 @@ void RegExp::compileMatchOnly(VM* vm, Yarr::YarrCharSize charSize) } #if ENABLE(YARR_JIT) - if (!pattern.m_containsBackreferences && vm->canUseRegExpJIT()) { + if (!pattern.m_containsBackreferences && !pattern.containsUnsignedLengthPattern() && !unicode() && vm->canUseRegExpJIT()) { Yarr::jitCompile(pattern, charSize, vm, m_regExpJITCode, Yarr::MatchOnly); -#if ENABLE(YARR_JIT_DEBUG) - if (!m_regExpJITCode.isFallBack()) - m_state = JITCode; - else - m_state = ByteCode; -#else if (!m_regExpJITCode.isFallBack()) { m_state = JITCode; return; } -#endif } #else UNUSED_PARAM(charSize); #endif - m_regExpBytecode = Yarr::byteCompile(pattern, &vm->m_regExpAllocator); + m_state = ByteCode; + m_regExpBytecode = Yarr::byteCompile(pattern, &vm->m_regExpAllocator, &vm->m_regExpAllocatorLock); } -void RegExp::compileIfNecessaryMatchOnly(VM& vm, Yarr::YarrCharSize charSize) +MatchResult RegExp::match(VM& vm, const String& s, unsigned startOffset) { - if (hasCode()) { -#if ENABLE(YARR_JIT) - if (m_state != JITCode) - return; - if ((charSize == Yarr::Char8) && (m_regExpJITCode.has8BitCodeMatchOnly())) - return; - if ((charSize == Yarr::Char16) && (m_regExpJITCode.has16BitCodeMatchOnly())) - return; -#else - return; -#endif - } - - compileMatchOnly(&vm, charSize); + return matchInline(vm, s, startOffset); } -MatchResult RegExp::match(VM& vm, const String& s, unsigned startOffset) +bool RegExp::matchConcurrently(VM& vm, const String& s, unsigned startOffset, MatchResult& result) { -#if ENABLE(REGEXP_TRACING) - m_rtMatchCallCount++; -#endif + ConcurrentJSLocker locker(m_lock); - ASSERT(m_state != ParseError); - compileIfNecessaryMatchOnly(vm, s.is8Bit() ? Yarr::Char8 : Yarr::Char16); - -#if ENABLE(YARR_JIT) - if (m_state == JITCode) { - MatchResult result = s.is8Bit() ? - m_regExpJITCode.execute(s.characters8(), startOffset, s.length()) : - m_regExpJITCode.execute(s.characters16(), startOffset, s.length()); -#if ENABLE(REGEXP_TRACING) - if (!result) - m_rtMatchFoundCount++; -#endif - return result; - } -#endif + if (!hasMatchOnlyCodeFor(s.is8Bit() ? Yarr::Char8 : Yarr::Char16)) + return false; - int offsetVectorSize = (m_numSubpatterns + 1) * 2; - int* offsetVector; - Vector<int, 32> nonReturnedOvector; - nonReturnedOvector.resize(offsetVectorSize); - offsetVector = nonReturnedOvector.data(); - int r = Yarr::interpret(m_regExpBytecode.get(), s, startOffset, reinterpret_cast<unsigned*>(offsetVector)); -#if REGEXP_FUNC_TEST_DATA_GEN - RegExpFunctionalTestCollector::get()->outputOneTest(this, s, startOffset, offsetVector, result); -#endif - - if (r >= 0) { -#if ENABLE(REGEXP_TRACING) - m_rtMatchFoundCount++; -#endif - return MatchResult(r, reinterpret_cast<unsigned*>(offsetVector)[1]); - } - - return MatchResult::failed(); + result = match(vm, s, startOffset); + return true; } -void RegExp::invalidateCode() +void RegExp::deleteCode() { + ConcurrentJSLocker locker(m_lock); + if (!hasCode()) return; m_state = NotCompiled; #if ENABLE(YARR_JIT) m_regExpJITCode.clear(); #endif - m_regExpBytecode.clear(); + m_regExpBytecode = nullptr; } #if ENABLE(YARR_JIT_DEBUG) void RegExp::matchCompareWithInterpreter(const String& s, int startOffset, int* offsetVector, int jitResult) { int offsetVectorSize = (m_numSubpatterns + 1) * 2; - Vector<int, 32> interpreterOvector; + Vector<int> interpreterOvector; interpreterOvector.resize(offsetVectorSize); int* interpreterOffsetVector = interpreterOvector.data(); int interpreterResult = 0; @@ -563,16 +446,32 @@ void RegExp::matchCompareWithInterpreter(const String& s, int startOffset, int* Yarr::YarrCodeBlock& codeBlock = m_regExpJITCode; const size_t jitAddrSize = 20; - char jitAddr[jitAddrSize]; - if (m_state == JITCode) - snprintf(jitAddr, jitAddrSize, "fallback"); - else - snprintf(jitAddr, jitAddrSize, "0x%014lx", reinterpret_cast<unsigned long int>(codeBlock.getAddr())); + char jit8BitMatchOnlyAddr[jitAddrSize]; + char jit16BitMatchOnlyAddr[jitAddrSize]; + char jit8BitMatchAddr[jitAddrSize]; + char jit16BitMatchAddr[jitAddrSize]; + if (m_state == ByteCode) { + snprintf(jit8BitMatchOnlyAddr, jitAddrSize, "fallback "); + snprintf(jit16BitMatchOnlyAddr, jitAddrSize, "---- "); + snprintf(jit8BitMatchAddr, jitAddrSize, "fallback "); + snprintf(jit16BitMatchAddr, jitAddrSize, "---- "); + } else { + snprintf(jit8BitMatchOnlyAddr, jitAddrSize, "0x%014lx", reinterpret_cast<unsigned long int>(codeBlock.get8BitMatchOnlyAddr())); + snprintf(jit16BitMatchOnlyAddr, jitAddrSize, "0x%014lx", reinterpret_cast<unsigned long int>(codeBlock.get16BitMatchOnlyAddr())); + snprintf(jit8BitMatchAddr, jitAddrSize, "0x%014lx", reinterpret_cast<unsigned long int>(codeBlock.get8BitMatchAddr())); + snprintf(jit16BitMatchAddr, jitAddrSize, "0x%014lx", reinterpret_cast<unsigned long int>(codeBlock.get16BitMatchAddr())); + } #else - const char* jitAddr = "JIT Off"; + const char* jit8BitMatchOnlyAddr = "JIT Off"; + const char* jit16BitMatchOnlyAddr = ""; + const char* jit8BitMatchAddr = "JIT Off"; + const char* jit16BitMatchAddr = ""; #endif + unsigned averageMatchOnlyStringLen = (unsigned)(m_rtMatchOnlyTotalSubjectStringLen / m_rtMatchOnlyCallCount); + unsigned averageMatchStringLen = (unsigned)(m_rtMatchTotalSubjectStringLen / m_rtMatchCallCount); - printf("%-40.40s %16.16s %10d %10d\n", formattedPattern, jitAddr, m_rtMatchCallCount, m_rtMatchFoundCount); + printf("%-40.40s %16.16s %16.16s %10d %10d %10u\n", formattedPattern, jit8BitMatchOnlyAddr, jit16BitMatchOnlyAddr, m_rtMatchOnlyCallCount, m_rtMatchOnlyFoundCount, averageMatchOnlyStringLen); + printf(" %16.16s %16.16s %10d %10d %10u\n", jit8BitMatchAddr, jit16BitMatchAddr, m_rtMatchCallCount, m_rtMatchFoundCount, averageMatchStringLen); } #endif |