summary refs log tree commit diff
path: root/Source/JavaScriptCore/runtime/RegExp.cpp
diff options
context:
space:
mode:
author Lorry Tar Creator <lorry-tar-importer@lorry> 2017-06-27 06:07:23 +0000
committer Lorry Tar Creator <lorry-tar-importer@lorry> 2017-06-27 06:07:23 +0000
commit 1bf1084f2b10c3b47fd1a588d85d21ed0eb41d0c (patch)
tree 46dcd36c86e7fbc6e5df36deb463b33e9967a6f7 /Source/JavaScriptCore/runtime/RegExp.cpp
parent 32761a6cee1d0dee366b885b7b9c777e67885688 (diff)
download WebKitGtk-tarball-master.tar.gz
Diffstat (limited to 'Source/JavaScriptCore/runtime/RegExp.cpp')
-rw-r--r-- Source/JavaScriptCore/runtime/RegExp.cpp 297
1 files changed, 98 insertions, 199 deletions
diff --git a/Source/JavaScriptCore/runtime/RegExp.cpp b/Source/JavaScriptCore/runtime/RegExp.cpp
index 30c105d29..a372360ca 100644
--- a/Source/JavaScriptCore/runtime/RegExp.cpp
+++ b/Source/JavaScriptCore/runtime/RegExp.cpp
@@ -1,6 +1,6 @@
/*
* Copyright (C) 1999-2001, 2004 Harri Porten (porten@kde.org)
- * Copyright (c) 2007, 2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2007, 2008, 2016 Apple Inc. All rights reserved.
* Copyright (C) 2009 Torch Mobile, Inc.
* Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
*
@@ -24,23 +24,16 @@
#include "RegExp.h"
#include "Lexer.h"
-#include "Operations.h"
+#include "JSCInlines.h"
#include "RegExpCache.h"
+#include "RegExpInlines.h"
#include "Yarr.h"
#include "YarrJIT.h"
#include <wtf/Assertions.h>
-#define REGEXP_FUNC_TEST_DATA_GEN 0
-
-#if REGEXP_FUNC_TEST_DATA_GEN
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#endif
-
namespace JSC {
-const ClassInfo RegExp::s_info = { "RegExp", 0, 0, 0, CREATE_METHOD_TABLE(RegExp) };
+const ClassInfo RegExp::s_info = { "RegExp", 0, 0, CREATE_METHOD_TABLE(RegExp) };
RegExpFlags regExpFlags(const String& string)
{
@@ -66,6 +59,18 @@ RegExpFlags regExpFlags(const String& string)
flags = static_cast<RegExpFlags>(flags | FlagMultiline);
break;
+ case 'u':
+ if (flags & FlagUnicode)
+ return InvalidFlags;
+ flags = static_cast<RegExpFlags>(flags | FlagUnicode);
+ break;
+
+ case 'y':
+ if (flags & FlagSticky)
+ return InvalidFlags;
+ flags = static_cast<RegExpFlags>(flags | FlagSticky);
+ break;
+
default:
return InvalidFlags;
}
@@ -75,33 +80,6 @@ RegExpFlags regExpFlags(const String& string)
}
#if REGEXP_FUNC_TEST_DATA_GEN
-class RegExpFunctionalTestCollector {
- // This class is not thread safe.
-protected:
- static const char* const s_fileName;
-
-public:
- static RegExpFunctionalTestCollector* get();
-
- ~RegExpFunctionalTestCollector();
-
- void outputOneTest(RegExp*, String, int, int*, int);
- void clearRegExp(RegExp* regExp)
- {
- if (regExp == m_lastRegExp)
- m_lastRegExp = 0;
- }
-
-private:
- RegExpFunctionalTestCollector();
-
- void outputEscapedString(const String&, bool escapeSlash = false);
-
- static RegExpFunctionalTestCollector* s_instance;
- FILE* m_file;
- RegExp* m_lastRegExp;
-};
-
const char* const RegExpFunctionalTestCollector::s_fileName = "/tmp/RegExpTestsData";
RegExpFunctionalTestCollector* RegExpFunctionalTestCollector::s_instance = 0;
@@ -113,7 +91,7 @@ RegExpFunctionalTestCollector* RegExpFunctionalTestCollector::get()
return s_instance;
}
-void RegExpFunctionalTestCollector::outputOneTest(RegExp* regExp, String s, int startOffset, int* ovector, int result)
+void RegExpFunctionalTestCollector::outputOneTest(RegExp* regExp, const String& s, int startOffset, int* ovector, int result)
{
if ((!m_lastRegExp) || (m_lastRegExp != regExp)) {
m_lastRegExp = regExp;
@@ -126,6 +104,10 @@ void RegExpFunctionalTestCollector::outputOneTest(RegExp* regExp, String s, int
fputc('i', m_file);
if (regExp->multiline())
fputc('m', m_file);
+ if (regExp->sticky())
+ fputc('y', m_file);
+ if (regExp->unicode())
+ fputc('u', m_file);
fprintf(m_file, "\n");
}
@@ -227,6 +209,10 @@ RegExp::RegExp(VM& vm, const String& patternString, RegExpFlags flags)
, m_constructionError(0)
, m_numSubpatterns(0)
#if ENABLE(REGEXP_TRACING)
+ , m_rtMatchOnlyTotalSubjectStringLen(0.0)
+ , m_rtMatchTotalSubjectStringLen(0.0)
+ , m_rtMatchOnlyCallCount(0)
+ , m_rtMatchOnlyFoundCount(0)
, m_rtMatchCallCount(0)
, m_rtMatchFoundCount(0)
#endif
@@ -236,8 +222,8 @@ RegExp::RegExp(VM& vm, const String& patternString, RegExpFlags flags)
void RegExp::finishCreation(VM& vm)
{
Base::finishCreation(vm);
- Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError);
- if (m_constructionError)
+ Yarr::YarrPattern pattern(m_patternString, m_flags, &m_constructionError, vm.stackLimit());
+ if (!isValid())
m_state = ParseError;
else
m_numSubpatterns = pattern.m_numSubpatterns;
@@ -252,6 +238,16 @@ void RegExp::destroy(JSCell* cell)
thisObject->RegExp::~RegExp();
}
+size_t RegExp::estimatedSize(JSCell* cell)
+{
+ RegExp* thisObject = static_cast<RegExp*>(cell);
+ size_t regexDataSize = thisObject->m_regExpBytecode ? thisObject->m_regExpBytecode->estimatedSizeInBytes() : 0;
+#if ENABLE(YARR_JIT)
+ regexDataSize += thisObject->m_regExpJITCode.size();
+#endif
+ return Base::estimatedSize(cell) + regexDataSize;
+}
+
RegExp* RegExp::createWithoutCaching(VM& vm, const String& patternString, RegExpFlags flags)
{
RegExp* regExp = new (NotNull, allocateCell<RegExp>(vm.heap)) RegExp(vm, patternString, flags);
@@ -266,11 +262,15 @@ RegExp* RegExp::create(VM& vm, const String& patternString, RegExpFlags flags)
void RegExp::compile(VM* vm, Yarr::YarrCharSize charSize)
{
- Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError);
+ ConcurrentJSLocker locker(m_lock);
+
+ Yarr::YarrPattern pattern(m_patternString, m_flags, &m_constructionError, vm->stackLimit());
if (m_constructionError) {
RELEASE_ASSERT_NOT_REACHED();
+#if COMPILER_QUIRK(CONSIDERS_UNREACHABLE_CODE)
m_state = ParseError;
return;
+#endif
}
ASSERT(m_numSubpatterns == pattern.m_numSubpatterns);
@@ -281,118 +281,49 @@ void RegExp::compile(VM* vm, Yarr::YarrCharSize charSize)
}
#if ENABLE(YARR_JIT)
- if (!pattern.m_containsBackreferences && vm->canUseRegExpJIT()) {
+ if (!pattern.m_containsBackreferences && !pattern.containsUnsignedLengthPattern() && !unicode() && vm->canUseRegExpJIT()) {
Yarr::jitCompile(pattern, charSize, vm, m_regExpJITCode);
-#if ENABLE(YARR_JIT_DEBUG)
- if (!m_regExpJITCode.isFallBack())
- m_state = JITCode;
- else
- m_state = ByteCode;
-#else
if (!m_regExpJITCode.isFallBack()) {
m_state = JITCode;
return;
}
-#endif
}
#else
UNUSED_PARAM(charSize);
#endif
- m_regExpBytecode = Yarr::byteCompile(pattern, &vm->m_regExpAllocator);
+ m_state = ByteCode;
+ m_regExpBytecode = Yarr::byteCompile(pattern, &vm->m_regExpAllocator, &vm->m_regExpAllocatorLock);
}
-void RegExp::compileIfNecessary(VM& vm, Yarr::YarrCharSize charSize)
+int RegExp::match(VM& vm, const String& s, unsigned startOffset, Vector<int>& ovector)
{
- if (hasCode()) {
-#if ENABLE(YARR_JIT)
- if (m_state != JITCode)
- return;
- if ((charSize == Yarr::Char8) && (m_regExpJITCode.has8BitCode()))
- return;
- if ((charSize == Yarr::Char16) && (m_regExpJITCode.has16BitCode()))
- return;
-#else
- return;
-#endif
- }
-
- compile(&vm, charSize);
+ return matchInline(vm, s, startOffset, ovector);
}
-int RegExp::match(VM& vm, const String& s, unsigned startOffset, Vector<int, 32>& ovector)
+bool RegExp::matchConcurrently(
+ VM& vm, const String& s, unsigned startOffset, int& position, Vector<int>& ovector)
{
-#if ENABLE(REGEXP_TRACING)
- m_rtMatchCallCount++;
-#endif
-
- ASSERT(m_state != ParseError);
- compileIfNecessary(vm, s.is8Bit() ? Yarr::Char8 : Yarr::Char16);
+ ConcurrentJSLocker locker(m_lock);
- int offsetVectorSize = (m_numSubpatterns + 1) * 2;
- ovector.resize(offsetVectorSize);
- int* offsetVector = ovector.data();
+ if (!hasCodeFor(s.is8Bit() ? Yarr::Char8 : Yarr::Char16))
+ return false;
- int result;
-#if ENABLE(YARR_JIT)
- if (m_state == JITCode) {
- if (s.is8Bit())
- result = m_regExpJITCode.execute(s.characters8(), startOffset, s.length(), offsetVector).start;
- else
- result = m_regExpJITCode.execute(s.characters16(), startOffset, s.length(), offsetVector).start;
-#if ENABLE(YARR_JIT_DEBUG)
- matchCompareWithInterpreter(s, startOffset, offsetVector, result);
-#endif
- } else
-#endif
- result = Yarr::interpret(m_regExpBytecode.get(), s, startOffset, reinterpret_cast<unsigned*>(offsetVector));
-
- // FIXME: The YARR engine should handle unsigned or size_t length matches.
- // The YARR Interpreter is "unsigned" clean, while the YARR JIT hasn't been addressed.
- // The offset vector handling needs to change as well.
- // Right now we convert a match where the offsets overflowed into match failure.
- // There are two places in WebCore that call the interpreter directly that need to
- // have their offsets changed to int as well. They are yarr/RegularExpression.cpp
- // and inspector/ContentSearchUtilities.cpp
- if (s.length() > INT_MAX) {
- bool overflowed = false;
-
- if (result < -1)
- overflowed = true;
-
- for (unsigned i = 0; i <= m_numSubpatterns; i++) {
- if ((offsetVector[i*2] < -1) || ((offsetVector[i*2] >= 0) && (offsetVector[i*2+1] < -1))) {
- overflowed = true;
- offsetVector[i*2] = -1;
- offsetVector[i*2+1] = -1;
- }
- }
-
- if (overflowed)
- result = -1;
- }
-
- ASSERT(result >= -1);
-
-#if REGEXP_FUNC_TEST_DATA_GEN
- RegExpFunctionalTestCollector::get()->outputOneTest(this, s, startOffset, offsetVector, result);
-#endif
-
-#if ENABLE(REGEXP_TRACING)
- if (result != -1)
- m_rtMatchFoundCount++;
-#endif
-
- return result;
+ position = match(vm, s, startOffset, ovector);
+ return true;
}
void RegExp::compileMatchOnly(VM* vm, Yarr::YarrCharSize charSize)
{
- Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError);
+ ConcurrentJSLocker locker(m_lock);
+
+ Yarr::YarrPattern pattern(m_patternString, m_flags, &m_constructionError, vm->stackLimit());
if (m_constructionError) {
RELEASE_ASSERT_NOT_REACHED();
+#if COMPILER_QUIRK(CONSIDERS_UNREACHABLE_CODE)
m_state = ParseError;
return;
+#endif
}
ASSERT(m_numSubpatterns == pattern.m_numSubpatterns);
@@ -403,103 +334,55 @@ void RegExp::compileMatchOnly(VM* vm, Yarr::YarrCharSize charSize)
}
#if ENABLE(YARR_JIT)
- if (!pattern.m_containsBackreferences && vm->canUseRegExpJIT()) {
+ if (!pattern.m_containsBackreferences && !pattern.containsUnsignedLengthPattern() && !unicode() && vm->canUseRegExpJIT()) {
Yarr::jitCompile(pattern, charSize, vm, m_regExpJITCode, Yarr::MatchOnly);
-#if ENABLE(YARR_JIT_DEBUG)
- if (!m_regExpJITCode.isFallBack())
- m_state = JITCode;
- else
- m_state = ByteCode;
-#else
if (!m_regExpJITCode.isFallBack()) {
m_state = JITCode;
return;
}
-#endif
}
#else
UNUSED_PARAM(charSize);
#endif
- m_regExpBytecode = Yarr::byteCompile(pattern, &vm->m_regExpAllocator);
+ m_state = ByteCode;
+ m_regExpBytecode = Yarr::byteCompile(pattern, &vm->m_regExpAllocator, &vm->m_regExpAllocatorLock);
}
-void RegExp::compileIfNecessaryMatchOnly(VM& vm, Yarr::YarrCharSize charSize)
+MatchResult RegExp::match(VM& vm, const String& s, unsigned startOffset)
{
- if (hasCode()) {
-#if ENABLE(YARR_JIT)
- if (m_state != JITCode)
- return;
- if ((charSize == Yarr::Char8) && (m_regExpJITCode.has8BitCodeMatchOnly()))
- return;
- if ((charSize == Yarr::Char16) && (m_regExpJITCode.has16BitCodeMatchOnly()))
- return;
-#else
- return;
-#endif
- }
-
- compileMatchOnly(&vm, charSize);
+ return matchInline(vm, s, startOffset);
}
-MatchResult RegExp::match(VM& vm, const String& s, unsigned startOffset)
+bool RegExp::matchConcurrently(VM& vm, const String& s, unsigned startOffset, MatchResult& result)
{
-#if ENABLE(REGEXP_TRACING)
- m_rtMatchCallCount++;
-#endif
+ ConcurrentJSLocker locker(m_lock);
- ASSERT(m_state != ParseError);
- compileIfNecessaryMatchOnly(vm, s.is8Bit() ? Yarr::Char8 : Yarr::Char16);
-
-#if ENABLE(YARR_JIT)
- if (m_state == JITCode) {
- MatchResult result = s.is8Bit() ?
- m_regExpJITCode.execute(s.characters8(), startOffset, s.length()) :
- m_regExpJITCode.execute(s.characters16(), startOffset, s.length());
-#if ENABLE(REGEXP_TRACING)
- if (!result)
- m_rtMatchFoundCount++;
-#endif
- return result;
- }
-#endif
+ if (!hasMatchOnlyCodeFor(s.is8Bit() ? Yarr::Char8 : Yarr::Char16))
+ return false;
- int offsetVectorSize = (m_numSubpatterns + 1) * 2;
- int* offsetVector;
- Vector<int, 32> nonReturnedOvector;
- nonReturnedOvector.resize(offsetVectorSize);
- offsetVector = nonReturnedOvector.data();
- int r = Yarr::interpret(m_regExpBytecode.get(), s, startOffset, reinterpret_cast<unsigned*>(offsetVector));
-#if REGEXP_FUNC_TEST_DATA_GEN
- RegExpFunctionalTestCollector::get()->outputOneTest(this, s, startOffset, offsetVector, result);
-#endif
-
- if (r >= 0) {
-#if ENABLE(REGEXP_TRACING)
- m_rtMatchFoundCount++;
-#endif
- return MatchResult(r, reinterpret_cast<unsigned*>(offsetVector)[1]);
- }
-
- return MatchResult::failed();
+ result = match(vm, s, startOffset);
+ return true;
}
-void RegExp::invalidateCode()
+void RegExp::deleteCode()
{
+ ConcurrentJSLocker locker(m_lock);
+
if (!hasCode())
return;
m_state = NotCompiled;
#if ENABLE(YARR_JIT)
m_regExpJITCode.clear();
#endif
- m_regExpBytecode.clear();
+ m_regExpBytecode = nullptr;
}
#if ENABLE(YARR_JIT_DEBUG)
void RegExp::matchCompareWithInterpreter(const String& s, int startOffset, int* offsetVector, int jitResult)
{
int offsetVectorSize = (m_numSubpatterns + 1) * 2;
- Vector<int, 32> interpreterOvector;
+ Vector<int> interpreterOvector;
interpreterOvector.resize(offsetVectorSize);
int* interpreterOffsetVector = interpreterOvector.data();
int interpreterResult = 0;
@@ -563,16 +446,32 @@ void RegExp::matchCompareWithInterpreter(const String& s, int startOffset, int*
Yarr::YarrCodeBlock& codeBlock = m_regExpJITCode;
const size_t jitAddrSize = 20;
- char jitAddr[jitAddrSize];
- if (m_state == JITCode)
- snprintf(jitAddr, jitAddrSize, "fallback");
- else
- snprintf(jitAddr, jitAddrSize, "0x%014lx", reinterpret_cast<unsigned long int>(codeBlock.getAddr()));
+ char jit8BitMatchOnlyAddr[jitAddrSize];
+ char jit16BitMatchOnlyAddr[jitAddrSize];
+ char jit8BitMatchAddr[jitAddrSize];
+ char jit16BitMatchAddr[jitAddrSize];
+ if (m_state == ByteCode) {
+ snprintf(jit8BitMatchOnlyAddr, jitAddrSize, "fallback ");
+ snprintf(jit16BitMatchOnlyAddr, jitAddrSize, "---- ");
+ snprintf(jit8BitMatchAddr, jitAddrSize, "fallback ");
+ snprintf(jit16BitMatchAddr, jitAddrSize, "---- ");
+ } else {
+ snprintf(jit8BitMatchOnlyAddr, jitAddrSize, "0x%014lx", reinterpret_cast<unsigned long int>(codeBlock.get8BitMatchOnlyAddr()));
+ snprintf(jit16BitMatchOnlyAddr, jitAddrSize, "0x%014lx", reinterpret_cast<unsigned long int>(codeBlock.get16BitMatchOnlyAddr()));
+ snprintf(jit8BitMatchAddr, jitAddrSize, "0x%014lx", reinterpret_cast<unsigned long int>(codeBlock.get8BitMatchAddr()));
+ snprintf(jit16BitMatchAddr, jitAddrSize, "0x%014lx", reinterpret_cast<unsigned long int>(codeBlock.get16BitMatchAddr()));
+ }
#else
- const char* jitAddr = "JIT Off";
+ const char* jit8BitMatchOnlyAddr = "JIT Off";
+ const char* jit16BitMatchOnlyAddr = "";
+ const char* jit8BitMatchAddr = "JIT Off";
+ const char* jit16BitMatchAddr = "";
#endif
+ unsigned averageMatchOnlyStringLen = (unsigned)(m_rtMatchOnlyTotalSubjectStringLen / m_rtMatchOnlyCallCount);
+ unsigned averageMatchStringLen = (unsigned)(m_rtMatchTotalSubjectStringLen / m_rtMatchCallCount);
- printf("%-40.40s %16.16s %10d %10d\n", formattedPattern, jitAddr, m_rtMatchCallCount, m_rtMatchFoundCount);
+ printf("%-40.40s %16.16s %16.16s %10d %10d %10u\n", formattedPattern, jit8BitMatchOnlyAddr, jit16BitMatchOnlyAddr, m_rtMatchOnlyCallCount, m_rtMatchOnlyFoundCount, averageMatchOnlyStringLen);
+ printf(" %16.16s %16.16s %10d %10d %10u\n", jit8BitMatchAddr, jit16BitMatchAddr, m_rtMatchCallCount, m_rtMatchFoundCount, averageMatchStringLen);
}
#endif