diff options
author | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-06-27 06:07:23 +0000 |
---|---|---|
committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-06-27 06:07:23 +0000 |
commit | 1bf1084f2b10c3b47fd1a588d85d21ed0eb41d0c (patch) | |
tree | 46dcd36c86e7fbc6e5df36deb463b33e9967a6f7 /Source/JavaScriptCore/parser/Lexer.h | |
parent | 32761a6cee1d0dee366b885b7b9c777e67885688 (diff) | |
download | WebKitGtk-tarball-master.tar.gz |
webkitgtk-2.16.5HEADwebkitgtk-2.16.5master
Diffstat (limited to 'Source/JavaScriptCore/parser/Lexer.h')
-rw-r--r-- | Source/JavaScriptCore/parser/Lexer.h | 168 |
1 files changed, 89 insertions, 79 deletions
diff --git a/Source/JavaScriptCore/parser/Lexer.h b/Source/JavaScriptCore/parser/Lexer.h index 28c61ba19..7283fe7ca 100644 --- a/Source/JavaScriptCore/parser/Lexer.h +++ b/Source/JavaScriptCore/parser/Lexer.h @@ -20,59 +20,36 @@ * */ -#ifndef Lexer_h -#define Lexer_h +#pragma once #include "Lookup.h" #include "ParserArena.h" #include "ParserTokens.h" #include "SourceCode.h" #include <wtf/ASCIICType.h> -#include <wtf/SegmentedVector.h> #include <wtf/Vector.h> -#include <wtf/unicode/Unicode.h> namespace JSC { -class Keywords { -public: - bool isKeyword(const Identifier& ident) const - { - return m_keywordTable.entry(m_vm, ident); - } - - const HashEntry* getKeyword(const Identifier& ident) const - { - return m_keywordTable.entry(m_vm, ident); - } - - ~Keywords() - { - m_keywordTable.deleteTable(); - } - -private: - friend class VM; - - explicit Keywords(VM&); - - VM& m_vm; - const HashTable m_keywordTable; -}; - enum LexerFlags { LexerFlagsIgnoreReservedWords = 1, LexerFlagsDontBuildStrings = 2, LexexFlagsDontBuildKeywords = 4 }; +enum class LexerEscapeParseMode { Template, String }; + +struct ParsedUnicodeEscapeValue; + +bool isLexerKeyword(const Identifier&); + template <typename T> class Lexer { WTF_MAKE_NONCOPYABLE(Lexer); WTF_MAKE_FAST_ALLOCATED; public: - Lexer(VM*); + Lexer(VM*, JSParserBuiltinMode, JSParserScriptMode); ~Lexer(); // Character manipulation functions. @@ -83,8 +60,8 @@ public: // Functions to set up parsing. void setCode(const SourceCode&, ParserArena*); - void setIsReparsing() { m_isReparsing = true; } - bool isReparsing() const { return m_isReparsing; } + void setIsReparsingFunction() { m_isReparsingFunction = true; } + bool isReparsingFunction() const { return m_isReparsingFunction; } JSTokenType lex(JSToken*, unsigned, bool strictMode); bool nextTokenIsColon(); @@ -96,15 +73,21 @@ public: return JSTextPosition(m_lineNumber, currentOffset(), currentLineStartOffset()); } JSTextPosition positionBeforeLastNewline() const { return m_positionBeforeLastNewline; } + JSTokenLocation lastTokenLocation() const { return m_lastTokenLocation; } void setLastLineNumber(int lastLineNumber) { m_lastLineNumber = lastLineNumber; } int lastLineNumber() const { return m_lastLineNumber; } bool prevTerminator() const { return m_terminator; } - bool scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix = 0); - bool skipRegExp(); + JSTokenType scanRegExp(JSToken*, UChar patternPrefix = 0); + enum class RawStringsBuildMode { BuildRawStrings, DontBuildRawStrings }; + JSTokenType scanTemplateString(JSToken*, RawStringsBuildMode); // Functions for use after parsing. bool sawError() const { return m_error; } + void setSawError(bool sawError) { m_error = sawError; } String getErrorMessage() const { return m_lexErrorMessage; } + void setErrorMessage(const String& errorMessage) { m_lexErrorMessage = errorMessage; } + String sourceURL() const { return m_sourceURLDirective; } + String sourceMappingURL() const { return m_sourceMappingURLDirective; } void clear(); void setOffset(int offset, int lineStartOffset) { @@ -126,52 +109,34 @@ public: { m_lineNumber = line; } - - SourceProvider* sourceProvider() const { return m_source->provider(); } + void setTerminator(bool terminator) + { + m_terminator = terminator; + } JSTokenType lexExpectIdentifier(JSToken*, unsigned, bool strictMode); + ALWAYS_INLINE StringView getToken(const JSToken& token) + { + SourceProvider* sourceProvider = m_source->provider(); + ASSERT_WITH_MESSAGE(token.m_location.startOffset <= token.m_location.endOffset, "Calling this function with the baked token."); + return sourceProvider->getRange(token.m_location.startOffset, token.m_location.endOffset); + } + private: void record8(int); void append8(const T*, size_t); void record16(int); void record16(T); + void recordUnicodeCodePoint(UChar32); void append16(const LChar*, size_t); void append16(const UChar* characters, size_t length) { m_buffer16.append(characters, length); } ALWAYS_INLINE void shift(); ALWAYS_INLINE bool atEnd() const; ALWAYS_INLINE T peek(int offset) const; - struct UnicodeHexValue { - - enum ValueType { ValidHex, IncompleteHex, InvalidHex }; - - explicit UnicodeHexValue(int value) - : m_value(value) - { - } - explicit UnicodeHexValue(ValueType type) - : m_value(type == IncompleteHex ? -2 : -1) - { - } - - ValueType valueType() const - { - if (m_value >= 0) - return ValidHex; - return m_value == -2 ? IncompleteHex : InvalidHex; - } - bool isValid() const { return m_value >= 0; } - int value() const - { - ASSERT(m_value >= 0); - return m_value; - } - - private: - int m_value; - }; - UnicodeHexValue parseFourDigitUnicodeHex(); + + ParsedUnicodeEscapeValue parseUnicodeEscape(); void shiftLineTerminator(); ALWAYS_INLINE int offsetFromSourcePtr(const T* ptr) const { return ptr - m_codeStart; } @@ -181,7 +146,7 @@ private: ALWAYS_INLINE const T* currentSourcePtr() const; ALWAYS_INLINE void setOffsetFromSourcePtr(const T* sourcePtr, unsigned lineStartOffset) { setOffset(offsetFromSourcePtr(sourcePtr), lineStartOffset); } - ALWAYS_INLINE void setCodeStart(const StringImpl*); + ALWAYS_INLINE void setCodeStart(const StringView&); ALWAYS_INLINE const Identifier* makeIdentifier(const LChar* characters, size_t length); ALWAYS_INLINE const Identifier* makeIdentifier(const UChar* characters, size_t length); @@ -189,8 +154,11 @@ private: ALWAYS_INLINE const Identifier* makeLCharIdentifier(const UChar* characters, size_t length); ALWAYS_INLINE const Identifier* makeRightSizedIdentifier(const UChar* characters, size_t length, UChar orAllChars); ALWAYS_INLINE const Identifier* makeIdentifierLCharFromUChar(const UChar* characters, size_t length); + ALWAYS_INLINE const Identifier* makeEmptyIdentifier(); ALWAYS_INLINE bool lastTokenWasRestrKeyword() const; + + ALWAYS_INLINE void skipWhitespace(); template <int shiftAmount> void internalShift(); template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType parseKeyword(JSTokenData*); @@ -203,13 +171,25 @@ private: }; template <bool shouldBuildStrings> ALWAYS_INLINE StringParseResult parseString(JSTokenData*, bool strictMode); template <bool shouldBuildStrings> NEVER_INLINE StringParseResult parseStringSlowCase(JSTokenData*, bool strictMode); + + template <bool shouldBuildStrings, LexerEscapeParseMode escapeParseMode> ALWAYS_INLINE StringParseResult parseComplexEscape(bool strictMode, T stringQuoteCharacter); + ALWAYS_INLINE StringParseResult parseTemplateLiteral(JSTokenData*, RawStringsBuildMode); ALWAYS_INLINE void parseHex(double& returnValue); + ALWAYS_INLINE bool parseBinary(double& returnValue); ALWAYS_INLINE bool parseOctal(double& returnValue); ALWAYS_INLINE bool parseDecimal(double& returnValue); ALWAYS_INLINE void parseNumberAfterDecimalPoint(); ALWAYS_INLINE bool parseNumberAfterExponentIndicator(); ALWAYS_INLINE bool parseMultilineComment(); + ALWAYS_INLINE void parseCommentDirective(); + ALWAYS_INLINE String parseCommentDirectiveValue(); + + template <unsigned length> + ALWAYS_INLINE bool consume(const char (&input)[length]); + + void fillTokenInfo(JSToken*, JSTokenType, int lineNumber, int endOffset, int lineStartOffset, JSTextPosition endPosition); + static const size_t initialReadBufferCapacity = 32; int m_lineNumber; @@ -217,6 +197,7 @@ private: Vector<LChar> m_buffer8; Vector<UChar> m_buffer16; + Vector<UChar> m_bufferForRawTemplateString16; bool m_terminator; int m_lastToken; @@ -228,16 +209,22 @@ private: const T* m_codeStartPlusOffset; const T* m_lineStart; JSTextPosition m_positionBeforeLastNewline; - bool m_isReparsing; + JSTokenLocation m_lastTokenLocation; + bool m_isReparsingFunction; bool m_atLineStart; bool m_error; String m_lexErrorMessage; + String m_sourceURLDirective; + String m_sourceMappingURLDirective; + T m_current; IdentifierArena* m_arena; VM* m_vm; + bool m_parsingBuiltinFunction; + JSParserScriptMode m_scriptMode; }; template <> @@ -249,7 +236,8 @@ ALWAYS_INLINE bool Lexer<LChar>::isWhiteSpace(LChar ch) template <> ALWAYS_INLINE bool Lexer<UChar>::isWhiteSpace(UChar ch) { - return (ch < 256) ? Lexer<LChar>::isWhiteSpace(static_cast<LChar>(ch)) : (u_charType(ch) == U_SPACE_SEPARATOR || ch == 0xFEFF); + // 0x180E used to be in Zs category before Unicode 6.3, and EcmaScript says that we should keep treating it as such. + return (ch < 256) ? Lexer<LChar>::isWhiteSpace(static_cast<LChar>(ch)) : (u_charType(ch) == U_SPACE_SEPARATOR || ch == 0x180E || ch == 0xFEFF); } template <> @@ -303,18 +291,24 @@ ALWAYS_INLINE const Identifier* Lexer<UChar>::makeRightSizedIdentifier(const UCh return &m_arena->makeIdentifier(m_vm, characters, length); } +template <typename T> +ALWAYS_INLINE const Identifier* Lexer<T>::makeEmptyIdentifier() +{ + return &m_arena->makeEmptyIdentifier(m_vm); +} + template <> -ALWAYS_INLINE void Lexer<LChar>::setCodeStart(const StringImpl* sourceString) +ALWAYS_INLINE void Lexer<LChar>::setCodeStart(const StringView& sourceString) { - ASSERT(sourceString->is8Bit()); - m_codeStart = sourceString->characters8(); + ASSERT(sourceString.is8Bit()); + m_codeStart = sourceString.characters8(); } template <> -ALWAYS_INLINE void Lexer<UChar>::setCodeStart(const StringImpl* sourceString) +ALWAYS_INLINE void Lexer<UChar>::setCodeStart(const StringView& sourceString) { - ASSERT(!sourceString->is8Bit()); - m_codeStart = sourceString->characters16(); + ASSERT(!sourceString.is8Bit()); + m_codeStart = sourceString.characters16(); } template <typename T> @@ -335,6 +329,12 @@ ALWAYS_INLINE const Identifier* Lexer<T>::makeLCharIdentifier(const UChar* chara return &m_arena->makeIdentifierLCharFromUChar(m_vm, characters, length); } +#if ASSERT_DISABLED +ALWAYS_INLINE bool isSafeBuiltinIdentifier(VM&, const Identifier*) { return true; } +#else +bool isSafeBuiltinIdentifier(VM&, const Identifier*); +#endif + template <typename T> ALWAYS_INLINE JSTokenType Lexer<T>::lexExpectIdentifier(JSToken* tokenRecord, unsigned lexerFlags, bool strictMode) { @@ -370,10 +370,15 @@ ALWAYS_INLINE JSTokenType Lexer<T>::lexExpectIdentifier(JSToken* tokenRecord, un ASSERT(currentOffset() >= currentLineStartOffset()); // Create the identifier if needed - if (lexerFlags & LexexFlagsDontBuildKeywords) + if (lexerFlags & LexexFlagsDontBuildKeywords +#if !ASSERT_DISABLED + && !m_parsingBuiltinFunction +#endif + ) tokenData->ident = 0; else tokenData->ident = makeLCharIdentifier(start, ptr - start); + tokenLocation->line = m_lineNumber; tokenLocation->lineStartOffset = currentLineStartOffset(); tokenLocation->startOffset = offsetFromSourcePtr(start); @@ -381,6 +386,13 @@ ALWAYS_INLINE JSTokenType Lexer<T>::lexExpectIdentifier(JSToken* tokenRecord, un ASSERT(tokenLocation->startOffset >= tokenLocation->lineStartOffset); tokenRecord->m_startPosition = startPosition; tokenRecord->m_endPosition = currentPosition(); +#if !ASSERT_DISABLED + if (m_parsingBuiltinFunction) { + if (!isSafeBuiltinIdentifier(*m_vm, tokenData->ident)) + return ERRORTOK; + } +#endif + m_lastToken = IDENT; return IDENT; @@ -389,5 +401,3 @@ slowCase: } } // namespace JSC - -#endif // Lexer_h |