1 files changed, 89 insertions, 79 deletions
diff --git a/Source/JavaScriptCore/parser/Lexer.h b/Source/JavaScriptCore/parser/Lexer.h
index 28c61ba19..7283fe7ca 100644
--- a/Source/JavaScriptCore/parser/Lexer.h
+++ b/Source/JavaScriptCore/parser/Lexer.h
@@ -20,59 +20,36 @@
  *
  */
 
-#ifndef Lexer_h
-#define Lexer_h
+#pragma once
 
 #include "Lookup.h"
 #include "ParserArena.h"
 #include "ParserTokens.h"
 #include "SourceCode.h"
 #include <wtf/ASCIICType.h>
-#include <wtf/SegmentedVector.h>
 #include <wtf/Vector.h>
-#include <wtf/unicode/Unicode.h>
 
 namespace JSC {
 
-class Keywords {
-public:
-    bool isKeyword(const Identifier& ident) const
-    {
-        return m_keywordTable.entry(m_vm, ident);
-    }
-    
-    const HashEntry* getKeyword(const Identifier& ident) const
-    {
-        return m_keywordTable.entry(m_vm, ident);
-    }
-    
-    ~Keywords()
-    {
-        m_keywordTable.deleteTable();
-    }
-    
-private:
-    friend class VM;
-    
-    explicit Keywords(VM&);
-    
-    VM& m_vm;
-    const HashTable m_keywordTable;
-};
-
 enum LexerFlags {
     LexerFlagsIgnoreReservedWords = 1, 
     LexerFlagsDontBuildStrings = 2,
     LexexFlagsDontBuildKeywords = 4
 };
 
+enum class LexerEscapeParseMode { Template, String };
+
+struct ParsedUnicodeEscapeValue;
+
+bool isLexerKeyword(const Identifier&);
+
 template <typename T>
 class Lexer {
     WTF_MAKE_NONCOPYABLE(Lexer);
     WTF_MAKE_FAST_ALLOCATED;
 
 public:
-    Lexer(VM*);
+    Lexer(VM*, JSParserBuiltinMode, JSParserScriptMode);
     ~Lexer();
 
     // Character manipulation functions.
@@ -83,8 +60,8 @@ public:
 
     // Functions to set up parsing.
     void setCode(const SourceCode&, ParserArena*);
-    void setIsReparsing() { m_isReparsing = true; }
-    bool isReparsing() const { return m_isReparsing; }
+    void setIsReparsingFunction() { m_isReparsingFunction = true; }
+    bool isReparsingFunction() const { return m_isReparsingFunction; }
 
     JSTokenType lex(JSToken*, unsigned, bool strictMode);
     bool nextTokenIsColon();
@@ -96,15 +73,21 @@ public:
         return JSTextPosition(m_lineNumber, currentOffset(), currentLineStartOffset());
     }
     JSTextPosition positionBeforeLastNewline() const { return m_positionBeforeLastNewline; }
+    JSTokenLocation lastTokenLocation() const { return m_lastTokenLocation; }
     void setLastLineNumber(int lastLineNumber) { m_lastLineNumber = lastLineNumber; }
     int lastLineNumber() const { return m_lastLineNumber; }
     bool prevTerminator() const { return m_terminator; }
-    bool scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix = 0);
-    bool skipRegExp();
+    JSTokenType scanRegExp(JSToken*, UChar patternPrefix = 0);
+    enum class RawStringsBuildMode { BuildRawStrings, DontBuildRawStrings };
+    JSTokenType scanTemplateString(JSToken*, RawStringsBuildMode);
 
     // Functions for use after parsing.
     bool sawError() const { return m_error; }
+    void setSawError(bool sawError) { m_error = sawError; }
     String getErrorMessage() const { return m_lexErrorMessage; }
+    void setErrorMessage(const String& errorMessage) { m_lexErrorMessage = errorMessage; }
+    String sourceURL() const { return m_sourceURLDirective; }
+    String sourceMappingURL() const { return m_sourceMappingURLDirective; }
     void clear();
     void setOffset(int offset, int lineStartOffset)
     {
@@ -126,52 +109,34 @@ public:
     {
         m_lineNumber = line;
     }
-
-    SourceProvider* sourceProvider() const { return m_source->provider(); }
+    void setTerminator(bool terminator)
+    {
+        m_terminator = terminator;
+    }
 
     JSTokenType lexExpectIdentifier(JSToken*, unsigned, bool strictMode);
 
+    ALWAYS_INLINE StringView getToken(const JSToken& token)
+    {
+        SourceProvider* sourceProvider = m_source->provider();
+        ASSERT_WITH_MESSAGE(token.m_location.startOffset <= token.m_location.endOffset, "Calling this function with the baked token.");
+        return sourceProvider->getRange(token.m_location.startOffset, token.m_location.endOffset);
+    }
+
 private:
     void record8(int);
     void append8(const T*, size_t);
     void record16(int);
     void record16(T);
+    void recordUnicodeCodePoint(UChar32);
     void append16(const LChar*, size_t);
     void append16(const UChar* characters, size_t length) { m_buffer16.append(characters, length); }
 
     ALWAYS_INLINE void shift();
     ALWAYS_INLINE bool atEnd() const;
     ALWAYS_INLINE T peek(int offset) const;
-    struct UnicodeHexValue {
-        
-        enum ValueType { ValidHex, IncompleteHex, InvalidHex };
-        
-        explicit UnicodeHexValue(int value)
-            : m_value(value)
-        {
-        }
-        explicit UnicodeHexValue(ValueType type)
-            : m_value(type == IncompleteHex ? -2 : -1)
-        {
-        }
-
-        ValueType valueType() const
-        {
-            if (m_value >= 0)
-                return ValidHex;
-            return m_value == -2 ? IncompleteHex : InvalidHex;
-        }
-        bool isValid() const { return m_value >= 0; }
-        int value() const
-        {
-            ASSERT(m_value >= 0);
-            return m_value;
-        }
-        
-    private:
-        int m_value;
-    };
-    UnicodeHexValue parseFourDigitUnicodeHex();
+
+    ParsedUnicodeEscapeValue parseUnicodeEscape();
     void shiftLineTerminator();
 
     ALWAYS_INLINE int offsetFromSourcePtr(const T* ptr) const { return ptr - m_codeStart; }
@@ -181,7 +146,7 @@ private:
     ALWAYS_INLINE const T* currentSourcePtr() const;
     ALWAYS_INLINE void setOffsetFromSourcePtr(const T* sourcePtr, unsigned lineStartOffset) { setOffset(offsetFromSourcePtr(sourcePtr), lineStartOffset); }
 
-    ALWAYS_INLINE void setCodeStart(const StringImpl*);
+    ALWAYS_INLINE void setCodeStart(const StringView&);
 
     ALWAYS_INLINE const Identifier* makeIdentifier(const LChar* characters, size_t length);
     ALWAYS_INLINE const Identifier* makeIdentifier(const UChar* characters, size_t length);
@@ -189,8 +154,11 @@ private:
     ALWAYS_INLINE const Identifier* makeLCharIdentifier(const UChar* characters, size_t length);
     ALWAYS_INLINE const Identifier* makeRightSizedIdentifier(const UChar* characters, size_t length, UChar orAllChars);
     ALWAYS_INLINE const Identifier* makeIdentifierLCharFromUChar(const UChar* characters, size_t length);
+    ALWAYS_INLINE const Identifier* makeEmptyIdentifier();
 
     ALWAYS_INLINE bool lastTokenWasRestrKeyword() const;
+    
+    ALWAYS_INLINE void skipWhitespace();
 
     template <int shiftAmount> void internalShift();
     template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType parseKeyword(JSTokenData*);
@@ -203,13 +171,25 @@ private:
     };
     template <bool shouldBuildStrings> ALWAYS_INLINE StringParseResult parseString(JSTokenData*, bool strictMode);
     template <bool shouldBuildStrings> NEVER_INLINE StringParseResult parseStringSlowCase(JSTokenData*, bool strictMode);
+
+    template <bool shouldBuildStrings, LexerEscapeParseMode escapeParseMode> ALWAYS_INLINE StringParseResult parseComplexEscape(bool strictMode, T stringQuoteCharacter);
+    ALWAYS_INLINE StringParseResult parseTemplateLiteral(JSTokenData*, RawStringsBuildMode);
     ALWAYS_INLINE void parseHex(double& returnValue);
+    ALWAYS_INLINE bool parseBinary(double& returnValue);
     ALWAYS_INLINE bool parseOctal(double& returnValue);
     ALWAYS_INLINE bool parseDecimal(double& returnValue);
     ALWAYS_INLINE void parseNumberAfterDecimalPoint();
     ALWAYS_INLINE bool parseNumberAfterExponentIndicator();
     ALWAYS_INLINE bool parseMultilineComment();
 
+    ALWAYS_INLINE void parseCommentDirective();
+    ALWAYS_INLINE String parseCommentDirectiveValue();
+
+    template <unsigned length>
+    ALWAYS_INLINE bool consume(const char (&input)[length]);
+
+    void fillTokenInfo(JSToken*, JSTokenType, int lineNumber, int endOffset, int lineStartOffset, JSTextPosition endPosition);
+
     static const size_t initialReadBufferCapacity = 32;
 
     int m_lineNumber;
@@ -217,6 +197,7 @@ private:
 
     Vector<LChar> m_buffer8;
     Vector<UChar> m_buffer16;
+    Vector<UChar> m_bufferForRawTemplateString16;
     bool m_terminator;
     int m_lastToken;
 
@@ -228,16 +209,22 @@ private:
     const T* m_codeStartPlusOffset;
     const T* m_lineStart;
     JSTextPosition m_positionBeforeLastNewline;
-    bool m_isReparsing;
+    JSTokenLocation m_lastTokenLocation;
+    bool m_isReparsingFunction;
     bool m_atLineStart;
     bool m_error;
     String m_lexErrorMessage;
 
+    String m_sourceURLDirective;
+    String m_sourceMappingURLDirective;
+
     T m_current;
 
     IdentifierArena* m_arena;
 
     VM* m_vm;
+    bool m_parsingBuiltinFunction;
+    JSParserScriptMode m_scriptMode;
 };
 
 template <>
@@ -249,7 +236,8 @@ ALWAYS_INLINE bool Lexer<LChar>::isWhiteSpace(LChar ch)
 template <>
 ALWAYS_INLINE bool Lexer<UChar>::isWhiteSpace(UChar ch)
 {
-    return (ch < 256) ? Lexer<LChar>::isWhiteSpace(static_cast<LChar>(ch)) : (u_charType(ch) == U_SPACE_SEPARATOR || ch == 0xFEFF);
+    // 0x180E used to be in Zs category before Unicode 6.3, and EcmaScript says that we should keep treating it as such.
+    return (ch < 256) ? Lexer<LChar>::isWhiteSpace(static_cast<LChar>(ch)) : (u_charType(ch) == U_SPACE_SEPARATOR || ch == 0x180E || ch == 0xFEFF);
 }
 
 template <>
@@ -303,18 +291,24 @@ ALWAYS_INLINE const Identifier* Lexer<UChar>::makeRightSizedIdentifier(const UCh
     return &m_arena->makeIdentifier(m_vm, characters, length);
 }
 
+template <typename T>
+ALWAYS_INLINE const Identifier* Lexer<T>::makeEmptyIdentifier()
+{
+    return &m_arena->makeEmptyIdentifier(m_vm);
+}
+
 template <>
-ALWAYS_INLINE void Lexer<LChar>::setCodeStart(const StringImpl* sourceString)
+ALWAYS_INLINE void Lexer<LChar>::setCodeStart(const StringView& sourceString)
 {
-    ASSERT(sourceString->is8Bit());
-    m_codeStart = sourceString->characters8();
+    ASSERT(sourceString.is8Bit());
+    m_codeStart = sourceString.characters8();
 }
 
 template <>
-ALWAYS_INLINE void Lexer<UChar>::setCodeStart(const StringImpl* sourceString)
+ALWAYS_INLINE void Lexer<UChar>::setCodeStart(const StringView& sourceString)
 {
-    ASSERT(!sourceString->is8Bit());
-    m_codeStart = sourceString->characters16();
+    ASSERT(!sourceString.is8Bit());
+    m_codeStart = sourceString.characters16();
 }
 
 template <typename T>
@@ -335,6 +329,12 @@ ALWAYS_INLINE const Identifier* Lexer<T>::makeLCharIdentifier(const UChar* chara
     return &m_arena->makeIdentifierLCharFromUChar(m_vm, characters, length);
 }
 
+#if ASSERT_DISABLED
+ALWAYS_INLINE bool isSafeBuiltinIdentifier(VM&, const Identifier*) { return true; }
+#else
+bool isSafeBuiltinIdentifier(VM&, const Identifier*);
+#endif
+
 template <typename T>
 ALWAYS_INLINE JSTokenType Lexer<T>::lexExpectIdentifier(JSToken* tokenRecord, unsigned lexerFlags, bool strictMode)
 {
@@ -370,10 +370,15 @@ ALWAYS_INLINE JSTokenType Lexer<T>::lexExpectIdentifier(JSToken* tokenRecord, un
     ASSERT(currentOffset() >= currentLineStartOffset());
 
     // Create the identifier if needed
-    if (lexerFlags & LexexFlagsDontBuildKeywords)
+    if (lexerFlags & LexexFlagsDontBuildKeywords
+#if !ASSERT_DISABLED
+        && !m_parsingBuiltinFunction
+#endif
+        )
         tokenData->ident = 0;
     else
         tokenData->ident = makeLCharIdentifier(start, ptr - start);
+
     tokenLocation->line = m_lineNumber;
     tokenLocation->lineStartOffset = currentLineStartOffset();
     tokenLocation->startOffset = offsetFromSourcePtr(start);
@@ -381,6 +386,13 @@ ALWAYS_INLINE JSTokenType Lexer<T>::lexExpectIdentifier(JSToken* tokenRecord, un
     ASSERT(tokenLocation->startOffset >= tokenLocation->lineStartOffset);
     tokenRecord->m_startPosition = startPosition;
     tokenRecord->m_endPosition = currentPosition();
+#if !ASSERT_DISABLED
+    if (m_parsingBuiltinFunction) {
+        if (!isSafeBuiltinIdentifier(*m_vm, tokenData->ident))
+            return ERRORTOK;
+    }
+#endif
+
     m_lastToken = IDENT;
     return IDENT;
     
@@ -389,5 +401,3 @@ slowCase:
 }
 
 } // namespace JSC
-
-#endif // Lexer_h