1 files changed, 44 insertions, 259 deletions
diff --git a/deps/v8/src/scanner.h b/deps/v8/src/scanner.h
index df5cd72949..acb9b47bd9 100644
--- a/deps/v8/src/scanner.h
+++ b/deps/v8/src/scanner.h
@@ -35,65 +35,6 @@
 namespace v8 {
 namespace internal {
 
-
-class UTF8Buffer {
- public:
-  UTF8Buffer();
-  ~UTF8Buffer();
-
-  inline void AddChar(uc32 c) {
-    if (recording_) {
-      if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) {
-        buffer_.Add(static_cast<char>(c));
-      } else {
-        AddCharSlow(c);
-      }
-    }
-  }
-
-  void StartLiteral() {
-    buffer_.StartSequence();
-    recording_ = true;
-  }
-
-  Vector<const char> EndLiteral() {
-    if (recording_) {
-      recording_ = false;
-      buffer_.Add(kEndMarker);
-      Vector<char> sequence = buffer_.EndSequence();
-      return Vector<const char>(sequence.start(), sequence.length());
-    }
-    return Vector<const char>();
-  }
-
-  void DropLiteral() {
-    if (recording_) {
-      recording_ = false;
-      buffer_.DropSequence();
-    }
-  }
-
-  void Reset() {
-    buffer_.Reset();
-  }
-
-  // The end marker added after a parsed literal.
-  // Using zero allows the usage of strlen and similar functions on
-  // identifiers and numbers (but not strings, since they may contain zero
-  // bytes).
-  // TODO(lrn): Use '\xff' as end marker, since it cannot occur inside
-  // an utf-8 string. This requires changes in all places that uses
-  // str-functions on the literals, but allows a single pointer to represent
-  // the literal, even if it contains embedded zeros.
-  static const char kEndMarker = '\x00';
- private:
-  static const int kInitialCapacity = 256;
-  SequenceCollector<char, 4> buffer_;
-  bool recording_;
-  void AddCharSlow(uc32 c);
-};
-
-
 // UTF16 buffer to read characters from a character stream.
 class CharacterStreamUTF16Buffer: public UTF16Buffer {
  public:
@@ -134,175 +75,65 @@ class ExternalStringUTF16Buffer: public UTF16Buffer {
 };
 
 
-enum ParserLanguage { JAVASCRIPT, JSON };
-
-
-class Scanner {
+// Initializes a UTF16Buffer as input stream, using one of a number
+// of strategies depending on the available character sources.
+class StreamInitializer {
  public:
-  typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;
+  UTF16Buffer* Init(Handle<String> source,
+                    unibrow::CharacterStream* stream,
+                    int start_position,
+                    int end_position);
+ private:
+  // Different UTF16 buffers used to pull characters from. Based on input one of
+  // these will be initialized as the actual data source.
+  CharacterStreamUTF16Buffer char_stream_buffer_;
+  ExternalStringUTF16Buffer<ExternalTwoByteString, uint16_t>
+      two_byte_string_buffer_;
+  ExternalStringUTF16Buffer<ExternalAsciiString, char> ascii_string_buffer_;
 
-  class LiteralScope {
-   public:
-    explicit LiteralScope(Scanner* self);
-    ~LiteralScope();
-    void Complete();
+  // Used to convert the source string into a character stream when a stream
+  // is not passed to the scanner.
+  SafeStringInputBuffer safe_string_input_buffer_;
+};
 
-   private:
-    Scanner* scanner_;
-    bool complete_;
-  };
+// ----------------------------------------------------------------------------
+// V8JavaScriptScanner
+// JavaScript scanner getting its input from either a V8 String or a unicode
+// CharacterStream.
 
-  Scanner();
+class V8JavaScriptScanner : public JavaScriptScanner {
+ public:
+  V8JavaScriptScanner() {}
+
+  Token::Value NextCheckStack();
 
   // Initialize the Scanner to scan source.
-  void Initialize(Handle<String> source,
-                  ParserLanguage language);
+  void Initialize(Handle<String> source, int literal_flags = kAllLiterals);
   void Initialize(Handle<String> source,
                   unibrow::CharacterStream* stream,
-                  ParserLanguage language);
+                  int literal_flags = kAllLiterals);
   void Initialize(Handle<String> source,
                   int start_position, int end_position,
-                  ParserLanguage language);
-
-  // Returns the next token.
-  Token::Value Next();
-
-  // Returns the current token again.
-  Token::Value current_token() { return current_.token; }
-
-  // One token look-ahead (past the token returned by Next()).
-  Token::Value peek() const { return next_.token; }
-
-  // Returns true if there was a line terminator before the peek'ed token.
-  bool has_line_terminator_before_next() const {
-    return has_line_terminator_before_next_;
-  }
-
-  struct Location {
-    Location(int b, int e) : beg_pos(b), end_pos(e) { }
-    Location() : beg_pos(0), end_pos(0) { }
-    int beg_pos;
-    int end_pos;
-  };
-
-  // Returns the location information for the current token
-  // (the token returned by Next()).
-  Location location() const { return current_.location; }
-  Location peek_location() const { return next_.location; }
-
-  // Returns the literal string, if any, for the current token (the
-  // token returned by Next()). The string is 0-terminated and in
-  // UTF-8 format; they may contain 0-characters. Literal strings are
-  // collected for identifiers, strings, and numbers.
-  // These functions only give the correct result if the literal
-  // was scanned between calls to StartLiteral() and TerminateLiteral().
-  const char* literal_string() const {
-    return current_.literal_chars.start();
-  }
-
-  int literal_length() const {
-    // Excluding terminal '\x00' added by TerminateLiteral().
-    return current_.literal_chars.length() - 1;
-  }
-
-  Vector<const char> literal() const {
-    return Vector<const char>(literal_string(), literal_length());
-  }
-
-  // Returns the literal string for the next token (the token that
-  // would be returned if Next() were called).
-  const char* next_literal_string() const {
-    return next_.literal_chars.start();
-  }
-
-
-  // Returns the length of the next token (that would be returned if
-  // Next() were called).
-  int next_literal_length() const {
-    // Excluding terminal '\x00' added by TerminateLiteral().
-    return next_.literal_chars.length() - 1;
-  }
-
-  Vector<const char> next_literal() const {
-    return Vector<const char>(next_literal_string(), next_literal_length());
-  }
-
-  // Scans the input as a regular expression pattern, previous
-  // character(s) must be /(=). Returns true if a pattern is scanned.
-  bool ScanRegExpPattern(bool seen_equal);
-  // Returns true if regexp flags are scanned (always since flags can
-  // be empty).
-  bool ScanRegExpFlags();
+                  int literal_flags = kAllLiterals);
 
-  // Seek forward to the given position.  This operation does not
-  // work in general, for instance when there are pushed back
-  // characters, but works for seeking forward until simple delimiter
-  // tokens, which is what it is used for.
-  void SeekForward(int pos);
-
-  bool stack_overflow() { return stack_overflow_; }
+ protected:
+  StreamInitializer stream_initializer_;
+};
 
-  // Tells whether the buffer contains an identifier (no escapes).
-  // Used for checking if a property name is an identifier.
-  static bool IsIdentifier(unibrow::CharacterStream* buffer);
 
-  static const int kCharacterLookaheadBufferSize = 1;
-  static const int kNoEndPosition = 1;
+class JsonScanner : public Scanner {
+ public:
+  JsonScanner();
 
- private:
-  // The current and look-ahead token.
-  struct TokenDesc {
-    Token::Value token;
-    Location location;
-    Vector<const char> literal_chars;
-  };
-
-  void Init(Handle<String> source,
-            unibrow::CharacterStream* stream,
-            int start_position, int end_position,
-            ParserLanguage language);
-
-  // Literal buffer support
-  inline void StartLiteral();
-  inline void AddLiteralChar(uc32 ch);
-  inline void AddLiteralCharAdvance();
-  inline void TerminateLiteral();
-  // Stops scanning of a literal, e.g., due to an encountered error.
-  inline void DropLiteral();
-
-  // Low-level scanning support.
-  void Advance() { c0_ = source_->Advance(); }
-  void PushBack(uc32 ch) {
-    source_->PushBack(ch);
-    c0_ = ch;
-  }
+  // Initialize the Scanner to scan source.
+  void Initialize(Handle<String> source);
 
-  bool SkipWhiteSpace() {
-    if (is_parsing_json_) {
-      return SkipJsonWhiteSpace();
-    } else {
-      return SkipJavaScriptWhiteSpace();
-    }
-  }
+  // Returns the next token.
+  Token::Value Next();
 
-  bool SkipJavaScriptWhiteSpace();
+ protected:
+  // Skip past JSON whitespace (only space, tab, newline and carriage-return).
   bool SkipJsonWhiteSpace();
-  Token::Value SkipSingleLineComment();
-  Token::Value SkipMultiLineComment();
-
-  inline Token::Value Select(Token::Value tok);
-  inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_);
-
-  inline void Scan() {
-    if (is_parsing_json_) {
-      ScanJson();
-    } else {
-      ScanJavaScript();
-    }
-  }
-
-  // Scans a single JavaScript token.
-  void ScanJavaScript();
 
   // Scan a single JSON token. The JSON lexical grammar is specified in the
   // ECMAScript 5 standard, section 15.12.1.1.
@@ -331,53 +162,7 @@ class Scanner {
   // JSONNullLiteral).
   Token::Value ScanJsonIdentifier(const char* text, Token::Value token);
 
-  void ScanDecimalDigits();
-  Token::Value ScanNumber(bool seen_period);
-  Token::Value ScanIdentifier();
-  uc32 ScanHexEscape(uc32 c, int length);
-  uc32 ScanOctalEscape(uc32 c, int length);
-  void ScanEscape();
-  Token::Value ScanString();
-
-  // Scans a possible HTML comment -- begins with '<!'.
-  Token::Value ScanHtmlComment();
-
-  // Return the current source position.
-  int source_pos() {
-    return source_->pos() - kCharacterLookaheadBufferSize;
-  }
-
-  // Decodes a unicode escape-sequence which is part of an identifier.
-  // If the escape sequence cannot be decoded the result is kBadRune.
-  uc32 ScanIdentifierUnicodeEscape();
-
-  TokenDesc current_;  // desc for current token (as returned by Next())
-  TokenDesc next_;     // desc for next token (one token look-ahead)
-  bool has_line_terminator_before_next_;
-  bool is_parsing_json_;
-
-  // Different UTF16 buffers used to pull characters from. Based on input one of
-  // these will be initialized as the actual data source.
-  CharacterStreamUTF16Buffer char_stream_buffer_;
-  ExternalStringUTF16Buffer<ExternalTwoByteString, uint16_t>
-      two_byte_string_buffer_;
-  ExternalStringUTF16Buffer<ExternalAsciiString, char> ascii_string_buffer_;
-
-  // Source. Will point to one of the buffers declared above.
-  UTF16Buffer* source_;
-
-  // Used to convert the source string into a character stream when a stream
-  // is not passed to the scanner.
-  SafeStringInputBuffer safe_string_input_buffer_;
-
-  // Buffer to hold literal values (identifiers, strings, numbers)
-  // using '\x00'-terminated UTF-8 encoding. Handles allocation internally.
-  UTF8Buffer literal_buffer_;
-
-  bool stack_overflow_;
-
-  // One Unicode character look-ahead; c0_ < 0 at the end of the input.
-  uc32 c0_;
+  StreamInitializer stream_initializer_;
 };
 
 
@@ -400,7 +185,7 @@ void ExternalStringUTF16Buffer<StringType, CharType>::Initialize(
     SeekForward(start_position);
   }
   end_ =
-      end_position != Scanner::kNoEndPosition ? end_position : data->length();
+      end_position != kNoEndPosition ? end_position : data->length();
 }