From fa925ccd5a098a410d6f8af4689fdbc2c6fa6a8b Mon Sep 17 00:00:00 2001 From: Roberto Raggi Date: Thu, 28 Jan 2010 13:12:52 +0100 Subject: Say hello to the new incremental scanner for QML/JS. --- src/libs/qmljs/qmljshighlighter.cpp | 40 ++-- src/libs/qmljs/qmljshighlighter.h | 6 +- src/libs/qmljs/qmljsindenter.cpp | 2 +- src/libs/qmljs/qmljsscanner.cpp | 449 +++++++++++++----------------------- src/libs/qmljs/qmljsscanner.h | 32 +-- 5 files changed, 199 insertions(+), 330 deletions(-) (limited to 'src/libs/qmljs') diff --git a/src/libs/qmljs/qmljshighlighter.cpp b/src/libs/qmljs/qmljshighlighter.cpp index 5ec0d3ccd9..6e5a2e99f3 100644 --- a/src/libs/qmljs/qmljshighlighter.cpp +++ b/src/libs/qmljs/qmljshighlighter.cpp @@ -31,6 +31,7 @@ #include #include +#include using namespace QmlJS; @@ -58,11 +59,10 @@ bool QScriptHighlighter::isDuiEnabled() const void QScriptHighlighter::highlightBlock(const QString &text) { - m_scanner(text, onBlockStart()); + const QList tokens = m_scanner(text, onBlockStart()); QTextCharFormat emptyFormat; int lastEnd = 0; - const QList tokens = m_scanner.tokens(); for (int i = 0; i < tokens.size(); ++i) { const Token token = tokens.at(i); @@ -111,7 +111,7 @@ void QScriptHighlighter::highlightBlock(const QString &text) break; case Token::Identifier: - if (m_duiEnabled && (i + 1 != tokens.size()) && tokens.at(i + 1).kind == Token::Colon) { + if (m_duiEnabled && (i + 1) < tokens.size() && tokens.at(i + 1).is(Token::Colon)) { int j = i; for (; j != -1; --j) { const Token &tok = tokens.at(j); @@ -138,8 +138,7 @@ void QScriptHighlighter::highlightBlock(const QString &text) setFormat(token.offset, token.length, emptyFormat); break; - case Token::Operator: - case Token::Dot: + case Token::Delimiter: setFormat(token.offset, token.length, emptyFormat); break; @@ -150,13 +149,21 @@ void QScriptHighlighter::highlightBlock(const QString &text) lastEnd = token.end(); } - const int firstNonSpace = m_scanner.firstNonSpace(); + int firstNonSpace = 0; + + if (! tokens.isEmpty()) { + const Token &tk = tokens.first(); + firstNonSpace = tk.offset; + } + if (firstNonSpace > lastEnd) setFormat(lastEnd, firstNonSpace - lastEnd, m_formats[VisualWhitespace]); else if (text.length() > lastEnd) setFormat(lastEnd, text.length() - lastEnd, m_formats[VisualWhitespace]); onBlockEnd(m_scanner.endState(), firstNonSpace); + + setCurrentBlockState(m_scanner.endState()); } void QScriptHighlighter::setFormats(const QVector &s) @@ -237,15 +244,20 @@ QSet QScriptHighlighter::keywords() int QScriptHighlighter::onBlockStart() { - int state = 0; - int previousState = previousBlockState(); - if (previousState != -1) - state = previousState; - return state; + return currentBlockState(); +} + +void QScriptHighlighter::onBlockEnd(int, int) +{ +} + +void QScriptHighlighter::onOpeningParenthesis(QChar, int) +{ +} + +void QScriptHighlighter::onClosingParenthesis(QChar, int) +{ } -void QScriptHighlighter::onOpeningParenthesis(QChar, int) {} -void QScriptHighlighter::onClosingParenthesis(QChar, int) {} -void QScriptHighlighter::onBlockEnd(int state, int) { return setCurrentBlockState(state); } void QScriptHighlighter::highlightWhitespace(const Token &token, const QString &text, int nonWhitespaceFormat) { diff --git a/src/libs/qmljs/qmljshighlighter.h b/src/libs/qmljs/qmljshighlighter.h index 6e0bd2ba72..d289d91a5b 100644 --- a/src/libs/qmljs/qmljshighlighter.h +++ b/src/libs/qmljs/qmljshighlighter.h @@ -61,13 +61,13 @@ public: QSet keywords(); protected: + virtual int onBlockStart(); + virtual void onBlockEnd(int state, int firstNonSpace); + // The functions are notified whenever parentheses are encountered. // Custom behaviour can be added, for example storing info for indenting. - virtual int onBlockStart(); // returns the blocks initial state virtual void onOpeningParenthesis(QChar parenthesis, int pos); virtual void onClosingParenthesis(QChar parenthesis, int pos); - // sets the enriched user state, or simply calls setCurrentBlockState(state); - virtual void onBlockEnd(int state, int firstNonSpace); virtual void highlightWhitespace(const Token &token, const QString &text, int nonWhitespaceFormat); diff --git a/src/libs/qmljs/qmljsindenter.cpp b/src/libs/qmljs/qmljsindenter.cpp index 76b54bd06f..b0a872eb9b 100644 --- a/src/libs/qmljs/qmljsindenter.cpp +++ b/src/libs/qmljs/qmljsindenter.cpp @@ -256,7 +256,7 @@ QString QmlJSIndenter::trimmedCodeLine(const QString &t) case Token::LeftParenthesis: case Token::LeftBrace: case Token::Semicolon: - case Token::Operator: + case Token::Delimiter: break; case Token::RightParenthesis: diff --git a/src/libs/qmljs/qmljsscanner.cpp b/src/libs/qmljs/qmljsscanner.cpp index 98d350cf50..f2159104a6 100644 --- a/src/libs/qmljs/qmljsscanner.cpp +++ b/src/libs/qmljs/qmljsscanner.cpp @@ -34,330 +34,209 @@ using namespace QmlJS; QmlJSScanner::QmlJSScanner() + : m_state(0) { - reset(); } QmlJSScanner::~QmlJSScanner() -{} +{ +} -void QmlJSScanner::reset() +static bool isIdentifierChar(QChar ch) { - m_endState = -1; - m_firstNonSpace = -1; - m_tokens.clear(); + switch (ch.unicode()) { + case '$': case '_': + return true; + + default: + return ch.isLetterOrNumber(); + } } -QList QmlJSScanner::operator()(const QString &text, int startState) +static bool isNumberChar(QChar ch) { - reset(); - - // tokens - enum TokenKind { - InputAlpha, - InputNumber, - InputAsterix, - InputSlash, - InputSpace, - InputQuotation, - InputApostrophe, - InputSep, - NumInputs - }; + switch (ch.unicode()) { + case '.': + case 'e': + case 'E': // ### more... + return true; + + default: + return ch.isLetterOrNumber(); + } +} - // states +QList QmlJSScanner::operator()(const QString &text, int startState) +{ enum { - StateStandard, - StateCommentStart1, // '/' - StateCCommentStart2, // '*' after a '/' - StateCppCommentStart2, // '/' after a '/' - StateCComment, // after a "/*" - StateCppComment, // after a "//" - StateCCommentEnd1, // '*' in a CppComment - StateCCommentEnd2, // '/' after a '*' in a CppComment - StateStringStart, - StateString, - StateStringEnd, - StateString2Start, - StateString2, - StateString2End, - StateNumber, - NumStates + Normal = 0, + MultiLineComment = 1 }; - static const uchar table[NumStates][NumInputs] = { - // InputAlpha InputNumber InputAsterix InputSlash InputSpace InputQuotation InputApostrophe InputSep - { StateStandard, StateNumber, StateStandard, StateCommentStart1, StateStandard, StateStringStart, StateString2Start, StateStandard }, // StateStandard - { StateStandard, StateNumber, StateCCommentStart2, StateCppCommentStart2, StateStandard, StateStringStart, StateString2Start, StateStandard }, // StateCommentStart1 - { StateCComment, StateCComment, StateCCommentEnd1, StateCComment, StateCComment, StateCComment, StateCComment, StateCComment }, // StateCCommentStart2 - { StateCppComment, StateCppComment, StateCppComment, StateCppComment, StateCppComment, StateCppComment, StateCppComment, StateCppComment }, // StateCppCommentStart2 - { StateCComment, StateCComment, StateCCommentEnd1, StateCComment, StateCComment, StateCComment, StateCComment, StateCComment }, // StateCComment - { StateCppComment, StateCppComment, StateCppComment, StateCppComment, StateCppComment, StateCppComment, StateCppComment, StateCppComment }, // StateCppComment - { StateCComment, StateCComment, StateCCommentEnd1, StateCCommentEnd2, StateCComment, StateCComment, StateCComment, StateCComment }, // StateCCommentEnd1 - { StateStandard, StateNumber, StateStandard, StateCommentStart1, StateStandard, StateStringStart, StateString2Start, StateStandard }, // StateCCommentEnd2 - { StateString, StateString, StateString, StateString, StateString, StateStringEnd, StateString, StateString }, // StateStringStart - { StateString, StateString, StateString, StateString, StateString, StateStringEnd, StateString, StateString }, // StateString - { StateStandard, StateStandard, StateStandard, StateCommentStart1, StateStandard, StateStringStart, StateString2Start, StateStandard }, // StateStringEnd - { StateString2, StateString2, StateString2, StateString2, StateString2, StateString2, StateString2End, StateString2 }, // StateString2Start - { StateString2, StateString2, StateString2, StateString2, StateString2, StateString2, StateString2End, StateString2 }, // StateString2 - { StateStandard, StateStandard, StateStandard, StateCommentStart1, StateStandard, StateStringStart, StateString2Start, StateStandard }, // StateString2End - { StateNumber, StateNumber, StateStandard, StateCommentStart1, StateStandard, StateStringStart, StateString2Start, StateStandard } // StateNumber - }; + m_state = startState; + QList tokens; + + // ### handle multi line comment state. + + int index = 0; + + if (m_state == MultiLineComment) { + const int start = index; + while (index < text.length()) { + const QChar ch = text.at(index); + QChar la; + if (index + 1 < text.length()) + la = text.at(index + 1); + + if (ch == QLatin1Char('*') && la == QLatin1Char('/')) { + m_state = Normal; + index += 2; + break; + } else { + ++index; + } + } - int state = startState; - if (text.isEmpty()) { - blockEnd(state, 0); - return m_tokens; + tokens.append(Token(start, index - start, Token::Comment)); } - int input = -1; - int i = 0; - bool lastWasBackSlash = false; - bool makeLastStandard = false; - - static const QString alphabeth = QLatin1String("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"); - static const QString mathChars = QString::fromLatin1("xXeE"); - static const QString numbers = QString::fromLatin1("0123456789"); - QChar lastChar; - - int firstNonSpace = -1; - int lastNonSpace = -1; - - forever { - const QChar qc = text.at(i); - const char c = qc.toLatin1(); - - if (lastWasBackSlash) { - input = InputSep; - } else { - switch (c) { - case '*': - input = InputAsterix; - break; - case '/': - input = InputSlash; - break; - case '"': - input = InputQuotation; - break; - case '\'': - input = InputApostrophe; - break; - case ' ': - input = InputSpace; - break; - case '1': case '2': case '3': case '4': case '5': - case '6': case '7': case '8': case '9': case '0': - if (alphabeth.contains(lastChar) && (!mathChars.contains(lastChar) || !numbers.contains(text.at(i - 1)))) { - input = InputAlpha; + while (index < text.length()) { + const QChar ch = text.at(index); + + QChar la; // lookahead char + if (index + 1 < text.length()) + la = text.at(index + 1); + + switch (ch.unicode()) { + case '/': + if (la == QLatin1Char('/')) { + tokens.append(Token(index, text.length() - index, Token::Comment)); + index = text.length(); + } else if (la == QLatin1Char('*')) { + const int start = index; + index += 2; + m_state = MultiLineComment; + while (index < text.length()) { + const QChar ch = text.at(index); + QChar la; + if (index + 1 < text.length()) + la = text.at(index + 1); + + if (ch == QLatin1Char('*') && la == QLatin1Char('/')) { + m_state = Normal; + index += 2; + break; } else { - if (input == InputAlpha && numbers.contains(lastChar)) - input = InputAlpha; - else - input = InputNumber; + ++index; } - break; - case '.': - if (state == StateNumber) - input = InputNumber; - else - input = InputSep; - break; - default: { - if (qc.isLetter() || c == '_') - input = InputAlpha; - else - input = InputSep; - break; } + tokens.append(Token(start, index - start, Token::Comment)); + } else { + tokens.append(Token(index++, 1, Token::Delimiter)); } - } - - if (input != InputSpace) { - if (firstNonSpace < 0) - firstNonSpace = i; - lastNonSpace = i; - } + break; - lastWasBackSlash = !lastWasBackSlash && c == '\\'; + case '\'': + case '"': { + const QChar quote = ch; + const int start = index; + ++index; + while (index < text.length()) { + const QChar ch = text.at(index); - state = table[state][input]; + if (ch == quote) + break; + else if (index + 1 < text.length() && ch == QLatin1Char('\\')) + index += 2; + else + ++index; + } - switch (state) { - case StateStandard: { - if (makeLastStandard) - insertCharToken(i - 1, text.at(i - 1).toAscii()); - makeLastStandard = false; + if (index < text.length()) { + ++index; + // good one + } else { + // unfinished + } - if (input == InputAlpha ) { - insertIdentifier(i); - } else if (input == InputSep || input == InputAsterix) { - insertCharToken(i, c); - } + tokens.append(Token(start, index - start, Token::String)); + } break; + case '.': + if (la.isDigit()) { + const int start = index; + do { + ++index; + } while (index < text.length() && isNumberChar(text.at(index))); + tokens.append(Token(start, index - start, Token::Number)); break; } + tokens.append(Token(index++, 1, Token::Dot)); + break; - case StateCommentStart1: - if (makeLastStandard) - insertCharToken(i - 1, text.at(i - 1).toAscii()); - makeLastStandard = true; - break; - case StateCCommentStart2: - makeLastStandard = false; - insertComment(i - 1, 2); - break; - case StateCppCommentStart2: - insertComment(i - 1, 2); - makeLastStandard = false; - break; - case StateCComment: - if (makeLastStandard) - insertCharToken(i - 1, text.at(i - 1).toAscii()); - makeLastStandard = false; - insertComment(i, 1); - break; - case StateCppComment: - if (makeLastStandard) - insertCharToken(i - 1, text.at(i - 1).toAscii()); - makeLastStandard = false; - insertComment(i, 1); - break; - case StateCCommentEnd1: - if (makeLastStandard) - insertCharToken(i - 1, text.at(i - 1).toAscii()); - makeLastStandard = false; - insertComment(i, 1); - break; - case StateCCommentEnd2: - if (makeLastStandard) - insertCharToken(i - 1, text.at(i - 1).toAscii()); - makeLastStandard = false; - insertComment(i, 1); - break; - case StateStringStart: - if (makeLastStandard) - insertCharToken(i - 1, text.at(i - 1).toAscii()); - makeLastStandard = false; - insertString(i); - break; - case StateString: - if (makeLastStandard) - insertCharToken(i - 1, text.at(i - 1).toAscii()); - makeLastStandard = false; - insertString(i); - break; - case StateStringEnd: - if (makeLastStandard) - insertCharToken(i - 1, text.at(i - 1).toAscii()); - makeLastStandard = false; - insertString(i); - break; - case StateString2Start: - if (makeLastStandard) - insertCharToken(i - 1, text.at(i - 1).toAscii()); - makeLastStandard = false; - insertString(i); - break; - case StateString2: - if (makeLastStandard) - insertCharToken(i - 1, text.at(i - 1).toAscii()); - makeLastStandard = false; - insertString(i); - break; - case StateString2End: - if (makeLastStandard) - insertCharToken(i - 1, text.at(i - 1).toAscii()); - makeLastStandard = false; - insertString(i); - break; - case StateNumber: - if (makeLastStandard) - insertCharToken(i - 1, text.at(i - 1).toAscii()); - makeLastStandard = false; - insertNumber(i); - break; - } + case '(': + tokens.append(Token(index++, 1, Token::LeftParenthesis)); + break; - lastChar = qc; - i++; - if (i >= text.length()) + case ')': + tokens.append(Token(index++, 1, Token::RightParenthesis)); break; - } - scanForKeywords(text); + case '[': + tokens.append(Token(index++, 1, Token::LeftBracket)); + break; - if (state == StateCComment - || state == StateCCommentEnd1 - || state == StateCCommentStart2 - ) { - state = StateCComment; - } else { - state = StateStandard; - } + case ']': + tokens.append(Token(index++, 1, Token::RightBracket)); + break; - blockEnd(state, firstNonSpace); + case '{': + tokens.append(Token(index++, 1, Token::LeftBrace)); + break; - return m_tokens; -} + case '}': + tokens.append(Token(index++, 1, Token::RightBrace)); + break; -void QmlJSScanner::insertToken(int start, int length, Token::Kind kind, bool forceNewToken) -{ - if (m_tokens.isEmpty() || forceNewToken) { - m_tokens.append(Token(start, length, kind)); - } else { - Token &lastToken(m_tokens.last()); - - if (lastToken.kind == kind && lastToken.end() == start) { - lastToken.length += 1; - } else { - m_tokens.append(Token(start, length, kind)); - } - } -} + case ';': + tokens.append(Token(index++, 1, Token::Semicolon)); + break; -void QmlJSScanner::insertCharToken(int start, const char c) -{ - Token::Kind kind; - - switch (c) { - case '!': - case '<': - case '>': - case '+': - case '-': - case '*': - case '/': - case '%': kind = Token::Operator; break; - - case ';': kind = Token::Semicolon; break; - case ':': kind = Token::Colon; break; - case ',': kind = Token::Comma; break; - case '.': kind = Token::Dot; break; - - case '(': kind = Token::LeftParenthesis; break; - case ')': kind = Token::RightParenthesis; break; - case '{': kind = Token::LeftBrace; break; - case '}': kind = Token::RightBrace; break; - case '[': kind = Token::LeftBracket; break; - case ']': kind = Token::RightBracket; break; - - default: kind = Token::Identifier; break; + case ':': + tokens.append(Token(index++, 1, Token::Colon)); + break; + + case ',': + tokens.append(Token(index++, 1, Token::Comma)); + break; + + default: + if (ch.isNumber()) { + const int start = index; + do { + ++index; + } while (index < text.length() && isNumberChar(text.at(index))); + tokens.append(Token(start, index - start, Token::Number)); + } else if (ch.isLetter() || ch == QLatin1Char('_') || ch == QLatin1Char('$')) { + const int start = index; + do { + ++index; + } while (index < text.length() && isIdentifierChar(text.at(index))); + + if (isKeyword(text.mid(start, index - start))) + tokens.append(Token(start, index - start, Token::Keyword)); // ### fixme + else + tokens.append(Token(start, index - start, Token::Identifier)); + } else { + tokens.append(Token(index++, 1, Token::Delimiter)); + } + } // end of switch } - insertToken(start, 1, kind, true); + return tokens; } -void QmlJSScanner::scanForKeywords(const QString &text) +bool QmlJSScanner::isKeyword(const QString &text) const { - for (int i = 0; i < m_tokens.length(); ++i) { - Token &t(m_tokens[i]); - - if (t.kind != Token::Identifier) - continue; - - const QString id = text.mid(t.offset, t.length); - if (m_keywords.contains(id)) - t.kind = Token::Keyword; - } + return m_keywords.contains(text); } diff --git a/src/libs/qmljs/qmljsscanner.h b/src/libs/qmljs/qmljsscanner.h index e8aea6b94e..9c98714fc6 100644 --- a/src/libs/qmljs/qmljsscanner.h +++ b/src/libs/qmljs/qmljsscanner.h @@ -54,11 +54,11 @@ public: RightBrace, LeftBracket, RightBracket, - Operator, Semicolon, Colon, Comma, - Dot + Dot, + Delimiter }; inline Token(): offset(0), length(0), kind(EndOfFile) {} @@ -83,39 +83,17 @@ public: void setKeywords(const QSet keywords) { m_keywords = keywords; } - void reset(); - QList operator()(const QString &text, int startState = 0); int endState() const - { return m_endState; } - - int firstNonSpace() const - { return m_firstNonSpace; } - - QList tokens() const - { return m_tokens; } + { return m_state; } private: - void blockEnd(int state, int firstNonSpace) - { m_endState = state; m_firstNonSpace = firstNonSpace; } - void insertString(int start) - { insertToken(start, 1, Token::String, false); } - void insertComment(int start, int length) - { insertToken(start, length, Token::Comment, false); } - void insertCharToken(int start, const char c); - void insertIdentifier(int start) - { insertToken(start, 1, Token::Identifier, false); } - void insertNumber(int start) - { insertToken(start, 1, Token::Number, false); } - void insertToken(int start, int length, Token::Kind kind, bool forceNewToken); - void scanForKeywords(const QString &text); + bool isKeyword(const QString &text) const; private: QSet m_keywords; - int m_endState; - int m_firstNonSpace; - QList m_tokens; + int m_state; }; } // namespace QmlJS -- cgit v1.2.1