diff options
author | Erik Verbruggen <erik.verbruggen@digia.com> | 2014-02-07 15:24:30 +0100 |
---|---|---|
committer | Nikolai Kosjar <nikolai.kosjar@theqtcompany.com> | 2014-12-18 15:46:22 +0100 |
commit | 242b3f4110ebbe3882e28b7df75c26768c5f9ecc (patch) | |
tree | daab9f47f597ab278d97c8e737df68efc54f523f | |
parent | 16becbd29c44e664904a7fb2d698f79fc08b2731 (diff) | |
download | qt-creator-242b3f4110ebbe3882e28b7df75c26768c5f9ecc.tar.gz |
C++: clean up numeric literal parsing and add support for n3472.
Separate the messy pp-number parsing from the numeric literal parsing.
The C/C++ preprocessor makes a grown man cry, but at least we have
"proper" literal parsing when we want it, including C++1y binary
literals.
Next step is digit separators (n3781).
Change-Id: Ia069eef454ed5c056f77694a5b8a595d0b76adc4
Reviewed-by: Erik Verbruggen <erik.verbruggen@theqtcompany.com>
-rw-r--r-- | src/libs/3rdparty/cplusplus/Lexer.cpp | 167 | ||||
-rw-r--r-- | src/libs/3rdparty/cplusplus/Lexer.h | 9 | ||||
-rw-r--r-- | src/libs/cplusplus/SimpleLexer.cpp | 4 | ||||
-rw-r--r-- | src/libs/cplusplus/SimpleLexer.h | 4 | ||||
-rw-r--r-- | src/libs/cplusplus/pp-engine.cpp | 5 | ||||
-rw-r--r-- | tests/auto/cplusplus/lexer/tst_lexer.cpp | 107 |
6 files changed, 269 insertions, 27 deletions
diff --git a/src/libs/3rdparty/cplusplus/Lexer.cpp b/src/libs/3rdparty/cplusplus/Lexer.cpp index 93e47045f3..3fb9a66bd2 100644 --- a/src/libs/3rdparty/cplusplus/Lexer.cpp +++ b/src/libs/3rdparty/cplusplus/Lexer.cpp @@ -305,24 +305,27 @@ void Lexer::scan_helper(Token *tok) tok->f.kind = T_ERROR; } } else if (std::isdigit(_yychar)) { + if (f._ppMode) { + scanPreprocessorNumber(tok, true); + break; + } + const char *yytext = _currentChar - 2; - do { - if (_yychar == 'e' || _yychar == 'E') { - yyinp(); - if (_yychar == '-' || _yychar == '+') { - yyinp(); - // ### CPP_CHECK(std::isdigit(_yychar)); - } - } else if (std::isalnum(_yychar) || _yychar == '.') { + yyinp(); + scanDigitSequence(); // this is optional: we already skipped over the first digit + scanExponentPart(); + scanOptionalFloatingSuffix(); + if (std::isalnum(_yychar) || _yychar == '_') { + do { yyinp(); - } else { - break; - } - } while (_yychar); - int yylen = _currentChar - yytext; - tok->f.kind = T_NUMERIC_LITERAL; - if (control()) - tok->number = control()->numericLiteral(yytext, yylen); + } while (std::isalnum(_yychar) || _yychar == '_'); + tok->f.kind = T_ERROR; + } else { + int yylen = _currentChar - yytext; + tok->f.kind = T_NUMERIC_LITERAL; + if (control()) + tok->number = control()->numericLiteral(yytext, yylen); + } } else { tok->f.kind = T_DOT; } @@ -651,7 +654,10 @@ void Lexer::scan_helper(Token *tok) } else if (std::isalpha(ch) || ch == '_' || ch == '$' || isByteOfMultiByteCodePoint(ch)) { scanIdentifier(tok, _currentChar - _tokenStart - 1); } else if (std::isdigit(ch)) { - scanNumericLiteral(tok); + if (f._ppMode) + scanPreprocessorNumber(tok, false); + else + scanNumericLiteral(tok); } else { tok->f.kind = T_ERROR; } @@ -776,26 +782,141 @@ void Lexer::scanUntilQuote(Token *tok, unsigned char quote) tok->string = control()->stringLiteral(yytext, yylen); } +bool Lexer::scanDigitSequence() +{ + if (!std::isdigit(_yychar)) + return false; + yyinp(); + while (std::isdigit(_yychar)) + yyinp(); + 
return true; +} + +bool Lexer::scanExponentPart() +{ + if (_yychar != 'e' && _yychar != 'E') + return false; + yyinp(); + if (_yychar == '+' || _yychar == '-') + yyinp(); + return scanDigitSequence(); +} + +void Lexer::scanOptionalFloatingSuffix() +{ + if (_yychar == 'f' || _yychar == 'l' || _yychar == 'F' || _yychar == 'L') + yyinp(); +} + +void Lexer::scanOptionalIntegerSuffix(bool allowU) +{ + switch(_yychar) { + case 'u': + case 'U': + if (allowU) { + yyinp(); + scanOptionalIntegerSuffix(false); + } + return; + case 'l': + yyinp(); + if (_yychar == 'l') + yyinp(); + return; + case 'L': + yyinp(); + if (_yychar == 'L') + yyinp(); + return; + default: + return; + } +} + void Lexer::scanNumericLiteral(Token *tok) { const char *yytext = _currentChar - 1; + if (*yytext == '0' && _yychar) { + if (_yychar == 'x' || _yychar == 'X') { + yyinp(); + while (std::isdigit(_yychar) || + (_yychar >= 'a' && _yychar <= 'f') || + (_yychar >= 'A' && _yychar <= 'F')) { + yyinp(); + } + scanOptionalIntegerSuffix(); + goto theEnd; + } else if (_yychar == 'b' || _yychar == 'B') { // see n3472 + yyinp(); + while (_yychar == '0' || _yychar == '1') + yyinp(); + scanOptionalIntegerSuffix(); + goto theEnd; + } else if (_yychar >= '0' && _yychar <= '7') { + do { + yyinp(); + } while (_yychar >= '0' && _yychar <= '7'); + scanOptionalIntegerSuffix(); + goto theEnd; + } + } + + while (_yychar) { + if (_yychar == '.') { + yyinp(); + scanDigitSequence(); // this is optional: "1." 
is a valid floating point number + scanExponentPart(); + scanOptionalFloatingSuffix(); + break; + } else if (_yychar == 'e' || _yychar == 'E') { + if (scanExponentPart()) + scanOptionalFloatingSuffix(); + break; + } else if (std::isdigit(_yychar)) { + yyinp(); + } else { + scanOptionalIntegerSuffix(); + break; + } + } + +theEnd: + if (std::isalnum(_yychar) || _yychar == '_') { + do { + yyinp(); + } while (std::isalnum(_yychar) || _yychar == '_'); + tok->f.kind = T_ERROR; + } else { + int yylen = _currentChar - yytext; + tok->f.kind = T_NUMERIC_LITERAL; + if (control()) + tok->number = control()->numericLiteral(yytext, yylen); + } +} + +void Lexer::scanPreprocessorNumber(Token *tok, bool dotAlreadySkipped) +{ + const char *yytext = _currentChar - (dotAlreadySkipped ? 2 : 1); + if (dotAlreadySkipped && + (!_yychar || (_yychar && !std::isdigit(_yychar)))) { + tok->f.kind = T_DOT; + return; + } + while (_yychar) { if (_yychar == 'e' || _yychar == 'E') { yyinp(); - if (_yychar == '-' || _yychar == '+') { + if (_yychar == '+' || _yychar == '-') yyinp(); - // ### CPP_CHECK(std::isdigit(_yychar)); - } - } else if (std::isalnum(_yychar) || _yychar == '.') { + } else if (std::isalnum(_yychar) || _yychar == '_' || _yychar == '.') { yyinp(); } else { break; } } - int yylen = _currentChar - yytext; + int yylen = _currentChar - yytext; tok->f.kind = T_NUMERIC_LITERAL; - if (control()) tok->number = control()->numericLiteral(yytext, yylen); } diff --git a/src/libs/3rdparty/cplusplus/Lexer.h b/src/libs/3rdparty/cplusplus/Lexer.h index 78396a0e60..0309c69950 100644 --- a/src/libs/3rdparty/cplusplus/Lexer.h +++ b/src/libs/3rdparty/cplusplus/Lexer.h @@ -61,6 +61,9 @@ public: LanguageFeatures languageFeatures() const { return _languageFeatures; } void setLanguageFeatures(LanguageFeatures features) { _languageFeatures = features; } + void setPreprocessorMode(bool onoff) + { f._ppMode = onoff; } + public: static void yyinp_utf8(const char *&currentSourceChar, unsigned char &yychar, unsigned 
&utf16charCounter) @@ -95,7 +98,12 @@ private: void scanRawStringLiteral(Token *tok, unsigned char hint = 0); void scanCharLiteral(Token *tok, unsigned char hint = 0); void scanUntilQuote(Token *tok, unsigned char quote); + bool scanDigitSequence(); + bool scanExponentPart(); + void scanOptionalFloatingSuffix(); + void scanOptionalIntegerSuffix(bool allowU = true); void scanNumericLiteral(Token *tok); + void scanPreprocessorNumber(Token *tok, bool dotAlreadySkipped); void scanIdentifier(Token *tok, unsigned extraProcessedChars = 0); void scanBackslash(Kind type); void scanCppComment(Kind type); @@ -115,6 +123,7 @@ private: unsigned _scanCommentTokens: 1; unsigned _scanKeywords: 1; unsigned _scanAngleStringLiteralTokens: 1; + unsigned _ppMode: 1; }; struct State { diff --git a/src/libs/cplusplus/SimpleLexer.cpp b/src/libs/cplusplus/SimpleLexer.cpp index db76bb4c09..f379e5c91b 100644 --- a/src/libs/cplusplus/SimpleLexer.cpp +++ b/src/libs/cplusplus/SimpleLexer.cpp @@ -41,7 +41,8 @@ using namespace CPlusPlus; SimpleLexer::SimpleLexer() : _lastState(0), _skipComments(false), - _endedJoined(false) + _endedJoined(false), + _ppMode(false) {} SimpleLexer::~SimpleLexer() @@ -73,6 +74,7 @@ Tokens SimpleLexer::operator()(const QString &text, int state) Lexer lex(firstChar, lastChar); lex.setLanguageFeatures(_languageFeatures); lex.setStartWithNewline(true); + lex.setPreprocessorMode(_ppMode); if (! 
_skipComments) lex.setScanCommentTokens(true); diff --git a/src/libs/cplusplus/SimpleLexer.h b/src/libs/cplusplus/SimpleLexer.h index d4cba997e0..cc3a576928 100644 --- a/src/libs/cplusplus/SimpleLexer.h +++ b/src/libs/cplusplus/SimpleLexer.h @@ -51,6 +51,9 @@ public: bool skipComments() const; void setSkipComments(bool skipComments); + void setPreprocessorMode(bool ppMode) + { _ppMode = ppMode; } + LanguageFeatures languageFeatures() const { return _languageFeatures; } void setLanguageFeatures(LanguageFeatures features) { _languageFeatures = features; } @@ -74,6 +77,7 @@ private: LanguageFeatures _languageFeatures; bool _skipComments: 1; bool _endedJoined: 1; + bool _ppMode: 1; }; } // namespace CPlusPlus diff --git a/src/libs/cplusplus/pp-engine.cpp b/src/libs/cplusplus/pp-engine.cpp index c4ef04dc53..306af0b0de 100644 --- a/src/libs/cplusplus/pp-engine.cpp +++ b/src/libs/cplusplus/pp-engine.cpp @@ -401,6 +401,9 @@ protected: const char *end = spell + len; char *vend = const_cast<char *>(end); _value.set_long(strtol(spell, &vend, 0)); + // TODO: if (vend != end) error(NaN) + // TODO: binary literals + // TODO: float literals ++(*_lex); } else if (isTokenDefined()) { ++(*_lex); @@ -1388,6 +1391,7 @@ void Preprocessor::preprocess(const QString &fileName, const QByteArray &source, m_state.m_lexer = new Lexer(source.constBegin(), source.constEnd()); m_state.m_lexer->setScanKeywords(false); m_state.m_lexer->setScanAngleStringLiteralTokens(false); + m_state.m_lexer->setPreprocessorMode(true); if (m_keepComments) m_state.m_lexer->setScanCommentTokens(true); m_state.m_result = result; @@ -1803,6 +1807,7 @@ const PPToken Preprocessor::evalExpression(PPToken *tk, Value &result) PPToken lastConditionToken; const QByteArray expanded = expand(tk, &lastConditionToken); Lexer lexer(expanded.constData(), expanded.constData() + expanded.size()); + lexer.setPreprocessorMode(true); std::vector<Token> buf; Token t; do { diff --git a/tests/auto/cplusplus/lexer/tst_lexer.cpp 
b/tests/auto/cplusplus/lexer/tst_lexer.cpp index ab72843443..2d93a6addd 100644 --- a/tests/auto/cplusplus/lexer/tst_lexer.cpp +++ b/tests/auto/cplusplus/lexer/tst_lexer.cpp @@ -70,6 +70,10 @@ private slots: void basic_data(); void incremental(); void incremental_data(); + void literals(); + void literals_data(); + void preprocessor(); + void preprocessor_data(); void bytes_and_utf16chars(); void bytes_and_utf16chars_data(); @@ -82,7 +86,8 @@ private: void run(const QByteArray &source, const Tokens &expectedTokens, bool preserveState, - TokenCompareFlags compareFlags); + TokenCompareFlags compareFlags, + bool preprocessorMode = false); int _state; }; @@ -103,11 +108,13 @@ Tokens tst_SimpleLexer::toTokens(const TokenKindList &tokenKinds) void tst_SimpleLexer::run(const QByteArray &source, const Tokens &expectedTokens, bool preserveState, - TokenCompareFlags compareFlags) + TokenCompareFlags compareFlags, + bool preprocessorMode) { QVERIFY(compareFlags); SimpleLexer lexer; + lexer.setPreprocessorMode(preprocessorMode); const Tokens tokens = lexer(source, preserveState ? 
_state : 0); if (preserveState) _state = lexer.state(); @@ -140,7 +147,10 @@ void tst_SimpleLexer::run(const QByteArray &source, if (compareFlags & CompareUtf16CharsEnd) QCOMPARE(token.utf16charsEnd(), expectedToken.utf16charsEnd()); } - QVERIFY2(i == expectedTokens.size(), "Less tokens than expected."); + + QString msg = QLatin1String("Less tokens than expected: got %1, expected %2."); + msg = msg.arg(i).arg(expectedTokens.size()); + QVERIFY2(i == expectedTokens.size(), msg.toUtf8().constData()); } void tst_SimpleLexer::basic() @@ -254,6 +264,97 @@ void tst_SimpleLexer::basic_data() QTest::newRow(source) << source << expectedTokenKindList; } +void tst_SimpleLexer::literals() +{ + QFETCH(QByteArray, source); + QFETCH(TokenKindList, expectedTokenKindList); + + run(source, toTokens(expectedTokenKindList), false, CompareKind); +} + +void tst_SimpleLexer::literals_data() +{ + QTest::addColumn<QByteArray>("source"); + QTest::addColumn<TokenKindList>("expectedTokenKindList"); + + QByteArray source; + TokenKindList expectedTokenKindList; + + source = + "1.\n" + "1.1\n" + "1.23456789\n" + ".1\n" + ".3e8\n" + ".3e8f\n" + "1e1\n" + "1E1\n" + "-1e-1\n" // the first minus sign is a separate token! 
+ "1e-1\n" + "1e+1\n" + "1e1L\n" + "1e1l\n" + "1e1f\n" + "1e1F\n" + "23.45x" + ".45x" + ; + expectedTokenKindList = + TokenKindList() << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL + << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL + << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_MINUS + << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL + << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL + << T_NUMERIC_LITERAL << T_ERROR << T_ERROR + ; + QTest::newRow("float-literals") << source << expectedTokenKindList; + + source = // these are all the same + "42\n" + "0b101010u\n" + "052ll\n" + "0x2aL\n" + "123FOO\n" + "0xfOo\n" + "33_\n" + ; + expectedTokenKindList = + TokenKindList() << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL + << T_NUMERIC_LITERAL << T_ERROR << T_ERROR << T_ERROR + ; + QTest::newRow("integer-literals") << source << expectedTokenKindList; +} + +void tst_SimpleLexer::preprocessor() +{ + QFETCH(QByteArray, source); + QFETCH(TokenKindList, expectedTokenKindList); + + run(source, toTokens(expectedTokenKindList), false, CompareKind, true); +} + +void tst_SimpleLexer::preprocessor_data() +{ + QTest::addColumn<QByteArray>("source"); + QTest::addColumn<TokenKindList>("expectedTokenKindList"); + + QByteArray source; + TokenKindList expectedTokenKindList; + + source = // sad but true [2.10] + "1\n" + "1x.\n" + "1.y\n" + ".1_1.1.\n" + "1e-\n" + "01x1b2qWeRtty_Grumble+E-.\n" + ; + expectedTokenKindList = + TokenKindList() << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL + << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL; + QTest::newRow("pp-number") << source << expectedTokenKindList; +} + void tst_SimpleLexer::bytes_and_utf16chars() { QFETCH(QByteArray, source); |