summary refs log tree commit diff
diff options
context:
space:
mode:
author Erik Verbruggen <erik.verbruggen@digia.com> 2014-02-07 15:24:30 +0100
committer Nikolai Kosjar <nikolai.kosjar@theqtcompany.com> 2014-12-18 15:46:22 +0100
commit 242b3f4110ebbe3882e28b7df75c26768c5f9ecc (patch)
tree daab9f47f597ab278d97c8e737df68efc54f523f
parent 16becbd29c44e664904a7fb2d698f79fc08b2731 (diff)
download qt-creator-242b3f4110ebbe3882e28b7df75c26768c5f9ecc.tar.gz
C++: clean up numeric literal parsing and add support for n3472.
Separate the messy pp-number parsing from the numeric literal parsing. The C/C++ preprocessor makes a grown man cry, but at least we have "proper" literal parsing when we want it, including C++1y binary literals. Next step is digit separators (n3781).

Change-Id: Ia069eef454ed5c056f77694a5b8a595d0b76adc4
Reviewed-by: Erik Verbruggen <erik.verbruggen@theqtcompany.com>
-rw-r--r-- src/libs/3rdparty/cplusplus/Lexer.cpp 167
-rw-r--r-- src/libs/3rdparty/cplusplus/Lexer.h 9
-rw-r--r-- src/libs/cplusplus/SimpleLexer.cpp 4
-rw-r--r-- src/libs/cplusplus/SimpleLexer.h 4
-rw-r--r-- src/libs/cplusplus/pp-engine.cpp 5
-rw-r--r-- tests/auto/cplusplus/lexer/tst_lexer.cpp 107
6 files changed, 269 insertions, 27 deletions
diff --git a/src/libs/3rdparty/cplusplus/Lexer.cpp b/src/libs/3rdparty/cplusplus/Lexer.cpp
index 93e47045f3..3fb9a66bd2 100644
--- a/src/libs/3rdparty/cplusplus/Lexer.cpp
+++ b/src/libs/3rdparty/cplusplus/Lexer.cpp
@@ -305,24 +305,27 @@ void Lexer::scan_helper(Token *tok)
tok->f.kind = T_ERROR;
}
} else if (std::isdigit(_yychar)) {
+ if (f._ppMode) {
+ scanPreprocessorNumber(tok, true);
+ break;
+ }
+
const char *yytext = _currentChar - 2;
- do {
- if (_yychar == 'e' || _yychar == 'E') {
- yyinp();
- if (_yychar == '-' || _yychar == '+') {
- yyinp();
- // ### CPP_CHECK(std::isdigit(_yychar));
- }
- } else if (std::isalnum(_yychar) || _yychar == '.') {
+ yyinp();
+ scanDigitSequence(); // this is optional: we already skipped over the first digit
+ scanExponentPart();
+ scanOptionalFloatingSuffix();
+ if (std::isalnum(_yychar) || _yychar == '_') {
+ do {
yyinp();
- } else {
- break;
- }
- } while (_yychar);
- int yylen = _currentChar - yytext;
- tok->f.kind = T_NUMERIC_LITERAL;
- if (control())
- tok->number = control()->numericLiteral(yytext, yylen);
+ } while (std::isalnum(_yychar) || _yychar == '_');
+ tok->f.kind = T_ERROR;
+ } else {
+ int yylen = _currentChar - yytext;
+ tok->f.kind = T_NUMERIC_LITERAL;
+ if (control())
+ tok->number = control()->numericLiteral(yytext, yylen);
+ }
} else {
tok->f.kind = T_DOT;
}
@@ -651,7 +654,10 @@ void Lexer::scan_helper(Token *tok)
} else if (std::isalpha(ch) || ch == '_' || ch == '$' || isByteOfMultiByteCodePoint(ch)) {
scanIdentifier(tok, _currentChar - _tokenStart - 1);
} else if (std::isdigit(ch)) {
- scanNumericLiteral(tok);
+ if (f._ppMode)
+ scanPreprocessorNumber(tok, false);
+ else
+ scanNumericLiteral(tok);
} else {
tok->f.kind = T_ERROR;
}
@@ -776,26 +782,141 @@ void Lexer::scanUntilQuote(Token *tok, unsigned char quote)
tok->string = control()->stringLiteral(yytext, yylen);
}
+bool Lexer::scanDigitSequence()
+{
+ if (!std::isdigit(_yychar))
+ return false;
+ yyinp();
+ while (std::isdigit(_yychar))
+ yyinp();
+ return true;
+}
+
+bool Lexer::scanExponentPart()
+{
+ if (_yychar != 'e' && _yychar != 'E')
+ return false;
+ yyinp();
+ if (_yychar == '+' || _yychar == '-')
+ yyinp();
+ return scanDigitSequence();
+}
+
+void Lexer::scanOptionalFloatingSuffix()
+{
+ if (_yychar == 'f' || _yychar == 'l' || _yychar == 'F' || _yychar == 'L')
+ yyinp();
+}
+
+void Lexer::scanOptionalIntegerSuffix(bool allowU)
+{
+ switch(_yychar) {
+ case 'u':
+ case 'U':
+ if (allowU) {
+ yyinp();
+ scanOptionalIntegerSuffix(false);
+ }
+ return;
+ case 'l':
+ yyinp();
+ if (_yychar == 'l')
+ yyinp();
+ return;
+ case 'L':
+ yyinp();
+ if (_yychar == 'L')
+ yyinp();
+ return;
+ default:
+ return;
+ }
+}
+
void Lexer::scanNumericLiteral(Token *tok)
{
const char *yytext = _currentChar - 1;
+ if (*yytext == '0' && _yychar) {
+ if (_yychar == 'x' || _yychar == 'X') {
+ yyinp();
+ while (std::isdigit(_yychar) ||
+ (_yychar >= 'a' && _yychar <= 'f') ||
+ (_yychar >= 'A' && _yychar <= 'F')) {
+ yyinp();
+ }
+ scanOptionalIntegerSuffix();
+ goto theEnd;
+ } else if (_yychar == 'b' || _yychar == 'B') { // see n3472
+ yyinp();
+ while (_yychar == '0' || _yychar == '1')
+ yyinp();
+ scanOptionalIntegerSuffix();
+ goto theEnd;
+ } else if (_yychar >= '0' && _yychar <= '7') {
+ do {
+ yyinp();
+ } while (_yychar >= '0' && _yychar <= '7');
+ scanOptionalIntegerSuffix();
+ goto theEnd;
+ }
+ }
+
+ while (_yychar) {
+ if (_yychar == '.') {
+ yyinp();
+ scanDigitSequence(); // this is optional: "1." is a valid floating point number
+ scanExponentPart();
+ scanOptionalFloatingSuffix();
+ break;
+ } else if (_yychar == 'e' || _yychar == 'E') {
+ if (scanExponentPart())
+ scanOptionalFloatingSuffix();
+ break;
+ } else if (std::isdigit(_yychar)) {
+ yyinp();
+ } else {
+ scanOptionalIntegerSuffix();
+ break;
+ }
+ }
+
+theEnd:
+ if (std::isalnum(_yychar) || _yychar == '_') {
+ do {
+ yyinp();
+ } while (std::isalnum(_yychar) || _yychar == '_');
+ tok->f.kind = T_ERROR;
+ } else {
+ int yylen = _currentChar - yytext;
+ tok->f.kind = T_NUMERIC_LITERAL;
+ if (control())
+ tok->number = control()->numericLiteral(yytext, yylen);
+ }
+}
+
+void Lexer::scanPreprocessorNumber(Token *tok, bool dotAlreadySkipped)
+{
+ const char *yytext = _currentChar - (dotAlreadySkipped ? 2 : 1);
+ if (dotAlreadySkipped &&
+ (!_yychar || (_yychar && !std::isdigit(_yychar)))) {
+ tok->f.kind = T_DOT;
+ return;
+ }
+
while (_yychar) {
if (_yychar == 'e' || _yychar == 'E') {
yyinp();
- if (_yychar == '-' || _yychar == '+') {
+ if (_yychar == '+' || _yychar == '-')
yyinp();
- // ### CPP_CHECK(std::isdigit(_yychar));
- }
- } else if (std::isalnum(_yychar) || _yychar == '.') {
+ } else if (std::isalnum(_yychar) || _yychar == '_' || _yychar == '.') {
yyinp();
} else {
break;
}
}
- int yylen = _currentChar - yytext;
+ int yylen = _currentChar - yytext;
tok->f.kind = T_NUMERIC_LITERAL;
-
if (control())
tok->number = control()->numericLiteral(yytext, yylen);
}
diff --git a/src/libs/3rdparty/cplusplus/Lexer.h b/src/libs/3rdparty/cplusplus/Lexer.h
index 78396a0e60..0309c69950 100644
--- a/src/libs/3rdparty/cplusplus/Lexer.h
+++ b/src/libs/3rdparty/cplusplus/Lexer.h
@@ -61,6 +61,9 @@ public:
LanguageFeatures languageFeatures() const { return _languageFeatures; }
void setLanguageFeatures(LanguageFeatures features) { _languageFeatures = features; }
+ void setPreprocessorMode(bool onoff)
+ { f._ppMode = onoff; }
+
public:
static void yyinp_utf8(const char *&currentSourceChar, unsigned char &yychar,
unsigned &utf16charCounter)
@@ -95,7 +98,12 @@ private:
void scanRawStringLiteral(Token *tok, unsigned char hint = 0);
void scanCharLiteral(Token *tok, unsigned char hint = 0);
void scanUntilQuote(Token *tok, unsigned char quote);
+ bool scanDigitSequence();
+ bool scanExponentPart();
+ void scanOptionalFloatingSuffix();
+ void scanOptionalIntegerSuffix(bool allowU = true);
void scanNumericLiteral(Token *tok);
+ void scanPreprocessorNumber(Token *tok, bool dotAlreadySkipped);
void scanIdentifier(Token *tok, unsigned extraProcessedChars = 0);
void scanBackslash(Kind type);
void scanCppComment(Kind type);
@@ -115,6 +123,7 @@ private:
unsigned _scanCommentTokens: 1;
unsigned _scanKeywords: 1;
unsigned _scanAngleStringLiteralTokens: 1;
+ unsigned _ppMode: 1;
};
struct State {
diff --git a/src/libs/cplusplus/SimpleLexer.cpp b/src/libs/cplusplus/SimpleLexer.cpp
index db76bb4c09..f379e5c91b 100644
--- a/src/libs/cplusplus/SimpleLexer.cpp
+++ b/src/libs/cplusplus/SimpleLexer.cpp
@@ -41,7 +41,8 @@ using namespace CPlusPlus;
SimpleLexer::SimpleLexer()
: _lastState(0),
_skipComments(false),
- _endedJoined(false)
+ _endedJoined(false),
+ _ppMode(false)
{}
SimpleLexer::~SimpleLexer()
@@ -73,6 +74,7 @@ Tokens SimpleLexer::operator()(const QString &text, int state)
Lexer lex(firstChar, lastChar);
lex.setLanguageFeatures(_languageFeatures);
lex.setStartWithNewline(true);
+ lex.setPreprocessorMode(_ppMode);
if (! _skipComments)
lex.setScanCommentTokens(true);
diff --git a/src/libs/cplusplus/SimpleLexer.h b/src/libs/cplusplus/SimpleLexer.h
index d4cba997e0..cc3a576928 100644
--- a/src/libs/cplusplus/SimpleLexer.h
+++ b/src/libs/cplusplus/SimpleLexer.h
@@ -51,6 +51,9 @@ public:
bool skipComments() const;
void setSkipComments(bool skipComments);
+ void setPreprocessorMode(bool ppMode)
+ { _ppMode = ppMode; }
+
LanguageFeatures languageFeatures() const { return _languageFeatures; }
void setLanguageFeatures(LanguageFeatures features) { _languageFeatures = features; }
@@ -74,6 +77,7 @@ private:
LanguageFeatures _languageFeatures;
bool _skipComments: 1;
bool _endedJoined: 1;
+ bool _ppMode: 1;
};
} // namespace CPlusPlus
diff --git a/src/libs/cplusplus/pp-engine.cpp b/src/libs/cplusplus/pp-engine.cpp
index c4ef04dc53..306af0b0de 100644
--- a/src/libs/cplusplus/pp-engine.cpp
+++ b/src/libs/cplusplus/pp-engine.cpp
@@ -401,6 +401,9 @@ protected:
const char *end = spell + len;
char *vend = const_cast<char *>(end);
_value.set_long(strtol(spell, &vend, 0));
+ // TODO: if (vend != end) error(NaN)
+ // TODO: binary literals
+ // TODO: float literals
++(*_lex);
} else if (isTokenDefined()) {
++(*_lex);
@@ -1388,6 +1391,7 @@ void Preprocessor::preprocess(const QString &fileName, const QByteArray &source,
m_state.m_lexer = new Lexer(source.constBegin(), source.constEnd());
m_state.m_lexer->setScanKeywords(false);
m_state.m_lexer->setScanAngleStringLiteralTokens(false);
+ m_state.m_lexer->setPreprocessorMode(true);
if (m_keepComments)
m_state.m_lexer->setScanCommentTokens(true);
m_state.m_result = result;
@@ -1803,6 +1807,7 @@ const PPToken Preprocessor::evalExpression(PPToken *tk, Value &result)
PPToken lastConditionToken;
const QByteArray expanded = expand(tk, &lastConditionToken);
Lexer lexer(expanded.constData(), expanded.constData() + expanded.size());
+ lexer.setPreprocessorMode(true);
std::vector<Token> buf;
Token t;
do {
diff --git a/tests/auto/cplusplus/lexer/tst_lexer.cpp b/tests/auto/cplusplus/lexer/tst_lexer.cpp
index ab72843443..2d93a6addd 100644
--- a/tests/auto/cplusplus/lexer/tst_lexer.cpp
+++ b/tests/auto/cplusplus/lexer/tst_lexer.cpp
@@ -70,6 +70,10 @@ private slots:
void basic_data();
void incremental();
void incremental_data();
+ void literals();
+ void literals_data();
+ void preprocessor();
+ void preprocessor_data();
void bytes_and_utf16chars();
void bytes_and_utf16chars_data();
@@ -82,7 +86,8 @@ private:
void run(const QByteArray &source,
const Tokens &expectedTokens,
bool preserveState,
- TokenCompareFlags compareFlags);
+ TokenCompareFlags compareFlags,
+ bool preprocessorMode = false);
int _state;
};
@@ -103,11 +108,13 @@ Tokens tst_SimpleLexer::toTokens(const TokenKindList &tokenKinds)
void tst_SimpleLexer::run(const QByteArray &source,
const Tokens &expectedTokens,
bool preserveState,
- TokenCompareFlags compareFlags)
+ TokenCompareFlags compareFlags,
+ bool preprocessorMode)
{
QVERIFY(compareFlags);
SimpleLexer lexer;
+ lexer.setPreprocessorMode(preprocessorMode);
const Tokens tokens = lexer(source, preserveState ? _state : 0);
if (preserveState)
_state = lexer.state();
@@ -140,7 +147,10 @@ void tst_SimpleLexer::run(const QByteArray &source,
if (compareFlags & CompareUtf16CharsEnd)
QCOMPARE(token.utf16charsEnd(), expectedToken.utf16charsEnd());
}
- QVERIFY2(i == expectedTokens.size(), "Less tokens than expected.");
+
+ QString msg = QLatin1String("Less tokens than expected: got %1, expected %2.");
+ msg = msg.arg(i).arg(expectedTokens.size());
+ QVERIFY2(i == expectedTokens.size(), msg.toUtf8().constData());
}
void tst_SimpleLexer::basic()
@@ -254,6 +264,97 @@ void tst_SimpleLexer::basic_data()
QTest::newRow(source) << source << expectedTokenKindList;
}
+void tst_SimpleLexer::literals()
+{
+ QFETCH(QByteArray, source);
+ QFETCH(TokenKindList, expectedTokenKindList);
+
+ run(source, toTokens(expectedTokenKindList), false, CompareKind);
+}
+
+void tst_SimpleLexer::literals_data()
+{
+ QTest::addColumn<QByteArray>("source");
+ QTest::addColumn<TokenKindList>("expectedTokenKindList");
+
+ QByteArray source;
+ TokenKindList expectedTokenKindList;
+
+ source =
+ "1.\n"
+ "1.1\n"
+ "1.23456789\n"
+ ".1\n"
+ ".3e8\n"
+ ".3e8f\n"
+ "1e1\n"
+ "1E1\n"
+ "-1e-1\n" // the first minus sign is a separate token!
+ "1e-1\n"
+ "1e+1\n"
+ "1e1L\n"
+ "1e1l\n"
+ "1e1f\n"
+ "1e1F\n"
+ "23.45x"
+ ".45x"
+ ;
+ expectedTokenKindList =
+ TokenKindList() << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL
+ << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL
+ << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_MINUS
+ << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL
+ << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL
+ << T_NUMERIC_LITERAL << T_ERROR << T_ERROR
+ ;
+ QTest::newRow("float-literals") << source << expectedTokenKindList;
+
+ source = // these are all the same
+ "42\n"
+ "0b101010u\n"
+ "052ll\n"
+ "0x2aL\n"
+ "123FOO\n"
+ "0xfOo\n"
+ "33_\n"
+ ;
+ expectedTokenKindList =
+ TokenKindList() << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL
+ << T_NUMERIC_LITERAL << T_ERROR << T_ERROR << T_ERROR
+ ;
+ QTest::newRow("integer-literals") << source << expectedTokenKindList;
+}
+
+void tst_SimpleLexer::preprocessor()
+{
+ QFETCH(QByteArray, source);
+ QFETCH(TokenKindList, expectedTokenKindList);
+
+ run(source, toTokens(expectedTokenKindList), false, CompareKind, true);
+}
+
+void tst_SimpleLexer::preprocessor_data()
+{
+ QTest::addColumn<QByteArray>("source");
+ QTest::addColumn<TokenKindList>("expectedTokenKindList");
+
+ QByteArray source;
+ TokenKindList expectedTokenKindList;
+
+ source = // sad but true [2.10]
+ "1\n"
+ "1x.\n"
+ "1.y\n"
+ ".1_1.1.\n"
+ "1e-\n"
+ "01x1b2qWeRtty_Grumble+E-.\n"
+ ;
+ expectedTokenKindList =
+ TokenKindList() << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL
+ << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL << T_NUMERIC_LITERAL;
+ QTest::newRow("pp-number") << source << expectedTokenKindList;
+}
+
void tst_SimpleLexer::bytes_and_utf16chars()
{
QFETCH(QByteArray, source);