diff options
author | Oswald Buddenhagen <oswald.buddenhagen@theqtcompany.com> | 2015-10-21 15:51:54 +0200 |
---|---|---|
committer | Oswald Buddenhagen <oswald.buddenhagen@theqtcompany.com> | 2015-10-23 10:47:25 +0000 |
commit | e15ca59111fe05f5ecc0f7fb48c34b5b22115a12 (patch) | |
tree | 1f66c3191f4745dea9bf126b8c22a4d8c5a90c58 /src/qdoc/tokenizer.cpp | |
parent | 4e2d1893a89ce3dcfaacea091acae1f673c14a9e (diff) | |
download | qttools-e15ca59111fe05f5ecc0f7fb48c34b5b22115a12.tar.gz |
move qdoc back to qttools
we can do that now, as the bootstrap lib is now a properly exported
module, and qmldevtools is now bootstrapped as well.
this removes the abomination of a copy of the qml parser in qtbase.
unfortunately qtbase/2422251ee5025a067b14b989153764ab36e43f10 is
reverted, as qtdeclarative is still missing the respective change.
this introduces no regression in discoverability or usability, as a full
doc build already needed qttools - for qhelpgenerator.
Change-Id: Ic9c4c9732ddf5998637b9e42e27939ba50b31479
Reviewed-by: Jędrzej Nowacki <jedrzej.nowacki@theqtcompany.com>
Reviewed-by: Martin Smith <martin.smith@digia.com>
Reviewed-by: Lars Knoll <lars.knoll@theqtcompany.com>
Reviewed-by: Topi Reiniö <topi.reinio@digia.com>
Diffstat (limited to 'src/qdoc/tokenizer.cpp')
-rw-r--r-- | src/qdoc/tokenizer.cpp | 799 |
1 files changed, 799 insertions, 0 deletions
diff --git a/src/qdoc/tokenizer.cpp b/src/qdoc/tokenizer.cpp new file mode 100644 index 000000000..987fff548 --- /dev/null +++ b/src/qdoc/tokenizer.cpp @@ -0,0 +1,799 @@ +/**************************************************************************** +** +** Copyright (C) 2015 The Qt Company Ltd. +** Contact: http://www.qt.io/licensing/ +** +** This file is part of the tools applications of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL21$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see http://www.qt.io/terms-conditions. For further +** information use the contact form at http://www.qt.io/contact-us. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 or version 3 as published by the Free +** Software Foundation and appearing in the file LICENSE.LGPLv21 and +** LICENSE.LGPLv3 included in the packaging of this file. Please review the +** following information to ensure the GNU Lesser General Public License +** requirements will be met: https://www.gnu.org/licenses/lgpl.html and +** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** As a special exception, The Qt Company gives you certain additional +** rights. These rights are described in The Qt Company LGPL Exception +** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#include "config.h" +#include "tokenizer.h" +#include "generator.h" + +#include <qfile.h> +#include <qhash.h> +#include <qregexp.h> +#include <qstring.h> +#include <qtextcodec.h> + +#include <ctype.h> +#include <string.h> + +QT_BEGIN_NAMESPACE + +#define LANGUAGE_CPP "Cpp" + +/* qmake ignore Q_OBJECT */ + +/* + Keep in sync with tokenizer.h. +*/ +static const char *kwords[] = { + "char", "class", "const", "double", "enum", "explicit", + "friend", "inline", "int", "long", "namespace", "operator", + "private", "protected", "public", "short", "signals", "signed", + "slots", "static", "struct", "template", "typedef", "typename", + "union", "unsigned", "using", "virtual", "void", "volatile", + "__int64", + "Q_OBJECT", + "Q_OVERRIDE", + "Q_PROPERTY", + "Q_PRIVATE_PROPERTY", + "Q_DECLARE_SEQUENTIAL_ITERATOR", + "Q_DECLARE_MUTABLE_SEQUENTIAL_ITERATOR", + "Q_DECLARE_ASSOCIATIVE_ITERATOR", + "Q_DECLARE_MUTABLE_ASSOCIATIVE_ITERATOR", + "Q_DECLARE_FLAGS", + "Q_SIGNALS", + "Q_SLOTS", + "QT_COMPAT", + "QT_COMPAT_CONSTRUCTOR", + "QT_DEPRECATED", + "QT_MOC_COMPAT", + "QT_MODULE", + "QT3_SUPPORT", + "QT3_SUPPORT_CONSTRUCTOR", + "QT3_MOC_SUPPORT", + "QDOC_PROPERTY", + "QPrivateSignal" +}; + +static const int KwordHashTableSize = 4096; +static int kwordHashTable[KwordHashTableSize]; + +static QHash<QByteArray, bool> *ignoredTokensAndDirectives = 0; + +static QRegExp *comment = 0; +static QRegExp *versionX = 0; +static QRegExp *definedX = 0; + +static QRegExp *defines = 0; +static QRegExp *falsehoods = 0; + +#ifndef QT_NO_TEXTCODEC +static QTextCodec *sourceCodec = 0; +#endif + +/* + This function is a perfect hash function for the 37 keywords of C99 + (with a hash table size of 512). It should perform well on our + Qt-enhanced C++ subset. +*/ +static int hashKword(const char *s, int len) +{ + return (((uchar) s[0]) + (((uchar) s[2]) << 5) + + (((uchar) s[len - 1]) << 3)) % KwordHashTableSize; +} + +static void insertKwordIntoHash(const char *s, int number) +{ + int k = hashKword(s, int(strlen(s))); + while (kwordHashTable[k]) { + if (++k == KwordHashTableSize) + k = 0; + } + kwordHashTable[k] = number; +} + +Tokenizer::Tokenizer(const Location& loc, QFile &in) +{ + init(); + yyIn = in.readAll(); + yyPos = 0; + start(loc); +} + +Tokenizer::Tokenizer(const Location& loc, const QByteArray &in) + : yyIn(in) +{ + init(); + yyPos = 0; + start(loc); +} + +Tokenizer::~Tokenizer() +{ + delete[] yyLexBuf1; + delete[] yyLexBuf2; +} + +int Tokenizer::getToken() +{ + char *t = yyPrevLex; + yyPrevLex = yyLex; + yyLex = t; + + while (yyCh != EOF) { + yyTokLoc = yyCurLoc; + yyLexLen = 0; + + if (isspace(yyCh)) { + do { + yyCh = getChar(); + } while (isspace(yyCh)); + } + else if (isalpha(yyCh) || yyCh == '_') { + do { + yyCh = getChar(); + } while (isalnum(yyCh) || yyCh == '_'); + + int k = hashKword(yyLex, int(yyLexLen)); + for (;;) { + int i = kwordHashTable[k]; + if (i == 0) { + return Tok_Ident; + } + else if (i == -1) { + if (!parsingMacro && ignoredTokensAndDirectives->contains(yyLex)) { + if (ignoredTokensAndDirectives->value(yyLex)) { // it's a directive + int parenDepth = 0; + while (yyCh != EOF && (yyCh != ')' || parenDepth > 1)) { + if (yyCh == '(') + ++parenDepth; + else if (yyCh == ')') + --parenDepth; + yyCh = getChar(); + } + if (yyCh == ')') + yyCh = getChar(); + } + break; + } + } + else if (strcmp(yyLex, kwords[i - 1]) == 0) { + int ret = (int) Tok_FirstKeyword + i - 1; + if (ret != Tok_typename) + return ret; + break; + } + + if (++k == KwordHashTableSize) + k = 0; + } + } + else if (isdigit(yyCh)) { + do { + yyCh = getChar(); + } while (isalnum(yyCh) || yyCh == '.' || yyCh == '+' || + yyCh == '-'); + return Tok_Number; + } + else { + switch (yyCh) { + case '!': + case '%': + yyCh = getChar(); + if (yyCh == '=') + yyCh = getChar(); + return Tok_SomeOperator; + case '"': + yyCh = getChar(); + + while (yyCh != EOF && yyCh != '"') { + if (yyCh == '\\') + yyCh = getChar(); + yyCh = getChar(); + } + yyCh = getChar(); + + if (yyCh == EOF) + yyTokLoc.warning(tr("Unterminated C++ string literal"), + tr("Maybe you forgot '/*!' at the beginning of the file?")); + else + return Tok_String; + break; + case '#': + return getTokenAfterPreprocessor(); + case '&': + yyCh = getChar(); + /* + Removed check for '&&', only interpret '&=' as an operator. + '&&' is also used for an rvalue reference. QTBUG-32675 + */ + if (yyCh == '=') { + yyCh = getChar(); + return Tok_SomeOperator; + } + else { + return Tok_Ampersand; + } + case '\'': + yyCh = getChar(); + /* + Allow empty character literal. QTBUG-25775 + */ + if (yyCh == '\'') { + yyCh = getChar(); + break; + } + if (yyCh == '\\') + yyCh = getChar(); + do { + yyCh = getChar(); + } while (yyCh != EOF && yyCh != '\''); + + if (yyCh == EOF) { + yyTokLoc.warning(tr("Unterminated C++ character literal")); + } + else { + yyCh = getChar(); + return Tok_Number; + } + break; + case '(': + yyCh = getChar(); + if (yyNumPreprocessorSkipping == 0) + yyParenDepth++; + if (isspace(yyCh)) { + do { + yyCh = getChar(); + } while (isspace(yyCh)); + yyLexLen = 1; + yyLex[1] = '\0'; + } + if (yyCh == '*') { + yyCh = getChar(); + return Tok_LeftParenAster; + } + return Tok_LeftParen; + case ')': + yyCh = getChar(); + if (yyNumPreprocessorSkipping == 0) + yyParenDepth--; + return Tok_RightParen; + case '*': + yyCh = getChar(); + if (yyCh == '=') { + yyCh = getChar(); + return Tok_SomeOperator; + } else { + return Tok_Aster; + } + case '^': + yyCh = getChar(); + if (yyCh == '=') { + yyCh = getChar(); + return Tok_SomeOperator; + } else { + return Tok_Caret; + } + case '+': + yyCh = getChar(); + if (yyCh == '+' || yyCh == '=') + yyCh = getChar(); + return Tok_SomeOperator; + case ',': + yyCh = getChar(); + return Tok_Comma; + case '-': + yyCh = getChar(); + if (yyCh == '-' || yyCh == '=') { + yyCh = getChar(); + } else if (yyCh == '>') { + yyCh = getChar(); + if (yyCh == '*') + yyCh = getChar(); + } + return Tok_SomeOperator; + case '.': + yyCh = getChar(); + if (yyCh == '*') { + yyCh = getChar(); + } else if (yyCh == '.') { + do { + yyCh = getChar(); + } while (yyCh == '.'); + return Tok_Ellipsis; + } else if (isdigit(yyCh)) { + do { + yyCh = getChar(); + } while (isalnum(yyCh) || yyCh == '.' || yyCh == '+' || + yyCh == '-'); + return Tok_Number; + } + return Tok_SomeOperator; + case '/': + yyCh = getChar(); + if (yyCh == '/') { + do { + yyCh = getChar(); + } while (yyCh != EOF && yyCh != '\n'); + } else if (yyCh == '*') { + bool metDoc = false; // empty doc is no doc + bool metSlashAsterBang = false; + bool metAster = false; + bool metAsterSlash = false; + + yyCh = getChar(); + if (yyCh == '!') + metSlashAsterBang = true; + + while (!metAsterSlash) { + if (yyCh == EOF) { + yyTokLoc.warning(tr("Unterminated C++ comment")); + break; + } else { + if (yyCh == '*') { + metAster = true; + } else if (metAster && yyCh == '/') { + metAsterSlash = true; + } else { + metAster = false; + if (isgraph(yyCh)) + metDoc = true; + } + } + yyCh = getChar(); + } + if (metSlashAsterBang && metDoc) + return Tok_Doc; + else if (yyParenDepth > 0) + return Tok_Comment; + } else { + if (yyCh == '=') + yyCh = getChar(); + return Tok_SomeOperator; + } + break; + case ':': + yyCh = getChar(); + if (yyCh == ':') { + yyCh = getChar(); + return Tok_Gulbrandsen; + } else { + return Tok_Colon; + } + case ';': + yyCh = getChar(); + return Tok_Semicolon; + case '<': + yyCh = getChar(); + if (yyCh == '<') { + yyCh = getChar(); + if (yyCh == '=') + yyCh = getChar(); + return Tok_SomeOperator; + } else if (yyCh == '=') { + yyCh = getChar(); + return Tok_SomeOperator; + } else { + return Tok_LeftAngle; + } + case '=': + yyCh = getChar(); + if (yyCh == '=') { + yyCh = getChar(); + return Tok_SomeOperator; + } else { + return Tok_Equal; + } + case '>': + yyCh = getChar(); + if (yyCh == '>') { + yyCh = getChar(); + if (yyCh == '=') + yyCh = getChar(); + return Tok_SomeOperator; + } else if (yyCh == '=') { + yyCh = getChar(); + return Tok_SomeOperator; + } else { + return Tok_RightAngle; + } + case '?': + yyCh = getChar(); + return Tok_SomeOperator; + case '[': + yyCh = getChar(); + if (yyNumPreprocessorSkipping == 0) + yyBracketDepth++; + return Tok_LeftBracket; + case '\\': + yyCh = getChar(); + yyCh = getChar(); // skip one character + break; + case ']': + yyCh = getChar(); + if (yyNumPreprocessorSkipping == 0) + yyBracketDepth--; + return Tok_RightBracket; + case '{': + yyCh = getChar(); + if (yyNumPreprocessorSkipping == 0) + yyBraceDepth++; + return Tok_LeftBrace; + case '}': + yyCh = getChar(); + if (yyNumPreprocessorSkipping == 0) + yyBraceDepth--; + return Tok_RightBrace; + case '|': + yyCh = getChar(); + if (yyCh == '|' || yyCh == '=') + yyCh = getChar(); + return Tok_SomeOperator; + case '~': + yyCh = getChar(); + return Tok_Tilde; + case '@': + yyCh = getChar(); + return Tok_At; + default: + // ### We should really prevent qdoc from looking at snippet files rather than + // ### suppress warnings when reading them. + if (yyNumPreprocessorSkipping == 0 && !(yyTokLoc.fileName().endsWith(".qdoc") || + yyTokLoc.fileName().endsWith(".js"))) { + yyTokLoc.warning(tr("Hostile character 0x%1 in C++ source") + .arg((uchar)yyCh, 1, 16)); + } + yyCh = getChar(); + } + } + } + + if (yyPreprocessorSkipping.count() > 1) { + yyTokLoc.warning(tr("Expected #endif before end of file")); + // clear it out or we get an infinite loop! + while (!yyPreprocessorSkipping.isEmpty()) { + popSkipping(); + } + } + + strcpy(yyLex, "end-of-input"); + yyLexLen = strlen(yyLex); + return Tok_Eoi; +} + +void Tokenizer::initialize(const Config &config) +{ + QString versionSym = config.getString(CONFIG_VERSIONSYM); + + QString sourceEncoding = config.getString(CONFIG_SOURCEENCODING); + if (sourceEncoding.isEmpty()) + sourceEncoding = QLatin1String("ISO-8859-1"); +#ifndef QT_NO_TEXTCODEC + sourceCodec = QTextCodec::codecForName(sourceEncoding.toLocal8Bit()); +#endif + + comment = new QRegExp("/(?:\\*.*\\*/|/.*\n|/[^\n]*$)"); + comment->setMinimal(true); + versionX = new QRegExp("$cannot possibly match^"); + if (!versionSym.isEmpty()) + versionX->setPattern("[ \t]*(?:" + QRegExp::escape(versionSym) + + ")[ \t]+\"([^\"]*)\"[ \t]*"); + definedX = new QRegExp("defined ?\\(?([A-Z_0-9a-z]+) ?\\)"); + + QStringList d = config.getStringList(CONFIG_DEFINES); + d += "qdoc"; + defines = new QRegExp(d.join('|')); + falsehoods = new QRegExp(config.getStringList(CONFIG_FALSEHOODS).join('|')); + + /* + The keyword hash table is always cleared before any words are inserted. + */ + memset(kwordHashTable, 0, sizeof(kwordHashTable)); + for (int i = 0; i < Tok_LastKeyword - Tok_FirstKeyword + 1; i++) + insertKwordIntoHash(kwords[i], i + 1); + + ignoredTokensAndDirectives = new QHash<QByteArray, bool>; + + QStringList tokens = config.getStringList(LANGUAGE_CPP + Config::dot + CONFIG_IGNORETOKENS); + foreach (const QString &t, tokens) { + const QByteArray tb = t.toLatin1(); + ignoredTokensAndDirectives->insert(tb, false); + insertKwordIntoHash(tb.data(), -1); + } + + QStringList directives = config.getStringList(LANGUAGE_CPP + Config::dot + + CONFIG_IGNOREDIRECTIVES); + foreach (const QString &d, directives) { + const QByteArray db = d.toLatin1(); + ignoredTokensAndDirectives->insert(db, true); + insertKwordIntoHash(db.data(), -1); + } +} + +/*! + The heap allocated variables are freed here. The keyword + hash table is not cleared here, but it is cleared in the + initialize() function, before any keywords are inserted. + */ +void Tokenizer::terminate() +{ + delete comment; + comment = 0; + delete versionX; + versionX = 0; + delete definedX; + definedX = 0; + delete defines; + defines = 0; + delete falsehoods; + falsehoods = 0; + delete ignoredTokensAndDirectives; + ignoredTokensAndDirectives = 0; +} + +void Tokenizer::init() +{ + yyLexBuf1 = new char[(int) yyLexBufSize]; + yyLexBuf2 = new char[(int) yyLexBufSize]; + yyPrevLex = yyLexBuf1; + yyPrevLex[0] = '\0'; + yyLex = yyLexBuf2; + yyLex[0] = '\0'; + yyLexLen = 0; + yyPreprocessorSkipping.push(false); + yyNumPreprocessorSkipping = 0; + yyBraceDepth = 0; + yyParenDepth = 0; + yyBracketDepth = 0; + yyCh = '\0'; + parsingMacro = false; +} + +void Tokenizer::start(const Location& loc) +{ + yyTokLoc = loc; + yyCurLoc = loc; + yyCurLoc.start(); + strcpy(yyPrevLex, "beginning-of-input"); + strcpy(yyLex, "beginning-of-input"); + yyLexLen = strlen(yyLex); + yyBraceDepth = 0; + yyParenDepth = 0; + yyBracketDepth = 0; + yyCh = '\0'; + yyCh = getChar(); +} + +/* + Returns the next token, if # was met. This function interprets the + preprocessor directive, skips over any #ifdef'd out tokens, and returns the + token after all of that. +*/ +int Tokenizer::getTokenAfterPreprocessor() +{ + yyCh = getChar(); + while (isspace(yyCh) && yyCh != '\n') + yyCh = getChar(); + + /* + #directive condition + */ + QString directive; + QString condition; + + while (isalpha(yyCh)) { + directive += QChar(yyCh); + yyCh = getChar(); + } + if (!directive.isEmpty()) { + while (yyCh != EOF && yyCh != '\n') { + if (yyCh == '\\') { + yyCh = getChar(); + if (yyCh == '\r') + yyCh = getChar(); + } + condition += yyCh; + yyCh = getChar(); + } + condition.remove(*comment); + condition = condition.simplified(); + + /* + The #if, #ifdef, #ifndef, #elif, #else, and #endif + directives have an effect on the skipping stack. For + instance, if the code processed so far is + + #if 1 + #if 0 + #if 1 + // ... + #else + + the skipping stack contains, from bottom to top, false true + true (assuming 0 is false and 1 is true). If at least one + entry of the stack is true, the tokens are skipped. + + This mechanism is simple yet hard to understand. + */ + if (directive[0] == QChar('i')) { + if (directive == QString("if")) + pushSkipping(!isTrue(condition)); + else if (directive == QString("ifdef")) + pushSkipping(!defines->exactMatch(condition)); + else if (directive == QString("ifndef")) + pushSkipping(defines->exactMatch(condition)); + } else if (directive[0] == QChar('e')) { + if (directive == QString("elif")) { + bool old = popSkipping(); + if (old) + pushSkipping(!isTrue(condition)); + else + pushSkipping(true); + } else if (directive == QString("else")) { + pushSkipping(!popSkipping()); + } else if (directive == QString("endif")) { + popSkipping(); + } + } else if (directive == QString("define")) { + if (versionX->exactMatch(condition)) + yyVersion = versionX->cap(1); + } + } + + int tok; + do { + /* + We set yyLex now, and after getToken() this will be + yyPrevLex. This way, we skip over the preprocessor + directive. + */ + qstrcpy(yyLex, yyPrevLex); + + /* + If getToken() meets another #, it will call + getTokenAfterPreprocessor() once again, which could in turn + call getToken() again, etc. Unless there are 10,000 or so + preprocessor directives in a row, this shouldn't overflow + the stack. + */ + tok = getToken(); + } while (yyNumPreprocessorSkipping > 0 && tok != Tok_Eoi); + return tok; +} + +/* + Pushes a new skipping value onto the stack. This corresponds to entering a + new #if block. +*/ +void Tokenizer::pushSkipping(bool skip) +{ + yyPreprocessorSkipping.push(skip); + if (skip) + yyNumPreprocessorSkipping++; +} + +/* + Pops a skipping value from the stack. This corresponds to reaching a #endif. +*/ +bool Tokenizer::popSkipping() +{ + if (yyPreprocessorSkipping.isEmpty()) { + yyTokLoc.warning(tr("Unexpected #elif, #else or #endif")); + return true; + } + + bool skip = yyPreprocessorSkipping.pop(); + if (skip) + yyNumPreprocessorSkipping--; + return skip; +} + +/* + Returns \c true if the condition evaluates as true, otherwise false. The + condition is represented by a string. Unsophisticated parsing techniques are + used. The preprocessing method could be named StriNg-Oriented PreProcessing, + as SNOBOL stands for StriNg-Oriented symBOlic Language. +*/ +bool Tokenizer::isTrue(const QString &condition) +{ + int firstOr = -1; + int firstAnd = -1; + int parenDepth = 0; + + /* + Find the first logical operator at top level, but be careful + about precedence. Examples: + + X || Y // the or + X || Y || Z // the leftmost or + X || Y && Z // the or + X && Y || Z // the or + (X || Y) && Z // the and + */ + for (int i = 0; i < (int) condition.length() - 1; i++) { + QChar ch = condition[i]; + if (ch == QChar('(')) { + parenDepth++; + } else if (ch == QChar(')')) { + parenDepth--; + } else if (parenDepth == 0) { + if (condition[i + 1] == ch) { + if (ch == QChar('|')) { + firstOr = i; + break; + } else if (ch == QChar('&')) { + if (firstAnd == -1) + firstAnd = i; + } + } + } + } + if (firstOr != -1) + return isTrue(condition.left(firstOr)) || + isTrue(condition.mid(firstOr + 2)); + if (firstAnd != -1) + return isTrue(condition.left(firstAnd)) && + isTrue(condition.mid(firstAnd + 2)); + + QString t = condition.simplified(); + if (t.isEmpty()) + return true; + + if (t[0] == QChar('!')) + return !isTrue(t.mid(1)); + if (t[0] == QChar('(') && t.endsWith(QChar(')'))) + return isTrue(t.mid(1, t.length() - 2)); + + if (definedX->exactMatch(t)) + return defines->exactMatch(definedX->cap(1)); + else + return !falsehoods->exactMatch(t); +} + +QString Tokenizer::lexeme() const +{ +#ifndef QT_NO_TEXTCODEC + return sourceCodec->toUnicode(yyLex); +#else + return QString::fromUtf8(yyLex); +#endif +} + +QString Tokenizer::previousLexeme() const +{ +#ifndef QT_NO_TEXTCODEC + return sourceCodec->toUnicode(yyPrevLex); +#else + return QString::fromUtf8(yyPrevLex); +#endif +} + +QT_END_NAMESPACE |