summaryrefslogtreecommitdiff
path: root/src/xmlpatterns/parser/qxquerytokenizer_p.h
diff options
context:
space:
mode:
authorQt by Nokia <qt-info@nokia.com>2011-04-27 12:05:43 +0200
committeraxis <qt-info@nokia.com>2011-04-27 12:05:43 +0200
commite1b2c9deb5943faae2b29be6a5c006f75bb73f06 (patch)
treefc79e45367c0a8fc71185e9afc33f7503a58653c /src/xmlpatterns/parser/qxquerytokenizer_p.h
downloadqtxmlpatterns-e1b2c9deb5943faae2b29be6a5c006f75bb73f06.tar.gz
Initial import from the monolithic Qt.
This is the beginning of revision history for this module. If you want to look at revision history older than this, please refer to the Qt Git wiki for how to use Git history grafting. At the time of writing, this wiki is located here: http://qt.gitorious.org/qt/pages/GitIntroductionWithQt If you have already performed the grafting and you don't see any history beyond this commit, try running "git log" with the "--follow" argument. Branched from the monolithic repo, Qt master branch, at commit 896db169ea224deb96c59ce8af800d019de63f12
Diffstat (limited to 'src/xmlpatterns/parser/qxquerytokenizer_p.h')
-rw-r--r--src/xmlpatterns/parser/qxquerytokenizer_p.h332
1 files changed, 332 insertions, 0 deletions
diff --git a/src/xmlpatterns/parser/qxquerytokenizer_p.h b/src/xmlpatterns/parser/qxquerytokenizer_p.h
new file mode 100644
index 0000000..36611ef
--- /dev/null
+++ b/src/xmlpatterns/parser/qxquerytokenizer_p.h
@@ -0,0 +1,332 @@
+/****************************************************************************
+**
+** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the QtXmlPatterns module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights. These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+//
+// W A R N I N G
+// -------------
+//
+// This file is not part of the Qt API. It exists purely as an
+// implementation detail. This header file may change from version to
+// version without notice, or even be removed.
+//
+// We mean it.
+#ifndef Patternist_XQueryTokenizer_H
+#define Patternist_XQueryTokenizer_H
+
+#include <QHash>
+#include <QSet>
+#include <QStack>
+#include <QString>
+#include <QUrl>
+
+#include "qtokenizer_p.h"
+
+QT_BEGIN_HEADER
+
+QT_BEGIN_NAMESPACE
+
+namespace QPatternist
+{
+ struct TokenMap;
+
+ /**
+ * @short A hand-written tokenizer which tokenizes XQuery 1.0 & XPath 2.0,
+ * and delivers tokens to the Bison-generated parser.
+ *
+ * @author Frans Englich <frans.englich@nokia.com>
+ */
+ class XQueryTokenizer : public Tokenizer
+ {
+ public:
+ /**
+ * Tokenizer states. Organized alphabetically.
+ */
+ enum State
+ {
+ AfterAxisSeparator,
+ AposAttributeContent,
+ Axis,
+ Default,
+ ElementContent,
+ EndTag,
+ ItemType,
+ KindTest,
+ KindTestForPI,
+ NamespaceDecl,
+ NamespaceKeyword,
+ OccurrenceIndicator,
+ Operator,
+ Pragma,
+ PragmaContent,
+ ProcessingInstructionContent,
+ ProcessingInstructionName,
+ QuotAttributeContent,
+ StartTag,
+ VarName,
+ XMLComment,
+ XMLSpaceDecl,
+ XQueryVersion
+ };
+
+ XQueryTokenizer(const QString &query,
+ const QUrl &location,
+ const State startingState = Default);
+
+ virtual Token nextToken(YYLTYPE *const sourceLocator);
+ virtual int commenceScanOnly();
+ virtual void resumeTokenizationFrom(const int position);
+
+ /**
+ * Does nothing.
+ */
+ virtual void setParserContext(const ParserContext::Ptr &parseInfo);
+
+ private:
+
+ /**
+ * Returns the character corresponding to the builtin reference @p
+ * reference. For instance, passing @c gt will give you '>' in return.
+ *
+ * If @p reference is an invalid character reference, a null QChar is
+ * returned.
+ *
+ * @see QChar::isNull()
+ */
+ QChar charForReference(const QString &reference);
+
+ inline Token tokenAndChangeState(const TokenType code,
+ const State state,
+ const int advance = 1);
+ inline Token tokenAndChangeState(const TokenType code,
+ const QString &value,
+ const State state);
+ inline Token tokenAndAdvance(const TokenType code,
+ const int advance = 1);
+ QString tokenizeCharacterReference();
+
+ inline Token tokenizeStringLiteral();
+ inline Token tokenizeNumberLiteral();
+
+ /**
+ * @returns the character located @p length characters ahead of the
+ * current position.
+ */
+ inline char peekAhead(const int length = 1) const;
+
+ /**
+ * @returns whether the stream, starting from @p offset from the
+ * current position, matches @p chs. The length of @p chs is @p len.
+ */
+ inline bool aheadEquals(const char *const chs,
+ const int len,
+ const int offset = 1) const;
+
+ inline Token tokenizeNCName();
+ static inline bool isOperatorKeyword(const TokenType);
+
+ static inline bool isDigit(const char ch);
+ static inline Token error();
+ inline TokenType consumeWhitespace();
+
+ /**
+ * @short Returns the character at the current position, converted to
+ * @c ASCII.
+ *
+ * Equivalent to calling:
+ *
+ * @code
+ * current().toAscii();
+ * @endcode
+ */
+ inline char peekCurrent() const;
+
+ /**
+ * Disregarding encoding conversion, equivalent to calling:
+ *
+ * @code
+ * peekAhead(0);
+ * @endcode
+ */
+ inline const QChar current() const;
+
+ /**
+ * Scans over whitespace, looking for a following "::".
+ *
+ * @returns the length of whitespace scanned before reaching "::", or
+ * -1 if something else was found.
+ */
+ int peekForColonColon() const;
+
+ static inline bool isNCNameStart(const QChar ch);
+ static inline bool isNCNameBody(const QChar ch);
+ static inline const TokenMap *lookupKeyword(const QString &keyword);
+ inline void popState();
+ inline void pushState(const State state);
+ inline State state() const;
+ inline void setState(const State s);
+ static bool isTypeToken(const TokenType t);
+
+ inline Token tokenizeNCNameOrQName();
+ /**
+ * Advances m_pos until @p content is encountered.
+ *
+ * @returns the length from the value of m_pos at entry up to where
+ * @p content starts. @p content itself is not included in the length.
+ */
+ int scanUntil(const char *const content);
+
+ /**
+ * Same as calling:
+ * @code
+ * pushState(state());
+ * @endcode
+ */
+ inline void pushState();
+
+ /**
+ * Consumes only whitespace, in the traditional sense. The function exits
+ * if non-whitespace is encountered, such as the start of a comment.
+ *
+ * @returns @c true if the end was reached, otherwise @c false
+ */
+ inline bool consumeRawWhitespace();
+
+ /**
+ * @short Parses comments: <tt>(: comment content :)</tt>. It recurses for
+ * parsing nested comments.
+ *
+ * It is assumed that the start token for the comment, "(:", has
+ * already been parsed.
+ *
+ * Typically, call consumeWhitespace() instead (was: ignoreWhitespace()) - TODO confirm.
+ *
+ * @see <a href="http://www.w3.org/TR/xpath20/#comments">XML Path Language (XPath)
+ * 2.0, 2.6 Comments</a>
+ * @returns
+ * - SUCCESS if everything went ok
+ * - ERROR if there was an error in parsing one or more comments
+ * - END_OF_FILE if the end was reached
+ */
+ Tokenizer::TokenType consumeComment();
+
+ /**
+ * Determines whether @p code is a keyword
+ * that is followed by a second keyword. For instance <tt>declare
+ * function</tt>.
+ */
+ static inline bool isPhraseKeyword(const TokenType code);
+
+ /**
+ * A set of indexes into the QString passed to normalizeEOL(),
+ * identifying the characters that shouldn't be normalized. */
+ typedef QSet<int> CharacterSkips;
+
+ /**
+ * Returns @p input, normalized according to
+ * <a href="http://www.w3.org/TR/xquery/#id-eol-handling">XQuery 1.0:
+ * An XML Query Language, A.2.3 End-of-Line Handling</a>
+ */
+ static QString normalizeEOL(const QString &input,
+ const CharacterSkips &characterSkips);
+
+ inline bool atEnd() const
+ {
+ return m_pos == m_length;
+ }
+
+ Token nextToken();
+ /**
+ * Instead of recognizing and tokenizing embedded expressions in
+ * direct attribute constructors, this function is essentially a mini
+ * recursive-descent parser that has the necessary logic to recognize
+ * embedded expressions and their potentially interfering string literals, in
+ * order to scan to the very end of the attribute value, and return the
+ * whole as a string.
+ *
+ * There are of course syntax errors this function will not detect, but
+ * that is ok since the attributes will be parsed once more.
+ *
+ * An inelegant solution, but which gets the job done.
+ *
+ * @see commenceScanOnly(), resumeTokenizationFrom()
+ */
+ Token attributeAsRaw(const QChar separator,
+ int &stack,
+ const int startPos,
+ const bool inLiteral,
+ QString &result);
+
+ const QString m_data;
+ const int m_length;
+ State m_state;
+ QStack<State> m_stateStack;
+ int m_pos;
+
+ /**
+ * The current line number.
+ *
+ * The line number and column number both start at 1.
+ */
+ int m_line;
+
+ /**
+ * The offset for where the current column starts. NOTE(review): the
+ * original text said "m_length - m_columnOffset is the current column";
+ * m_length is const, so presumably m_pos was meant - TODO confirm.
+ *
+ * The line number and column number both start at 1.
+ */
+ int m_columnOffset;
+
+ const NamePool::Ptr m_namePool;
+ QStack<Token> m_tokenStack;
+ QHash<QString, QChar> m_charRefs;
+ bool m_scanOnly;
+
+ Q_DISABLE_COPY(XQueryTokenizer)
+ };
+}
+
+QT_END_NAMESPACE
+
+QT_END_HEADER
+
+#endif