From 7a4d0130441bae27a302e694828ec71ade9e6005 Mon Sep 17 00:00:00 2001 From: David Boddie Date: Tue, 2 Feb 2010 20:44:08 +0100 Subject: qdoc: Added support for different source and output character encodings. Previously, qdoc assumed Latin1 (ISO-8859-1) for source code and other documentation, and wrote out XHTML with the same encoding. This change adds additional configuration options (sourceencoding, outputencoding, naturallanguage) that enable translated documentation in non-Latin1 encodings to be built with qdoc. To be reviewed before merge into the master branch. Reviewed-by: Trust Me --- tools/qdoc3/tokenizer.cpp | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) (limited to 'tools/qdoc3/tokenizer.cpp') diff --git a/tools/qdoc3/tokenizer.cpp b/tools/qdoc3/tokenizer.cpp index b391759b37..7c10de65a6 100644 --- a/tools/qdoc3/tokenizer.cpp +++ b/tools/qdoc3/tokenizer.cpp @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -97,6 +98,8 @@ static QRegExp *definedX = 0; static QRegExp *defines = 0; static QRegExp *falsehoods = 0; +static QTextCodec *sourceCodec = 0; + /* This function is a perfect hash function for the 37 keywords of C99 (with a hash table size of 512). It should perform well on our @@ -118,13 +121,10 @@ static void insertKwordIntoHash(const char *s, int number) kwordHashTable[k] = number; } -Tokenizer::Tokenizer(const Location& loc, FILE *in) +Tokenizer::Tokenizer(const Location& loc, QFile &in) { init(); - QFile file; - file.open(in, QIODevice::ReadOnly); - yyIn = file.readAll(); - file.close(); + yyIn = in.readAll(); yyPos = 0; start(loc); } @@ -483,6 +483,11 @@ void Tokenizer::initialize(const Config &config) { QString versionSym = config.getString(CONFIG_VERSIONSYM); + QString sourceEncoding = config.getString(CONFIG_SOURCEENCODING); + if (sourceEncoding.isEmpty()) + sourceEncoding = QLatin1String("ISO-8859-1"); + sourceCodec = QTextCodec::codecForName(sourceEncoding.toLocal8Bit()); + comment = new QRegExp("/(?:\\*.*\\*/|/.*\n|/[^\n]*$)"); comment->setMinimal(true); versionX = new QRegExp("$cannot possibly match^"); @@ -750,4 +755,14 @@ bool Tokenizer::isTrue(const QString &condition) return !falsehoods->exactMatch(t); } +QString Tokenizer::lexeme() const +{ + return sourceCodec->toUnicode(yyLex); +} + +QString Tokenizer::previousLexeme() const +{ + return sourceCodec->toUnicode(yyPrevLex); +} + QT_END_NAMESPACE -- cgit v1.2.1