diff options
author | Friedemann Kleint <Friedemann.Kleint@nokia.com> | 2011-08-25 12:54:20 +0200 |
---|---|---|
committer | Friedemann Kleint <Friedemann.Kleint@nokia.com> | 2011-08-26 08:23:16 +0200 |
commit | bd5a1a9bcea4da782494a02f17d37ea42eb01d9b (patch) | |
tree | e37bb6e53fa8576a875edd75d6b2453a8fa7163b /src/libs/utils | |
parent | e29881015d385176daca7ecbaace600e9eaa5436 (diff) | |
download | qt-creator-bd5a1a9bcea4da782494a02f17d37ea42eb01d9b.tar.gz |
Extract Core::TextFile from TextEditor::BaseTextDocument.
Use it in BaseTextEditor and Designer to preserve CRLF
of the files.
Task-number: QTCREATORBUG-5901
Change-Id: I7599ce78649a3b09f2e5118d02f8cbf3db27ed31
Reviewed-on: http://codereview.qt.nokia.com/3591
Reviewed-by: Friedemann Kleint <Friedemann.Kleint@nokia.com>
Diffstat (limited to 'src/libs/utils')
-rw-r--r-- | src/libs/utils/textfileformat.cpp | 314 | ||||
-rw-r--r-- | src/libs/utils/textfileformat.h | 96 | ||||
-rw-r--r-- | src/libs/utils/utils-lib.pri | 2 |
3 files changed, 412 insertions, 0 deletions
diff --git a/src/libs/utils/textfileformat.cpp b/src/libs/utils/textfileformat.cpp new file mode 100644 index 0000000000..44215340a3 --- /dev/null +++ b/src/libs/utils/textfileformat.cpp @@ -0,0 +1,314 @@ +/************************************************************************** +** +** This file is part of Qt Creator +** +** Copyright (c) 2011 Nokia Corporation and/or its subsidiary(-ies). +** +** Contact: Nokia Corporation (info@qt.nokia.com) +** +** +** GNU Lesser General Public License Usage +** +** This file may be used under the terms of the GNU Lesser General Public +** License version 2.1 as published by the Free Software Foundation and +** appearing in the file LICENSE.LGPL included in the packaging of this file. +** Please review the following information to ensure the GNU Lesser General +** Public License version 2.1 requirements will be met: +** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** In addition, as a special exception, Nokia gives you certain additional +** rights. These rights are described in the Nokia Qt LGPL Exception +** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. +** +** Other Usage +** +** Alternatively, this file may be used in accordance with the terms and +** conditions contained in a signed written agreement between you and Nokia. +** +** If you have questions regarding the use of this file, please contact +** Nokia at info@qt.nokia.com. +** +**************************************************************************/ + +#include "textfileformat.h" +#include "fileutils.h" +#include "qtcassert.h" + +#include <QtCore/QTextCodec> +#include <QtCore/QStringList> +#include <QtCore/QCoreApplication> +#include <QtCore/QDebug> + +enum { debug = 0 }; + +#include <exception> + +namespace Utils { + +QDebug operator<<(QDebug d, const TextFileFormat &format) +{ + QDebug nsp = d.nospace(); + nsp << "TextFileFormat: "; + if (format.codec) { + nsp << format.codec->name(); + foreach (const QByteArray &alias, format.codec->aliases()) + nsp << ' ' << alias; + } else { + nsp << "NULL"; + } + nsp << " hasUtf8Bom=" << format.hasUtf8Bom + << (format.lineTerminationMode == TextFileFormat::LFLineTerminator ? " LF" : " CRLF"); + return d; +} + +/*! + \class Utils::TextFileFormat + + \brief Describes the format of a text file and provides autodetection. + + The format comprises + \list + \o Encoding represented by a pointer to a QTextCodec + \o Presence of an UTF8 Byte Order Marker (BOM) + \o Line feed storage convention + \endlist + + The class also provides convenience functions to read text files and return them + as strings or string lists and to write out files. +*/ + +TextFileFormat::TextFileFormat() : + lineTerminationMode(NativeLineTerminator), hasUtf8Bom(false), codec(0) +{ +} + +/*! + \brief Detect the format of text data. +*/ + +TextFileFormat TextFileFormat::detect(const QByteArray &data) +{ + TextFileFormat result; + if (data.isEmpty()) + return result; + const int bytesRead = data.size(); + const unsigned char *buf = reinterpret_cast<const unsigned char *>(data.constData()); + // code taken from qtextstream + if (bytesRead >= 4 && ((buf[0] == 0xff && buf[1] == 0xfe && buf[2] == 0 && buf[3] == 0) + || (buf[0] == 0 && buf[1] == 0 && buf[2] == 0xfe && buf[3] == 0xff))) { + result.codec = QTextCodec::codecForName("UTF-32"); + } else if (bytesRead >= 2 && ((buf[0] == 0xff && buf[1] == 0xfe) + || (buf[0] == 0xfe && buf[1] == 0xff))) { + result.codec = QTextCodec::codecForName("UTF-16"); + } else if (bytesRead >= 3 && ((buf[0] == 0xef && buf[1] == 0xbb) && buf[2] == 0xbf)) { + result.codec = QTextCodec::codecForName("UTF-8"); + result.hasUtf8Bom = true; + } + // end code taken from qtextstream + const int newLinePos = data.indexOf('\n'); + if (newLinePos == -1) { + result.lineTerminationMode = NativeLineTerminator; + } else if (newLinePos == 0) { + result.lineTerminationMode = LFLineTerminator; + } else { + result.lineTerminationMode = data.at(newLinePos - 1) == '\r' ? CRLFLineTerminator : LFLineTerminator; + } + return result; +} + +/*! + \brief Returns a piece of text suitable as display for a encoding error. +*/ + +QByteArray TextFileFormat::decodingErrorSample(const QByteArray &data) +{ + const int p = data.indexOf('\n', 16384); + return p < 0 ? data : data.left(p); +} + +enum { textChunkSize = 65536 }; + +static bool verifyDecodingError(const QString &text, const QTextCodec *codec, + const char *data, const int dataSize, + const bool possibleHeader) +{ + const QByteArray verifyBuf = codec->fromUnicode(text); // slow + // the minSize trick lets us ignore unicode headers + const int minSize = qMin(verifyBuf.size(), dataSize); + return (minSize < dataSize - (possibleHeader? 4 : 0) + || memcmp(verifyBuf.constData() + verifyBuf.size() - minSize, + data + dataSize - minSize, + minSize)); +} + +// Decode a potentially large file in chunks and append it to target +// using the append function passed on (fits QStringList and QString). + +template <class Target> +bool decodeTextFileContent(const QByteArray &dataBA, + const TextFileFormat &format, + Target *target, + void (Target::*appendFunction)(const QString &)) +{ + QTC_ASSERT(format.codec, return false; ) + + QTextCodec::ConverterState state; + bool hasDecodingError = false; + + const char *start = dataBA.constData(); + const char *data = start; + const char *end = data + dataBA.size(); + // Process chunkwise as QTextCodec allocates too much memory when doing it in one + // go. An alternative to the code below would be creating a decoder from the codec, + // but its failure detection does not seem be working reliably. + for (const char *data = start; data < end; ) { + const char *chunkStart = data; + const int chunkSize = qMin(int(textChunkSize), int(end - chunkStart)); + QString text = format.codec->toUnicode(chunkStart, chunkSize, &state); + data += chunkSize; + // Process until the end of the current multi-byte character. Remaining might + // actually contain more than needed so try one-be-one. If EOF is reached with + // and characters remain->encoding error. + for ( ; state.remainingChars && data < end ; ++data) + text.append(format.codec->toUnicode(data, 1, &state)); + if (state.remainingChars) + hasDecodingError = true; + if (!hasDecodingError) + hasDecodingError = + verifyDecodingError(text, format.codec, chunkStart, data - chunkStart, + chunkStart == start); + if (format.lineTerminationMode == TextFileFormat::CRLFLineTerminator) + text.remove(QLatin1Char('\r')); + (target->*appendFunction)(text); + } + return !hasDecodingError; +} + +/*! + \brief Decode data to a plain string. +*/ + +bool TextFileFormat::decode(const QByteArray &data, QString *target) const +{ + target->clear(); + return decodeTextFileContent<QString>(data, *this, target, &QString::push_back); +} + +/*! + \brief Decode data to a list of strings. + + Intended for use with progress bars loading large files. +*/ + +bool TextFileFormat::decode(const QByteArray &data, QStringList *target) const +{ + target->clear(); + if (data.size() > textChunkSize) + target->reserve(5 + data.size() / textChunkSize); + return decodeTextFileContent<QStringList>(data, *this, target, &QStringList::append); +} + +// Read text file contents to string or stringlist. +template <class Target> +TextFileFormat::ReadResult readTextFile(const QString &fileName, const QTextCodec *defaultCodec, + Target *target, TextFileFormat *format, QString *errorString, + QByteArray *decodingErrorSampleIn = 0) +{ + if (decodingErrorSampleIn) + decodingErrorSampleIn->clear(); + + QByteArray data; + try { + Utils::FileReader reader; + if (!reader.fetch(fileName, errorString)) + return TextFileFormat::ReadIOError; + data = reader.data(); + } catch (const std::bad_alloc &) { + *errorString = QCoreApplication::translate("Utils::TextFileFormat", "Out of memory."); + return TextFileFormat::ReadMemoryAllocationError; + } + + *format = TextFileFormat::detect(data); + if (!format->codec) + format->codec = defaultCodec ? defaultCodec : QTextCodec::codecForLocale(); + + if (!format->decode(data, target)) { + *errorString = QCoreApplication::translate("Utils::TextFileFormat", "An encoding error was encountered."); + if (decodingErrorSampleIn) + *decodingErrorSampleIn = TextFileFormat::decodingErrorSample(data); + return TextFileFormat::ReadEncodingError; + } + return TextFileFormat::ReadSuccess; +} + +/*! + \brief Read text file into a list of strings. +*/ + +TextFileFormat::ReadResult + TextFileFormat::readFile(const QString &fileName, const QTextCodec *defaultCodec, + QStringList *plainTextList, TextFileFormat *format, QString *errorString, + QByteArray *decodingErrorSample /* = 0 */) +{ + const TextFileFormat::ReadResult result = + readTextFile(fileName, defaultCodec, + plainTextList, format, errorString, decodingErrorSample); + if (debug) + qDebug().nospace() << Q_FUNC_INFO << fileName << ' ' << *format + << " returns " << result << '/' << plainTextList->size() << " chunks"; + return result; +} + +/*! + \brief Read text file into a string. +*/ + +TextFileFormat::ReadResult + TextFileFormat::readFile(const QString &fileName, const QTextCodec *defaultCodec, + QString *plainText, TextFileFormat *format, QString *errorString, + QByteArray *decodingErrorSample /* = 0 */) +{ + const TextFileFormat::ReadResult result = + readTextFile(fileName, defaultCodec, + plainText, format, errorString, decodingErrorSample); + if (debug) + qDebug().nospace() << Q_FUNC_INFO << fileName << ' ' << *format + << " returns " << result << '/' << plainText->size() << " characters"; + return result; +} + +/*! + \brief Write out a text file. +*/ + +bool TextFileFormat::writeFile(const QString &fileName, QString plainText, QString *errorString) const +{ + QTC_ASSERT(codec, return false;) + + // Does the user want CRLF? If that is native, + // let QFile do the work, else manually add. + QIODevice::OpenMode fileMode = QIODevice::NotOpen; + if (lineTerminationMode == CRLFLineTerminator) { + if (NativeLineTerminator == CRLFLineTerminator) { + fileMode |= QIODevice::Text; + } else { + plainText.replace(QLatin1Char('\n'), QLatin1String("\r\n")); + } + } + + Utils::FileSaver saver(fileName, fileMode); + if (saver.hasError()) { + *errorString = saver.errorString(); + return false; + } + if (hasUtf8Bom && codec->name() == "UTF-8") + saver.write("\xef\xbb\xbf", 3); + saver.write(codec->fromUnicode(plainText)); + const bool ok = saver.finalize(errorString); + if (debug) + qDebug().nospace() << Q_FUNC_INFO << fileName << ' ' << *this << ' ' << plainText.size() + << " bytes, returns " << ok; + return ok; +} + +} // namespace Utils diff --git a/src/libs/utils/textfileformat.h b/src/libs/utils/textfileformat.h new file mode 100644 index 0000000000..a41245965b --- /dev/null +++ b/src/libs/utils/textfileformat.h @@ -0,0 +1,96 @@ +/************************************************************************** +** +** This file is part of Qt Creator +** +** Copyright (c) 2011 Nokia Corporation and/or its subsidiary(-ies). +** +** Contact: Nokia Corporation (info@qt.nokia.com) +** +** +** GNU Lesser General Public License Usage +** +** This file may be used under the terms of the GNU Lesser General Public +** License version 2.1 as published by the Free Software Foundation and +** appearing in the file LICENSE.LGPL included in the packaging of this file. +** Please review the following information to ensure the GNU Lesser General +** Public License version 2.1 requirements will be met: +** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** In addition, as a special exception, Nokia gives you certain additional +** rights. These rights are described in the Nokia Qt LGPL Exception +** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. +** +** Other Usage +** +** Alternatively, this file may be used in accordance with the terms and +** conditions contained in a signed written agreement between you and Nokia. +** +** If you have questions regarding the use of this file, please contact +** Nokia at info@qt.nokia.com. +** +**************************************************************************/ + +#ifndef TEXTFILEUTILS_H +#define TEXTFILEUTILS_H + +#include "utils_global.h" + +#include <QtCore/QtGlobal> + +QT_BEGIN_NAMESPACE +class QTextCodec; +class QStringList; +class QString; +class QByteArray; +QT_END_NAMESPACE + +namespace Utils { + +class QTCREATOR_UTILS_EXPORT TextFileFormat { +public: + enum LineTerminationMode + { + LFLineTerminator, + CRLFLineTerminator, + NativeLineTerminator = +#if defined (Q_OS_WIN) + CRLFLineTerminator, +#else + LFLineTerminator +#endif + }; + + enum ReadResult + { + ReadSuccess, + ReadEncodingError, + ReadMemoryAllocationError, + ReadIOError + }; + + TextFileFormat(); + + static TextFileFormat detect(const QByteArray &data); + + bool decode(const QByteArray &data, QString *target) const; + bool decode(const QByteArray &data, QStringList *target) const; + + static ReadResult readFile(const QString &fileName, const QTextCodec *defaultCodec, + QStringList *plainText, TextFileFormat *format, QString *errorString, + QByteArray *decodingErrorSample = 0); + static ReadResult readFile(const QString &fileName, const QTextCodec *defaultCodec, + QString *plainText, TextFileFormat *format, QString *errorString, + QByteArray *decodingErrorSample = 0); + + bool writeFile(const QString &fileName, QString plainText, QString *errorString) const; + + static QByteArray decodingErrorSample(const QByteArray &data); + + LineTerminationMode lineTerminationMode; + bool hasUtf8Bom; + const QTextCodec *codec; +}; + +} // namespace Utils + +#endif // TEXTFILEUTILS_H diff --git a/src/libs/utils/utils-lib.pri b/src/libs/utils/utils-lib.pri index 36b9226bdf..db8394388e 100644 --- a/src/libs/utils/utils-lib.pri +++ b/src/libs/utils/utils-lib.pri @@ -40,6 +40,7 @@ SOURCES += $$PWD/environment.cpp \ $$PWD/synchronousprocess.cpp \ $$PWD/savefile.cpp \ $$PWD/fileutils.cpp \ + $$PWD/textfileformat.cpp \ $$PWD/submitfieldwidget.cpp \ $$PWD/consoleprocess.cpp \ $$PWD/uncommentselection.cpp \ @@ -135,6 +136,7 @@ HEADERS += \ $$PWD/synchronousprocess.h \ $$PWD/savefile.h \ $$PWD/fileutils.h \ + $$PWD/textfileformat.h \ $$PWD/submitfieldwidget.h \ $$PWD/uncommentselection.h \ $$PWD/parameteraction.h \ |