summaryrefslogtreecommitdiff
path: root/src/libs/utils/textfileformat.cpp
diff options
context:
space:
mode:
authorFriedemann Kleint <Friedemann.Kleint@nokia.com>2011-08-25 12:54:20 +0200
committerFriedemann Kleint <Friedemann.Kleint@nokia.com>2011-08-26 08:23:16 +0200
commitbd5a1a9bcea4da782494a02f17d37ea42eb01d9b (patch)
treee37bb6e53fa8576a875edd75d6b2453a8fa7163b /src/libs/utils/textfileformat.cpp
parente29881015d385176daca7ecbaace600e9eaa5436 (diff)
downloadqt-creator-bd5a1a9bcea4da782494a02f17d37ea42eb01d9b.tar.gz
Extract Core::TextFile from TextEditor::BaseTextDocument.
Use it in BaseTextEditor and Designer to preserve CRLF of the files. Task-number: QTCREATORBUG-5901 Change-Id: I7599ce78649a3b09f2e5118d02f8cbf3db27ed31 Reviewed-on: http://codereview.qt.nokia.com/3591 Reviewed-by: Friedemann Kleint <Friedemann.Kleint@nokia.com>
Diffstat (limited to 'src/libs/utils/textfileformat.cpp')
-rw-r--r--src/libs/utils/textfileformat.cpp314
1 files changed, 314 insertions, 0 deletions
diff --git a/src/libs/utils/textfileformat.cpp b/src/libs/utils/textfileformat.cpp
new file mode 100644
index 0000000000..44215340a3
--- /dev/null
+++ b/src/libs/utils/textfileformat.cpp
@@ -0,0 +1,314 @@
+/**************************************************************************
+**
+** This file is part of Qt Creator
+**
+** Copyright (c) 2011 Nokia Corporation and/or its subsidiary(-ies).
+**
+** Contact: Nokia Corporation (info@qt.nokia.com)
+**
+**
+** GNU Lesser General Public License Usage
+**
+** This file may be used under the terms of the GNU Lesser General Public
+** License version 2.1 as published by the Free Software Foundation and
+** appearing in the file LICENSE.LGPL included in the packaging of this file.
+** Please review the following information to ensure the GNU Lesser General
+** Public License version 2.1 requirements will be met:
+** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights. These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** Other Usage
+**
+** Alternatively, this file may be used in accordance with the terms and
+** conditions contained in a signed written agreement between you and Nokia.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at info@qt.nokia.com.
+**
+**************************************************************************/
+
+#include "textfileformat.h"
+#include "fileutils.h"
+#include "qtcassert.h"
+
+#include <QtCore/QTextCodec>
+#include <QtCore/QStringList>
+#include <QtCore/QCoreApplication>
+#include <QtCore/QDebug>
+
+enum { debug = 0 };
+
+#include <exception>
+
+namespace Utils {
+
+QDebug operator<<(QDebug d, const TextFileFormat &format)
+{
+ QDebug nsp = d.nospace();
+ nsp << "TextFileFormat: ";
+ if (format.codec) {
+ nsp << format.codec->name();
+ foreach (const QByteArray &alias, format.codec->aliases())
+ nsp << ' ' << alias;
+ } else {
+ nsp << "NULL";
+ }
+ nsp << " hasUtf8Bom=" << format.hasUtf8Bom
+ << (format.lineTerminationMode == TextFileFormat::LFLineTerminator ? " LF" : " CRLF");
+ return d;
+}
+
+/*!
+ \class Utils::TextFileFormat
+
+ \brief Describes the format of a text file and provides autodetection.
+
+ The format comprises
+ \list
+ \o Encoding represented by a pointer to a QTextCodec
+ \o Presence of an UTF8 Byte Order Marker (BOM)
+ \o Line feed storage convention
+ \endlist
+
+ The class also provides convenience functions to read text files and return them
+ as strings or string lists and to write out files.
+*/
+
+TextFileFormat::TextFileFormat() :
+ lineTerminationMode(NativeLineTerminator), hasUtf8Bom(false), codec(0)
+{
+}
+
+/*!
+ \brief Detect the format of text data.
+*/
+
+TextFileFormat TextFileFormat::detect(const QByteArray &data)
+{
+ TextFileFormat result;
+ if (data.isEmpty())
+ return result;
+ const int bytesRead = data.size();
+ const unsigned char *buf = reinterpret_cast<const unsigned char *>(data.constData());
+ // code taken from qtextstream
+ if (bytesRead >= 4 && ((buf[0] == 0xff && buf[1] == 0xfe && buf[2] == 0 && buf[3] == 0)
+ || (buf[0] == 0 && buf[1] == 0 && buf[2] == 0xfe && buf[3] == 0xff))) {
+ result.codec = QTextCodec::codecForName("UTF-32");
+ } else if (bytesRead >= 2 && ((buf[0] == 0xff && buf[1] == 0xfe)
+ || (buf[0] == 0xfe && buf[1] == 0xff))) {
+ result.codec = QTextCodec::codecForName("UTF-16");
+ } else if (bytesRead >= 3 && ((buf[0] == 0xef && buf[1] == 0xbb) && buf[2] == 0xbf)) {
+ result.codec = QTextCodec::codecForName("UTF-8");
+ result.hasUtf8Bom = true;
+ }
+ // end code taken from qtextstream
+ const int newLinePos = data.indexOf('\n');
+ if (newLinePos == -1) {
+ result.lineTerminationMode = NativeLineTerminator;
+ } else if (newLinePos == 0) {
+ result.lineTerminationMode = LFLineTerminator;
+ } else {
+ result.lineTerminationMode = data.at(newLinePos - 1) == '\r' ? CRLFLineTerminator : LFLineTerminator;
+ }
+ return result;
+}
+
+/*!
+ \brief Returns a piece of text suitable as display for a encoding error.
+*/
+
+QByteArray TextFileFormat::decodingErrorSample(const QByteArray &data)
+{
+ const int p = data.indexOf('\n', 16384);
+ return p < 0 ? data : data.left(p);
+}
+
+enum { textChunkSize = 65536 };
+
+static bool verifyDecodingError(const QString &text, const QTextCodec *codec,
+ const char *data, const int dataSize,
+ const bool possibleHeader)
+{
+ const QByteArray verifyBuf = codec->fromUnicode(text); // slow
+ // the minSize trick lets us ignore unicode headers
+ const int minSize = qMin(verifyBuf.size(), dataSize);
+ return (minSize < dataSize - (possibleHeader? 4 : 0)
+ || memcmp(verifyBuf.constData() + verifyBuf.size() - minSize,
+ data + dataSize - minSize,
+ minSize));
+}
+
+// Decode a potentially large file in chunks and append it to target
+// using the append function passed on (fits QStringList and QString).
+
+template <class Target>
+bool decodeTextFileContent(const QByteArray &dataBA,
+ const TextFileFormat &format,
+ Target *target,
+ void (Target::*appendFunction)(const QString &))
+{
+ QTC_ASSERT(format.codec, return false; )
+
+ QTextCodec::ConverterState state;
+ bool hasDecodingError = false;
+
+ const char *start = dataBA.constData();
+ const char *data = start;
+ const char *end = data + dataBA.size();
+ // Process chunkwise as QTextCodec allocates too much memory when doing it in one
+ // go. An alternative to the code below would be creating a decoder from the codec,
+ // but its failure detection does not seem be working reliably.
+ for (const char *data = start; data < end; ) {
+ const char *chunkStart = data;
+ const int chunkSize = qMin(int(textChunkSize), int(end - chunkStart));
+ QString text = format.codec->toUnicode(chunkStart, chunkSize, &state);
+ data += chunkSize;
+ // Process until the end of the current multi-byte character. Remaining might
+ // actually contain more than needed so try one-be-one. If EOF is reached with
+ // and characters remain->encoding error.
+ for ( ; state.remainingChars && data < end ; ++data)
+ text.append(format.codec->toUnicode(data, 1, &state));
+ if (state.remainingChars)
+ hasDecodingError = true;
+ if (!hasDecodingError)
+ hasDecodingError =
+ verifyDecodingError(text, format.codec, chunkStart, data - chunkStart,
+ chunkStart == start);
+ if (format.lineTerminationMode == TextFileFormat::CRLFLineTerminator)
+ text.remove(QLatin1Char('\r'));
+ (target->*appendFunction)(text);
+ }
+ return !hasDecodingError;
+}
+
+/*!
+ \brief Decode data to a plain string.
+*/
+
+bool TextFileFormat::decode(const QByteArray &data, QString *target) const
+{
+ target->clear();
+ return decodeTextFileContent<QString>(data, *this, target, &QString::push_back);
+}
+
+/*!
+ \brief Decode data to a list of strings.
+
+ Intended for use with progress bars loading large files.
+*/
+
+bool TextFileFormat::decode(const QByteArray &data, QStringList *target) const
+{
+ target->clear();
+ if (data.size() > textChunkSize)
+ target->reserve(5 + data.size() / textChunkSize);
+ return decodeTextFileContent<QStringList>(data, *this, target, &QStringList::append);
+}
+
+// Read text file contents to string or stringlist.
+template <class Target>
+TextFileFormat::ReadResult readTextFile(const QString &fileName, const QTextCodec *defaultCodec,
+ Target *target, TextFileFormat *format, QString *errorString,
+ QByteArray *decodingErrorSampleIn = 0)
+{
+ if (decodingErrorSampleIn)
+ decodingErrorSampleIn->clear();
+
+ QByteArray data;
+ try {
+ Utils::FileReader reader;
+ if (!reader.fetch(fileName, errorString))
+ return TextFileFormat::ReadIOError;
+ data = reader.data();
+ } catch (const std::bad_alloc &) {
+ *errorString = QCoreApplication::translate("Utils::TextFileFormat", "Out of memory.");
+ return TextFileFormat::ReadMemoryAllocationError;
+ }
+
+ *format = TextFileFormat::detect(data);
+ if (!format->codec)
+ format->codec = defaultCodec ? defaultCodec : QTextCodec::codecForLocale();
+
+ if (!format->decode(data, target)) {
+ *errorString = QCoreApplication::translate("Utils::TextFileFormat", "An encoding error was encountered.");
+ if (decodingErrorSampleIn)
+ *decodingErrorSampleIn = TextFileFormat::decodingErrorSample(data);
+ return TextFileFormat::ReadEncodingError;
+ }
+ return TextFileFormat::ReadSuccess;
+}
+
+/*!
+ \brief Read text file into a list of strings.
+*/
+
+TextFileFormat::ReadResult
+ TextFileFormat::readFile(const QString &fileName, const QTextCodec *defaultCodec,
+ QStringList *plainTextList, TextFileFormat *format, QString *errorString,
+ QByteArray *decodingErrorSample /* = 0 */)
+{
+ const TextFileFormat::ReadResult result =
+ readTextFile(fileName, defaultCodec,
+ plainTextList, format, errorString, decodingErrorSample);
+ if (debug)
+ qDebug().nospace() << Q_FUNC_INFO << fileName << ' ' << *format
+ << " returns " << result << '/' << plainTextList->size() << " chunks";
+ return result;
+}
+
+/*!
+ \brief Read text file into a string.
+*/
+
+TextFileFormat::ReadResult
+ TextFileFormat::readFile(const QString &fileName, const QTextCodec *defaultCodec,
+ QString *plainText, TextFileFormat *format, QString *errorString,
+ QByteArray *decodingErrorSample /* = 0 */)
+{
+ const TextFileFormat::ReadResult result =
+ readTextFile(fileName, defaultCodec,
+ plainText, format, errorString, decodingErrorSample);
+ if (debug)
+ qDebug().nospace() << Q_FUNC_INFO << fileName << ' ' << *format
+ << " returns " << result << '/' << plainText->size() << " characters";
+ return result;
+}
+
+/*!
+ \brief Write out a text file.
+*/
+
+bool TextFileFormat::writeFile(const QString &fileName, QString plainText, QString *errorString) const
+{
+ QTC_ASSERT(codec, return false;)
+
+ // Does the user want CRLF? If that is native,
+ // let QFile do the work, else manually add.
+ QIODevice::OpenMode fileMode = QIODevice::NotOpen;
+ if (lineTerminationMode == CRLFLineTerminator) {
+ if (NativeLineTerminator == CRLFLineTerminator) {
+ fileMode |= QIODevice::Text;
+ } else {
+ plainText.replace(QLatin1Char('\n'), QLatin1String("\r\n"));
+ }
+ }
+
+ Utils::FileSaver saver(fileName, fileMode);
+ if (saver.hasError()) {
+ *errorString = saver.errorString();
+ return false;
+ }
+ if (hasUtf8Bom && codec->name() == "UTF-8")
+ saver.write("\xef\xbb\xbf", 3);
+ saver.write(codec->fromUnicode(plainText));
+ const bool ok = saver.finalize(errorString);
+ if (debug)
+ qDebug().nospace() << Q_FUNC_INFO << fileName << ' ' << *this << ' ' << plainText.size()
+ << " bytes, returns " << ok;
+ return ok;
+}
+
+} // namespace Utils