diff options
author | Marc Mutz <marc.mutz@qt.io> | 2022-09-17 11:32:45 +0200 |
---|---|---|
committer | Marc Mutz <marc.mutz@qt.io> | 2022-10-23 09:02:03 +0200 |
commit | 93aa02e67197d25fa71c9f33960b2c1994e5d2a2 (patch) | |
tree | 6e042ebd6c25a4a1557b45819b0858bbe6fed9d3 /src/corelib | |
parent | 1eff7844a5cd5cd6a5b72197905b8aa025d1b22e (diff) | |
download | qtbase-93aa02e67197d25fa71c9f33960b2c1994e5d2a2.tar.gz |
Port qCompress() to zstream/deflate()
The zlib convenience API we've been using so far has two problems:
- On Windows-64, where sizeof(long) == 4, the use of ulong for sizes
meant that we could not compress data compressible on other 64-bit
platforms (Unix). While zstream also uses ulong, being a stream API,
it allows feeding data in chunks. The total_in and total_out members
are only required for gzip compression and are otherwise just
informational. They're unsigned, so their overflow does not cause
UB. In summary, using zstream + deflate() allows us to compress more
than 4GiB of data even on Windows-64.
- On all platforms, we always allocated the output buffer in such a
way as to accommodate the pathological case of random, incompressible
data, so the output buffer was larger than the input. Using zstream
+ deflate(), we can start with a smaller buffer, then let zlib pick
up where it left off when it ran out of output buffer space, saving
memory in the common case that compression meaningfully reduces the
size. To avoid the first few rounds of reallocations, we continue to
use zlib's compressBound() for input less than 256KiB.
This completely fixes the compression side of QTBUG-106542 and
QTBUG-104972.
Fixes: QTBUG-104972
Fixes: QTBUG-106542
Change-Id: Ia7e6c38403906b35462480fd611b482f05a5c59c
Reviewed-by: Qt CI Bot <qt_ci_bot@qt-project.org>
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
(cherry picked from commit 2fd990b386c3555cf66cd3efff0d6a47cdc63763)
Reviewed-by: Mårten Nordheim <marten.nordheim@qt.io>
Diffstat (limited to 'src/corelib')
-rw-r--r-- | src/corelib/text/qbytearray.cpp | 65 |
1 files changed, 32 insertions, 33 deletions
diff --git a/src/corelib/text/qbytearray.cpp b/src/corelib/text/qbytearray.cpp index 28094ec34e..7aae6973b4 100644 --- a/src/corelib/text/qbytearray.cpp +++ b/src/corelib/text/qbytearray.cpp @@ -506,15 +506,6 @@ quint16 qChecksum(QByteArrayView data, Qt::ChecksumType standard) The default value is -1, which specifies zlib's default compression. -//![compress-limit-note] - \note The maximum size of data that this function can consume is limited by - what the platform's \c{unsigned long} can represent (a Zlib limitation). - That means that data > 4GiB can be compressed and decompressed on a 64-bit - Unix system, but not on a 64-bit Windows system. Portable code should - therefore avoid using qCompress()/qUncompress() to compress more than 4GiB - of input. -//![compress-limit-note] - \sa qUncompress(const QByteArray &data) */ @@ -526,8 +517,6 @@ quint16 qChecksum(QByteArrayView data, Qt::ChecksumType standard) Compresses the first \a nbytes of \a data at compression level \a compressionLevel and returns the compressed data in a new byte array. 
- - \include qbytearray.cpp compress-limit-note */ #ifndef QT_NO_COMPRESS @@ -687,30 +676,40 @@ QByteArray qCompress(const uchar* data, qsizetype nbytes, int compressionLevel) if (compressionLevel < -1 || compressionLevel > 9) compressionLevel = -1; - ulong len = nbytes + nbytes / 100 + 13; - QByteArray bazip; - int res; - do { - bazip.resize(len + HeaderSize); - res = ::compress2(reinterpret_cast<uchar *>(bazip.data()) + HeaderSize, &len, - data, nbytes, - compressionLevel); - - switch (res) { - case Z_OK: - bazip.resize(len + HeaderSize); - qToBigEndian(qt_saturate<CompressSizeHint_t>(nbytes), bazip.data()); - break; - case Z_MEM_ERROR: - return tooMuchData(ZLibOp::Compression); - - case Z_BUF_ERROR: - len *= 2; - break; + QArrayDataPointer out = [&] { + constexpr qsizetype SingleAllocLimit = 256 * 1024; // the maximum size for which we use + // zlib's compressBound() to guarantee + // the output buffer size is sufficient + // to hold result + qsizetype capacity = HeaderSize; + if (nbytes < SingleAllocLimit) { + // use maximum size + capacity += compressBound(uLong(nbytes)); // cannot overflow (both times)! + return QArrayDataPointer{QTypedArrayData<char>::allocate(capacity)}; } - } while (res == Z_BUF_ERROR); - return bazip; + // for larger buffers, assume it compresses optimally, and + // grow geometrically from there: + constexpr qsizetype MaxCompressionFactor = 1024; // max theoretical factor is 1032 + // cf. 
http://www.zlib.org/zlib_tech.html, + // but use a nearby power-of-two (faster) + capacity += std::max(qsizetype(compressBound(uLong(SingleAllocLimit))), + nbytes / MaxCompressionFactor); + return QArrayDataPointer{QTypedArrayData<char>::allocate(capacity, QArrayData::Grow)}; + }(); + + if (out.data() == nullptr) // allocation failed + return tooMuchData(ZLibOp::Compression); + + qToBigEndian(qt_saturate<CompressSizeHint_t>(nbytes), out.data()); + out.size = HeaderSize; + + return xxflate(ZLibOp::Compression, std::move(out), {data, nbytes}, + [=] (z_stream *zs) { return deflateInit(zs, compressionLevel); }, + [] (z_stream *zs, size_t inputLeft) { + return deflate(zs, inputLeft ? Z_NO_FLUSH : Z_FINISH); + }, + [] (z_stream *zs) { deflateEnd(zs); }); } #endif |