Port qCompress() to zstream/deflate()

The zlib convenience API we've been using so far has two problems:

- On Windows-64, where sizeof(long) == 4, the use of ulong for sizes meant that we could not compress data compressible on other 64-bit platforms (Unix). While zstream also uses ulong, being a stream API, it allows feeding data in chunks. The total_in and total_out members are only required for gzip compression and are otherwise just informational. They're unsigned, so their overflow does not cause UB. In summary, using zstream + deflate() allows us to compress more than 4GiB of data even on Windows-64.

- On all platforms, we always allocated the output buffer in such a way as to accommodate the pathological case of random, incompressible data, so the output buffer was larger than the input. Using zstream + deflate(), we can start with a smaller buffer, then let zlib pick up where it left off when it ran out of output buffer space, saving memory in the common case that compression meaningfully reduces the size. To avoid the first few rounds of reallocations, we continue to use zlib's compressBound() for input less than 256KiB.

This completely fixes the compression side of QTBUG-106542 and QTBUG-104972.

Fixes: QTBUG-104972
Fixes: QTBUG-106542
Change-Id: Ia7e6c38403906b35462480fd611b482f05a5c59c
Reviewed-by: Qt CI Bot <qt_ci_bot@qt-project.org>
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
(cherry picked from commit 2fd990b386c3555cf66cd3efff0d6a47cdc63763)
Reviewed-by: Mårten Nordheim <marten.nordheim@qt.io>
author: Marc Mutz <marc.mutz@qt.io> 2022-09-17 11:32:45 +0200
committer: Marc Mutz <marc.mutz@qt.io> 2022-10-23 09:02:03 +0200
commit: 93aa02e67197d25fa71c9f33960b2c1994e5d2a2 (patch)
tree: 6e042ebd6c25a4a1557b45819b0858bbe6fed9d3 /src/corelib
parent: 1eff7844a5cd5cd6a5b72197905b8aa025d1b22e (diff)
download: qtbase-93aa02e67197d25fa71c9f33960b2c1994e5d2a2.tar.gz
1 files changed, 32 insertions, 33 deletions
diff --git a/src/corelib/text/qbytearray.cpp b/src/corelib/text/qbytearray.cpp
index 28094ec34e..7aae6973b4 100644
--- a/src/corelib/text/qbytearray.cpp
+++ b/src/corelib/text/qbytearray.cpp
@@ -506,15 +506,6 @@ quint16 qChecksum(QByteArrayView data, Qt::ChecksumType standard)
     The default value is -1, which specifies zlib's default
     compression.
 
-//![compress-limit-note]
-    \note The maximum size of data that this function can consume is limited by
-    what the platform's \c{unsigned long} can represent (a Zlib limitation).
-    That means that data > 4GiB can be compressed and decompressed on a 64-bit
-    Unix system, but not on a 64-bit Windows system. Portable code should
-    therefore avoid using qCompress()/qUncompress() to compress more than 4GiB
-    of input.
-//![compress-limit-note]
-
     \sa qUncompress(const QByteArray &data)
 */
 
@@ -526,8 +517,6 @@ quint16 qChecksum(QByteArrayView data, Qt::ChecksumType standard)
 
     Compresses the first \a nbytes of \a data at compression level
     \a compressionLevel and returns the compressed data in a new byte array.
-
-    \include qbytearray.cpp compress-limit-note
 */
 
 #ifndef QT_NO_COMPRESS
@@ -687,30 +676,40 @@ QByteArray qCompress(const uchar* data, qsizetype nbytes, int compressionLevel)
     if (compressionLevel < -1 || compressionLevel > 9)
         compressionLevel = -1;
 
-    ulong len = nbytes + nbytes / 100 + 13;
-    QByteArray bazip;
-    int res;
-    do {
-        bazip.resize(len + HeaderSize);
-        res = ::compress2(reinterpret_cast<uchar *>(bazip.data()) + HeaderSize, &len,
-                          data, nbytes,
-                          compressionLevel);
-
-        switch (res) {
-        case Z_OK:
-            bazip.resize(len + HeaderSize);
-            qToBigEndian(qt_saturate<CompressSizeHint_t>(nbytes), bazip.data());
-            break;
-        case Z_MEM_ERROR:
-            return tooMuchData(ZLibOp::Compression);
-
-        case Z_BUF_ERROR:
-            len *= 2;
-            break;
+    QArrayDataPointer out = [&] {
+        constexpr qsizetype SingleAllocLimit = 256 * 1024; // the maximum size for which we use
+                                                           // zlib's compressBound() to guarantee
+                                                           // the output buffer size is sufficient
+                                                           // to hold result
+        qsizetype capacity = HeaderSize;
+        if (nbytes < SingleAllocLimit) {
+            // use maximum size
+            capacity += compressBound(uLong(nbytes)); // cannot overflow (both times)!
+            return QArrayDataPointer{QTypedArrayData<char>::allocate(capacity)};
         }
-    } while (res == Z_BUF_ERROR);
 
-    return bazip;
+        // for larger buffers, assume it compresses optimally, and
+        // grow geometrically from there:
+        constexpr qsizetype MaxCompressionFactor = 1024; // max theoretical factor is 1032
+                                                         // cf. http://www.zlib.org/zlib_tech.html,
+                                                         // but use a nearby power-of-two (faster)
+        capacity += std::max(qsizetype(compressBound(uLong(SingleAllocLimit))),
+                             nbytes / MaxCompressionFactor);
+        return QArrayDataPointer{QTypedArrayData<char>::allocate(capacity, QArrayData::Grow)};
+    }();
+
+    if (out.data() == nullptr) // allocation failed
+      return tooMuchData(ZLibOp::Compression);
+
+    qToBigEndian(qt_saturate<CompressSizeHint_t>(nbytes), out.data());
+    out.size = HeaderSize;
+
+    return xxflate(ZLibOp::Compression, std::move(out), {data, nbytes},
+                   [=] (z_stream *zs) { return deflateInit(zs, compressionLevel); },
+                   [] (z_stream *zs, size_t inputLeft) {
+                       return deflate(zs, inputLeft ? Z_NO_FLUSH : Z_FINISH);
+                   },
+                   [] (z_stream *zs) { deflateEnd(zs); });
 }
 #endif
author	Marc Mutz <marc.mutz@qt.io>	2022-09-17 11:32:45 +0200
committer	Marc Mutz <marc.mutz@qt.io>	2022-10-23 09:02:03 +0200
commit	93aa02e67197d25fa71c9f33960b2c1994e5d2a2 (patch)
tree	6e042ebd6c25a4a1557b45819b0858bbe6fed9d3 /src/corelib
parent	1eff7844a5cd5cd6a5b72197905b8aa025d1b22e (diff)
download	qtbase-93aa02e67197d25fa71c9f33960b2c1994e5d2a2.tar.gz