author    Douglas Bagnall <douglas.bagnall@catalyst.net.nz>  2022-11-24 11:44:35 +1300
committer Joseph Sutton <jsutton@samba.org>                  2022-12-01 22:56:40 +0000
commit    e4066b2be6d87cae130f40e3faf3a0c8815389f8 (patch)
tree      bc5737194c433018a094ac9b998f42c0b2320c87 /lib/compression
parent    c0f28d71858a0fd3035971ca4f2f5a6af6d450b6 (diff)
download  samba-e4066b2be6d87cae130f40e3faf3a0c8815389f8.tar.gz
lib/compression: more tests for lzxpress plain compression
These are based on (i.e. copied and pasted from) the LZ77 + Huffman tests.

Signed-off-by: Douglas Bagnall <douglas.bagnall@catalyst.net.nz>
Reviewed-by: Joseph Sutton <josephsutton@catalyst.net.nz>
Diffstat (limited to 'lib/compression')
-rw-r--r--  lib/compression/tests/test_lzxpress_plain.c  749
1 file changed, 749 insertions(+), 0 deletions(-)
diff --git a/lib/compression/tests/test_lzxpress_plain.c b/lib/compression/tests/test_lzxpress_plain.c
index 5d2a51eb466..57130852fc9 100644
--- a/lib/compression/tests/test_lzxpress_plain.c
+++ b/lib/compression/tests/test_lzxpress_plain.c
@@ -27,6 +27,746 @@
#include "lzxpress.h"
#include "lib/util/base64.h"
+
+/* set LZX_DEBUG_FILES to true to save round-trip files in /tmp. */
+#define LZX_DEBUG_FILES false
+
+/* set LZX_DEBUG_VERBOSE to true to print more. */
+#define LZX_DEBUG_VERBOSE false
+
+
+#if LZX_DEBUG_VERBOSE
+#define debug_message(...) print_message(__VA_ARGS__)
+
+#include <time.h>
+
+struct timespec start = {0};
+struct timespec end = {0};
+static void debug_start_timer(void)
+{
+ clock_gettime(CLOCK_MONOTONIC, &start);
+}
+
+static void debug_end_timer(const char *name, size_t len)
+{
+ uint64_t ns;
+ double secs;
+ double rate;
+ clock_gettime(CLOCK_MONOTONIC, &end);
+ ns = end.tv_nsec;
+ ns += end.tv_sec * 1000 * 1000 * 1000;
+ ns -= start.tv_nsec;
+ ns -= start.tv_sec * 1000 * 1000 * 1000;
+ secs = ns / 1e9;
+ rate = len / (secs * 1024 * 1024);
+ debug_message("%s %zu bytes in %.2g: \033[1;35m%.2f\033[0m MB per second\n",
+ name, len, secs, rate);
+}
+
+#else
+#define debug_message(...) /* debug_message */
+#define debug_start_timer(...) /* debug_start_timer */
+#define debug_end_timer(...) /* debug_end_timer */
+#endif
+
+
+struct lzx_pair {
+ const char *name;
+ DATA_BLOB compressed;
+ DATA_BLOB decompressed;
+};
+
+struct lzx_file_pair {
+ const char *name;
+ const char *compressed_file;
+ const char *decompressed_file;
+};
+
+
+#define DECOMP_DIR "testdata/compression/decompressed"
+#define COMP_DIR "testdata/compression/compressed-plain"
+#define MORE_COMP_DIR "testdata/compression/compressed-more-plain"
+
+
+#define BLOB_FROM_ARRAY(...) \
+ { \
+ .data = (uint8_t[]){__VA_ARGS__}, \
+ .length = sizeof((uint8_t[]){__VA_ARGS__}) \
+ }
+
+#define BLOB_FROM_STRING(s) \
+ { \
+ .data = discard_const_p(uint8_t, s), \
+ .length = (sizeof(s) - 1) \
+ }
+
+
+const char * file_names[] = {
+ "generate-windows-test-vectors.c",
+ "fib_shuffle-128k+",
+ "fuzzing-0fc2d461b56cd8103c91",
+ "fuzzing-3ec3bca27bb9eb40c128",
+ "fuzzing-a3115a81d1ac500318f9",
+ "fuzzing-3591f9dc02bb00a54b60",
+ "27826-8.txt",
+ "5d049b4cb1bd933f5e8ex19",
+ "638e61e96d54279981c3x5",
+ "64k-minus-one-zeros",
+ "64k-plus-one-zeros",
+ "64k-zeros",
+ "96f696a4e5ce56c61a3dx10",
+ "9e0b6a12febf38e98f13",
+ "abc-times-101",
+ "abc-times-105",
+ "abc-times-200",
+ "b63289ccc7f218c0d56b",
+ "beta-variate1-128k+",
+ "beta-variate3-128k+",
+ "decayed_alphabet_128k+",
+ "decayed_alphabet_64k",
+ "f00842317dc6d5695b02",
+ "fib_shuffle",
+ "midsummer-nights-dream.txt",
+ "notes-on-the-underground.txt",
+ "pg22009.txt",
+ "repeating",
+ "repeating-exactly-64k",
+ "setup.log",
+ "slow-015ddc36a71412ccc50d",
+ "slow-100e9f966a7feb9ca40a",
+ "slow-2a671c3cff4f1574cbab",
+ "slow-33d90a24e70515b14cd0",
+ "slow-49d8c05261e3f412fc72",
+ "slow-50a249d2fe56873e56a0",
+ "slow-63e9f0b52235fb0129fa",
+ "slow-73b7f971d65908ac0095",
+ "slow-8b61e3dd267908544531",
+ "slow-9d1c5a079b0462986f1f",
+ "slow-aa7262a821dabdcf04a6",
+ "slow-b8a91d142b0d2af7f5ca",
+ "slow-c79142457734bbc8d575",
+ "slow-d736544545b90d83fe75",
+ "slow-e3b9bdfaed7d1a606fdb",
+ "slow-f3f1c02a9d006e5e1703",
+ "trigram_128k+",
+ "trigram_64k",
+ "trigram_sum_128k+",
+ "trigram_sum_64k",
+ NULL
+};
+
+
+
+static DATA_BLOB datablob_from_file(TALLOC_CTX *mem_ctx,
+ const char *filename)
+{
+ DATA_BLOB b = {0};
+ FILE *fh = fopen(filename, "rb");
+ int ret;
+ struct stat s;
+ size_t len;
+ if (fh == NULL) {
+ debug_message("could not open '%s'\n", filename);
+ return b;
+ }
+ ret = fstat(fileno(fh), &s);
+ if (ret != 0) {
+ fclose(fh);
+ return b;
+ }
+ b.data = talloc_array(mem_ctx, uint8_t, s.st_size);
+ if (b.data == NULL) {
+ fclose(fh);
+ return b;
+ }
+ len = fread(b.data, 1, s.st_size, fh);
+ if (ferror(fh) || len != s.st_size) {
+ TALLOC_FREE(b.data);
+ } else {
+ b.length = len;
+ }
+ fclose(fh);
+ return b;
+}
+
+
+
+static void test_lzxpress_plain_decompress_files(void **state)
+{
+ size_t i;
+ int score = 0;
+ TALLOC_CTX *mem_ctx = talloc_new(NULL);
+ for (i = 0; file_names[i] != NULL; i++) {
+ char filename[200];
+ uint8_t *dest = NULL;
+ ssize_t written;
+ TALLOC_CTX *tmp_ctx = talloc_new(mem_ctx);
+ struct lzx_pair p = {
+ .name = file_names[i]
+ };
+
+ debug_message("%s\n", p.name);
+
+ snprintf(filename, sizeof(filename),
+ "%s/%s.decomp", DECOMP_DIR, p.name);
+
+ p.decompressed = datablob_from_file(tmp_ctx, filename);
+ assert_non_null(p.decompressed.data);
+
+ snprintf(filename, sizeof(filename),
+ "%s/%s.lzplain", COMP_DIR, p.name);
+
+ p.compressed = datablob_from_file(tmp_ctx, filename);
+ assert_non_null(p.compressed.data);
+
+ dest = talloc_array(tmp_ctx, uint8_t, p.decompressed.length);
+ debug_start_timer();
+ written = lzxpress_decompress(p.compressed.data,
+ p.compressed.length,
+ dest,
+ p.decompressed.length);
+ debug_end_timer("decompress", p.decompressed.length);
+ if (written == p.decompressed.length &&
+ memcmp(dest, p.decompressed.data, p.decompressed.length) == 0) {
+ debug_message("\033[1;32mdecompressed %s!\033[0m\n", p.name);
+ score++;
+ } else {
+ debug_message("\033[1;31mfailed to decompress %s!\033[0m\n",
+ p.name);
+ debug_message("size %zd vs reference %zu\n",
+ written, p.decompressed.length);
+ }
+ talloc_free(tmp_ctx);
+ }
+ debug_message("%d/%zu correct\n", score, i);
+ assert_int_equal(score, i);
+}
+
+
+static void test_lzxpress_plain_decompress_more_compressed_files(void **state)
+{
+ /*
+ * This tests the decompression of files that have been compressed on
+ * Windows with the level turned up (to 1, default for MS-XCA is 0).
+ *
+ * The format is identical, but it will have tried harder to find
+ * matches.
+ */
+ size_t i;
+ int score = 0;
+ int found = 0;
+ TALLOC_CTX *mem_ctx = talloc_new(NULL);
+ for (i = 0; file_names[i] != NULL; i++) {
+ char filename[200];
+ uint8_t *dest = NULL;
+ ssize_t written;
+ TALLOC_CTX *tmp_ctx = talloc_new(mem_ctx);
+ struct lzx_pair p = {
+ .name = file_names[i]
+ };
+
+ debug_message("%s\n", p.name);
+
+ snprintf(filename, sizeof(filename),
+ "%s/%s.decomp", DECOMP_DIR, p.name);
+
+ p.decompressed = datablob_from_file(tmp_ctx, filename);
+ assert_non_null(p.decompressed.data);
+
+ snprintf(filename, sizeof(filename),
+ "%s/%s.lzplain", MORE_COMP_DIR, p.name);
+
+ p.compressed = datablob_from_file(tmp_ctx, filename);
+ if (p.compressed.data == NULL) {
+ /*
+ * We don't have all the vectors in the
+ * more-compressed directory, which is OK; we just
+ * skip them.
+ */
+ continue;
+ }
+ found++;
+ dest = talloc_array(tmp_ctx, uint8_t, p.decompressed.length);
+ debug_start_timer();
+ written = lzxpress_decompress(p.compressed.data,
+ p.compressed.length,
+ dest,
+ p.decompressed.length);
+ debug_end_timer("decompress", p.decompressed.length);
+ if (written == p.decompressed.length &&
+ memcmp(dest, p.decompressed.data, p.decompressed.length) == 0) {
+ debug_message("\033[1;32mdecompressed %s!\033[0m\n", p.name);
+ score++;
+ } else {
+ debug_message("\033[1;31mfailed to decompress %s!\033[0m\n",
+ p.name);
+ debug_message("size %zd vs reference %zu\n",
+ written, p.decompressed.length);
+ }
+ talloc_free(tmp_ctx);
+ }
+ debug_message("%d/%d correct\n", score, found);
+ assert_int_equal(score, found);
+}
+
+
+/*
+ * attempt_round_trip() tests whether a data blob can survive a compression
+ * and decompression cycle. If save_name is not NULL and LZX_DEBUG_FILES
+ * evaluates to true, the various stages are saved in files with that name
+ * and the '-original', '-compressed', and '-decompressed' suffixes. If
+ * ref_compressed has data, a message is printed saying whether the
+ * compressed data matches it.
+ */
+
+static ssize_t attempt_round_trip(TALLOC_CTX *mem_ctx,
+ DATA_BLOB original,
+ const char *save_name,
+ DATA_BLOB ref_compressed)
+{
+ TALLOC_CTX *tmp_ctx = talloc_new(mem_ctx);
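+ /*
+ * Allow for worst-case expansion: incompressible input grows by
+ * roughly 9/8 in this format (see the random noise test below), so
+ * 8/7 of the original length plus a few spare bytes is ample.
+ */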
+ DATA_BLOB compressed = data_blob_talloc(tmp_ctx, NULL,
+ original.length * 8 / 7 + 8);
+ DATA_BLOB decompressed = data_blob_talloc(tmp_ctx, NULL,
+ original.length);
+ ssize_t comp_written, decomp_written;
+ debug_start_timer();
+ comp_written = lzxpress_compress(original.data,
+ original.length,
+ compressed.data,
+ compressed.length);
+ debug_end_timer("compress", original.length);
+ if (comp_written <= 0) {
+ talloc_free(tmp_ctx);
+ return -1;
+ }
+
+ if (ref_compressed.data != NULL) {
+ /*
+ * This is informational, not an assertion; there are
+ * ~infinite legitimate ways to compress the data, many as
+ * good as each other (think of compression as a language, not
+ * a format).
+ */
+ debug_message("compressed size %zd vs reference %zu\n",
+ comp_written, ref_compressed.length);
+
+ if (comp_written == ref_compressed.length &&
+ memcmp(compressed.data, ref_compressed.data, comp_written) == 0) {
+ debug_message("\033[1;32mbyte identical!\033[0m\n");
+ }
+ }
+ debug_start_timer();
+ decomp_written = lzxpress_decompress(compressed.data,
+ comp_written,
+ decompressed.data,
+ decompressed.length);
+ debug_end_timer("decompress", original.length);
+ if (save_name != NULL && LZX_DEBUG_FILES) {
+ char s[300];
+ FILE *fh = NULL;
+
+ snprintf(s, sizeof(s), "%s-original", save_name);
+ fprintf(stderr, "Saving %zu bytes to %s\n", original.length, s);
+ fh = fopen(s, "w");
+ fwrite(original.data, 1, original.length, fh);
+ fclose(fh);
+
+ snprintf(s, sizeof(s), "%s-compressed", save_name);
+ fprintf(stderr, "Saving %zu bytes to %s\n", comp_written, s);
+ fh = fopen(s, "w");
+ fwrite(compressed.data, 1, comp_written, fh);
+ fclose(fh);
+ /*
+ * We save the decompressed file using original.length, not
+ * the returned size. If these differ, the returned size will
+ * be -1. By saving the whole buffer we can see at what point
+ * it went haywire.
+ */
+ snprintf(s, sizeof(s), "%s-decompressed", save_name);
+ fprintf(stderr, "Saving %zu bytes to %s\n", original.length, s);
+ fh = fopen(s, "w");
+ fwrite(decompressed.data, 1, original.length, fh);
+ fclose(fh);
+ }
+
+ if (original.length != decomp_written ||
+ memcmp(decompressed.data,
+ original.data,
+ original.length) != 0) {
+ debug_message("\033[1;31mgot %zd, expected %zu\033[0m\n",
+ decomp_written,
+ original.length);
+ talloc_free(tmp_ctx);
+ return -1;
+ }
+ talloc_free(tmp_ctx);
+ return comp_written;
+}
+
+
+static void test_lzxpress_plain_round_trip_files(void **state)
+{
+ size_t i;
+ int score = 0;
+ ssize_t compressed_total = 0;
+ ssize_t reference_total = 0;
+ TALLOC_CTX *mem_ctx = talloc_new(NULL);
+ for (i = 0; file_names[i] != NULL; i++) {
+ char filename[200];
+ char *debug_files = NULL;
+ TALLOC_CTX *tmp_ctx = talloc_new(mem_ctx);
+ ssize_t comp_size;
+ struct lzx_pair p = {
+ .name = file_names[i]
+ };
+ debug_message("-------------------\n");
+ debug_message("%s\n", p.name);
+
+ snprintf(filename, sizeof(filename),
+ "%s/%s.decomp", DECOMP_DIR, p.name);
+
+ p.decompressed = datablob_from_file(tmp_ctx, filename);
+ assert_non_null(p.decompressed.data);
+
+ snprintf(filename, sizeof(filename),
+ "%s/%s.lzplain", COMP_DIR, p.name);
+
+ p.compressed = datablob_from_file(tmp_ctx, filename);
+ if (p.compressed.data == NULL) {
+ debug_message(
+ "Could not load %s reference file %s\n",
+ p.name, filename);
+ debug_message("%s decompressed %zu\n", p.name,
+ p.decompressed.length);
+ } else {
+ debug_message("%s: reference compressed %zu decomp %zu\n",
+ p.name,
+ p.compressed.length,
+ p.decompressed.length);
+ }
+ if (1) {
+ /*
+ * Set names so that attempt_round_trip() saves copies
+ * in /tmp when LZX_DEBUG_FILES is true.
+ */
+ snprintf(filename, sizeof(filename),
+ "/tmp/lzxplain-%s", p.name);
+ debug_files = filename;
+ }
+
+ comp_size = attempt_round_trip(mem_ctx, p.decompressed,
+ debug_files,
+ p.compressed);
+ if (comp_size > 0) {
+ debug_message("\033[1;32mround trip!\033[0m\n");
+ score++;
+ if (p.compressed.length) {
+ compressed_total += comp_size;
+ reference_total += p.compressed.length;
+ }
+ }
+ talloc_free(tmp_ctx);
+ }
+ debug_message("%d/%zu correct\n", score, i);
+ print_message("\033[1;34mtotal compressed size: %zu\033[0m\n",
+ compressed_total);
+ print_message("total reference size: %zd \n", reference_total);
+ print_message("diff: %7zd \n",
+ reference_total - compressed_total);
+ print_message("ratio: \033[1;3%dm%.2f\033[0m \n",
+ 2 + (compressed_total >= reference_total),
+ ((double)compressed_total) / reference_total);
+ /*
+ * Assert that the compression is better than Windows. Unlike the
+ * Huffman variant, where things are very even, here we do much better
+ * than Windows without especially trying.
+ */
+ assert_true(compressed_total <= reference_total);
+
+ assert_int_equal(score, i);
+ talloc_free(mem_ctx);
+}
+
+
+/*
+ * Bob Jenkins' Small Fast RNG.
+ *
+ * We don't need it to be this good, but we do need it to be reproducible
+ * across platforms, which rand() etc. aren't.
+ *
+ * http://burtleburtle.net/bob/rand/smallprng.html
+ */
+
+struct jsf_rng {
+ uint32_t a;
+ uint32_t b;
+ uint32_t c;
+ uint32_t d;
+};
+
+#define ROTATE32(x, k) (((x) << (k)) | ((x) >> (32 - (k))))
+
+static uint32_t jsf32(struct jsf_rng *x) {
+ uint32_t e = x->a - ROTATE32(x->b, 27);
+ x->a = x->b ^ ROTATE32(x->c, 17);
+ x->b = x->c + x->d;
+ x->c = x->d + e;
+ x->d = e + x->a;
+ return x->d;
+}
+
+static void jsf32_init(struct jsf_rng *x, uint32_t seed) {
+ size_t i;
+ x->a = 0xf1ea5eed;
+ x->b = x->c = x->d = seed;
+ for (i = 0; i < 20; ++i) {
+ jsf32(x);
+ }
+}
+
+
+static void test_lzxpress_plain_long_gpl_round_trip(void **state)
+{
+ /*
+ * We use a kind of model-free Markov model to generate a massively
+ * extended pastiche of the GPLv3 (chosen because it is right there in
+ * "COPYING" and won't change often).
+ *
+ * The point is to check a round trip of a very long message with
+ * multiple repetitions on many scales, without having to add a very
+ * large file.
+ */
+ size_t i, j, k;
+ uint8_t c;
+ TALLOC_CTX *mem_ctx = talloc_new(NULL);
+ DATA_BLOB gpl = datablob_from_file(mem_ctx, "COPYING");
+ DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, 5 * 1024 * 1024);
+ DATA_BLOB ref = {0};
+ ssize_t comp_size;
+ struct jsf_rng rng;
+
+
+ jsf32_init(&rng, 1);
+
+ j = 1;
+ original.data[0] = gpl.data[0];
+ for (i = 1; i < original.length; i++) {
+ size_t m;
+ char p = original.data[i - 1];
+ c = gpl.data[j];
+ original.data[i] = c;
+ j++;
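+ /*
+ * Look for the bigram we just emitted (p, c) in a
+ * pseudo-randomly chosen 30 byte window of the GPL
+ * text; if found, jump to just after it, so the output
+ * hops between similar passages and repeats on many
+ * scales.
+ */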
+ m = (j + jsf32(&rng)) % (gpl.length - 50);
+ for (k = m; k < m + 30; k++) {
+ if (p == gpl.data[k] &&
+ c == gpl.data[k + 1]) {
+ j = k + 2;
+ break;
+ }
+ }
+ if (j == gpl.length) {
+ j = 1;
+ }
+ }
+
+ comp_size = attempt_round_trip(mem_ctx, original, "/tmp/gpl", ref);
+ assert_true(comp_size > 0);
+ assert_true(comp_size < original.length);
+
+ talloc_free(mem_ctx);
+}
+
+
+static void test_lzxpress_plain_long_random_graph_round_trip(void **state)
+{
+ size_t i;
+ TALLOC_CTX *mem_ctx = talloc_new(NULL);
+ DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, 5 * 1024 * 1024);
+ DATA_BLOB ref = {0};
+ /*
+ * There's a random trigram graph, with each pair of sequential bytes
+ * pointing to a successor. This would probably fall into a fairly
+ * simple loop, but we introduce damage into the system, randomly
+ * flipping about 1 bit in 64.
+ *
+ * The result is semi-structured and compressible.
+ */
+ uint8_t *d = original.data;
+ uint8_t *table = talloc_array(mem_ctx, uint8_t, 65536);
+ uint32_t *table32 = (void*)table;
+ ssize_t comp_size;
+ struct jsf_rng rng;
+
+ jsf32_init(&rng, 1);
+ for (i = 0; i < (65536 / 4); i++) {
+ table32[i] = jsf32(&rng);
+ }
+
+ d[0] = 'a';
+ d[1] = 'b';
+
+ for (i = 2; i < original.length; i++) {
+ uint16_t k = (d[i - 2] << 8) | d[i - 1];
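+ /*
+ * Each bit of the triple-ANDed word below is set with
+ * probability 1/8; ANDing it with its own top half
+ * leaves each of the low 16 bits set with probability
+ * about 1/64, the "1 bit in 64" rate described above.
+ */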
+ uint32_t damage = jsf32(&rng) & jsf32(&rng) & jsf32(&rng);
+ damage &= (damage >> 16);
+ k ^= damage & 0xffff;
+ d[i] = table[k];
+ }
+
+ comp_size = attempt_round_trip(mem_ctx, original, "/tmp/random-graph", ref);
+ assert_true(comp_size > 0);
+ assert_true(comp_size < original.length);
+
+ talloc_free(mem_ctx);
+}
+
+
+static void test_lzxpress_plain_chaos_graph_round_trip(void **state)
+{
+ size_t i;
+ TALLOC_CTX *mem_ctx = talloc_new(NULL);
+ DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, 5 * 1024 * 1024);
+ DATA_BLOB ref = {0};
+ /*
+ * There's a random trigram graph, with each pair of sequential bytes
+ * pointing to a successor. This would probably fall into a fairly
+ * simple loop, but we keep changing the graph. The result is long
+ * periods of stability separated by bursts of noise.
+ */
+ uint8_t *d = original.data;
+ uint8_t *table = talloc_array(mem_ctx, uint8_t, 65536);
+ uint32_t *table32 = (void*)table;
+ ssize_t comp_size;
+ struct jsf_rng rng;
+
+ jsf32_init(&rng, 1);
+ for (i = 0; i < (65536 / 4); i++) {
+ table32[i] = jsf32(&rng);
+ }
+
+ d[0] = 'a';
+ d[1] = 'b';
+
+ for (i = 2; i < original.length; i++) {
+ uint16_t k = (d[i - 2] << 8) | d[i - 1];
+ uint32_t damage = jsf32(&rng);
+ d[i] = table[k];
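+ /*
+ * With probability 1/8 (the top three bits of damage
+ * all zero), overwrite one pseudo-randomly chosen
+ * table entry, so the graph keeps shifting and the
+ * output alternates between stable loops and bursts
+ * of change.
+ */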
+ if ((damage >> 29) == 0) {
+ uint16_t index = damage & 0xffff;
+ uint8_t value = (damage >> 16) & 0xff;
+ table[index] = value;
+ }
+ }
+
+ comp_size = attempt_round_trip(mem_ctx, original, "/tmp/chaos-graph", ref);
+ assert_true(comp_size > 0);
+ assert_true(comp_size < original.length);
+
+ talloc_free(mem_ctx);
+}
+
+
+static void test_lzxpress_plain_sparse_random_graph_round_trip(void **state)
+{
+ size_t i;
+ TALLOC_CTX *mem_ctx = talloc_new(NULL);
+ DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, 5 * 1024 * 1024);
+ DATA_BLOB ref = {0};
+ /*
+ * There's a random trigram graph, with each pair of sequential bytes
+ * pointing to a successor. This will fall into a fairly simple loop,
+ * but we introduce damage into the system, randomly mangling about 1
+ * byte in 65536.
+ *
+ * The result has very long repetitive runs, which should lead to
+ * oversized blocks.
+ */
+ uint8_t *d = original.data;
+ uint8_t *table = talloc_array(mem_ctx, uint8_t, 65536);
+ uint32_t *table32 = (void*)table;
+ ssize_t comp_size;
+ struct jsf_rng rng;
+
+ jsf32_init(&rng, 3);
+ for (i = 0; i < (65536 / 4); i++) {
+ table32[i] = jsf32(&rng);
+ }
+
+ d[0] = 'a';
+ d[1] = 'b';
+
+ for (i = 2; i < original.length; i++) {
+ uint16_t k = (d[i - 2] << 8) | d[i - 1];
+ uint32_t damage = jsf32(&rng);
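+ /*
+ * Only about 1 time in 65536 (when the top 16 bits of
+ * damage are all zero) is the lookup key perturbed,
+ * giving the very long repetitive runs described above.
+ */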
+ if ((damage & 0xffff0000) == 0) {
+ k ^= damage & 0xffff;
+ }
+ d[i] = table[k];
+ }
+
+ comp_size = attempt_round_trip(mem_ctx, original, "/tmp/sparse-random-graph", ref);
+ assert_true(comp_size > 0);
+ assert_true(comp_size < original.length);
+
+ talloc_free(mem_ctx);
+}
+
+
+static void test_lzxpress_plain_random_noise_round_trip(void **state)
+{
+ size_t i;
+ size_t len = 10 * 1024 * 1024;
+ TALLOC_CTX *mem_ctx = talloc_new(NULL);
+ DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, len);
+ DATA_BLOB ref = {0};
+ ssize_t comp_size;
+ /*
+ * We are filling this up with incompressible noise, but we can assert
+ * quite tight bounds on how badly it will fail to compress.
+ *
+ * There is one additional bit for each code, which says whether the
+ * code is a literal byte or a match. If *all* codes are literal
+ * bytes, the length should be 9/8 the original (with rounding
+ * issues regarding the indicator bit blocks).
+ *
+ * If some matches are found the length will be a bit less. We would
+ * expect one 3 byte match per 1 << 24 tries, but we try 8192 times
+ * per position. That means there'll be a match 1/2048 of the time at
+ * best. 255 times out of 256 this will be exactly a 3 byte match,
+ * encoded as two bytes, so we could get a 1 / 2048 saving on top of
+ * the 1/8 cost. There'll be a smattering of longer matches too, and
+ * the potential for complicated maths to account for those, but we'll
+ * skimp on that by allowing for a 1/1500 saving.
+ *
+ * With the hash table, we take a shortcut in the "8192 tries", and
+ * the size of the table makes a difference in how we perform, with 13
+ * bits (8192 slots) naturally being luckier than 12. Ultimately,
+ * either way, the compressed file is still 12.5% bigger than the
+ * original.
+ */
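+ /*
+ * For example, assuming 32-bit indicator blocks as in [MS-XCA]
+ * plain LZ77: 32 literal bytes cost 32 data bytes plus a 4 byte
+ * flag word, i.e. 36/32 = 9/8 of the original size, hence the
+ * limit below.
+ */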
+ size_t limit = len * 9 / 8 + 4;
+
+ uint32_t *d32 = (uint32_t*)((void*)original.data);
+ struct jsf_rng rng;
+ jsf32_init(&rng, 2);
+
+ for (i = 0; i < (len / 4); i++) {
+ d32[i] = jsf32(&rng);
+ }
+
+ comp_size = attempt_round_trip(mem_ctx, original, "/tmp/random-noise", ref);
+ debug_message("original size %zu; compressed size %zd; ratio %.5f\n",
+ len, comp_size, ((double)comp_size) / len);
+ debug_message("expected range %zu - %zu\n",
+ limit - limit / 1500, limit);
+
+ assert_true(comp_size > 0);
+ assert_true(comp_size < limit);
+ assert_true(comp_size >= limit - limit / 1500);
+ talloc_free(mem_ctx);
+}
+
+
/* Tests based on [MS-XCA] 3.1 Examples */
static void test_msft_data1(void **state)
{
@@ -414,6 +1154,15 @@ static void test_lzxpress_round_trip(void **state)
int main(void)
{
const struct CMUnitTest tests[] = {
+ cmocka_unit_test(test_lzxpress_plain_decompress_files),
+ cmocka_unit_test(test_lzxpress_plain_decompress_more_compressed_files),
+ cmocka_unit_test(test_lzxpress_plain_round_trip_files),
+ cmocka_unit_test(test_lzxpress_plain_long_gpl_round_trip),
+ cmocka_unit_test(test_lzxpress_plain_long_random_graph_round_trip),
+ cmocka_unit_test(test_lzxpress_plain_chaos_graph_round_trip),
+ cmocka_unit_test(test_lzxpress_plain_sparse_random_graph_round_trip),
+ cmocka_unit_test(test_lzxpress_plain_long_random_graph_round_trip),
+ cmocka_unit_test(test_lzxpress_plain_random_noise_round_trip),
cmocka_unit_test(test_lzxpress),
cmocka_unit_test(test_msft_data1),
cmocka_unit_test(test_msft_data2),