summaryrefslogtreecommitdiff
path: root/lib/compression
diff options
context:
space:
mode:
authorDouglas Bagnall <douglas.bagnall@catalyst.net.nz>2022-11-18 15:54:37 +1300
committerJoseph Sutton <jsutton@samba.org>2022-12-01 22:56:39 +0000
commit77048aaa61eaf29934cb9446fc552b0c44431f76 (patch)
treee2f20c001c7f97ad8dc035e19d94b3f76112c39e /lib/compression
parent955214ef6ec0015d8c1e1f8a43cacdf239b4d253 (diff)
downloadsamba-77048aaa61eaf29934cb9446fc552b0c44431f76.tar.gz
lib/compression: debug routines for lzxpress-huffman
If you need to see a Huffman tree (and sometimes you do), set DEBUG_HUFFMAN_TREE to true at the top of lzxpress_huffman.c, and run: make bin/test_lzx_huffman && bin/test_lzx_huffman Actually, that will show you hundreds of trees, and you'll be glad of that if you are ever trying to understand this. Signed-off-by: Douglas Bagnall <douglas.bagnall@catalyst.net.nz> Reviewed-by: Joseph Sutton <josephsutton@catalyst.net.nz>
Diffstat (limited to 'lib/compression')
-rw-r--r--lib/compression/lzxpress_huffman.c250
1 files changed, 249 insertions, 1 deletions
diff --git a/lib/compression/lzxpress_huffman.c b/lib/compression/lzxpress_huffman.c
index ef64ea7021f..990552a7d09 100644
--- a/lib/compression/lzxpress_huffman.c
+++ b/lib/compression/lzxpress_huffman.c
@@ -28,6 +28,7 @@
#include "replace.h"
#include "lzxpress_huffman.h"
#include "lib/util/stable_sort.h"
+#include "lib/util/debug.h"
#include "lib/util/byteorder.h"
#include "lib/util/bytearray.h"
@@ -41,6 +42,24 @@
#define DEBUG_NO_LZ77_MATCHES false
#endif
+/*
+ * DEBUG_HUFFMAN_TREE forces the drawing of ascii art huffman trees during
+ * compression and decompression.
+ *
+ * These trees will also be drawn at DEBUG level 10, but that doesn't work
+ * with cmocka tests.
+ */
+#ifndef DEBUG_HUFFMAN_TREE
+#define DEBUG_HUFFMAN_TREE false
+#endif
+
+#if DEBUG_HUFFMAN_TREE
+#define DBG(...) fprintf(stderr, __VA_ARGS__)
+#else
+#define DBG(...) DBG_INFO(__VA_ARGS__)
+#endif
+
+
#define LZXPRESS_ERROR -1LL
/*
@@ -154,6 +173,182 @@ static inline uint16_t encode_match(size_t len, size_t offset)
return code;
}
+/*
+ * debug_huffman_tree() uses debug_huffman_tree_print() to draw the Huffman
+ * tree in ascii art.
+ *
+ * Note that the Huffman tree is probably not the same as that implied by the
+ * canonical Huffman encoding that is finally used. That tree would be the
+ * same shape, but with the left and right toggled to sort the branches by
+ * length, after which the symbols for each length sorted by value.
+ */
+
+static void debug_huffman_tree_print(struct huffman_node *node,
+ int *trail, int depth)
+{
+ if (node->left == NULL) {
+ /* time to print a row */
+ int j;
+ bool branched = false;
+ int row[17];
+ char c[100];
+ int s = node->symbol;
+ char code[17];
+ if (depth > 15) {
+ fprintf(stderr,
+ " \033[1;31m Max depth exceeded! (%d)\033[0m "
+ " symbol %#3x claimed depth %d count %d\n",
+ depth, node->symbol, node->depth, node->count);
+ return;
+ }
+ for (j = depth - 1; j >= 0; j--) {
+ if (branched) {
+ if (trail[j] == -1) {
+ row[j] = -3;
+ } else {
+ row[j] = -2;
+ }
+ } else if (trail[j] == -1) {
+ row[j] = -1;
+ branched = true;
+ } else {
+ row[j] = trail[j];
+ }
+ }
+ for (j = 0; j < depth; j++) {
+ switch (row[j]) {
+ case -3:
+ code[j] = '1';
+ fprintf(stderr, " ");
+ break;
+ case -2:
+ code[j] = '0';
+ fprintf(stderr, " │ ");
+ break;
+ case -1:
+ code[j] = '1';
+ fprintf(stderr, " ╰─");
+ break;
+ default:
+ code[j] = '0';
+ fprintf(stderr, "%5d─┬─", row[j]);
+ break;
+ }
+ }
+ code[depth] = 0;
+ if (s < 32) {
+ snprintf(c, sizeof(c),
+ "\033[1;32m%02x\033[0m \033[1;33m%c%c%c\033[0m",
+ s,
+ 0xE2, 0x90, 0x80 + s); /* utf-8 for symbol */
+ } else if (s < 127) {
+ snprintf(c, sizeof(c),
+ "\033[1;32m%2x\033[0m '\033[10;32m%c\033[0m'",
+ s, s);
+ } else if (s < 256) {
+ snprintf(c, sizeof(c), "\033[1;32m%2x\033[0m", s);
+ } else {
+ uint16_t len = (s & 15) + 3;
+ uint16_t dbits = ((s >> 4) & 15) + 1;
+ snprintf(c, sizeof(c),
+ " \033[0;33mlen:%2d%s, "
+ "dist:%d-%d \033[0m \033[1;32m%3x\033[0m%s",
+ len,
+ len == 18 ? "+" : "",
+ 1 << (dbits - 1),
+ (1 << dbits) - 1,
+ s,
+ s == 256 ? " \033[1;31mEOF\033[0m" : "");
+
+ }
+
+ fprintf(stderr, "──%5d %s \033[2;37m%s\033[0m\n",
+ node->count, c, code);
+ return;
+ }
+ trail[depth] = node->count;
+ debug_huffman_tree_print(node->left, trail, depth + 1);
+ trail[depth] = -1;
+ debug_huffman_tree_print(node->right, trail, depth + 1);
+}
+
+
+/*
+ * If DEBUG_HUFFMAN_TREE is defined true, debug_huffman_tree()
+ * will print a tree looking something like this:
+ *
+ * 7─┬─── 3 len:18+, dist:1-1 10f 0
+ * ╰─ 4─┬─ 2─┬─── 1 61 'a' 100
+ * │ ╰─── 1 62 'b' 101
+ * ╰─ 2─┬─── 1 63 'c' 110
+ * ╰─── 1 len: 3, dist:1-1 100 EOF 111
+ *
+ * This is based off a Huffman root node, and the tree may not be the same as
+ * the canonical tree.
+ */
+static void debug_huffman_tree(struct huffman_node *root)
+{
+ int trail[17];
+ debug_huffman_tree_print(root, trail, 0);
+}
+
+
+/*
+ * If DEBUG_HUFFMAN_TREE is defined true, debug_huffman_tree_from_table()
+ * will print something like this based on a decoding symbol table.
+ *
+ * Tree from decoding table 9 nodes → 5 codes
+ * 10000─┬─── 5000 len:18+, dist:1-1 10f 0
+ * ╰─ 5000─┬─ 2500─┬─── 1250 61 'a' 100
+ * │ ╰─── 1250 62 'b' 101
+ * ╰─ 2500─┬─── 1250 63 'c' 110
+ * ╰─── 1250 len: 3, dist:1-1 100 EOF 111
+ *
+ * This is the canonical form of the Huffman tree where the actual counts
+ * aren't known (we use "10000" to help indicate relative frequencies).
+ */
+static void debug_huffman_tree_from_table(uint16_t *table)
+{
+ int trail[17];
+ struct huffman_node nodes[1024] = {{0}};
+ uint16_t codes[1024];
+ size_t n = 1;
+ size_t i = 0;
+ codes[0] = 0;
+ nodes[0].count = 10000;
+
+ while (i < n) {
+ uint16_t index = codes[i];
+ struct huffman_node *node = &nodes[i];
+ if (table[index] == 0xffff) {
+ /* internal node */
+ index <<= 1;
+ /* left */
+ index++;
+ codes[n] = index;
+ node->left = nodes + n;
+ nodes[n].count = node->count >> 1;
+ n++;
+ /*right*/
+ index++;
+ codes[n] = index;
+ node->right = nodes + n;
+ nodes[n].count = node->count >> 1;
+ n++;
+ } else {
+ /* leaf node */
+ node->symbol = table[index] & 511;
+ }
+ i++;
+ }
+
+ fprintf(stderr,
+ "\033[1;34m Tree from decoding table\033[0m "
+ "%zu nodes → %zu codes\n",
+ n, (n + 1) / 2);
+ debug_huffman_tree_print(nodes, trail, 0);
+}
+
static bool depth_walk(struct huffman_node *n, uint32_t depth)
{
@@ -381,6 +576,9 @@ static int generate_huffman_codes(struct huffman_node *leaf_nodes,
return LZXPRESS_ERROR;
}
}
+ if (CHECK_DEBUGLVL(10) || DEBUG_HUFFMAN_TREE) {
+ debug_huffman_tree(huffman_root);
+ }
/*
* We have a tree, and need to turn it into a lookup table,
* and see if it is shallow enough (<= 15).
@@ -1160,6 +1358,51 @@ ssize_t lzxpress_huffman_compress(struct lzxhuff_compressor_mem *cmp_mem,
return cmp_ctx.output_pos;
}
+static void debug_tree_codes(struct bitstream *input)
+{
+ /*
+ */
+ size_t head = 0;
+ size_t tail = 2;
+ size_t ffff_count = 0;
+ struct q {
+ uint16_t tree_code;
+ uint16_t code_code;
+ };
+ struct q queue[65536];
+ char bits[17];
+ uint16_t *t = input->table;
+ queue[0].tree_code = 1;
+ queue[0].code_code = 2;
+ queue[1].tree_code = 2;
+ queue[1].code_code = 3;
+ while (head < tail) {
+ struct q q = queue[head];
+ uint16_t x = t[q.tree_code];
+ if (x != 0xffff) {
+ int k;
+ uint16_t j = q.code_code;
+ size_t offset = bitlen_nonzero_16(j) - 1;
+ for (k = 0; k <= offset; k++) {
+ bool b = (j >> (offset - k)) & 1;
+ bits[k] = b ? '1' : '0';
+ }
+ bits[k] = 0;
+ DBG("%03x %s\n", x & 511, bits);
+ head++;
+ continue;
+ }
+ ffff_count++;
+ queue[tail].tree_code = q.tree_code * 2 + 1;
+ queue[tail].code_code = q.code_code * 2;
+ tail++;
+ queue[tail].tree_code = q.tree_code * 2 + 1 + 1;
+ queue[tail].code_code = q.code_code * 2 + 1;
+ tail++;
+ head++;
+ }
+ DBG("0xffff count: %zu\n", ffff_count);
+}
/**
* Determines the sort order of one prefix_code_symbol relative to another
@@ -1350,6 +1593,9 @@ static bool fill_decomp_table(struct bitstream *input)
input->table[prefix] = 0xffff;
}
}
+ if (CHECK_DEBUGLVL(10)) {
+ debug_tree_codes(input);
+ }
/*
* check that the last code encodes 11111..., with right number of
@@ -1443,7 +1689,9 @@ static ssize_t lzx_huffman_decompress_block(struct bitstream *input,
if (! ok) {
return LZXPRESS_ERROR;
}
-
+ if (CHECK_DEBUGLVL(10) || DEBUG_HUFFMAN_TREE) {
+ debug_huffman_tree_from_table(input->table);
+ }
/*
* Always read 32 bits at the start, even if we don't need them.
*/