summaryrefslogtreecommitdiff
path: root/lib/compression
diff options
context:
space:
mode:
authorDouglas Bagnall <douglas.bagnall@catalyst.net.nz>2022-11-25 12:38:11 +1300
committerJoseph Sutton <jsutton@samba.org>2022-12-01 22:56:40 +0000
commitcaa643e36e671be9cb446afc99dfae3003aa8c6e (patch)
tree65c4ab9727b48ac44982aa1c784bc079cfbfa899 /lib/compression
parentfb35cf29a426ee2cb0ee280e147627fd3e84a71d (diff)
downloadsamba-caa643e36e671be9cb446afc99dfae3003aa8c6e.tar.gz
lib/compression/lzxpress: shift encoding into helper functions
This makes it easier to rework the encoding decision to depend on a hash table match rather than the current exhaustive search. Signed-off-by: Douglas Bagnall <douglas.bagnall@catalyst.net.nz> Reviewed-by: Joseph Sutton <josephsutton@catalyst.net.nz>
Diffstat (limited to 'lib/compression')
-rw-r--r--lib/compression/lzxpress.c178
1 files changed, 104 insertions, 74 deletions
diff --git a/lib/compression/lzxpress.c b/lib/compression/lzxpress.c
index 7a1bb670bbc..da285898d6f 100644
--- a/lib/compression/lzxpress.c
+++ b/lib/compression/lzxpress.c
@@ -58,10 +58,102 @@ struct write_context {
uint32_t nibble_index;
};
+struct match {
+ const uint8_t *there;
+ uint32_t length;
+};
+
#define CHECK_INPUT_BYTES(__needed) \
__CHECK_BYTES(uncompressed_size, uncompressed_pos, __needed)
#define CHECK_OUTPUT_BYTES(__needed) \
+ __CHECK_BYTES(wc->max_compressed_size, wc->compressed_pos, __needed)
+
+
+static inline ssize_t push_indicator_bit(struct write_context *wc, uint32_t bit)
+{
+ wc->indic = (wc->indic << 1) | bit;
+ wc->indic_bit += 1;
+
+ if (wc->indic_bit == 32) {
+ PUSH_LE_U32(wc->compressed, wc->indic_pos, wc->indic);
+ wc->indic_bit = 0;
+ CHECK_OUTPUT_BYTES(sizeof(uint32_t));
+ wc->indic_pos = wc->compressed_pos;
+ wc->compressed_pos += sizeof(uint32_t);
+ }
+ return wc->indic_pos;
+}
+
+
+static ssize_t encode_match(struct write_context *wc,
+ struct match match,
+ const uint8_t *here)
+{
+ uint32_t match_len = match.length - 3;
+ uint32_t best_offset = here - match.there - 1;
+ uint16_t metadata;
+
+ if (best_offset > 8191) {
+ return -1;
+ }
+
+ CHECK_OUTPUT_BYTES(sizeof(uint16_t));
+ metadata = (uint16_t)((best_offset << 3) | MIN(match_len, 7));
+ PUSH_LE_U16(wc->compressed, wc->compressed_pos, metadata);
+ wc->compressed_pos += sizeof(uint16_t);
+
+ if (match_len >= 7) {
+ match_len -= 7;
+
+ if (wc->nibble_index == 0) {
+ wc->nibble_index = wc->compressed_pos;
+
+ CHECK_OUTPUT_BYTES(sizeof(uint8_t));
+ wc->compressed[wc->nibble_index] = MIN(match_len, 15);
+ wc->compressed_pos += sizeof(uint8_t);
+ } else {
+ wc->compressed[wc->nibble_index] |= MIN(match_len, 15) << 4;
+ wc->nibble_index = 0;
+ }
+
+ if (match_len >= 15) {
+ match_len -= 15;
+
+ CHECK_OUTPUT_BYTES(sizeof(uint8_t));
+ wc->compressed[wc->compressed_pos] = MIN(match_len, 255);
+ wc->compressed_pos += sizeof(uint8_t);
+
+ if (match_len >= 255) {
+ /* Additional match_len */
+
+ match_len += 7 + 15;
+
+ if (match_len < (1 << 16)) {
+ CHECK_OUTPUT_BYTES(sizeof(uint16_t));
+ PUSH_LE_U16(wc->compressed, wc->compressed_pos,
+ match_len);
+ wc->compressed_pos += sizeof(uint16_t);
+ } else {
+ CHECK_OUTPUT_BYTES(sizeof(uint16_t) +
+ sizeof(uint32_t));
+ PUSH_LE_U16(wc->compressed,
+ wc->compressed_pos, 0);
+ wc->compressed_pos += sizeof(uint16_t);
+
+ PUSH_LE_U32(wc->compressed,
+ wc->compressed_pos,
+ match_len);
+ wc->compressed_pos += sizeof(uint32_t);
+ }
+ }
+ }
+ }
+ return push_indicator_bit(wc, 1);
+}
+
+#undef CHECK_OUTPUT_BYTES
+#define CHECK_OUTPUT_BYTES(__needed) \
__CHECK_BYTES(wc.max_compressed_size, wc.compressed_pos, __needed)
@@ -80,6 +172,7 @@ ssize_t lzxpress_compress(const uint8_t *uncompressed,
* the match length; they are always at least 16 bits long, and can
* implicitly use unused half-bytes from earlier in the stream.
*/
+ ssize_t ret;
uint32_t uncompressed_pos;
struct write_context wc = {
.indic = 0,
@@ -147,83 +240,20 @@ ssize_t lzxpress_compress(const uint8_t *uncompressed,
CHECK_OUTPUT_BYTES(sizeof(uint8_t));
wc.compressed[wc.compressed_pos++] = uncompressed[uncompressed_pos++];
- wc.indic <<= 1;
- wc.indic_bit += 1;
-
- if (wc.indic_bit == 32) {
- PUSH_LE_U32(wc.compressed, wc.indic_pos, wc.indic);
- wc.indic_bit = 0;
- CHECK_OUTPUT_BYTES(sizeof(uint32_t));
- wc.indic_pos = wc.compressed_pos;
- wc.compressed_pos += sizeof(uint32_t);
+ ret = push_indicator_bit(&wc, 0);
+ if (ret < 0) {
+ return ret;
}
} else {
- uint32_t match_len = best_len;
-
- uint16_t metadata;
-
- match_len -= 3;
- best_offset -= 1;
-
- /* Classical meta-data */
- CHECK_OUTPUT_BYTES(sizeof(uint16_t));
- metadata = (uint16_t)((best_offset << 3) | MIN(match_len, 7));
- PUSH_LE_U16(wc.compressed, wc.compressed_pos, metadata);
- wc.compressed_pos += sizeof(uint16_t);
-
- if (match_len >= 7) {
- match_len -= 7;
-
- if (!wc.nibble_index) {
- wc.nibble_index = wc.compressed_pos;
-
- CHECK_OUTPUT_BYTES(sizeof(uint8_t));
- wc.compressed[wc.nibble_index] = MIN(match_len, 15);
- wc.compressed_pos += sizeof(uint8_t);
- } else {
- wc.compressed[wc.nibble_index] |= MIN(match_len, 15) << 4;
- wc.nibble_index = 0;
- }
-
- if (match_len >= 15) {
- match_len -= 15;
-
- CHECK_OUTPUT_BYTES(sizeof(uint8_t));
- wc.compressed[wc.compressed_pos] = MIN(match_len, 255);
- wc.compressed_pos += sizeof(uint8_t);
-
- if (match_len >= 255) {
- /* Additional match_len */
-
- match_len += 7 + 15;
-
- if (match_len < (1 << 16)) {
- CHECK_OUTPUT_BYTES(sizeof(uint16_t));
- PUSH_LE_U16(wc.compressed, wc.compressed_pos, match_len);
- wc.compressed_pos += sizeof(uint16_t);
- } else {
- CHECK_OUTPUT_BYTES(sizeof(uint16_t) + sizeof(uint32_t));
- PUSH_LE_U16(wc.compressed, wc.compressed_pos, 0);
- wc.compressed_pos += sizeof(uint16_t);
-
- PUSH_LE_U32(wc.compressed, wc.compressed_pos, match_len);
- wc.compressed_pos += sizeof(uint32_t);
- }
- }
- }
+ const uint8_t *here = uncompressed + uncompressed_pos;
+ struct match match = {
+ .there = here - best_offset,
+ .length = best_len
+ };
+ ret = encode_match(&wc, match, here);
+ if (ret < 0) {
+ return ret;
}
-
- wc.indic = (wc.indic << 1) | 1;
- wc.indic_bit += 1;
-
- if (wc.indic_bit == 32) {
- PUSH_LE_U32(wc.compressed, wc.indic_pos, wc.indic);
- wc.indic_bit = 0;
- CHECK_OUTPUT_BYTES(sizeof(uint32_t));
- wc.indic_pos = wc.compressed_pos;
- wc.compressed_pos += sizeof(uint32_t);
- }
-
uncompressed_pos += best_len;
}
}