summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--snappy.cc50
1 files changed, 41 insertions, 9 deletions
diff --git a/snappy.cc b/snappy.cc
index 250e543..d414718 100644
--- a/snappy.cc
+++ b/snappy.cc
@@ -29,7 +29,6 @@
#include "snappy-internal.h"
#include "snappy-sinksource.h"
#include "snappy.h"
-
#if !defined(SNAPPY_HAVE_BMI2)
// __BMI2__ is defined by GCC and Clang. Visual Studio doesn't target BMI2
// specifically, but it does define __AVX2__ when AVX2 support is available.
@@ -1085,6 +1084,18 @@ void MemCopy64(ptrdiff_t dst, const void* src, size_t size) {
(void)size;
}
+void ClearDeferred(const void** deferred_src, size_t* deferred_length,
+ uint8_t* safe_source) {
+ *deferred_src = safe_source;
+ *deferred_length = 0;
+}
+
+void DeferMemCopy(const void** deferred_src, size_t* deferred_length,
+ const void* src, size_t length) {
+ *deferred_src = src;
+ *deferred_length = length;
+}
+
SNAPPY_ATTRIBUTE_ALWAYS_INLINE
inline size_t AdvanceToNextTagARMOptimized(const uint8_t** ip_p, size_t* tag) {
const uint8_t*& ip = *ip_p;
@@ -1189,6 +1200,12 @@ template <typename T>
std::pair<const uint8_t*, ptrdiff_t> DecompressBranchless(
const uint8_t* ip, const uint8_t* ip_limit, ptrdiff_t op, T op_base,
ptrdiff_t op_limit_min_slop) {
+ // If deferred_src is invalid point it here.
+ uint8_t safe_source[64];
+ const void* deferred_src;
+ size_t deferred_length;
+ ClearDeferred(&deferred_src, &deferred_length, safe_source);
+
// We unroll the inner loop twice so we need twice the spare room.
op_limit_min_slop -= kSlopBytes;
if (2 * (kSlopBytes + 1) < ip_limit - ip && op < op_limit_min_slop) {
@@ -1211,7 +1228,7 @@ std::pair<const uint8_t*, ptrdiff_t> DecompressBranchless(
// twice reduces the amount of instructions checking limits and also
// leads to reduced mov's.
- SNAPPY_PREFETCH(ip+128);
+ SNAPPY_PREFETCH(ip + 128);
for (int i = 0; i < 2; i++) {
const uint8_t* old_ip = ip;
assert(tag == ip[-1]);
@@ -1238,23 +1255,29 @@ std::pair<const uint8_t*, ptrdiff_t> DecompressBranchless(
}
// Only copy-1 or copy-2 tags can get here.
assert(tag_type == 1 || tag_type == 2);
- std::ptrdiff_t delta = op + len_min_offset - len;
+ std::ptrdiff_t delta = (op + deferred_length) + len_min_offset - len;
// Guard against copies before the buffer start.
+ // Execute any deferred MemCopy since we write to dst here.
+ MemCopy64(op_base + op, deferred_src, deferred_length);
+ op += deferred_length;
+ ClearDeferred(&deferred_src, &deferred_length, safe_source);
if (SNAPPY_PREDICT_FALSE(delta < 0 ||
!Copy64BytesWithPatternExtension(
op_base + op, len - len_min_offset))) {
goto break_loop;
}
+ // We aren't deferring this copy so add length right away.
op += len;
continue;
}
- std::ptrdiff_t delta = op + len_min_offset - len;
+ std::ptrdiff_t delta = (op + deferred_length) + len_min_offset - len;
if (SNAPPY_PREDICT_FALSE(delta < 0)) {
// Due to the spurious offset in literals have this will trigger
// at the start of a block when op is still smaller than 256.
if (tag_type != 0) goto break_loop;
- MemCopy64(op_base + op, old_ip, len);
- op += len;
+ MemCopy64(op_base + op, deferred_src, deferred_length);
+ op += deferred_length;
+ DeferMemCopy(&deferred_src, &deferred_length, old_ip, len);
continue;
}
@@ -1262,14 +1285,23 @@ std::pair<const uint8_t*, ptrdiff_t> DecompressBranchless(
// we need to copy from ip instead of from the stream.
const void* from =
tag_type ? reinterpret_cast<void*>(op_base + delta) : old_ip;
- MemCopy64(op_base + op, from, len);
- op += len;
+ MemCopy64(op_base + op, deferred_src, deferred_length);
+ op += deferred_length;
+ DeferMemCopy(&deferred_src, &deferred_length, from, len);
}
- } while (ip < ip_limit_min_slop && op < op_limit_min_slop);
+ } while (ip < ip_limit_min_slop &&
+ (op + deferred_length) < op_limit_min_slop);
exit:
ip--;
assert(ip <= ip_limit);
}
+ // If we deferred a copy then we can perform. If we are up to date then we
+ // might not have enough slop bytes and could run past the end.
+ if (deferred_length) {
+ MemCopy64(op_base + op, deferred_src, deferred_length);
+ op += deferred_length;
+ ClearDeferred(&deferred_src, &deferred_length, safe_source);
+ }
return {ip, op};
}