summaryrefslogtreecommitdiff
path: root/src/include/mb
diff options
context:
space:
mode:
author: John Naylor <john.naylor@postgresql.org> 2022-08-26 15:01:24 +0700
committer: John Naylor <john.naylor@postgresql.org> 2022-08-26 15:48:49 +0700
commit: 121d2d3d70ecdb2113b340c5f3b99a61341291af (patch)
tree: 2fb2e21869ca777cb79a3a9282f0e5096ca409f6 /src/include/mb
parent: ab9717847a2b0c32b93121f873f4dff331e26eb0 (diff)
download: postgresql-121d2d3d70ecdb2113b340c5f3b99a61341291af.tar.gz
Use SSE2 in is_valid_ascii() where available.
Per flame graph from Jelte Fennema, COPY FROM ... USING BINARY shows input validation taking at least 5% of the profile, so it's worth trying to be more efficient here. With this change, validation of pure ASCII is nearly 40% faster on contemporary Intel hardware. To make this change legible and easier to adapt to additional architectures, use helper functions to abstract the platform details away.

Reviewed by: Nathan Bossart
Discussion: https://www.postgresql.org/message-id/CAFBsxsG%3Dk8t%3DC457FXnoBXb%3D8iA4OaZkbFogFMachWif7mNnww%40mail.gmail.com
Diffstat (limited to 'src/include/mb')
-rw-r--r-- src/include/mb/pg_wchar.h | 38
1 files changed, 27 insertions, 11 deletions
diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h
index 011b0b3abd..1e8c3af360 100644
--- a/src/include/mb/pg_wchar.h
+++ b/src/include/mb/pg_wchar.h
@@ -19,6 +19,8 @@
#ifndef PG_WCHAR_H
#define PG_WCHAR_H
+#include "port/simd.h"
+
/*
* The pg_wchar type
*/
@@ -704,25 +706,28 @@ extern WCHAR *pgwin32_message_to_UTF16(const char *str, int len, int *utf16len);
* Verify a chunk of bytes for valid ASCII.
*
* Returns false if the input contains any zero bytes or bytes with the
- * high-bit set. Input len must be a multiple of 8.
+ * high-bit set. Input len must be a multiple of the chunk size (8 or 16).
*/
static inline bool
is_valid_ascii(const unsigned char *s, int len)
{
const unsigned char *const s_end = s + len;
- uint64 chunk,
- highbit_cum = UINT64CONST(0),
- zero_cum = UINT64CONST(0x8080808080808080);
+ Vector8 chunk;
+ Vector8 highbit_cum = vector8_broadcast(0);
+#ifdef USE_NO_SIMD
+ Vector8 zero_cum = vector8_broadcast(0x80);
+#endif
Assert(len % sizeof(chunk) == 0);
while (s < s_end)
{
- memcpy(&chunk, s, sizeof(chunk));
+ vector8_load(&chunk, s);
+
+ /* Capture any zero bytes in this chunk. */
+#ifdef USE_NO_SIMD
/*
- * Capture any zero bytes in this chunk.
- *
* First, add 0x7f to each byte. This sets the high bit in each byte,
* unless it was a zero. If any resulting high bits are zero, the
* corresponding high bits in the zero accumulator will be cleared.
@@ -733,21 +738,32 @@ is_valid_ascii(const unsigned char *s, int len)
* any input bytes did have the high bit set, it doesn't matter
* because we check for those separately.
*/
- zero_cum &= (chunk + UINT64CONST(0x7f7f7f7f7f7f7f7f));
+ zero_cum &= (chunk + vector8_broadcast(0x7F));
+#else
+
+ /*
+ * Set all bits in each lane of the highbit accumulator where input
+ * bytes are zero.
+ */
+ highbit_cum = vector8_or(highbit_cum,
+ vector8_eq(chunk, vector8_broadcast(0)));
+#endif
/* Capture all set bits in this chunk. */
- highbit_cum |= chunk;
+ highbit_cum = vector8_or(highbit_cum, chunk);
s += sizeof(chunk);
}
/* Check if any high bits in the high bit accumulator got set. */
- if (highbit_cum & UINT64CONST(0x8080808080808080))
+ if (vector8_is_highbit_set(highbit_cum))
return false;
+#ifdef USE_NO_SIMD
/* Check if any high bits in the zero accumulator got cleared. */
- if (zero_cum != UINT64CONST(0x8080808080808080))
+ if (zero_cum != vector8_broadcast(0x80))
return false;
+#endif
return true;
}