summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Yugo Horie <u5.horie@gmail.com> 2023-02-23 08:09:50 +0900
committer: Yugo Horie <u5.horie@gmail.com> 2023-02-23 08:09:50 +0900
commit: 2c5fccd4693c0a68e1c72d65e016ba83e861120e (patch)
tree: 77acffd59f36faf194f97348f3d8de2cedb7a8a2
parent: 4ace957c4e08bcbf9ef5e9f83b8e43458bead77f (diff)
download: nginx-2c5fccd4693c0a68e1c72d65e016ba83e861120e.tar.gz
Core: stricter UTF-8 handling in ngx_utf8_decode().
A UTF-8 octet sequence cannot start with a 11111xxx byte (0xf8 or above); see https://datatracker.ietf.org/doc/html/rfc3629#section-3. Previously, such bytes were accepted by ngx_utf8_decode() and misinterpreted as 11110xxx bytes (that is, as the first byte of a 4-byte sequence). While unlikely, this could potentially cause issues. The fix is to explicitly reject such bytes in ngx_utf8_decode(): the input pointer is advanced past the offending byte and 0xffffffff is returned.
-rw-r--r-- src/core/ngx_string.c 7
1 file changed, 6 insertions, 1 deletion
diff --git a/src/core/ngx_string.c b/src/core/ngx_string.c
index 98f270aca..f8f738472 100644
--- a/src/core/ngx_string.c
+++ b/src/core/ngx_string.c
@@ -1364,7 +1364,12 @@ ngx_utf8_decode(u_char **p, size_t n)
u = **p;
- if (u >= 0xf0) {
+ if (u >= 0xf8) {
+
+ (*p)++;
+ return 0xffffffff;
+
+ } else if (u >= 0xf0) {
u &= 0x07;
valid = 0xffff;