diff options
author | Douglas Bagnall <douglas.bagnall@catalyst.net.nz> | 2021-04-08 21:18:46 +1200 |
---|---|---|
committer | Jeremy Allison <jra@samba.org> | 2021-06-18 03:39:28 +0000 |
commit | 1ea1816629104e16d9b180bee8f4ccc6292869ed (patch) | |
tree | 80907e70703c0849a69981d57ea10b0913b2cddf /lib | |
parent | 50047588c0c8da2e1ffa0b08a8dc5d31e49f6a3b (diff) | |
download | samba-1ea1816629104e16d9b180bee8f4ccc6292869ed.tar.gz |
util/iconv: reject improperly packed UTF-8
If we allow a string that encodes say '\0' as a multi-byte sequence,
we are open to confusion where we mix NUL terminated strings with
sized data blobs, which is to say EVERYWHERE.
BUG: https://bugzilla.samba.org/show_bug.cgi?id=14684
Signed-off-by: Douglas Bagnall <douglas.bagnall@catalyst.net.nz>
Reviewed-by: Jeremy Allison <jra@samba.org>
Diffstat (limited to 'lib')
-rw-r--r-- | lib/util/charset/iconv.c | 32 |
1 files changed, 21 insertions, 11 deletions
diff --git a/lib/util/charset/iconv.c b/lib/util/charset/iconv.c index 1f2d49c0e27..43b3306b0de 100644 --- a/lib/util/charset/iconv.c +++ b/lib/util/charset/iconv.c @@ -832,6 +832,11 @@ static size_t utf8_pull(void *cd, const char **inbuf, size_t *inbytesleft, } uc[1] = (c[0]>>2) & 0x7; uc[0] = (c[0]<<6) | (c[1]&0x3f); + if (uc[1] == 0 && uc[0] < 0x80) { + /* this should have been a single byte */ + errno = EILSEQ; + goto error; + } c += 2; in_left -= 2; out_left -= 2; @@ -840,14 +845,24 @@ static size_t utf8_pull(void *cd, const char **inbuf, size_t *inbytesleft, } if ((c[0] & 0xf0) == 0xe0) { + unsigned int codepoint; if (in_left < 3 || (c[1] & 0xc0) != 0x80 || (c[2] & 0xc0) != 0x80) { errno = EILSEQ; goto error; } - uc[1] = ((c[0]&0xF)<<4) | ((c[1]>>2)&0xF); - uc[0] = (c[1]<<6) | (c[2]&0x3f); + codepoint = ((c[2] & 0x3f) | + ((c[1] & 0x3f) << 6) | + ((c[0] & 0x0f) << 12)); + + if (codepoint < 0x800) { + /* this should be a 1 or 2 byte sequence */ + errno = EILSEQ; + goto error; + } + uc[0] = codepoint & 0xff; + uc[1] = codepoint >> 8; c += 3; in_left -= 3; out_left -= 2; @@ -870,15 +885,10 @@ static size_t utf8_pull(void *cd, const char **inbuf, size_t *inbytesleft, ((c[1]&0x3f)<<12) | ((c[0]&0x7)<<18); if (codepoint < 0x10000) { - /* accept UTF-8 characters that are not - minimally packed, but pack the result */ - uc[0] = (codepoint & 0xFF); - uc[1] = (codepoint >> 8); - c += 4; - in_left -= 4; - out_left -= 2; - uc += 2; - continue; + /* reject UTF-8 characters that are not + minimally packed */ + errno = EILSEQ; + goto error; } codepoint -= 0x10000; |