summary | refs | log | tree | commit | diff
path: root/lib/util/charset
diff options
context:
space:
mode:
author: Douglas Bagnall <douglas.bagnall@catalyst.net.nz> 2021-04-08 21:18:46 +1200
committer: Jeremy Allison <jra@samba.org> 2021-06-18 03:39:28 +0000
commit: 1ea1816629104e16d9b180bee8f4ccc6292869ed (patch)
tree: 80907e70703c0849a69981d57ea10b0913b2cddf /lib/util/charset
parent: 50047588c0c8da2e1ffa0b08a8dc5d31e49f6a3b (diff)
download: samba-1ea1816629104e16d9b180bee8f4ccc6292869ed.tar.gz
util/iconv: reject improperly packed UTF-8
If we allow a string that encodes, say, '\0' as a multi-byte sequence, we are open to confusion where we mix NUL-terminated strings with sized data blobs, which is to say EVERYWHERE.

BUG: https://bugzilla.samba.org/show_bug.cgi?id=14684

Signed-off-by: Douglas Bagnall <douglas.bagnall@catalyst.net.nz>
Reviewed-by: Jeremy Allison <jra@samba.org>
Diffstat (limited to 'lib/util/charset')
-rw-r--r-- lib/util/charset/iconv.c | 32
1 files changed, 21 insertions, 11 deletions
diff --git a/lib/util/charset/iconv.c b/lib/util/charset/iconv.c
index 1f2d49c0e27..43b3306b0de 100644
--- a/lib/util/charset/iconv.c
+++ b/lib/util/charset/iconv.c
@@ -832,6 +832,11 @@ static size_t utf8_pull(void *cd, const char **inbuf, size_t *inbytesleft,
}
uc[1] = (c[0]>>2) & 0x7;
uc[0] = (c[0]<<6) | (c[1]&0x3f);
+ if (uc[1] == 0 && uc[0] < 0x80) {
+ /* this should have been a single byte */
+ errno = EILSEQ;
+ goto error;
+ }
c += 2;
in_left -= 2;
out_left -= 2;
@@ -840,14 +845,24 @@ static size_t utf8_pull(void *cd, const char **inbuf, size_t *inbytesleft,
}
if ((c[0] & 0xf0) == 0xe0) {
+ unsigned int codepoint;
if (in_left < 3 ||
(c[1] & 0xc0) != 0x80 ||
(c[2] & 0xc0) != 0x80) {
errno = EILSEQ;
goto error;
}
- uc[1] = ((c[0]&0xF)<<4) | ((c[1]>>2)&0xF);
- uc[0] = (c[1]<<6) | (c[2]&0x3f);
+ codepoint = ((c[2] & 0x3f) |
+ ((c[1] & 0x3f) << 6) |
+ ((c[0] & 0x0f) << 12));
+
+ if (codepoint < 0x800) {
+ /* this should be a 1 or 2 byte sequence */
+ errno = EILSEQ;
+ goto error;
+ }
+ uc[0] = codepoint & 0xff;
+ uc[1] = codepoint >> 8;
c += 3;
in_left -= 3;
out_left -= 2;
@@ -870,15 +885,10 @@ static size_t utf8_pull(void *cd, const char **inbuf, size_t *inbytesleft,
((c[1]&0x3f)<<12) |
((c[0]&0x7)<<18);
if (codepoint < 0x10000) {
- /* accept UTF-8 characters that are not
- minimally packed, but pack the result */
- uc[0] = (codepoint & 0xFF);
- uc[1] = (codepoint >> 8);
- c += 4;
- in_left -= 4;
- out_left -= 2;
- uc += 2;
- continue;
+ /* reject UTF-8 characters that are not
+ minimally packed */
+ errno = EILSEQ;
+ goto error;
}
codepoint -= 0x10000;