diff options
-rw-r--r-- | cgpt/cgpt.h | 18 | ||||
-rw-r--r-- | cgpt/cgpt_common.c | 197 | ||||
-rw-r--r-- | cgpt/cmd_add.c | 7 | ||||
-rw-r--r-- | cgpt/cmd_find.c | 8 |
4 files changed, 31 insertions, 199 deletions
diff --git a/cgpt/cgpt.h b/cgpt/cgpt.h index 85702a4f..9b0805cf 100644 --- a/cgpt/cgpt.h +++ b/cgpt/cgpt.h @@ -81,22 +81,14 @@ int WritePMBR(struct drive *drive); /* Convert possibly unterminated UTF16 string to UTF8. * Caller must prepare enough space for UTF8, which could be up to - * twice the byte length of UTF16 string plus the terminating '\0'. - * - * Return: CGPT_OK --- all character are converted successfully. - * CGPT_FAILED --- convert error, i.e. output buffer is too short. + * twice the number of UTF16 chars plus the terminating '\0'. */ -int UTF16ToUTF8(const uint16_t *utf16, unsigned int maxinput, - uint8_t *utf8, unsigned int maxoutput); - +void UTF16ToUTF8(const uint16_t *utf16, unsigned int maxinput, + uint8_t *utf8, unsigned int maxoutput); /* Convert null-terminated UTF8 string to UTF16. - * Caller must prepare enough space for UTF16, which is the byte length of UTF8 - * plus the terminating 0x0000. - * - * Return: CGPT_OK --- all character are converted successfully. - * CGPT_FAILED --- convert error, i.e. output buffer is too short. + * Caller must prepare enough space for UTF16, including a terminating 0x0000 */ -int UTF8ToUTF16(const uint8_t *utf8, uint16_t *utf16, unsigned int maxoutput); +void UTF8ToUTF16(const uint8_t *utf8, uint16_t *utf16, unsigned int maxoutput); /* Helper functions for supported GPT types. */ int ResolveType(const Guid *type, char *buf); diff --git a/cgpt/cgpt_common.c b/cgpt/cgpt_common.c index 52cbe70c..0e466fdc 100644 --- a/cgpt/cgpt_common.c +++ b/cgpt/cgpt_common.c @@ -350,209 +350,56 @@ void GuidToStr(const Guid *guid, char *str, unsigned int buflen) { /* Convert possibly unterminated UTF16 string to UTF8. * Caller must prepare enough space for UTF8, which could be up to - * twice the byte length of UTF16 string plus the terminating '\0'. - * See the following table for encoding lengths. - * - * Code point UTF16 UTF8 - * 0x0000-0x007F 2 bytes 1 byte - * 0x0080-0x07FF 2 bytes 2 bytes - * 0x0800-0xFFFF 2 bytes 3 bytes - * 0x10000-0x10FFFF 4 bytes 4 bytes - * - * This function uses a simple state meachine to convert UTF-16 char(s) to - * a code point. Once a code point is parsed out, the state machine throws - * out sequencial UTF-8 chars in one time. - * - * Return: CGPT_OK --- all character are converted successfully. - * CGPT_FAILED --- convert error, i.e. output buffer is too short. + * twice the number of UTF16 chars plus the terminating '\0'. + * FIXME(wfrichar): The original implementation had security issues. As a + * temporary fix, I'm making this ONLY support ASCII codepoints. Bug 7542 + * (http://code.google.com/p/chromium-os/issues/detail?id=7542) is filed to fix + * this. */ -int UTF16ToUTF8(const uint16_t *utf16, unsigned int maxinput, - uint8_t *utf8, unsigned int maxoutput) +void UTF16ToUTF8(const uint16_t *utf16, unsigned int maxinput, + uint8_t *utf8, unsigned int maxoutput) { size_t s16idx, s8idx; - uint32_t code_point; - int code_point_ready = 1; // code point is ready to output. - int retval = CGPT_OK; + uint32_t utfchar; if (!utf16 || !maxinput || !utf8 || !maxoutput) - return CGPT_FAILED; + return; maxoutput--; /* plan for termination now */ for (s16idx = s8idx = 0; s16idx < maxinput && utf16[s16idx] && maxoutput; - s16idx++) { - uint16_t codeunit = le16toh(utf16[s16idx]); - - if (code_point_ready) { - if (codeunit >= 0xD800 && codeunit <= 0xDBFF) { - /* high surrogate, need the low surrogate. */ - code_point_ready = 0; - code_point = (codeunit & 0x03FF) + 0x0040; - } else { - /* BMP char, output it. */ - code_point = codeunit; - } - } else { - /* expect the low surrogate */ - if (codeunit >= 0xDC00 && codeunit <= 0xDFFF) { - code_point = (code_point << 10) | (codeunit & 0x03FF); - code_point_ready = 1; - } else { - /* the second code unit is NOT the low surrogate. Unexpected. */ - retval = CGPT_FAILED; - break; - } - } - - /* If UTF code point is ready, output it. */ - if (code_point_ready) { - require(code_point <= 0x10FFFF); - if (code_point <= 0x7F && maxoutput >= 1) { - maxoutput -= 1; - utf8[s8idx++] = code_point & 0x7F; - } else if (code_point <= 0x7FF && maxoutput >= 2) { - maxoutput -= 2; - utf8[s8idx++] = 0xC0 | (code_point >> 6); - utf8[s8idx++] = 0x80 | (code_point & 0x3F); - } else if (code_point <= 0xFFFF && maxoutput >= 3) { - maxoutput -= 3; - utf8[s8idx++] = 0xE0 | (code_point >> 12); - utf8[s8idx++] = 0x80 | ((code_point >> 6) & 0x3F); - utf8[s8idx++] = 0x80 | (code_point & 0x3F); - } else if (code_point <= 0x10FFFF && maxoutput >= 4) { - maxoutput -= 4; - utf8[s8idx++] = 0xF0 | (code_point >> 18); - utf8[s8idx++] = 0x80 | ((code_point >> 12) & 0x3F); - utf8[s8idx++] = 0x80 | ((code_point >> 6) & 0x3F); - utf8[s8idx++] = 0x80 | (code_point & 0x3F); - } else { - /* buffer underrun */ - retval = CGPT_FAILED; - break; - } - } + s16idx++, maxoutput--) { + utfchar = le16toh(utf16[s16idx]); + utf8[s8idx++] = utfchar & 0x7F; } utf8[s8idx++] = 0; - return retval; } /* Convert UTF8 string to UTF16. The UTF8 string must be null-terminated. * Caller must prepare enough space for UTF16, including a terminating 0x0000. - * See the following table for encoding lengths. In any case, the caller - * just needs to prepare the byte length of UTF8 plus the terminating 0x0000. - * - * Code point UTF16 UTF8 - * 0x0000-0x007F 2 bytes 1 byte - * 0x0080-0x07FF 2 bytes 2 bytes - * 0x0800-0xFFFF 2 bytes 3 bytes - * 0x10000-0x10FFFF 4 bytes 4 bytes - * - * This function converts UTF8 chars to a code point first. Then, convrts it - * to UTF16 code unit(s). - * - * Return: CGPT_OK --- all character are converted successfully. - * CGPT_FAILED --- convert error, i.e. output buffer is too short. + * FIXME(wfrichar): The original implementation had security issues. As a + * temporary fix, I'm making this ONLY support ASCII codepoints. Bug 7542 + * (http://code.google.com/p/chromium-os/issues/detail?id=7542) is filed to fix + * this. */ -int UTF8ToUTF16(const uint8_t *utf8, uint16_t *utf16, unsigned int maxoutput) +void UTF8ToUTF16(const uint8_t *utf8, uint16_t *utf16, unsigned int maxoutput) { size_t s16idx, s8idx; - uint32_t code_point = 0; - unsigned int expected_units = 1; - unsigned int decoded_units = 1; - int retval = CGPT_OK; + uint32_t utfchar; if (!utf8 || !utf16 || !maxoutput) - return CGPT_FAILED; + return; maxoutput--; /* plan for termination */ for (s8idx = s16idx = 0; utf8[s8idx] && maxoutput; - s8idx++) { - uint8_t code_unit; - code_unit = utf8[s8idx]; - - if (expected_units != decoded_units) { - /* Trailing bytes of multi-byte character */ - if ((code_unit & 0xC0) == 0x80) { - code_point = (code_point << 6) | (code_unit & 0x3F); - ++decoded_units; - } else { - /* Unexpected code unit. */ - retval = CGPT_FAILED; - break; - } - } else { - /* parsing a new code point. */ - decoded_units = 1; - if (code_unit <= 0x7F) { - code_point = code_unit; - expected_units = 1; - } else if (code_unit <= 0xBF) { - /* 0x80-0xBF must NOT be the heading byte unit of a new code point. */ - retval = CGPT_FAILED; - break; - } else if (code_unit >= 0xC2 && code_unit <= 0xDF) { - code_point = code_unit & 0x1F; - expected_units = 2; - } else if (code_unit >= 0xE0 && code_unit <= 0xEF) { - code_point = code_unit & 0x0F; - expected_units = 3; - } else if (code_unit >= 0xF0 && code_unit <= 0xF4) { - code_point = code_unit & 0x07; - expected_units = 4; - } else { - /* illegal code unit: 0xC0-0xC1, 0xF5-0xFF */ - retval = CGPT_FAILED; - break; - } - } - - /* If no more unit is needed, output the UTF16 unit(s). */ - if (expected_units == decoded_units) { - /* Check if the encoding is the shortest possible UTF-8 sequence. */ - switch (expected_units) { - case 2: - if (code_point <= 0x7F) retval = CGPT_FAILED; - break; - case 3: - if (code_point <= 0x7FF) retval = CGPT_FAILED; - break; - case 4: - if (code_point <= 0xFFFF) retval = CGPT_FAILED; - break; - } - if (retval == CGPT_FAILED) break; /* leave immediately */ - - if ((code_point <= 0xD7FF) || - (code_point >= 0xE000 && code_point <= 0xFFFF)) { - utf16[s16idx++] = code_point; - maxoutput -= 1; - } else if (code_point >= 0x10000 && code_point <= 0x10FFFF && - maxoutput >= 2) { - utf16[s16idx++] = 0xD800 | ((code_point >> 10) - 0x0040); - utf16[s16idx++] = 0xDC00 | (code_point & 0x03FF); - maxoutput -= 2; - } else { - /* Three possibilities fall into here. Both are failure cases. - * a. surrogate pair (non-BMP characters; 0xD800~0xDFFF) - * b. invalid code point > 0x10FFFF - * c. buffer underrun - */ - retval = CGPT_FAILED; - break; - } - } + s8idx++, maxoutput--) { + utfchar = utf8[s8idx]; + utf16[s16idx++] = utfchar & 0x7F; } - - /* A null-terminator shows up before the UTF8 sequence ends. */ - if (expected_units != decoded_units) { - retval = CGPT_FAILED; - } - utf16[s16idx++] = 0; - return retval; } struct { diff --git a/cgpt/cmd_add.c b/cgpt/cmd_add.c index 81b0dfa1..dafcc50f 100644 --- a/cgpt/cmd_add.c +++ b/cgpt/cmd_add.c @@ -251,11 +251,8 @@ int cmd_add(int argc, char *argv[]) { if (set_unique) memcpy(&entry->unique, &unique_guid, sizeof(Guid)); if (label) { - if (CGPT_OK != UTF8ToUTF16((uint8_t *)label, entry->name, - sizeof(entry->name) / sizeof(entry->name[0]))) { - Error("The label cannot be converted to UTF16.\n"); - goto bad; - } + UTF8ToUTF16((uint8_t *)label, entry->name, + sizeof(entry->name) / sizeof(entry->name[0])); } if (set_raw) { entry->attrs.fields.gpt_att = raw_value; diff --git a/cgpt/cmd_find.c b/cgpt/cmd_find.c index 43438ef7..40f10ba0 100644 --- a/cgpt/cmd_find.c +++ b/cgpt/cmd_find.c @@ -181,12 +181,8 @@ static int do_search(char *filename) { (set_type && !memcmp(&type_guid, &entry->type, sizeof(Guid)))) { found = 1; } else if (set_label) { - if (CGPT_OK != UTF16ToUTF8(entry->name, - sizeof(entry->name) / sizeof(entry->name[0]), - (uint8_t *)partlabel, sizeof(partlabel))) { - Error("The label cannot be converted to UTF16, so abort.\n"); - return 0; - } + UTF16ToUTF8(entry->name, sizeof(entry->name) / sizeof(entry->name[0]), + (uint8_t *)partlabel, sizeof(partlabel)); if (!strncmp(label, partlabel, sizeof(partlabel))) { found = 1; } |