diff options
author | Michael Adam <obnox@samba.org> | 2010-11-01 16:28:43 +0100 |
---|---|---|
committer | Karolin Seeger <kseeger@samba.org> | 2011-03-05 14:34:31 +0100 |
commit | b873b8b061cbbff578c242e2a062bd198a5069b3 (patch) | |
tree | 87e1d227bba1690d890f1347e9a696df62baa681 /source3/lib | |
parent | 4d06efbc24174b6c4f6627abdaa4dcdf58f286d3 (diff) | |
download | samba-b873b8b061cbbff578c242e2a062bd198a5069b3.tar.gz |
s3:lib/util_str: add strlen_m_ext() that takes the dest charset as a parameter.
(cherry picked from commit 054cd7ec30a3289443c97d36ea416d37f19d6b0b)
Diffstat (limited to 'source3/lib')
-rw-r--r-- | source3/lib/util_str.c | 64 |
1 files changed, 52 insertions, 12 deletions
diff --git a/source3/lib/util_str.c b/source3/lib/util_str.c index 9a0b12adea0..f0eb6e55715 100644 --- a/source3/lib/util_str.c +++ b/source3/lib/util_str.c @@ -1454,12 +1454,12 @@ void strupper_m(char *s) } /** - Count the number of UCS2 characters in a string. Normally this will - be the same as the number of bytes in a string for single byte strings, - but will be different for multibyte. -**/ - -size_t strlen_m(const char *s) + * Calculate the number of units (8 or 16-bit, depending on the + * destination charset), that would be needed to convert the input + * string which is expected to be in CH_UNIX encoding to the + * destination charset (which should be a unicode charset). + */ +size_t strlen_m_ext(const char *s, const charset_t dst_charset) { size_t count = 0; @@ -1479,20 +1479,60 @@ size_t strlen_m(const char *s) while (*s) { size_t c_size; codepoint_t c = next_codepoint(s, &c_size); - if (c < 0x10000) { - /* Unicode char fits into 16 bits. */ + s += c_size; + + switch(dst_charset) { + case CH_UTF16LE: + case CH_UTF16BE: + case CH_UTF16MUNGED: + if (c < 0x10000) { + /* Unicode char fits into 16 bits. */ + count += 1; + } else { + /* Double-width unicode char - 32 bits. */ + count += 2; + } + break; + case CH_UTF8: + /* + * this only checks ranges, and does not + * check for invalid codepoints + */ + if (c < 0x80) { + count += 1; + } else if (c < 0x800) { + count += 2; + } else if (c < 0x1000) { + count += 3; + } else { + count += 4; + } + break; + default: + /* + * non-unicode encoding: + * assume that each codepoint fits into + * one unit in the destination encoding. + */ count += 1; - } else { - /* Double-width unicode char - 32 bits. */ - count += 2; } - s += c_size; } return count; } /** + Count the number of UCS2 characters in a string. Normally this will + be the same as the number of bytes in a string for single byte strings, + but will be different for multibyte. 
+**/ + +size_t strlen_m(const char *s) +{ + return strlen_m_ext(s, CH_UTF16LE); +} + +/** Count the number of UCS2 characters in a string including the null terminator. **/ |