s3:lib/util_str: add strlen_m_ext() that takes the dest charset as a parameter.

(cherry picked from commit 054cd7ec30a3289443c97d36ea416d37f19d6b0b)
author: Michael Adam <obnox@samba.org> 2010-11-01 16:28:43 +0100
committer: Karolin Seeger <kseeger@samba.org> 2011-03-05 14:34:31 +0100
commit: b873b8b061cbbff578c242e2a062bd198a5069b3 (patch)
tree: 87e1d227bba1690d890f1347e9a696df62baa681 /source3/lib
parent: 4d06efbc24174b6c4f6627abdaa4dcdf58f286d3 (diff)
download: samba-b873b8b061cbbff578c242e2a062bd198a5069b3.tar.gz
1 files changed, 52 insertions, 12 deletions
diff --git a/source3/lib/util_str.c b/source3/lib/util_str.c
index 9a0b12adea0..f0eb6e55715 100644
--- a/source3/lib/util_str.c
+++ b/source3/lib/util_str.c
@@ -1454,12 +1454,12 @@ void strupper_m(char *s)
 }
 
 /**
- Count the number of UCS2 characters in a string. Normally this will
- be the same as the number of bytes in a string for single byte strings,
- but will be different for multibyte.
-**/
-
-size_t strlen_m(const char *s)
+ * Calculate the number of units (8 or 16-bit, depending on the
+ * destination charset), that would be needed to convert the input
+ * string which is expected to be in CH_UNIX encoding to the
+ * destination charset (which should be a unicode charset).
+ */
+size_t strlen_m_ext(const char *s, const charset_t dst_charset)
 {
 	size_t count = 0;
 
@@ -1479,20 +1479,60 @@ size_t strlen_m(const char *s)
 	while (*s) {
 		size_t c_size;
 		codepoint_t c = next_codepoint(s, &c_size);
-		if (c < 0x10000) {
-			/* Unicode char fits into 16 bits. */
+		s += c_size;
+
+		switch(dst_charset) {
+		case CH_UTF16LE:
+		case CH_UTF16BE:
+		case CH_UTF16MUNGED:
+			if (c < 0x10000) {
+				/* Unicode char fits into 16 bits. */
+				count += 1;
+			} else {
+				/* Double-width unicode char - 32 bits. */
+				count += 2;
+			}
+			break;
+		case CH_UTF8:
+			/*
+			 * this only checks ranges, and does not
+			 * check for invalid codepoints
+			 */
+			if (c < 0x80) {
+				count += 1;
+			} else if (c < 0x800) {
+				count += 2;
+			} else if (c < 0x1000) {
+				count += 3;
+			} else {
+				count += 4;
+			}
+			break;
+		default:
+			/*
+			 * non-unicode encoding:
+			 * assume that each codepoint fits into
+			 * one unit in the destination encoding.
+			 */
 			count += 1;
-		} else {
-			/* Double-width unicode char - 32 bits. */
-			count += 2;
 		}
-		s += c_size;
 	}
 
 	return count;
 }
 
 /**
+ Count the number of 16-bit UTF-16 units (UCS2 characters) in a string.
+ Normally this will be the same as the number of bytes in a string for
+ single byte strings, but will be different for multibyte.
+**/
+
+size_t strlen_m(const char *s)
+{
+	return strlen_m_ext(s, CH_UTF16LE);
+}
+
+/**
  Count the number of UCS2 characters in a string including the null
  terminator.
 **/
author	Michael Adam <obnox@samba.org>	2010-11-01 16:28:43 +0100
committer	Karolin Seeger <kseeger@samba.org>	2011-03-05 14:34:31 +0100
commit	b873b8b061cbbff578c242e2a062bd198a5069b3 (patch)
tree	87e1d227bba1690d890f1347e9a696df62baa681 /source3/lib
parent	4d06efbc24174b6c4f6627abdaa4dcdf58f286d3 (diff)
download	samba-b873b8b061cbbff578c242e2a062bd198a5069b3.tar.gz