From 09363500b9e161670b6bd084cc57bff75d32fc02 Mon Sep 17 00:00:00 2001
From: Jay Satiro <raysatiro@yahoo.com>
Date: Sat, 13 Feb 2021 00:51:28 -0500
Subject: curl_multibyte: always return a heap-allocated copy of string

- Change the Windows char <-> UTF-8 conversion functions to return an
  allocated copy of the passed in string instead of the original.

Prior to this change the curlx_convert_ functions would, presumably as
an optimization, not make a copy of the passed-in string if no
conversion was required. No conversion is required in non-UNICODE
Windows builds since our tchar strings are of type char and remain in
whatever encoding was passed in, which is assumed to be UTF-8 but may
be another encoding.

In contrast the UNICODE Windows builds require conversion
(wchar <-> char) and do return a copy. That inconsistency could lead to
programming errors where the developer expects a copy, and does not
realize that won't happen in all cases.

Closes https://github.com/curl/curl/pull/6602
---
 lib/curl_multibyte.c |  6 +++++-
 lib/curl_multibyte.h | 41 ++++++++++++++++++++---------------------
 2 files changed, 25 insertions(+), 22 deletions(-)

diff --git a/lib/curl_multibyte.c b/lib/curl_multibyte.c
index 39b2c587c..a7ae47d7e 100644
--- a/lib/curl_multibyte.c
+++ b/lib/curl_multibyte.c
@@ -21,7 +21,11 @@
  ***************************************************************************/
 
 /*
- * This file is 'mem-include-scan' clean. See test 1132.
+ * This file is 'mem-include-scan' clean, which means memdebug.h and
+ * curl_memory.h are purposely not included in this file. See test 1132.
+ *
+ * The functions in this file are curlx functions which are not tracked by the
+ * curl memory tracker memdebug.
  */
 
 #include "curl_setup.h"
diff --git a/lib/curl_multibyte.h b/lib/curl_multibyte.h
index 8adaf4978..f43a75e1b 100644
--- a/lib/curl_multibyte.h
+++ b/lib/curl_multibyte.h
@@ -31,7 +31,6 @@
 
 wchar_t *curlx_convert_UTF8_to_wchar(const char *str_utf8);
 char *curlx_convert_wchar_to_UTF8(const wchar_t *str_w);
-
 #endif /* WIN32 */
 
 /*
@@ -40,29 +39,23 @@ char *curlx_convert_wchar_to_UTF8(const wchar_t *str_w);
  * preprocessor conditional directives needed by code using these
  * to differentiate UNICODE from non-UNICODE builds.
  *
- * When building with UNICODE defined, these two macros
- * curlx_convert_UTF8_to_tchar() and curlx_convert_tchar_to_UTF8()
- * return a pointer to a newly allocated memory area holding result.
- * When the result is no longer needed, allocated memory is intended
- * to be free'ed with curlx_unicodefree().
+ * In the case of a non-UNICODE build the tchar strings are char strings that
+ * are duplicated via strdup and remain in whatever encoding was passed in,
+ * which is assumed to be UTF-8 but may be another encoding. Therefore the
+ * significance of the conversion functions is primarily for UNICODE builds.
+ *
+ * Allocated memory should be free'd with curlx_unicodefree().
  *
- * When building without UNICODE defined, this macros
- * curlx_convert_UTF8_to_tchar() and curlx_convert_tchar_to_UTF8()
- * return the pointer received as argument. curlx_unicodefree() does
- * no actual free'ing of this pointer it is simply set to NULL.
+ * Note: Because these are curlx functions their memory usage is not tracked
+ * by the curl memory tracker memdebug. You'll notice that curlx function-like
+ * macros call free and strdup in parentheses, e.g. (strdup)(ptr), to ensure
+ * that the curl memdebug override macros do not replace them.
  */
 
 #if defined(UNICODE) && defined(WIN32)
 
 #define curlx_convert_UTF8_to_tchar(ptr) curlx_convert_UTF8_to_wchar((ptr))
 #define curlx_convert_tchar_to_UTF8(ptr) curlx_convert_wchar_to_UTF8((ptr))
-#define curlx_unicodefree(ptr)                          \
-  do {                                                  \
-    if(ptr) {                                           \
-      (free)(ptr);                                        \
-      (ptr) = NULL;                                     \
-    }                                                   \
-  } while(0)
 
 typedef union {
   unsigned short       *tchar_ptr;
@@ -73,10 +66,8 @@ typedef union {
 
 #else
 
-#define curlx_convert_UTF8_to_tchar(ptr) (ptr)
-#define curlx_convert_tchar_to_UTF8(ptr) (ptr)
-#define curlx_unicodefree(ptr) \
-  do {(ptr) = NULL;} while(0)
+#define curlx_convert_UTF8_to_tchar(ptr) (strdup)(ptr)
+#define curlx_convert_tchar_to_UTF8(ptr) (strdup)(ptr)
 
 typedef union {
   char                *tchar_ptr;
@@ -87,4 +78,12 @@ typedef union {
 
 #endif /* UNICODE && WIN32 */
 
+#define curlx_unicodefree(ptr)                          \
+  do {                                                  \
+    if(ptr) {                                           \
+      (free)(ptr);                                      \
+      (ptr) = NULL;                                     \
+    }                                                   \
+  } while(0)
+
 #endif /* HEADER_CURL_MULTIBYTE_H */
-- 
cgit v1.2.1