From 09363500b9e161670b6bd084cc57bff75d32fc02 Mon Sep 17 00:00:00 2001 From: Jay Satiro Date: Sat, 13 Feb 2021 00:51:28 -0500 Subject: curl_multibyte: always return a heap-allocated copy of string - Change the Windows char <-> UTF-8 conversion functions to return an allocated copy of the passed-in string instead of the original. Prior to this change the curlx_convert_ functions would, in what I assume was an optimization, not make a copy of the passed-in string if no conversion was required. No conversion is required in non-UNICODE Windows builds since our tchar strings are of type char and remain in whatever encoding was passed in, which is assumed to be UTF-8 but may be another encoding. In contrast the UNICODE Windows builds require conversion (wchar <-> char) and do return a copy. That inconsistency could lead to programming errors where the developer expects a copy, and does not realize that won't happen in all cases. Closes https://github.com/curl/curl/pull/6602 --- lib/curl_multibyte.c | 6 +++++- lib/curl_multibyte.h | 41 ++++++++++++++++++++--------------------- 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/lib/curl_multibyte.c b/lib/curl_multibyte.c index 39b2c587c..a7ae47d7e 100644 --- a/lib/curl_multibyte.c +++ b/lib/curl_multibyte.c @@ -21,7 +21,11 @@ ***************************************************************************/ /* - * This file is 'mem-include-scan' clean. See test 1132. + * This file is 'mem-include-scan' clean, which means memdebug.h and + * curl_memory.h are purposely not included in this file. See test 1132. + * + * The functions in this file are curlx functions which are not tracked by the + * curl memory tracker memdebug. 
*/ #include "curl_setup.h" diff --git a/lib/curl_multibyte.h b/lib/curl_multibyte.h index 8adaf4978..f43a75e1b 100644 --- a/lib/curl_multibyte.h +++ b/lib/curl_multibyte.h @@ -31,7 +31,6 @@ wchar_t *curlx_convert_UTF8_to_wchar(const char *str_utf8); char *curlx_convert_wchar_to_UTF8(const wchar_t *str_w); - #endif /* WIN32 */ /* @@ -40,29 +39,23 @@ char *curlx_convert_wchar_to_UTF8(const wchar_t *str_w); * preprocessor conditional directives needed by code using these * to differentiate UNICODE from non-UNICODE builds. * - * When building with UNICODE defined, these two macros - * curlx_convert_UTF8_to_tchar() and curlx_convert_tchar_to_UTF8() - * return a pointer to a newly allocated memory area holding result. - * When the result is no longer needed, allocated memory is intended - * to be free'ed with curlx_unicodefree(). + * In the case of a non-UNICODE build the tchar strings are char strings that + * are duplicated via strdup and remain in whatever the passed in encoding is, + * which is assumed to be UTF-8 but may be other encoding. Therefore the + * significance of the conversion functions is primarily for UNICODE builds. + * + * Allocated memory should be free'd with curlx_unicodefree(). * - * When building without UNICODE defined, this macros - * curlx_convert_UTF8_to_tchar() and curlx_convert_tchar_to_UTF8() - * return the pointer received as argument. curlx_unicodefree() does - * no actual free'ing of this pointer it is simply set to NULL. + * Note: Because these are curlx functions their memory usage is not tracked + * by the curl memory tracker memdebug. You'll notice that curlx function-like + * macros call free and strdup in parentheses, eg (strdup)(ptr), and that's to + * ensure that the curl memdebug override macros do not replace them. 
*/ #if defined(UNICODE) && defined(WIN32) #define curlx_convert_UTF8_to_tchar(ptr) curlx_convert_UTF8_to_wchar((ptr)) #define curlx_convert_tchar_to_UTF8(ptr) curlx_convert_wchar_to_UTF8((ptr)) -#define curlx_unicodefree(ptr) \ - do { \ - if(ptr) { \ - (free)(ptr); \ - (ptr) = NULL; \ - } \ - } while(0) typedef union { unsigned short *tchar_ptr; @@ -73,10 +66,8 @@ typedef union { #else -#define curlx_convert_UTF8_to_tchar(ptr) (ptr) -#define curlx_convert_tchar_to_UTF8(ptr) (ptr) -#define curlx_unicodefree(ptr) \ - do {(ptr) = NULL;} while(0) +#define curlx_convert_UTF8_to_tchar(ptr) (strdup)(ptr) +#define curlx_convert_tchar_to_UTF8(ptr) (strdup)(ptr) typedef union { char *tchar_ptr; @@ -87,4 +78,12 @@ typedef union { #endif /* UNICODE && WIN32 */ +#define curlx_unicodefree(ptr) \ + do { \ + if(ptr) { \ + (free)(ptr); \ + (ptr) = NULL; \ + } \ + } while(0) + #endif /* HEADER_CURL_MULTIBYTE_H */ -- cgit v1.2.1