diff options
author | Jay Satiro <raysatiro@yahoo.com> | 2021-02-13 00:51:28 -0500 |
---|---|---|
committer | Jay Satiro <raysatiro@yahoo.com> | 2021-02-20 14:39:39 -0500 |
commit | 09363500b9e161670b6bd084cc57bff75d32fc02 (patch) | |
tree | bcf93fa61086156f9754977360438c783981f37a /lib | |
parent | f65d7889b5c8eeefbb9f41c7588199be18b38a20 (diff) | |
download | curl-09363500b9e161670b6bd084cc57bff75d32fc02.tar.gz |
curl_multibyte: always return a heap-allocated copy of string
- Change the Windows char <-> UTF-8 conversion functions to return an
  allocated copy of the passed-in string instead of the original.

Prior to this change the curlx_convert_ functions would, in what I
assume was an optimization, not make a copy of the passed-in string if
no conversion was required. No conversion is required in non-UNICODE
Windows builds since our tchar strings are type char and remain in
whatever the passed-in encoding is, which is assumed to be UTF-8 but may
be another encoding.

In contrast, the UNICODE Windows builds require conversion
(wchar <-> char) and do return a copy. That inconsistency could lead to
programming errors where the developer expects a copy and does not
realize that won't happen in all cases.

Closes https://github.com/curl/curl/pull/6602
Diffstat (limited to 'lib')
-rw-r--r-- | lib/curl_multibyte.c | 6 | ||||
-rw-r--r-- | lib/curl_multibyte.h | 41 |
2 files changed, 25 insertions, 22 deletions
```diff
diff --git a/lib/curl_multibyte.c b/lib/curl_multibyte.c
index 39b2c587c..a7ae47d7e 100644
--- a/lib/curl_multibyte.c
+++ b/lib/curl_multibyte.c
@@ -21,7 +21,11 @@
  ***************************************************************************/
 
 /*
- * This file is 'mem-include-scan' clean. See test 1132.
+ * This file is 'mem-include-scan' clean, which means memdebug.h and
+ * curl_memory.h are purposely not included in this file. See test 1132.
+ *
+ * The functions in this file are curlx functions which are not tracked by the
+ * curl memory tracker memdebug.
  */
 
 #include "curl_setup.h"
diff --git a/lib/curl_multibyte.h b/lib/curl_multibyte.h
index 8adaf4978..f43a75e1b 100644
--- a/lib/curl_multibyte.h
+++ b/lib/curl_multibyte.h
@@ -31,7 +31,6 @@
 
 wchar_t *curlx_convert_UTF8_to_wchar(const char *str_utf8);
 char *curlx_convert_wchar_to_UTF8(const wchar_t *str_w);
-
 #endif /* WIN32 */
 
 /*
@@ -40,29 +39,23 @@ char *curlx_convert_wchar_to_UTF8(const wchar_t *str_w);
  * preprocessor conditional directives needed by code using these
  * to differentiate UNICODE from non-UNICODE builds.
  *
- * When building with UNICODE defined, these two macros
- * curlx_convert_UTF8_to_tchar() and curlx_convert_tchar_to_UTF8()
- * return a pointer to a newly allocated memory area holding result.
- * When the result is no longer needed, allocated memory is intended
- * to be free'ed with curlx_unicodefree().
+ * In the case of a non-UNICODE build the tchar strings are char strings that
+ * are duplicated via strdup and remain in whatever the passed in encoding is,
+ * which is assumed to be UTF-8 but may be other encoding. Therefore the
+ * significance of the conversion functions is primarily for UNICODE builds.
+ *
+ * Allocated memory should be free'd with curlx_unicodefree().
  *
- * When building without UNICODE defined, this macros
- * curlx_convert_UTF8_to_tchar() and curlx_convert_tchar_to_UTF8()
- * return the pointer received as argument. curlx_unicodefree() does
- * no actual free'ing of this pointer it is simply set to NULL.
+ * Note: Because these are curlx functions their memory usage is not tracked
+ * by the curl memory tracker memdebug. You'll notice that curlx function-like
+ * macros call free and strdup in parentheses, eg (strdup)(ptr), and that's to
+ * ensure that the curl memdebug override macros do not replace them.
  */
 
 #if defined(UNICODE) && defined(WIN32)
 
 #define curlx_convert_UTF8_to_tchar(ptr) curlx_convert_UTF8_to_wchar((ptr))
 #define curlx_convert_tchar_to_UTF8(ptr) curlx_convert_wchar_to_UTF8((ptr))
-#define curlx_unicodefree(ptr) \
-  do { \
-    if(ptr) { \
-      (free)(ptr); \
-      (ptr) = NULL; \
-    } \
-  } while(0)
 
 typedef union {
   unsigned short *tchar_ptr;
@@ -73,10 +66,8 @@ typedef union {
 
 #else
 
-#define curlx_convert_UTF8_to_tchar(ptr) (ptr)
-#define curlx_convert_tchar_to_UTF8(ptr) (ptr)
-#define curlx_unicodefree(ptr) \
-  do {(ptr) = NULL;} while(0)
+#define curlx_convert_UTF8_to_tchar(ptr) (strdup)(ptr)
+#define curlx_convert_tchar_to_UTF8(ptr) (strdup)(ptr)
 
 typedef union {
   char *tchar_ptr;
@@ -87,4 +78,12 @@ typedef union {
 
 #endif /* UNICODE && WIN32 */
 
+#define curlx_unicodefree(ptr) \
+  do { \
+    if(ptr) { \
+      (free)(ptr); \
+      (ptr) = NULL; \
+    } \
+  } while(0)
+
 #endif /* HEADER_CURL_MULTIBYTE_H */
```