From 5b6005d06ef55795afdbd7ea119b91cfac0faba3 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Fri, 16 Jul 2021 07:46:23 +0200 Subject: glib-aux: add nm_ascii_is_ctrl() helper (and similar) These functions have overlap with g_ascii_is*() functions. However g_ascii_is*() (and the is* functions from ) are always confusing to me, in the sense that it's not clearly stated which characters qualify for a certain category. And review is not easy either, because they are implemented via a table lookup. E.g. were you aware that 127 is considered g_ascii_iscntrl()? Probably you were, but it's not clear to see that anywhere. The main point of our own functions is to have is easier to see how characters get categorized, by using comparison instead of table lookup. Also, several existing code did in fact not use the g_ascii_is*() macros, possibly because of the (perceived) difficulty to understand their exact meaning. As a consequence, several checks got wrong. For example, (ch < ' ') is not a valid check for testing whether the character is a ASCII control character, for two reasons: - if char is a signed type (as likely it is), then this also evaluates to TRUE for all non-ASCII, UTF-8 characters that are greater than 127. - it does not consider DEL character (127) a control character. --- src/libnm-glib-aux/nm-shared-utils.h | 34 ++++++++++++++++++++ src/libnm-glib-aux/tests/test-shared-general.c | 44 ++++++++++++++++++++++++++ 2 files changed, 78 insertions(+) diff --git a/src/libnm-glib-aux/nm-shared-utils.h b/src/libnm-glib-aux/nm-shared-utils.h index aa6fdc70c7..d22a0d3572 100644 --- a/src/libnm-glib-aux/nm-shared-utils.h +++ b/src/libnm-glib-aux/nm-shared-utils.h @@ -2481,6 +2481,40 @@ nm_hexchar(int x, gboolean upper_case) return upper_case ? _nm_hexchar_table_upper[x & 15] : _nm_hexchar_table_lower[x & 15]; } +static inline gboolean +nm_ascii_is_ctrl(char ch) +{ + /* 0 to ' '-1 is the C0 range. + * + * Other ranges may also be considered control characters, but NOT + * CONSIDERED by this function. For example: + * - DEL (127) is also a control character. + * - SP (' ', 0x20) is also considered a control character. + * - DEL+1 (0x80) to 0x9F is C1 range. + * - NBSP (0xA0) and SHY (0xAD) are ISO 8859 special characters + */ + return ((guchar) ch) < ' '; +} + +static inline gboolean +nm_ascii_is_ctrl_or_del(char ch) +{ + return ((guchar) ch) < ' ' || ch == 127; +} + +static inline gboolean +nm_ascii_is_non_ascii(char ch) +{ + return ((guchar) ch) > 127; +} + +static inline gboolean +nm_ascii_is_regular(char ch) +{ + /* same as(!nm_ascii_is_ctrl_or_del(ch) && !nm_ascii_is_non_ascii(ch)) */ + return ch >= ' ' && ch < 127; +} + char *nm_utils_bin2hexstr_full(gconstpointer addr, gsize length, char delimiter, diff --git a/src/libnm-glib-aux/tests/test-shared-general.c b/src/libnm-glib-aux/tests/test-shared-general.c index 593b8c42a1..98b98669bf 100644 --- a/src/libnm-glib-aux/tests/test-shared-general.c +++ b/src/libnm-glib-aux/tests/test-shared-general.c @@ -1370,6 +1370,49 @@ test_nm_g_source_sentinel(void) /*****************************************************************************/ +static void +test_nm_ascii(void) +{ + int i; + + for (i = 0; i < 256; i++) { + const char ch = i; + gboolean is_space; + + if (ch == 127) { + g_assert(nm_ascii_is_ctrl_or_del(ch)); + g_assert(!nm_ascii_is_ctrl(ch)); + } else + g_assert(nm_ascii_is_ctrl_or_del(ch) == nm_ascii_is_ctrl(ch)); + g_assert(nm_ascii_is_ctrl_or_del(ch) == g_ascii_iscntrl(ch)); + + g_assert(nm_ascii_is_non_ascii(ch) == (i >= 128)); + + g_assert(!nm_ascii_is_ctrl_or_del(ch) || !nm_ascii_is_non_ascii(ch)); + + g_assert((nm_ascii_is_ctrl_or_del(ch) || nm_ascii_is_regular(ch)) + != nm_ascii_is_non_ascii(ch)); + + g_assert(nm_ascii_is_regular(ch) + == (!nm_ascii_is_ctrl_or_del(ch) && !nm_ascii_is_non_ascii(ch))); + + is_space = g_ascii_isspace(ch); + if (NM_IN_SET(ch, '\t', '\n', '\f', '\r')) { + /* hack is-space, so that the check below works to check for regular ASCII characters. */ + g_assert(!nm_ascii_is_regular(ch)); + g_assert(is_space); + is_space = FALSE; + } + g_assert(nm_ascii_is_regular(ch) + == (g_ascii_isalnum(ch) || g_ascii_isalpha(ch) || g_ascii_isdigit(ch) + || g_ascii_isgraph(ch) || g_ascii_islower(ch) || g_ascii_isprint(ch) + || g_ascii_ispunct(ch) || is_space || g_ascii_isupper(ch) + || g_ascii_isxdigit(ch))); + } +} + +/*****************************************************************************/ + NMTST_DEFINE(); int @@ -1402,6 +1445,7 @@ main(int argc, char **argv) g_test_add_func("/general/test_strv_dup_packed", test_strv_dup_packed); g_test_add_func("/general/test_utils_hashtable_cmp", test_utils_hashtable_cmp); g_test_add_func("/general/test_nm_g_source_sentinel", test_nm_g_source_sentinel); + g_test_add_func("/general/test_nm_ascii", test_nm_ascii); return g_test_run(); } -- cgit v1.2.1 From 5f54270d93b5c32be93737b0b2fe997979dce7b4 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Fri, 16 Jul 2021 08:05:41 +0200 Subject: libnm/tests: add test for broken behavior of nm_utils_bin_utf8safe_escape() --- src/libnm-core-impl/tests/test-general.c | 40 ++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/src/libnm-core-impl/tests/test-general.c b/src/libnm-core-impl/tests/test-general.c index 503613160c..888c4b4664 100644 --- a/src/libnm-core-impl/tests/test-general.c +++ b/src/libnm-core-impl/tests/test-general.c @@ -9228,6 +9228,12 @@ _do_test_utils_str_utf8safe(const char * str, ((nmtst_get_rand_bool()) ? NM_UTILS_STR_UTF8_SAFE_FLAG_NONE \ : NM_UTILS_STR_UTF8_SAFE_FLAG_SECRET) + if (expected && strlen(expected) == str_len && memcmp(str, expected, str_len) == 0) { + g_error("Test error: pass expected as NULL (instead of \"%s\", if the escaping will " + "produce no difference.", + expected); + } + buf_safe = nm_utils_buf_utf8safe_escape(str, str_len, flags | RND_FLAG, &str_free_1); str_safe = nm_utils_str_utf8safe_escape(str, flags | RND_FLAG, &str_free_2); @@ -9286,11 +9292,16 @@ _do_test_utils_str_utf8safe(const char * str, g_assert(str); g_assert(str_safe != str); g_assert(str_safe == str_free_2); - g_assert(strchr(str, '\\') || !g_utf8_validate(str, -1, NULL) - || (NM_FLAGS_HAS(flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_NON_ASCII) - && NM_STRCHAR_ANY(str, ch, (guchar) ch >= 127)) - || (NM_FLAGS_HAS(flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_CTRL) - && NM_STRCHAR_ANY(str, ch, (guchar) ch < ' '))); + if (nm_streq(str, "ab∞c")) { + /* Hack to pass test for broken behavior. */ + g_assert(((char) -1) < 0); + } else { + g_assert(strchr(str, '\\') || !g_utf8_validate(str, -1, NULL) + || (NM_FLAGS_HAS(flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_NON_ASCII) + && NM_STRCHAR_ANY(str, ch, (guchar) ch >= 127)) + || (NM_FLAGS_HAS(flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_CTRL) + && NM_STRCHAR_ANY(str, ch, (guchar) ch < ' '))); + } g_assert(g_utf8_validate(str_safe, -1, NULL)); str_free_6 = g_strcompress(str_safe); @@ -9364,6 +9375,25 @@ test_utils_str_utf8safe(void) do_test_utils_str_utf8safe_unescape("\n\\012", "\n\012"); do_test_utils_str_utf8safe_unescape("\n\\.", "\n."); do_test_utils_str_utf8safe_unescape("\\n\\.3\\r", "\n.3\r"); + + if (((char) -1) < 0) { + /* Test buggy behavior on systems with signed "char". Will be fixed next. */ + do_test_utils_str_utf8safe("ab∞c", + "ab\\342\\210\\236c", + NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_CTRL); + do_test_utils_str_utf8safe("ab\ab∞c", + "ab\\007b\\342\\210\\236c", + NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_CTRL); + } else { + do_test_utils_str_utf8safe("ab∞c", NULL, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_CTRL); + do_test_utils_str_utf8safe("ab\ab∞c", + "ab\\007b∞c", + NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_CTRL); + } + do_test_utils_str_utf8safe("ab\ab∞c", + "ab\\007b\\342\\210\\236c", + NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_CTRL + | NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_NON_ASCII); } /*****************************************************************************/ -- cgit v1.2.1 From 83f888054bdf47753929945576034879148668ae Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Fri, 16 Jul 2021 07:57:23 +0200 Subject: glib-aux: fix handling ASCII control characters in nm_utils_buf_utf8safe_escape() On architectures where "char" is signed, the check "ch < ' '" is also TRUE for non-ASCII characters greater than 127. This is an easy mistake to make. Fix it by using nm_ascii_is_control() which gets this right. It's a bug, but possibly not too bad because unnecesarily escaping a UTF-8 characters is not a severe problem, because the user anyway must be prepared to unescape the string. --- src/libnm-core-impl/tests/test-general.c | 31 +++++++------------------------ src/libnm-glib-aux/nm-shared-utils.c | 19 ++++++++++--------- 2 files changed, 17 insertions(+), 33 deletions(-) diff --git a/src/libnm-core-impl/tests/test-general.c b/src/libnm-core-impl/tests/test-general.c index 888c4b4664..824ac5a206 100644 --- a/src/libnm-core-impl/tests/test-general.c +++ b/src/libnm-core-impl/tests/test-general.c @@ -9292,16 +9292,11 @@ _do_test_utils_str_utf8safe(const char * str, g_assert(str); g_assert(str_safe != str); g_assert(str_safe == str_free_2); - if (nm_streq(str, "ab∞c")) { - /* Hack to pass test for broken behavior. */ - g_assert(((char) -1) < 0); - } else { - g_assert(strchr(str, '\\') || !g_utf8_validate(str, -1, NULL) - || (NM_FLAGS_HAS(flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_NON_ASCII) - && NM_STRCHAR_ANY(str, ch, (guchar) ch >= 127)) - || (NM_FLAGS_HAS(flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_CTRL) - && NM_STRCHAR_ANY(str, ch, (guchar) ch < ' '))); - } + g_assert(strchr(str, '\\') || !g_utf8_validate(str, -1, NULL) + || (NM_FLAGS_HAS(flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_NON_ASCII) + && NM_STRCHAR_ANY(str, ch, (guchar) ch >= 127)) + || (NM_FLAGS_HAS(flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_CTRL) + && NM_STRCHAR_ANY(str, ch, (guchar) ch < ' '))); g_assert(g_utf8_validate(str_safe, -1, NULL)); str_free_6 = g_strcompress(str_safe); @@ -9376,20 +9371,8 @@ test_utils_str_utf8safe(void) do_test_utils_str_utf8safe_unescape("\n\\.", "\n."); do_test_utils_str_utf8safe_unescape("\\n\\.3\\r", "\n.3\r"); - if (((char) -1) < 0) { - /* Test buggy behavior on systems with signed "char". Will be fixed next. */ - do_test_utils_str_utf8safe("ab∞c", - "ab\\342\\210\\236c", - NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_CTRL); - do_test_utils_str_utf8safe("ab\ab∞c", - "ab\\007b\\342\\210\\236c", - NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_CTRL); - } else { - do_test_utils_str_utf8safe("ab∞c", NULL, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_CTRL); - do_test_utils_str_utf8safe("ab\ab∞c", - "ab\\007b∞c", - NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_CTRL); - } + do_test_utils_str_utf8safe("ab∞c", NULL, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_CTRL); + do_test_utils_str_utf8safe("ab\ab∞c", "ab\\007b∞c", NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_CTRL); do_test_utils_str_utf8safe("ab\ab∞c", "ab\\007b\\342\\210\\236c", NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_CTRL diff --git a/src/libnm-glib-aux/nm-shared-utils.c b/src/libnm-glib-aux/nm-shared-utils.c index 1b4c3cbc4f..b31dcecf79 100644 --- a/src/libnm-glib-aux/nm-shared-utils.c +++ b/src/libnm-glib-aux/nm-shared-utils.c @@ -2991,13 +2991,13 @@ nm_utils_buf_utf8safe_escape(gconstpointer buf, if (g_utf8_validate(str, buflen, &p) && nul_terminated) { /* note that g_utf8_validate() does not allow NUL character inside @str. Good. * We can treat @str like a NUL terminated string. */ - if (!NM_STRCHAR_ANY( - str, - ch, - (ch == '\\' - || (NM_FLAGS_HAS(flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_CTRL) && ch < ' ') - || (NM_FLAGS_HAS(flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_NON_ASCII) - && ((guchar) ch) >= 127)))) + if (!NM_STRCHAR_ANY(str, + ch, + (ch == '\\' + || (NM_FLAGS_HAS(flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_CTRL) + && nm_ascii_is_ctrl(ch)) + || (NM_FLAGS_HAS(flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_NON_ASCII) + && nm_ascii_is_non_ascii(ch))))) return str; } @@ -3014,9 +3014,10 @@ nm_utils_buf_utf8safe_escape(gconstpointer buf, nm_assert(ch); if (ch == '\\') nm_str_buf_append_c(&strbuf, '\\', '\\'); - else if ((NM_FLAGS_HAS(flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_CTRL) && ch < ' ') + else if ((NM_FLAGS_HAS(flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_CTRL) + && nm_ascii_is_ctrl(ch)) || (NM_FLAGS_HAS(flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_NON_ASCII) - && ((guchar) ch) >= 127)) + && nm_ascii_is_non_ascii(ch))) _str_buf_append_c_escape_octal(&strbuf, ch); else nm_str_buf_append_c(&strbuf, ch); -- cgit v1.2.1 From 4b21056fde659389e45d93f0f71e6d06db146afb Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Fri, 16 Jul 2021 09:06:24 +0200 Subject: glib-aux: also backslash escape ASCII DEL character in nm_utils_buf_utf8safe_escape() --- src/libnm-glib-aux/nm-shared-utils.c | 4 ++-- src/libnm-glib-aux/nm-shared-utils.h | 8 ++++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/libnm-glib-aux/nm-shared-utils.c b/src/libnm-glib-aux/nm-shared-utils.c index b31dcecf79..106cc70fad 100644 --- a/src/libnm-glib-aux/nm-shared-utils.c +++ b/src/libnm-glib-aux/nm-shared-utils.c @@ -2995,7 +2995,7 @@ nm_utils_buf_utf8safe_escape(gconstpointer buf, ch, (ch == '\\' || (NM_FLAGS_HAS(flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_CTRL) - && nm_ascii_is_ctrl(ch)) + && nm_ascii_is_ctrl_or_del(ch)) || (NM_FLAGS_HAS(flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_NON_ASCII) && nm_ascii_is_non_ascii(ch))))) return str; @@ -3015,7 +3015,7 @@ nm_utils_buf_utf8safe_escape(gconstpointer buf, if (ch == '\\') nm_str_buf_append_c(&strbuf, '\\', '\\'); else if ((NM_FLAGS_HAS(flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_CTRL) - && nm_ascii_is_ctrl(ch)) + && nm_ascii_is_ctrl_or_del(ch)) || (NM_FLAGS_HAS(flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_NON_ASCII) && nm_ascii_is_non_ascii(ch))) _str_buf_append_c_escape_octal(&strbuf, ch); diff --git a/src/libnm-glib-aux/nm-shared-utils.h b/src/libnm-glib-aux/nm-shared-utils.h index d22a0d3572..a18463f4c5 100644 --- a/src/libnm-glib-aux/nm-shared-utils.h +++ b/src/libnm-glib-aux/nm-shared-utils.h @@ -1462,10 +1462,14 @@ GType nm_g_type_find_implementing_class_for_property(GType gtype, const char *pn typedef enum { NM_UTILS_STR_UTF8_SAFE_FLAG_NONE = 0, - /* This flag only has an effect during escaping. */ + /* This flag only has an effect during escaping. + * + * It will backslash escape ascii characters according to nm_ascii_is_ctrl_or_del(). */ NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_CTRL = 0x0001, - /* This flag only has an effect during escaping. */ + /* This flag only has an effect during escaping. + * + * It will backslash escape ascii characters according to nm_ascii_is_non_ascii(). */ NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_NON_ASCII = 0x0002, /* This flag only has an effect during escaping to ensure we -- cgit v1.2.1 From 6841bb1b263f86cdeb1c55525df06ac385cc858e Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Fri, 16 Jul 2021 08:29:06 +0200 Subject: ifcfg: use nm_ascii_is_ctrl() helper in shvar.c No change in behavior. --- src/core/settings/plugins/ifcfg-rh/shvar.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/core/settings/plugins/ifcfg-rh/shvar.c b/src/core/settings/plugins/ifcfg-rh/shvar.c index 5f9cd2f57b..1070448826 100644 --- a/src/core/settings/plugins/ifcfg-rh/shvar.c +++ b/src/core/settings/plugins/ifcfg-rh/shvar.c @@ -172,7 +172,7 @@ _escape_ansic(const char *source) n_alloc += 2; break; default: - if ((*p < ' ') || (*p >= 0177)) + if (!nm_ascii_is_regular(*p)) n_alloc += 4; else n_alloc += 1; @@ -221,7 +221,7 @@ _escape_ansic(const char *source) *q++ = *p; break; default: - if ((*p < ' ') || (*p >= 0177)) { + if (!nm_ascii_is_regular(*p)) { *q++ = '\\'; *q++ = '0' + (((*p) >> 6) & 07); *q++ = '0' + (((*p) >> 3) & 07); @@ -262,13 +262,13 @@ svEscape(const char *s, char **to_free) mangle++; else if (_char_req_quotes(s[slen])) requires_quotes = TRUE; - else if (((guchar) s[slen]) < ' ') { + else if (nm_ascii_is_ctrl(s[slen])) { /* if the string contains newline we can only express it using ANSI C quotation * (as we don't support line continuation). * Additionally, ANSI control characters look odd with regular quotation, so handle * them too. */ return (*to_free = _escape_ansic(s)); - } else if (((guchar) s[slen]) >= 0177) { + } else if (nm_ascii_is_non_ascii(s[slen])) { all_ascii = FALSE; requires_quotes = TRUE; } -- cgit v1.2.1 From fc2f758af52bf5d39bd4a9b96c30345692971fdb Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Fri, 16 Jul 2021 09:09:03 +0200 Subject: ifcfg: also ANSIC escape DEL character in ifcfg writer This is like using nm_ascii_is_ctrl_or_del() instead of nm_ascii_is_ctrl() in the previous version of the patch. We thus now always will switch to ANSIC escaping if we see a ASCII DEL character. That is probable desirable, but either way should not make a big difference (because we can parse the DEL character both in regular quotation and in ANSIC quotation). The patch is however larger, to also take the opportunity to only check for nm_ascii_is_regular() in the "fast path". The behavior is the same as changing nm_ascii_is_ctrl() to nm_ascii_is_ctrl_or_del(). --- src/core/settings/plugins/ifcfg-rh/shvar.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/core/settings/plugins/ifcfg-rh/shvar.c b/src/core/settings/plugins/ifcfg-rh/shvar.c index 1070448826..d82efb3cb9 100644 --- a/src/core/settings/plugins/ifcfg-rh/shvar.c +++ b/src/core/settings/plugins/ifcfg-rh/shvar.c @@ -262,13 +262,14 @@ svEscape(const char *s, char **to_free) mangle++; else if (_char_req_quotes(s[slen])) requires_quotes = TRUE; - else if (nm_ascii_is_ctrl(s[slen])) { - /* if the string contains newline we can only express it using ANSI C quotation - * (as we don't support line continuation). - * Additionally, ANSI control characters look odd with regular quotation, so handle - * them too. */ - return (*to_free = _escape_ansic(s)); - } else if (nm_ascii_is_non_ascii(s[slen])) { + else if (!nm_ascii_is_regular(s[slen])) { + if (nm_ascii_is_ctrl_or_del(s[slen])) { + /* if the string contains newline we can only express it using ANSI C quotation + * (as we don't support line continuation). + * Additionally, ANSI control characters look odd with regular quotation, so handle + * them too. */ + return (*to_free = _escape_ansic(s)); + } all_ascii = FALSE; requires_quotes = TRUE; } -- cgit v1.2.1 From 17bdd3a40d7bc0bbba52ad333be4b3210fa3981a Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Fri, 16 Jul 2021 09:36:49 +0200 Subject: libnm: fix clearing parentheses in nm_utils_fixup_product_string() Previously, once in_parent was TRUE it was never reset, thus the remainder of the string was cleared. That was most likely not intended. If the intent really was to clear all the remainder, then the code could have simply truncated the string at the first '('. --- src/libnm-client-impl/nm-libnm-utils.c | 11 ++++--- src/libnm-client-impl/tests/test-libnm.c | 54 ++++++++++++++++---------------- 2 files changed, 34 insertions(+), 31 deletions(-) diff --git a/src/libnm-client-impl/nm-libnm-utils.c b/src/libnm-client-impl/nm-libnm-utils.c index 3cc88ef4d8..045dc6a5b7 100644 --- a/src/libnm-client-impl/nm-libnm-utils.c +++ b/src/libnm-client-impl/nm-libnm-utils.c @@ -160,14 +160,17 @@ _fixup_string(const char * desc, p = q + 1; } - /* replace '_', ',', ASCII control characters and parentheses, with space. */ + /* replace '_', ',', ASCII control characters and everything inside parentheses, with space. */ for (p = desc_full; p[0]; p++) { if (*p == '(') in_paren = TRUE; - if (NM_IN_SET(*p, '_', ',') || *p < ' ' || in_paren) - *p = ' '; - if (*p == ')') + else if (*p == ')') in_paren = FALSE; + else if (NM_IN_SET(*p, '_', ',') || *p < ' ' || in_paren) { + /* pass */ + } else + continue; + *p = ' '; } /* Attempt to shorten ID by ignoring certain phrases */ diff --git a/src/libnm-client-impl/tests/test-libnm.c b/src/libnm-client-impl/tests/test-libnm.c index c260aebed6..18bb09da9d 100644 --- a/src/libnm-client-impl/tests/test-libnm.c +++ b/src/libnm-client-impl/tests/test-libnm.c @@ -219,7 +219,7 @@ test_fixup_vendor_string(void) T_DATA("Memorex", "Memorex"), T_DATA("Micrel-Kendin", "Micrel-Kendin"), T_DATA("Microchip Technology, Inc.", "Microchip"), - T_DATA("Microcomputer Systems (M) Son", "Microcomputer"), + T_DATA("Microcomputer Systems (M) Son", "Microcomputer Son"), T_DATA("Microsoft Corp.", "Microsoft"), T_DATA("Microsoft Corporation", "Microsoft"), T_DATA("Micro-Star International Co., Ltd. [MSI]", "MSI"), @@ -594,11 +594,11 @@ test_fixup_product_string(void) T_DATA("82599 Ethernet Controller Virtual Function", "82599 Virtual Function"), T_DATA("82599 Virtual Function", "82599 Virtual Function"), T_DATA("82801BA/BAM/CA/CAM Ethernet Controller", "82801BA/BAM/CA/CAM"), - T_DATA("82801CAM (ICH3) PRO/100 VE Ethernet Controller", "82801CAM"), - T_DATA("82801CAM (ICH3) PRO/100 VE (LOM) Ethernet Controller", "82801CAM"), - T_DATA("82801CAM (ICH3) PRO/100 VM Ethernet Controller", "82801CAM"), - T_DATA("82801CAM (ICH3) PRO/100 VM (KM) Ethernet Controller", "82801CAM"), - T_DATA("82801CAM (ICH3) PRO/100 VM (LOM) Ethernet Controller", "82801CAM"), + T_DATA("82801CAM (ICH3) PRO/100 VE Ethernet Controller", "82801CAM PRO/100 VE"), + T_DATA("82801CAM (ICH3) PRO/100 VE (LOM) Ethernet Controller", "82801CAM PRO/100 VE"), + T_DATA("82801CAM (ICH3) PRO/100 VM Ethernet Controller", "82801CAM PRO/100 VM"), + T_DATA("82801CAM (ICH3) PRO/100 VM (KM) Ethernet Controller", "82801CAM PRO/100 VM"), + T_DATA("82801CAM (ICH3) PRO/100 VM (LOM) Ethernet Controller", "82801CAM PRO/100 VM"), T_DATA("82801DB PRO/100 VE (CNR) Ethernet Controller", "82801DB PRO/100 VE"), T_DATA("82801DB PRO/100 VE (LOM) Ethernet Controller", "82801DB PRO/100 VE"), T_DATA("82801DB PRO/100 VE (MOB) Ethernet Controller", "82801DB PRO/100 VE"), @@ -1009,25 +1009,25 @@ test_fixup_product_string(void) T_DATA("Ethernet Adapter", NULL), T_DATA("Ethernet adapter [U2L 100P-Y1]", "U2L 100P-Y1"), T_DATA("Ethernet Adaptive Virtual Function", "Adaptive Virtual Function"), - T_DATA("Ethernet Connection (2) I218-LM", NULL), - T_DATA("Ethernet Connection (2) I218-V", NULL), - T_DATA("Ethernet Connection (2) I219-LM", NULL), - T_DATA("Ethernet Connection (2) I219-V", NULL), - T_DATA("Ethernet Connection (3) I218-LM", NULL), - T_DATA("Ethernet Connection (3) I218-V", NULL), - T_DATA("Ethernet Connection (3) I219-LM", NULL), - T_DATA("Ethernet Connection (4) I219-LM", NULL), - T_DATA("Ethernet Connection (4) I219-V", NULL), - T_DATA("Ethernet Connection (5) I219-LM", NULL), - T_DATA("Ethernet Connection (5) I219-V", NULL), - T_DATA("Ethernet Connection (6) I219-LM", NULL), - T_DATA("Ethernet Connection (6) I219-V", NULL), - T_DATA("Ethernet Connection (7) I219-LM", NULL), - T_DATA("Ethernet Connection (7) I219-V", NULL), - T_DATA("Ethernet Connection (8) I219-LM", NULL), - T_DATA("Ethernet Connection (8) I219-V", NULL), - T_DATA("Ethernet Connection (9) I219-LM", NULL), - T_DATA("Ethernet Connection (9) I219-V", NULL), + T_DATA("Ethernet Connection (2) I218-LM", "I218-LM"), + T_DATA("Ethernet Connection (2 I218-V", NULL), + T_DATA("Ethernet Connection (2 I219-LM", NULL), + T_DATA("Ethernet Connection (2 I219-V", NULL), + T_DATA("Ethernet Connection (3) I218-LM", "I218-LM"), + T_DATA("Ethernet Connection (3) I218-V", "I218-V"), + T_DATA("Ethernet Connection (3 I219-LM", NULL), + T_DATA("Ethernet Connection (4 I219-LM", NULL), + T_DATA("Ethernet Connection (4 I219-V", NULL), + T_DATA("Ethernet Connection (5 I219-LM", NULL), + T_DATA("Ethernet Connection (5 I219-V", NULL), + T_DATA("Ethernet Connection (6 I219-LM", NULL), + T_DATA("Ethernet Connection (6 I219-V", NULL), + T_DATA("Ethernet Connection (7 I219-LM", NULL), + T_DATA("Ethernet Connection (7 I219-V", NULL), + T_DATA("Ethernet Connection (8 I219-LM", NULL), + T_DATA("Ethernet Connection (8 I219-V", NULL), + T_DATA("Ethernet Connection (9 I219-LM", NULL), + T_DATA("Ethernet Connection (9 I219-V", NULL), T_DATA("Ethernet Connection I217-LM", "I217-LM"), T_DATA("Ethernet Connection I217-V", "I217-V"), T_DATA("Ethernet Connection I218-LM", "I218-LM"), @@ -2160,12 +2160,12 @@ test_fixup_product_string(void) "WLM-20U2/GN-1080"), T_DATA("WLP-UC-AG300 Wireless LAN Adapter", "WLP-UC-AG300"), T_DATA("WM168g 802.11bg Wireless Adapter [Intersil ISL3886]", "WM168g"), - T_DATA("WN111(v2) RangeMax Next Wireless [Atheros AR9170+AR9101]", "WN111"), + T_DATA("WN111(v2) RangeMax Next Wireless [Atheros AR9170+AR9101]", "WN111 RangeMax Next"), T_DATA("WNA1000M 802.11bgn [Realtek RTL8188CUS]", "WNA1000M"), T_DATA("WNA1000Mv2 802.11bgn [Realtek RTL8188CUS?]", "WNA1000Mv2"), T_DATA("WNA1000 Wireless-N 150 [Atheros AR9170+AR9101]", "WNA1000 150"), T_DATA("WNA1100 Wireless-N 150 [Atheros AR9271]", "WNA1100 150"), - T_DATA("WNA3100M(v1) Wireless-N 300 [Realtek RTL8192CU]", "WNA3100M"), + T_DATA("WNA3100M(v1) Wireless-N 300 [Realtek RTL8192CU]", "WNA3100M 300"), T_DATA("WNDA3100v1 802.11abgn [Atheros AR9170+AR9104]", "WNDA3100v1"), T_DATA("WNDA3200 802.11abgn Wireless Adapter [Atheros AR7010+AR9280]", "WNDA3200"), T_DATA("WNDA4100 802.11abgn 3x3:3 [Ralink RT3573]", "WNDA4100"), -- cgit v1.2.1 From fb3e6cb0dc68c1a3a89dd8a64946ecd26e545a64 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Fri, 16 Jul 2021 09:36:49 +0200 Subject: libnm: use nm_ascii_is_ctrl_or_del() in nm_utils_fixup_product_string() to preserve UTF-8 On architectures where "char" is signed, the check "ch < ' '" is also TRUE for characters greater than 127 (that is, UTF-8 characters). Let's preserve valid UTF-8 characters and don't clear them. Also note that already before we filtered out invalid UTF-8 sequences, so if we encounter here a character > 127, it is part of a valid UTF-8 sequence. --- src/libnm-client-impl/nm-libnm-utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libnm-client-impl/nm-libnm-utils.c b/src/libnm-client-impl/nm-libnm-utils.c index 045dc6a5b7..671466cf93 100644 --- a/src/libnm-client-impl/nm-libnm-utils.c +++ b/src/libnm-client-impl/nm-libnm-utils.c @@ -166,7 +166,7 @@ _fixup_string(const char * desc, in_paren = TRUE; else if (*p == ')') in_paren = FALSE; - else if (NM_IN_SET(*p, '_', ',') || *p < ' ' || in_paren) { + else if (NM_IN_SET(*p, '_', ',') || nm_ascii_is_ctrl_or_del(*p) || in_paren) { /* pass */ } else continue; -- cgit v1.2.1 From cf9e7ee5aa5e8ecbd0b9e3f58b1486a3dd7f88a4 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Fri, 16 Jul 2021 12:03:38 +0200 Subject: libnm: use nm_ascii_is_regular() in _keyfile_key_encode() No change in behavior. --- src/libnm-core-impl/nm-keyfile-utils.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libnm-core-impl/nm-keyfile-utils.c b/src/libnm-core-impl/nm-keyfile-utils.c index 88f9c4f928..f8c2f387b0 100644 --- a/src/libnm-core-impl/nm-keyfile-utils.c +++ b/src/libnm-core-impl/nm-keyfile-utils.c @@ -531,7 +531,7 @@ _keyfile_key_encode(const char *name, char **out_to_free) if (ch == '\0') return name; - if (ch < 0x20 || ch >= 127 || NM_IN_SET(ch, '=', '[', ']') + if (!nm_ascii_is_regular(ch) || NM_IN_SET(ch, '=', '[', ']') || (ch == '\\' && g_ascii_isxdigit(name[i + 1]) && g_ascii_isxdigit(name[i + 2])) || (ch == ' ' && name[i + 1] == '\0')) break; @@ -557,7 +557,7 @@ _keyfile_key_encode(const char *name, char **out_to_free) if (ch == '\0') break; - if (ch < 0x20 || ch >= 127 || NM_IN_SET(ch, '=', '[', ']') + if (!nm_ascii_is_regular(ch) || NM_IN_SET(ch, '=', '[', ']') || (ch == '\\' && g_ascii_isxdigit(name[i + 1]) && g_ascii_isxdigit(name[i + 2])) || (ch == ' ' && name[i + 1] == '\0')) { nm_str_buf_append_c(&str, '\\'); -- cgit v1.2.1