shared: add flags for nm_utils_escaped_tokens_escape_full()

Add flags to explicitly escape leading or trailing spaces. Note that we were already escaping trailing spaces. This will be used later when supporting backslash escapes for option parameters for nmcli (vpn.data).
author: Thomas Haller <thaller@redhat.com> 2020-03-29 12:58:59 +0200
committer: Thomas Haller <thaller@redhat.com> 2020-04-04 19:51:34 +0200
commit: d1a9c2bd429912e081fd5d9dbfce7912fdb9d016 (patch)
tree: acb9f28f12981a9f3b6d4d5708c8e6191416e0ab /shared
parent: ab9dc9f6d4a425c28900c5ed4332df3e9aacc9fe (diff)
download: NetworkManager-d1a9c2bd429912e081fd5d9dbfce7912fdb9d016.tar.gz
2 files changed, 163 insertions, 27 deletions
diff --git a/shared/nm-glib-aux/nm-shared-utils.c b/shared/nm-glib-aux/nm-shared-utils.c
index fffd761b7a..807786c530 100644
--- a/shared/nm-glib-aux/nm-shared-utils.c
+++ b/shared/nm-glib-aux/nm-shared-utils.c
@@ -1528,6 +1528,19 @@ _char_lookup_has (const guint8 lookup[static 256],
 	return lookup[(guint8) ch] != 0;
 }
 
+static gboolean /* TRUE if every character of @candidates is marked in @lookup; FALSE at the first one that is not. */
+_char_lookup_has_all (const guint8 lookup[static 256],
+                      const char *candidates)
+{
+	if (candidates) { /* a NULL @candidates has nothing to check */
+		while (candidates[0] != '\0') {
+			if (!_char_lookup_has (lookup, (candidates++)[0])) /* test the current character, then advance */
+				return FALSE;
+		}
+	}
+	return TRUE; /* also reached for a NULL or empty @candidates */
+}
+
 /**
  * nm_utils_strsplit_set_full:
  * @str: the string to split.
@@ -1755,65 +1768,131 @@ done2:
 /*****************************************************************************/
 
 const char *
-nm_utils_escaped_tokens_escape (const char *str,
-                                const char *delimiters,
-                                char **out_to_free)
+nm_utils_escaped_tokens_escape_full (const char *str,
+                                     const char *delimiters,
+                                     const char *delimiters_as_needed,
+                                     NMUtilsEscapedTokensEscapeFlags flags,
+                                     char **out_to_free)
 {
 	guint8 ch_lookup[256];
+	guint8 ch_lookup_as_needed[256];
+	gboolean has_ch_lookup_as_needed = FALSE;
 	char *ret;
 	gsize str_len;
 	gsize alloc_len;
 	gsize n_escapes;
 	gsize i, j;
+	gboolean escape_leading_space;
 	gboolean escape_trailing_space;
+	gboolean escape_backslash_as_needed;
 
-	if (!delimiters) {
-		nm_assert (delimiters);
-		delimiters = NM_ASCII_SPACES;
-	}
+	nm_assert (   !delimiters_as_needed
+	           || (   delimiters_as_needed[0]
+	               && NM_FLAGS_HAS (flags, NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_ESCAPE_BACKSLASH_AS_NEEDED)));
 
 	if (!str || str[0] == '\0') {
 		*out_to_free = NULL;
 		return str;
 	}
 
+	str_len = strlen (str);
+
 	_char_lookup_table_init (ch_lookup, delimiters);
+	if (   !delimiters
+	    || NM_FLAGS_HAS (flags, NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_ESCAPE_SPACES)) {
+		flags &= ~(  NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_ESCAPE_LEADING_SPACE
+		           | NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_ESCAPE_TRAILING_SPACE);
+		_char_lookup_table_set_all (ch_lookup, NM_ASCII_SPACES);
+	}
 
-	/* also mark '\\' as requiring escaping. */
-	_char_lookup_table_set_one (ch_lookup, '\\');
+	if (NM_FLAGS_HAS (flags, NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_ESCAPE_BACKSLASH_ALWAYS)) {
+		_char_lookup_table_set_one (ch_lookup, '\\');
+		escape_backslash_as_needed = FALSE;
+	} else if (_char_lookup_has (ch_lookup, '\\'))
+		escape_backslash_as_needed = FALSE;
+	else {
+		escape_backslash_as_needed = NM_FLAGS_HAS (flags, NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_ESCAPE_BACKSLASH_AS_NEEDED);
+		if (escape_backslash_as_needed) {
+			if (    NM_FLAGS_ANY (flags,   NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_ESCAPE_LEADING_SPACE
+			                             | NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_ESCAPE_TRAILING_SPACE)
+			    && !_char_lookup_has_all (ch_lookup, NM_ASCII_SPACES)) {
+				/* ESCAPE_LEADING_SPACE and ESCAPE_TRAILING_SPACE implies that we escape backslash
+				 * before whitespaces. */
+				if (!has_ch_lookup_as_needed) {
+					has_ch_lookup_as_needed = TRUE;
+					_char_lookup_table_init (ch_lookup_as_needed, NULL);
+				}
+				_char_lookup_table_set_all (ch_lookup_as_needed, NM_ASCII_SPACES);
+			}
+			if (   delimiters_as_needed
+			    && !_char_lookup_has_all (ch_lookup, delimiters_as_needed)) {
+				if (!has_ch_lookup_as_needed) {
+					has_ch_lookup_as_needed = TRUE;
+					_char_lookup_table_init (ch_lookup_as_needed, NULL);
+				}
+				_char_lookup_table_set_all (ch_lookup_as_needed, delimiters_as_needed);
+			}
+		}
+	}
+
+	escape_leading_space =    NM_FLAGS_HAS (flags, NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_ESCAPE_LEADING_SPACE)
+	                       && g_ascii_isspace (str[0])
+	                       && !_char_lookup_has (ch_lookup, str[0]);
+	if (str_len == 1)
+		escape_trailing_space = FALSE;
+	else {
+		escape_trailing_space =    NM_FLAGS_HAS (flags, NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_ESCAPE_TRAILING_SPACE)
+		                        && g_ascii_isspace (str[str_len - 1])
+		                        && !_char_lookup_has (ch_lookup, str[str_len - 1]);
+	}
 
 	n_escapes = 0;
 	for (i = 0; str[i] != '\0'; i++) {
 		if (_char_lookup_has (ch_lookup, str[i]))
 			n_escapes++;
+		else if (   str[i] == '\\'
+		         && escape_backslash_as_needed
+		         && (   _char_lookup_has (ch_lookup, str[i + 1])
+		             || NM_IN_SET (str[i + 1], '\0', '\\')
+		             || (   has_ch_lookup_as_needed
+		                 && _char_lookup_has (ch_lookup_as_needed, str[i + 1]))))
+			n_escapes++;
 	}
+	if (escape_leading_space)
+		n_escapes++;
+	if (escape_trailing_space)
+		n_escapes++;
 
-	str_len = i;
-	nm_assert (str_len > 0 && strlen (str) == str_len);
-
-	escape_trailing_space =    !_char_lookup_has (ch_lookup, str[str_len - 1])
-	                        && g_ascii_isspace (str[str_len - 1]);
-
-	if (   n_escapes == 0
-	    && !escape_trailing_space) {
+	if (n_escapes == 0u) {
 		*out_to_free = NULL;
 		return str;
 	}
 
-	alloc_len = str_len + n_escapes + ((gsize) escape_trailing_space) + 1;
+	alloc_len = str_len + n_escapes + 1u;
 	ret = g_new (char, alloc_len);
 
 	j = 0;
-	for (i = 0; str[i] != '\0'; i++) {
-		if (_char_lookup_has (ch_lookup, str[i])) {
-			nm_assert (j < alloc_len);
+	i = 0;
+
+	if (escape_leading_space) {
+		ret[j++] = '\\';
+		ret[j++] = str[i++];
+	}
+	for (; str[i] != '\0'; i++) {
+		if (_char_lookup_has (ch_lookup, str[i]))
+			ret[j++] = '\\';
+		else if (   str[i] == '\\'
+		         && escape_backslash_as_needed
+		         && (   _char_lookup_has (ch_lookup, str[i + 1])
+		             || NM_IN_SET (str[i + 1], '\0', '\\')
+		             || (   has_ch_lookup_as_needed
+		                 && _char_lookup_has (ch_lookup_as_needed, str[i + 1]))))
 			ret[j++] = '\\';
-		}
-		nm_assert (j < alloc_len);
 		ret[j++] = str[i];
 	}
 	if (escape_trailing_space) {
-		nm_assert (!_char_lookup_has (ch_lookup, ret[j - 1]) && g_ascii_isspace (ret[j - 1]));
+		nm_assert (   !_char_lookup_has (ch_lookup, ret[j - 1])
+		           && g_ascii_isspace (ret[j - 1]));
 		ret[j] = ret[j - 1];
 		ret[j - 1] = '\\';
 		j++;
@@ -1821,6 +1900,7 @@ nm_utils_escaped_tokens_escape (const char *str,
 
 	nm_assert (j == alloc_len - 1);
 	ret[j] = '\0';
+	nm_assert (strlen (ret) == j);
 
 	*out_to_free = ret;
 	return ret;
diff --git a/shared/nm-glib-aux/nm-shared-utils.h b/shared/nm-glib-aux/nm-shared-utils.h
index 79f6eb4abb..57a6fb8fcb 100644
--- a/shared/nm-glib-aux/nm-shared-utils.h
+++ b/shared/nm-glib-aux/nm-shared-utils.h
@@ -542,9 +542,65 @@ nm_utils_escaped_tokens_split (const char *str,
 	                                   | NM_UTILS_STRSPLIT_SET_FLAGS_STRSTRIP);
 }
 
-const char *nm_utils_escaped_tokens_escape (const char *str,
-                                            const char *delimiters,
-                                            char **out_to_free);
+typedef enum { /* Flags selecting what nm_utils_escaped_tokens_escape_full() escapes with a backslash. */
+	NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_NONE                       = 0,
+	NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_ESCAPE_SPACES              = (1ull << 0), /* escape every NM_ASCII_SPACES character; the LEADING/TRAILING flags are then redundant and get cleared */
+	NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_ESCAPE_LEADING_SPACE       = (1ull << 1), /* escape the first character if it is ASCII white space and not escaped anyway */
+	NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_ESCAPE_TRAILING_SPACE      = (1ull << 2), /* same for the last character. NOTE(review): skipped for one-character strings, so " " stays unescaped unless ESCAPE_LEADING_SPACE or ESCAPE_SPACES is also set -- confirm this is intended */
+
+	/* A backslash is escaped (as "\\\\") only if the character following it would
+	 * otherwise change how the result is parsed back. That is, a backslash:
+	 *
+	 *  1) before another '\\' backslash.
+	 *  2) before any delimiter in @delimiters.
+	 *  3) before any delimiter in @delimiters_as_needed.
+	 *  4) before a white space, if ESCAPE_LEADING_SPACE or ESCAPE_TRAILING_SPACE is set.
+	 *  5) at the end of the word.
+	 *
+	 * Rule 4) is an extension. It is not immediately clear why, with
+	 * ESCAPE_LEADING_SPACE and ESCAPE_TRAILING_SPACE, we want *all* backslashes
+	 * before a white space to be escaped. The reason is that these flags are meant
+	 * for strings that are later parsed back by a parser that strips unescaped
+	 * white space. That means we want " a " to be escaped as "\\ a\\ ". On the
+	 * other hand, we also want " a\\ b " to be escaped as "\\ a\\\\ b\\ ", and not
+	 * as "\\ a\\ b\\ ". Otherwise, the parser would need to treat "\\ " differently
+	 * depending on whether the sequence is at the beginning, end or middle of the
+	 * word.
+	 *
+	 * Rule 5) is not immediately obvious either. With ESCAPE_TRAILING_SPACE, we
+	 * clearly want that arbitrary white space can be appended to an escaped word
+	 * without changing its meaning. That is why this mode exists. So "a\\" must be
+	 * escaped as "a\\\\", so that appending " " does not change the meaning.
+	 * Also without ESCAPE_TRAILING_SPACE, we want in general that two escaped words
+	 * can be concatenated without changing their meaning. If the words are "a\\"
+	 * and "," (with ',' being a delimiter), then the results must be "a\\\\" and
+	 * "\\,", so that the concatenated word ("a\\\\\\,") still means the same. If we
+	 * instead escaped them as "a\\" and "\\,", then the concatenated word would be
+	 * "a\\\\," which has a different meaning.
+	 */
+	NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_ESCAPE_BACKSLASH_AS_NEEDED = (1ull << 3), /* must be set whenever @delimiters_as_needed is passed (asserted by the implementation) */
+
+	NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_ESCAPE_BACKSLASH_ALWAYS    = (1ull << 4), /* escape every backslash; takes precedence over ESCAPE_BACKSLASH_AS_NEEDED */
+} NMUtilsEscapedTokensEscapeFlags;
+
+const char *nm_utils_escaped_tokens_escape_full (const char *str,
+                                                 const char *delimiters,
+                                                 const char *delimiters_as_needed,
+                                                 NMUtilsEscapedTokensEscapeFlags flags,
+                                                 char **out_to_free);
+
+static inline const char * /* Convenience wrapper: calls nm_utils_escaped_tokens_escape_full() with fixed flags and no @delimiters_as_needed. */
+nm_utils_escaped_tokens_escape (const char *str,
+                                const char *delimiters,
+                                char **out_to_free)
+{ /* NOTE(review): the full function skips the trailing-space escape for one-character strings, so with these flags a lone " " is returned unescaped -- confirm this is intended. */
+	return nm_utils_escaped_tokens_escape_full (str,
+	                                            delimiters,
+	                                            NULL, /* no @delimiters_as_needed */
+	                                              NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_ESCAPE_BACKSLASH_ALWAYS
+	                                            | NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_ESCAPE_TRAILING_SPACE,
+	                                            out_to_free);
+}
 
 static inline GString *
 nm_utils_escaped_tokens_escape_gstr_assert (const char *str,
author	Thomas Haller <thaller@redhat.com>	2020-03-29 12:58:59 +0200
committer	Thomas Haller <thaller@redhat.com>	2020-04-04 19:51:34 +0200
commit	d1a9c2bd429912e081fd5d9dbfce7912fdb9d016 (patch)
tree	acb9f28f12981a9f3b6d4d5708c8e6191416e0ab /shared
parent	ab9dc9f6d4a425c28900c5ed4332df3e9aacc9fe (diff)
download	NetworkManager-d1a9c2bd429912e081fd5d9dbfce7912fdb9d016.tar.gz