diff options
author | Thomas Haller <thaller@redhat.com> | 2017-09-14 19:14:01 +0200 |
---|---|---|
committer | Thomas Haller <thaller@redhat.com> | 2017-09-18 17:36:11 +0200 |
commit | 00995a533d299ff4ccd64a3b1aa3d09eca9aaf13 (patch) | |
tree | c6e24457a32340ef95791cd02ea595a17605287a | |
parent | d532b71da1e8ac1c619a8e7f314548e9bc1af85c (diff) | |
download | NetworkManager-00995a533d299ff4ccd64a3b1aa3d09eca9aaf13.tar.gz |
shared: add nm_utils_strsplit_set() helper
A replacement for g_strsplit_set(). While g_strsplit_set()
does (n+1) malloc and n slice allocations, this needs
roughly O(log(n)) mallocs.
Another difference from g_strsplit_set() is that this function
treats multiple delimiters as one (and thus never returns empty
words). While I can see that sometimes you may want to keep empty
words (like parsing a CSV file and preserve empty cells), we usually
use this function for splitting user input. In such cases, we want
to treat multiple delimiters as one.
-rw-r--r-- | libnm-core/tests/test-general.c | 62 | ||||
-rw-r--r-- | shared/nm-utils/nm-shared-utils.c | 112 | ||||
-rw-r--r-- | shared/nm-utils/nm-shared-utils.h | 2 |
3 files changed, 176 insertions, 0 deletions
diff --git a/libnm-core/tests/test-general.c b/libnm-core/tests/test-general.c
index d33a6e87d2..e977e418fe 100644
--- a/libnm-core/tests/test-general.c
+++ b/libnm-core/tests/test-general.c
@@ -98,6 +98,67 @@ test_nm_g_slice_free_fcn (void)
 
 /*****************************************************************************/
 
+static void
+_do_test_nm_utils_strsplit_set (const char *str, ...)
+{
+	gs_unref_ptrarray GPtrArray *args_array = g_ptr_array_new ();
+	const char *const*args;
+	gs_free const char **words = NULL;
+	const char *arg;
+	gsize i;
+	va_list ap;
+
+	va_start (ap, str);
+	while ((arg = va_arg (ap, const char *)))
+		g_ptr_array_add (args_array, (gpointer) arg);
+	va_end (ap);
+	g_ptr_array_add (args_array, NULL);
+
+	args = (const char *const*) args_array->pdata;
+
+	words = nm_utils_strsplit_set (str, " \t\n");
+
+	if (!args[0]) {
+		g_assert (!words);
+		g_assert (   !str
+		          || NM_STRCHAR_ALL (str, ch, NM_IN_SET (ch, ' ', '\t', '\n')));
+		return;
+	}
+	g_assert (words);
+	for (i = 0; args[i] || words[i]; i++) {
+		g_assert (args[i]);
+		g_assert (words[i]);
+		g_assert (args[i][0]);
+		g_assert (NM_STRCHAR_ALL (args[i], ch, !NM_IN_SET (ch, ' ', '\t', '\n')));
+		g_assert_cmpstr (args[i], ==, words[i]);
+	}
+}
+
+#define do_test_nm_utils_strsplit_set(str, ...) \
+	_do_test_nm_utils_strsplit_set (str, ##__VA_ARGS__, NULL)
+
+static void
+test_nm_utils_strsplit_set (void)
+{
+	do_test_nm_utils_strsplit_set (NULL);
+	do_test_nm_utils_strsplit_set ("");
+	do_test_nm_utils_strsplit_set ("\t");
+	do_test_nm_utils_strsplit_set (" \t\n");
+	do_test_nm_utils_strsplit_set ("a", "a");
+	do_test_nm_utils_strsplit_set ("a b", "a", "b");
+	do_test_nm_utils_strsplit_set ("a\rb", "a\rb");
+	do_test_nm_utils_strsplit_set (" a\rb ", "a\rb");
+	do_test_nm_utils_strsplit_set (" a bbbd afds ere", "a", "bbbd", "afds", "ere");
+	do_test_nm_utils_strsplit_set ("1 2 3 4 5 6 7 8 9 0 "
+	                               "1 2 3 4 5 6 7 8 9 0 "
+	                               "1 2 3 4 5 6 7 8 9 0",
+	                               "1", "2", "3", "4", "5", "6", "7", "8", "9", "0",
+	                               "1", "2", "3", "4", "5", "6", "7", "8", "9", "0",
+	                               "1", "2", "3", "4", "5", "6", "7", "8", "9", "0");
+}
+
+/*****************************************************************************/
+
 typedef struct {
 	int val;
 	int idx;
@@ -6201,6 +6262,7 @@ int main (int argc, char **argv)
 	g_test_add_func ("/core/general/test_c_list_sort", test_c_list_sort);
 	g_test_add_func ("/core/general/test_dedup_multi", test_dedup_multi);
 	g_test_add_func ("/core/general/test_utils_str_utf8safe", test_utils_str_utf8safe);
+	g_test_add_func ("/core/general/test_nm_utils_strsplit_set", test_nm_utils_strsplit_set);
 	g_test_add_func ("/core/general/test_nm_in_set", test_nm_in_set);
 	g_test_add_func ("/core/general/test_nm_in_strset", test_nm_in_strset);
 	g_test_add_func ("/core/general/test_setting_vpn_items", test_setting_vpn_items);
diff --git a/shared/nm-utils/nm-shared-utils.c b/shared/nm-utils/nm-shared-utils.c
index 5381f0d162..0810d7b1c1 100644
--- a/shared/nm-utils/nm-shared-utils.c
+++ b/shared/nm-utils/nm-shared-utils.c
@@ -325,6 +325,118 @@ _nm_utils_ascii_str_to_int64 (const char *str, guint base, gint64 min, gint64 ma
 /*****************************************************************************/
 
 /**
+ * nm_utils_strsplit_set:
+ * @str: the string to split.
+ * @delimiters: the set of delimiters. If %NULL, defaults to " \t\n",
+ *   like bash's $IFS.
+ *
+ * This is a replacement for g_strsplit_set() which avoids copying
+ * each word once (the entire strv array), but instead copies it once
+ * and all words point into that internal copy.
+ *
+ * Another difference from g_strsplit_set() is that this never returns
+ * empty words. Multiple delimiters are combined and treated as one.
+ *
+ * Returns: %NULL if @str is %NULL or contains only delimiters.
+ *   Otherwise, a %NULL terminated strv array containing non-empty
+ *   words, split at the delimiter characters (delimiter characters
+ *   are removed).
+ *   The strings to which the result strv array points are allocated
+ *   after the returned result itself. Don't free the strings themselves,
+ *   but free everything with g_free().
+ */
+const char **
+nm_utils_strsplit_set (const char *str, const char *delimiters)
+{
+	const char **ptr, **ptr0;
+	gsize alloc_size, plen, i;
+	gsize str_len;
+	char *s0;
+	char *s;
+	guint8 delimiters_table[256];
+
+	if (!str)
+		return NULL;
+
+	/* initialize lookup table for delimiter */
+	if (!delimiters)
+		delimiters = " \t\n";
+	memset (delimiters_table, 0, sizeof (delimiters_table));
+	for (i = 0; delimiters[i]; i++)
+		delimiters_table[(guint8) delimiters[i]] = 1;
+
+#define _is_delimiter(ch, delimiters_table) \
+	((delimiters_table)[(guint8) (ch)] != 0)
+
+	/* skip initial delimiters, and return if the remaining string is
+	 * empty. */
+	while (_is_delimiter (str[0], delimiters_table))
+		str++;
+	if (!str[0])
+		return NULL;
+
+	str_len = strlen (str) + 1;
+	alloc_size = 8;
+
+	/* we allocate the buffer larger, so to copy @str at the
+	 * end of it as @s0. */
+	ptr0 = g_malloc ((sizeof (const char *) * (alloc_size + 1)) + str_len);
+	s0 = (char *) &ptr0[alloc_size + 1];
+	memcpy (s0, str, str_len);
+
+	plen = 0;
+	s = s0;
+	ptr = ptr0;
+
+	while (TRUE) {
+		if (plen >= alloc_size) {
+			const char **ptr_old = ptr;
+
+			/* reallocate the buffer, doubling its size. Note that for now
+			 * the string continues to be in ptr0/s0. We fix that at the end. */
+			alloc_size *= 2;
+			ptr = g_malloc ((sizeof (const char *) * (alloc_size + 1)) + str_len);
+			memcpy (ptr, ptr_old, sizeof (const char *) * plen);
+			if (ptr_old != ptr0)
+				g_free (ptr_old);
+		}
+
+		ptr[plen++] = s;
+
+		nm_assert (s[0] && !_is_delimiter (s[0], delimiters_table));
+
+		while (TRUE) {
+			s++;
+			if (_is_delimiter (s[0], delimiters_table))
+				break;
+			if (s[0] == '\0')
+				goto done;
+		}
+
+		s[0] = '\0';
+		s++;
+		while (_is_delimiter (s[0], delimiters_table))
+			s++;
+		if (s[0] == '\0')
+			break;
+	}
+done:
+	ptr[plen] = NULL;
+
+	if (ptr != ptr0) {
+		/* we reallocated the buffer. We must copy over the
+		 * string @s0 and adjust the pointers. */
+		s = (char *) &ptr[alloc_size + 1];
+		memcpy (s, s0, str_len);
+		for (i = 0; i < plen; i++)
+			ptr[i] = &s[ptr[i] - s0];
+		g_free (ptr0);
+	}
+
+	return ptr;
+}
+
+/**
  * nm_utils_strv_find_first:
  * @list: the strv list to search
  * @len: the length of the list, or a negative value if @list is %NULL terminated.
diff --git a/shared/nm-utils/nm-shared-utils.h b/shared/nm-utils/nm-shared-utils.h
index f902793371..4a93016af1 100644
--- a/shared/nm-utils/nm-shared-utils.h
+++ b/shared/nm-utils/nm-shared-utils.h
@@ -153,6 +153,8 @@ void nm_utils_strbuf_append_str (char **buf, gsize *len, const char *str);
 
 /*****************************************************************************/
 
+const char **nm_utils_strsplit_set (const char *str, const char *delimiters);
+
 gssize nm_utils_strv_find_first (char **list, gssize len, const char *needle);
 
 char **_nm_utils_strv_cleanup (char **strv,