diff options
author | Thomas Haller <thaller@redhat.com> | 2019-04-05 22:44:49 +0200 |
---|---|---|
committer | Thomas Haller <thaller@redhat.com> | 2019-04-10 15:05:57 +0200 |
commit | a1425a4c91d2b653dfdeea5972b12262abf251c3 (patch) | |
tree | 5bd723ad960486148912680392d3cd7f01ac7bf9 | |
parent | c1f340401ffeb6d22d90d3dff841bfbc50466c61 (diff) | |
download | NetworkManager-a1425a4c91d2b653dfdeea5972b12262abf251c3.tar.gz |
shared: pre-calculate number of tokens in nm_utils_strsplit_set_full()
Instead of growing the buffer for the tokens (and reallocating),
do one pre-run over the string and count the delimiters. This
way we know how much space we need and we don't need to
reallocate.
Interestingly, this is notably slower than the previous implementation,
because previously it would not bother determining the right number of
tokens but just over-allocate with a reasonable guess of 8 and grow the
buffer exponentially. Still, I like this better because while it may
be slower in common scenarios, it allocates exactly the amount of buffer
space needed.
-rw-r--r-- | shared/nm-utils/nm-shared-utils.c | 110 |
1 files changed, 66 insertions, 44 deletions
diff --git a/shared/nm-utils/nm-shared-utils.c b/shared/nm-utils/nm-shared-utils.c index e8f3563c09..879d1e7b9e 100644 --- a/shared/nm-utils/nm-shared-utils.c +++ b/shared/nm-utils/nm-shared-utils.c @@ -1020,12 +1020,11 @@ nm_utils_strsplit_set_full (const char *str, const char *delimiters, NMUtilsStrsplitSetFlags flags) { - const char **ptr, **ptr0; - gsize alloc_size; - gsize plen; - gsize i; - gsize str_len; - char *s0; + const char **ptr; + gsize num_tokens; + gsize i_token; + gsize str_len_p1; + const char *c_str; char *s; guint8 ch_lookup[256]; const gboolean f_allow_escaping = NM_FLAGS_HAS (flags, NM_UTILS_STRSPLIT_SET_FLAGS_ALLOW_ESCAPING); @@ -1056,37 +1055,68 @@ nm_utils_strsplit_set_full (const char *str, return NULL; } - str_len = strlen (str) + 1; - alloc_size = 8; - /* we allocate the buffer larger, so to copy @str at the - * end of it as @s0. */ - ptr0 = g_malloc ((sizeof (const char *) * (alloc_size + 1)) + str_len); - s0 = (char *) &ptr0[alloc_size + 1]; - memcpy (s0, str, str_len); + num_tokens = 1; + c_str = str; + while (TRUE) { - plen = 0; - s = s0; - ptr = ptr0; + while (G_LIKELY (!_char_lookup_has (ch_lookup, c_str[0]))) { + if (c_str[0] == '\0') + goto done1; + c_str++; + } - while (TRUE) { - if (plen >= alloc_size) { - const char **ptr_old = ptr; - - /* reallocate the buffer. Note that for now the string - * continues to be in ptr0/s0. We fix that at the end. */ - alloc_size *= 2; - ptr = g_malloc ((sizeof (const char *) * (alloc_size + 1)) + str_len); - memcpy (ptr, ptr_old, sizeof (const char *) * plen); - if (ptr_old != ptr0) - g_free (ptr_old); + /* we assume escapings are not frequent. After we found + * this delimiter, check whether it was escaped by counting + * the backslashed before. */ + if (f_allow_escaping) { + const char *c2 = c_str; + + while ( c2 > str + && c2[-1] == '\\') + c2--; + if (((c_str - c2) % 2) != 0) { + /* the delimiter is escaped. This was not an accepted delimiter. 
*/ + c_str++; + continue; + } } - ptr[plen++] = s; + c_str++; + + /* if we drop empty tokens, then we now skip over all consecutive delimiters. */ + if (!f_preseve_empty) { + while (_char_lookup_has (ch_lookup, c_str[0])) + c_str++; + if (c_str[0] == '\0') + break; + } + + num_tokens++; + } + +done1: + + nm_assert (c_str[0] == '\0'); + + str_len_p1 = (c_str - str) + 1; + + nm_assert (str[str_len_p1 - 1] == '\0'); + + ptr = g_malloc ((sizeof (const char *) * (num_tokens + 1)) + str_len_p1); + s = (char *) &ptr[num_tokens + 1]; + memcpy (s, str, str_len_p1); + + i_token = 0; + + while (TRUE) { + + nm_assert (i_token < num_tokens); + ptr[i_token++] = s; if (s[0] == '\0') { nm_assert (f_preseve_empty); - goto done; + goto done2; } nm_assert ( f_preseve_empty || !_char_lookup_has (ch_lookup, s[0])); @@ -1096,10 +1126,10 @@ nm_utils_strsplit_set_full (const char *str, && f_allow_escaping)) { s++; if (s[0] == '\0') - goto done; + goto done2; s++; } else if (s[0] == '\0') - goto done; + goto done2; else s++; } @@ -1107,26 +1137,18 @@ nm_utils_strsplit_set_full (const char *str, nm_assert (_char_lookup_has (ch_lookup, s[0])); s[0] = '\0'; s++; + if (!f_preseve_empty) { while (_char_lookup_has (ch_lookup, s[0])) s++; if (s[0] == '\0') - goto done; + goto done2; } } -done: - ptr[plen] = NULL; - - if (ptr != ptr0) { - /* we reallocated the buffer. We must copy over the - * string @s0 and adjust the pointers. */ - s = (char *) &ptr[alloc_size + 1]; - memcpy (s, s0, str_len); - for (i = 0; i < plen; i++) - ptr[i] = &s[ptr[i] - s0]; - g_free (ptr0); - } +done2: + nm_assert (i_token == num_tokens); + ptr[i_token] = NULL; return ptr; } |