summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMilan Crha <mcrha@redhat.com>2018-12-11 13:51:36 +0100
committerMilan Crha <mcrha@redhat.com>2018-12-11 13:51:36 +0100
commitf2b976db999d632562ecaa769a31c113756f7eb6 (patch)
tree28ec24c3723f50d693eab733b2ebfd6c468a9b43
parent29ed6579ad71c6ce1ad7002806a0000241b06f61 (diff)
downloadevolution-data-server-f2b976db999d632562ecaa769a31c113756f7eb6.tar.gz
Bug 312581 - Optimise string manipulation in vCard parsing
-rw-r--r--src/addressbook/libebook-contacts/e-vcard.c146
1 files changed, 102 insertions, 44 deletions
diff --git a/src/addressbook/libebook-contacts/e-vcard.c b/src/addressbook/libebook-contacts/e-vcard.c
index 680cf85af..33e16120c 100644
--- a/src/addressbook/libebook-contacts/e-vcard.c
+++ b/src/addressbook/libebook-contacts/e-vcard.c
@@ -195,6 +195,26 @@ e_vcard_init (EVCard *evc)
evc->priv = E_VCARD_GET_PRIVATE (evc);
}
+static EVCardAttribute *
+e_vcard_attribute_new_take (gchar *attr_group,
+ gchar *attr_name)
+{
+ EVCardAttribute *attr;
+
+ attr = g_slice_new0 (EVCardAttribute);
+
+ if (attr_group && !*attr_group) {
+ g_free (attr_group);
+ attr_group = NULL;
+ }
+
+ attr->ref_count = 1;
+ attr->group = attr_group;
+ attr->name = attr_name;
+
+ return attr;
+}
+
/* Case insensitive version of strstr */
static gchar *
strstr_nocase (const gchar *haystack,
@@ -286,8 +306,7 @@ skip_newline (gchar *str,
static void
skip_to_next_line (gchar **p)
{
- gchar *lp;
- lp = *p;
+ gchar *lp = *p;
while (*lp != '\n' && *lp != '\r' && *lp != '\0')
lp = g_utf8_next_char (lp);
@@ -335,10 +354,17 @@ read_attribute_value (EVCardAttribute *attr,
const gchar *charset)
{
gchar *lp = *p;
+ const gchar *chunk_start = NULL;
GString *str;
+ #define WRITE_CHUNK() G_STMT_START { \
+ if (chunk_start) { \
+ g_string_append_len (str, chunk_start, lp - chunk_start); \
+ chunk_start = NULL; \
+ } } G_STMT_END
+
/* read in the value */
- str = g_string_new ("");
+ str = g_string_sized_new (16);
for (lp = skip_newline ( *p, quoted_printable);
*lp != '\n' && *lp != '\r' && *lp != '\0';
lp = skip_newline ( lp, quoted_printable ) ) {
@@ -346,6 +372,8 @@ read_attribute_value (EVCardAttribute *attr,
if (*lp == '=' && quoted_printable) {
gunichar a, b;
+ WRITE_CHUNK ();
+
/* it's for the '=' */
lp++;
lp = skip_newline (lp, quoted_printable);
@@ -373,10 +401,12 @@ read_attribute_value (EVCardAttribute *attr,
g_string_append_c (str, c); /* add decoded byte (this is not a unicode yet) */
} else {
g_string_append_c (str, '=');
- g_string_append_unichar (str, a);
- g_string_append_unichar (str, b);
+ g_string_insert_unichar (str, -1, a);
+ g_string_insert_unichar (str, -1, b);
}
} else if (*lp == '\\') {
+ WRITE_CHUNK ();
+
/* convert back to the non-escaped version of
* the characters */
lp = g_utf8_next_char (lp);
@@ -401,13 +431,14 @@ read_attribute_value (EVCardAttribute *attr,
default:
g_warning ("invalid escape, passing it through");
g_string_append_c (str, '\\');
- g_string_append_unichar (str, g_utf8_get_char (lp));
+ chunk_start = lp;
break;
}
lp = g_utf8_next_char (lp);
}
else if ((*lp == ';') ||
(*lp == ',' && !g_ascii_strcasecmp (attr->name, "CATEGORIES"))) {
+ WRITE_CHUNK ();
if (charset) {
gchar *tmp;
@@ -419,14 +450,24 @@ read_attribute_value (EVCardAttribute *attr,
}
e_vcard_attribute_add_value (attr, str->str);
- g_string_assign (str, "");
+ g_string_set_size (str, 0);
lp = g_utf8_next_char (lp);
}
else {
- g_string_append_unichar (str, g_utf8_get_char (lp));
+ if (!chunk_start)
+ chunk_start = lp;
+
lp = g_utf8_next_char (lp);
}
+
+ if (*lp == '\n' || *lp == '\r')
+ WRITE_CHUNK ();
}
+
+ WRITE_CHUNK ();
+
+ #undef WRITE_CHUNK
+
if (str) {
if (charset) {
gchar *tmp;
@@ -454,21 +495,32 @@ read_attribute_params (EVCardAttribute *attr,
gchar **charset)
{
gchar *lp;
+ const gchar *chunk_start = NULL;
GString *str;
EVCardAttributeParam *param = NULL;
gboolean in_quote = FALSE;
- str = g_string_new ("");
+ #define WRITE_CHUNK() G_STMT_START { \
+ if (chunk_start) { \
+ g_string_append_len (str, chunk_start, lp - chunk_start); \
+ chunk_start = NULL; \
+ } } G_STMT_END
+
+ str = g_string_sized_new (16);
for (lp = skip_newline ( *p, *quoted_printable);
*lp != '\n' && *lp != '\r' && *lp != '\0';
lp = skip_newline ( lp, *quoted_printable ) ) {
+ gunichar uc;
if (*lp == '"') {
+ WRITE_CHUNK ();
+
in_quote = !in_quote;
lp = g_utf8_next_char (lp);
- }
- else if (in_quote || g_unichar_isalnum (g_utf8_get_char (lp)) || *lp == '-' || *lp == '_') {
- g_string_append_unichar (str, g_utf8_get_char (lp));
+ } else if (uc = g_utf8_get_char (lp), in_quote || *lp == '-' || *lp == '_' || g_unichar_isalnum (uc)) {
+ WRITE_CHUNK ();
+
+ g_string_insert_unichar (str, -1, uc);
lp = g_utf8_next_char (lp);
}
/* accumulate until we hit the '=' or ';'. If we hit
@@ -478,9 +530,11 @@ read_attribute_params (EVCardAttribute *attr,
* QUOTED-PRINTABLE) or TYPE (in any other case.)
*/
else if (*lp == '=') {
+ WRITE_CHUNK ();
+
if (str->len > 0) {
param = e_vcard_attribute_param_new (str->str);
- g_string_assign (str, "");
+ g_string_set_size (str, 0);
lp = g_utf8_next_char (lp);
}
else {
@@ -501,10 +555,12 @@ read_attribute_params (EVCardAttribute *attr,
gboolean colon = (*lp == ':');
gboolean comma = (*lp == ',');
+ WRITE_CHUNK ();
+
if (param) {
if (str->len > 0) {
e_vcard_attribute_param_add_value (param, str->str);
- g_string_assign (str, "");
+ g_string_set_size (str, 0);
if (!colon)
lp = g_utf8_next_char (lp);
}
@@ -565,7 +621,7 @@ read_attribute_params (EVCardAttribute *attr,
param = e_vcard_attribute_param_new (param_name);
e_vcard_attribute_param_add_value (param, str->str);
}
- g_string_assign (str, "");
+ g_string_set_size (str, 0);
if (!colon)
lp = g_utf8_next_char (lp);
}
@@ -595,17 +651,25 @@ read_attribute_params (EVCardAttribute *attr,
} else if (param) {
/* reading param value, which is SAFE-CHAR, aka
* any character except CTLs, DQUOTE, ";", ":", "," */
- g_string_append_unichar (str, g_utf8_get_char (lp));
+ if (!chunk_start)
+ chunk_start = lp;
+
lp = g_utf8_next_char (lp);
} else {
g_warning ("invalid character (%c/0x%02x) found in parameter spec (%s)", *lp, *lp, lp);
- g_string_assign (str, "");
+ chunk_start = NULL;
+ g_string_set_size (str, 0);
/* skip_until (&lp, ":;"); */
skip_to_next_line ( &lp );
}
+
+ if (*lp == '\n' || *lp == '\r')
+ WRITE_CHUNK ();
}
+ #undef WRITE_CHUNK
+
if (str)
g_string_free (str, TRUE);
@@ -626,10 +690,11 @@ read_attribute (gchar **p)
gchar *charset = NULL;
/* first read in the group/name */
- str = g_string_new ("");
+ str = g_string_sized_new (16);
for (lp = skip_newline ( *p, is_qp);
*lp != '\n' && *lp != '\r' && *lp != '\0';
lp = skip_newline ( lp, is_qp ) ) {
+ gunichar uc;
if (*lp == ':' || *lp == ';') {
if (str->len != 0) {
@@ -656,16 +721,15 @@ read_attribute (gchar **p)
g_warning (
"extra `.' in attribute specification. ignoring extra group `%s'",
str->str);
- g_string_free (str, TRUE);
- str = g_string_new ("");
+ g_string_set_size (str, 0);
}
if (str->len != 0) {
attr_group = g_string_free (str, FALSE);
- str = g_string_new ("");
+ str = g_string_sized_new (16);
}
}
- else if (g_unichar_isalnum (g_utf8_get_char (lp)) || *lp == '-' || *lp == '_') {
- g_string_append_unichar (str, g_utf8_get_char (lp));
+ else if (uc = g_utf8_get_char (lp), *lp == '-' || *lp == '_' || g_unichar_isalnum (uc)) {
+ g_string_insert_unichar (str, -1, uc);
}
else {
g_warning ("invalid character found in attribute group/name");
@@ -683,9 +747,8 @@ read_attribute (gchar **p)
goto lose;
}
- attr = e_vcard_attribute_new (attr_group, attr_name);
- g_free (attr_group);
- g_free (attr_name);
+ /* This consumes (takes) both strings */
+ attr = e_vcard_attribute_new_take (attr_group, attr_name);
if (*lp == ';') {
/* skip past the ';' */
@@ -841,7 +904,10 @@ e_vcard_escape_semicolons (const gchar *s)
GString *str;
const gchar *p;
- str = g_string_new ("");
+ if (s)
+ str = g_string_sized_new (strlen (s));
+ else
+ str = g_string_new ("");
for (p = s; p && *p; p++) {
if (*p == ';')
@@ -867,7 +933,10 @@ e_vcard_escape_string (const gchar *s)
GString *str;
const gchar *p;
- str = g_string_new ("");
+ if (s)
+ str = g_string_sized_new (strlen (s));
+ else
+ str = g_string_new ("");
/* Escape a string as described in RFC2426, section 5 */
for (p = s; p && *p; p++) {
@@ -914,7 +983,7 @@ e_vcard_unescape_string (const gchar *s)
g_return_val_if_fail (s != NULL, NULL);
- str = g_string_new ("");
+ str = g_string_sized_new (strlen (s));
/* Unescape a string as described in RFC2426, section 5 */
for (p = s; *p; p++) {
@@ -1066,7 +1135,7 @@ e_vcard_qp_encode (const gchar *txt,
gboolean can_wrap)
{
const gchar *p = txt;
- GString *escaped = g_string_new ("");
+ GString *escaped = g_string_sized_new (strlen (txt));
gint count = 0;
while (*p != '\0') {
@@ -1205,7 +1274,7 @@ e_vcard_to_string_vcard_21 (EVCard *evc)
if (empty)
continue;
- attr_str = g_string_new ("");
+ attr_str = g_string_sized_new (strlen (attr->name) + (attr->group ? strlen (attr->group) + 1 : 0));
/* From vCard 2.1 spec page 27, 28
*
@@ -1316,7 +1385,7 @@ e_vcard_to_string_vcard_30 (EVCard *evc)
if (!g_ascii_strcasecmp (attr->name, "VERSION"))
continue;
- attr_str = g_string_new ("");
+ attr_str = g_string_sized_new (strlen (attr->name) + (attr->group ? strlen (attr->group) + 1 : 0));
/* From rfc2425, 5.8.2
*
@@ -1558,18 +1627,7 @@ EVCardAttribute *
e_vcard_attribute_new (const gchar *attr_group,
const gchar *attr_name)
{
- EVCardAttribute *attr;
-
- attr = g_slice_new0 (EVCardAttribute);
-
- if (attr_group != NULL && *attr_group == '\0')
- attr_group = NULL;
-
- attr->ref_count = 1;
- attr->group = g_strdup (attr_group);
- attr->name = g_strdup (attr_name);
-
- return attr;
+ return e_vcard_attribute_new_take ((attr_group && *attr_group) ? g_strdup (attr_group) : NULL, g_strdup (attr_name));
}
/**