diff options
author | unknown <bar@mysql.com/bar.myoffice.izhnet.ru> | 2007-10-09 13:53:39 +0500 |
---|---|---|
committer | unknown <bar@mysql.com/bar.myoffice.izhnet.ru> | 2007-10-09 13:53:39 +0500 |
commit | 4c21617d4a188de89833ceb0714de0c70fa963da (patch) | |
tree | 5e88ef872d6be0540ddc207add0053dfb0aedadf /sql/item_xmlfunc.cc | |
parent | 0f0e0e1aafcc2d0d467ce5de560d7d6f7151d384 (diff) | |
download | mariadb-git-4c21617d4a188de89833ceb0714de0c70fa963da.tar.gz |
Bug#27287 extractvalue() (and updatexml()) extremely slow for large XML
Performance improvements made.
ExtractValue for large XML values is now much faster
(about 2000 times faster of 1Mb-long XML values).
sql/item_xmlfunc.cc:
Performance improvement for huge XML values:
1. Avoid reallocs - reserve extra memory:
using String::reserve() + String::q_append()
instead of String::append()
2. Parent is now not searched through the all previous
nodes in backward direction. Instead, we do the following:
- we introduce data->parent - a new member in MY_XML_USER_DATA
- when entering a new tag/attribute node, we remember offset
of the current node in data->parent
- when leaving a tag/attribute node, we change data->parent
to offset of the parent of the current node.
Diffstat (limited to 'sql/item_xmlfunc.cc')
-rw-r--r-- | sql/item_xmlfunc.cc | 60 |
1 files changed, 24 insertions, 36 deletions
diff --git a/sql/item_xmlfunc.cc b/sql/item_xmlfunc.cc index 15be9c97b6e..d0dbab16b1c 100644 --- a/sql/item_xmlfunc.cc +++ b/sql/item_xmlfunc.cc @@ -2611,35 +2611,27 @@ typedef struct uint level; String *pxml; // parsed XML uint pos[MAX_LEVEL]; // Tag position stack + uint parent; // Offset of the parent of the current node } MY_XML_USER_DATA; -/* - Find the parent node - - SYNOPSYS - Find the parent node, i.e. a tag or attrubute node on the given level. - - RETURN - 1 - success - 0 - failure -*/ -static uint xml_parent_tag(MY_XML_NODE *items, uint nitems, uint level) +static bool +append_node(String *str, MY_XML_NODE *node) { - if (!nitems) - return 0; - - MY_XML_NODE *p, *last= &items[nitems-1]; - for (p= last; p >= items; p--) - { - if (p->level == level && - (p->type == MY_XML_NODE_TAG || - p->type == MY_XML_NODE_ATTR)) - { - return p - items; - } - } - return 0; + /* + If "str" doesn't have space for a new node, + it will allocate two times more space that it has had so far. + (2*len+512) is a heuristic value, + which gave the best performance during tests. + The ideas behind this formula are: + - It allows to have a very small number of reallocs: + about 10 reallocs on a 1Mb-long XML value. + - At the same time, it avoids excessive memory use. + */ + if (str->reserve(sizeof(MY_XML_NODE), 2 * str->length() + 512)) + return TRUE; + str->q_append((const char*) node, sizeof(MY_XML_NODE)); + return FALSE; } @@ -2661,19 +2653,17 @@ extern "C" int xml_enter(MY_XML_PARSER *st,const char *attr, size_t len); int xml_enter(MY_XML_PARSER *st,const char *attr, size_t len) { MY_XML_USER_DATA *data= (MY_XML_USER_DATA*)st->user_data; - MY_XML_NODE *nodes= (MY_XML_NODE*) data->pxml->ptr(); uint numnodes= data->pxml->length() / sizeof(MY_XML_NODE); - uint parent= xml_parent_tag(nodes, numnodes, data->level - 1); MY_XML_NODE node; + node.parent= data->parent; // Set parent for the new node to old parent + data->parent= numnodes; // Remember current node as new parent data->pos[data->level]= numnodes; node.level= data->level++; node.type= st->current_node_type; // TAG or ATTR node.beg= attr; node.end= attr + len; - node.parent= parent; - data->pxml->append((const char*) &node, sizeof(MY_XML_NODE)); - return MY_XML_OK; + return append_node(data->pxml, &node) ? MY_XML_ERROR : MY_XML_OK; } @@ -2694,18 +2684,14 @@ extern "C" int xml_value(MY_XML_PARSER *st,const char *attr, size_t len); int xml_value(MY_XML_PARSER *st,const char *attr, size_t len) { MY_XML_USER_DATA *data= (MY_XML_USER_DATA*)st->user_data; - MY_XML_NODE *nodes= (MY_XML_NODE*) data->pxml->ptr(); - uint numnodes= data->pxml->length() / sizeof(MY_XML_NODE); - uint parent= xml_parent_tag(nodes, numnodes, data->level - 1); MY_XML_NODE node; + node.parent= data->parent; // Set parent for the new text node to old parent node.level= data->level; node.type= MY_XML_NODE_TEXT; node.beg= attr; node.end= attr + len; - node.parent= parent; - data->pxml->append((const char*) &node, sizeof(MY_XML_NODE)); - return MY_XML_OK; + return append_node(data->pxml, &node) ? MY_XML_ERROR : MY_XML_OK; } @@ -2730,6 +2716,7 @@ int xml_leave(MY_XML_PARSER *st,const char *attr, size_t len) data->level--; MY_XML_NODE *nodes= (MY_XML_NODE*) data->pxml->ptr(); + data->parent= nodes[data->parent].parent; nodes+= data->pos[data->level]; nodes->tagend= st->cur; @@ -2760,6 +2747,7 @@ String *Item_xml_str_func::parse_xml(String *raw_xml, String *parsed_xml_buf) p.flags= MY_XML_FLAG_RELATIVE_NAMES | MY_XML_FLAG_SKIP_TEXT_NORMALIZATION; user_data.level= 0; user_data.pxml= parsed_xml_buf; + user_data.parent= 0; my_xml_set_enter_handler(&p, xml_enter); my_xml_set_value_handler(&p, xml_value); my_xml_set_leave_handler(&p, xml_leave); |