summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorivan <ivan@13f79535-47bb-0310-9956-ffa450edef68>2019-05-12 15:03:36 +0000
committerivan <ivan@13f79535-47bb-0310-9956-ffa450edef68>2019-05-12 15:03:36 +0000
commitbd1e1eb4758023699242efbd287bb682c695075d (patch)
treeefa6ecd972c198fec233b51885064f6e013ca17c
parent02ba2ba252bc69739742bc910884ccbdcf4d42f7 (diff)
downloadlibapr-bd1e1eb4758023699242efbd287bb682c695075d.tar.gz
On 'xmllite' branch: Add initial XmlLite based XML parser implementation.
git-svn-id: http://svn.apache.org/repos/asf/apr/apr/branches/xmllite@1859151 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r--xml/apr_xml_xmllite.c588
1 files changed, 573 insertions, 15 deletions
diff --git a/xml/apr_xml_xmllite.c b/xml/apr_xml_xmllite.c
index 73d8b1fe4..990c17502 100644
--- a/xml/apr_xml_xmllite.c
+++ b/xml/apr_xml_xmllite.c
@@ -15,11 +15,12 @@
*/
#include "apr.h"
+#include "apr_arch_utf8.h"
#if APU_USE_XMLLITE
#include "apr_xml.h"
-typedef void * XML_Parser;
+typedef struct xmllite_parser_s* XML_Parser;
typedef int XML_Error;
#include "apr_xml_internal.h"
@@ -31,39 +32,581 @@ typedef void * LPMSG;
#include <xmllite.h>
+#include "apr_xml_internal.h"
+
+typedef struct xml_stream_t {
+ ISequentialStream sequental_stream;
+ ULONG refcount;
+ const char *data;
+ apr_size_t remaining;
+ int is_final;
+} xml_stream_t;
+
+struct xmllite_parser_s
+{
+ IXmlReader *xml_reader;
+ xml_stream_t *input_stream;
+ apr_pool_t *iterpool;
+ apr_status_t (*current_state)(apr_xml_parser *parser, apr_pool_t *scratch_pool);
+
+ void (*start_func)(void *userdata, const char *name, const char **attrs);
+ void (*end_func)(void *userdata, const char *name);
+ void (*cdata_func)(void *userdata, const char *data, int len);
+};
+
+static HRESULT STDMETHODCALLTYPE
+stream_QueryInterface(ISequentialStream * This, REFIID riid, void **ppvObject)
+{
+ xml_stream_t *obj = CONTAINING_RECORD(This, xml_stream_t, sequental_stream);
+
+ if (IsEqualIID(riid, &IID_IUnknown) ||
+ IsEqualIID(riid, &IID_ISequentialStream))
+ {
+ InterlockedIncrement(&obj->refcount);
+ *ppvObject = &obj->sequental_stream;
+ return S_OK;
+ }
+ else
+ {
+ *ppvObject = NULL;
+ return E_NOINTERFACE;
+ }
+}
+
+static ULONG STDMETHODCALLTYPE
+stream_AddRef(ISequentialStream * This)
+{
+ xml_stream_t *obj = CONTAINING_RECORD(This, xml_stream_t, sequental_stream);
+ return InterlockedIncrement(&obj->refcount);
+}
+
+static ULONG STDMETHODCALLTYPE
+stream_Release(ISequentialStream * This)
+{
+ xml_stream_t *obj = CONTAINING_RECORD(This, xml_stream_t, sequental_stream);
+ ULONG refcount = InterlockedDecrement(&obj->refcount);
+
+ if (refcount == 0) {
+ free(obj);
+ }
+
+ return refcount;
+}
+
+static HRESULT STDMETHODCALLTYPE
+stream_Read(ISequentialStream * This,
+ void *pv,
+ ULONG cb,
+ ULONG *pcbRead)
+{
+ xml_stream_t *obj = CONTAINING_RECORD(This, xml_stream_t, sequental_stream);
+ ULONG read = cb;
+
+ if (read > obj->remaining) {
+ read = (ULONG) obj->remaining;
+ }
+
+ memcpy(pv, obj->data, read);
+ obj->data += read;
+ obj->remaining -= read;
+ *pcbRead = read;
+
+ if (read == cb) {
+ return S_OK;
+ }
+ else if (read < cb && !obj->is_final) {
+ return E_PENDING;
+ }
+ else
+ {
+ return S_FALSE;
+ }
+}
+
+static HRESULT STDMETHODCALLTYPE
+stream_Write(ISequentialStream * This,
+ const void *pv,
+ ULONG cb,
+ ULONG *pcbWritten)
+{
+ xml_stream_t *obj = CONTAINING_RECORD(This, xml_stream_t, sequental_stream);
+
+ return E_NOTIMPL;
+}
+
+static ISequentialStreamVtbl stream_vtable =
+{
+ stream_QueryInterface,
+ stream_AddRef,
+ stream_Release,
+ stream_Read,
+ stream_Write
+};
+
+static apr_status_t cleanup_xml_stream(void *ctx)
+{
+ xml_stream_t *xml_stream = ctx;
+
+ ISequentialStream_Release(&xml_stream->sequental_stream);
+
+ return APR_SUCCESS;
+}
+
+static xml_stream_t *create_xml_stream(apr_pool_t *pool)
+{
+ xml_stream_t *xml_stream = malloc(sizeof(*xml_stream));
+ if (xml_stream == NULL) {
+ return NULL;
+ }
+
+ memset(xml_stream, 0, sizeof(*xml_stream));
+ xml_stream->sequental_stream.lpVtbl = &stream_vtable;
+ xml_stream->refcount = 1;
+
+ apr_pool_cleanup_register(pool, xml_stream, cleanup_xml_stream,
+ apr_pool_cleanup_null);
+
+ return xml_stream;
+}
+
static apr_status_t cleanup_parser(void *ctx)
{
apr_xml_parser *parser = ctx;
+ if (parser->xp->xml_reader)
+ {
+ IXmlReader_Release(parser->xp->xml_reader);
+ parser->xp->xml_reader = NULL;
+ }
+
return APR_SUCCESS;
}
-static apr_status_t xmllite_parse(apr_xml_parser* parser, const char* data,
- apr_size_t sz, int final)
+static apr_status_t wstr2utf(const char **utf_p, apr_size_t *utf_len_p,
+ LPCWSTR wstr, apr_size_t wlen,
+ apr_pool_t *pool)
{
+ apr_size_t result_len;
+ char *result;
+
+ if (wlen > 0) {
+ apr_status_t status;
+ apr_size_t bufsize = wlen * 3;
+ apr_size_t outbytes = bufsize;
+
+ result = apr_palloc(pool, outbytes + 1);
+ if (!result) {
+ return APR_ENOMEM;
+ }
+
+ status = apr_conv_ucs2_to_utf8(wstr, &wlen, result, &outbytes);
+ if (status) {
+ return status;
+ }
+
+ result_len = bufsize - outbytes;
+ result[result_len] = 0;
+ }
+ else {
+ result = "";
+ result_len = 0;
+ }
+
+ *utf_p = result;
+ if (utf_len_p) {
+ *utf_len_p = result_len;
+ }
+
return APR_SUCCESS;
}
-static XMLParserImpl xml_parser_xmllite = {
- xmllite_parse,
- cleanup_parser
-};
+static char * get_xmllite_errmsg(HRESULT hr)
+{
+ switch(hr)
+ {
+ case MX_E_INPUTEND:
+ return "unexpected end of input";
+ case MX_E_ENCODING:
+ return "unrecognized encoding";
+ case MX_E_ENCODINGSWITCH:
+ return "unable to switch the encoding";
+ case MX_E_ENCODINGSIGNATURE:
+ return "unrecognized input signature";
+ case WC_E_WHITESPACE:
+ return "whitespace expected";
+ case WC_E_SEMICOLON:
+ return "semicolon expected";
+ case WC_E_GREATERTHAN:
+ return "'>' expected";
+ case WC_E_QUOTE:
+ return "quote expected";
+ case WC_E_EQUAL:
+ return "equal expected";
+ case WC_E_LESSTHAN:
+ return "well-formedness constraint: no '<' in attribute value";
+ case WC_E_HEXDIGIT:
+ return "hexadecimal digit expected";
+ case WC_E_DIGIT:
+ return "decimal digit expected";
+ case WC_E_LEFTBRACKET:
+ return "'[' expected";
+ case WC_E_LEFTPAREN:
+ return "'(' expected";
+ case WC_E_XMLCHARACTER:
+ return "illegal xml character";
+ case WC_E_NAMECHARACTER:
+ return "illegal name character";
+ case WC_E_SYNTAX:
+ return "incorrect document syntax";
+ case WC_E_CDSECT:
+ return "incorrect CDATA section syntax";
+ case WC_E_COMMENT:
+ return "incorrect comment syntax";
+ case WC_E_CONDSECT:
+ return "incorrect conditional section syntax";
+ case WC_E_DECLATTLIST:
+ return "incorrect ATTLIST declaration syntax";
+ case WC_E_DECLDOCTYPE:
+ return "incorrect DOCTYPE declaration syntax";
+ case WC_E_DECLELEMENT:
+ return "incorrect ELEMENT declaration syntax";
+ case WC_E_DECLENTITY:
+ return "incorrect ENTITY declaration syntax";
+ case WC_E_DECLNOTATION:
+ return "incorrect NOTATION declaration syntax";
+ case WC_E_NDATA:
+ return "NDATA expected";
+ case WC_E_PUBLIC:
+ return "PUBLIC expected";
+ case WC_E_SYSTEM:
+ return "SYSTEM expected";
+ case WC_E_NAME:
+ return "name expected";
+ case WC_E_ROOTELEMENT:
+ return "one root element";
+ case WC_E_ELEMENTMATCH:
+ return "well-formedness constraint: element type match";
+ case WC_E_UNIQUEATTRIBUTE:
+ return "well-formedness constraint: unique attribute spec";
+ case WC_E_TEXTXMLDECL:
+ return "text/xmldecl not at the beginning of input";
+ case WC_E_LEADINGXML:
+ return "leading \"xml\"";
+ case WC_E_TEXTDECL:
+ return "incorrect text declaration syntax";
+ case WC_E_XMLDECL:
+ return "incorrect xml declaration syntax";
+ case WC_E_ENCNAME:
+ return "incorrect encoding name syntax";
+ case WC_E_PUBLICID:
+ return "incorrect public identifier syntax";
+ case WC_E_PESINTERNALSUBSET:
+ return "well-formedness constraint: pes in internal subset";
+ case WC_E_PESBETWEENDECLS:
+ return "well-formedness constraint: pes between declarations";
+ case WC_E_NORECURSION:
+ return "well-formedness constraint: no recursion";
+ case WC_E_ENTITYCONTENT:
+ return "entity content not well formed";
+ case WC_E_UNDECLAREDENTITY:
+ return "well-formedness constraint: undeclared entity";
+ case WC_E_PARSEDENTITY:
+ return "well-formedness constraint: parsed entity";
+ case WC_E_NOEXTERNALENTITYREF:
+ return "well-formedness constraint: no external entity references";
+ case WC_E_PI:
+ return "incorrect processing instruction syntax";
+ case WC_E_SYSTEMID:
+ return "incorrect system identifier syntax";
+ case WC_E_QUESTIONMARK:
+ return "'?' expected";
+ case WC_E_CDSECTEND:
+ return "no ']]>' in element content";
+ case WC_E_MOREDATA:
+ return "not all chunks of value have been read";
+ case WC_E_DTDPROHIBITED:
+ return "DTD was found but is prohibited";
+ case WC_E_INVALIDXMLSPACE:
+ return "xml:space attribute with invalid value";
+ case NC_E_QNAMECHARACTER:
+ return "illegal qualified name character";
+ case NC_E_QNAMECOLON:
+ return "multiple colons in qualified name";
+ case NC_E_NAMECOLON:
+ return "colon in name";
+ case NC_E_DECLAREDPREFIX:
+ return "declared prefix";
+ case NC_E_UNDECLAREDPREFIX:
+ return "undeclared prefix";
+ case NC_E_EMPTYURI:
+ return "non default namespace with empty uri";
+ case NC_E_XMLPREFIXRESERVED:
+ return "\"xml\" prefix is reserved and must have the "
+ "http://www.w3.org/XML/1998/namespace URI";
+ case NC_E_XMLNSPREFIXRESERVED:
+ return "\"xmlns\" prefix is reserved for use by XML";
+ case NC_E_XMLURIRESERVED:
+ return "xml namespace URI (http://www.w3.org/XML/1998/namespace) must "
+ "be assigned only to prefix \"xml\"";
+ case NC_E_XMLNSURIRESERVED:
+ return "xmlns namespace URI (http://www.w3.org/2000/xmlns/) is "
+ "reserved and must not be used";
+ case SC_E_MAXELEMENTDEPTH:
+ return "element depth exceeds limit";
+ case SC_E_MAXENTITYEXPANSION:
+ return "entity expansion exceeds limit";
+ case XML_E_INVALID_DECIMAL:
+ return "character in character entity is not a decimal digit "
+ "as was expected.";
+ case XML_E_INVALID_HEXIDECIMAL:
+ return "character in character entity is not a hexadecimal "
+ "digit as was expected.";
+ case XML_E_INVALID_UNICODE:
+ return "character entity has invalid Unicode value.";
+ default:
+ return "";
+ }
+}
-static const char APR_KW_DAV[] = { 0x44, 0x41, 0x56, 0x3A, '\0' };
+static apr_status_t handle_xmllite_err(apr_xml_parser *parser, HRESULT hr)
+{
+ parser->xp_err = hr;
-XMLParserImpl* apr_xml_get_parser_impl(void)
+ parser->xp_msg = get_xmllite_errmsg(hr);
+ /* this misnomer is used as a test for (any) parser error. */
+ parser->error = APR_XML_ERROR_EXPAT;
+
+ return APR_EGENERAL;
+}
+
+static apr_status_t
+cdata_state(apr_xml_parser *parser,
+ apr_pool_t *scratch_pool);
+
+static apr_status_t
+read_state(apr_xml_parser *parser,
+ apr_pool_t *scratch_pool)
+{
+ HRESULT hr;
+ XmlNodeType node_type;
+ apr_status_t status;
+
+ hr = IXmlReader_Read(parser->xp->xml_reader, &node_type);
+ if (hr == E_PENDING) {
+ return APR_EAGAIN;
+ }
+ else if (FAILED(hr)) {
+ return handle_xmllite_err(parser, hr);
+ }
+
+ if (node_type == XmlNodeType_Element) {
+ LPCWSTR wname;
+ UINT wname_len;
+ ULONG attr_count;
+ char **attrs;
+ const char* elem_name;
+
+ hr = IXmlReader_GetQualifiedName(parser->xp->xml_reader,
+ &wname, &wname_len);
+ if (FAILED(hr)) {
+ return handle_xmllite_err(parser, hr);
+ }
+
+ status = wstr2utf(&elem_name, NULL, wname, wname_len, scratch_pool);
+ if (status) {
+ return status;
+ }
+
+ hr = IXmlReader_GetAttributeCount(parser->xp->xml_reader, &attr_count);
+ if (FAILED(hr)) {
+ return handle_xmllite_err(parser, hr);
+ }
+
+ if (attr_count > 0) {
+ ULONG i;
+
+ attrs = apr_palloc(parser->p, sizeof(char*) * (attr_count + 1) * 2);
+
+ hr = IXmlReader_MoveToFirstAttribute(parser->xp->xml_reader);
+ if (FAILED(hr)) {
+ return handle_xmllite_err(parser, hr);
+ }
+
+ for (i = 0; i < attr_count; i++) {
+ hr = IXmlReader_GetQualifiedName(parser->xp->xml_reader,
+ &wname, &wname_len);
+ if (FAILED(hr)) {
+ return handle_xmllite_err(parser, hr);
+ }
+
+ status = wstr2utf(&attrs[i * 2], NULL, wname, wname_len,
+ scratch_pool);
+ if (status) {
+ return status;
+ }
+
+ hr = IXmlReader_GetValue(parser->xp->xml_reader,
+ &wname, &wname_len);
+ if (FAILED(hr)) {
+ return handle_xmllite_err(parser, hr);
+ }
+
+ status = wstr2utf(&attrs[i * 2 + 1], NULL, wname, wname_len,
+ scratch_pool);
+ if (status) {
+ return status;
+ }
+
+ hr = IXmlReader_MoveToNextAttribute(parser->xp->xml_reader);
+ if (FAILED(hr)) {
+ return handle_xmllite_err(parser, hr);
+ }
+ }
+
+ attrs[i * 2] = NULL;
+ attrs[i * 2 + 1] = NULL;
+
+ hr = IXmlReader_MoveToElement(parser->xp->xml_reader);
+ if (FAILED(hr)) {
+ return handle_xmllite_err(parser, hr);
+ }
+ }
+ else {
+ static char* no_attrs[] = { NULL, NULL };
+ attrs = no_attrs;
+ }
+
+ parser->xp->start_func(parser, elem_name, attrs);
+
+ if (IXmlReader_IsEmptyElement(parser->xp->xml_reader)) {
+ parser->xp->end_func(parser, elem_name);
+ }
+ }
+ else if (node_type == XmlNodeType_EndElement) {
+ LPCWSTR wname;
+ UINT wname_len;
+ const char *elem_name;
+
+ hr = IXmlReader_GetQualifiedName(parser->xp->xml_reader,
+ &wname, &wname_len);
+ if (FAILED(hr)) {
+ return handle_xmllite_err(parser, hr);
+ }
+
+ status = wstr2utf(&elem_name, NULL, wname, wname_len, scratch_pool);
+ if (status) {
+ return status;
+ }
+
+ parser->xp->end_func(parser, elem_name);
+ }
+ else if (node_type == XmlNodeType_CDATA ||
+ node_type == XmlNodeType_Text) {
+ parser->xp->current_state = cdata_state;
+ }
+ else if (node_type == XmlNodeType_Whitespace) {
+ UINT depth;
+ hr = IXmlReader_GetDepth(parser->xp->xml_reader, &depth);
+ if (FAILED(hr)) {
+ return handle_xmllite_err(parser, hr);
+ }
+
+ /* Report whitespaces as cdata (the same as Expat does), but
+ ignore them when depth == 0. */
+ if (depth > 0) {
+ parser->xp->current_state = cdata_state;
+ }
+ }
+ else if (node_type == XmlNodeType_None)
+ {
+ return APR_EOF;
+ }
+
+ return APR_SUCCESS;
+}
+
+static apr_status_t
+cdata_state(apr_xml_parser *parser,
+ apr_pool_t *scratch_pool)
{
- return &xml_parser_xmllite;
+ HRESULT hr;
+ apr_status_t status;
+ WCHAR buf[512];
+ UINT read_count;
+
+ hr = IXmlReader_ReadValueChunk(parser->xp->xml_reader, buf,
+ sizeof(buf) / sizeof(buf[0]),
+ &read_count);
+ if (hr == E_PENDING) {
+ return APR_EAGAIN;
+ }
+ else if (FAILED(hr)) {
+ return handle_xmllite_err(parser, hr);
+ }
+
+ if (read_count > 0) {
+ const char *cdata;
+ apr_size_t cdata_len;
+
+ status = wstr2utf(&cdata, &cdata_len, buf, read_count, scratch_pool);
+ if (status) {
+ return status;
+ }
+
+ parser->xp->cdata_func(parser, cdata, (int) cdata_len);
+ }
+
+ if (hr == S_FALSE) {
+ parser->xp->current_state = read_state;
+ }
+
+ return APR_SUCCESS;
}
+static apr_status_t do_parse(apr_xml_parser *parser,
+ const char *data, apr_size_t len,
+ int is_final)
+{
+ apr_status_t status;
+ apr_pool_t *iterpool = parser->xp->iterpool;
+
+ parser->xp->input_stream->data = data;
+ parser->xp->input_stream->remaining = len;
+ parser->xp->input_stream->is_final = is_final;
+
+ while (TRUE)
+ {
+ apr_pool_clear(iterpool);
+
+ status = parser->xp->current_state(parser, iterpool);
+ if (status != APR_SUCCESS) {
+ break;
+ }
+ }
+
+ if (status == APR_EAGAIN || status == APR_EOF)
+ {
+ status = APR_SUCCESS;
+ }
+
+ return status;
+}
+
+static XMLParserImpl xml_parser_xmllite = {
+ do_parse,
+ cleanup_parser
+};
+
+XMLParserImpl* apr_xml_get_parser_impl(void) { return &xml_parser_xmllite; }
+static const char APR_KW_DAV[] = { 0x44, 0x41, 0x56, 0x3A, '\0' };
+
apr_xml_parser* apr_xml_parser_create_internal(apr_pool_t *pool,
void *start_func,
void *end_func,
void *cdata_func)
{
apr_xml_parser *parser = apr_pcalloc(pool, sizeof(*parser));
- IXmlReader *xml_reader;
HRESULT hr;
parser->impl = apr_xml_get_parser_impl();
@@ -73,15 +616,30 @@ apr_xml_parser* apr_xml_parser_create_internal(apr_pool_t *pool,
/* ### is there a way to avoid hard-coding this? */
apr_xml_insert_uri(parser->doc->namespaces, APR_KW_DAV);
- apr_pool_cleanup_register(pool, parser, cleanup_parser,
- apr_pool_cleanup_null);
+ parser->xp = apr_pcalloc(pool, sizeof(struct xmllite_parser_s));
+ parser->xp->current_state = read_state;
+ parser->xp->start_func = start_func;
+ parser->xp->end_func = end_func;
+ parser->xp->cdata_func = cdata_func;
+ apr_pool_create(&parser->xp->iterpool, pool);
- hr = CreateXmlReader(&IID_IXmlReader, &xml_reader, NULL);
+ parser->xp->input_stream = create_xml_stream(pool);
+ if (parser->xp->input_stream == NULL) {
+ (*apr_pool_abort_get(pool))(APR_ENOMEM);
+ return NULL;
+ }
+
+ hr = CreateXmlReader(&IID_IXmlReader, &parser->xp->xml_reader, NULL);
if (FAILED(hr)) {
return NULL;
}
+ apr_pool_cleanup_register(pool, parser, cleanup_parser, apr_pool_cleanup_null);
- IXmlReader_Release(xml_reader);
+ hr = IXmlReader_SetInput(parser->xp->xml_reader,
+ (IUnknown*) &parser->xp->input_stream->sequental_stream);
+ if (FAILED(hr)) {
+ return NULL;
+ }
return parser;
}