summaryrefslogtreecommitdiff
path: root/include/apr_xml.h
blob: ac7f003b42dfee2342910bf682a31a52cbb111e2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * @file apr_xml.h
 * @brief APR-UTIL XML Library
 */
#ifndef APR_XML_H
#define APR_XML_H

/**
 * @defgroup APR_Util_XML XML 
 * @ingroup APR_Util
 * @{
 */
#include "apr_pools.h"
#include "apr_tables.h"
#include "apr_file_io.h"

#include "apu.h"
#if APR_CHARSET_EBCDIC
#include "apr_xlate.h"
#endif

#ifdef __cplusplus
extern "C" {
#endif

/**
 * @package Apache XML library
 */

/* -------------------------------------------------------------------- */

/* ### these will need to move at some point to a more logical spot */

/** @see apr_text */
typedef struct apr_text apr_text;

/** Structure to keep a linked list of pieces of text */
struct apr_text {
    /** The current piece of text */
    const char *text;
    /** a pointer to the next piece of text */
    struct apr_text *next;
};

/** @see apr_text_header */
typedef struct apr_text_header apr_text_header;

/** A list of pieces of text */
struct apr_text_header {
    /** The first piece of text in the list */
    apr_text *first;
    /** The last piece of text in the list */
    apr_text *last;
};

/**
 * Append a piece of text to the end of a list
 * @param p The pool to allocate out of
 * @param hdr The text header to append to
 * @param text The new text to append
 */
APU_DECLARE(void) apr_text_append(apr_pool_t *p, apr_text_header *hdr,
                                  const char *text);


/* --------------------------------------------------------------------
**
** XML PARSING
*/

/*
** Qualified namespace values
**
** APR_XML_NS_DAV_ID
**    We always insert the "DAV:" namespace URI at the head of the
**    namespace array. This means that it will always be at ID==0,
**    making it much easier to test for.
**
** APR_XML_NS_NONE
**    This special ID is used for two situations:
**
**    1) The namespace prefix begins with "xml" (and we do not know
**       what it means). Namespace prefixes with "xml" (any case) as
**       their first three characters are reserved by the XML Namespaces
**       specification for future use. mod_dav will pass these through
**       unchanged. When this identifier is used, the prefix is LEFT in
**       the element/attribute name. Downstream processing should not
**       prepend another prefix.
**
**    2) The element/attribute does not have a namespace.
**
**       a) No prefix was used, and a default namespace has not been
**          defined.
**       b) No prefix was used, and the default namespace was specified
**          to mean "no namespace". This is done with a namespace
**          declaration of:  xmlns=""
**          (this declaration is typically used to override a previous
**          specification for the default namespace)
**
**       In these cases, we need to record that the elem/attr has no
**       namespace so that we will not attempt to prepend a prefix.
**       All namespaces that are used will have a prefix assigned to
**       them -- mod_dav will never set or use the default namespace
**       when generating XML. This means that "no prefix" will always
**       mean "no namespace".
**
**    In both cases, the XML generation will avoid prepending a prefix.
**    For the first case, this means the original prefix/name will be
**    inserted into the output stream. For the latter case, it means
**    the name will have no prefix, and since we never define a default
**    namespace, this means it will have no namespace.
**
** Note: currently, mod_dav understands the "xmlns" prefix and the
**     "xml:lang" attribute. These are handled specially (they aren't
**     left within the XML tree), so the APR_XML_NS_NONE value won't ever
**     really apply to these values.
*/
#define APR_XML_NS_DAV_ID	0	/**< namespace ID for "DAV:" */
#define APR_XML_NS_NONE		-10	/**< no namespace for this elem/attr */

#define APR_XML_NS_ERROR_BASE	-100	/**< used only during processing */
/** Is this namespace an error? */
#define APR_XML_NS_IS_ERROR(e)	((e) <= APR_XML_NS_ERROR_BASE)

/** @see apr_xml_attr */
typedef struct apr_xml_attr apr_xml_attr;
/** @see apr_xml_elem */
typedef struct apr_xml_elem apr_xml_elem;
/** @see apr_xml_doc */
typedef struct apr_xml_doc apr_xml_doc;

/** apr_xml_attr: holds a parsed XML attribute */
struct apr_xml_attr {
    /** attribute name */
    const char *name;
    /** index into namespace array */
    int ns;

    /** attribute value */
    const char *value;

    /** next attribute */
    struct apr_xml_attr *next;
};

/** apr_xml_elem: holds a parsed XML element */
struct apr_xml_elem {
    /** element name */
    const char *name;
    /** index into namespace array */
    int ns;
    /** xml:lang for attrs/contents */
    const char *lang;

    /** cdata right after start tag */
    apr_text_header first_cdata;
    /** cdata after MY end tag */
    apr_text_header following_cdata;

    /** parent element */
    struct apr_xml_elem *parent;	
    /** next (sibling) element */
    struct apr_xml_elem *next;	
    /** first child element */
    struct apr_xml_elem *first_child;
    /** first attribute */
    struct apr_xml_attr *attr;		

    /* used only during parsing */
    /** last child element */
    struct apr_xml_elem *last_child;
    /** namespaces scoped by this elem */
    struct apr_xml_ns_scope *ns_scope;

    /* used by modules during request processing */
    /** Place for modules to store private data */
    void *priv;
};

/** Is this XML element empty? */
#define APR_XML_ELEM_IS_EMPTY(e) ((e)->first_child == NULL && \
                                  (e)->first_cdata.first == NULL)

/** apr_xml_doc: holds a parsed XML document */
struct apr_xml_doc {
    /** root element */
    apr_xml_elem *root;	
    /** array of namespaces used */
    apr_array_header_t *namespaces;
};

/** Opaque XML parser structure */
typedef struct apr_xml_parser apr_xml_parser;

/**
 * Create an XML parser
 * @param pool The pool for allocating the parser and the parse results.
 * @return The new parser.
 */
APU_DECLARE(apr_xml_parser *) apr_xml_parser_create(apr_pool_t *pool);

/**
 * Parse a File, producing a xml_doc
 * @param p      The pool for allocating the parse results.
 * @param parser A pointer to *parser (needed so calling function can get
 *               errors), will be set to NULL on successful completion.
 * @param ppdoc  A pointer to *apr_xml_doc (which has the parsed results in it)
 * @param xmlfd  A file to read from.
 * @param buffer_length Buffer length which would be suitable 
 * @return Any errors found during parsing.
 */
APU_DECLARE(apr_status_t) apr_xml_parse_file(apr_pool_t *p,
                                             apr_xml_parser **parser,
                                             apr_xml_doc **ppdoc,
                                             apr_file_t *xmlfd,
                                             apr_size_t buffer_length);


/**
 * Feed input into the parser
 * @param parser The XML parser for parsing this data.
 * @param data The data to parse.
 * @param len The length of the data.
 * @return Any errors found during parsing.
 * @remark Use apr_xml_parser_geterror() to get more error information.
 */
APU_DECLARE(apr_status_t) apr_xml_parser_feed(apr_xml_parser *parser,
                                              const char *data,
                                              apr_size_t len);

/**
 * Terminate the parsing and return the result
 * @param parser The XML parser for parsing this data.
 * @param pdoc The resulting parse information. May be NULL to simply
 *             terminate the parsing without fetching the info.
 * @return Any errors found during the final stage of parsing.
 * @remark Use apr_xml_parser_geterror() to get more error information.
 */
APU_DECLARE(apr_status_t) apr_xml_parser_done(apr_xml_parser *parser,
                                              apr_xml_doc **pdoc);

/**
 * Fetch additional error information from the parser.
 * @param parser The XML parser to query for errors.
 * @param errbuf A buffer for storing error text.
 * @param errbufsize The length of the error text buffer.
 * @return The error buffer
 */
APU_DECLARE(char *) apr_xml_parser_geterror(apr_xml_parser *parser,
                                            char *errbuf,
                                            apr_size_t errbufsize);


/**
 * Converts an XML element tree to flat text 
 * @param p The pool to allocate out of
 * @param elem The XML element to convert
 * @param style How to covert the XML.  One of:
 * <PRE>
 *     APR_XML_X2T_FULL                start tag, contents, end tag 
 *     APR_XML_X2T_INNER               contents only 
 *     APR_XML_X2T_LANG_INNER          xml:lang + inner contents 
 *     APR_XML_X2T_FULL_NS_LANG        FULL + ns defns + xml:lang 
 * </PRE>
 * @param namespaces The namespace of the current XML element
 * @param ns_map Namespace mapping
 * @param pbuf Buffer to put the converted text into
 * @param psize Size of the converted text
 */
APU_DECLARE(void) apr_xml_to_text(apr_pool_t *p, const apr_xml_elem *elem,
                                  int style, apr_array_header_t *namespaces,
                                  int *ns_map, const char **pbuf,
                                  apr_size_t *psize);

/* style argument values: */
#define APR_XML_X2T_FULL         0	/**< start tag, contents, end tag */
#define APR_XML_X2T_INNER        1	/**< contents only */
#define APR_XML_X2T_LANG_INNER   2	/**< xml:lang + inner contents */
#define APR_XML_X2T_FULL_NS_LANG 3	/**< FULL + ns defns + xml:lang */

/**
 * empty XML element
 * @param p The pool to allocate out of
 * @param elem The XML element to empty
 * @return the string that was stored in the XML element
 */
APU_DECLARE(const char *) apr_xml_empty_elem(apr_pool_t *p,
                                             const apr_xml_elem *elem);

/**
 * quote an XML string
 * Replace '\<', '\>', and '\&' with '\&lt;', '\&gt;', and '\&amp;'.
 * @param p The pool to allocate out of
 * @param s The string to quote
 * @param quotes If quotes is true, then replace '&quot;' with '\&quot;'.
 * @return The quoted string
 * @note If the string does not contain special characters, it is not
 * duplicated into the pool and the original string is returned.
 */
APU_DECLARE(const char *) apr_xml_quote_string(apr_pool_t *p, const char *s,
                                               int quotes);

/**
 * Quote an XML element
 * @param p The pool to allocate out of
 * @param elem The element to quote
 */
APU_DECLARE(void) apr_xml_quote_elem(apr_pool_t *p, apr_xml_elem *elem);

/* manage an array of unique URIs: apr_xml_insert_uri() and APR_XML_URI_ITEM() */

/**
 * return the URI's (existing) index, or insert it and return a new index 
 * @param uri_array array to insert into
 * @param uri The uri to insert
 * @return int The uri's index
 */
APU_DECLARE(int) apr_xml_insert_uri(apr_array_header_t *uri_array,
                                    const char *uri);

/** Get the URI item for this XML element */
#define APR_XML_GET_URI_ITEM(ary, i) (((const char * const *)(ary)->elts)[i])

#if APR_CHARSET_EBCDIC
/**
 * Convert parsed tree in EBCDIC 
 * @param p The pool to allocate out of
 * @param pdoc The apr_xml_doc to convert.
 * @param xlate The translation handle to use.
 * @return Any errors found during conversion.
 */
APU_DECLARE(apr_status_t) apr_xml_parser_convert_doc(apr_pool_t *p,
                                                     apr_xml_doc *pdoc,
                                                     apr_xlate_t *convset);
#endif

#ifdef __cplusplus
}
#endif
/** @} */
#endif /* APR_XML_H */