summaryrefslogtreecommitdiff
path: root/xml_parser.c
diff options
context:
space:
mode:
Diffstat (limited to 'xml_parser.c')
-rw-r--r--xml_parser.c1183
1 files changed, 1183 insertions, 0 deletions
diff --git a/xml_parser.c b/xml_parser.c
new file mode 100644
index 00000000..dfec5a78
--- /dev/null
+++ b/xml_parser.c
@@ -0,0 +1,1183 @@
+/*
+ * xml_parser.c : an XML 1.0 non-validating parser
+ *
+ * See Copyright for the status of this software.
+ *
+ * $Id$
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <string.h> /* for memset() only */
+#include <malloc.h>
+#include <sys/stat.h>
+#ifdef HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#ifdef HAVE_ZLIB_H
+#include <zlib.h>
+#endif
+
+#include "xml_tree.h"
+#include "xml_parser.h"
+#include "xml_entities.h"
+
+/*
+ * A few macros needed to help building the parser.
+ */
+
+#ifdef UNICODE
+/*
+ * UNICODE version of the character class macros.
+ * Several classes are still incomplete. TODO !!!!
+ */
+#define IS_CHAR(c) \
+    (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || \
+     (((c) >= 0x20) && ((c) != 0xFFFE) && ((c) != 0xFFFF)))
+
+/*
+ * Incomplete. TODO !!!!
+ * NOTE(review): the 0xaa..0x5b range below is empty (its lower bound is
+ * above its upper bound) so it never matches; it looks like a typo for the
+ * two single characters 0xaa and 0xb5 -- confirm before changing it.
+ */
+#define IS_BASECHAR(c) \
+    ((((c) >= 0x41) && ((c) <= 0x5a)) || \
+     (((c) >= 0x61) && ((c) <= 0x7a)) || \
+     (((c) >= 0xaa) && ((c) <= 0x5b)) || \
+     (((c) >= 0xc0) && ((c) <= 0xd6)) || \
+     (((c) >= 0xd8) && ((c) <= 0xf6)) || \
+     (((c) >= 0xf8) && ((c) <= 0xff)) || \
+     ((c) == 0xba))
+
+/* Incomplete. TODO !!!! */
+#define IS_DIGIT(c) (((c) >= 0x30) && ((c) <= 0x39))
+
+/* Incomplete. TODO !!!! */
+#define IS_COMBINING(c) 0
+
+#define IS_IGNORABLE(c) \
+    ((((c) >= 0x200c) && ((c) <= 0x200f)) || \
+     (((c) >= 0x202a) && ((c) <= 0x202e)) || \
+     (((c) >= 0x206a) && ((c) <= 0x206f)) || \
+     ((c) == 0xfeff))
+
+/* The '||' after the 0x3005 test was missing, which broke compilation. */
+#define IS_EXTENDER(c) \
+    (((c) == 0xb7) || ((c) == 0x2d0) || ((c) == 0x2d1) || \
+     ((c) == 0x387) || ((c) == 0x640) || ((c) == 0xe46) || \
+     ((c) == 0xec6) || ((c) == 0x3005) || \
+     (((c) >= 0x3031) && ((c) <= 0x3035)) || \
+     (((c) >= 0x309b) && ((c) <= 0x309e)) || \
+     (((c) >= 0x30fc) && ((c) <= 0x30fe)) || \
+     (((c) >= 0xff70) && ((c) <= 0xff9e)) || \
+     ((c) == 0xff9f))
+
+#define IS_IDEOGRAPHIC(c) \
+    ((((c) >= 0x4e00) && ((c) <= 0x9fa5)) || \
+     (((c) >= 0xf900) && ((c) <= 0xfa2d)) || \
+     (((c) >= 0x3021) && ((c) <= 0x3029)) || \
+     ((c) == 0x3007))
+
+#define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))
+
+/* White space as defined by the XML production S (CR included). */
+#define IS_BLANK(c) \
+    (((c) == 0x20) || ((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d))
+#else
+/*
+ * 8bits / ASCII version of the macros.
+ */
+#define IS_CHAR(c) \
+    (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || ((c) >= 0x20))
+
+/*
+ * NOTE(review): the 0xaa..0x5b range below is empty (its lower bound is
+ * above its upper bound) so it never matches; it looks like a typo for the
+ * two single characters 0xaa and 0xb5 -- confirm before changing it.
+ */
+#define IS_BASECHAR(c) \
+    ((((c) >= 0x41) && ((c) <= 0x5a)) || \
+     (((c) >= 0x61) && ((c) <= 0x7a)) || \
+     (((c) >= 0xaa) && ((c) <= 0x5b)) || \
+     (((c) >= 0xc0) && ((c) <= 0xd6)) || \
+     (((c) >= 0xd8) && ((c) <= 0xf6)) || \
+     (((c) >= 0xf8) && ((c) <= 0xff)) || \
+     ((c) == 0xba))
+
+#define IS_DIGIT(c) (((c) >= 0x30) && ((c) <= 0x39))
+
+#define IS_LETTER(c) IS_BASECHAR(c)
+
+#define IS_COMBINING(c) 0
+
+#define IS_IGNORABLE(c) 0
+
+#define IS_EXTENDER(c) ((c) == 0xb7)
+
+/* White space as defined by the XML production S (CR included). */
+#define IS_BLANK(c) \
+    (((c) == 0x20) || ((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d))
+#endif
+
+/*
+ * Pointer moving helpers. Each one expands to a single statement, so it
+ * must be followed by a ';' at the point of use. The argument is
+ * evaluated several times: pass a plain lvalue without side effects.
+ */
+
+/* Skip one end of line: CR, LF, CR LF or LF CR. */
+#define SKIP_EOL(p) \
+    do { \
+        if (*(p) == 0x0d) { (p)++; if (*(p) == 0x0a) (p)++; } \
+        else if (*(p) == 0x0a) { (p)++; if (*(p) == 0x0d) (p)++; } \
+    } while (0)
+
+/*
+ * Skip a run of blanks. This is the only definition: the former UNICODE
+ * specific variant was overridden by this one anyway.
+ */
+#define SKIP_BLANKS(p) \
+    do { while (IS_BLANK(*(p))) (p)++; } while (0)
+
+/* Move to the next '>' or to the first non CHAR (end of input). */
+#define MOVETO_ENDTAG(p) \
+    do { while (IS_CHAR(*(p)) && (*(p) != '>')) (p)++; } while (0)
+
+/* Move to the next '<' or to the first non CHAR (end of input). */
+#define MOVETO_STARTTAG(p) \
+    do { while (IS_CHAR(*(p)) && (*(p) != '<')) (p)++; } while (0)
+
+/*
+ * Forward declaration needed for the recursive behaviour.
+ */
+xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt);
+
+/*
+ * xmlHandleData : application specific hook applied to a piece of text
+ *                 read by the parser.
+ *
+ * A text made only of blanks (or an empty one) is released with free()
+ * and NULL is returned, this is the behaviour needed for WebDav. Any
+ * other text is handed back untouched. A NULL input gives NULL.
+ */
+
+CHAR *xmlHandleData(CHAR *in) {
+    CHAR *scan;
+
+    if (in == NULL)
+        return(NULL);
+
+    /* Stop as soon as something which is not a blank shows up. */
+    for (scan = in; IS_CHAR(*scan); scan++) {
+        if (!IS_BLANK(*scan))
+            return(in);
+    }
+
+    /* Nothing but blanks: the text is dropped. */
+    free(in);
+    return(NULL);
+}
+
+/*
+ * xmlStrndup : a strndup for arrays of CHAR's
+ *
+ * cur: the source array, at least len CHAR's long
+ * len: the number of CHAR's to copy
+ *
+ * Returns a freshly malloc'ed, zero terminated copy of the first len
+ * CHAR's of cur (to be released with free()), or NULL if cur is NULL,
+ * len is negative or the allocation failed.
+ */
+
+CHAR *xmlStrndup(const CHAR *cur, int len) {
+    CHAR *ret;
+    size_t size;
+
+    if ((cur == NULL) || (len < 0)) return(NULL);
+
+    size = ((size_t) len + 1) * sizeof(CHAR);
+    ret = malloc(size);
+    if (ret == NULL) {
+        /* size is a size_t: it cannot be printed with %d */
+        fprintf(stderr, "malloc of %lu byte failed\n", (unsigned long) size);
+        return(NULL);
+    }
+    memcpy(ret, cur, (size_t) len * sizeof(CHAR));
+    ret[len] = 0;
+    return(ret);
+}
+
+/*
+ * xmlStrdup : a strdup for CHAR's
+ *
+ * cur: a zero terminated array of CHAR's, may be NULL
+ *
+ * Returns a malloc'ed copy of the whole string, up to its terminating
+ * zero like xmlStrcmp() and xmlStrchr() do, or NULL if cur is NULL or
+ * the allocation failed.
+ */
+
+CHAR *xmlStrdup(const CHAR *cur) {
+    const CHAR *p = cur;
+
+    if (cur == NULL) return(NULL);
+    while (*p != 0) p++;
+    return(xmlStrndup(cur, (int) (p - cur)));
+}
+
+/*
+ * xmlStrcmp : a strcmp for CHAR's
+ *
+ * Returns 0 when both strings are equal, otherwise the difference
+ * between the first two CHAR's which differ. A NULL string sorts before
+ * any non NULL one.
+ */
+
+int xmlStrcmp(const CHAR *str1, const CHAR *str2) {
+    if (str1 == str2) return(0);
+    if (str1 == NULL) return(-1);
+    if (str2 == NULL) return(1);
+
+    /*
+     * The terminator is tested before moving forward, so nothing is
+     * ever read past the end of a string, even an empty one.
+     */
+    while ((*str1 != 0) && (*str1 == *str2)) {
+        str1++;
+        str2++;
+    }
+    return(*str1 - *str2);
+}
+
+/*
+ * xmlStrncmp : a strncmp for CHAR's
+ *
+ * Compares at most len CHAR's. Returns 0 when they are all equal (or
+ * when len is not positive), otherwise the difference between the first
+ * two CHAR's which differ. A NULL string sorts before any non NULL one.
+ */
+
+int xmlStrncmp(const CHAR *str1, const CHAR *str2, int len) {
+    if (len <= 0) return(0);
+    if (str1 == str2) return(0);
+    if (str1 == NULL) return(-1);
+    if (str2 == NULL) return(1);
+
+    while (len > 0) {
+        if (*str1 != *str2) return(*str1 - *str2);
+        /* both ended here: never step over the terminator */
+        if (*str1 == 0) return(0);
+        str1++;
+        str2++;
+        len--;
+    }
+    return(0);
+}
+
+/*
+ * xmlStrchr : a strchr for CHAR's
+ *
+ * Returns a pointer to the first occurrence of val in str, or NULL if
+ * val is not found before the terminating zero.
+ */
+
+CHAR *xmlStrchr(const CHAR *str, CHAR val) {
+    const CHAR *p;
+
+    for (p = str; *p != 0; p++) {
+        if (*p == val)
+            return((CHAR *) p);
+    }
+    return(NULL);
+}
+
+/*
+ * xmlParseName : parse an XML name.
+ *
+ * Name ::= (Letter | '_') (NameChar)*
+ *
+ * Returns a copy of the name made with xmlStrndup(), the input pointer
+ * is left just after the name. Returns NULL without moving if the input
+ * doesn't start with a Letter or '_'.
+ */
+
+CHAR *xmlParseName(xmlParserCtxtPtr ctxt) {
+    const CHAR *start = ctxt->cur;
+
+    if (!IS_LETTER(start[0]) && (start[0] != '_'))
+        return(NULL);
+
+    /* The first char was just checked, swallow it then all the NameChar's. */
+    do {
+        ctxt->cur++;
+    } while ((IS_LETTER(ctxt->cur[0])) || (IS_DIGIT(ctxt->cur[0])) ||
+             (ctxt->cur[0] == '.') || (ctxt->cur[0] == '-') ||
+             (ctxt->cur[0] == '_') || (ctxt->cur[0] == ':') ||
+             (IS_COMBINING(ctxt->cur[0])) || (IS_IGNORABLE(ctxt->cur[0])) ||
+             (IS_EXTENDER(ctxt->cur[0])));
+
+    return(xmlStrndup(start, ctxt->cur - start));
+}
+
+/*
+ * Parse and return a string between quotes or doublequotes.
+ *
+ * Returns a copy of the text found between the delimiters, the input
+ * pointer is left after the closing one. Returns NULL if the input
+ * doesn't start with ' or " (nothing is consumed) or if the closing
+ * delimiter is missing (an error is printed, the input pointer is left
+ * where the scan stopped).
+ */
+CHAR *xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
+    const CHAR *start;
+    CHAR *value;
+    CHAR quote = ctxt->cur[0];
+
+    if ((quote != '"') && (quote != '\''))
+        return(NULL);
+
+    start = ++ctxt->cur;
+    while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != quote))
+        ctxt->cur++;
+
+    if (ctxt->cur[0] != quote) {
+        if (quote == '"')
+            fprintf(stderr, "String not closed \"%.50s\n", start);
+        else
+            fprintf(stderr, "String not closed '%.50s\n", start);
+        return(NULL);
+    }
+
+    value = xmlStrndup(start, ctxt->cur - start);
+    ctxt->cur++;        /* step over the closing delimiter */
+    return(value);
+}
+
+/*
+ * Skip an XML (SGML) comment <!-- .... -->
+ *
+ * Nothing is done unless the input starts with "<!--". On success the
+ * input pointer is left just after the closing "-->". If the comment is
+ * not terminated an error is printed and the input pointer is left just
+ * after the opening "<!--".
+ *
+ * TODO !!!! Save the comment in the tree !!!
+ */
+void xmlParserSkipComment(xmlParserCtxtPtr ctxt) {
+    const CHAR *start;
+    const CHAR *p;
+
+    /*
+     * An extra check may avoid errors and isn't that costly !
+     */
+    if ((ctxt->cur[0] != '<') || (ctxt->cur[1] != '!') ||
+        (ctxt->cur[2] != '-') || (ctxt->cur[3] != '-')) return;
+
+    start = ctxt->cur + 4;
+
+    /*
+     * Look for "-->". Each CHAR is tested before the next one is read,
+     * so the scan never goes past the end of a truncated input.
+     */
+    for (p = start; IS_CHAR(p[0]); p++) {
+        if ((p[0] == '-') && (p[1] == '-') && (p[2] == '>')) {
+            ctxt->cur = p + 3;
+            return;
+        }
+    }
+
+    fprintf(stderr, "Comment not terminated <!--%.50s\n", start);
+    ctxt->cur = start; /* !!! We shouldn't really try to recover !!! */
+}
+
+/*
+ * Parse the  S? '=' S? QuotedString  part of a namespace attribute.
+ *
+ * keyLen is the length of the attribute name the input pointer is on.
+ * If no '=' follows the name, only the name and the blanks are consumed.
+ * Otherwise *slot receives the parsed value (NULL if no valid quoted
+ * string was found) and the value it held before is released.
+ */
+static void xmlParseNsAttrValue(xmlParserCtxtPtr ctxt, int keyLen,
+                                CHAR **slot) {
+    CHAR *value;
+
+    ctxt->cur += keyLen;
+    SKIP_BLANKS(ctxt->cur);
+    if (ctxt->cur[0] != '=') return;
+    ctxt->cur++;
+    SKIP_BLANKS(ctxt->cur);
+
+    value = xmlParseQuotedString(ctxt);
+    if (*slot != NULL) free(*slot);     /* attribute given twice */
+    *slot = value;
+    SKIP_BLANKS(ctxt->cur);
+}
+
+/*
+ * xmlParseNamespace: parse specific '<?namespace ...' constructs.
+ *
+ * The "namespace" or "xml:namespace" keyword must already have been
+ * skipped. The "ns" and "prefix" attributes are recognized, as well as
+ * their old encoding "href" and "AS". Anything else is reported on
+ * stderr as garbage. If a namespace reference was found, it is
+ * registered with xmlNewDtd(). The input pointer is left after the
+ * closing '>', or on the end of the input if there is none.
+ */
+
+void xmlParseNamespace(xmlParserCtxtPtr ctxt) {
+    CHAR *href = NULL;
+    CHAR *AS = NULL;
+    int garbage = 0;    /* set while in the middle of a run of garbage */
+
+    /*
+     * We just skipped "namespace" or "xml:namespace"
+     */
+    SKIP_BLANKS(ctxt->cur);
+
+    while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != '>')) {
+        const CHAR *p = ctxt->cur;
+
+        if ((p[0] == 'n') && (p[1] == 's')) {
+            garbage = 0;
+            xmlParseNsAttrValue(ctxt, 2, &href);
+        } else if ((p[0] == 'h') && (p[1] == 'r') &&
+                   (p[2] == 'e') && (p[3] == 'f')) {
+            garbage = 0;
+            xmlParseNsAttrValue(ctxt, 4, &href);
+        } else if ((p[0] == 'p') && (p[1] == 'r') &&
+                   (p[2] == 'e') && (p[3] == 'f') &&
+                   (p[4] == 'i') && (p[5] == 'x')) {
+            garbage = 0;
+            xmlParseNsAttrValue(ctxt, 6, &AS);
+        } else if ((p[0] == 'A') && (p[1] == 'S')) {
+            garbage = 0;
+            xmlParseNsAttrValue(ctxt, 2, &AS);
+        } else if ((p[0] == '?') && (p[1] == '>')) {
+            garbage = 0;
+            ctxt->cur++;        /* now on the '>', which ends the loop */
+        } else {
+            /*
+             * Found garbage when parsing the namespace, the header is
+             * printed only once per run of garbage.
+             */
+            if (!garbage) {
+                fprintf(stderr, "\nxmlParseNamespace found garbage: ");
+                garbage = 1;
+            }
+            fprintf(stderr, "%c", ctxt->cur[0]);
+            ctxt->cur++;
+        }
+    }
+
+    /* Step over the closing '>' but never over the end of the input. */
+    MOVETO_ENDTAG(ctxt->cur);
+    if (ctxt->cur[0] == '>') ctxt->cur++;
+
+    /*
+     * Register the DTD.
+     */
+    if (href != NULL)
+        xmlNewDtd(ctxt->doc, href, AS);
+
+    if (AS != NULL) free(AS);
+    if (href != NULL) free(href);
+}
+
+/*
+ * Check whether the input at p starts with the ASCII keyword kw.
+ * The comparison stops at the first mismatch, so it never reads past the
+ * end of a shorter input. Returns 1 on a match, 0 otherwise.
+ */
+static int xmlPIHasKeyword(const CHAR *p, const char *kw) {
+    while (*kw != 0) {
+        if (*p != (CHAR) *kw) return(0);
+        p++;
+        kw++;
+    }
+    return(1);
+}
+
+/*
+ * xmlParsePI: parse an XML Processing Instruction.
+ *
+ * Nothing is done unless the input starts with "<?". The WebDav
+ * '<?namespace ...' and '<?xml:namespace ...' instructions are handed to
+ * xmlParseNamespace(), any other one is reported on stderr and skipped
+ * up to the next '>' (or to the end of the input if there is none).
+ */
+
+void xmlParsePI(xmlParserCtxtPtr ctxt) {
+    if ((ctxt->cur[0] != '<') || (ctxt->cur[1] != '?')) return;
+
+    /*
+     * this is a Processing Instruction.
+     */
+    ctxt->cur += 2;
+
+    /*
+     * Special for WebDav, support for the Processing Instruction
+     * '<?namespace ...' construct in the header of the XML document.
+     */
+    if (xmlPIHasKeyword(ctxt->cur, "namespace")) {
+        ctxt->cur += 9;
+        xmlParseNamespace(ctxt);
+    } else if (xmlPIHasKeyword(ctxt->cur, "xml:namespace")) {
+        ctxt->cur += 13;
+        xmlParseNamespace(ctxt);
+    } else {
+        /*
+         * Unknown PI, ignore it ! The precision (%.30s, not %30s) is
+         * what limits the output to the first 30 chars.
+         */
+        fprintf(stderr, "xmlParsePI : skipping unknown PI %.30s\n",
+                ctxt->cur);
+        MOVETO_ENDTAG(ctxt->cur);
+        if (ctxt->cur[0] == '>') ctxt->cur++;
+    }
+}
+
+/*
+ * xmlParseAttribute: parse one attribute of a start tag and add it to
+ *                    node with xmlNewProp().
+ *
+ * Attribute ::= Name Eq AttValue
+ *
+ * Nothing is done if the input doesn't start with a Letter or '_'.
+ */
+
+void xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlNodePtr node) {
+    const CHAR *start = ctxt->cur;
+    CHAR *name;
+    CHAR *value = NULL;
+
+    if (!IS_LETTER(start[0]) && (start[0] != '_'))
+        return;
+
+    /* An attribute name is a plain Name. */
+    name = xmlParseName(ctxt);
+
+    /*
+     * We should have the equal, we are laxist here and allow attributes
+     * without values and extra spaces.
+     */
+    SKIP_BLANKS(ctxt->cur);
+    if (ctxt->cur[0] == '=') {
+        ctxt->cur++;
+        SKIP_BLANKS(ctxt->cur);
+        if ((ctxt->cur[0] == '\'') || (ctxt->cur[0] == '"'))
+            value = xmlParseQuotedString(ctxt);
+        else
+            fprintf(stderr, "Quotes were expected for attribute value %.20s\n",
+                    start);
+    }
+
+    /*
+     * Add the attribute to the node, the local copies are released here.
+     */
+    if (name != NULL) {
+        xmlNewProp(node, name, value);
+        free(name);
+    }
+    if (value != NULL)
+        free(value);
+}
+
+/*
+ * Return the position of the first char at or after p which can't be
+ * part of a name; ':' is accepted in the name only if colonOk is set.
+ */
+static const CHAR *xmlSTagNameEnd(const CHAR *p, int colonOk) {
+    while ((IS_LETTER(p[0])) || (IS_DIGIT(p[0])) ||
+           (p[0] == '.') || (p[0] == '-') || (p[0] == '_') ||
+           (colonOk && (p[0] == ':')) ||
+           (IS_COMBINING(p[0])) || (IS_IGNORABLE(p[0])) ||
+           (IS_EXTENDER(p[0])))
+        p++;
+    return(p);
+}
+
+/*
+ * xmlParseStartTag: parse a start of tag.
+ *
+ * Returns the node built with xmlNewNode() with its attributes added,
+ * the input pointer being left on the '>' or "/>" ending the tag (or on
+ * the end of the input). Returns NULL if the input is not a start tag.
+ */
+
+xmlNodePtr xmlParseStartTag(xmlParserCtxtPtr ctxt) {
+    const CHAR *start;
+    CHAR *prefix, *name;
+    xmlDtdPtr dtd = NULL;
+    xmlNodePtr node;
+
+    /*
+     * Theoretically one should just parse a Name, but with the addition
+     * of the namespace needed for WebDav, it's a bit more complicated
+     * since the element name may be prefixed by a namespace prefix.
+     *
+     * QName ::= (NSPart ':')? LocalPart
+     * NSPart ::= Name
+     * LocalPart ::= Name
+     * STag ::= '<' QName (S Attribute)* S? '>'
+     *
+     * instead of :
+     *
+     * STag ::= '<' Name (S Attribute)* S? '>'
+     */
+    if (ctxt->cur[0] != '<')
+        return(NULL);
+    ctxt->cur++;
+    if (!IS_LETTER(ctxt->cur[0]) && (ctxt->cur[0] != '_'))
+        return(NULL);
+
+    /* First name: either the whole element name or the prefix. */
+    start = ctxt->cur;
+    ctxt->cur = xmlSTagNameEnd(start + 1, 0);
+
+    if (ctxt->cur[0] != ':') {
+        name = xmlStrndup(start, ctxt->cur - start);
+    } else {
+        prefix = xmlStrndup(start, ctxt->cur - start);
+
+        ctxt->cur++; /* skip the colon */
+        if (!IS_LETTER(ctxt->cur[0]) && (ctxt->cur[0] != '_')) {
+            fprintf(stderr,
+                "Start tag : no element name after namespace identifier %.20s\n",
+                start);
+            free(prefix);
+            return(NULL);
+        }
+        start = ctxt->cur;
+        ctxt->cur = xmlSTagNameEnd(start + 1, 1);
+        name = xmlStrndup(start, ctxt->cur - start);
+
+        /*
+         * Search the DTD associated to the prefix.
+         */
+        dtd = xmlSearchDtd(ctxt->doc, prefix);
+        if (dtd == NULL)
+            fprintf(stderr, "Start tag : Couldn't find namespace %s\n", prefix);
+        free(prefix);
+    }
+
+    /*
+     * NOTE(review): name is not released here. The other constructors
+     * called in this file are followed by a free() of their arguments,
+     * so this looks like a leak -- confirm that xmlNewNode() copies name.
+     */
+    node = xmlNewNode(dtd, name, NULL);
+
+    /*
+     * Now parse the attributes, it ends up with the ending
+     *
+     * (S Attribute)* S?
+     */
+    SKIP_BLANKS(ctxt->cur);
+    for (;;) {
+        if (!IS_CHAR(ctxt->cur[0]) || (ctxt->cur[0] == '>'))
+            break;
+        if ((ctxt->cur[0] == '/') && (ctxt->cur[1] == '>'))
+            break;
+
+        if (IS_LETTER(ctxt->cur[0]) || (ctxt->cur[0] == '_')) {
+            xmlParseAttribute(ctxt, node);
+        } else {
+            /* We should warn TODO !!! */
+            ctxt->cur++;
+        }
+        SKIP_BLANKS(ctxt->cur);
+    }
+
+    return(node);
+}
+
+/*
+ * Return the position of the first char at or after p which can't be
+ * part of a name; ':' is accepted in the name only if colonOk is set.
+ */
+static const CHAR *xmlETagNameEnd(const CHAR *p, int colonOk) {
+    while ((IS_LETTER(p[0])) || (IS_DIGIT(p[0])) ||
+           (p[0] == '.') || (p[0] == '-') || (p[0] == '_') ||
+           (colonOk && (p[0] == ':')) ||
+           (IS_COMBINING(p[0])) || (IS_IGNORABLE(p[0])) ||
+           (IS_EXTENDER(p[0])))
+        p++;
+    return(p);
+}
+
+/*
+ * xmlParseEndTag: parse an end of tag, note that the '</' part has
+ *                 already been read.
+ *
+ * On return *tagPtr holds a copy of the element name made with
+ * xmlStrndup() and *dtdPtr the DTD found for its namespace prefix, if
+ * any. Both are set to NULL if no valid name could be read.
+ */
+
+void xmlParseEndTag(xmlParserCtxtPtr ctxt, xmlDtdPtr *dtdPtr, CHAR **tagPtr) {
+    const CHAR *start;
+    CHAR *prefix, *name;
+    xmlDtdPtr dtd = NULL;
+
+    *tagPtr = NULL;
+    *dtdPtr = NULL;
+
+    /*
+     * Theoretically one should just parse a Name, but with the addition
+     * of the namespace needed for WebDav, it's a bit more complicated
+     * since the element name may be prefixed by a namespace prefix.
+     *
+     * QName ::= (NSPart ':')? LocalPart
+     * NSPart ::= Name
+     * LocalPart ::= Name
+     * ETag ::= '</' QName S? '>'
+     *
+     * instead of :
+     *
+     * ETag ::= '</' Name S? '>'
+     */
+    if (!IS_LETTER(ctxt->cur[0]) && (ctxt->cur[0] != '_'))
+        return;
+
+    /* First name: either the whole element name or the prefix. */
+    start = ctxt->cur;
+    ctxt->cur = xmlETagNameEnd(start + 1, 0);
+
+    if (ctxt->cur[0] != ':') {
+        name = xmlStrndup(start, ctxt->cur - start);
+    } else {
+        prefix = xmlStrndup(start, ctxt->cur - start);
+
+        ctxt->cur++; /* skip the colon */
+        if (!IS_LETTER(ctxt->cur[0]) && (ctxt->cur[0] != '_')) {
+            fprintf(stderr,
+                "End tag : no element name after namespace identifier %.20s\n",
+                start);
+            free(prefix);
+            return;
+        }
+        start = ctxt->cur;
+        ctxt->cur = xmlETagNameEnd(start + 1, 1);
+        name = xmlStrndup(start, ctxt->cur - start);
+
+        /*
+         * Search the DTD associated to the prefix.
+         */
+        dtd = xmlSearchDtd(ctxt->doc, prefix);
+        if (dtd == NULL)
+            fprintf(stderr, "End tag : Couldn't find namespace %s\n", prefix);
+        free(prefix);
+    }
+
+    *tagPtr = name;
+    *dtdPtr = dtd;
+
+    /*
+     * We should definitely be at the ending "S? '>'" part
+     */
+    SKIP_BLANKS(ctxt->cur);
+    if (ctxt->cur[0] == '>') {
+        ctxt->cur++;
+    } else {
+        fprintf(stderr, "End tag : expected '>', got %.20s\n", ctxt->cur);
+        /*
+         * Note : skipping to the next '>' is probably overkill,
+         * especially in case the '>' is just missing.
+         *
+         * Otherwise add:
+         * MOVETO_ENDTAG(ctxt->cur);
+         */
+    }
+}
+
+/*
+ * xmlParseCDSect: escaped pure raw content.
+ *
+ * The "<![CDATA[" opening must already have been skipped. Returns a copy
+ * of the raw text found before the closing "]]>" (the delimiter itself
+ * is not part of the result), the input pointer being left just after
+ * that delimiter. If the section is not closed an error is printed,
+ * NULL is returned and the input pointer is left where the scan stopped.
+ */
+CHAR *xmlParseCDSect(xmlParserCtxtPtr ctxt) {
+    const CHAR *base = ctxt->cur;
+    const CHAR *p;
+    CHAR *ret;
+
+    /*
+     * Each CHAR is tested before the next one is read, so the scan
+     * never goes past the end of a truncated input.
+     */
+    for (p = base; IS_CHAR(p[0]); p++) {
+        if ((p[0] == ']') && (p[1] == ']') && (p[2] == '>')) {
+            ret = xmlStrndup(base, (int) (p - base));
+            ctxt->cur = p + 3;
+            return(ret);
+        }
+    }
+
+    fprintf(stderr, "CData section not finished : %.20s\n", base);
+    ctxt->cur = p;
+    return(NULL);
+}
+
+/*
+ * xmlParseContent: parse one item of the content of the element node,
+ * a content item being
+ * (element | PCData | Reference | CDSect | PI | Comment)
+ *
+ * element : starts by '<'
+ * PCData : any CHAR but '&' or '<'
+ * Reference : starts by '&'
+ * CDSect : starts by '<![CDATA['
+ * PI : starts by '<?'
+ * Comment : starts by '<!--'
+ *
+ * Returns the new child node to add to node if one was built, NULL
+ * otherwise: PI or comment skipped, blank text dropped, text stored as
+ * the content of node, or error. Unless the end of the input has been
+ * reached, at least one CHAR is consumed by each call.
+ */
+
+xmlNodePtr xmlParseContent(xmlParserCtxtPtr ctxt, xmlNodePtr node) {
+    const CHAR *q;
+    const CHAR *decoded;
+    CHAR *data = NULL;
+    xmlNodePtr ret = NULL;
+
+    /*
+     * First case : a Processing Instruction. Give the hand back to the
+     * caller, it has to check for an end tag before going further.
+     */
+    if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?')) {
+        xmlParsePI(ctxt);
+        return(NULL);
+    }
+    /*
+     * Second case : a comment, which is just skipped.
+     */
+    if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
+        (ctxt->cur[2] == '-') && (ctxt->cur[3] == '-')) {
+        xmlParserSkipComment(ctxt);
+        return(NULL);
+    }
+    /*
+     * Third case : a CDSection
+     */
+    if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
+        (ctxt->cur[2] == '[') && (ctxt->cur[3] == 'C') &&
+        (ctxt->cur[4] == 'D') && (ctxt->cur[5] == 'A') &&
+        (ctxt->cur[6] == 'T') && (ctxt->cur[7] == 'A') &&
+        (ctxt->cur[8] == '[')) {
+        ctxt->cur += 9;
+        data = xmlParseCDSect(ctxt);
+    }
+    /*
+     * Fourth case : a sub-element.
+     */
+    else if (ctxt->cur[0] == '<') {
+        ret = xmlParseElement(ctxt);
+    }
+    /*
+     * Last case, text. Note that References are handled directly.
+     */
+    else {
+        q = ctxt->cur;
+        while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != '<')) ctxt->cur++;
+
+        if (!IS_CHAR(ctxt->cur[0])) {
+            fprintf(stderr, "Truncated content : %.50s\n", q);
+            return(NULL);
+        }
+
+        /*
+         * Do the Entities decoding...
+         * NOTE(review): if xmlDecodeEntities() returns memory owned by
+         * the caller it is leaked here -- confirm its contract.
+         */
+        decoded = xmlDecodeEntities(ctxt->doc, q, ctxt->cur - q);
+        if (decoded != NULL)
+            data = xmlStrdup(decoded);
+    }
+
+    /*
+     * Handle the data if any. If there is no child
+     * add it as content, otherwise create a new node of type text.
+     */
+    if (data != NULL)
+        data = xmlHandleData(data);
+    if (data != NULL) {
+        if (node->childs == NULL)
+            xmlNodeSetContent(node, data);
+        else
+            ret = xmlNewText(data);
+        free(data);
+    }
+
+    return(ret);
+}
+
+/*
+ * xmlParseElement: parse an XML element
+ *
+ * Returns the node built for the element, with its attributes and its
+ * content, or NULL if the start tag is invalid or not closed or if the
+ * input ends before the end tag. A mismatch between the start and the
+ * end tags is only reported on stderr.
+ *
+ * NOTE(review): on the error paths taken after the start tag has been
+ * parsed, the node already built is not released -- confirm which
+ * function of the tree module should be used for that.
+ */
+
+xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt) {
+    xmlNodePtr ret, child;
+    const CHAR *openTag = ctxt->cur;
+    const CHAR *closeTag;
+    CHAR *endTag;
+    xmlDtdPtr endDtd;
+
+    ret = xmlParseStartTag(ctxt);
+    if (ret == NULL) {
+        return(NULL);
+    }
+
+    /*
+     * Check for an Empty Element.
+     */
+    if ((ctxt->cur[0] == '/') && (ctxt->cur[1] == '>')) {
+        ctxt->cur += 2;
+        return(ret);
+    }
+    if (ctxt->cur[0] != '>') {
+        fprintf(stderr, "Couldn't find end of Start Tag %.30s\n", openTag);
+        return(NULL);
+    }
+    ctxt->cur++;
+
+    /*
+     * Parse the content of the element:
+     * (element | PCData | Reference | CDSect | PI | Comment) *
+     *
+     * element : starts by '<'
+     * PCData : any CHAR but '&' or '<'
+     * Reference : starts by '&'
+     * CDSect : starts by '<![CDATA['
+     * PI : starts by '<?'
+     *
+     * The loop stops upon detection of an end of tag '</'
+     */
+    while ((IS_CHAR(ctxt->cur[0])) &&
+           ((ctxt->cur[0] != '<') || (ctxt->cur[1] != '/'))) {
+        child = xmlParseContent(ctxt, ret);
+        if (child != NULL)
+            xmlAddChild(ret, child);
+    }
+    if (!IS_CHAR(ctxt->cur[0])) {
+        fprintf(stderr, "Premature end of data in tag %.30s\n", openTag);
+        return(NULL);
+    }
+
+    /*
+     * parse the end of tag : '</' has been detected. Keep its position
+     * so that the mismatch messages show both tags.
+     */
+    closeTag = ctxt->cur;
+    ctxt->cur += 2;
+    if (ctxt->cur[0] == '>') {
+        ctxt->cur++; /* simplified closing </> */
+        return(ret);
+    }
+
+    xmlParseEndTag(ctxt, &endDtd, &endTag);
+
+    /*
+     * Check that the Name in the ETag is the same as in the STag.
+     */
+    if (endDtd != ret->dtd) {
+        fprintf(stderr, "Start and End tags don't use the same DTD:\n");
+        fprintf(stderr, "\t%.30s\n\t%.30s\n", openTag, closeTag);
+    }
+    /* endTag is NULL when no valid name followed the '</' */
+    if ((endTag == NULL) || (ret->name == NULL) ||
+        strcmp(ret->name, endTag)) {
+        fprintf(stderr, "Start and End tags don't use the same name:\n");
+        fprintf(stderr, "\t%.30s\n\t%.30s\n", openTag, closeTag);
+    }
+
+    if (endTag != NULL)
+        free(endTag);
+
+    return(ret);
+}
+
+/*
+ * xmlParseXMLDecl: parse an XML declaration header
+ *
+ * The input pointer must be on the '<' of "<?xml". A new document is
+ * created with xmlNewDoc() and stored in ctxt->doc, using the version
+ * found in the declaration or XML_DEFAULT_VERSION if there is none. The
+ * input pointer is left after the closing '>', or on the end of the
+ * input if there is none.
+ */
+
+void xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
+    CHAR *version = NULL;
+
+    /*
+     * We know that '<?xml' is here.
+     */
+    ctxt->cur += 5;
+
+    /*
+     * Parse the version info
+     */
+    SKIP_BLANKS(ctxt->cur);
+
+    /*
+     * We should have 'version' Eq here, with Eq ::= S? '=' S?
+     */
+    if ((ctxt->cur[0] == 'v') && (ctxt->cur[1] == 'e') &&
+        (ctxt->cur[2] == 'r') && (ctxt->cur[3] == 's') &&
+        (ctxt->cur[4] == 'i') && (ctxt->cur[5] == 'o') &&
+        (ctxt->cur[6] == 'n')) {
+        ctxt->cur += 7;
+        SKIP_BLANKS(ctxt->cur);
+        if (ctxt->cur[0] == '=') {
+            ctxt->cur++;
+            SKIP_BLANKS(ctxt->cur);
+            version = xmlParseQuotedString(ctxt);
+        }
+    }
+
+    if (version == NULL)
+        ctxt->doc = xmlNewDoc(XML_DEFAULT_VERSION);
+    else {
+        ctxt->doc = xmlNewDoc(version);
+        free(version);
+    }
+
+    /*
+     * We should check for Required Markup Declaration TODO !!!!
+     * Step over the closing '>' but never over the end of the input.
+     */
+    MOVETO_ENDTAG(ctxt->cur);
+    if (ctxt->cur[0] == '>') ctxt->cur++;
+}
+
+/*
+ * xmlParseMisc: parse an XML Misc optional field.
+ * (Comment | PI | S)*
+ *
+ * Processing instructions are handled by xmlParsePI(), comments and
+ * blanks are skipped. The input pointer is left on the first char which
+ * starts none of them.
+ */
+
+void xmlParseMisc(xmlParserCtxtPtr ctxt) {
+    for (;;) {
+        if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?')) {
+            xmlParsePI(ctxt);
+        } else if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
+                   (ctxt->cur[2] == '-') && (ctxt->cur[3] == '-')) {
+            /*
+             * The four chars of "<!--" are checked, exactly like
+             * xmlParserSkipComment() does: it consumes nothing on
+             * anything else, which would make this loop spin forever.
+             */
+            xmlParserSkipComment(ctxt);
+        } else if (IS_BLANK(ctxt->cur[0])) {
+            ctxt->cur++;
+        } else
+            break;
+    }
+}
+
+/*
+ * xmlParseDocument : parse an XML document and build a tree.
+ *
+ * The document is created in ctxt->doc and the tree of the root element
+ * is stored in its root field (NULL if the root couldn't be parsed).
+ *
+ * Returns 0, or -1 if the document couldn't be created.
+ */
+
+int xmlParseDocument(xmlParserCtxtPtr ctxt) {
+    /*
+     * We should check for encoding here and plug-in some
+     * conversion code TODO !!!!
+     */
+
+    /*
+     * Wipe out everything which is before the first '<'
+     */
+    SKIP_BLANKS(ctxt->cur);
+
+    /*
+     * Check for the XMLDecl in the Prolog. The first drafts were using
+     * <?XML and the final W3C REC now use <?xml ...
+     * The keyword must end there: "<?xml:namespace" is a processing
+     * instruction handled with the Misc part below, not an XMLDecl.
+     */
+    if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?') &&
+        (((ctxt->cur[2] == 'x') && (ctxt->cur[3] == 'm') &&
+          (ctxt->cur[4] == 'l')) ||
+         ((ctxt->cur[2] == 'X') && (ctxt->cur[3] == 'M') &&
+          (ctxt->cur[4] == 'L'))) &&
+        (IS_BLANK(ctxt->cur[5]) || (ctxt->cur[5] == '?'))) {
+        xmlParseXMLDecl(ctxt);
+        /* SKIP_EOL(cur); */
+        SKIP_BLANKS(ctxt->cur);
+    } else {
+        ctxt->doc = xmlNewDoc(XML_DEFAULT_VERSION);
+    }
+
+    if (ctxt->doc == NULL) {
+        fprintf(stderr, "xmlParseDocument : couldn't create the document\n");
+        return(-1);
+    }
+
+    /*
+     * The Misc part of the Prolog
+     * (Comment | PI | S) *
+     */
+    xmlParseMisc(ctxt);
+
+    /*
+     * Time to start parsing
+     */
+    ctxt->doc->root = xmlParseElement(ctxt);
+
+    return(0);
+}
+
+/*
+ * xmlParseDoc : parse an XML in-memory document and build a tree.
+ *
+ * cur: the document text, parsed in place
+ *
+ * Returns the document built by xmlParseDocument(), or NULL if cur is
+ * NULL or the parser context can't be allocated.
+ */
+
+xmlDocPtr xmlParseDoc(CHAR *cur) {
+    xmlParserCtxtPtr ctxt;
+    xmlDocPtr doc;
+
+    if (cur == NULL)
+        return(NULL);
+
+    ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
+    if (ctxt == NULL) {
+        perror("malloc");
+        return(NULL);
+    }
+
+    /* The context is only needed for the duration of the parsing. */
+    xmlInitParserCtxt(ctxt);
+    ctxt->cur = cur;
+    ctxt->base = cur;
+    xmlParseDocument(ctxt);
+
+    doc = ctxt->doc;
+    free(ctxt->nodes);
+    free(ctxt);
+    return(doc);
+}
+
+/*
+ * xmlParseFile : parse an XML file and build a tree.
+ *
+ * The whole file is loaded in a temporary buffer (through zlib if it is
+ * available, so that compressed files can be read too), parsed, then
+ * the buffer is released.
+ *
+ * Returns the document built by xmlParseDocument(), or NULL if the file
+ * can't be read or if an allocation fails.
+ */
+
+xmlDocPtr xmlParseFile(const char *filename) {
+    xmlDocPtr ret;
+#ifdef HAVE_ZLIB_H
+    gzFile input;
+#else
+    int input;
+#endif
+    int res;
+    size_t capacity;
+    struct stat buf;
+    char *buffer;
+    xmlParserCtxtPtr ctxt;
+
+    if (filename == NULL) return(NULL);
+    if (stat(filename, &buf) < 0) return(NULL);
+
+#ifdef HAVE_ZLIB_H
+    /* The file may be compressed: start with a 20x expansion guess. */
+    capacity = (size_t) buf.st_size * 20;
+#else
+    capacity = (size_t) buf.st_size;
+#endif
+
+    for (;;) {
+        /* 100 extra bytes leave room for the final zero. */
+        buffer = malloc(capacity + 100);
+        if (buffer == NULL) {
+            perror("malloc");
+            return(NULL);
+        }
+        /* Clear the whole buffer, sizeof(buffer) is just a pointer size. */
+        memset(buffer, 0, capacity + 100);
+
+#ifdef HAVE_ZLIB_H
+        input = gzopen (filename, "r");
+        if (input == NULL) {
+            fprintf (stderr, "Cannot read file %s :\n", filename);
+            perror ("gzopen failed");
+            free(buffer);
+            return(NULL);
+        }
+        res = gzread(input, buffer, (unsigned) capacity);
+        if (res < 0) {
+            fprintf (stderr, "Cannot read file %s :\n", filename);
+            perror ("gzread failed");
+        }
+        gzclose(input);
+#else
+        input = open (filename, O_RDONLY);
+        if (input < 0) {
+            fprintf (stderr, "Cannot read file %s :\n", filename);
+            perror ("open failed");
+            free(buffer);
+            return(NULL);
+        }
+        res = read(input, buffer, capacity);
+        if (res < 0) {
+            fprintf (stderr, "Cannot read file %s :\n", filename);
+            perror ("read failed");
+        }
+        close(input);
+#endif
+        if (res < 0) {
+            free(buffer);
+            return(NULL);
+        }
+
+#ifdef HAVE_ZLIB_H
+        /*
+         * A completely filled buffer probably means there is more data:
+         * retry with a bigger one. An empty file reads 0 bytes and must
+         * not trigger the retry.
+         */
+        if ((res > 0) && ((size_t) res >= capacity)) {
+            free(buffer);
+            capacity *= 2;
+            continue;
+        }
+#endif
+        break;
+    }
+
+    /* Terminate after what was really read, it may be less than st_size. */
+    buffer[res] = '\0';
+
+    ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
+    if (ctxt == NULL) {
+        perror("malloc");
+        free(buffer);
+        return(NULL);
+    }
+
+    xmlInitParserCtxt(ctxt);
+    ctxt->filename = filename;
+    ctxt->base = buffer;
+    ctxt->cur = buffer;
+
+    xmlParseDocument(ctxt);
+    ret = ctxt->doc;
+    free(buffer);
+    free(ctxt->nodes);
+    free(ctxt);
+
+    return(ret);
+}
+
+/*
+ * xmlParseMemory : parse an XML memory block and build a tree.
+ *
+ * buffer: the block holding the document, it is not modified
+ * size: the number of bytes of the block to parse
+ *
+ * The parser needs a zero terminated input, so the block is parsed from
+ * a terminated private copy, released once the tree is built (this is
+ * what xmlParseFile() does with its own buffer).
+ *
+ * Returns the document built by xmlParseDocument(), or NULL if buffer
+ * is NULL, size is not positive or an allocation fails.
+ */
+
+xmlDocPtr xmlParseMemory(char *buffer, int size) {
+    xmlDocPtr ret;
+    xmlParserCtxtPtr ctxt;
+    char *copy;
+
+    if ((buffer == NULL) || (size <= 0)) return(NULL);
+
+    copy = malloc((size_t) size + 1);
+    if (copy == NULL) {
+        perror("malloc");
+        return(NULL);
+    }
+    memcpy(copy, buffer, (size_t) size);
+    copy[size] = '\0';
+
+    ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
+    if (ctxt == NULL) {
+        perror("malloc");
+        free(copy);
+        return(NULL);
+    }
+
+    xmlInitParserCtxt(ctxt);
+    ctxt->base = copy;
+    ctxt->cur = copy;
+
+    xmlParseDocument(ctxt);
+    ret = ctxt->doc;
+    free(copy);
+    free(ctxt->nodes);
+    free(ctxt);
+
+    return(ret);
+}
+
+
+
+
+/*
+ * Initialize parser context
+ *
+ * Every field is reset and a stack of max_depth (10) node pointers, all
+ * NULL, is allocated in ctxt->nodes. If that allocation fails an error
+ * is printed, ctxt->nodes is NULL and max_depth is set to 0.
+ */
+void xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
+{
+    int i;
+
+    ctxt->filename = NULL;
+    ctxt->base = NULL;
+    ctxt->cur = NULL;
+    ctxt->line = 1;
+    ctxt->col = 1;
+    ctxt->doc = NULL;
+    ctxt->depth = 0;
+    ctxt->max_depth = 10;
+    ctxt->nodes = (xmlNodePtr *) malloc(ctxt->max_depth * sizeof(xmlNodePtr));
+    if (ctxt->nodes == NULL) {
+        /* the size is a size_t: it cannot be printed with %d */
+        fprintf(stderr, "malloc of %lu byte failed\n",
+                (unsigned long) (ctxt->max_depth * sizeof(xmlNodePtr)));
+        ctxt->max_depth = 0;
+        return;
+    }
+    for (i = 0;i < ctxt->max_depth;i++)
+        ctxt->nodes[i] = NULL;
+}
+
+
+/*
+ * Reinitialize a parser context, by calling xmlInitParserCtxt() on it.
+ *
+ * NOTE(review): despite its name nothing is released here, and
+ * xmlInitParserCtxt() stores a freshly allocated array in ctx->nodes,
+ * so the array of an already initialized context looks leaked --
+ * confirm whether the callers are expected to free it first.
+ */
+void xmlClearParserCtxt(xmlParserCtxtPtr ctx)
+{
+ xmlInitParserCtxt(ctx);
+}
+
+
+/*
+ * Setup the parser context to parse a new buffer; the context is first
+ * reset with xmlClearParserCtxt(), then its base and cur fields are set
+ * to the start of the buffer and its filename field to filename. The
+ * buffer parameter must not be NULL, but the filename parameter can be
+ * NULL.
+ */
+void xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const CHAR* buffer,
+ const char* filename)
+{
+ xmlClearParserCtxt(ctxt);
+ ctxt->base = buffer;
+ ctxt->cur = buffer;
+ ctxt->filename = filename;
+}
+
+
+
+/*
+ * Report an error message: msg is written as is on stderr, nothing is
+ * written if it is NULL. The parser context is not used yet.
+ */
+void xmlReportError(xmlParserCtxtPtr ctx, const CHAR* msg)
+{
+    (void) ctx;
+    if (msg == NULL) return;
+    fputs((const char *) msg, stderr);
+}