1 files changed, 1183 insertions, 0 deletions
diff --git a/xml_parser.c b/xml_parser.c
new file mode 100644
index 00000000..dfec5a78
--- /dev/null
+++ b/xml_parser.c
@@ -0,0 +1,1183 @@
+/*
+ * xml_parser.c : an XML 1.0 non-validating parser
+ *
+ * See Copyright for the status of this software.
+ *
+ * $Id$
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <string.h> /* for memset() only */
+#include <malloc.h>
+#include <sys/stat.h>
+#ifdef HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#ifdef HAVE_ZLIB_H
+#include <zlib.h>
+#endif
+
+#include "xml_tree.h"
+#include "xml_parser.h"
+#include "xml_entities.h"
+
+/*
+ * A few macros needed to help building the parser.
+ */
+
+/*
+ * Character class macros. Each one takes a single character code and
+ * yields a non-zero value when the code belongs to the class. The
+ * argument is evaluated several times, so it must be free of side
+ * effects.
+ *
+ * NOTE(review): IS_BASECHAR used to contain the clause
+ * "(c >= 0xaa) && (c <= 0x5b)", which can never be true. It is replaced
+ * by explicit tests for 0xaa and 0xb5, on the assumption that those two
+ * code points were intended next to the existing 0xba test - confirm
+ * against the XML draft this parser targets.
+ */
+#ifdef UNICODE
+/*
+ * UNICODE version of the macros. Incomplete now TODO !!!!
+ */
+#define IS_CHAR(c) \
+    (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || \
+     (((c) >= 0x20) && ((c) != 0xFFFE) && ((c) != 0xFFFF)))
+
+/* Only the Latin-1 part of BaseChar is covered. TODO !!!! */
+#define IS_BASECHAR(c) \
+    ((((c) >= 0x41) && ((c) <= 0x5a)) || \
+     (((c) >= 0x61) && ((c) <= 0x7a)) || \
+     ((c) == 0xaa) || ((c) == 0xb5) || ((c) == 0xba) || \
+     (((c) >= 0xc0) && ((c) <= 0xd6)) || \
+     (((c) >= 0xd8) && ((c) <= 0xf6)) || \
+     (((c) >= 0xf8) && ((c) <= 0xff)))
+
+/* Only the ASCII digits are covered. TODO !!!! */
+#define IS_DIGIT(c) (((c) >= 0x30) && ((c) <= 0x39))
+
+/* Combining characters are not recognized yet. TODO !!!! */
+#define IS_COMBINING(c) 0
+
+#define IS_IGNORABLE(c) \
+    ((((c) >= 0x200c) && ((c) <= 0x200f)) || \
+     (((c) >= 0x202a) && ((c) <= 0x202e)) || \
+     (((c) >= 0x206a) && ((c) <= 0x206f)) || \
+      ((c) == 0xfeff))
+
+#define IS_EXTENDER(c) \
+    (((c) == 0xb7) || ((c) == 0x2d0) || ((c) == 0x2d1) || \
+     ((c) == 0x387) || ((c) == 0x640) || ((c) == 0xe46) || \
+     ((c) == 0xec6) || ((c) == 0x3005) || \
+     (((c) >= 0x3031) && ((c) <= 0x3035)) || \
+     (((c) >= 0x309b) && ((c) <= 0x309e)) || \
+     (((c) >= 0x30fc) && ((c) <= 0x30fe)) || \
+     (((c) >= 0xff70) && ((c) <= 0xff9e)) || \
+      ((c) == 0xff9f))
+
+#define IS_IDEOGRAPHIC(c) \
+    ((((c) >= 0x4e00) && ((c) <= 0x9fa5)) || \
+     (((c) >= 0xf900) && ((c) <= 0xfa2d)) || \
+     (((c) >= 0x3021) && ((c) <= 0x3029)) || \
+      ((c) == 0x3007))
+
+#define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))
+
+/* White space as in the XML "S" production: space, tab, LF and CR. */
+#define IS_BLANK(c) \
+    (((c) == 0x20) || ((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d))
+#else
+/*
+ * 8bits / ASCII version of the macros.
+ */
+#define IS_CHAR(c) \
+    (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || ((c) >= 0x20))
+
+#define IS_BASECHAR(c) \
+    ((((c) >= 0x41) && ((c) <= 0x5a)) || \
+     (((c) >= 0x61) && ((c) <= 0x7a)) || \
+     ((c) == 0xaa) || ((c) == 0xb5) || ((c) == 0xba) || \
+     (((c) >= 0xc0) && ((c) <= 0xd6)) || \
+     (((c) >= 0xd8) && ((c) <= 0xf6)) || \
+     (((c) >= 0xf8) && ((c) <= 0xff)))
+
+#define IS_DIGIT(c) (((c) >= 0x30) && ((c) <= 0x39))
+
+#define IS_LETTER(c) IS_BASECHAR(c)
+
+#define IS_COMBINING(c) 0
+
+#define IS_IGNORABLE(c) 0
+
+#define IS_EXTENDER(c) ((c) == 0xb7)
+
+/* White space as in the XML "S" production: space, tab, LF and CR. */
+#define IS_BLANK(c) \
+    (((c) == 0x20) || ((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d))
+#endif
+
+
+/*
+ * Cursor helpers. "p" must be a modifiable pointer expression without
+ * side effects (it is evaluated several times). Each macro expands to a
+ * single statement and is meant to be followed by a semicolon.
+ */
+
+/* Skip one end of line: CR, LF, CR LF or LF CR. */
+#define SKIP_EOL(p) \
+    do { \
+        if (*(p) == 0x0d) { \
+            (p)++; \
+            if (*(p) == 0x0a) (p)++; \
+        } else if (*(p) == 0x0a) { \
+            (p)++; \
+            if (*(p) == 0x0d) (p)++; \
+        } \
+    } while (0)
+
+/* Advance over any run of blank characters. */
+#define SKIP_BLANKS(p) \
+    do { while (IS_BLANK(*(p))) (p)++; } while (0)
+
+/* Advance to the next '>' or to the end of the data, whichever is first. */
+#define MOVETO_ENDTAG(p) \
+    do { while (IS_CHAR(*(p)) && (*(p) != '>')) (p)++; } while (0)
+
+/* Advance to the next '<' or to the end of the data, whichever is first. */
+#define MOVETO_STARTTAG(p) \
+    do { while (IS_CHAR(*(p)) && (*(p) != '<')) (p)++; } while (0)
+
+/*
+ * Forward declaration for recursive behaviour.
+ */
+xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt);
+
+/*
+ * xmlHandleData : application specific filter applied to a piece of
+ *    text read by the parser.
+ *
+ * For example in WebDav, any piece made only of blanks is eliminated:
+ * the buffer is then freed and NULL is returned. As soon as one
+ * non-blank character is found the buffer is returned untouched.
+ * A NULL input yields NULL.
+ */
+
+CHAR *xmlHandleData(CHAR *in) {
+    const CHAR *scan;
+
+    if (in == NULL)
+        return(NULL);
+
+    for (scan = in; IS_CHAR(*scan); scan++) {
+        if (!IS_BLANK(*scan))
+            return(in);        /* real content: keep the text */
+    }
+
+    /* Nothing but blanks before the end of the data: drop the text. */
+    free(in);
+    return(NULL);
+}
+
+/*
+ * xmlStrndup : a strndup for array of CHAR's
+ *
+ * Copies exactly "len" CHARs from "cur" into a freshly allocated,
+ * zero terminated array that the caller must free().
+ * Returns NULL if "cur" is NULL, if "len" is negative or if the
+ * allocation fails (a message is then printed on stderr).
+ */
+
+CHAR *xmlStrndup(const CHAR *cur, int len) {
+    CHAR *ret;
+
+    if ((cur == NULL) || (len < 0)) return(NULL);
+
+    /* The size is computed in size_t so that len + 1 cannot overflow. */
+    ret = malloc(((size_t) len + 1) * sizeof(CHAR));
+    if (ret == NULL) {
+        /* The cast makes the argument match the %d conversion. */
+        fprintf(stderr, "malloc of %d byte failed\n",
+                (int) (((size_t) len + 1) * sizeof(CHAR)));
+        return(NULL);
+    }
+    memcpy(ret, cur, (size_t) len * sizeof(CHAR));
+    ret[len] = 0;
+    return(ret);
+}
+
+/*
+ * xmlStrdup : a strdup for CHAR's
+ *
+ * Duplicates the zero terminated CHAR string "cur" through
+ * xmlStrndup(); the caller must free() the result.
+ * Returns NULL if "cur" is NULL or if the allocation fails.
+ */
+
+CHAR *xmlStrdup(const CHAR *cur) {
+    const CHAR *p = cur;
+
+    if (cur == NULL) return(NULL);
+
+    /*
+     * Stop on the terminator only: stopping on the first non-CHAR code
+     * would silently truncate strings holding e.g. a form feed.
+     */
+    while (*p != 0) p++;
+    return(xmlStrndup(cur, p - cur));
+}
+
+/*
+ * xmlStrcmp : a strcmp for CHAR's
+ *
+ * Returns 0 when both strings are equal, otherwise the difference
+ * between the first pair of CHARs that differ (negative when str1
+ * sorts first). A NULL pointer sorts before any string and two NULL
+ * pointers compare equal.
+ */
+
+int xmlStrcmp(const CHAR *str1, const CHAR *str2) {
+    int tmp;
+
+    if (str1 == str2) return(0);
+    if (str1 == NULL) return(-1);
+    if (str2 == NULL) return(1);
+
+    for (;;) {
+        tmp = *str1 - *str2;
+        if (tmp != 0) return(tmp);
+        /*
+         * Both CHARs are equal: stop on the shared terminator before
+         * moving on, so nothing past the end of the strings is read.
+         */
+        if (*str1 == 0) return(0);
+        str1++;
+        str2++;
+    }
+}
+
+/*
+ * xmlStrncmp : a strncmp for CHAR's
+ *
+ * Compares at most "len" CHARs. Returns 0 when the compared parts are
+ * equal (or when len <= 0), otherwise the difference between the first
+ * pair of CHARs that differ. A NULL pointer sorts before any string and
+ * two NULL pointers compare equal.
+ */
+
+int xmlStrncmp(const CHAR *str1, const CHAR *str2, int len) {
+    int tmp;
+
+    if (len <= 0) return(0);
+    if (str1 == str2) return(0);
+    if (str1 == NULL) return(-1);
+    if (str2 == NULL) return(1);
+
+    for (; len > 0; len--, str1++, str2++) {
+        tmp = *str1 - *str2;
+        if (tmp != 0) return(tmp);
+        /* Shared terminator reached: never read past it. */
+        if (*str1 == 0) return(0);
+    }
+    return(0);
+}
+
+/*
+ * xmlStrchr : a strchr for CHAR's
+ *
+ * Returns a pointer to the first occurrence of "val" in the zero
+ * terminated string "str", or NULL when "val" does not occur before
+ * the terminator.
+ */
+
+CHAR *xmlStrchr(const CHAR *str, CHAR val) {
+    const CHAR *scan;
+
+    for (scan = str; *scan != 0; scan++) {
+        if (*scan == val)
+            return((CHAR *) scan);
+    }
+    return(NULL);
+}
+
+/*
+ * xmlParseName : parse an XML name.
+ *
+ * Name ::= (Letter | '_') (NameChar)*
+ *
+ * On success the cursor is left on the first character following the
+ * name and a copy of the name, allocated by xmlStrndup(), is returned.
+ * When the cursor is not on a Letter or '_' nothing is consumed and
+ * NULL is returned.
+ */
+
+CHAR *xmlParseName(xmlParserCtxtPtr ctxt) {
+    const CHAR *first = ctxt->cur;
+
+    if (!(IS_LETTER(first[0]) || (first[0] == '_')))
+        return(NULL);
+
+    /* The first character is accepted, now swallow the NameChar's. */
+    ctxt->cur++;
+    for (;;) {
+        CHAR c = ctxt->cur[0];
+
+        if (!(IS_LETTER(c) || IS_DIGIT(c) ||
+              (c == '.') || (c == '-') || (c == '_') || (c == ':') ||
+              IS_COMBINING(c) || IS_IGNORABLE(c) || IS_EXTENDER(c)))
+            break;
+        ctxt->cur++;
+    }
+
+    return(xmlStrndup(first, ctxt->cur - first));
+}
+
+/*
+ * xmlParseQuotedString : parse and return a string between quotes or
+ *    doublequotes.
+ *
+ * The cursor must be on the opening quote, otherwise nothing is
+ * consumed and NULL is returned. On success the cursor is moved past
+ * the closing quote and a copy of the text between the quotes
+ * (allocated by xmlStrndup()) is returned. When the closing quote is
+ * missing a message is printed on stderr, NULL is returned and the
+ * cursor is left where the scan stopped.
+ */
+CHAR *xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
+    CHAR quote = ctxt->cur[0];
+    const CHAR *body;
+    CHAR *ret;
+
+    if ((quote != '"') && (quote != '\''))
+        return(NULL);
+
+    /* Step over the opening quote and look for the matching one. */
+    ctxt->cur++;
+    body = ctxt->cur;
+    while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != quote))
+        ctxt->cur++;
+
+    if (ctxt->cur[0] != quote) {
+        if (quote == '"')
+            fprintf(stderr, "String not closed \"%.50s\n", body);
+        else
+            fprintf(stderr, "String not closed '%.50s\n", body);
+        return(NULL);
+    }
+
+    ret = xmlStrndup(body, ctxt->cur - body);
+    ctxt->cur++;
+    return(ret);
+}
+
+/*
+ * xmlParserSkipComment : skip an XML (SGML) comment <!-- .... -->
+ *
+ * Nothing is done unless the cursor is on "<!--". On success the cursor
+ * is moved past the closing "-->". When the comment is not terminated
+ * a message is printed on stderr and the cursor is left just after the
+ * opening "<!--".
+ *
+ * TODO !!!! Save the comment in the tree !!!
+ */
+void xmlParserSkipComment(xmlParserCtxtPtr ctxt) {
+    const CHAR *q, *start;
+    const CHAR *r;
+
+    /*
+     * An extra check may avoid errors and isn't that costly !
+     */
+    if ((ctxt->cur[0] != '<') || (ctxt->cur[1] != '!') ||
+        (ctxt->cur[2] != '-') || (ctxt->cur[3] != '-')) return;
+
+    ctxt->cur += 4;
+    start = q = ctxt->cur;
+
+    /*
+     * The closing "-->" needs two characters in front of the '>'.
+     * Check them one by one so that a truncated comment never moves
+     * the cursor beyond the end of the data.
+     */
+    if (!IS_CHAR(ctxt->cur[0]) || !IS_CHAR(ctxt->cur[1])) {
+        fprintf(stderr, "Comment not terminated <!--%.50s\n", start);
+        ctxt->cur = start;
+        return;
+    }
+    r = q + 1;
+    ctxt->cur += 2;
+
+    /* q, r and the cursor slide together until they read "-->". */
+    while (IS_CHAR(ctxt->cur[0]) &&
+           ((ctxt->cur[0] != '>') || (*r != '-') || (*q != '-'))) {
+        ctxt->cur++;r++;q++;
+    }
+    if (!IS_CHAR(ctxt->cur[0])) {
+        fprintf(stderr, "Comment not terminated <!--%.50s\n", start);
+        ctxt->cur = start; /* !!! We shouldn't really try to recover !!! */
+    } else {
+        ctxt->cur++;
+    }
+}
+
+/*
+ * xmlParseNamespaceValue : helper for xmlParseNamespace(), called with
+ *    the cursor just after an attribute name. Parses "S? '=' S? string
+ *    S?" and stores the string in *slot, releasing the value stored by
+ *    a previous occurrence of the attribute. Without an '=' only the
+ *    blanks are consumed and *slot is left untouched.
+ */
+
+static void xmlParseNamespaceValue(xmlParserCtxtPtr ctxt, CHAR **slot) {
+    CHAR *value;
+
+    SKIP_BLANKS(ctxt->cur);
+    if (ctxt->cur[0] != '=') return;
+    ctxt->cur++;
+    SKIP_BLANKS(ctxt->cur);
+
+    value = xmlParseQuotedString(ctxt);
+    if (*slot != NULL) free(*slot);
+    *slot = value;
+    SKIP_BLANKS(ctxt->cur);
+}
+
+/*
+ * xmlParseNamespace: parse specific '<?namespace ...' constructs.
+ *
+ * Called with the cursor just after "namespace" or "xml:namespace".
+ * Reads the "ns"/"href" and "prefix"/"AS" attributes up to the closing
+ * '>', and registers a DTD through xmlNewDtd() when an href was found.
+ * Unexpected characters are echoed on stderr. The cursor is left after
+ * the closing '>', or on the end of the data if there is none.
+ */
+
+void xmlParseNamespace(xmlParserCtxtPtr ctxt) {
+    CHAR *href = NULL;
+    CHAR *AS = NULL;
+    int garbage = 0;    /* set while inside a run of unexpected chars */
+
+    /*
+     * We just skipped "namespace" or "xml:namespace"
+     */
+    SKIP_BLANKS(ctxt->cur);
+
+    while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != '>')) {
+        /*
+         * We can have "ns" or "prefix" attributes
+         * Old encoding as 'href' or 'AS' attributes is still supported
+         */
+        if ((ctxt->cur[0] == 'n') && (ctxt->cur[1] == 's')) {
+            garbage = 0;
+            ctxt->cur += 2;
+            xmlParseNamespaceValue(ctxt, &href);
+        } else if ((ctxt->cur[0] == 'h') && (ctxt->cur[1] == 'r') &&
+                   (ctxt->cur[2] == 'e') && (ctxt->cur[3] == 'f')) {
+            garbage = 0;
+            ctxt->cur += 4;
+            xmlParseNamespaceValue(ctxt, &href);
+        } else if ((ctxt->cur[0] == 'p') && (ctxt->cur[1] == 'r') &&
+                   (ctxt->cur[2] == 'e') && (ctxt->cur[3] == 'f') &&
+                   (ctxt->cur[4] == 'i') && (ctxt->cur[5] == 'x')) {
+            garbage = 0;
+            ctxt->cur += 6;
+            xmlParseNamespaceValue(ctxt, &AS);
+        } else if ((ctxt->cur[0] == 'A') && (ctxt->cur[1] == 'S')) {
+            garbage = 0;
+            ctxt->cur += 2;
+            xmlParseNamespaceValue(ctxt, &AS);
+        } else if ((ctxt->cur[0] == '?') && (ctxt->cur[1] == '>')) {
+            /* End of the PI: step on the '>' which stops the loop. */
+            garbage = 0;
+            ctxt->cur ++;
+        } else {
+            /*
+             * Found garbage when parsing the namespace, the header is
+             * printed once per run of unexpected characters.
+             */
+            if (!garbage) {
+                fprintf(stderr,
+                        "\nxmlParseNamespace found garbage: ");
+                garbage = 1;
+            }
+            fprintf(stderr, "%c", ctxt->cur[0]);
+            ctxt->cur++;
+        }
+    }
+
+    /*
+     * The loop stopped on '>' or on the end of the data: only step
+     * over a real '>' so that the cursor never leaves the buffer.
+     */
+    if (ctxt->cur[0] == '>') ctxt->cur++;
+
+    /*
+     * Register the DTD.
+     */
+    if (href != NULL)
+        xmlNewDtd(ctxt->doc, href, AS);
+
+    if (AS != NULL) free(AS);
+    if (href != NULL) free(href);
+}
+
+/*
+ * xmlParsePI: parse an XML Processing Instruction.
+ *
+ * Nothing is done unless the cursor is on "<?". The WebDav specific
+ * '<?namespace ...' and '<?xml:namespace ...' constructs are handed
+ * over to xmlParseNamespace(); any other PI is reported on stderr and
+ * skipped up to the next '>'. The cursor is left after that '>', or on
+ * the end of the data if there is none.
+ */
+
+void xmlParsePI(xmlParserCtxtPtr ctxt) {
+    if ((ctxt->cur[0] != '<') || (ctxt->cur[1] != '?'))
+        return;
+
+    /*
+     * this is a Processing Instruction.
+     */
+    ctxt->cur += 2;
+
+    /*
+     * Special for WebDav, support for the Processing Instruction
+     * '<?namespace ...' construct in the header of the XML document.
+     */
+    if ((ctxt->cur[0] == 'n') && (ctxt->cur[1] == 'a') &&
+        (ctxt->cur[2] == 'm') && (ctxt->cur[3] == 'e') &&
+        (ctxt->cur[4] == 's') && (ctxt->cur[5] == 'p') &&
+        (ctxt->cur[6] == 'a') && (ctxt->cur[7] == 'c') &&
+        (ctxt->cur[8] == 'e')) {
+        ctxt->cur += 9;
+        xmlParseNamespace(ctxt);
+        return;
+    }
+    if ((ctxt->cur[0] == 'x') && (ctxt->cur[1] == 'm') &&
+        (ctxt->cur[2] == 'l') && (ctxt->cur[3] == ':') &&
+        (ctxt->cur[4] == 'n') && (ctxt->cur[5] == 'a') &&
+        (ctxt->cur[6] == 'm') && (ctxt->cur[7] == 'e') &&
+        (ctxt->cur[8] == 's') && (ctxt->cur[9] == 'p') &&
+        (ctxt->cur[10] == 'a') && (ctxt->cur[11] == 'c') &&
+        (ctxt->cur[12] == 'e')) {
+        ctxt->cur += 13;
+        xmlParseNamespace(ctxt);
+        return;
+    }
+
+    /*
+     * Unknown PI, ignore it ! "%.30s" limits the excerpt to 30 chars
+     * ("%30s" is a minimum width and would print all the remaining data).
+     */
+    fprintf(stderr, "xmlParsePI : skipping unknown PI %.30s\n",
+            ctxt->cur);
+    MOVETO_ENDTAG(ctxt->cur);
+    /* Only step over a real '>', never over the end of the data. */
+    if (ctxt->cur[0] == '>') ctxt->cur++;
+}
+
+/*
+ * xmlParseAttribute: parse one attribute of a start tag and attach it
+ *    to the node.
+ *
+ * Attribute ::= Name Eq AttValue
+ *
+ * Nothing is done unless the cursor is on a Letter or '_'. The name is
+ * read, then an optional '=' followed by a quoted value; the pair is
+ * given to xmlNewProp() and the local copies are freed. A value that
+ * is not quoted is reported on stderr and the attribute is recorded
+ * without value.
+ */
+
+void xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlNodePtr node) {
+    const CHAR *begin;
+    CHAR *name;
+    CHAR *value = NULL;
+
+    if (!(IS_LETTER(ctxt->cur[0]) || (ctxt->cur[0] == '_')))
+        return;
+
+    /* Read the attribute name. */
+    begin = ctxt->cur;
+    ctxt->cur++;
+    for (;;) {
+        CHAR c = ctxt->cur[0];
+
+        if (!(IS_LETTER(c) || IS_DIGIT(c) ||
+              (c == '.') || (c == '-') || (c == '_') || (c == ':') ||
+              IS_COMBINING(c) || IS_IGNORABLE(c) || IS_EXTENDER(c)))
+            break;
+        ctxt->cur++;
+    }
+    name = xmlStrndup(begin, ctxt->cur - begin);
+
+    /*
+     * The '=' should come next; we are lax here and accept attributes
+     * without value as well as extra spaces around the '='.
+     */
+    SKIP_BLANKS(ctxt->cur);
+    if (ctxt->cur[0] == '=') {
+        ctxt->cur++;
+        SKIP_BLANKS(ctxt->cur);
+        if ((ctxt->cur[0] == '\'') || (ctxt->cur[0] == '"')) {
+            value = xmlParseQuotedString(ctxt);
+        } else {
+            fprintf(stderr, "Quotes were expected for attribute value %.20s\n",
+                    begin);
+        }
+    }
+
+    /* Record the attribute on the node, then drop the local copies. */
+    if (name != NULL) {
+        xmlNewProp(node, name, value);
+        free(name);
+    }
+    if (value != NULL)
+        free(value);
+}
+
+/*
+ * xmlParseStartTag: parse a start of tag.
+ *
+ * In theory one should just parse a Name, but with the addition of
+ * the namespace needed for WebDav the element name may be prefixed by
+ * a namespace prefix:
+ *
+ * QName ::= (NSPart ':')? LocalPart
+ * NSPart ::= Name
+ * LocalPart ::= Name
+ * STag ::= '<' QName (S Attribute)* S? '>'
+ *
+ * instead of :
+ *
+ * STag ::= '<' Name (S Attribute)* S? '>'
+ *
+ * Returns the node built by xmlNewNode() with its attributes attached,
+ * leaving the cursor on the '>' or "/>" closing the tag (or on the end
+ * of the data). Returns NULL when the cursor is not on '<', when '<'
+ * is not followed by a name, or when a prefix is not followed by a
+ * local name.
+ *
+ * NOTE(review): the copy of the element name is not freed after
+ * xmlNewNode(); whether that call takes ownership is not visible
+ * here - verify against xml_tree.
+ */
+
+xmlNodePtr xmlParseStartTag(xmlParserCtxtPtr ctxt) {
+    const CHAR *mark;
+    CHAR *prefix, *local;
+    xmlDtdPtr dtd = NULL;
+    xmlNodePtr node = NULL;
+
+    if (ctxt->cur[0] != '<')
+        return(NULL);
+    ctxt->cur++;
+
+    if (!(IS_LETTER(ctxt->cur[0]) || (ctxt->cur[0] == '_')))
+        return(NULL);
+
+    /* First name: either the whole element name or the prefix. */
+    mark = ctxt->cur;
+    ctxt->cur++;
+    for (;;) {
+        CHAR c = ctxt->cur[0];
+
+        if (!(IS_LETTER(c) || IS_DIGIT(c) ||
+              (c == '.') || (c == '-') || (c == '_') ||
+              IS_COMBINING(c) || IS_IGNORABLE(c) || IS_EXTENDER(c)))
+            break;
+        ctxt->cur++;
+    }
+
+    if (ctxt->cur[0] != ':') {
+        local = xmlStrndup(mark, ctxt->cur - mark);
+    } else {
+        prefix = xmlStrndup(mark, ctxt->cur - mark);
+
+        ctxt->cur++; /* skip the colon */
+        if (!(IS_LETTER(ctxt->cur[0]) || (ctxt->cur[0] == '_'))) {
+            fprintf(stderr,
+               "Start tag : no element name after namespace identifier %.20s\n",
+                    mark);
+            free(prefix);
+            return(NULL);
+        }
+
+        /* Local part, further ':' are accepted inside it. */
+        mark = ctxt->cur;
+        ctxt->cur++;
+        for (;;) {
+            CHAR c = ctxt->cur[0];
+
+            if (!(IS_LETTER(c) || IS_DIGIT(c) ||
+                  (c == '.') || (c == '-') || (c == '_') || (c == ':') ||
+                  IS_COMBINING(c) || IS_IGNORABLE(c) || IS_EXTENDER(c)))
+                break;
+            ctxt->cur++;
+        }
+        local = xmlStrndup(mark, ctxt->cur - mark);
+
+        /*
+         * Search the DTD associated to the prefix.
+         */
+        dtd = xmlSearchDtd(ctxt->doc, prefix);
+        if (dtd == NULL)
+            fprintf(stderr, "Start tag : Couldn't find namespace %s\n", prefix);
+        free(prefix);
+    }
+
+    node = xmlNewNode(dtd, local, NULL);
+
+    /*
+     * Now parse the attributes, up to the end of the tag:
+     *
+     * (S Attribute)* S?
+     */
+    SKIP_BLANKS(ctxt->cur);
+    while (IS_CHAR(ctxt->cur[0]) &&
+           (ctxt->cur[0] != '>') &&
+           !((ctxt->cur[0] == '/') && (ctxt->cur[1] == '>'))) {
+        if (IS_LETTER(ctxt->cur[0]) || (ctxt->cur[0] == '_')) {
+            xmlParseAttribute(ctxt, node);
+        } else {
+            /* We should warn TODO !!! */
+            ctxt->cur++;
+        }
+        SKIP_BLANKS(ctxt->cur);
+    }
+
+    return(node);
+}
+
+/*
+ * xmlParseEndTag: parse an end of tag, note that the '</' part has
+ * already been read.
+ *
+ * In theory one should just parse a Name, but with the addition of
+ * the namespace needed for WebDav the element name may be prefixed by
+ * a namespace prefix:
+ *
+ * QName ::= (NSPart ':')? LocalPart
+ * NSPart ::= Name
+ * LocalPart ::= Name
+ * ETag ::= '</' QName S? '>'
+ *
+ * instead of :
+ *
+ * ETag ::= '</' Name S? '>'
+ *
+ * *dtdPtr receives the DTD found for the prefix (NULL if none) and
+ * *tagPtr a copy of the local name that the caller must free(). Both
+ * are NULL when no name could be read. A missing '>' is reported on
+ * stderr and the cursor is then left where it is.
+ */
+
+void xmlParseEndTag(xmlParserCtxtPtr ctxt, xmlDtdPtr *dtdPtr, CHAR **tagPtr) {
+    const CHAR *mark;
+    CHAR *prefix, *local;
+    xmlDtdPtr dtd = NULL;
+
+    *dtdPtr = NULL;
+    *tagPtr = NULL;
+
+    if (!(IS_LETTER(ctxt->cur[0]) || (ctxt->cur[0] == '_')))
+        return;
+
+    /* First name: either the whole element name or the prefix. */
+    mark = ctxt->cur;
+    ctxt->cur++;
+    for (;;) {
+        CHAR c = ctxt->cur[0];
+
+        if (!(IS_LETTER(c) || IS_DIGIT(c) ||
+              (c == '.') || (c == '-') || (c == '_') ||
+              IS_COMBINING(c) || IS_IGNORABLE(c) || IS_EXTENDER(c)))
+            break;
+        ctxt->cur++;
+    }
+
+    if (ctxt->cur[0] != ':') {
+        local = xmlStrndup(mark, ctxt->cur - mark);
+    } else {
+        prefix = xmlStrndup(mark, ctxt->cur - mark);
+
+        ctxt->cur++; /* skip the colon */
+        if (!(IS_LETTER(ctxt->cur[0]) || (ctxt->cur[0] == '_'))) {
+            fprintf(stderr,
+                "End tag : no element name after namespace identifier %.20s\n",
+                    mark);
+            free(prefix);
+            return;
+        }
+
+        /* Local part, further ':' are accepted inside it. */
+        mark = ctxt->cur;
+        ctxt->cur++;
+        for (;;) {
+            CHAR c = ctxt->cur[0];
+
+            if (!(IS_LETTER(c) || IS_DIGIT(c) ||
+                  (c == '.') || (c == '-') || (c == '_') || (c == ':') ||
+                  IS_COMBINING(c) || IS_IGNORABLE(c) || IS_EXTENDER(c)))
+                break;
+            ctxt->cur++;
+        }
+        local = xmlStrndup(mark, ctxt->cur - mark);
+
+        /*
+         * Search the DTD associated to the prefix.
+         */
+        dtd = xmlSearchDtd(ctxt->doc, prefix);
+        if (dtd == NULL)
+            fprintf(stderr, "End tag : Couldn't find namespace %s\n", prefix);
+        free(prefix);
+    }
+
+    *dtdPtr = dtd;
+    *tagPtr = local;
+
+    /*
+     * We should definitely be at the ending "S? '>'" part
+     */
+    SKIP_BLANKS(ctxt->cur);
+    if (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] == '>')) {
+        ctxt->cur++;
+    } else {
+        fprintf(stderr, "End tag : expected '>', got %.20s\n", ctxt->cur);
+        /*
+         * Note : skipping to the next '>' is probably overkill,
+         * especially in case the '>' is just missing.
+         *
+         * Otherwise add:
+         *  MOVETO_ENDTAG(ctxt->cur);
+         */
+    }
+}
+
+/*
+ * xmlParseCDSect: escaped pure raw content.
+ *
+ * Called with the cursor just after "<![CDATA[". Returns a copy
+ * (allocated by xmlStrndup()) of the text found before the closing
+ * "]]>", which is not part of the result, and leaves the cursor after
+ * that "]]>". When the section is not terminated a message is printed
+ * on stderr and NULL is returned.
+ */
+CHAR *xmlParseCDSect(xmlParserCtxtPtr ctxt) {
+    const CHAR *r, *s, *base;
+    CHAR *ret;
+
+    base = ctxt->cur;
+    if (!IS_CHAR(ctxt->cur[0])) {
+        fprintf(stderr, "CData section not finished : %.20s\n", base);
+        return(NULL);
+    }
+    r = ctxt->cur++;
+    if (!IS_CHAR(ctxt->cur[0])) {
+        fprintf(stderr, "CData section not finished : %.20s\n", base);
+        return(NULL);
+    }
+    s = ctxt->cur++;
+
+    /* r, s and the cursor slide together until they read "]]>". */
+    while (IS_CHAR(ctxt->cur[0]) &&
+           ((*r != ']') || (*s != ']') || (ctxt->cur[0] != '>'))) {
+        r++;s++;ctxt->cur++;
+    }
+    if (!IS_CHAR(ctxt->cur[0])) {
+        fprintf(stderr, "CData section not finished : %.20s\n", base);
+        return(NULL);
+    }
+
+    /*
+     * r is on the first ']' of the terminator: the content stops just
+     * before it. Then step over the '>' so that it is not read again
+     * as text by the caller.
+     */
+    ret = xmlStrndup(base, r - base);
+    ctxt->cur++;
+
+    return(ret);
+}
+
+/*
+ * xmlParseContent: parse one item of the content of "node". A content
+ * is (element | PCData | Reference | CDSect | PI | Comment)
+ *
+ * element : starts by '<'
+ * PCData : any CHAR but '&' or '<'
+ * Reference : starts by '&'
+ * CDSect : starts by '<![CDATA['
+ * PI : starts by '<?'
+ * Comment : starts by '<!--'
+ *
+ * Returns the node to add as a child of "node" (a sub-element or a
+ * text node), or NULL when nothing has to be added: PI, comment, text
+ * stored as the content of "node", text dropped by xmlHandleData(),
+ * or error.
+ */
+
+xmlNodePtr xmlParseContent(xmlParserCtxtPtr ctxt, xmlNodePtr node) {
+    const CHAR *q;
+    CHAR *data = NULL;
+    xmlNodePtr ret = NULL;
+
+    /*
+     * First case : a Processing Instruction. Return right away: what
+     * follows may be the end tag of the parent, which must not be
+     * handed to xmlParseElement().
+     */
+    if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?')) {
+        xmlParsePI(ctxt);
+        return(NULL);
+    }
+    /*
+     * Second case : a comment, skipped for now.
+     */
+    if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
+        (ctxt->cur[2] == '-') && (ctxt->cur[3] == '-')) {
+        xmlParserSkipComment(ctxt);
+        return(NULL);
+    }
+    /*
+     * Third case : a CDSection
+     */
+    if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
+        (ctxt->cur[2] == '[') && (ctxt->cur[3] == 'C') &&
+        (ctxt->cur[4] == 'D') && (ctxt->cur[5] == 'A') &&
+        (ctxt->cur[6] == 'T') && (ctxt->cur[7] == 'A') &&
+        (ctxt->cur[8] == '[')) {
+        ctxt->cur += 9;
+        data = xmlParseCDSect(ctxt);
+    }
+    /*
+     * Fourth case :  a sub-element.
+     */
+    else if (ctxt->cur[0] == '<') {
+        ret = xmlParseElement(ctxt);
+    }
+    /*
+     * Last case, text. Note that References are handled directly.
+     */
+    else {
+        q = ctxt->cur;
+        while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != '<')) ctxt->cur++;
+
+        if (!IS_CHAR(ctxt->cur[0])) {
+            fprintf(stderr, "Truncated content : %.50s\n", q);
+            return(NULL);
+        }
+
+        /*
+         * Do the Entities decoding...
+         * NOTE(review): the buffer returned by xmlDecodeEntities() is
+         * not released here; check who owns it.
+         */
+        data = xmlStrdup(xmlDecodeEntities(ctxt->doc, q, ctxt->cur - q));
+    }
+
+    /*
+     * Handle the data if any. If there is no child
+     * add it as content, otherwise create a new node of type text.
+     */
+    if (data != NULL)
+        data = xmlHandleData(data);
+    if (data != NULL) {
+        if (node->childs == NULL)
+            xmlNodeSetContent(node, data);
+        else
+            ret = xmlNewText(data);
+        free(data);
+    }
+
+    return(ret);
+}
+
+/*
+ * xmlParseElement: parse an XML element
+ *
+ * Called with the cursor on the '<' of the start tag. Parses the start
+ * tag, then the content until the matching end tag (a simplified "</>"
+ * is accepted), and returns the node with its children attached.
+ * Mismatches between the start and end tags are reported on stderr but
+ * the node is still returned. Returns NULL when there is no start tag,
+ * when the start tag is not closed or when the data ends before the
+ * end tag.
+ *
+ * NOTE(review): on the two late error paths the partially built node
+ * is dropped without being released.
+ */
+
+xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt) {
+    xmlNodePtr ret, child;
+    const CHAR *openTag = ctxt->cur;
+    const CHAR *closeTag;
+
+    ret = xmlParseStartTag(ctxt);
+    if (ret == NULL) {
+        return(NULL);
+    }
+
+    /*
+     * Check for an Empty Element.
+     */
+    if ((ctxt->cur[0] == '/') && (ctxt->cur[1] == '>')) {
+        ctxt->cur += 2;
+        return(ret);
+    }
+    if (ctxt->cur[0] == '>') ctxt->cur++;
+    else {
+        fprintf(stderr, "Couldn't find end of Start Tag %.30s\n", openTag);
+        return(NULL);
+    }
+
+    /*
+     * Parse the content of the element:
+     * (element | PCData | Reference | CDSect | PI | Comment) *
+     *
+     * The loop stops upon detection of an end of tag '</'
+     */
+    while ((IS_CHAR(ctxt->cur[0])) &&
+           ((ctxt->cur[0] != '<') || (ctxt->cur[1] != '/'))) {
+        child = xmlParseContent(ctxt, ret);
+        if (child != NULL)
+            xmlAddChild(ret, child);
+    }
+    if (!IS_CHAR(ctxt->cur[0])) {
+        fprintf(stderr, "Premature end of data in tag %.30s\n", openTag);
+        return(NULL);
+    }
+
+    /*
+     * parse the end of tag : '</' has been detected. Remember where it
+     * starts so that the diagnostics show the real end tag.
+     */
+    closeTag = ctxt->cur;
+    ctxt->cur += 2;
+    if (ctxt->cur[0] == '>') ctxt->cur++; /* simplified closing </> */
+    else {
+        CHAR *endTag;
+        xmlDtdPtr endDtd;
+
+        xmlParseEndTag(ctxt, &endDtd, &endTag);
+
+        /*
+         * Check that the Name in the ETag is the same as in the STag.
+         * endTag is NULL when no name could be read, which counts as a
+         * mismatch; the names are CHAR strings, hence xmlStrcmp().
+         */
+        if (endDtd != ret->dtd) {
+            fprintf(stderr, "Start and End tags don't use the same DTD:\n");
+            fprintf(stderr, "\t%.30s\n\t%.30s\n", openTag, closeTag);
+        }
+        if ((endTag == NULL) || (ret->name == NULL) ||
+            xmlStrcmp(ret->name, endTag)) {
+            fprintf(stderr, "Start and End tags don't use the same name:\n");
+            fprintf(stderr, "\t%.30s\n\t%.30s\n", openTag, closeTag);
+        }
+
+        if ( endTag != NULL )
+          free(endTag);
+    }
+
+    return(ret);
+}
+
+/*
+ * xmlParseXMLDecl: parse an XML declaration header
+ *
+ * Called with the cursor on "<?xml". Creates ctxt->doc with the
+ * version found in 'version="..."', or with XML_DEFAULT_VERSION when
+ * no version can be read, then skips up to the closing '>'. The cursor
+ * is left after that '>', or on the end of the data if there is none.
+ */
+
+void xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
+    CHAR *version = NULL;
+
+    /*
+     * We know that '<?xml' is here.
+     */
+    ctxt->cur += 5;
+
+    /*
+     * Parse the version info
+     */
+    SKIP_BLANKS(ctxt->cur);
+
+    /*
+     * We should have 'version=' here !
+     */
+    if ((ctxt->cur[0] == 'v') && (ctxt->cur[1] == 'e') &&
+        (ctxt->cur[2] == 'r') && (ctxt->cur[3] == 's') &&
+        (ctxt->cur[4] == 'i') && (ctxt->cur[5] == 'o') &&
+        (ctxt->cur[6] == 'n') && (ctxt->cur[7] == '=')) {
+        ctxt->cur += 8;
+        version = xmlParseQuotedString(ctxt);
+    }
+
+    if (version != NULL) {
+        ctxt->doc = xmlNewDoc(version);
+        free(version);
+    } else {
+        ctxt->doc = xmlNewDoc(XML_DEFAULT_VERSION);
+    }
+
+    /*
+     * We should check for Required Markup Declaration TODO !!!!
+     */
+    MOVETO_ENDTAG(ctxt->cur);
+    /* Only step over a real '>', never over the end of the data. */
+    if (ctxt->cur[0] == '>') ctxt->cur++;
+}
+
+/*
+ * xmlParseMisc: parse an XML Misc optional field.
+ * (Comment | PI | S)*
+ *
+ * Consumes processing instructions, comments and blanks until
+ * something else is found.
+ */
+
+void xmlParseMisc(xmlParserCtxtPtr ctxt) {
+    for (;;) {
+        if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?')) {
+            xmlParsePI(ctxt);
+        } else if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
+                   (ctxt->cur[2] == '-') && (ctxt->cur[3] == '-')) {
+            /*
+             * The four characters of "<!--" are checked, exactly as
+             * xmlParserSkipComment() does; otherwise an input such as
+             * "<!-x" would be accepted here, refused there, and the
+             * loop would never progress.
+             */
+            xmlParserSkipComment(ctxt);
+        } else if (IS_BLANK(ctxt->cur[0])) {
+            ctxt->cur++;
+        } else {
+            break;
+        }
+    }
+}
+
+/*
+ * xmlParseDocument : parse an XML document and build a tree.
+ *
+ * The data is read from ctxt->cur. ctxt->doc receives the new document
+ * and ctxt->doc->root the root element (NULL if it could not be
+ * parsed). Returns 0, or -1 when the document could not be created.
+ */
+
+int xmlParseDocument(xmlParserCtxtPtr ctxt) {
+    /*
+     * We should check for encoding here and plug-in some
+     * conversion code TODO !!!!
+     */
+
+    /*
+     * Wipe out everything which is before the first '<'
+     */
+    SKIP_BLANKS(ctxt->cur);
+
+    /*
+     * Check for the XMLDecl in the Prolog. The first drafts were using
+     * <?XML and the final W3C REC now use <?xml ...
+     * The target must be exactly "xml": a PI such as <?xml:namespace
+     * is left for xmlParseMisc().
+     */
+    if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?') &&
+        (((ctxt->cur[2] == 'x') && (ctxt->cur[3] == 'm') &&
+          (ctxt->cur[4] == 'l')) ||
+         ((ctxt->cur[2] == 'X') && (ctxt->cur[3] == 'M') &&
+          (ctxt->cur[4] == 'L'))) &&
+        (IS_BLANK(ctxt->cur[5]) || (ctxt->cur[5] == 0x0d) ||
+         (ctxt->cur[5] == '?'))) {
+        xmlParseXMLDecl(ctxt);
+        SKIP_BLANKS(ctxt->cur);
+    } else {
+        ctxt->doc = xmlNewDoc(XML_DEFAULT_VERSION);
+    }
+
+    /* Without a document there is nowhere to attach the tree. */
+    if (ctxt->doc == NULL) {
+        fprintf(stderr, "xmlParseDocument : couldn't create the document\n");
+        return(-1);
+    }
+
+    /*
+     * The Misc part of the Prolog
+     * (Comment | PI | S) *
+     */
+    xmlParseMisc(ctxt);
+
+    /*
+     * Time to start parsing
+     */
+    ctxt->doc->root = xmlParseElement(ctxt);
+
+    return(0);
+}
+
+/*
+ * xmlParseDoc : parse an XML in-memory document and build a tree.
+ *
+ * "cur" is the text to parse. A temporary parser context is allocated,
+ * used and released. Returns the document left in the context by
+ * xmlParseDocument(), or NULL when "cur" is NULL or when the context
+ * cannot be allocated.
+ */
+
+xmlDocPtr xmlParseDoc(CHAR *cur) {
+    xmlParserCtxtPtr ctxt;
+    xmlDocPtr doc;
+
+    if (cur == NULL)
+        return(NULL);
+
+    ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
+    if (ctxt == NULL) {
+        perror("malloc");
+        return(NULL);
+    }
+
+    /* Point a fresh context at the caller's text. */
+    xmlInitParserCtxt(ctxt);
+    ctxt->base = cur;
+    ctxt->cur = cur;
+
+    xmlParseDocument(ctxt);
+
+    /* Keep the document, drop the context. */
+    doc = ctxt->doc;
+    free(ctxt->nodes);
+    free(ctxt);
+
+    return(doc);
+}
+
+/*
+ * xmlParseFile : parse an XML file and build a tree.
+ *
+ * The whole file is loaded in memory (through zlib when available, so
+ * that compressed files can be read), zero terminated after the last
+ * byte actually read, then parsed. Returns the document, or NULL when
+ * the file cannot be read or when memory is missing; every error path
+ * releases the buffer and closes the file.
+ */
+
+xmlDocPtr xmlParseFile(const char *filename) {
+    xmlDocPtr ret;
+#ifdef HAVE_ZLIB_H
+    gzFile input;
+#else
+    int input;
+#endif
+    int res;
+    size_t capacity;
+    struct stat buf;
+    char *buffer;
+    xmlParserCtxtPtr ctxt;
+
+    if (filename == NULL) return(NULL);
+
+    res = stat(filename, &buf);
+    if (res < 0) return(NULL);
+
+    /*
+     * First guess for the size of the buffer; with zlib the content may
+     * expand, hence the factor. The extra 100 bytes keep the capacity
+     * non-zero for an empty file.
+     */
+#ifdef HAVE_ZLIB_H
+    capacity = ((size_t) buf.st_size * 20) + 100;
+#else
+    capacity = (size_t) buf.st_size + 100;
+#endif
+
+    for (;;) {
+        buffer = malloc(capacity);
+        if (buffer == NULL) {
+            perror("malloc");
+            return(NULL);
+        }
+
+        /* One byte is kept for the final zero. */
+#ifdef HAVE_ZLIB_H
+        input = gzopen (filename, "r");
+        if (input == NULL) {
+            fprintf (stderr, "Cannot read file %s :\n", filename);
+            perror ("gzopen failed");
+            free(buffer);
+            return(NULL);
+        }
+        res = gzread(input, buffer, (unsigned) (capacity - 1));
+        if (res < 0) {
+            fprintf (stderr, "Cannot read file %s :\n", filename);
+            perror ("gzread failed");
+        }
+        gzclose(input);
+#else
+        input = open (filename, O_RDONLY);
+        if (input < 0) {
+            fprintf (stderr, "Cannot read file %s :\n", filename);
+            perror ("open failed");
+            free(buffer);
+            return(NULL);
+        }
+        res = (int) read(input, buffer, capacity - 1);
+        if (res < 0) {
+            fprintf (stderr, "Cannot read file %s :\n", filename);
+            perror ("read failed");
+        }
+        close(input);
+#endif
+        if (res < 0) {
+            free(buffer);
+            return(NULL);
+        }
+
+        /* Less than the room available: everything has been read. */
+        if ((size_t) res < capacity - 1) break;
+
+        /* The buffer was filled up: retry with a bigger one. */
+        free(buffer);
+        capacity *= 2;
+    }
+    buffer[res] = '\0';
+
+    ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
+    if (ctxt == NULL) {
+        perror("malloc");
+        free(buffer);
+        return(NULL);
+    }
+
+    xmlInitParserCtxt(ctxt);
+    ctxt->filename = filename;
+    ctxt->base = buffer;
+    ctxt->cur = buffer;
+
+    xmlParseDocument(ctxt);
+    ret = ctxt->doc;
+    free(buffer);
+    free(ctxt->nodes);
+    free(ctxt);
+
+    return(ret);
+}
+
+/*
+ * xmlParseMemory : parse an XML memory block and build a tree.
+ *
+ * "buffer" holds "size" bytes of XML and does not need to be zero
+ * terminated: the parser works on a private, zero terminated copy, so
+ * the caller's block is never modified. Returns the document, or NULL
+ * when "buffer" is NULL, when "size" is not positive or when memory is
+ * missing.
+ */
+
+xmlDocPtr xmlParseMemory(char *buffer, int size) {
+    xmlDocPtr ret;
+    xmlParserCtxtPtr ctxt;
+    char *copy;
+
+    if ((buffer == NULL) || (size <= 0)) return(NULL);
+
+    /*
+     * The parser stops on a zero. Writing it in the caller's block
+     * would destroy its last byte, so add it to a copy instead.
+     */
+    copy = malloc((size_t) size + 1);
+    if (copy == NULL) {
+        perror("malloc");
+        return(NULL);
+    }
+    memcpy(copy, buffer, (size_t) size);
+    copy[size] = '\0';
+
+    ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
+    if (ctxt == NULL) {
+        perror("malloc");
+        free(copy);
+        return(NULL);
+    }
+
+    xmlInitParserCtxt(ctxt);
+    ctxt->base = copy;
+    ctxt->cur = copy;
+
+    xmlParseDocument(ctxt);
+    ret = ctxt->doc;
+    free(copy);
+    free(ctxt->nodes);
+    free(ctxt);
+
+    return(ret);
+}
+
+
+
+
+/*
+ * xmlInitParserCtxt : initialize a parser context.
+ *
+ * Every field is reset (no input, line 1, column 1, no document,
+ * depth 0) and an array of max_depth (10) node pointers, all NULL, is
+ * allocated in ctxt->nodes. If that allocation fails a message is
+ * printed on stderr and max_depth is set to 0. A NULL context is
+ * ignored.
+ */
+void xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
+{
+    int i;
+
+    if (ctxt == NULL) return;
+
+    ctxt->filename = NULL;
+    ctxt->base = NULL;
+    ctxt->cur = NULL;
+    ctxt->line = 1;
+    ctxt->col = 1;
+    ctxt->doc = NULL;
+    ctxt->depth = 0;
+    ctxt->max_depth = 10;
+    ctxt->nodes = (xmlNodePtr *) malloc(ctxt->max_depth * sizeof(xmlNodePtr));
+    if (ctxt->nodes == NULL) {
+        /* The cast makes the argument match the %d conversion. */
+        fprintf(stderr, "malloc of %d byte failed\n",
+                (int) (ctxt->max_depth * sizeof(xmlNodePtr)));
+        ctxt->max_depth = 0;
+    } else {
+        for (i = 0;i < ctxt->max_depth;i++)
+            ctxt->nodes[i] = NULL;
+    }
+}
+
+
+/*
+ * xmlClearParserCtxt : reset a parser context by running
+ *    xmlInitParserCtxt() on it again.
+ *
+ * NOTE(review): nothing is released here; the previous ctx->nodes
+ * array is simply replaced by the one allocated in
+ * xmlInitParserCtxt(). Freeing it first would only be safe if every
+ * caller passes an already initialized context - confirm before
+ * changing.
+ */
+void xmlClearParserCtxt(xmlParserCtxtPtr ctx)
+{
+    xmlInitParserCtxt(ctx);
+}
+
+
+/*
+ * xmlSetupParserForBuffer : prepare the parser context for a new
+ *    buffer.
+ *
+ * The context is first reset through xmlClearParserCtxt(), then its
+ * base and current position are set to the start of "buffer" and its
+ * file name to "filename". The buffer parameter must not be NULL, but
+ * the filename parameter can be.
+ */
+void xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const CHAR* buffer,
+                             const char* filename)
+{
+    /* Start from a clean state, dropping what a previous parse left. */
+    xmlClearParserCtxt(ctxt);
+
+    ctxt->filename = filename;
+    ctxt->cur = buffer;
+    ctxt->base = buffer;
+}
+
+
+
+/*
+ * xmlReportError : write the message "msg" on stderr, as is.
+ *
+ * The context is not used yet. A NULL message is ignored.
+ */
+void xmlReportError(xmlParserCtxtPtr ctx, const CHAR* msg)
+{
+  (void) ctx;
+  if (msg == NULL) return;
+  fputs(msg, stderr);
+}