added the same htmlRead APIs as their XML counterparts new parser

* HTMLparser.c testHTML.c xmllint.c include/libxml/HTMLparser.h: added the same htmlRead APIs as their XML counterparts * include/libxml/parser.h: new parser options, not yet implemented, added an options field to the context. * tree.c: patch from Shaun McCance to fix bug #123238 when ]]> is found within a cdata section. * result/noent/cdata2 result/cdata2 result/cdata2.rdr result/cdata2.sax test/cdata2: add one more cdata test Daniel
author: Daniel Veillard <veillard@src.gnome.org> 2003-09-26 12:47:50 +0000
committer: Daniel Veillard <veillard@src.gnome.org> 2003-09-26 12:47:50 +0000
commit: 9475a352bdd1f15b1e0c53472a74938f9d5dc04e (patch)
tree: a29cfb017b1ed70d637fed469b6563ed06dc941c
parent: 60942def6a83d48469d1d9fb41564b8450a939b4 (diff)
download: libxml2-9475a352bdd1f15b1e0c53472a74938f9d5dc04e.tar.gz
12 files changed, 732 insertions, 21 deletions
diff --git a/ChangeLog b/ChangeLog
index 4caf31ec..a2233424 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+Fri Sep 26 14:41:53 CEST 2003 Daniel Veillard <daniel@veillard.com>
+
+	* HTMLparser.c testHTML.c xmllint.c include/libxml/HTMLparser.h:
+	  added the same htmlRead APIs as their XML counterparts
+	* include/libxml/parser.h: new parser options, not yet implemented,
+	  added an options field to the context.
+	* tree.c: patch from Shaun McCance to fix bug #123238 when ]]>
+	  is found within a cdata section.
+	* result/noent/cdata2 result/cdata2 result/cdata2.rdr
+	  result/cdata2.sax test/cdata2: add one more cdata test
+	  
 Thu Sep 25 23:03:23 CEST 2003 Daniel Veillard <daniel@veillard.com>
 
 	* parser.c xmllint.c doc/libxml2-api.xml include/libxml/parser.h:
diff --git a/HTMLparser.c b/HTMLparser.c
index d2cba584..caed896b 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -5541,4 +5541,529 @@ htmlNodeStatus(const htmlNodePtr node, int legacy) {
     default: return HTML_NA ;
   }
 }
+/************************************************************************
+ *									*
+ *	New set (2.6.0) of simpler and more flexible APIs		*
+ *									*
+ ************************************************************************/
+/**
+ * DICT_FREE:
+ * @str:  a string
+ *
+ * Free a string unless it is owned by the "dict" dictionary visible in
+ * the current scope. Expands to a single statement; add the ';' yourself.
+ */
+#define DICT_FREE(str)						\
+    do { if ((str) && ((!dict) ||				\
+	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
+	    xmlFree((char *)(str)); } while (0)
+
+/**
+ * htmlCtxtReset:
+ * @ctxt: an HTML parser context
+ *
+ * Reset a parser context for reuse; a NULL @ctxt is silently ignored
+ */
+void
+htmlCtxtReset(htmlParserCtxtPtr ctxt)
+{
+    xmlParserInputPtr input;
+    xmlDictPtr dict;            /* consulted by DICT_FREE() below */
+
+    if (ctxt == NULL)
+        return;
+    dict = ctxt->dict;
+
+    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
+        xmlFreeInputStream(input);
+    }
+    ctxt->inputNr = 0;
+    ctxt->input = NULL;
+    ctxt->spaceNr = 0;
+    ctxt->spaceTab[0] = -1;
+    ctxt->space = &ctxt->spaceTab[0];
+    ctxt->nodeNr = 0;
+    ctxt->node = NULL;
+    ctxt->nameNr = 0;
+    ctxt->name = NULL;
+
+    DICT_FREE(ctxt->version);
+    ctxt->version = NULL;
+    DICT_FREE(ctxt->encoding);
+    ctxt->encoding = NULL;
+    DICT_FREE(ctxt->directory);
+    ctxt->directory = NULL;
+    DICT_FREE(ctxt->extSubURI);
+    ctxt->extSubURI = NULL;
+    DICT_FREE(ctxt->extSubSystem);
+    ctxt->extSubSystem = NULL;
+    if (ctxt->myDoc != NULL)
+        xmlFreeDoc(ctxt->myDoc);
+    ctxt->myDoc = NULL;
+
+    ctxt->standalone = -1;
+    ctxt->hasExternalSubset = 0;
+    ctxt->hasPErefs = 0;
+    ctxt->html = 1;
+    ctxt->external = 0;
+    ctxt->instate = XML_PARSER_START;
+    ctxt->token = 0;
+
+    ctxt->wellFormed = 1;
+    ctxt->nsWellFormed = 1;
+    ctxt->valid = 1;
+    ctxt->vctxt.userData = ctxt;
+    ctxt->vctxt.error = xmlParserValidityError;
+    ctxt->vctxt.warning = xmlParserValidityWarning;
+    ctxt->record_info = 0;
+    ctxt->nbChars = 0;
+    ctxt->checkIndex = 0;
+    ctxt->inSubset = 0;
+    ctxt->errNo = XML_ERR_OK;
+    ctxt->depth = 0;
+    ctxt->charset = XML_CHAR_ENCODING_UTF8;
+    ctxt->catalogs = NULL;
+    xmlInitNodeInfoSeq(&ctxt->node_seq);
+
+    if (ctxt->attsDefault != NULL) {
+        xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
+        ctxt->attsDefault = NULL;
+    }
+    if (ctxt->attsSpecial != NULL) {
+        xmlHashFree(ctxt->attsSpecial, NULL);
+        ctxt->attsSpecial = NULL;
+    }
+}
+
+/**
+ * htmlCtxtUseOptions:
+ * @ctxt: an HTML parser context
+ * @options:  a combination of htmlParserOption(s)
+ *
+ * Applies the options to the parser context
+ *
+ * Returns 0 in case of success, the set of unknown or unimplemented options
+ *         in case of error.
+ */
+int
+htmlCtxtUseOptions(htmlParserCtxtPtr ctxt, int options)
+{
+    if (options & HTML_PARSE_NOWARNING) {
+        ctxt->sax->warning = NULL;
+        options -= HTML_PARSE_NOWARNING;
+    }
+    if (options & HTML_PARSE_NOERROR) {
+        ctxt->sax->error = NULL;
+        ctxt->sax->fatalError = NULL;
+        options -= HTML_PARSE_NOERROR;
+    }
+    if (options & HTML_PARSE_PEDANTIC) {
+        ctxt->pedantic = 1;
+        options -= HTML_PARSE_PEDANTIC;
+    } else
+        ctxt->pedantic = 0;
+    if (options & HTML_PARSE_NOBLANKS) {
+        ctxt->keepBlanks = 0;
+        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
+        options -= HTML_PARSE_NOBLANKS;
+    } else
+        ctxt->keepBlanks = 1;
+    ctxt->dictNames = 0;
+    return (options);   /* leftovers, e.g. HTML_PARSE_NONET, not handled above */
+}
+
+/**
+ * htmlDoRead:
+ * @ctxt:  an HTML parser context
+ * @URL:  the base URL to use for the document
+ * @encoding:  the document encoding, or NULL
+ * @options:  a combination of htmlParserOption(s)
+ * @reuse:  keep the context for reuse
+ *
+ * Shared back-end of the htmlRead and htmlCtxtRead entry points
+ *
+ * Returns the resulting document tree or NULL
+ */
+static htmlDocPtr
+htmlDoRead(htmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
+          int options, int reuse)
+{
+    htmlDocPtr doc;
+    int sharesDict;
+
+    htmlCtxtUseOptions(ctxt, options);
+    ctxt->html = 1;
+    if (encoding != NULL) {
+        xmlCharEncodingHandlerPtr handler;
+
+        handler = xmlFindCharEncodingHandler(encoding);
+        if (handler != NULL)    /* unknown names are silently ignored */
+            xmlSwitchToEncoding(ctxt, handler);
+    }
+    /* Name the input after URL only when it has no filename yet */
+    if ((URL != NULL) && (ctxt->input != NULL) &&
+        (ctxt->input->filename == NULL))
+        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
+    htmlParseDocument(ctxt);
+    /* Detach the tree from the context before handing it back */
+    doc = ctxt->myDoc;
+    ctxt->myDoc = NULL;
+    sharesDict = ctxt->dictNames && (doc != NULL) && (doc->dict == ctxt->dict);
+    if (reuse) {
+        /* Must duplicate the reference to the dictionary */
+        if (sharesDict)
+            xmlDictReference(ctxt->dict);
+    } else {
+        if (sharesDict)
+            ctxt->dict = NULL;
+        xmlFreeParserCtxt(ctxt);
+    }
+    return (doc);
+}
+
+/**
+ * htmlReadDoc:
+ * @cur:  a pointer to a zero terminated string
+ * @URL:  the base URL to use for the document
+ * @encoding:  the document encoding, or NULL
+ * @options:  a combination of htmlParserOption(s)
+ *
+ * Parse an in-memory HTML document held in a string and build a tree.
+ *
+ * Returns the resulting document tree, or NULL on failure
+ */
+htmlDocPtr
+htmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
+{
+    htmlParserCtxtPtr parser = NULL;
+
+    /* NOTE(review): context comes from the XML-side constructor - confirm */
+    if (cur != NULL)
+        parser = xmlCreateDocParserCtxt(cur);
+    if (parser == NULL)
+        return (NULL);
+    /* reuse == 0: htmlDoRead() frees this one-shot context */
+    return (htmlDoRead(parser, URL, encoding, options, 0));
+}
+
+/**
+ * htmlReadFile:
+ * @filename:  a file or URL
+ * @encoding:  the document encoding, or NULL
+ * @options:  a combination of htmlParserOption(s)
+ *
+ * Parse an HTML file from the filesystem or the network and build a tree.
+ *
+ * Returns the resulting document tree, or NULL on failure
+ */
+htmlDocPtr
+htmlReadFile(const char *filename, const char *encoding, int options)
+{
+    htmlParserCtxtPtr parser = htmlCreateFileParserCtxt(filename, encoding);
+
+    /* @encoding already went to the constructor, so none is passed on */
+    if (parser != NULL)
+        return (htmlDoRead(parser, NULL, NULL, options, 0));
+    return (NULL);
+}
+
+/**
+ * htmlReadMemory:
+ * @buffer:  a pointer to a char array
+ * @size:  the size of the array
+ * @URL:  the base URL to use for the document
+ * @encoding:  the document encoding, or NULL
+ * @options:  a combination of htmlParserOption(s)
+ *
+ * Parse an in-memory HTML document of @size bytes and build a tree.
+ *
+ * Returns the resulting document tree, or NULL on failure
+ */
+htmlDocPtr
+htmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
+{
+    htmlParserCtxtPtr parser = xmlCreateMemoryParserCtxt(buffer, size);
+
+    /* NOTE(review): context comes from the XML-side constructor - confirm */
+    if (parser != NULL)
+        return (htmlDoRead(parser, URL, encoding, options, 0));
+    return (NULL);
+}
+
+/**
+ * htmlReadFd:
+ * @fd:  an open file descriptor
+ * @URL:  the base URL to use for the document
+ * @encoding:  the document encoding, or NULL
+ * @options:  a combination of htmlParserOption(s)
+ *
+ * Parse an HTML document read from a file descriptor and build a tree.
+ *
+ * Returns the resulting document tree, or NULL on failure
+ */
+htmlDocPtr
+htmlReadFd(int fd, const char *URL, const char *encoding, int options)
+{
+    htmlParserCtxtPtr parser = NULL;
+    xmlParserInputBufferPtr buf;
+    xmlParserInputPtr in;
+
+    if (fd < 0)
+        return (NULL);
+    buf = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
+    if (buf == NULL)
+        return (NULL);
+    /* NOTE(review): context comes from the XML-side constructor - confirm */
+    parser = xmlNewParserCtxt();
+    if (parser == NULL)
+        goto fail;
+    in = xmlNewIOInputStream(parser, buf, XML_CHAR_ENCODING_NONE);
+    if (in == NULL)
+        goto fail;
+    inputPush(parser, in);
+    return (htmlDoRead(parser, URL, encoding, options, 0));
+fail:
+    xmlFreeParserInputBuffer(buf);
+    if (parser != NULL)
+        xmlFreeParserCtxt(parser);
+    return (NULL);
+}
+
+/**
+ * htmlReadIO:
+ * @ioread:  an I/O read function
+ * @ioclose:  an I/O close function
+ * @ioctx:  an I/O handler
+ * @URL:  the base URL to use for the document
+ * @encoding:  the document encoding, or NULL
+ * @options:  a combination of htmlParserOption(s)
+ *
+ * Parse an HTML document pulled through I/O callbacks and build a tree.
+ *
+ * Returns the resulting document tree, or NULL on failure
+ */
+htmlDocPtr
+htmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
+          void *ioctx, const char *URL, const char *encoding, int options)
+{
+    htmlParserCtxtPtr parser = NULL;
+    xmlParserInputBufferPtr buf;
+    xmlParserInputPtr in;
+
+    if (ioread == NULL)
+        return (NULL);
+    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
+                                       XML_CHAR_ENCODING_NONE);
+    if (buf == NULL)
+        return (NULL);
+    /* NOTE(review): context comes from the XML-side constructor - confirm */
+    parser = xmlNewParserCtxt();
+    if (parser == NULL)
+        goto fail;
+    in = xmlNewIOInputStream(parser, buf, XML_CHAR_ENCODING_NONE);
+    if (in == NULL)
+        goto fail;
+    inputPush(parser, in);
+    return (htmlDoRead(parser, URL, encoding, options, 0));
+fail:
+    xmlFreeParserInputBuffer(buf);
+    if (parser != NULL)
+        xmlFreeParserCtxt(parser);
+    return (NULL);
+}
+
+/**
+ * htmlCtxtReadDoc:
+ * @ctxt:  an HTML parser context
+ * @cur:  a pointer to a zero terminated string
+ * @URL:  the base URL to use for the document
+ * @encoding:  the document encoding, or NULL
+ * @options:  a combination of htmlParserOption(s)
+ *
+ * Parse an in-memory HTML document held in a string and build a tree.
+ * The existing @ctxt parser context is reset and then reused.
+ *
+ * Returns the resulting document tree, or NULL on failure
+ */
+htmlDocPtr
+htmlCtxtReadDoc(htmlParserCtxtPtr ctxt, const xmlChar * cur,
+               const char *URL, const char *encoding, int options)
+{
+    xmlParserInputPtr in;
+
+    if ((cur == NULL) || (ctxt == NULL))
+        return (NULL);
+
+    /* drop whatever an earlier parse left behind in @ctxt */
+    htmlCtxtReset(ctxt);
+
+    in = xmlNewStringInputStream(ctxt, cur);
+    if (in == NULL)
+        return (NULL);
+    inputPush(ctxt, in);
+
+    /* reuse == 1: htmlDoRead() leaves the context alive for the caller */
+    return (htmlDoRead(ctxt, URL, encoding, options, 1));
+}
+
+/**
+ * htmlCtxtReadFile:
+ * @ctxt:  an HTML parser context
+ * @filename:  a file or URL
+ * @encoding:  the document encoding, or NULL
+ * @options:  a combination of htmlParserOption(s)
+ *
+ * Parse an HTML file from the filesystem or the network and build a tree.
+ * The existing @ctxt parser context is reset and then reused.
+ *
+ * Returns the resulting document tree, or NULL on failure
+ */
+htmlDocPtr
+htmlCtxtReadFile(htmlParserCtxtPtr ctxt, const char *filename,
+                const char *encoding, int options)
+{
+    xmlParserInputPtr in;
+
+    if ((filename == NULL) || (ctxt == NULL))
+        return (NULL);
+
+    /* drop whatever an earlier parse left behind in @ctxt */
+    htmlCtxtReset(ctxt);
+
+    in = xmlNewInputFromFile(ctxt, filename);
+    if (in == NULL)
+        return (NULL);
+    inputPush(ctxt, in);
+
+    /* reuse == 1: htmlDoRead() leaves the context alive for the caller */
+    return (htmlDoRead(ctxt, NULL, encoding, options, 1));
+}
+
+/**
+ * htmlCtxtReadMemory:
+ * @ctxt:  an HTML parser context
+ * @buffer:  a pointer to a char array
+ * @size:  the size of the array
+ * @URL:  the base URL to use for the document
+ * @encoding:  the document encoding, or NULL
+ * @options:  a combination of htmlParserOption(s)
+ *
+ * Parse an in-memory HTML document of @size bytes and build a tree.
+ * The existing @ctxt parser context is reset and then reused.
+ *
+ * Returns the resulting document tree, or NULL on failure
+ */
+htmlDocPtr
+htmlCtxtReadMemory(htmlParserCtxtPtr ctxt, const char *buffer, int size,
+                  const char *URL, const char *encoding, int options)
+{
+    xmlParserInputBufferPtr buf;
+    xmlParserInputPtr in;
+
+    if ((ctxt == NULL) || (buffer == NULL))
+        return (NULL);
+
+    /* drop whatever an earlier parse left behind in @ctxt */
+    htmlCtxtReset(ctxt);
+
+    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
+    if (buf == NULL)
+        return (NULL);
+
+    in = xmlNewIOInputStream(ctxt, buf, XML_CHAR_ENCODING_NONE);
+    if (in == NULL) {
+        /* no stream was built around the buffer: release it here */
+        xmlFreeParserInputBuffer(buf);
+        return (NULL);
+    }
+
+    inputPush(ctxt, in);
+    /* reuse == 1: htmlDoRead() leaves the context alive for the caller */
+    return (htmlDoRead(ctxt, URL, encoding, options, 1));
+}
+
+/**
+ * htmlCtxtReadFd:
+ * @ctxt:  an HTML parser context
+ * @fd:  an open file descriptor
+ * @URL:  the base URL to use for the document
+ * @encoding:  the document encoding, or NULL
+ * @options:  a combination of htmlParserOption(s)
+ *
+ * Parse an HTML document read from a file descriptor and build a tree.
+ * The existing @ctxt parser context is reset and then reused.
+ *
+ * Returns the resulting document tree, or NULL on failure
+ */
+htmlDocPtr
+htmlCtxtReadFd(htmlParserCtxtPtr ctxt, int fd,
+              const char *URL, const char *encoding, int options)
+{
+    xmlParserInputBufferPtr buf;
+    xmlParserInputPtr in;
+
+    if ((fd < 0) || (ctxt == NULL))
+        return (NULL);
+
+    /* drop whatever an earlier parse left behind in @ctxt */
+    htmlCtxtReset(ctxt);
+
+    buf = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
+    if (buf == NULL)
+        return (NULL);
+    in = xmlNewIOInputStream(ctxt, buf, XML_CHAR_ENCODING_NONE);
+    if (in == NULL) {
+        /* no stream was built around the buffer: release it here */
+        xmlFreeParserInputBuffer(buf);
+        return (NULL);
+    }
+    inputPush(ctxt, in);
+    /* reuse == 1: htmlDoRead() leaves the context alive for the caller */
+    return (htmlDoRead(ctxt, URL, encoding, options, 1));
+}
+
+/**
+ * htmlCtxtReadIO:
+ * @ctxt:  an HTML parser context
+ * @ioread:  an I/O read function
+ * @ioclose:  an I/O close function
+ * @ioctx:  an I/O handler
+ * @URL:  the base URL to use for the document
+ * @encoding:  the document encoding, or NULL
+ * @options:  a combination of htmlParserOption(s)
+ *
+ * Parse an HTML document pulled through I/O callbacks and build a tree.
+ * The existing @ctxt parser context is reset and then reused.
+ *
+ * Returns the resulting document tree, or NULL on failure
+ */
+htmlDocPtr
+htmlCtxtReadIO(htmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
+              xmlInputCloseCallback ioclose, void *ioctx,
+	      const char *URL,
+              const char *encoding, int options)
+{
+    xmlParserInputBufferPtr buf;
+    xmlParserInputPtr in;
+
+    if ((ioread == NULL) || (ctxt == NULL))
+        return (NULL);
+
+    /* drop whatever an earlier parse left behind in @ctxt */
+    htmlCtxtReset(ctxt);
+
+    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
+                                       XML_CHAR_ENCODING_NONE);
+    if (buf == NULL)
+        return (NULL);
+    in = xmlNewIOInputStream(ctxt, buf, XML_CHAR_ENCODING_NONE);
+    if (in == NULL) {
+        /* no stream was built around the buffer: release it here */
+        xmlFreeParserInputBuffer(buf);
+        return (NULL);
+    }
+    inputPush(ctxt, in);
+    return (htmlDoRead(ctxt, URL, encoding, options, 1));
+}
+
 #endif /* LIBXML_HTML_ENABLED */
diff --git a/include/libxml/HTMLparser.h b/include/libxml/HTMLparser.h
index 7e008bd5..66f2d809 100644
--- a/include/libxml/HTMLparser.h
+++ b/include/libxml/HTMLparser.h
@@ -154,6 +154,88 @@ XMLPUBFUN int XMLCALL
 						 int size,
 						 int terminate);
 
+/*
+ * New set of simpler/more flexible APIs
+ */
+/**
+ * htmlParserOption:
+ *
+ * This is the set of HTML parser options that can be passed down
+ * to the htmlReadDoc() and similar calls.
+ */
+typedef enum {
+    HTML_PARSE_NOERROR	= 1<<5,	/* suppress error reports */
+    HTML_PARSE_NOWARNING= 1<<6,	/* suppress warning reports */
+    HTML_PARSE_PEDANTIC	= 1<<7,	/* pedantic error reporting */
+    HTML_PARSE_NOBLANKS	= 1<<8,	/* remove blank nodes */
+    HTML_PARSE_NONET	= 1<<11 /* Forbid network access */
+} htmlParserOption;
+
+XMLPUBFUN void XMLCALL
+		htmlCtxtReset		(htmlParserCtxtPtr ctxt);
+XMLPUBFUN int XMLCALL
+		htmlCtxtUseOptions	(htmlParserCtxtPtr ctxt,
+					 int options);
+XMLPUBFUN htmlDocPtr XMLCALL
+		htmlReadDoc		(const xmlChar *cur,
+					 const char *URL,
+					 const char *encoding,
+					 int options);
+XMLPUBFUN htmlDocPtr XMLCALL
+		htmlReadFile		(const char *filename,
+					 const char *encoding,
+					 int options);
+XMLPUBFUN htmlDocPtr XMLCALL
+		htmlReadMemory		(const char *buffer,
+					 int size,
+					 const char *URL,
+					 const char *encoding,
+					 int options);
+XMLPUBFUN htmlDocPtr XMLCALL
+		htmlReadFd		(int fd,
+					 const char *URL,
+					 const char *encoding,
+					 int options);
+XMLPUBFUN htmlDocPtr XMLCALL
+		htmlReadIO		(xmlInputReadCallback ioread,
+					 xmlInputCloseCallback ioclose,
+					 void *ioctx,
+					 const char *URL,
+					 const char *encoding,
+					 int options);
+XMLPUBFUN htmlDocPtr XMLCALL
+		htmlCtxtReadDoc		(htmlParserCtxtPtr ctxt,
+					 const xmlChar *cur,
+					 const char *URL,
+					 const char *encoding,
+					 int options);
+XMLPUBFUN htmlDocPtr XMLCALL
+		htmlCtxtReadFile	(htmlParserCtxtPtr ctxt,
+					 const char *filename,
+					 const char *encoding,
+					 int options);
+XMLPUBFUN htmlDocPtr XMLCALL
+		htmlCtxtReadMemory	(htmlParserCtxtPtr ctxt,
+					 const char *buffer,
+					 int size,
+					 const char *URL,
+					 const char *encoding,
+					 int options);
+XMLPUBFUN htmlDocPtr XMLCALL
+		htmlCtxtReadFd		(htmlParserCtxtPtr ctxt,
+					 int fd,
+					 const char *URL,
+					 const char *encoding,
+					 int options);
+XMLPUBFUN htmlDocPtr XMLCALL
+		htmlCtxtReadIO		(htmlParserCtxtPtr ctxt,
+					 xmlInputReadCallback ioread,
+					 xmlInputCloseCallback ioclose,
+					 void *ioctx,
+					 const char *URL,
+					 const char *encoding,
+					 int options);
+
 /* NRK/Jan2003: further knowledge of HTML structure
  */
 typedef enum {
diff --git a/include/libxml/parser.h b/include/libxml/parser.h
index e095babd..73dd71f9 100644
--- a/include/libxml/parser.h
+++ b/include/libxml/parser.h
@@ -262,15 +262,16 @@ struct _xmlParserCtxt {
     xmlHashTablePtr    attsDefault;   /* defaulted attributes if any */
     xmlHashTablePtr    attsSpecial;   /* non-CDATA attributes if any */
     int                nsWellFormed;  /* is the document XML Nanespace okay */
+    int                options;       /* Extra options */
 
     /*
      * Those fields are needed only for treaming parsing so far
      */
-     int               dictNames;    /* Use dictionary names for the tree */
-     int               freeElemsNr;  /* number of freed element nodes */
-     xmlNodePtr        freeElems;    /* List of freed element nodes */
-     int               freeAttrsNr;  /* number of freed attributes nodes */
-     xmlAttrPtr        freeAttrs;    /* List of freed attributes nodes */
+    int               dictNames;    /* Use dictionary names for the tree */
+    int               freeElemsNr;  /* number of freed element nodes */
+    xmlNodePtr        freeElems;    /* List of freed element nodes */
+    int               freeAttrsNr;  /* number of freed attributes nodes */
+    xmlAttrPtr        freeAttrs;    /* List of freed attributes nodes */
 };
 
 /**
@@ -1045,7 +1046,9 @@ typedef enum {
     XML_PARSE_SAX1	= 1<<9,	/* use the SAX1 interface internally */
     XML_PARSE_XINCLUDE	= 1<<10,/* Implement XInclude substitition  */
     XML_PARSE_NONET	= 1<<11,/* Forbid network access */
-    XML_PARSE_NODICT	= 1<<12 /* Do not reuse the context dictionnary */
+    XML_PARSE_NODICT	= 1<<12,/* Do not reuse the context dictionnary */
+    XML_PARSE_NSCLEAN	= 1<<13,/* remove redundant namespaces declarations */
+    XML_PARSE_NOCDATA	= 1<<14 /* merge CDATA as text nodes */
 } xmlParserOption;
 
 XMLPUBFUN void XMLCALL
diff --git a/result/cdata2 b/result/cdata2
new file mode 100644
index 00000000..b4db7917
--- /dev/null
+++ b/result/cdata2
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<collection>
+  <test><![CDATA[
+    <![CDATA[abc]]]>]&gt;<![CDATA[
+  ]]></test>
+</collection>
diff --git a/result/cdata2.rdr b/result/cdata2.rdr
new file mode 100644
index 00000000..e69a6729
--- /dev/null
+++ b/result/cdata2.rdr
@@ -0,0 +1,13 @@
+0 1 collection 0 0
+1 14 #text 0 1 
+  
+1 1 test 0 0
+2 4 #cdata-section 0 1 
+    <![CDATA[abc]
+2 3 #text 0 1 ]>
+2 4 #cdata-section 0 1 
+  
+1 15 test 0 0
+1 14 #text 0 1 
+
+0 15 collection 0 0
diff --git a/result/cdata2.sax b/result/cdata2.sax
new file mode 100644
index 00000000..46b025e8
--- /dev/null
+++ b/result/cdata2.sax
@@ -0,0 +1,18 @@
+SAX.setDocumentLocator()
+SAX.startDocument()
+SAX.startElement(collection)
+SAX.characters(
+  , 3)
+SAX.startElement(test)
+SAX.pcdata(
+    <![CDATA[abc], 18)
+SAX.characters(], 1)
+SAX.getEntity(gt)
+SAX.characters(>, 1)
+SAX.pcdata(
+  , 3)
+SAX.endElement(test)
+SAX.characters(
+, 1)
+SAX.endElement(collection)
+SAX.endDocument()
diff --git a/result/noent/cdata2 b/result/noent/cdata2
new file mode 100644
index 00000000..b4db7917
--- /dev/null
+++ b/result/noent/cdata2
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<collection>
+  <test><![CDATA[
+    <![CDATA[abc]]]>]&gt;<![CDATA[
+  ]]></test>
+</collection>
diff --git a/test/cdata2 b/test/cdata2
new file mode 100644
index 00000000..b4db7917
--- /dev/null
+++ b/test/cdata2
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<collection>
+  <test><![CDATA[
+    <![CDATA[abc]]]>]&gt;<![CDATA[
+  ]]></test>
+</collection>
diff --git a/testHTML.c b/testHTML.c
index f48612d8..81233248 100644
--- a/testHTML.c
+++ b/testHTML.c
@@ -46,6 +46,7 @@ static int repeat = 0;
 static int noout = 0;
 static int push = 0;
 static char *encoding = NULL;
+static int options = 0;
 
 xmlSAXHandler emptySAXHandlerStruct = {
     NULL, /* internalSubset */
@@ -725,7 +726,7 @@ parseAndPrintFile(char *filename) {
 	    fclose(f);
 	}
     } else {	
-	doc = htmlParseFile(filename, NULL);
+	doc = htmlReadFile(filename, NULL, options);
     }
     if (doc == NULL) {
         xmlGenericError(xmlGenericErrorContext,
diff --git a/tree.c b/tree.c
index 4eed575a..4b7ef5ee 100644
--- a/tree.c
+++ b/tree.c
@@ -7273,6 +7273,7 @@ xmlNodeDumpOutputInternal(xmlOutputBufferPtr buf, xmlDocPtr doc,
 	    xmlNodePtr cur, int level, int format, const char *encoding) {
     int i;
     xmlNodePtr tmp;
+    xmlChar *start, *end;
 
     if (cur == NULL) {
 #ifdef DEBUG_TREE
@@ -7356,10 +7357,22 @@ xmlNodeDumpOutputInternal(xmlOutputBufferPtr buf, xmlDocPtr doc,
 	return;
     }
     if (cur->type == XML_CDATA_SECTION_NODE) {
-        xmlOutputBufferWriteString(buf, "<![CDATA[");
-	if (cur->content != NULL)
-	    xmlOutputBufferWriteString(buf, (const char *)cur->content);
-        xmlOutputBufferWriteString(buf, "]]>");
+	start = end = cur->content;
+	while (*end != '\0') {
+	    if ((*end == ']') && (*(end + 1) == ']') && (*(end + 2) == '>')) {
+		end = end + 2;
+		xmlOutputBufferWriteString(buf, "<![CDATA[");
+		xmlOutputBufferWrite(buf, end - start, (const char *)start);
+		xmlOutputBufferWriteString(buf, "]]>");
+		start = end;
+	    }
+	    end++;
+	}
+	if (start != end) {
+	    xmlOutputBufferWriteString(buf, "<![CDATA[");
+	    xmlOutputBufferWriteString(buf, (const char *)start);
+	    xmlOutputBufferWriteString(buf, "]]>");
+	}
 	return;
     }
     if (cur->type == XML_ATTRIBUTE_NODE) {
@@ -7810,6 +7823,7 @@ xhtmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
             int level, int format, const char *encoding) {
     int i;
     xmlNodePtr tmp;
+    xmlChar *start, *end;
 
     if (cur == NULL) {
 #ifdef DEBUG_TREE
@@ -7893,10 +7907,22 @@ xhtmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
 	return;
     }
     if (cur->type == XML_CDATA_SECTION_NODE) {
-        xmlOutputBufferWriteString(buf, "<![CDATA[");
-	if (cur->content != NULL)
-	    xmlOutputBufferWriteString(buf, (const char *)cur->content);
-        xmlOutputBufferWriteString(buf, "]]>");
+	start = end = cur->content;
+	while (*end != '\0') {
+	    if (*end == ']' && *(end + 1) == ']' && *(end + 2) == '>') {
+		end = end + 2;
+		xmlOutputBufferWriteString(buf, "<![CDATA[");
+		xmlOutputBufferWrite(buf, end - start, (const char *)start);
+		xmlOutputBufferWriteString(buf, "]]>");
+		start = end;
+	    }
+	    end++;
+	}
+	if (start != end) {
+	    xmlOutputBufferWriteString(buf, "<![CDATA[");
+	    xmlOutputBufferWriteString(buf, (const char *)start);
+	    xmlOutputBufferWriteString(buf, "]]>");
+	}
 	return;
     }
 
@@ -7989,11 +8015,25 @@ xhtmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
 		    (xmlStrchr(child->content, '&') == NULL)) {
 		    xhtmlNodeDumpOutput(buf, doc, child, 0, 0, encoding);
 		} else {
-		    xmlOutputBufferWriteString(buf, "<![CDATA[");
-		    if (child->content != NULL)
-			xmlOutputBufferWriteString(buf,
-				(const char *)child->content);
-		    xmlOutputBufferWriteString(buf, "]]>");
+		    start = end = child->content;
+		    while (*end != '\0') {
+			if (*end == ']' &&
+			    *(end + 1) == ']' &&
+			    *(end + 2) == '>') {
+			    end = end + 2;
+			    xmlOutputBufferWriteString(buf, "<![CDATA[");
+			    xmlOutputBufferWrite(buf, end - start,
+						 (const char *)start);
+			    xmlOutputBufferWriteString(buf, "]]>");
+			    start = end;
+			}
+			end++;
+		    }
+		    if (start != end) {
+			xmlOutputBufferWriteString(buf, "<![CDATA[");
+			xmlOutputBufferWriteString(buf, (const char *)start);
+			xmlOutputBufferWriteString(buf, "]]>");
+		    }
 		}
 	    } else {
 		xhtmlNodeDumpOutput(buf, doc, child, 0, 0, encoding);
diff --git a/xmllint.c b/xmllint.c
index bd835d12..9ef1905f 100644
--- a/xmllint.c
+++ b/xmllint.c
@@ -764,7 +764,7 @@ static void parseAndPrintFile(char *filename, xmlParserCtxtPtr rectxt) {
         }
     }
     else if (html) {
-	doc = htmlParseFile(filename, NULL);
+	doc = htmlReadFile(filename, NULL, options);
     }
 #endif /* LIBXML_HTML_ENABLED */
     else {
author	Daniel Veillard <veillard@src.gnome.org>	2003-09-26 12:47:50 +0000
committer	Daniel Veillard <veillard@src.gnome.org>	2003-09-26 12:47:50 +0000
commit	9475a352bdd1f15b1e0c53472a74938f9d5dc04e (patch)
tree	a29cfb017b1ed70d637fed469b6563ed06dc941c
parent	60942def6a83d48469d1d9fb41564b8450a939b4 (diff)
download	libxml2-9475a352bdd1f15b1e0c53472a74938f9d5dc04e.tar.gz