summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Veillard <veillard@src.gnome.org>2003-09-26 12:47:50 +0000
committerDaniel Veillard <veillard@src.gnome.org>2003-09-26 12:47:50 +0000
commit9475a352bdd1f15b1e0c53472a74938f9d5dc04e (patch)
treea29cfb017b1ed70d637fed469b6563ed06dc941c
parent60942def6a83d48469d1d9fb41564b8450a939b4 (diff)
downloadlibxml2-9475a352bdd1f15b1e0c53472a74938f9d5dc04e.tar.gz
added the same htmlRead APIs than their XML counterparts new parser
* HTMLparser.c testHTML.c xmllint.c include/libxml/HTMLparser.h: added the same htmlRead APIs than their XML counterparts * include/libxml/parser.h: new parser options, not yet implemented, added an options field to the context. * tree.c: patch from Shaun McCance to fix bug #123238 when ]]> is found within a cdata section. * result/noent/cdata2 result/cdata2 result/cdata2.rdr result/cdata2.sax test/cdata2: add one more cdata test Daniel
-rw-r--r--ChangeLog11
-rw-r--r--HTMLparser.c525
-rw-r--r--include/libxml/HTMLparser.h82
-rw-r--r--include/libxml/parser.h15
-rw-r--r--result/cdata26
-rw-r--r--result/cdata2.rdr13
-rw-r--r--result/cdata2.sax18
-rw-r--r--result/noent/cdata26
-rw-r--r--test/cdata26
-rw-r--r--testHTML.c3
-rw-r--r--tree.c66
-rw-r--r--xmllint.c2
12 files changed, 732 insertions, 21 deletions
diff --git a/ChangeLog b/ChangeLog
index 4caf31ec..a2233424 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+Fri Sep 26 14:41:53 CEST 2003 Daniel Veillard <daniel@veillard.com>
+
+ * HTMLparser.c testHTML.c xmllint.c include/libxml/HTMLparser.h:
+ added the same htmlRead APIs than their XML counterparts
+ * include/libxml/parser.h: new parser options, not yet implemented,
+ added an options field to the context.
+ * tree.c: patch from Shaun McCance to fix bug #123238 when ]]>
+ is found within a cdata section.
+ * result/noent/cdata2 result/cdata2 result/cdata2.rdr
+ result/cdata2.sax test/cdata2: add one more cdata test
+
Thu Sep 25 23:03:23 CEST 2003 Daniel Veillard <daniel@veillard.com>
* parser.c xmllint.c doc/libxml2-api.xml include/libxml/parser.h:
diff --git a/HTMLparser.c b/HTMLparser.c
index d2cba584..caed896b 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -5541,4 +5541,529 @@ htmlNodeStatus(const htmlNodePtr node, int legacy) {
default: return HTML_NA ;
}
}
+/************************************************************************
+ * *
+ * New set (2.6.0) of simpler and more flexible APIs *
+ * *
+ ************************************************************************/
+/**
+ * DICT_FREE:
+ * @str: a string
+ *
+ * Free a string if it is not owned by the "dict" dictionnary in the
+ * current scope
+ */
+#define DICT_FREE(str) \
+ if ((str) && ((!dict) || \
+ (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \
+ xmlFree((char *)(str));
+
+/**
+ * htmlCtxtReset:
+ * @ctxt: an XML parser context
+ *
+ * Reset a parser context
+ */
+void
+htmlCtxtReset(htmlParserCtxtPtr ctxt)
+{
+ xmlParserInputPtr input;
+ xmlDictPtr dict = ctxt->dict;
+
+ while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
+ xmlFreeInputStream(input);
+ }
+ ctxt->inputNr = 0;
+ ctxt->input = NULL;
+
+ ctxt->spaceNr = 0;
+ ctxt->spaceTab[0] = -1;
+ ctxt->space = &ctxt->spaceTab[0];
+
+
+ ctxt->nodeNr = 0;
+ ctxt->node = NULL;
+
+ ctxt->nameNr = 0;
+ ctxt->name = NULL;
+
+ DICT_FREE(ctxt->version);
+ ctxt->version = NULL;
+ DICT_FREE(ctxt->encoding);
+ ctxt->encoding = NULL;
+ DICT_FREE(ctxt->directory);
+ ctxt->directory = NULL;
+ DICT_FREE(ctxt->extSubURI);
+ ctxt->extSubURI = NULL;
+ DICT_FREE(ctxt->extSubSystem);
+ ctxt->extSubSystem = NULL;
+ if (ctxt->myDoc != NULL)
+ xmlFreeDoc(ctxt->myDoc);
+ ctxt->myDoc = NULL;
+
+ ctxt->standalone = -1;
+ ctxt->hasExternalSubset = 0;
+ ctxt->hasPErefs = 0;
+ ctxt->html = 1;
+ ctxt->external = 0;
+ ctxt->instate = XML_PARSER_START;
+ ctxt->token = 0;
+
+ ctxt->wellFormed = 1;
+ ctxt->nsWellFormed = 1;
+ ctxt->valid = 1;
+ ctxt->vctxt.userData = ctxt;
+ ctxt->vctxt.error = xmlParserValidityError;
+ ctxt->vctxt.warning = xmlParserValidityWarning;
+ ctxt->record_info = 0;
+ ctxt->nbChars = 0;
+ ctxt->checkIndex = 0;
+ ctxt->inSubset = 0;
+ ctxt->errNo = XML_ERR_OK;
+ ctxt->depth = 0;
+ ctxt->charset = XML_CHAR_ENCODING_UTF8;
+ ctxt->catalogs = NULL;
+ xmlInitNodeInfoSeq(&ctxt->node_seq);
+
+ if (ctxt->attsDefault != NULL) {
+ xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
+ ctxt->attsDefault = NULL;
+ }
+ if (ctxt->attsSpecial != NULL) {
+ xmlHashFree(ctxt->attsSpecial, NULL);
+ ctxt->attsSpecial = NULL;
+ }
+}
+
+/**
+ * htmlCtxtUseOptions:
+ * @ctxt: an HTML parser context
+ * @options: a combination of htmlParserOption(s)
+ *
+ * Applies the options to the parser context
+ *
+ * Returns 0 in case of success, the set of unknown or unimplemented options
+ * in case of error.
+ */
+int
+htmlCtxtUseOptions(htmlParserCtxtPtr ctxt, int options)
+{
+ if (options & HTML_PARSE_NOWARNING) {
+ ctxt->sax->warning = NULL;
+ options -= XML_PARSE_NOWARNING;
+ }
+ if (options & HTML_PARSE_NOERROR) {
+ ctxt->sax->error = NULL;
+ ctxt->sax->fatalError = NULL;
+ options -= XML_PARSE_NOERROR;
+ }
+ if (options & HTML_PARSE_PEDANTIC) {
+ ctxt->pedantic = 1;
+ options -= XML_PARSE_PEDANTIC;
+ } else
+ ctxt->pedantic = 0;
+ if (options & XML_PARSE_NOBLANKS) {
+ ctxt->keepBlanks = 0;
+ ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
+ options -= XML_PARSE_NOBLANKS;
+ } else
+ ctxt->keepBlanks = 1;
+ ctxt->dictNames = 0;
+ return (options);
+}
+
+/**
+ * htmlDoRead:
+ * @ctxt: an HTML parser context
+ * @URL: the base URL to use for the document
+ * @encoding: the document encoding, or NULL
+ * @options: a combination of htmlParserOption(s)
+ * @reuse: keep the context for reuse
+ *
+ * Common front-end for the htmlRead functions
+ *
+ * Returns the resulting document tree or NULL
+ */
+static htmlDocPtr
+htmlDoRead(htmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
+ int options, int reuse)
+{
+ htmlDocPtr ret;
+
+ htmlCtxtUseOptions(ctxt, options);
+ ctxt->html = 1;
+ if (encoding != NULL) {
+ xmlCharEncodingHandlerPtr hdlr;
+
+ hdlr = xmlFindCharEncodingHandler(encoding);
+ if (hdlr != NULL)
+ xmlSwitchToEncoding(ctxt, hdlr);
+ }
+ if ((URL != NULL) && (ctxt->input != NULL) &&
+ (ctxt->input->filename == NULL))
+ ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
+ htmlParseDocument(ctxt);
+ ret = ctxt->myDoc;
+ ctxt->myDoc = NULL;
+ if (!reuse) {
+ if ((ctxt->dictNames) &&
+ (ret != NULL) &&
+ (ret->dict == ctxt->dict))
+ ctxt->dict = NULL;
+ xmlFreeParserCtxt(ctxt);
+ } else {
+ /* Must duplicate the reference to the dictionary */
+ if ((ctxt->dictNames) &&
+ (ret != NULL) &&
+ (ret->dict == ctxt->dict))
+ xmlDictReference(ctxt->dict);
+ }
+ return (ret);
+}
+
+/**
+ * htmlReadDoc:
+ * @cur: a pointer to a zero terminated string
+ * @URL: the base URL to use for the document
+ * @encoding: the document encoding, or NULL
+ * @options: a combination of htmlParserOption(s)
+ *
+ * parse an XML in-memory document and build a tree.
+ *
+ * Returns the resulting document tree
+ */
+htmlDocPtr
+htmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
+{
+ htmlParserCtxtPtr ctxt;
+
+ if (cur == NULL)
+ return (NULL);
+
+ ctxt = xmlCreateDocParserCtxt(cur);
+ if (ctxt == NULL)
+ return (NULL);
+ return (htmlDoRead(ctxt, URL, encoding, options, 0));
+}
+
+/**
+ * htmlReadFile:
+ * @filename: a file or URL
+ * @encoding: the document encoding, or NULL
+ * @options: a combination of htmlParserOption(s)
+ *
+ * parse an XML file from the filesystem or the network.
+ *
+ * Returns the resulting document tree
+ */
+htmlDocPtr
+htmlReadFile(const char *filename, const char *encoding, int options)
+{
+ htmlParserCtxtPtr ctxt;
+
+ ctxt = htmlCreateFileParserCtxt(filename, encoding);
+ if (ctxt == NULL)
+ return (NULL);
+ return (htmlDoRead(ctxt, NULL, NULL, options, 0));
+}
+
+/**
+ * htmlReadMemory:
+ * @buffer: a pointer to a char array
+ * @size: the size of the array
+ * @URL: the base URL to use for the document
+ * @encoding: the document encoding, or NULL
+ * @options: a combination of htmlParserOption(s)
+ *
+ * parse an XML in-memory document and build a tree.
+ *
+ * Returns the resulting document tree
+ */
+htmlDocPtr
+htmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
+{
+ htmlParserCtxtPtr ctxt;
+
+ ctxt = xmlCreateMemoryParserCtxt(buffer, size);
+ if (ctxt == NULL)
+ return (NULL);
+ return (htmlDoRead(ctxt, URL, encoding, options, 0));
+}
+
+/**
+ * htmlReadFd:
+ * @fd: an open file descriptor
+ * @URL: the base URL to use for the document
+ * @encoding: the document encoding, or NULL
+ * @options: a combination of htmlParserOption(s)
+ *
+ * parse an XML from a file descriptor and build a tree.
+ *
+ * Returns the resulting document tree
+ */
+htmlDocPtr
+htmlReadFd(int fd, const char *URL, const char *encoding, int options)
+{
+ htmlParserCtxtPtr ctxt;
+ xmlParserInputBufferPtr input;
+ xmlParserInputPtr stream;
+
+ if (fd < 0)
+ return (NULL);
+
+ input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
+ if (input == NULL)
+ return (NULL);
+ ctxt = xmlNewParserCtxt();
+ if (ctxt == NULL) {
+ xmlFreeParserInputBuffer(input);
+ return (NULL);
+ }
+ stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
+ if (stream == NULL) {
+ xmlFreeParserInputBuffer(input);
+ xmlFreeParserCtxt(ctxt);
+ return (NULL);
+ }
+ inputPush(ctxt, stream);
+ return (htmlDoRead(ctxt, URL, encoding, options, 0));
+}
+
+/**
+ * htmlReadIO:
+ * @ioread: an I/O read function
+ * @ioclose: an I/O close function
+ * @ioctx: an I/O handler
+ * @URL: the base URL to use for the document
+ * @encoding: the document encoding, or NULL
+ * @options: a combination of htmlParserOption(s)
+ *
+ * parse an HTML document from I/O functions and source and build a tree.
+ *
+ * Returns the resulting document tree
+ */
+htmlDocPtr
+htmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
+ void *ioctx, const char *URL, const char *encoding, int options)
+{
+ htmlParserCtxtPtr ctxt;
+ xmlParserInputBufferPtr input;
+ xmlParserInputPtr stream;
+
+ if (ioread == NULL)
+ return (NULL);
+
+ input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
+ XML_CHAR_ENCODING_NONE);
+ if (input == NULL)
+ return (NULL);
+ ctxt = xmlNewParserCtxt();
+ if (ctxt == NULL) {
+ xmlFreeParserInputBuffer(input);
+ return (NULL);
+ }
+ stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
+ if (stream == NULL) {
+ xmlFreeParserInputBuffer(input);
+ xmlFreeParserCtxt(ctxt);
+ return (NULL);
+ }
+ inputPush(ctxt, stream);
+ return (htmlDoRead(ctxt, URL, encoding, options, 0));
+}
+
+/**
+ * htmlCtxtReadDoc:
+ * @ctxt: an HTML parser context
+ * @cur: a pointer to a zero terminated string
+ * @URL: the base URL to use for the document
+ * @encoding: the document encoding, or NULL
+ * @options: a combination of htmlParserOption(s)
+ *
+ * parse an XML in-memory document and build a tree.
+ * This reuses the existing @ctxt parser context
+ *
+ * Returns the resulting document tree
+ */
+htmlDocPtr
+htmlCtxtReadDoc(htmlParserCtxtPtr ctxt, const xmlChar * cur,
+ const char *URL, const char *encoding, int options)
+{
+ xmlParserInputPtr stream;
+
+ if (cur == NULL)
+ return (NULL);
+ if (ctxt == NULL)
+ return (NULL);
+
+ htmlCtxtReset(ctxt);
+
+ stream = xmlNewStringInputStream(ctxt, cur);
+ if (stream == NULL) {
+ return (NULL);
+ }
+ inputPush(ctxt, stream);
+ return (htmlDoRead(ctxt, URL, encoding, options, 1));
+}
+
+/**
+ * htmlCtxtReadFile:
+ * @ctxt: an HTML parser context
+ * @filename: a file or URL
+ * @encoding: the document encoding, or NULL
+ * @options: a combination of htmlParserOption(s)
+ *
+ * parse an XML file from the filesystem or the network.
+ * This reuses the existing @ctxt parser context
+ *
+ * Returns the resulting document tree
+ */
+htmlDocPtr
+htmlCtxtReadFile(htmlParserCtxtPtr ctxt, const char *filename,
+ const char *encoding, int options)
+{
+ xmlParserInputPtr stream;
+
+ if (filename == NULL)
+ return (NULL);
+ if (ctxt == NULL)
+ return (NULL);
+
+ htmlCtxtReset(ctxt);
+
+ stream = xmlNewInputFromFile(ctxt, filename);
+ if (stream == NULL) {
+ return (NULL);
+ }
+ inputPush(ctxt, stream);
+ return (htmlDoRead(ctxt, NULL, encoding, options, 1));
+}
+
+/**
+ * htmlCtxtReadMemory:
+ * @ctxt: an HTML parser context
+ * @buffer: a pointer to a char array
+ * @size: the size of the array
+ * @URL: the base URL to use for the document
+ * @encoding: the document encoding, or NULL
+ * @options: a combination of htmlParserOption(s)
+ *
+ * parse an XML in-memory document and build a tree.
+ * This reuses the existing @ctxt parser context
+ *
+ * Returns the resulting document tree
+ */
+htmlDocPtr
+htmlCtxtReadMemory(htmlParserCtxtPtr ctxt, const char *buffer, int size,
+ const char *URL, const char *encoding, int options)
+{
+ xmlParserInputBufferPtr input;
+ xmlParserInputPtr stream;
+
+ if (ctxt == NULL)
+ return (NULL);
+ if (buffer == NULL)
+ return (NULL);
+
+ htmlCtxtReset(ctxt);
+
+ input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
+ if (input == NULL) {
+ return(NULL);
+ }
+
+ stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
+ if (stream == NULL) {
+ xmlFreeParserInputBuffer(input);
+ return(NULL);
+ }
+
+ inputPush(ctxt, stream);
+ return (htmlDoRead(ctxt, URL, encoding, options, 1));
+}
+
+/**
+ * htmlCtxtReadFd:
+ * @ctxt: an HTML parser context
+ * @fd: an open file descriptor
+ * @URL: the base URL to use for the document
+ * @encoding: the document encoding, or NULL
+ * @options: a combination of htmlParserOption(s)
+ *
+ * parse an XML from a file descriptor and build a tree.
+ * This reuses the existing @ctxt parser context
+ *
+ * Returns the resulting document tree
+ */
+htmlDocPtr
+htmlCtxtReadFd(htmlParserCtxtPtr ctxt, int fd,
+ const char *URL, const char *encoding, int options)
+{
+ xmlParserInputBufferPtr input;
+ xmlParserInputPtr stream;
+
+ if (fd < 0)
+ return (NULL);
+ if (ctxt == NULL)
+ return (NULL);
+
+ htmlCtxtReset(ctxt);
+
+
+ input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
+ if (input == NULL)
+ return (NULL);
+ stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
+ if (stream == NULL) {
+ xmlFreeParserInputBuffer(input);
+ return (NULL);
+ }
+ inputPush(ctxt, stream);
+ return (htmlDoRead(ctxt, URL, encoding, options, 1));
+}
+
+/**
+ * htmlCtxtReadIO:
+ * @ctxt: an HTML parser context
+ * @ioread: an I/O read function
+ * @ioclose: an I/O close function
+ * @ioctx: an I/O handler
+ * @URL: the base URL to use for the document
+ * @encoding: the document encoding, or NULL
+ * @options: a combination of htmlParserOption(s)
+ *
+ * parse an HTML document from I/O functions and source and build a tree.
+ * This reuses the existing @ctxt parser context
+ *
+ * Returns the resulting document tree
+ */
+htmlDocPtr
+htmlCtxtReadIO(htmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
+ xmlInputCloseCallback ioclose, void *ioctx,
+ const char *URL,
+ const char *encoding, int options)
+{
+ xmlParserInputBufferPtr input;
+ xmlParserInputPtr stream;
+
+ if (ioread == NULL)
+ return (NULL);
+ if (ctxt == NULL)
+ return (NULL);
+
+ htmlCtxtReset(ctxt);
+
+ input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
+ XML_CHAR_ENCODING_NONE);
+ if (input == NULL)
+ return (NULL);
+ stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
+ if (stream == NULL) {
+ xmlFreeParserInputBuffer(input);
+ return (NULL);
+ }
+ inputPush(ctxt, stream);
+ return (htmlDoRead(ctxt, URL, encoding, options, 1));
+}
+
#endif /* LIBXML_HTML_ENABLED */
diff --git a/include/libxml/HTMLparser.h b/include/libxml/HTMLparser.h
index 7e008bd5..66f2d809 100644
--- a/include/libxml/HTMLparser.h
+++ b/include/libxml/HTMLparser.h
@@ -154,6 +154,88 @@ XMLPUBFUN int XMLCALL
int size,
int terminate);
+/*
+ * New set of simpler/more flexible APIs
+ */
+/**
+ * xmlParserOption:
+ *
+ * This is the set of XML parser options that can be passed down
+ * to the xmlReadDoc() and similar calls.
+ */
+typedef enum {
+ HTML_PARSE_NOERROR = 1<<5, /* suppress error reports */
+ HTML_PARSE_NOWARNING= 1<<6, /* suppress warning reports */
+ HTML_PARSE_PEDANTIC = 1<<7, /* pedantic error reporting */
+ HTML_PARSE_NOBLANKS = 1<<8, /* remove blank nodes */
+ HTML_PARSE_NONET = 1<<11 /* Forbid network access */
+} htmlParserOption;
+
+XMLPUBFUN void XMLCALL
+ htmlCtxtReset (htmlParserCtxtPtr ctxt);
+XMLPUBFUN int XMLCALL
+ htmlCtxtUseOptions (htmlParserCtxtPtr ctxt,
+ int options);
+XMLPUBFUN htmlDocPtr XMLCALL
+ htmlReadDoc (const xmlChar *cur,
+ const char *URL,
+ const char *encoding,
+ int options);
+XMLPUBFUN htmlDocPtr XMLCALL
+ htmlReadFile (const char *URL,
+ const char *encoding,
+ int options);
+XMLPUBFUN htmlDocPtr XMLCALL
+ htmlReadMemory (const char *buffer,
+ int size,
+ const char *URL,
+ const char *encoding,
+ int options);
+XMLPUBFUN htmlDocPtr XMLCALL
+ htmlReadFd (int fd,
+ const char *URL,
+ const char *encoding,
+ int options);
+XMLPUBFUN htmlDocPtr XMLCALL
+ htmlReadIO (xmlInputReadCallback ioread,
+ xmlInputCloseCallback ioclose,
+ void *ioctx,
+ const char *URL,
+ const char *encoding,
+ int options);
+XMLPUBFUN htmlDocPtr XMLCALL
+ htmlCtxtReadDoc (xmlParserCtxtPtr ctxt,
+ const xmlChar *cur,
+ const char *URL,
+ const char *encoding,
+ int options);
+XMLPUBFUN htmlDocPtr XMLCALL
+ htmlCtxtReadFile (xmlParserCtxtPtr ctxt,
+ const char *filename,
+ const char *encoding,
+ int options);
+XMLPUBFUN htmlDocPtr XMLCALL
+ htmlCtxtReadMemory (xmlParserCtxtPtr ctxt,
+ const char *buffer,
+ int size,
+ const char *URL,
+ const char *encoding,
+ int options);
+XMLPUBFUN htmlDocPtr XMLCALL
+ htmlCtxtReadFd (xmlParserCtxtPtr ctxt,
+ int fd,
+ const char *URL,
+ const char *encoding,
+ int options);
+XMLPUBFUN htmlDocPtr XMLCALL
+ htmlCtxtReadIO (xmlParserCtxtPtr ctxt,
+ xmlInputReadCallback ioread,
+ xmlInputCloseCallback ioclose,
+ void *ioctx,
+ const char *URL,
+ const char *encoding,
+ int options);
+
/* NRK/Jan2003: further knowledge of HTML structure
*/
typedef enum {
diff --git a/include/libxml/parser.h b/include/libxml/parser.h
index e095babd..73dd71f9 100644
--- a/include/libxml/parser.h
+++ b/include/libxml/parser.h
@@ -262,15 +262,16 @@ struct _xmlParserCtxt {
xmlHashTablePtr attsDefault; /* defaulted attributes if any */
xmlHashTablePtr attsSpecial; /* non-CDATA attributes if any */
int nsWellFormed; /* is the document XML Nanespace okay */
+ int options; /* Extra options */
/*
* Those fields are needed only for treaming parsing so far
*/
- int dictNames; /* Use dictionary names for the tree */
- int freeElemsNr; /* number of freed element nodes */
- xmlNodePtr freeElems; /* List of freed element nodes */
- int freeAttrsNr; /* number of freed attributes nodes */
- xmlAttrPtr freeAttrs; /* List of freed attributes nodes */
+ int dictNames; /* Use dictionary names for the tree */
+ int freeElemsNr; /* number of freed element nodes */
+ xmlNodePtr freeElems; /* List of freed element nodes */
+ int freeAttrsNr; /* number of freed attributes nodes */
+ xmlAttrPtr freeAttrs; /* List of freed attributes nodes */
};
/**
@@ -1045,7 +1046,9 @@ typedef enum {
XML_PARSE_SAX1 = 1<<9, /* use the SAX1 interface internally */
XML_PARSE_XINCLUDE = 1<<10,/* Implement XInclude substitition */
XML_PARSE_NONET = 1<<11,/* Forbid network access */
- XML_PARSE_NODICT = 1<<12 /* Do not reuse the context dictionnary */
+ XML_PARSE_NODICT = 1<<12,/* Do not reuse the context dictionnary */
+ XML_PARSE_NSCLEAN = 1<<13,/* remove redundant namespaces declarations */
+ XML_PARSE_NOCDATA = 1<<14 /* merge CDATA as text nodes */
} xmlParserOption;
XMLPUBFUN void XMLCALL
diff --git a/result/cdata2 b/result/cdata2
new file mode 100644
index 00000000..b4db7917
--- /dev/null
+++ b/result/cdata2
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<collection>
+ <test><![CDATA[
+ <![CDATA[abc]]]>]&gt;<![CDATA[
+ ]]></test>
+</collection>
diff --git a/result/cdata2.rdr b/result/cdata2.rdr
new file mode 100644
index 00000000..e69a6729
--- /dev/null
+++ b/result/cdata2.rdr
@@ -0,0 +1,13 @@
+0 1 collection 0 0
+1 14 #text 0 1
+
+1 1 test 0 0
+2 4 #cdata-section 0 1
+ <![CDATA[abc]
+2 3 #text 0 1 ]>
+2 4 #cdata-section 0 1
+
+1 15 test 0 0
+1 14 #text 0 1
+
+0 15 collection 0 0
diff --git a/result/cdata2.sax b/result/cdata2.sax
new file mode 100644
index 00000000..46b025e8
--- /dev/null
+++ b/result/cdata2.sax
@@ -0,0 +1,18 @@
+SAX.setDocumentLocator()
+SAX.startDocument()
+SAX.startElement(collection)
+SAX.characters(
+ , 3)
+SAX.startElement(test)
+SAX.pcdata(
+ <![CDATA[abc], 18)
+SAX.characters(], 1)
+SAX.getEntity(gt)
+SAX.characters(>, 1)
+SAX.pcdata(
+ , 3)
+SAX.endElement(test)
+SAX.characters(
+, 1)
+SAX.endElement(collection)
+SAX.endDocument()
diff --git a/result/noent/cdata2 b/result/noent/cdata2
new file mode 100644
index 00000000..b4db7917
--- /dev/null
+++ b/result/noent/cdata2
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<collection>
+ <test><![CDATA[
+ <![CDATA[abc]]]>]&gt;<![CDATA[
+ ]]></test>
+</collection>
diff --git a/test/cdata2 b/test/cdata2
new file mode 100644
index 00000000..b4db7917
--- /dev/null
+++ b/test/cdata2
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<collection>
+ <test><![CDATA[
+ <![CDATA[abc]]]>]&gt;<![CDATA[
+ ]]></test>
+</collection>
diff --git a/testHTML.c b/testHTML.c
index f48612d8..81233248 100644
--- a/testHTML.c
+++ b/testHTML.c
@@ -46,6 +46,7 @@ static int repeat = 0;
static int noout = 0;
static int push = 0;
static char *encoding = NULL;
+static int options = 0;
xmlSAXHandler emptySAXHandlerStruct = {
NULL, /* internalSubset */
@@ -725,7 +726,7 @@ parseAndPrintFile(char *filename) {
fclose(f);
}
} else {
- doc = htmlParseFile(filename, NULL);
+ doc = htmlReadFile(filename, NULL, options);
}
if (doc == NULL) {
xmlGenericError(xmlGenericErrorContext,
diff --git a/tree.c b/tree.c
index 4eed575a..4b7ef5ee 100644
--- a/tree.c
+++ b/tree.c
@@ -7273,6 +7273,7 @@ xmlNodeDumpOutputInternal(xmlOutputBufferPtr buf, xmlDocPtr doc,
xmlNodePtr cur, int level, int format, const char *encoding) {
int i;
xmlNodePtr tmp;
+ xmlChar *start, *end;
if (cur == NULL) {
#ifdef DEBUG_TREE
@@ -7356,10 +7357,22 @@ xmlNodeDumpOutputInternal(xmlOutputBufferPtr buf, xmlDocPtr doc,
return;
}
if (cur->type == XML_CDATA_SECTION_NODE) {
- xmlOutputBufferWriteString(buf, "<![CDATA[");
- if (cur->content != NULL)
- xmlOutputBufferWriteString(buf, (const char *)cur->content);
- xmlOutputBufferWriteString(buf, "]]>");
+ start = end = cur->content;
+ while (*end != '\0') {
+ if ((*end == ']') && (*(end + 1) == ']') && (*(end + 2) == '>')) {
+ end = end + 2;
+ xmlOutputBufferWriteString(buf, "<![CDATA[");
+ xmlOutputBufferWrite(buf, end - start, (const char *)start);
+ xmlOutputBufferWriteString(buf, "]]>");
+ start = end;
+ }
+ end++;
+ }
+ if (start != end) {
+ xmlOutputBufferWriteString(buf, "<![CDATA[");
+ xmlOutputBufferWriteString(buf, (const char *)start);
+ xmlOutputBufferWriteString(buf, "]]>");
+ }
return;
}
if (cur->type == XML_ATTRIBUTE_NODE) {
@@ -7810,6 +7823,7 @@ xhtmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
int level, int format, const char *encoding) {
int i;
xmlNodePtr tmp;
+ xmlChar *start, *end;
if (cur == NULL) {
#ifdef DEBUG_TREE
@@ -7893,10 +7907,22 @@ xhtmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
return;
}
if (cur->type == XML_CDATA_SECTION_NODE) {
- xmlOutputBufferWriteString(buf, "<![CDATA[");
- if (cur->content != NULL)
- xmlOutputBufferWriteString(buf, (const char *)cur->content);
- xmlOutputBufferWriteString(buf, "]]>");
+ start = end = cur->content;
+ while (*end != '\0') {
+ if (*end == ']' && *(end + 1) == ']' && *(end + 2) == '>') {
+ end = end + 2;
+ xmlOutputBufferWriteString(buf, "<![CDATA[");
+ xmlOutputBufferWrite(buf, end - start, (const char *)start);
+ xmlOutputBufferWriteString(buf, "]]>");
+ start = end;
+ }
+ end++;
+ }
+ if (start != end) {
+ xmlOutputBufferWriteString(buf, "<![CDATA[");
+ xmlOutputBufferWriteString(buf, (const char *)start);
+ xmlOutputBufferWriteString(buf, "]]>");
+ }
return;
}
@@ -7989,11 +8015,25 @@ xhtmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
(xmlStrchr(child->content, '&') == NULL)) {
xhtmlNodeDumpOutput(buf, doc, child, 0, 0, encoding);
} else {
- xmlOutputBufferWriteString(buf, "<![CDATA[");
- if (child->content != NULL)
- xmlOutputBufferWriteString(buf,
- (const char *)child->content);
- xmlOutputBufferWriteString(buf, "]]>");
+ start = end = child->content;
+ while (*end != '\0') {
+ if (*end == ']' &&
+ *(end + 1) == ']' &&
+ *(end + 2) == '>') {
+ end = end + 2;
+ xmlOutputBufferWriteString(buf, "<![CDATA[");
+ xmlOutputBufferWrite(buf, end - start,
+ (const char *)start);
+ xmlOutputBufferWriteString(buf, "]]>");
+ start = end;
+ }
+ end++;
+ }
+ if (start != end) {
+ xmlOutputBufferWriteString(buf, "<![CDATA[");
+ xmlOutputBufferWriteString(buf, (const char *)start);
+ xmlOutputBufferWriteString(buf, "]]>");
+ }
}
} else {
xhtmlNodeDumpOutput(buf, doc, child, 0, 0, encoding);
diff --git a/xmllint.c b/xmllint.c
index bd835d12..9ef1905f 100644
--- a/xmllint.c
+++ b/xmllint.c
@@ -764,7 +764,7 @@ static void parseAndPrintFile(char *filename, xmlParserCtxtPtr rectxt) {
}
}
else if (html) {
- doc = htmlParseFile(filename, NULL);
+ doc = htmlReadFile(filename, NULL, options);
}
#endif /* LIBXML_HTML_ENABLED */
else {