diff options
Diffstat (limited to 'libjava/gnu/xml/transform/StreamSerializer.java')
-rw-r--r-- | libjava/gnu/xml/transform/StreamSerializer.java | 632 |
1 files changed, 632 insertions, 0 deletions
diff --git a/libjava/gnu/xml/transform/StreamSerializer.java b/libjava/gnu/xml/transform/StreamSerializer.java new file mode 100644 index 00000000000..136105a4ebc --- /dev/null +++ b/libjava/gnu/xml/transform/StreamSerializer.java @@ -0,0 +1,632 @@ +/* StreamSerializer.java -- + Copyright (C) 2004 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + +package gnu.xml.transform; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.io.UnsupportedEncodingException; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import javax.xml.XMLConstants; +import org.w3c.dom.Attr; +import org.w3c.dom.Document; +import org.w3c.dom.DocumentType; +import org.w3c.dom.NamedNodeMap; +import org.w3c.dom.Node; + +/** + * Serializes a DOM node to an output stream. + * + * @author <a href='mailto:dog@gnu.org'>Chris Burdess</a> + */ +public class StreamSerializer +{ + + static final int SPACE = 0x20; + static final int BANG = 0x21; // ! + static final int APOS = 0x27; // ' + static final int SLASH = 0x2f; // / + static final int BRA = 0x3c; // < + static final int KET = 0x3e; // > + static final int EQ = 0x3d; // = + + protected String encoding; + boolean compatibilityMode; + final int mode; + final Map namespaces; + protected String eol; + Collection cdataSectionElements = Collections.EMPTY_SET; + + protected boolean discardDefaultContent; + protected boolean xmlDeclaration = true; + + public StreamSerializer() + { + this(Stylesheet.OUTPUT_XML, null, null); + } + + public StreamSerializer(String encoding) + { + this(Stylesheet.OUTPUT_XML, encoding, null); + } + + public StreamSerializer(int mode, String encoding, String eol) + { + this.mode = mode; + if (encoding == null) + { + encoding = "UTF-8"; + } + this.encoding = encoding.intern(); + compatibilityMode = true; + if (encoding.length() > 3) + { + String p = encoding.substring(0, 3); + if (p.equalsIgnoreCase("UTF") || + p.equalsIgnoreCase("UCS")) + { + compatibilityMode = false; + } + } + this.eol = (eol != null) ? eol : System.getProperty("line.separator"); + namespaces = new HashMap(); + } + + void setCdataSectionElements(Collection c) + { + cdataSectionElements = c; + } + + public void serialize(final Node node, final OutputStream out) + throws IOException + { + serialize(node, out, false); + } + + void serialize(final Node node, final OutputStream out, + boolean convertToCdata) + throws IOException + { + if (out == null) + { + throw new NullPointerException("no output stream"); + } + String value, prefix; + Node children; + Node next = node.getNextSibling(); + String uri = node.getNamespaceURI(); + boolean defined = false; + short nt = node.getNodeType(); + if (convertToCdata && nt == Node.TEXT_NODE) + { + nt = Node.CDATA_SECTION_NODE; + } + switch (nt) + { + case Node.ATTRIBUTE_NODE: + prefix = node.getPrefix(); + if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(uri) || + XMLConstants.XMLNS_ATTRIBUTE.equals(prefix) || + (prefix != null && prefix.startsWith("xmlns:"))) + { + String nsuri = node.getNodeValue(); + if (isDefined(nsuri)) + { + break; + } + define(nsuri, node.getLocalName()); + } + else if (uri != null && !isDefined(uri)) + { + prefix = define(uri, prefix); + String nsname = (prefix == null) ? "xmlns" : "xmlns:" + prefix; + out.write(SPACE); + out.write(encodeText(nsname)); + out.write(EQ); + String nsvalue = "'" + encode(uri, true, true) + "'"; + out.write(nsvalue.getBytes(encoding)); + defined = true; + } + out.write(SPACE); + String a_nodeName = node.getNodeName(); + out.write(encodeText(a_nodeName)); + String a_nodeValue = node.getNodeValue(); + if (mode == Stylesheet.OUTPUT_HTML && + a_nodeName.equals(a_nodeValue)) + { + break; + } + out.write(EQ); + value = "'" + encode(a_nodeValue, true, true) + "'"; + out.write(encodeText(value)); + break; + case Node.ELEMENT_NODE: + value = node.getNodeName(); + out.write(BRA); + out.write(encodeText(value)); + if (uri != null && !isDefined(uri)) + { + prefix = define(uri, node.getPrefix()); + String nsname = (prefix == null) ? "xmlns" : "xmlns:" + prefix; + out.write(SPACE); + out.write(encodeText(nsname)); + out.write(EQ); + String nsvalue = "'" + encode(uri, true, true) + "'"; + out.write(encodeText(nsvalue)); + defined = true; + } + NamedNodeMap attrs = node.getAttributes(); + if (attrs != null) + { + int len = attrs.getLength(); + for (int i = 0; i < len; i++) + { + Attr attr = (Attr) attrs.item(i); + if (discardDefaultContent && !attr.getSpecified()) + { + // NOOP + } + else + { + serialize(attr, out, false); + } + } + } + convertToCdata = cdataSectionElements.contains(value); + children = node.getFirstChild(); + if (children == null) + { + out.write(SLASH); + out.write(KET); + } + else + { + out.write(KET); + serialize(children, out, convertToCdata); + out.write(BRA); + out.write(SLASH); + out.write(encodeText(value)); + out.write(KET); + } + break; + case Node.TEXT_NODE: + value = node.getNodeValue(); + if (!"yes".equals(node.getUserData("disable-output-escaping"))) + { + value = encode(value, false, false); + } + out.write(encodeText(value)); + break; + case Node.CDATA_SECTION_NODE: + value = "<![CDATA[" + node.getNodeValue() + "]]>"; + out.write(encodeText(value)); + break; + case Node.COMMENT_NODE: + value = "<!--" + node.getNodeValue() + "-->"; + out.write(encodeText(value)); + Node cp = node.getParentNode(); + if (cp != null && cp.getNodeType() == Node.DOCUMENT_NODE) + { + out.write(encodeText(eol)); + } + break; + case Node.DOCUMENT_NODE: + case Node.DOCUMENT_FRAGMENT_NODE: + if (mode == Stylesheet.OUTPUT_XML) + { + if ("UTF-16".equalsIgnoreCase(encoding)) + { + out.write(0xfe); + out.write(0xff); + } + if (!"yes".equals(node.getUserData("omit-xml-declaration")) && + xmlDeclaration) + { + Document doc = (node instanceof Document) ? + (Document) node : null; + String version = (doc != null) ? doc.getXmlVersion() : null; + if (version == null) + { + version = (String) node.getUserData("version"); + } + if (version == null) + { + version = "1.0"; + } + out.write(BRA); + out.write(0x3f); + out.write("xml version='".getBytes("US-ASCII")); + out.write(version.getBytes("US-ASCII")); + out.write(APOS); + if (!("UTF-8".equalsIgnoreCase(encoding))) + { + out.write(" encoding='".getBytes("US-ASCII")); + out.write(encoding.getBytes("US-ASCII")); + out.write(APOS); + } + if ((doc != null && doc.getXmlStandalone()) || + "yes".equals(node.getUserData("standalone"))) + { + out.write(" standalone='yes'".getBytes("US-ASCII")); + } + out.write(0x3f); + out.write(KET); + out.write(encodeText(eol)); + } + // TODO warn if not outputting the declaration would be a + // problem + } + else if (mode == Stylesheet.OUTPUT_HTML) + { + // Ensure that encoding is accessible + String mediaType = (String) node.getUserData("media-type"); + if (mediaType == null) + { + mediaType = "text/html"; + } + String contentType = mediaType + "; charset=" + + ((encoding.indexOf(' ') != -1) ? + "\"" + encoding + "\"" : + encoding); + Document doc = (node instanceof Document) ? (Document) node : + node.getOwnerDocument(); + Node html = null; + for (Node ctx = node.getFirstChild(); ctx != null; + ctx = ctx.getNextSibling()) + { + if (ctx.getNodeType() == Node.ELEMENT_NODE) + { + html = ctx; + break; + } + } + if (html == null) + { + html = doc.createElement("html"); + node.appendChild(html); + } + Node head = null; + for (Node ctx = html.getFirstChild(); ctx != null; + ctx = ctx.getNextSibling()) + { + if (ctx.getNodeType() == Node.ELEMENT_NODE && + "head".equalsIgnoreCase(ctx.getLocalName())) + { + head = ctx; + break; + } + } + if (head == null) + { + head = doc.createElement("head"); + Node c1 = null; + for (Node ctx = html.getFirstChild(); ctx != null; + ctx = ctx.getNextSibling()) + { + if (ctx.getNodeType() == Node.ELEMENT_NODE) + { + c1 = ctx; + break; + } + } + if (c1 != null) + { + html.insertBefore(head, c1); + } + else + { + html.appendChild(head); + } + } + Node meta = null; + Node metaContent = null; + for (Node ctx = head.getFirstChild(); ctx != null; + ctx = ctx.getNextSibling()) + { + if (ctx.getNodeType() == Node.ELEMENT_NODE && + "meta".equalsIgnoreCase(ctx.getLocalName())) + { + NamedNodeMap metaAttrs = ctx.getAttributes(); + int len = metaAttrs.getLength(); + String httpEquiv = null; + Node content = null; + for (int i = 0; i < len; i++) + { + Node attr = metaAttrs.item(i); + String attrName = attr.getNodeName(); + if ("http-equiv".equalsIgnoreCase(attrName)) + { + httpEquiv = attr.getNodeValue(); + } + else if ("content".equalsIgnoreCase(attrName)) + { + content = attr; + } + } + if ("Content-Type".equalsIgnoreCase(httpEquiv)) + { + meta = ctx; + metaContent = content; + break; + } + } + } + if (meta == null) + { + meta = doc.createElement("meta"); + // Insert first + Node first = head.getFirstChild(); + if (first == null) + { + head.appendChild(meta); + } + else + { + head.insertBefore(meta, first); + } + Node metaHttpEquiv = doc.createAttribute("http-equiv"); + meta.getAttributes().setNamedItem(metaHttpEquiv); + metaHttpEquiv.setNodeValue("Content-Type"); + } + if (metaContent == null) + { + metaContent = doc.createAttribute("content"); + meta.getAttributes().setNamedItem(metaContent); + } + metaContent.setNodeValue(contentType); + // phew + } + children = node.getFirstChild(); + if (children != null) + { + serialize(children, out, convertToCdata); + } + break; + case Node.DOCUMENT_TYPE_NODE: + DocumentType doctype = (DocumentType) node; + out.write(BRA); + out.write(BANG); + value = doctype.getNodeName(); + out.write(encodeText(value)); + String publicId = doctype.getPublicId(); + if (publicId != null) + { + out.write(encodeText(" PUBLIC ")); + out.write(APOS); + out.write(encodeText(publicId)); + out.write(APOS); + } + String systemId = doctype.getSystemId(); + if (systemId != null) + { + out.write(encodeText(" SYSTEM ")); + out.write(APOS); + out.write(encodeText(systemId)); + out.write(APOS); + } + String internalSubset = doctype.getInternalSubset(); + if (internalSubset != null) + { + out.write(encodeText(internalSubset)); + } + out.write(KET); + out.write(eol.getBytes(encoding)); + break; + case Node.ENTITY_REFERENCE_NODE: + value = "&" + node.getNodeValue() + ";"; + out.write(encodeText(value)); + break; + case Node.PROCESSING_INSTRUCTION_NODE: + value = "<?" + node.getNodeName() + " " + node.getNodeValue() + "?>"; + out.write(encodeText(value)); + Node pp = node.getParentNode(); + if (pp != null && pp.getNodeType() == Node.DOCUMENT_NODE) + { + out.write(encodeText(eol)); + } + break; + } + if (defined) + { + undefine(uri); + } + if (next != null) + { + serialize(next, out, convertToCdata); + } + } + + boolean isDefined(String uri) + { + return XMLConstants.XML_NS_URI.equals(uri) || + XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(uri) || + namespaces.containsKey(uri); + } + + String define(String uri, String prefix) + { + while (namespaces.containsValue(prefix)) + { + // Fabricate new prefix + prefix = prefix + "_"; + } + namespaces.put(uri, prefix); + return prefix; + } + + void undefine(String uri) + { + namespaces.remove(uri); + } + + final byte[] encodeText(String text) + throws UnsupportedEncodingException + { + if (compatibilityMode) + { + int len = text.length(); + StringBuffer buf = null; + for (int i = 0; i < len; i++) + { + char c = text.charAt(i); + if (c >= 127) + { + if (buf == null) + { + buf = new StringBuffer(text.substring(0, i)); + } + buf.append('&'); + buf.append('#'); + buf.append((int) c); + buf.append(';'); + } + else if (buf != null) + { + buf.append(c); + } + } + if (buf != null) + { + text = buf.toString(); + } + } + return text.getBytes(encoding); + } + + String encode(String text, boolean encodeCtl, boolean inAttr) + { + int len = text.length(); + StringBuffer buf = null; + for (int i = 0; i < len; i++) + { + char c = text.charAt(i); + if (c == '<') + { + if (buf == null) + { + buf = new StringBuffer(text.substring(0, i)); + } + buf.append("<"); + } + else if (c == '>') + { + if (buf == null) + { + buf = new StringBuffer(text.substring(0, i)); + } + buf.append(">"); + } + else if (c == '&') + { + if (mode == Stylesheet.OUTPUT_HTML && (i + 1) < len && + text.charAt(i + 1) == '{') + { + if (buf != null) + { + buf.append(c); + } + } + else + { + if (buf == null) + { + buf = new StringBuffer(text.substring(0, i)); + } + buf.append("&"); + } + } + else if (c == '\'' && inAttr) + { + if (buf == null) + { + buf = new StringBuffer(text.substring(0, i)); + } + buf.append("'"); + } + else if (c == '"' && inAttr) + { + if (buf == null) + { + buf = new StringBuffer(text.substring(0, i)); + } + buf.append("""); + } + else if (encodeCtl) + { + if (c < 0x20) + { + if (buf == null) + { + buf = new StringBuffer(text.substring(0, i)); + } + buf.append('&'); + buf.append('#'); + buf.append((int) c); + buf.append(';'); + } + else if (buf != null) + { + buf.append(c); + } + } + else if (buf != null) + { + buf.append(c); + } + } + return (buf == null) ? text : buf.toString(); + } + + String toString(Node node) + { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + try + { + serialize(node, out); + return new String(out.toByteArray(), encoding); + } + catch (IOException e) + { + throw new RuntimeException(e.getMessage()); + } + } + +} |