/* * $Id: Resolver.java,v 1.1.1.1 2003-02-01 02:10:23 cbj Exp $ * Copyright (C) 1999-2001 David Brownell * * This file is part of GNU JAXP, a library. * * GNU JAXP is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * GNU JAXP is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * As a special exception, if you link this library with other files to * produce an executable, this library does not by itself cause the * resulting executable to be covered by the GNU General Public License. * This exception does not however invalidate any other reasons why the * executable file might be covered by the GNU General Public License. */ package gnu.xml.util; import java.io.*; import java.util.Dictionary; import java.util.Hashtable; import org.xml.sax.*; // $Id: Resolver.java,v 1.1.1.1 2003-02-01 02:10:23 cbj Exp $ /** * Utility implementation of a SAX resolver, which can be used to improve * network utilization of SAX based XML components. It does this by * supporting local caches of external entities. * SAX parsers should use such local caches when possible. * * @see XCat * * @version $Date: 2003-02-01 02:10:23 $ */ public class Resolver implements EntityResolver, Cloneable { /** * Updates a dictionary used to map PUBLIC identifiers to file names, * so that it uses the mappings in a specified directory. * * @param mappings Array of string pairs, where the first member * of each pair is a PUBLIC identifier and the second is the * name of a file, relative to the specified directory. * @param directory File holding the specified files. */ public static void addDirectoryMapping ( Dictionary table, String mappings [][], File directory ) throws IOException { for (int i = 0; i < mappings.length; i++) { File file = new File (directory, mappings [i][1]); String temp; if (!file.exists ()) // ?? log a warning ?? continue; temp = fileToURL (file); table.put (mappings [i][0], temp); } } // FIXME: these *URL routines don't quite belong here, except // that they're all in the same spirit of making it easy to // use local filesystem URIs with XML parsers. /** * Provides the URL for a named file, without relying on the JDK 1.2 * {@link java.io.File#toURL File.toURL}() utility method. * * @param filename the file name to convert. Relative file names * are resolved the way the JVM resolves them (current to the * process-global current working directory). * * @exception IOException if the file does not exist */ public static String fileNameToURL (String filename) throws IOException { return fileToURL (new File (filename)); } /** * Provides the URL for a file, without relying on the JDK 1.2 * {@link java.io.File#toURL File.toURL}() utility method. * * @param f the file to convert. Relative file names * are resolved the way the JVM resolves them (current to the * process-global current working directory). * * @exception IOException if the file does not exist */ public static String fileToURL (File f) throws IOException { String temp; // NOTE: the javax.xml.parsers.DocumentBuilder and // javax.xml.transform.stream.StreamSource versions // of this don't have this test. Some JVM versions // don't report this error sanely through URL code. if (!f.exists ()) throw new IOException ("no such file: " + f.getName ()); // FIXME: getAbsolutePath() seems buggy; I'm seeing components // like "/foo/../" which are clearly not "absolute" // and should have been resolved with the filesystem. // Substituting "/" would be wrong, "foo" may have been // symlinked ... the URL code will make that change // later, so that things can get _really_ broken! temp = f.getAbsolutePath (); if (File.separatorChar != '/') temp = temp.replace (File.separatorChar, '/'); if (!temp.startsWith ("/")) temp = "/" + temp; if (!temp.endsWith ("/") && f.isDirectory ()) temp = temp + "/"; return "file:" + temp; } /** * Returns a URL string. Note that if a malformed URL is provided, or * the parameter names a nonexistent file, the resulting URL may be * malformed. * * @param fileOrURL If this is the name of a file which exists, * then its URL is returned. Otherwise the argument is returned. */ public static String getURL (String fileOrURL) { try { return fileNameToURL (fileOrURL); } catch (Exception e) { return fileOrURL; } } // note: cloneable, this is just copied; unguarded against mods private Dictionary pubidMapping; /** * Constructs a resolver which understands how to map PUBLIC identifiers * to other URIs, typically for local copies of standard DTD components. * * @param dictionary maps PUBLIC identifiers to URIs. This is not * copied; subsequent modifications will be reported through the * resolution operations. */ public Resolver (Dictionary dict) { pubidMapping = dict; } // FIXME: want notion of a "system default" resolver, presumably // loaded with all sorts of useful stuff. At the same time need // a notion of resolver chaining (failure --> next) so that subsystems // can set up things that won't interfere with other ones. /** * This parses most MIME content type strings that have charset=... * encoding declarations to and returns the specified encoding. This * conforms to RFC 3023, and is useful when constructing InputSource * objects from URLConnection objects or other objects using MIME * content typing. * * @param contentType the MIME content type that will be parsed; must * not be null. * @return the appropriate encoding, or null if the content type is * not text and there's no charset=... attribute */ static public String getEncoding (String contentType) { // currently a dumb parsing algorithm that works "mostly" and handles // ..anything...charset=ABC // ..anything...charset=ABC;otherAttr=DEF // ..anything...charset=ABC (comment);otherAttr=DEF // ..anything...charset= "ABC" (comment);otherAttr=DEF int temp; String encoding; String defValue = null; if (contentType.startsWith ("text/")) defValue = contentType.startsWith ("text/html") ? "ISO-8859-1" : "US-ASCII"; // Assumes 'charset' is only an attribute name, not part // of a value, comment, or other attribute name // ALSO assumes no escaped values like "\;" or "\)" if ((temp = contentType.indexOf ("charset")) != -1) { // strip out everything up to '=' ... temp = contentType.indexOf ('=', temp); if (temp == -1) return defValue; encoding = contentType.substring (temp + 1); // ... and any subsequent attributes if ((temp = encoding.indexOf (';')) != -1) encoding = encoding.substring (0, temp); // ... and any comments after value if ((temp = encoding.indexOf ('(')) != -1) encoding = encoding.substring (0, temp); // ... then whitespace, and any (double) quotes encoding = encoding.trim (); if (encoding.charAt (0) == '"') encoding = encoding.substring (1, encoding.length () - 1); } else encoding = defValue; return encoding; } /** * Uses a local dictionary of public identifiers to resolve URIs, * normally with the goal of minimizing network traffic or latencies. */ public InputSource resolveEntity (String publicId, String systemId) throws IOException, SAXException { InputSource retval = null; String uri; if (publicId != null && ((uri = (String) pubidMapping.get (publicId)) != null)) { retval = new InputSource (uri); retval.setPublicId (publicId); } // Could do URN resolution here // URL resolution always done by parser // FIXME: chain to "next" resolver return retval; } }