summaryrefslogtreecommitdiff
path: root/libjava
diff options
context:
space:
mode:
author mark <mark@138bc75d-0d04-0410-961f-82ee72b054a4> 2001-10-08 21:03:34 +0000
committer mark <mark@138bc75d-0d04-0410-961f-82ee72b054a4> 2001-10-08 21:03:34 +0000
commit ee861ec5ad84d6e3c6ee0383a20e5065945fe799 (patch)
tree 1c973fd835c141156e93314f4a94116ab0a74675 /libjava
parent 4480f1aba4f4806026759d156b5239a37d77fe5a (diff)
download gcc-ee861ec5ad84d6e3c6ee0383a20e5065945fe799.tar.gz
* java/net/URLDecoder.java: Remerge with Classpath
* java/net/URLEncoder.java: Merge with Classpath
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@46098 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'libjava')
-rw-r--r-- libjava/ChangeLog 5
-rw-r--r-- libjava/java/net/URLDecoder.java 121
-rw-r--r-- libjava/java/net/URLEncoder.java 176
3 files changed, 232 insertions, 70 deletions
diff --git a/libjava/ChangeLog b/libjava/ChangeLog
index af67c16b8a4..ff7bbf64988 100644
--- a/libjava/ChangeLog
+++ b/libjava/ChangeLog
@@ -1,3 +1,8 @@
+2001-10-07 Mark Wielaard <mark@klomp.org>
+
+ * java/net/URLDecoder.java: Remerge with Classpath
+ * java/net/URLEncoder.java: Merge with Classpath
+
2001-10-08 Tom Tromey <tromey@redhat.com>
Fix for PR libgcj/4481:
diff --git a/libjava/java/net/URLDecoder.java b/libjava/java/net/URLDecoder.java
index ae7da78e120..9c51bc03b5c 100644
--- a/libjava/java/net/URLDecoder.java
+++ b/libjava/java/net/URLDecoder.java
@@ -28,56 +28,131 @@ package java.net;
import java.io.UnsupportedEncodingException;
-/**
- * This utility class contains one static method that converts a
+ /**
+ * This utility class contains static methods that convert a
* string encoded in the x-www-form-urlencoded format to the original
- * text. The x-www-form-urlencoded format
- * replaces certain disallowed characters with
- * encoded equivalents. All upper case and lower case letters in the
- * US alphabet remain as is, the space character (' ') is replaced with
- * '+' sign, and all other characters are converted to a "%XX" format
- * where XX is the hexadecimal representation of that character. Note
- * that since unicode characters are 16 bits, and this method encodes only
- * 8 bits of information, the lower 8 bits of the character are used.
+ * text. The x-www-form-urlencoded format replaces certain disallowed
+ * characters with encoded equivalents. All upper case and lower case
+ * letters in the US alphabet remain as is, the space character (' ')
+ * is replaced with '+' sign, and all other characters are converted to a
+ * "%XX" format where XX is the hexadecimal representation of that character
+ * in a given character encoding (default is "UTF-8").
* <p>
* This method is very useful for decoding strings sent to CGI scripts
*
- * Written using on-line Java Platform 1.2 API Specification.
+ * Written using on-line Java Platform 1.2/1.4 API Specification.
* Status: Believed complete and correct.
*
* @since 1.2
*
* @author Warren Levy <warrenl@cygnus.com>
* @author Aaron M. Renn (arenn@urbanophile.com) (documentation comments)
- * @date April 22, 1999.
+ * @author Mark Wielaard (mark@klomp.org)
*/
public class URLDecoder
{
-/**
+ /**
* This method translates the passed in string from x-www-form-urlencoded
- * format and returns it.
+ * format using the default encoding "UTF-8" to decode the hex encoded
+ * unsafe characters.
*
- * @param source The String to convert
+ * @param s the String to convert
*
- * @return The converted String
+ * @return the converted String
*/
public static String decode(String s)
{
+ try
+ {
+ return decode(s, "UTF-8");
+ }
+ catch (UnsupportedEncodingException uee)
+ {
+ // Should never happen since UTF-8 encoding should always be supported
+ return s;
+ }
+ }
+
+ /**
+ * This method translates the passed in string from x-www-form-urlencoded
+ * format using the given character encoding to decode the hex encoded
+ * unsafe characters.
+ * <p>
+ * This implementation will decode the string even if it contains
+ * unsafe characters (characters that should have been encoded) or if the
+ * two characters following a % do not represent a hex encoded byte.
+ * In those cases the unsafe character or the % character will be added
+ * verbatim to the decoded result.
+ *
+ * @param s the String to convert
+ * @param encoding the character encoding to use to decode the hex encoded
+ * unsafe characters
+ *
+ * @return the converted String
+ *
+ * @since 1.4
+ */
+ public static String decode(String s, String encoding)
+ throws UnsupportedEncodingException
+ {
+ StringBuffer result = new StringBuffer();
+
+ // First convert all '+' characters to spaces.
String str = s.replace('+', ' ');
- String result = "";
+
+ // Then go through the whole string looking for byte encoded characters
int i;
int start = 0;
+ byte[] bytes = null;
+ int length = str.length();
while ((i = str.indexOf('%', start)) >= 0)
{
- result = result + str.substring(start, i) +
- (char) Integer.parseInt(str.substring(i + 1, i + 3), 16);
- start = i + 3;
+ // Add all non-encoded characters to the result buffer
+ result.append(str.substring(start, i));
+ start = i;
+
+ // Get all consecutive encoded bytes
+ while ((i+2 < length) && (str.charAt(i) == '%'))
+ i += 3;
+
+ // Decode all these bytes
+ if ((bytes == null) || (bytes.length < ((i-start)/3)))
+ bytes = new byte[((i-start)/3)];
+
+ int index = 0;
+ try
+ {
+ while (start < i)
+ {
+ String sub = str.substring(start + 1, start + 3);
+ bytes[index] = (byte)Integer.parseInt(sub, 16);
+ index++;
+ start += 3;
+ }
+ }
+ catch (NumberFormatException nfe)
+ {
+ // One of the hex encoded strings was bad
+ }
+
+ // Add the bytes as characters according to the given encoding
+ result.append(new String(bytes, 0, index, encoding));
+
+ // Make sure we skip to just after a % sign
+ // There might not have been enough encoded characters after the %
+ // or the hex chars were not actually hex chars (NumberFormatException)
+ if (start < length && s.charAt(start) == '%')
+ {
+ result.append('%');
+ start++;
+ }
}
+ // Add any characters left
if (start < str.length())
- result = result + str.substring(start);
+ result.append(str.substring(start));
- return result;
+ return result.toString();
}
-} // class URLDecoder
+} // class URLDecoder
diff --git a/libjava/java/net/URLEncoder.java b/libjava/java/net/URLEncoder.java
index 6590dcf8feb..f39b30070d4 100644
--- a/libjava/java/net/URLEncoder.java
+++ b/libjava/java/net/URLEncoder.java
@@ -1,71 +1,153 @@
-// URLEncoder.java - Provides a method for encoding strings according to
-// application/x-www-form-urlencoded MIME type.
+/* URLEncoder.java -- Class to convert strings to a properly encoded URL
+ Copyright (C) 1998, 1999, 2001 Free Software Foundation, Inc.
-/* Copyright (C) 1999 Free Software Foundation
+This file is part of GNU Classpath.
- This file is part of libgcj.
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
-This software is copyrighted work licensed under the terms of the
-Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
-details. */
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+As a special exception, if you link this library with other files to
+produce an executable, this library does not by itself cause the
+resulting executable to be covered by the GNU General Public License.
+This exception does not however invalidate any other reasons why the
+executable file might be covered by the GNU General Public License. */
package java.net;
-import java.io.UnsupportedEncodingException;
-/**
- * @author Warren Levy <warrenl@cygnus.com>
- * @date April 22, 1999.
- */
+import java.io.UnsupportedEncodingException;
/**
- * Written using on-line Java Platform 1.2 API Specification, as well
+ * Written using on-line Java Platform 1.2/1.4 API Specification, as well
* as "The Java Class Libraries", 2nd edition (Addison-Wesley, 1998).
* Status: Believed complete and correct.
*/
+ /**
+ * This utility class contains static methods that convert a
+ * string into a fully encoded URL string in x-www-form-urlencoded
+ * format. This format replaces certain disallowed characters with
+ * encoded equivalents. All upper case and lower case letters in the
+ * US alphabet remain as is, the space character (' ') is replaced with
+ * '+' sign, and all other characters are converted to a "%XX" format
+ * where XX is the hexadecimal representation of that character in a
+ * certain encoding (by default "UTF-8").
+ * <p>
+ * This class is very useful for encoding strings to be sent to CGI scripts.
+ *
+ * @author Aaron M. Renn (arenn@urbanophile.com)
+ * @author Warren Levy <warrenl@cygnus.com>
+ * @author Mark Wielaard (mark@klomp.org)
+ */
public class URLEncoder
{
- // This method, per the JCL, is conservative in that it encodes
- // some "allowable" characters as % triplets.
+ /**
+ * This method translates the passed in string into x-www-form-urlencoded
+ * format using the standard "UTF-8" character encoding to hex-encode the
+ * unsafe characters.
+ *
+ * @param s The String to convert
+ *
+ * @return The converted String
+ */
public static String encode(String s)
{
- // Get the bytes in ISO-Latin-1 (i.e. 8859_1) per the JCL.
- // Even though it is the default in most cases, it's specified here
- // just in case System.getProperty("file.encoding") is not "8859_1".
- String result = "";
try
{
- byte[] buf = s.getBytes("8859_1");
- int start = 0;
- for (int i = 0; i < buf.length; i++)
- // For efficiency, check the byte in order of most likely
- // possibility so as to minimize the number of comparisons.
- // Hence, exclude all the alphanumeric & allowed special chars first.
- if ((buf[i] >= 'a' && buf[i] <= 'z') ||
- (buf[i] >= 'A' && buf[i] <= 'Z') ||
- (buf[i] >= '0' && buf[i] <= '9') ||
- buf[i] == '-' || buf[i] == '_' || buf[i] == '.' || buf[i] == '*')
- ; // This is the most likely case so exclude first for efficiency.
- else if (buf[i] == ' ')
- buf[i] = (byte) '+'; // Replace space char with plus symbol.
- else
- {
- result = result + new String(buf, start, i - start, "8859_1") +
- "%" + Integer.toHexString(((int) buf[i]) & 0xFF);
- start = i + 1;
- }
-
- // Append remainder of allowable chars from the string, if any.
- if (start < buf.length)
- result = result +
- new String(buf, start, buf.length - start, "8859_1");
+ return encode(s, "UTF-8");
}
- catch (UnsupportedEncodingException ex)
+ catch (UnsupportedEncodingException uee)
{
- // This should never happen as "8859_1" is the default encoding.
+ // Should never happen since UTF-8 should always be supported
return s;
}
+ }
+
+ /**
+ * This method translates the passed in string into x-www-form-urlencoded
+ * format using the given character encoding to hex-encode the unsafe characters.
+ *
+ * @param s The String to convert
+ * @param encoding The encoding to use for unsafe characters
+ *
+ * @return The converted String
+ *
+ * @since 1.4
+ */
+ public static String encode(String s, String encoding)
+ throws UnsupportedEncodingException
+ {
+ StringBuffer result = new StringBuffer();
+ int length = s.length();
+ int start = 0;
+ int i = 0;
- return result;
+ while (true)
+ {
+ while ( i < length && isSafe(s.charAt(i)) )
+ i++;
+
+ // Safe characters can just be added
+ result.append(s.substring(start, i));
+
+ // Are we done?
+ if (i >= length)
+ return result.toString();
+ else if (s.charAt(i) == ' ')
+ {
+ result.append('+'); // Replace space char with plus symbol.
+ i++;
+ }
+ else
+ {
+ // Get all unsafe characters
+ start = i;
+ char c;
+ while ( i < length && (c = s.charAt(i)) != ' ' && !isSafe(c) )
+ i++;
+
+ // Convert them to %XY encoded strings
+ String unsafe = s.substring(start,i);
+ byte bytes[] = unsafe.getBytes(encoding);
+ for (int j = 0; j < bytes.length; j++)
+ {
+ result.append('%');
+ result.append(Integer.toHexString(((int) bytes[j]) & 0xFF));
+ }
+ }
+ start = i;
+ }
}
-}
+
+ /**
+ * Private static method that returns true if the given char is either
+ * an uppercase or lowercase letter from 'a' to 'z', or a digit from
+ * '0' to '9', or one of the characters '-', '_', '.' or '*'. Such
+ * 'safe' characters don't have to be URL encoded.
+ */
+ private static boolean isSafe(char c)
+ {
+ return ((c >= 'a' && c <= 'z') ||
+ (c >= 'A' && c <= 'Z') ||
+ (c >= '0' && c <= '9') ||
+ c == '-' || c == '_' || c == '.' || c == '*');
+ }
+
+ /**
+ * Private constructor that does nothing. Included to avoid a default
+ * public constructor being created by the compiler.
+ */
+ private URLEncoder() { }
+
+} // class URLEncoder